Skip to content

Commit

Permalink
fixed windows unicode/utf8 stuff
Browse files Browse the repository at this point in the history
  • Loading branch information
Tieske committed May 17, 2024
1 parent 41e0510 commit 3f17fc3
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 68 deletions.
14 changes: 8 additions & 6 deletions examples/readline.lua
Original file line number Diff line number Diff line change
Expand Up @@ -253,11 +253,15 @@ local handle_key do -- keyboard input handler
end,

home = function(self)
self:set_cursor(1, 1)
local new_pos = 1
io.write(cursor_move_horiz(new_pos - self.position))
self.position = new_pos
end,

["end"] = function(self)
self:set_cursor(1, #self.prompt + #self.value + 1)
local new_pos = #self.value + 1
io.write(cursor_move_horiz(new_pos - self.position))
self.position = new_pos
end,

delete = function(self)
Expand Down Expand Up @@ -448,10 +452,8 @@ sys.tcsetattr(io.stdin, sys.TCSANOW, {

local rl = readline.new{
prompt = "Enter something: ",
max_length = 20,
-- value = "abcde",
-- value = "Hello, 你-好 World!",
value = "你好你好你",
max_length = 60,
value = "Hello, 你-好 World 🚀!",
-- position = 2,
exit_keys = {key_sequences.enter, "\27", "\t", "\27[Z"}, -- enter, escape, tab, shift-tab
}
Expand Down
120 changes: 64 additions & 56 deletions src/term.c
Original file line number Diff line number Diff line change
Expand Up @@ -703,102 +703,111 @@ static int lst_getnonblock(lua_State *L)
static char utf8_buffer[4];
static int utf8_buffer_len = 0;
static int utf8_buffer_index = 0;
#endif


/***
Reads a key from the console non-blocking.
Only available on Windows. This "w" version reads a wide character and
converts it to UTF-8. Multi-byte sequences will be buffered and returned
one byte at a time.
Reads a key from the console without blocking. This function should not be
called directly, but through the `system.readkey` or `system.readansi`
functions. It returns the next byte from the input stream, or `nil` if no key
was pressed.
On POSIX, `io.stdin` must be set to non-blocking mode using `setnonblock`
before calling this function; otherwise the call will block. No conversions
are done on POSIX, so the byte read is returned as-is.
On Windows, this reads a wide character and converts it to UTF-8. Multi-byte
sequences are buffered internally and returned one byte at a time.
@function _readkeyw
@function _readkey
@treturn[1] integer the byte read from the input stream
@treturn[2] nil if no key was pressed
@treturn[3] nil on error
@treturn[3] string error message
@treturn[3] int errnum (on posix)
*/
static int lst_readkeyw(lua_State *L) {
// First, try to return any remaining characters from the buffer
static int lst_readkey(lua_State *L) {
#ifdef _WIN32
if (utf8_buffer_len > 0) {
// Buffer not empty, return the next byte
lua_pushinteger(L, (unsigned char)utf8_buffer[utf8_buffer_index]);
utf8_buffer_index++;
utf8_buffer_len--;

// printf("returning from buffer: %d\n", luaL_checkinteger(L, -1));
if (utf8_buffer_len == 0) {
utf8_buffer_index = 0; // Reset the buffer index if empty
utf8_buffer_index = 0;
}
printf("returning from buffer: %d\n", luaL_checkinteger(L, -1));

return 1;
}

if (!_kbhit()) {
return 0;
}
printf("\n---\n");
printf("kb was hit, reading...\n");

// UINT oldCp = GetConsoleCP(); // Store the original code page
// SetConsoleCP(65001); // Set console code page to UTF-8
WCHAR wc = _getwch();
// SetConsoleCP(oldCp); // Set console output code page to UTF-8

wchar_t wc = _getwch();
// printf("----\nread wchar_t: %x\n", wc);
if (wc == WEOF) {
// Error handling for end-of-file or read error
lua_pushnil(L);
lua_pushliteral(L, "read error");
return 2;
}
printf("got wc: %d\n", wc);

// Convert WCHAR to UTF-8
utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, utf8_buffer, sizeof(utf8_buffer), NULL, NULL);
utf8_buffer_index = 0;
if (sizeof(wchar_t) == 2) {
// printf("2-byte wchar_t\n");
// only 2 bytes wide, not 4
if (wc >= 0xD800 && wc <= 0xDBFF) {
// printf("2-byte wchar_t, received high, getting low...\n");

// we got a high surrogate, so we need to read the next one as the low surrogate
if (!_kbhit()) {
lua_pushnil(L);
lua_pushliteral(L, "incomplete surrogate pair");
return 2;
}

wchar_t wc2 = _getwch();
// printf("read wchar_t 2: %x\n", wc2);
if (wc2 == WEOF) {
lua_pushnil(L);
lua_pushliteral(L, "read error");
return 2;
}

if (wc2 < 0xDC00 || wc2 > 0xDFFF) {
lua_pushnil(L);
lua_pushliteral(L, "invalid surrogate pair");
return 2;
}
// printf("2-byte pair complete now\n");
wchar_t wch_pair[2] = { wc, wc2 };
utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, wch_pair, 2, utf8_buffer, sizeof(utf8_buffer), NULL, NULL);

} else {
// printf("2-byte wchar_t, no surrogate pair\n");
// not a high surrogate, so we can handle just the 2 bytes directly
utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, utf8_buffer, sizeof(utf8_buffer), NULL, NULL);
}

} else {
// printf("4-byte wchar_t\n");
// 4 bytes wide, so handle as UTF-32 directly
utf8_buffer_len = WideCharToMultiByte(CP_UTF8, 0, &wc, 1, utf8_buffer, sizeof(utf8_buffer), NULL, NULL);
}
// printf("utf8_buffer_len: %d\n", utf8_buffer_len);
utf8_buffer_index = 0;
if (utf8_buffer_len <= 0) {
lua_pushnil(L);
lua_pushliteral(L, "UTF-8 conversion error");
return 2;
}

// Return the first byte and prepare the rest for subsequent calls
lua_pushinteger(L, (unsigned char)utf8_buffer[utf8_buffer_index]);
utf8_buffer_index++;
utf8_buffer_len--;
printf("returning now: %d\n", luaL_checkinteger(L, -1));
// printf("returning from buffer: %x\n", luaL_checkinteger(L, -1));
return 1;
}

#endif

/***
Reads a key from the console non-blocking.
On Posix, `io.stdin` must be set to non-blocking mode using `setnonblock`
before calling this function. Otherwise it will block.
@function _readkey
@treturn[1] integer the byte read from the input stream
@treturn[2] nil if no key was pressed
@treturn[3] nil on error
@treturn[3] string error message
@treturn[3] int errnum (on posix)
*/
static int lst_readkey(lua_State *L) {
#ifdef _WIN32
if (_kbhit()) {
int ch = _getch();
if (ch == EOF) {
// Error handling for end-of-file or read error
lua_pushnil(L);
lua_pushliteral(L, "_getch error");
return 2;
}
lua_pushinteger(L, (unsigned char)ch);
return 1;
}
return 0;

#else
// Posix implementation
char ch;
ssize_t bytes_read = read(STDIN_FILENO, &ch, 1);
if (bytes_read > 0) {
Expand Down Expand Up @@ -1020,7 +1029,6 @@ static luaL_Reg func[] = {
{ "tcsetattr", lst_tcsetattr },
{ "getnonblock", lst_setnonblock },
{ "setnonblock", lst_setnonblock },
{ "_readkeyw", lst_readkeyw },
{ "_readkey", lst_readkey },
{ "termsize", lst_termsize },
{ "utf8width", lst_utf8width },
Expand Down
8 changes: 2 additions & 6 deletions system/init.lua
Original file line number Diff line number Diff line change
Expand Up @@ -226,12 +226,12 @@ do
end

local interval = 0.0125
local key = sys._readkeyw()
local key = sys._readkey()
while key == nil and timeout > 0 do
sys.sleep(math.min(interval, timeout))
timeout = timeout - interval
interval = math.min(0.2, interval * 2)
key = sys._readkeyw()
key = sys._readkey()
end

if key then
Expand Down Expand Up @@ -279,7 +279,6 @@ do
-- read a new key
local err
key, err = sys.readkey(timeout)
-- print("\n"..key, err, string.char(key))
if key == nil then -- timeout or error
return nil, err
end
Expand All @@ -304,7 +303,6 @@ do
if utf8_length == 1 then
-- single byte character
utf8_length = nil
-- print("readansi: returning", "single char", key)
return string.char(key), "char"
else
-- UTF8 sequence detected
Expand All @@ -326,7 +324,6 @@ do

if #sequence == utf8_length then
-- end of sequence, return the full sequence
-- print("readansi: returning", "utf8 char", unpack(sequence))
local result = string.char(unpack(sequence))
sequence = nil
utf8_length = nil
Expand All @@ -346,7 +343,6 @@ do

if (key >= 65 and key <= 90) or (key >= 97 and key <= 126) then
-- end of sequence, return the full sequence
-- print("readansi: returning", "ansi", unpack(sequence))
local result = string.char(unpack(sequence))
sequence = nil
return result, "ansi"
Expand Down

0 comments on commit 3f17fc3

Please sign in to comment.