aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Tomáš Pazdiora <tomas.pazdiora@gmail.com> 2023-04-08 17:49:39 +0200
committer GitHub <noreply@github.com> 2023-04-08 17:49:39 +0200
commit aaf3b23debc1fe1a06733c8c6468fb84233cc44f (patch)
tree c18392a399ef7369b5df1719ab7547e649408bb7
parent f2d1c472946dee2aba9077e8df73346796752b10 (diff)
fix for windows utf-8 input (#840)
Use UTF-16 as input on Windows, since UTF-8 does not work and reads multibyte characters as zeros
-rw-r--r-- examples/common.cpp 21
-rw-r--r-- examples/common.h 1
-rw-r--r-- examples/main/main.cpp 9
3 files changed, 28 insertions, 3 deletions
diff --git a/examples/common.cpp b/examples/common.cpp
index 5400f6b..b27aa6c 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -16,12 +16,19 @@
#endif
#if defined (_WIN32)
+#include <fcntl.h>
+#include <io.h>
#pragma comment(lib,"kernel32.lib")
extern "C" __declspec(dllimport) void* __stdcall GetStdHandle(unsigned long nStdHandle);
extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHandle, unsigned long* lpMode);
extern "C" __declspec(dllimport) int __stdcall SetConsoleMode(void* hConsoleHandle, unsigned long dwMode);
extern "C" __declspec(dllimport) int __stdcall SetConsoleCP(unsigned int wCodePageID);
extern "C" __declspec(dllimport) int __stdcall SetConsoleOutputCP(unsigned int wCodePageID);
+// Hand-written prototype for kernel32's WideCharToMultiByte.
+// lpUsedDefaultChar is LPBOOL in the Win32 API; BOOL is a typedef for int, so it is declared as int * here
+// (a C++ bool * would not match the real prototype).
+extern "C" __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int CodePage, unsigned long dwFlags,
+ const wchar_t * lpWideCharStr, int cchWideChar,
+ char * lpMultiByteStr, int cbMultiByte,
+ const char * lpDefaultChar, int * lpUsedDefaultChar);
+#define CP_UTF8 65001
#endif
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
@@ -307,12 +314,20 @@ void win32_console_init(bool enable_color) {
SetConsoleMode(hConOut, dwMode | 0x4); // ENABLE_VIRTUAL_TERMINAL_PROCESSING (0x4)
}
// Set console output codepage to UTF8
- SetConsoleOutputCP(65001); // CP_UTF8
+ SetConsoleOutputCP(CP_UTF8);
}
void* hConIn = GetStdHandle((unsigned long)-10); // STD_INPUT_HANDLE (-10)
if (hConIn && hConIn != (void*)-1 && GetConsoleMode(hConIn, &dwMode)) {
- // Set console input codepage to UTF8
- SetConsoleCP(65001); // CP_UTF8
+ // Switch stdin to UTF-16 wide-text mode (this changes the CRT stream translation mode, not the console codepage)
+ _setmode(_fileno(stdin), _O_WTEXT);
}
}
+
+// Convert a UTF-16 wide string to a UTF-8 string.
+//   wstr: input wide string (may be empty)
+//   str:  receives the UTF-8 bytes; left empty when wstr is empty or the conversion fails
+void win32_utf8_encode(const std::wstring & wstr, std::string & str) {
+    str.clear();
+    if (wstr.empty()) {
+        // WideCharToMultiByte treats a zero-length input as an error, so skip the API call entirely
+        return;
+    }
+    // First call: query the required output size in bytes
+    const int size_needed = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), (int)wstr.size(), NULL, 0, NULL, NULL);
+    if (size_needed <= 0) {
+        return;
+    }
+    // Second call: encode directly into the caller's string (no temporary, no extra copy)
+    str.resize(static_cast<size_t>(size_needed));
+    const int written = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), (int)wstr.size(), &str[0], size_needed, NULL, NULL);
+    // Keep only what was actually written; a failed call (0) leaves str empty instead of full of NUL bytes
+    str.resize(written > 0 ? static_cast<size_t>(written) : 0);
+}
#endif
diff --git a/examples/common.h b/examples/common.h
index 1505aa9..7a8848f 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -92,4 +92,5 @@ void set_console_color(console_state & con_st, console_color_t color);
#if defined (_WIN32)
void win32_console_init(bool enable_color);
+void win32_utf8_encode(const std::wstring & wstr, std::string & str);
#endif
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 67a34e6..d59eeb4 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -386,10 +386,19 @@ int main(int argc, char ** argv) {
std::string line;
bool another_line = true;
do {
+#if defined(_WIN32)
+ std::wstring wline;
+ if (!std::getline(std::wcin, wline)) {
+ // input stream is bad or EOF received
+ return 0;
+ }
+ win32_utf8_encode(wline, line);
+#else
if (!std::getline(std::cin, line)) {
// input stream is bad or EOF received
return 0;
}
+#endif
if (line.empty() || line.back() != '\\') {
another_line = false;
} else {