mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-12 05:17:21 +01:00
Fix for Windows UTF-8 input (#840)
Read console input as UTF-16 on Windows, because UTF-8 input does not work there: multibyte characters are read as zeros
This commit is contained in:
parent
f2d1c47294
commit
aaf3b23deb
@ -16,12 +16,19 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined (_WIN32)
|
#if defined (_WIN32)
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <io.h>
|
||||||
#pragma comment(lib,"kernel32.lib")
|
#pragma comment(lib,"kernel32.lib")
|
||||||
extern "C" __declspec(dllimport) void* __stdcall GetStdHandle(unsigned long nStdHandle);
|
extern "C" __declspec(dllimport) void* __stdcall GetStdHandle(unsigned long nStdHandle);
|
||||||
extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHandle, unsigned long* lpMode);
|
extern "C" __declspec(dllimport) int __stdcall GetConsoleMode(void* hConsoleHandle, unsigned long* lpMode);
|
||||||
extern "C" __declspec(dllimport) int __stdcall SetConsoleMode(void* hConsoleHandle, unsigned long dwMode);
|
extern "C" __declspec(dllimport) int __stdcall SetConsoleMode(void* hConsoleHandle, unsigned long dwMode);
|
||||||
extern "C" __declspec(dllimport) int __stdcall SetConsoleCP(unsigned int wCodePageID);
|
extern "C" __declspec(dllimport) int __stdcall SetConsoleCP(unsigned int wCodePageID);
|
||||||
extern "C" __declspec(dllimport) int __stdcall SetConsoleOutputCP(unsigned int wCodePageID);
|
extern "C" __declspec(dllimport) int __stdcall SetConsoleOutputCP(unsigned int wCodePageID);
|
||||||
|
// Hand-written prototype of the Win32 WideCharToMultiByte function (kernel32).
// The last parameter is LPBOOL in the Win32 API, i.e. a pointer to int rather than to C++ bool.
extern "C" __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int CodePage, unsigned long dwFlags,
                                                                   const wchar_t * lpWideCharStr, int cchWideChar,
                                                                   char * lpMultiByteStr, int cbMultiByte,
                                                                   const char * lpDefaultChar, int * lpUsedDefaultChar);
|
||||||
|
#define CP_UTF8 65001
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
||||||
@ -307,12 +314,20 @@ void win32_console_init(bool enable_color) {
|
|||||||
SetConsoleMode(hConOut, dwMode | 0x4); // ENABLE_VIRTUAL_TERMINAL_PROCESSING (0x4)
|
SetConsoleMode(hConOut, dwMode | 0x4); // ENABLE_VIRTUAL_TERMINAL_PROCESSING (0x4)
|
||||||
}
|
}
|
||||||
// Set console output codepage to UTF8
|
// Set console output codepage to UTF8
|
||||||
SetConsoleOutputCP(65001); // CP_UTF8
|
SetConsoleOutputCP(CP_UTF8);
|
||||||
}
|
}
|
||||||
void* hConIn = GetStdHandle((unsigned long)-10); // STD_INPUT_HANDLE (-10)
|
void* hConIn = GetStdHandle((unsigned long)-10); // STD_INPUT_HANDLE (-10)
|
||||||
if (hConIn && hConIn != (void*)-1 && GetConsoleMode(hConIn, &dwMode)) {
|
if (hConIn && hConIn != (void*)-1 && GetConsoleMode(hConIn, &dwMode)) {
|
||||||
// Set console input codepage to UTF8
|
// Switch stdin to wide-character (UTF-16) text mode
|
||||||
SetConsoleCP(65001); // CP_UTF8
|
_setmode(_fileno(stdin), _O_WTEXT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convert a wide Unicode string to an UTF8 string
|
||||||
|
void win32_utf8_encode(const std::wstring & wstr, std::string & str) {
|
||||||
|
int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), NULL, 0, NULL, NULL);
|
||||||
|
std::string strTo(size_needed, 0);
|
||||||
|
WideCharToMultiByte(CP_UTF8, 0, &wstr[0], (int)wstr.size(), &strTo[0], size_needed, NULL, NULL);
|
||||||
|
str = strTo;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -92,4 +92,5 @@ void set_console_color(console_state & con_st, console_color_t color);
|
|||||||
|
|
||||||
#if defined (_WIN32)
|
#if defined (_WIN32)
|
||||||
void win32_console_init(bool enable_color);
|
void win32_console_init(bool enable_color);
|
||||||
|
void win32_utf8_encode(const std::wstring & wstr, std::string & str);
|
||||||
#endif
|
#endif
|
||||||
|
@ -386,10 +386,19 @@ int main(int argc, char ** argv) {
|
|||||||
std::string line;
|
std::string line;
|
||||||
bool another_line = true;
|
bool another_line = true;
|
||||||
do {
|
do {
|
||||||
|
#if defined(_WIN32)
|
||||||
|
std::wstring wline;
|
||||||
|
if (!std::getline(std::wcin, wline)) {
|
||||||
|
// input stream is bad or EOF received
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
win32_utf8_encode(wline, line);
|
||||||
|
#else
|
||||||
if (!std::getline(std::cin, line)) {
|
if (!std::getline(std::cin, line)) {
|
||||||
// input stream is bad or EOF received
|
// input stream is bad or EOF received
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
if (line.empty() || line.back() != '\\') {
|
if (line.empty() || line.back() != '\\') {
|
||||||
another_line = false;
|
another_line = false;
|
||||||
} else {
|
} else {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user