|
@@ -111,21 +111,15 @@ PAL_IsWINVersion_Exit:
|
|
|
|
|
|
CODEPAGE
|
|
|
PAL_DetectCodePage(
|
|
|
- void
|
|
|
+ const char * filename
|
|
|
)
|
|
|
{
|
|
|
- // Try to convert the content of word.dat with different codepages,
|
|
|
- // and use the codepage with minimal inconvertible characters
|
|
|
- // Works fine currently for detecting Simplified Chinese & Traditional Chinese.
|
|
|
- // Since we're using language files to support additional languages, this detection
|
|
|
- // should be fine for us now.
|
|
|
-
|
|
|
FILE *fp;
|
|
|
char *word_buf = NULL;
|
|
|
long word_len = 0;
|
|
|
- CODEPAGE cp = CP_BIG5; // Defaults to BIG5
|
|
|
+ CODEPAGE cp = CP_BIG5;
|
|
|
|
|
|
- if (NULL != (fp = UTIL_OpenFile("word.dat")))
|
|
|
+ if (NULL != (fp = UTIL_OpenFile(filename)))
|
|
|
{
|
|
|
fseek(fp, 0, SEEK_END);
|
|
|
word_len = ftell(fp);
|
|
@@ -143,54 +137,15 @@ PAL_DetectCodePage(
|
|
|
|
|
|
if (word_buf)
|
|
|
{
|
|
|
- // The WORD.DAT should not contain characters outside these ranges
|
|
|
- const static int valid_ranges[][2] = {
|
|
|
- { 0x4E00, 0x9FFF }, // CJK Unified Ideographs
|
|
|
- { 0x3400, 0x4DBF }, // CJK Unified Ideographs Extension A
|
|
|
- { 0xF900, 0xFAFF }, // CJK Compatibility Ideographs
|
|
|
- { 0x0030, 0x0039 }, // 0 - 9, ASCII
|
|
|
- { 0xFF10, 0xFF19 }, // 0 - 9, full wide
|
|
|
- { 0x0041, 0x005A }, // A - Z, ASCII
|
|
|
- { 0xFF21, 0xFF3A }, // A - Z, full wide
|
|
|
- { 0x0061, 0x007A }, // a - z, ASCII
|
|
|
- { 0xFF41, 0xFF5A }, // a - z, full wide
|
|
|
- };
|
|
|
- int min_invalids = INT_MAX;
|
|
|
-
|
|
|
- for (CODEPAGE i = CP_BIG5; i <= CP_GBK; i++)
|
|
|
- {
|
|
|
- int invalids, length = PAL_MultiByteToWideCharCP(i, word_buf, word_len, NULL, 0);
|
|
|
- WCHAR *wbuf = (WCHAR *)malloc(length * sizeof(WCHAR));
|
|
|
- PAL_MultiByteToWideCharCP(i, word_buf, word_len, wbuf, length);
|
|
|
- for (int j = invalids = 0; j < length; j++)
|
|
|
- {
|
|
|
- int score = 1;
|
|
|
- if (iswspace(wbuf[j]) != 0) continue;
|
|
|
- for (int k = 0; k < sizeof(valid_ranges) / sizeof(valid_ranges[0]); k++)
|
|
|
- {
|
|
|
- if (wbuf[j] >= valid_ranges[k][0] &&
|
|
|
- wbuf[j] <= valid_ranges[k][1])
|
|
|
- {
|
|
|
- score = 0;
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- invalids += score;
|
|
|
- }
|
|
|
- // code page with less invalid chars wins
|
|
|
- if (invalids < min_invalids)
|
|
|
- {
|
|
|
- min_invalids = invalids;
|
|
|
- cp = i;
|
|
|
- }
|
|
|
- free(wbuf);
|
|
|
- }
|
|
|
+ int probability;
|
|
|
+ cp = PAL_DetectCodePageForString(word_buf, (int)word_len, cp, &probability);
|
|
|
+
|
|
|
free(word_buf);
|
|
|
|
|
|
- if (min_invalids == 0)
|
|
|
- UTIL_LogOutput(LOGLEVEL_INFO, "PAL_DetectCodePage detected code page: %s\n", cp ? "GBK" : "BIG5");
|
|
|
+ if (probability == 100)
|
|
|
+ UTIL_LogOutput(LOGLEVEL_INFO, "PAL_DetectCodePage detected code page '%s' for %s\n", cp ? "GBK" : "BIG5", filename);
|
|
|
else
|
|
|
- UTIL_LogOutput(LOGLEVEL_WARNING, "PAL_DetectCodePage detected possible code page: %s [%d invalids]\n", cp ? "GBK" : "BIG5", min_invalids);
|
|
|
+ UTIL_LogOutput(LOGLEVEL_WARNING, "PAL_DetectCodePage detected the most possible (%d) code page '%s' for %s\n", probability, cp ? "GBK" : "BIG5", filename);
|
|
|
}
|
|
|
|
|
|
return cp;
|
|
@@ -235,7 +190,7 @@ PAL_InitGlobals(
|
|
|
//
|
|
|
// Detect game language only when no message file specified
|
|
|
//
|
|
|
- if (!gConfig.pszMsgFile) PAL_SetCodePage(PAL_DetectCodePage());
|
|
|
+ if (!gConfig.pszMsgFile) PAL_SetCodePage(PAL_DetectCodePage("word.dat"));
|
|
|
|
|
|
//
|
|
|
// Set decompress function
|