371 lines
9.3 KiB
C++
371 lines
9.3 KiB
C++
#include "stdafx.h"
|
|
#include "Msg.h"
|
|
#include "ConvEng.h"
|
|
#include "TextFile.h"
|
|
|
|
#ifdef RTF_SUPPORT
|
|
#include "RtfParser.h"
|
|
#endif
|
|
|
|
// Convert a plain text file image between ANSI and Unicode.
//
// A Unicode text file is recognized by the leading 0xFEFF word (the
// "Unicode text file flag"); an ANSI file must not start with it.
//
// Parameters:
//   pbySource        - source file contents
//   dwFileSize       - size of the source, in bytes
//   pbyTarget        - buffer that receives the converted text
//   dwTargetSize     - capacity of pbyTarget, forwarded to the string
//                      converters
//   fAnsiToUnicode   - TRUE: ANSI -> Unicode, FALSE: Unicode -> ANSI
//   pnTargetFileSize - receives the number of bytes produced in pbyTarget
//
// Returns TRUE on success. Returns FALSE (after reporting through
// MsgNotUnicodeTextSourceFile / MsgNotAnsiTextSourceFile) when the source
// flag does not match the requested direction, and FALSE when the target
// buffer cannot hold the flag or the conversion yields a zero size.
BOOL ConvertTextFile(
    PBYTE pbySource,
    DWORD dwFileSize,
    PBYTE pbyTarget,
    DWORD dwTargetSize,
    BOOL  fAnsiToUnicode,
    PINT  pnTargetFileSize)
{
    const WORD wUnicodeFlag = 0xFEFF;

    // Only inspect the leading word when the source really contains one;
    // an empty or 1-byte source can never carry the Unicode flag.
    BOOL fHasUnicodeFlag = (dwFileSize >= sizeof(WORD)) &&
                           (*((PWORD)pbySource) == wUnicodeFlag);

    if (!fAnsiToUnicode && !fHasUnicodeFlag) {
        MsgNotUnicodeTextSourceFile();
        return FALSE;
    }

    if (fAnsiToUnicode && fHasUnicodeFlag) {
        MsgNotAnsiTextSourceFile();
        return FALSE;
    }

    // Give the out parameter a defined value on every remaining path
    *pnTargetFileSize = 0;

    if (fAnsiToUnicode) {
        PWCH pwchTarget = (PWCH)pbyTarget;

        // The flag itself needs one WCHAR; this also keeps the
        // "dwTargetSize-2" below from wrapping around.
        if (dwTargetSize < sizeof(WCHAR)) {
            return FALSE;
        }

        // Put Unicode text file flag
        *pwchTarget = wUnicodeFlag;

        // Null file: the output is the flag only, reported in bytes like
        // every other path
        if (!dwFileSize) {
            *pnTargetFileSize = (int)sizeof(WCHAR);
            return TRUE;
        }

        // Convert; the size is counted in WCHARs first (1 for the flag),
        // then scaled to bytes
        *pnTargetFileSize = 1;
        *pnTargetFileSize += AnsiStrToUnicodeStr(pbySource, dwFileSize,
            pwchTarget+1, dwTargetSize-2);
        *pnTargetFileSize *= sizeof(WCHAR);

        // NOTE(review): the size is at least sizeof(WCHAR) here unless the
        // converter returns a negative value, so a converter failure that
        // returns 0 is not detected below - confirm AnsiStrToUnicodeStr's
        // failure convention.
    } else {
        // The flag was verified above, so dwFileSize >= sizeof(WCHAR) here.
        // Skip the Unicode text file flag.
        PWCH pwchData = (PWCH)pbySource + 1;

        // Null file w/ Unicode flag only: empty ANSI output (size is 0)
        if (dwFileSize == sizeof(WCHAR)) {
            return TRUE;
        }

        // Convert
        *pnTargetFileSize = UnicodeStrToAnsiStr(pwchData,
            dwFileSize/sizeof(WCHAR) - 1, (PCHAR)pbyTarget, dwTargetSize);
    }

    // A zero-sized result for a non-empty source is treated as failure
    return *pnTargetFileSize ? TRUE : FALSE;
}
|
|
|
|
// Convert an HTML file image between ANSI and Unicode and rewrite its
// charset declaration to match the new encoding.
//
// The text is converted with ConvertTextFile. Then the first occurrence of
// "charset=" up to (not including) the next double quote is replaced by
// "charset=unicode" (ANSI -> Unicode) or "charset=gb18030"
// (Unicode -> ANSI), shifting the remainder of the buffer as needed.
//
// Parameters:
//   pbySource        - source file contents
//   dwFileSize       - size of the source, in bytes
//   pbyTarget        - buffer that receives the converted file
//   dwTargetSize     - capacity of pbyTarget, in bytes
//   fAnsiToUnicode   - TRUE: ANSI -> Unicode, FALSE: Unicode -> ANSI
//   pnTargetFileSize - receives the number of bytes produced in pbyTarget
//
// Returns TRUE when the text was converted and the charset was either
// replaced or not present at all. Returns FALSE when the text conversion
// fails, when no double quote follows "charset=" within 20 characters, or
// when the rewritten text would not fit into dwTargetSize.
BOOL ConvertHtmlFile(
    PBYTE pbySource,
    DWORD dwFileSize,
    PBYTE pbyTarget,
    DWORD dwTargetSize,
    BOOL  fAnsiToUnicode,
    PINT  pnTargetFileSize)
{
    // Reserve the last space to explicitly assign zero to the last
    // character in the buffer
    const DWORD dwReserved = fAnsiToUnicode ? (DWORD)sizeof(WCHAR)
                                            : (DWORD)sizeof(char);

    // Without this check "dwTargetSize - dwReserved" would wrap around and
    // advertise a huge buffer to ConvertTextFile
    if (dwTargetSize < dwReserved) {
        return FALSE;
    }

    if (!ConvertTextFile(pbySource, dwFileSize, pbyTarget,
            dwTargetSize - dwReserved, fAnsiToUnicode, pnTargetFileSize)) {
        return FALSE;
    }

    // Change charset
    if (fAnsiToUnicode) {
        const WCHAR* const wszUnicodeCharset = L"charset=unicode";
        const int cchCharset = (int)wcslen(wszUnicodeCharset);

        // Terminate the converted text so the string searches stop at its end
        *((PWCH)(pbyTarget + *pnTargetFileSize)) = 0;

        WCHAR* pwchCharset = wcsstr((PWCH)pbyTarget, L"charset=");
        if (!pwchCharset) {
            // Some Html files have no code page flag;
            // the charset replace step is skipped for them
            return TRUE;
        }

        WCHAR* pwchQuote = wcschr(pwchCharset, L'\"');
        if (!pwchQuote || (pwchQuote - pwchCharset >= 20)) {
            return FALSE;
        }

        // Signed on purpose: the new declaration may be shorter than the
        // old one, giving a negative increase
        const int nLengthIncrease = cchCharset - (int)(pwchQuote - pwchCharset);
        const int cbIncrease = nLengthIncrease * (int)sizeof(WCHAR);

        if (*pnTargetFileSize + cbIncrease > (int)dwTargetSize) {
            return FALSE;
        }

        // Shift everything from the quote onwards, then overwrite the old
        // declaration in place
        MoveMemory(pwchQuote + nLengthIncrease, pwchQuote,
            pbyTarget + *pnTargetFileSize - (PBYTE)pwchQuote);
        CopyMemory(pwchCharset, wszUnicodeCharset, cchCharset*sizeof(WCHAR));
        *pnTargetFileSize += cbIncrease;
    } else {
        const CHAR* const szGBCharset = "charset=gb18030";
        const int cchCharset = (int)strlen(szGBCharset);

        // Terminate the converted text so the string searches stop at its end
        *((PCHAR)(pbyTarget + *pnTargetFileSize)) = 0;

        CHAR* pchCharset = strstr((PCHAR)pbyTarget, "charset=");
        if (!pchCharset) {
            // Some Html files have no code page flag;
            // the charset replace step is skipped for them
            return TRUE;
        }

        CHAR* pchQuote = strchr(pchCharset, '\"');
        if (!pchQuote || (pchQuote - pchCharset >= 20)) {
            return FALSE;
        }

        // Signed on purpose: the increase may be negative
        const int nLengthIncrease = cchCharset - (int)(pchQuote - pchCharset);

        if (*pnTargetFileSize + nLengthIncrease > (int)dwTargetSize) {
            return FALSE;
        }

        // Shift everything from the quote onwards, then overwrite the old
        // declaration in place
        MoveMemory(pchQuote + nLengthIncrease, pchQuote,
            (PCHAR)pbyTarget + *pnTargetFileSize - pchQuote);
        CopyMemory(pchCharset, szGBCharset, cchCharset*sizeof(char));
        *pnTargetFileSize += nLengthIncrease;
    }

    return TRUE;
}
|
|
|
|
#ifdef XML_SUPPORT
|
|
// Convert an XML file image between ANSI and Unicode and rewrite the
// encoding value of its XML declaration to match the new encoding.
//
// The text is converted with ConvertTextFile. Inside the first
// "<?xml ... ?>" declaration, the quoted value following "encoding=" is
// replaced by "UTF-16" (ANSI -> Unicode) or "GB18030" (Unicode -> ANSI),
// shifting the remainder of the buffer as needed.
//
// Parameters:
//   pbySource        - source file contents
//   dwFileSize       - size of the source, in bytes
//   pbyTarget        - buffer that receives the converted file
//   dwTargetSize     - capacity of pbyTarget, in bytes
//   fAnsiToUnicode   - TRUE: ANSI -> Unicode, FALSE: Unicode -> ANSI
//   pnTargetFileSize - receives the number of bytes produced in pbyTarget
//
// Returns TRUE when the text was converted and the encoding value was
// either replaced or the declaration has no "encoding=" at all. Returns
// FALSE when the text conversion fails, when "<?xml" or "?>" is missing,
// when the value is not quoted with ' or ", when the closing quote is
// missing or 20 or more characters away, or when the rewritten text would
// not fit into dwTargetSize.
BOOL ConvertXmlFile(
    PBYTE pbySource,
    DWORD dwFileSize,
    PBYTE pbyTarget,
    DWORD dwTargetSize,
    BOOL  fAnsiToUnicode,
    PINT  pnTargetFileSize)
{
    // Reserve the last space to explicitly assign zero to the last
    // character in the buffer
    const DWORD dwReserved = fAnsiToUnicode ? (DWORD)sizeof(WCHAR)
                                            : (DWORD)sizeof(char);

    // Without this check "dwTargetSize - dwReserved" would wrap around and
    // advertise a huge buffer to ConvertTextFile
    if (dwTargetSize < dwReserved) {
        return FALSE;
    }

    if (!ConvertTextFile(pbySource, dwFileSize, pbyTarget,
            dwTargetSize - dwReserved, fAnsiToUnicode, pnTargetFileSize)) {
        return FALSE;
    }

    // Change charset
    if (fAnsiToUnicode) {
        const WCHAR* const wszUnicodeCharset = L"UTF-16";
        const int cchCharset = (int)wcslen(wszUnicodeCharset);

        // Terminate the converted text so the string searches stop at its end
        *((PWCH)(pbyTarget + *pnTargetFileSize)) = 0;

        WCHAR* pwchDecl = wcsstr((PWCH)pbyTarget, L"<?xml");
        if (!pwchDecl) {
            return FALSE;
        }
        WCHAR* pwchEnd = wcsstr(pwchDecl, L"?>");
        if (!pwchEnd) {
            return FALSE;
        }

        // Temporarily cut the string at "?>" so the searches below cannot
        // leave the XML declaration
        *pwchEnd = 0;

        WCHAR* pwchValue = wcsstr(pwchDecl, L"encoding=");
        WCHAR* pwchClose = NULL;
        if (pwchValue) {
            pwchValue += wcslen(L"encoding=");
            const WCHAR wchQuote = *pwchValue;
            pwchValue++;

            // The value must be quoted; look for the matching close quote
            if (wchQuote == '\"' || wchQuote == '\'') {
                pwchClose = wcschr(pwchValue, wchQuote);
            }
        }

        // Restore the '?' of "?>" before leaving on any path
        *pwchEnd = '?';

        if (!pwchValue) {
            // Some XML files have no encoding attribute;
            // the replace step is skipped for them
            return TRUE;
        }
        if (!pwchClose || (pwchClose - pwchValue >= 20)) {
            return FALSE;
        }

        // Signed on purpose: the new value may be shorter than the old one
        const int nLengthIncrease = cchCharset - (int)(pwchClose - pwchValue);
        const int cbIncrease = nLengthIncrease * (int)sizeof(WCHAR);

        if (*pnTargetFileSize + cbIncrease > (int)dwTargetSize) {
            return FALSE;
        }

        // Shift everything from the closing quote onwards, then overwrite
        // the old value in place
        MoveMemory(pwchClose + nLengthIncrease, pwchClose,
            pbyTarget + *pnTargetFileSize - (PBYTE)pwchClose);
        CopyMemory(pwchValue, wszUnicodeCharset, cchCharset*sizeof(WCHAR));
        *pnTargetFileSize += cbIncrease;
    } else {
        const char* const szGBCharset = "GB18030";
        const int cchCharset = (int)strlen(szGBCharset);

        // Terminate the converted text so the string searches stop at its end
        *((PCH)(pbyTarget + *pnTargetFileSize)) = 0;

        char* pchDecl = strstr((char*)pbyTarget, "<?xml");
        if (!pchDecl) {
            return FALSE;
        }
        char* pchEnd = strstr(pchDecl, "?>");
        if (!pchEnd) {
            return FALSE;
        }

        // Temporarily cut the string at "?>" so the searches below cannot
        // leave the XML declaration
        *pchEnd = 0;

        char* pchValue = strstr(pchDecl, "encoding=");
        char* pchClose = NULL;
        if (pchValue) {
            pchValue += strlen("encoding=");
            const CHAR chQuote = *pchValue;
            pchValue++;

            // The value must be quoted; look for the matching close quote
            if (chQuote == '\"' || chQuote == '\'') {
                pchClose = strchr(pchValue, chQuote);
            }
        }

        // Restore the '?' of "?>" before leaving on any path
        *pchEnd = '?';

        if (!pchValue) {
            // Some XML files have no encoding attribute;
            // the replace step is skipped for them
            return TRUE;
        }
        if (!pchClose || (pchClose - pchValue >= 20)) {
            return FALSE;
        }

        // Signed on purpose: the new value may be shorter than the old one
        const int nLengthIncrease = cchCharset - (int)(pchClose - pchValue);

        if (*pnTargetFileSize + nLengthIncrease > (int)dwTargetSize) {
            return FALSE;
        }

        // Shift everything from the closing quote onwards, then overwrite
        // the old value in place
        MoveMemory(pchClose + nLengthIncrease, pchClose,
            pbyTarget + *pnTargetFileSize - (PBYTE)pchClose);
        CopyMemory(pchValue, szGBCharset, cchCharset);
        *pnTargetFileSize += nLengthIncrease;
    }

    return TRUE;
}
|
|
#endif
|
|
|
|
#ifdef RTF_SUPPORT
|
|
// Convert an RTF file image by running it through CRtfParser.
//
// Parameters:
//   pBuf             - read buffer holding the source file
//   dwSize           - source file size
//   pWrite           - write buffer handed to the parser
//   dwWriteSize      - not referenced by this function
//   fAnsiToUnicode   - not referenced by this function
//   pnTargetFileSize - filled in through CRtfParser::GetResult on success
//
// Returns TRUE only when the parser accepts the input as RTF, reports
// version 1 and codepage 936, and Do() returns ecOK. Every rejection is
// reported with MsgNotRtfSourceFile(); a null parser pointer is reported
// with MsgOverflow().
//
// NOTE(review): the parser's fourth constructor argument is dwSize*3 rather
// than dwWriteSize - confirm that the caller sizes pWrite accordingly.
BOOL ConvertRtfFile(
    PBYTE pBuf,             // Read buf
    DWORD dwSize,           // File size
    PBYTE pWrite,           // Write buf
    DWORD dwWriteSize,
    BOOL  fAnsiToUnicode,
    PINT  pnTargetFileSize)
{
    BOOL  fConverted = FALSE;
    DWORD dwRtfVersion;
    DWORD dwRtfCodepage;

    CRtfParser* pParser = new CRtfParser(pBuf, dwSize, pWrite, dwSize*3);
    if (!pParser) {
        MsgOverflow();
        return FALSE;
    }

    // The checks run strictly left to right and stop at the first failure,
    // so the version/codepage values are only read after their getter
    // returned ecOK.
    if (!pParser->fRTFFile() ||
        ecOK != pParser->GetVersion(&dwRtfVersion) ||
        dwRtfVersion != 1 ||
        ecOK != pParser->GetCodepage(&dwRtfCodepage) ||
        dwRtfCodepage != 936 ||
        // Explain WordID by corresponding word text
        ecOK != pParser->Do()) {
        MsgNotRtfSourceFile();
    } else {
        pParser->GetResult((PDWORD)pnTargetFileSize);
        fConverted = TRUE;
    }

    delete pParser;
    return fConverted;
}
|
|
|
|
#endif |