2020-09-30 16:53:55 +02:00

371 lines
9.3 KiB
C++

#include "stdafx.h"
#include "Msg.h"
#include "ConvEng.h"
#include "TextFile.h"
#ifdef RTF_SUPPORT
#include "RtfParser.h"
#endif
// Converts a plain text buffer between ANSI and Unicode, where "Unicode"
// means text that starts with the 0xFEFF byte-order mark.
//
//   pbySource, dwFileSize   - source data and its length in bytes
//   pbyTarget, dwTargetSize - destination buffer and its size
//   fAnsiToUnicode          - TRUE: ANSI -> Unicode, FALSE: Unicode -> ANSI
//   pnTargetFileSize        - receives the size of the converted output
//
// Returns TRUE on success and FALSE otherwise. A source whose encoding does
// not match the requested direction is reported through the matching Msg*()
// helper before FALSE is returned. An empty source (0 bytes for ANSI, or
// only the byte-order mark for Unicode) is a success: the output is just
// the mark (2 bytes) or nothing (0 bytes) respectively.
BOOL ConvertTextFile(
    PBYTE pbySource,
    DWORD dwFileSize,
    PBYTE pbyTarget,
    DWORD dwTargetSize,
    BOOL fAnsiToUnicode,
    PINT pnTargetFileSize)
{
    const WORD wByteOrderMark = 0xFEFF;

    // Only look at the first two bytes when the source really has them;
    // a 0- or 1-byte source cannot carry a byte-order mark.
    const BOOL fHasByteOrderMark =
        (dwFileSize >= sizeof(WORD)) &&
        (*((PWORD)pbySource) == wByteOrderMark);

    if (fAnsiToUnicode) {
        if (fHasByteOrderMark) {
            MsgNotAnsiTextSourceFile();
            return FALSE;
        }

        // The mark itself needs one WCHAR in the target; without this guard
        // the write below and the "dwTargetSize - 2" would both misbehave.
        if (dwTargetSize < sizeof(WCHAR)) {
            return FALSE;
        }

        // Put Unicode text file flag
        PWCH pwchTarget = (PWCH)pbyTarget;
        *pwchTarget = wByteOrderMark;
        *pnTargetFileSize = 1;

        // An empty source produces the mark only; skip the converter but
        // still fall through so the size is scaled to bytes below.
        if (dwFileSize) {
            *pnTargetFileSize += AnsiStrToUnicodeStr(pbySource, dwFileSize,
                pwchTarget + 1, dwTargetSize - 2);
        }

        // Size so far is counted in WCHARs; report it in bytes.
        // NOTE(review): because the mark is counted first, a converter
        // result of 0 still ends up as a non-zero size (and so TRUE) -
        // confirm how AnsiStrToUnicodeStr signals failure.
        *pnTargetFileSize *= sizeof(WCHAR);
    } else {
        if (!fHasByteOrderMark) {
            MsgNotUnicodeTextSourceFile();
            return FALSE;
        }

        // Source holds the mark and nothing else: empty output, success.
        if (dwFileSize == sizeof(WCHAR)) {
            *pnTargetFileSize = 0;
            return TRUE;
        }

        // Convert everything after the mark.
        *pnTargetFileSize = UnicodeStrToAnsiStr((PWCH)pbySource + 1,
            dwFileSize/sizeof(WCHAR) - 1, (PCHAR)pbyTarget, dwTargetSize);
    }

    // A zero-sized result is treated as a failed conversion.
    return *pnTargetFileSize ? TRUE : FALSE;
}
// Converts an HTML buffer with ConvertTextFile() and then rewrites the first
// "charset=..." token (up to the next double quote) so that it names the
// target encoding: "charset=unicode" for ANSI -> Unicode, "charset=gb18030"
// for Unicode -> ANSI.
//
// Returns FALSE when the text conversion fails, when no closing double quote
// follows "charset=" within 20 characters, or when the rewritten text would
// not fit in dwTargetSize bytes. A document without any "charset=" token is
// left as converted and reported as success.
BOOL ConvertHtmlFile(
    PBYTE pbySource,
    DWORD dwFileSize,
    PBYTE pbyTarget,
    DWORD dwTargetSize,
    BOOL fAnsiToUnicode,
    PINT pnTargetFileSize)
{
    // One character of the target is held back so the converted text can be
    // zero-terminated for the string searches below.
    const size_t cbTerminator = fAnsiToUnicode ? sizeof(WCHAR) : sizeof(char);

    if (!ConvertTextFile(pbySource, dwFileSize, pbyTarget,
            dwTargetSize - cbTerminator,
            fAnsiToUnicode, pnTargetFileSize)) {
        return FALSE;
    }

    if (fAnsiToUnicode) {
        const WCHAR* const wszNewCharset = L"charset=unicode";
        const size_t cchNewCharset = wcslen(wszNewCharset);

        // Terminate the converted text so wcsstr/wcschr stop at its end.
        *((PWCH)(pbyTarget + *pnTargetFileSize)) = 0;

        WCHAR* pwchToken = wcsstr((PWCH)pbyTarget, L"charset=");
        if (!pwchToken) {
            // No charset declaration at all: nothing to rewrite.
            return TRUE;
        }

        WCHAR* pwchQuote = wcschr(pwchToken, L'\"');
        if (!pwchQuote || (pwchQuote - pwchToken >= 20)) {
            return FALSE;
        }

        // Difference, in characters, between the new token and the old one
        // (negative when the old token was longer).
        int nGrowth = (int)(cchNewCharset - (pwchQuote - pwchToken));
        if (*pnTargetFileSize + nGrowth*sizeof(WCHAR) > dwTargetSize) {
            return FALSE;
        }

        // Shift the tail starting at the closing quote, then drop the new
        // token into the gap.
        MoveMemory(pwchQuote + nGrowth, pwchQuote,
            pbyTarget + *pnTargetFileSize - (PBYTE)pwchQuote);
        CopyMemory(pwchToken, wszNewCharset, cchNewCharset*sizeof(WCHAR));
        *pnTargetFileSize += nGrowth*sizeof(WCHAR);
        return TRUE;
    }

    const CHAR* const szNewCharset = "charset=gb18030";
    const size_t cchNewCharset = strlen(szNewCharset);

    // Terminate the converted text so strstr/strchr stop at its end.
    *((PCHAR)(pbyTarget + *pnTargetFileSize)) = 0;

    CHAR* pchToken = strstr((PCHAR)pbyTarget, "charset=");
    if (!pchToken) {
        // No charset declaration at all: nothing to rewrite.
        return TRUE;
    }

    CHAR* pchQuote = strchr(pchToken, '\"');
    if (!pchQuote || (pchQuote - pchToken >= 20)) {
        return FALSE;
    }

    // Difference, in characters, between the new token and the old one
    // (negative when the old token was longer).
    int nGrowth = (int)(cchNewCharset - (pchQuote - pchToken));
    if (*pnTargetFileSize + nGrowth > (int)dwTargetSize) {
        return FALSE;
    }

    // Shift the tail starting at the closing quote, then drop the new token
    // into the gap.
    MoveMemory(pchQuote + nGrowth, pchQuote,
        (PCHAR)pbyTarget + *pnTargetFileSize - pchQuote);
    CopyMemory(pchToken, szNewCharset, cchNewCharset*sizeof(char));
    *pnTargetFileSize += nGrowth*sizeof(char);
    return TRUE;
}
#ifdef XML_SUPPORT
// Converts an XML buffer with ConvertTextFile() and then rewrites the value
// of the encoding="..." attribute inside the <?xml ... ?> declaration so it
// names the target encoding: "UTF-16" for ANSI -> Unicode, "GB18030" for
// Unicode -> ANSI.
//
// Returns FALSE when the text conversion fails, when no "<?xml" ... "?>"
// declaration is found, when the encoding value is not quoted or its closing
// quote is 20 or more characters away, or when the rewritten text would not
// fit in dwTargetSize bytes. A declaration without an encoding attribute is
// left as converted and reported as success.
BOOL ConvertXmlFile(
    PBYTE pbySource,
    DWORD dwFileSize,
    PBYTE pbyTarget,
    DWORD dwTargetSize,
    BOOL fAnsiToUnicode,
    PINT pnTargetFileSize)
{
    // One character of the target is held back so the converted text can be
    // zero-terminated for the string searches below.
    const size_t cbTerminator = fAnsiToUnicode ? sizeof(WCHAR) : sizeof(char);

    if (!ConvertTextFile(pbySource, dwFileSize, pbyTarget,
            dwTargetSize - cbTerminator,
            fAnsiToUnicode, pnTargetFileSize)) {
        return FALSE;
    }

    if (fAnsiToUnicode) {
        const WCHAR* const wszNewEncoding = L"UTF-16";
        const size_t cchNewEncoding = wcslen(wszNewEncoding);

        // Terminate the converted text so the searches stop at its end.
        *((PWCH)(pbyTarget + *pnTargetFileSize)) = 0;

        WCHAR* pwchDecl = wcsstr((PWCH)pbyTarget, L"<?xml");
        if (!pwchDecl) {
            return FALSE;
        }
        WCHAR* pwchDeclEnd = wcsstr(pwchDecl, L"?>");
        if (!pwchDeclEnd) {
            return FALSE;
        }

        // Cut the text at the '?' of "?>" for the duration of the lookups so
        // they cannot match anything beyond the declaration.
        *pwchDeclEnd = 0;

        BOOL fHasEncoding = TRUE;
        WCHAR* pwchValue = NULL;     // first character after the open quote
        WCHAR* pwchCloseQuote = NULL;
        WCHAR* pwchAttr = wcsstr(pwchDecl, L"encoding=");
        if (!pwchAttr) {
            fHasEncoding = FALSE;
        } else {
            const WCHAR wchQuote = pwchAttr[wcslen(L"encoding=")];
            if (wchQuote == L'\"' || wchQuote == L'\'') {
                pwchValue = pwchAttr + wcslen(L"encoding=") + 1;
                pwchCloseQuote = wcschr(pwchValue, wchQuote);
            }
        }

        // Put the '?' of "?>" back before leaving or editing the text.
        *pwchDeclEnd = L'?';

        if (!fHasEncoding) {
            // Declaration carries no encoding attribute: nothing to rewrite.
            return TRUE;
        }
        if (!pwchCloseQuote || (pwchCloseQuote - pwchValue >= 20)) {
            return FALSE;
        }

        // Difference, in characters, between the new value and the old one
        // (negative when the old value was longer).
        const int nGrowth =
            (int)(cchNewEncoding - (pwchCloseQuote - pwchValue));
        if (*pnTargetFileSize + nGrowth*sizeof(WCHAR) > dwTargetSize) {
            return FALSE;
        }

        // Shift the tail starting at the closing quote, then write the new
        // value into the gap.
        MoveMemory(pwchCloseQuote + nGrowth, pwchCloseQuote,
            pbyTarget + *pnTargetFileSize - (PBYTE)pwchCloseQuote);
        CopyMemory(pwchValue, wszNewEncoding, cchNewEncoding*sizeof(WCHAR));
        *pnTargetFileSize += nGrowth*sizeof(WCHAR);
        return TRUE;
    }

    const char* const szNewEncoding = "GB18030";
    const size_t cchNewEncoding = strlen(szNewEncoding);

    // Terminate the converted text so the searches stop at its end.
    *((PCH)(pbyTarget + *pnTargetFileSize)) = 0;

    char* pchDecl = strstr((char*)pbyTarget, "<?xml");
    if (!pchDecl) {
        return FALSE;
    }
    char* pchDeclEnd = strstr(pchDecl, "?>");
    if (!pchDeclEnd) {
        return FALSE;
    }

    // Cut the text at the '?' of "?>" for the duration of the lookups so
    // they cannot match anything beyond the declaration.
    *pchDeclEnd = 0;

    BOOL fHasEncoding = TRUE;
    char* pchValue = NULL;           // first character after the open quote
    char* pchCloseQuote = NULL;
    char* pchAttr = strstr(pchDecl, "encoding=");
    if (!pchAttr) {
        fHasEncoding = FALSE;
    } else {
        const CHAR chQuote = pchAttr[strlen("encoding=")];
        if (chQuote == '\"' || chQuote == '\'') {
            pchValue = pchAttr + strlen("encoding=") + 1;
            pchCloseQuote = strchr(pchValue, chQuote);
        }
    }

    // Put the '?' of "?>" back before leaving or editing the text.
    *pchDeclEnd = '?';

    if (!fHasEncoding) {
        // Declaration carries no encoding attribute: nothing to rewrite.
        return TRUE;
    }
    if (!pchCloseQuote || (pchCloseQuote - pchValue >= 20)) {
        return FALSE;
    }

    // Difference, in characters, between the new value and the old one
    // (negative when the old value was longer).
    const int nGrowth = (int)(cchNewEncoding - (pchCloseQuote - pchValue));
    if (*pnTargetFileSize + nGrowth > (int)dwTargetSize) {
        return FALSE;
    }

    // Shift the tail starting at the closing quote, then write the new value
    // into the gap.
    MoveMemory(pchCloseQuote + nGrowth, pchCloseQuote,
        pbyTarget + *pnTargetFileSize - (PBYTE)pchCloseQuote);
    CopyMemory(pchValue, szNewEncoding, cchNewEncoding);
    *pnTargetFileSize += nGrowth;
    return TRUE;
}
#endif
#ifdef RTF_SUPPORT
// Converts an RTF buffer by running it through a CRtfParser.
//
// The source is accepted only if the parser recognises it as RTF, reports
// version 1 and code page 936, and its Do() pass returns ecOK; any of these
// failing is reported with MsgNotRtfSourceFile(). On success the parser's
// result is stored through pnTargetFileSize and TRUE is returned.
//
// dwWriteSize and fAnsiToUnicode are not referenced by this function.
// NOTE(review): the parser is constructed with dwSize*3 as its fourth
// argument rather than dwWriteSize - presumably the write buffer capacity;
// confirm callers always provide at least dwSize*3 bytes.
BOOL ConvertRtfFile(
    PBYTE pBuf,     // Read buf
    DWORD dwSize,   // File size
    PBYTE pWrite,   // Write buf
    DWORD dwWriteSize,
    BOOL fAnsiToUnicode,
    PINT pnTargetFileSize)
{
    BOOL fRet = FALSE;
    DWORD dwVersion;
    DWORD dwCodepage;

    CRtfParser* pcParser = new CRtfParser(pBuf, dwSize, pWrite, dwSize*3);

    if (!pcParser) {
        MsgOverflow();
    } else if (!pcParser->fRTFFile()
            || ecOK != pcParser->GetVersion(&dwVersion)
            || dwVersion != 1
            || ecOK != pcParser->GetCodepage(&dwCodepage)
            || dwCodepage != 936
            // Original note: explain WordID by corresponding word text
            || ecOK != pcParser->Do()) {
        // Short-circuit evaluation keeps the checks in their required order
        // and stops at the first one that fails.
        MsgNotRtfSourceFile();
    } else {
        pcParser->GetResult((PDWORD)pnTargetFileSize);
        fRet = TRUE;
    }

    // delete on a null pointer is a no-op, so no guard is needed.
    delete pcParser;
    return fRet;
}
#endif