319 lines
9.5 KiB
C
319 lines
9.5 KiB
C
/*******************************************************************************
* CommonLx.h
*   This is the header file for the defines and constants used by sapi lexicon
*   and the tools
*
*   Owner: yunusm                                               Date: 07/01/99
*
*   Copyright (c) 1999 Microsoft Corporation. All Rights Reserved.
*******************************************************************************/
|
|
|
|
#pragma once
|
|
|
|
//--- Includes -----------------------------------------------------------------
|
|
|
|
#include <stdio.h>
|
|
#include "sapi.h"
|
|
#include "spddkhlp.h"
|
|
|
|
// Phone converter defines for the SpPhoneConverter class
|
|
// Maximum number of unicode characters in a phone string.
static const DWORD g_dwMaxLenPhone = 7;

// Maximum number of ids that can be run together per phone string.
// This number is 1 for SAPI converters, but SR and TTS use it to encode one
// string into several ids, using the form "aa 01235678".
static const DWORD g_dwMaxLenId = 3;
|
|
|
|
// The following defines used by the compression code for Lookup/Vendor lexicons
|
|
// Basic field widths. LASTINFOFLAGSIZE and WORDINFOTYPESIZE are used as
// bit-field widths in the "Control block layout" comment in this file.
// NOTE(review): LTSINDEXSIZE and POSSIZE are presumably bit widths as well
// (POSSIZE 5 <-> 32 values) -- confirm against the compression code.
#define LASTINFOFLAGSIZE    1
#define WORDINFOTYPESIZE    3
#define LTSINDEXSIZE        4
#define POSSIZE             5   // a maximum of 32 parts of speech

// Derived widths. They are spelled as literals, so they must be updated by
// hand whenever one of the basic widths above changes.
#define CBSIZE              4   // = LASTINFOFLAGSIZE + WORDINFOTYPESIZE
#define MAXELEMENTSIZE      5   // = greater of (LTSINDEXSIZE, POSSIZE)
#define MAXTOTALCBSIZE      9   // = CBSIZE + MAXELEMENTSIZE
|
|
|
|
// Kinds of word-information pieces.
// NOTE(review): presumably the value stored in the WordInfoType bit field of
// the control block, with ePRON tagging a pronunciation and ePOS a part of
// speech -- confirm against the compression code.
typedef enum tagSPLexWordInfoType
{
    ePRON = 1,
    ePOS  = 2
} SPLEXWORDINFOTYPE;
|
|
|
|
/*
Control block layout

struct CB
{
    BYTE fLast : LASTINFOFLAGSIZE;          // Is this the last Word Information piece
    BYTE WordInfoType : WORDINFOTYPESIZE;   // Allow for 8 types
};
*/
|
|
|
|
typedef struct tagLookupLexInfo
|
|
{
|
|
GUID guidValidationId;
|
|
GUID guidLexiconId;
|
|
LANGID LangID;
|
|
WORD wReserved;
|
|
DWORD nNumberWords;
|
|
DWORD nNumberProns;
|
|
DWORD nMaxWordInfoLen;
|
|
DWORD nLengthHashTable;
|
|
DWORD nBitsPerHashEntry;
|
|
DWORD nCompressedBlockBits;
|
|
DWORD nWordCBSize;
|
|
DWORD nPronCBSize;
|
|
DWORD nPosCBSize;
|
|
} LOOKUPLEXINFO, *PLOOKUPLEXINFO;
|
|
|
|
typedef struct tagLtsLexInfo
|
|
{
|
|
GUID guidValidationId;
|
|
GUID guidLexiconId;
|
|
LANGID LangID;
|
|
} LTSLEXINFO, *PLTSLEXINFO;
|
|
|
|
// The following two typedefs used in Japanese and Chinese phone converters
|
|
|
|
typedef struct SYLDIC
|
|
{
|
|
char *pKey;
|
|
WCHAR *pString;
|
|
} SYLDIC;
|
|
|
|
typedef struct SYLDICW
|
|
{
|
|
WCHAR *pwKey;
|
|
char *pString;
|
|
} SYLDICW;
|
|
|
|
//--- Validation functions ----------------------------------------------------
|
|
|
|
/*******************************************************************************
* SpIsBadLexType *
*----------------*
*   Description:
*       Checks a lexicon type flag. The accepted values are eLEXTYPE_USER,
*       eLEXTYPE_APP and anything in the inclusive range
*       eLEXTYPE_PRIVATE1 .. eLEXTYPE_PRIVATE20.
*
*   Return:
*       FALSE if dwFlag is an accepted value
*       TRUE  otherwise
*******************************************************************************/
inline BOOL SpIsBadLexType(DWORD dwFlag)
{
    if (dwFlag == eLEXTYPE_USER || dwFlag == eLEXTYPE_APP)
    {
        return FALSE;
    }

    if (dwFlag >= eLEXTYPE_PRIVATE1 && dwFlag <= eLEXTYPE_PRIVATE20)
    {
        return FALSE;
    }

    return TRUE;
}
|
|
|
|
/*******************************************************************************
* SPIsBadPartOfSpeech *
*---------------------*
*   Description:
*       Checks a part-of-speech value. SPPS_NotOverriden and SPPS_Unknown are
*       accepted when they match exactly. Any other value is accepted when,
*       with its low 12 bits cleared, it equals one of SPPS_Noun, SPPS_Verb,
*       SPPS_Modifier, SPPS_Function or SPPS_Interjection.
*
*   Return:
*       FALSE if ePartOfSpeech is accepted
*       TRUE  otherwise
*******************************************************************************/
inline BOOL SPIsBadPartOfSpeech(SPPARTOFSPEECH ePartOfSpeech)
{
    // The two special values are compared unmasked.
    if (ePartOfSpeech == SPPS_NotOverriden || ePartOfSpeech == SPPS_Unknown)
    {
        return FALSE;
    }

    // Clear the low 12 bits so only the remaining high bits are compared.
    const SPPARTOFSPEECH eClassMask = (SPPARTOFSPEECH)~0xfff;
    const SPPARTOFSPEECH eClass = (SPPARTOFSPEECH)(ePartOfSpeech & eClassMask);

    const BOOL fKnownClass = (eClass == SPPS_Noun ||
                              eClass == SPPS_Verb ||
                              eClass == SPPS_Modifier ||
                              eClass == SPPS_Function ||
                              eClass == SPPS_Interjection);

    return fKnownClass ? FALSE : TRUE;
}
|
|
|
|
|
|
/*******************************************************************************
* SPIsBadLexWord *
*----------------*
*   Description:
*       Checks a word string. The word is rejected when SPIsBadStringPtr
*       reports the pointer as bad, when the string is empty, or when its
*       length is SP_MAX_WORD_LENGTH characters or more.
*
*   Return:
*       TRUE  if the word is rejected
*       FALSE otherwise
*******************************************************************************/
inline BOOL SPIsBadLexWord(const WCHAR *pszWord)
{
    if (SPIsBadStringPtr(pszWord))
    {
        return TRUE;
    }

    if (*pszWord == 0)
    {
        return TRUE;        // empty word
    }

    return (wcslen(pszWord) >= SP_MAX_WORD_LENGTH) ? TRUE : FALSE;
}
|
|
|
|
|
|
/*******************************************************************************
* SPIsBadLexPronunciation *
*-------------------------*
*   Description:
*       Checks a pronunciation string. The string is rejected when
*       SPIsBadStringPtr reports the pointer as bad, when it is empty, or when
*       its length is SP_MAX_PRON_LENGTH or more. If a phone converter is
*       supplied, the string must additionally convert successfully through
*       IdToPhone; the converted text itself is discarded.
*
*   Return:
*       TRUE  if the pronunciation is rejected
*       FALSE otherwise (including when no converter is supplied and the
*             basic string checks pass)
*******************************************************************************/
inline BOOL SPIsBadLexPronunciation(CComPtr<ISpPhoneConverter> spPhoneConv, const WCHAR *pszPronunciation)
{
    if (SPIsBadStringPtr(pszPronunciation))
    {
        return TRUE;
    }
    if (*pszPronunciation == 0)
    {
        return TRUE;        // empty pronunciation
    }
    if (wcslen(pszPronunciation) >= SP_MAX_PRON_LENGTH)
    {
        return TRUE;
    }

    HRESULT hrConvert = S_OK;
    if (spPhoneConv)
    {
        // Scratch output only; sized so that we will not fail for lack of space.
        WCHAR szPhone[SP_MAX_PRON_LENGTH * (g_dwMaxLenPhone + 1)];
        hrConvert = spPhoneConv->IdToPhone(pszPronunciation, szPhone);
    }
    return (FAILED(hrConvert));
}
|
|
|
|
|
|
/*******************************************************************************
* SPIsBadWordPronunciationList *
*------------------------------*
*   Description:
*       Checks that the list structure itself is writable and then that its
*       pvBuffer is writable for ulSize bytes. The buffer is only examined
*       once the structure has passed its own check.
*
*   Return:
*       TRUE  if either check fails
*       FALSE otherwise
*******************************************************************************/
inline BOOL SPIsBadWordPronunciationList(SPWORDPRONUNCIATIONLIST *pWordPronunciationList)
{
    if (SPIsBadWritePtr(pWordPronunciationList, sizeof(SPWORDPRONUNCIATIONLIST)))
    {
        return TRUE;
    }

    return SPIsBadWritePtr(pWordPronunciationList->pvBuffer, pWordPronunciationList->ulSize) ? TRUE : FALSE;
}
|
|
|
|
|
|
/*******************************************************************************
* SPIsBadWordList *
*-----------------*
*   Description:
*       Checks that the list structure itself is writable and then that its
*       pvBuffer is writable for ulSize bytes. The buffer is only examined
*       once the structure has passed its own check.
*
*   Return:
*       TRUE  if either check fails
*       FALSE otherwise
*******************************************************************************/
inline BOOL SPIsBadWordList(SPWORDLIST *pWordList)
{
    if (SPIsBadWritePtr(pWordList, sizeof(SPWORDLIST)))
    {
        return TRUE;
    }

    return SPIsBadWritePtr(pWordList->pvBuffer, pWordList->ulSize) ? TRUE : FALSE;
}
|
|
|
|
/*******************************************************************************
* SPCopyPhoneString *
*-------------------*
*   Description:
*       Copies the NUL-terminated phone string pszSource, terminator included,
*       into pszTarget. The source pointer is validated first, because its
*       length is needed to validate the target; measuring an unchecked source
*       would fault on a NULL or unreadable pointer before any error could be
*       reported. The target is then checked for write access over
*       (length + 1) WCHARs.
*
*   Return:
*       S_OK
*       E_INVALIDARG    source or target failed validation; nothing is copied
*******************************************************************************/
inline HRESULT SPCopyPhoneString(const WCHAR *pszSource, WCHAR *pszTarget)
{
    // Must come before wcslen(): the length computation dereferences pszSource.
    if (SPIsBadStringPtr(pszSource))
    {
        return E_INVALIDARG;
    }

    if (SPIsBadWritePtr(pszTarget, (wcslen(pszSource) + 1) * sizeof(WCHAR)))
    {
        return E_INVALIDARG;
    }

    wcscpy(pszTarget, pszSource);
    return S_OK;
}
|
|
|
|
/*****************************************************************************
|
|
* GetWordHashValue *
|
|
*------------------*
|
|
*
|
|
* Description:
|
|
* Hash function for the Word hash tables. This hash function tries to create
|
|
* a word hash value very dependant on the word text. The mean collison rate
|
|
* on hash tables populated with this hash function is 1 per word access. This
|
|
* result was when collisions were resolved using linear probing when
|
|
* populating the hash table. Using non-linear probing might yield an even lower
|
|
* mean collision rate.
|
|
*
|
|
* Return:
|
|
* hash value
|
|
**********************************************************************YUNUSM*/
|
|
inline DWORD GetWordHashValue(PCWSTR pwszWord, // word string
|
|
DWORD nLengthHash // length of hash table
|
|
)
|
|
{
|
|
DWORD dHash = *pwszWord++;
|
|
|
|
WCHAR c;
|
|
WCHAR cPrev = (WCHAR)dHash;
|
|
|
|
for (; *pwszWord; pwszWord++)
|
|
{
|
|
c = *pwszWord;
|
|
dHash += ((c << (cPrev & 0x1F)) + (cPrev << (c & 0x1F)));
|
|
|
|
cPrev = c;
|
|
}
|
|
return (((dHash << 16) - dHash) % nLengthHash);
|
|
}
|
|
|
|
/*******************************************************************************
|
|
* ReallocSPWORDPRONList *
|
|
*-----------------------*
|
|
* Description:
|
|
* Grow a SPWORDPRONUNCIATIONLIST if necessary
|
|
*
|
|
* Return:
|
|
* S_OK
|
|
* E_OUTOFMEMORY
|
|
/**************************************************************** YUNUSM ******/
|
|
inline HRESULT ReallocSPWORDPRONList(SPWORDPRONUNCIATIONLIST *pSPList, // buffer to grow
|
|
DWORD dwSize // length to grow to
|
|
)
|
|
{
|
|
SPDBG_FUNC("ReallocSPWORDPRONList");
|
|
|
|
HRESULT hr = S_OK;
|
|
if (pSPList->ulSize < dwSize)
|
|
{
|
|
BYTE *p = (BYTE *)CoTaskMemRealloc(pSPList->pvBuffer, dwSize);
|
|
if (!p)
|
|
{
|
|
hr = E_OUTOFMEMORY;
|
|
}
|
|
else
|
|
{
|
|
pSPList->pvBuffer = p;
|
|
pSPList->pFirstWordPronunciation = (SPWORDPRONUNCIATION *)p;
|
|
pSPList->ulSize = dwSize;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
pSPList->pFirstWordPronunciation = (SPWORDPRONUNCIATION *)(pSPList->pvBuffer);
|
|
}
|
|
return hr;
|
|
}
|
|
|
|
/*******************************************************************************
|
|
* ReallocSPWORDList *
|
|
*-----------------------*
|
|
* Description:
|
|
* Grow a SPWORDLIST if necessary
|
|
*
|
|
* Return:
|
|
* S_OK
|
|
* E_OUTOFMEMORY
|
|
/**************************************************************** YUNUSM ******/
|
|
inline HRESULT ReallocSPWORDList(SPWORDLIST *pSPList, // buffer to grow
|
|
DWORD dwSize // length to grow to
|
|
)
|
|
{
|
|
SPDBG_FUNC("ReallocSPWORDList");
|
|
|
|
HRESULT hr = S_OK;
|
|
if (pSPList->ulSize < dwSize)
|
|
{
|
|
BYTE *p = (BYTE *)CoTaskMemRealloc(pSPList->pvBuffer, dwSize);
|
|
if (!p)
|
|
{
|
|
hr = E_OUTOFMEMORY;
|
|
}
|
|
else
|
|
{
|
|
pSPList->pvBuffer = p;
|
|
pSPList->pFirstWord = (SPWORD *)p;
|
|
pSPList->ulSize = dwSize;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
pSPList->pFirstWord = (SPWORD *)(pSPList->pvBuffer);
|
|
}
|
|
return hr;
|
|
}
|
|
|
|
inline size_t PronSize(const WCHAR * const pwszPron)
|
|
{
|
|
// NB - SPWORDPRONUNCIATION struct size includes space for one SPPHONEID
|
|
|
|
const size_t cb = sizeof(SPWORDPRONUNCIATION) + (wcslen(pwszPron) * sizeof(SPPHONEID));
|
|
|
|
return (cb + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
|
|
}
|
|
|
|
|
|
inline size_t WordSize(const WCHAR * const pwszWord)
|
|
{
|
|
// SPWORD struct size with the aligned word size
|
|
|
|
const size_t cb = sizeof(SPWORD) + ((wcslen(pwszWord) + 1) * sizeof(WCHAR));
|
|
|
|
return (cb + sizeof(void *) - 1) & ~(sizeof(void *) - 1);
|
|
}
|
|
|
|
/*******************************************************************************
|
|
* CreateNextPronunciation *
|
|
*-------------------------*
|
|
* Description:
|
|
* Returns a pointer to the location in the pronunciation array
|
|
* where the next pronunciation in the list should start.
|
|
* This function should be used only when creating the list.
|
|
* Once the list is created, access the next pronunciation
|
|
* through the ->pNextWordPronunciation member.
|
|
*
|
|
/**************************************************************** PACOGG ******/
|
|
inline SPWORDPRONUNCIATION* CreateNextPronunciation(SPWORDPRONUNCIATION *pSpPron)
|
|
{
|
|
return (SPWORDPRONUNCIATION *)((BYTE *)pSpPron + PronSize(pSpPron->szPronunciation));
|
|
}
|
|
|
|
//--- End of File -------------------------------------------------------------
|