// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // // File: compstr.c // Contents: This module implements xCompareString functionality. Note that xCompareString differs // from the Win32 CompareString in that it does not take a locale identifier (unnecessary // since Xbox only has one locale. // // Author: Jeff Simon (jeffsim) 16-May-2001 // // UNDONE: * Verify CompareStringW vs CompareStringA functionality // * How do Kanji and the kanas fit into g_rgdwLoc? Or handled algorithmically? // * What is Kanji, hira, kata, ascii sort order? // * Move defines to private header file to share them with compstra.c // // Copyright Microsoft Corporation // // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++ INCLUDE FILES +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #include "basedll.h" #include "winnls.h" #include #pragma hdrstop // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // Externally defined variables // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ // Conversion table. This will be filled in algorithmically by InitLocTable at startup. // UNDONE-ERR: Currently only big enough to hold the 256 ASCII chars. Not sure how to handle // Kanji and the kanas -- will I add them to the table, or handle them algorithmically? 
extern DWORD g_rgdwLoc[256];
extern void InitLocTable();

// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Internal Defines - specific to this file
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

// COMP_* -- 'Bucket' flags describing the type of a character. The comparison code in this file
//           tests these bits on g_rgdwLoc entries and orders characters by comparing the entries
//           numerically, so a higher bucket bit sorts later.
// Sort order (lower letters are sorted before upper letters; symbols are sorted before numbers...)
// UNDONE-ERR: Just guessing on the sort order of the non-ascii chars (kanji and kanas).  Guessing
//             that the sort order (first->last) would be "ascii", "kata", "hira", "kanji".
#define COMP_INVALID    0x80000000
#define COMP_KANJI      0x00800000
#define COMP_HIRAGANA   0x00400000
#define COMP_KATAKANA   0x00200000
#define COMP_ULETTER    0x00100000
#define COMP_LLETTER    0x00080000
#define COMP_NUMBER     0x00040000
#define COMP_SYMBOL     0x00020000
#define COMP_WORDSORT   0x00010000

// NEXTCHAR -- Simple define for skipping to the next character in the main CompareString loop.
//             Expects locals named pstr<x> and cch<x> to be in scope. Wrapped in do/while(0) so
//             that "NEXTCHAR(1);" is a single statement and stays safe in an unbraced if/else.
#define NEXTCHAR(x) do { pstr##x++; cch##x--; } while (0)

// SYM_* -- Defines for handling wordsort special symbols. These are bit flags that get or'ed
//          together, so the numeric values matter to any table indexed by the combined state.
#define SYM_NONE    0                   // No special symbol encountered
#define SYM_1       1                   // special symbol already encountered in pstr1
#define SYM_2       2                   // special symbol already encountered in pstr2
#define SYM_BOTH    (SYM_1 | SYM_2)     // special symbol already encountered in both pstr1 and pstr2
                                        // (parenthesized so it combines safely with & and ==)
#define SYM_1FIRST  4                   // pstr1 had the first special symbol encountered
#define SYM_2FIRST  8                   // pstr2 had the first special symbol encountered

// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// Functions
// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
//  Function:   xCompareStringA/xCompareStringW
//  Purpose:    Xbox-specific version of the CompareString function. Compares two strings using the
//              global Xbox locale.
Differs from the Win32 version of CompareString in that we don't // need to have a locale specified. // Arguments: dwCmpFlags -- Comparison-style options // pstr1 -- First string to compare // cch1 -- Number of characters in first string to examine (-1 for 'all') // pstr2 -- First string to compare // cch2 -- Number of characters in first string to examine (-1 for 'all') // Return: CSTR_LESS_THAN if pstr1 sorts BEFORE pstr2 (ie CompareString("a","b") ) // CSTR_EQUAL if pstr1 EQUALS pstr2 (ie CompareString("a","a") ) // CSTR_GREATER_THAN if pstr1 sorts AFTER pstr2 (ie CompareString("b","a") ) // ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ #ifdef COMPSTRA int xCompareStringA(DWORD dwCmpFlags, LPCSTR pstr1, int cch1, LPCSTR pstr2, int cch2) #else int xCompareStringW(DWORD dwCmpFlags, LPCWSTR pstr1, int cch1, LPCWSTR pstr2, int cch2) #endif { // w1, w2, dw1, dw2 -- Temp variables WORD w1, w2; DWORD dw1, dw2; // nSymFound -- Track which string(s) had special symbols, and which came first. int nSymFound = SYM_NONE; // Avoid repetitive flag masking in the while loop by doing it once here... BOOL fWordSort = !(dwCmpFlags & SORT_STRINGSORT); BOOL fIgnoreCase = dwCmpFlags & NORM_IGNORECASE; BOOL fIgnoreSymbols = dwCmpFlags & NORM_IGNORESYMBOLS; // s_rgnSymLookup -- Optimization to remove comparisons from the case where both strings are // the same length and have word-sort symbols in them static int s_rgnSymLookup[] = {CSTR_EQUAL, 0,0,0,0, CSTR_GREATER_THAN, 0, CSTR_LESS_THAN, 0,0, CSTR_LESS_THAN, CSTR_GREATER_THAN}; // Verify parameters if (pstr1 == NULL || pstr2 == NULL || cch1 < -1 || cch2 < -1) { SetLastError(ERROR_INVALID_PARAMETER); return 0; } // UNDONE-WARN: I'm not sure if we actually need to handle NORM_IGNORENONSPACE. If I read MSDN // correctly, we can ignore it, but I need to verify this. 
// UNDONE-ERR: Remove following asserts and handle appropriately // assert(!(dwCmpFlags & NORM_IGNOREKANATYPE)); // not handled // assert(!(dwCmpFlags & NORM_IGNOREWIDTH)); // not handled // Loop until one of the limits is reached (or we return from inside the while loop). while (cch1 != 0 && cch2 != 0) { // Check if we've reached the end of either string. if (*pstr1 == '\0' && *pstr2 == '\0') return s_rgnSymLookup[nSymFound]; if (*pstr1 == '\0') return CSTR_LESS_THAN; if (*pstr2 == '\0') return CSTR_GREATER_THAN; // Both strings still have characters -- do locale-specific comparison of next character // w1, w2 -- Track the character codes // UNDONE-ERR: Need to figure out how I'm going to handle the table for >256 and then // modify these lines appropriately. #if 1 // def COMPSTRA w1 = (WORD)((BYTE)*pstr1); w2 = (WORD)((BYTE)*pstr2); #else w1 = (WORD)(*pstr1); w2 = (WORD)(*pstr2); #endif // dw1, dw2 -- Track character code or'ed in with bucket describing the 'type' of the char. dw1 = g_rgdwLoc[w1]; dw2 = g_rgdwLoc[w2]; // UNDONE-OPT: Could pull wordsort check outside of the loop to remove an if per iteration. // Same with fIgnoreSymbols, etc. However, that particular optimization // (#including a file with particular defines) obfuscates the code quite a bit. // Word sort -- some chars are special cased; if we hit one, then skip the character, // but track which string(s) have the chars, and which had the first special char in case // the strings otherwise match. (ie, correct ordering: "ab-cd", "abce", "a-bce", "ab-ce") if (fWordSort) { if (dw1 & COMP_WORDSORT) { // If both are special symbols, then skip them. 
Also need to track if they're // different symbols (ie "'" in one string and "-" in the other) if (dw2 & COMP_WORDSORT) { if (dw1 < dw2) nSymFound = SYM_BOTH | SYM_1FIRST; else if (dw1 > dw2) nSymFound = SYM_BOTH | SYM_2FIRST; NEXTCHAR(1); NEXTCHAR(2); continue; } else { if (nSymFound == SYM_NONE) nSymFound = SYM_1FIRST; nSymFound |= SYM_1; NEXTCHAR(1); continue; } } if (dw2 & COMP_WORDSORT) { if (nSymFound == SYM_NONE) nSymFound = SYM_2FIRST; nSymFound |= SYM_2; NEXTCHAR(2); continue; } // If here, then neither of the current characters were special symbols. Continue // processing them... } if (fIgnoreSymbols) { if (dw1 & COMP_SYMBOL) { NEXTCHAR(1); continue; } if (dw2 & COMP_SYMBOL) { NEXTCHAR(2); continue; } } if (fIgnoreCase) { // Handle case-insensitivity. (+32 converts from upper to lower in our table) if (dw1 & COMP_ULETTER) dw1 = COMP_LLETTER | w1 + 32; if (dw2 & COMP_ULETTER) dw2 = COMP_LLETTER | w2 + 32; } if (dw1 < dw2) return CSTR_LESS_THAN; if (dw1 > dw2) return CSTR_GREATER_THAN; // Strings Matched. Move on to the next character NEXTCHAR(1); NEXTCHAR(2); } // If here, then we passed the limit on one of the strings, and both strings are equal so far. // Check if we hit both limits if (cch1 == 0 && cch2 == 0) { // If we're in a wordsort search, then we need to take the special symbol state into account. // (If we're in a stringsort search, then nSymFound == 0, so we'll return CSTR_EQUAL). return s_rgnSymLookup[nSymFound]; } // If here, then we hit just one limit, and that string comes second... return (cch1 == 0) ? CSTR_GREATER_THAN : CSTR_LESS_THAN; }