510 lines
20 KiB
C
510 lines
20 KiB
C
/*++

Copyright (c) 1990  Microsoft Corporation

Module Name:

    Gen8dot3.c

Abstract:

    This module implements a routine to generate 8.3 names from long names.

Author:

    Gary Kimura     [GaryKi]    26-Mar-1992

Environment:

    Pure Utility Routines

--*/
|
|
|
|
#include "ntrtlp.h"
|
|
#include <stdio.h>
|
|
|
|
extern PUSHORT NlsUnicodeToMbOemData;
|
|
extern PUSHORT NlsOemToUnicodeData;
|
|
extern PCH NlsUnicodeToOemData;
|
|
extern PUSHORT NlsMbOemCodePageTables;
|
|
extern BOOLEAN NlsMbOemCodePageTag;
|
|
extern PUSHORT NlsOemLeadByteInfo;
|
|
extern USHORT OemDefaultChar;
|
|
|
|
// A condensed table of legal fat character values
|
|
|
|
const
|
|
ULONG RtlFatIllegalTable[] = { 0xffffffff,
|
|
0xfc009c04,
|
|
0x38000000,
|
|
0x10000000 };
|
|
|
|
WCHAR GetNextWchar (IN PUNICODE_STRING Name, IN PULONG CurrentIndex, IN BOOLEAN SkipDots, IN BOOLEAN AllowExtendedCharacters);
|
|
USHORT RtlComputeLfnChecksum (PUNICODE_STRING Name);
|
|
|
|
//
//  BOOLEAN
//  IsDbcsCharacter (
//      IN WCHAR Wc
//      );
//
//  Nonzero when Wc is above 127 and its NlsUnicodeToMbOemData entry has a
//  nonzero high byte.  Callers in this file count such a character as two
//  Oem bytes and every other character as one.
//

#define IsDbcsCharacter(WC) (((WC) > 127) && (HIBYTE(NlsUnicodeToMbOemData[(WC)])))
|
|
|
|
//
//  For kernel builds with ALLOC_PRAGMA, assign every routine in this module
//  to the PAGE section.
//

#if defined(ALLOC_PRAGMA) && defined(NTOS_KERNEL_RUNTIME)
#pragma alloc_text(PAGE,RtlGenerate8dot3Name)
#pragma alloc_text(PAGE,GetNextWchar)
#pragma alloc_text(PAGE,RtlComputeLfnChecksum)
#pragma alloc_text(PAGE,RtlIsNameLegalDOS8Dot3)
#pragma alloc_text(PAGE,RtlIsValidOemCharacter)
#endif
|
|
|
|
|
|
VOID
RtlGenerate8dot3Name (
    IN PUNICODE_STRING Name,
    IN BOOLEAN AllowExtendedCharacters,
    IN OUT PGENERATE_NAME_CONTEXT Context,
    OUT PUNICODE_STRING Name8dot3
    )

/*++

Routine Description:

    Builds one 8.3 candidate for a long name.  Every call made with the same
    context advances a numeric "~N" tail, so a caller whose candidate
    collides with an existing short name can call again for the next one.

Arguments:

    Name - The long name to shorten.

    AllowExtendedCharacters - When TRUE, characters above 127 (DBCS
        included) may appear in the short name provided they map to an
        upcased Oem character.

    Context - State carried from call to call.  The caller must zero it
        before the first call for a given Name; a NameLength of zero is what
        makes this routine build the basis name and the extension.

    Name8dot3 - Receives the generated name.  The caller owns the buffer,
        which should hold 12 characters (24 bytes).

Return Value:

    None.

--*/

{
    BOOLEAN MultiByteOem;
    BOOLEAN TailIsAllNines = TRUE;
    ULONG OemBytes;
    ULONG TailChars;
    WCHAR Tail[8];
    ULONG OutChars;
    ULONG n;

#ifdef NTOS_KERNEL_RUNTIME
    extern BOOLEAN FsRtlSafeExtensions;
#else
    BOOLEAN FsRtlSafeExtensions = TRUE;
#endif

    //
    //  Oem byte counts only matter when extended characters are allowed and
    //  the Oem code page is multi-byte.
    //

    MultiByteOem = AllowExtendedCharacters && NlsMbOemCodePageTag;

    //
    //  First call for this name: build the basis and the extension.
    //

    if (Context->NameLength == 0) {

        ULONG ExtensionStart;
        ULONG Scan;
        BOOLEAN IgnoreDots;
        WCHAR Wch;

        //
        //  Pass one: remember the index just past the last dot.  Dots are
        //  skipped up to the first returned character when the name itself
        //  begins with a dot.
        //

        ExtensionStart = MAXULONG;
        Scan = 0;
        IgnoreDots = ((Name->Length > 0) && (Name->Buffer[0] == L'.'));

        for (;;) {

            Wch = GetNextWchar( Name, &Scan, IgnoreDots, AllowExtendedCharacters );

            if (Wch == 0) {
                break;
            }

            IgnoreDots = FALSE;

            if (Wch == L'.') {
                ExtensionStart = Scan;
            }
        }

        //
        //  A dot that is the very last character introduces no extension.
        //

        if (ExtensionStart == Name->Length / sizeof(WCHAR)) {
            ExtensionStart = MAXULONG;
        }

        //
        //  Pass two: collect the basis.  Dots are dropped, and collection
        //  stops when the input runs out, when the last dot is reached, or
        //  when six characters (six Oem bytes on a multi-byte code page)
        //  have been gathered, which leaves room for the "~N" tail.
        //

        Scan = 0;
        OemBytes = 0;
        Context->NameLength = 0;

        for (;;) {

            Wch = GetNextWchar( Name, &Scan, TRUE, AllowExtendedCharacters );

            if ((Wch == 0) || (Scan >= ExtensionStart) || (Context->NameLength >= 6)) {
                break;
            }

            if (MultiByteOem) {

                OemBytes += IsDbcsCharacter(Wch) ? 2 : 1;

                if (OemBytes > 6) {
                    break;
                }
            }

            Context->NameBuffer[Context->NameLength] = Wch;
            Context->NameLength += 1;
        }

        //
        //  A basis of two Oem bytes or fewer gets four hex digits of the
        //  long name checksum appended, low nibble first, so the short name
        //  space is less sparse.
        //

        if ((MultiByteOem ? OemBytes : Context->NameLength) <= 2) {

            USHORT Bits;
            WCHAR Digit;

            Context->Checksum = RtlComputeLfnChecksum( Name );
            Bits = Context->Checksum;

            for (n = 0; n < 4; n++) {

                Digit = (WCHAR)(Bits & 0xf);
                Digit = (WCHAR)((Digit <= 9) ? (Digit + '0') : (Digit + ('A' - 10)));

                Context->NameBuffer[Context->NameLength + n] = Digit;

                Bits >>= 4;
            }

            Context->NameLength += 4;
            Context->ChecksumInserted = TRUE;
        }

        //
        //  Build the extension: a dot followed by up to three characters
        //  (three Oem bytes on a multi-byte code page) taken from after the
        //  last dot.
        //

        if (ExtensionStart == MAXULONG) {

            Context->ExtensionLength = 0;

        } else {

            Context->ExtensionBuffer[0] = L'.';

            OemBytes = 1;
            Context->ExtensionLength = 1;

            for (;;) {

                Wch = GetNextWchar( Name, &ExtensionStart, TRUE, AllowExtendedCharacters );

                if ((Wch == 0) || (Context->ExtensionLength >= 4)) {
                    break;
                }

                if (MultiByteOem) {

                    OemBytes += IsDbcsCharacter(Wch) ? 2 : 1;

                    if (OemBytes > 4) {
                        break;
                    }
                }

                Context->ExtensionBuffer[Context->ExtensionLength] = Wch;
                Context->ExtensionLength += 1;
            }

            //
            //  A nonzero Wch here means input was left over, i.e. the
            //  extension was truncated.  With safe extensions selected the
            //  last kept character becomes a tilde to flag that.
            //

            if ((Wch != 0) && FsRtlSafeExtensions) {
                Context->ExtensionBuffer[Context->ExtensionLength - 1] = L'~';
            }
        }
    }

    //
    //  Every call uses the next index value.
    //

    Context->LastIndexValue += 1;

    //
    //  After four collisions switch to a checksum based basis, unless the
    //  checksum is already part of it.  The first two characters are kept
    //  and four checksum digits follow.  On a multi-byte code page, when
    //  either of the first two characters is DBCS, only one leading
    //  character is kept:
    //
    //      ('XX' stands for one DBCS character)
    //
    //      LongName       -> ShortName | Bias  Oem  Unicode
    //      ----------------------------+-------------------
    //      XXXXThisisapen -> XX1234    |  1     6     5
    //      XXThisisapen   -> XX1234    |  1     6     5
    //      aXXThisisapen  -> a1234     |  1     5     5
    //      aaThisisapen   -> aa1234    |  0     6     6
    //

    if ((Context->LastIndexValue > 4) && !Context->ChecksumInserted) {

        USHORT Bits;
        WCHAR Digit;
        ULONG Bias;

        Bias = 0;

        if (MultiByteOem &&
            (IsDbcsCharacter(Context->NameBuffer[0]) ||
             IsDbcsCharacter(Context->NameBuffer[1]))) {

            Bias = 1;
        }

        Context->Checksum = RtlComputeLfnChecksum( Name );
        Bits = Context->Checksum;

        for (n = 0; n < 4; n++) {

            Digit = (WCHAR)(Bits & 0xf);
            Digit = (WCHAR)((Digit <= 9) ? (Digit + '0') : (Digit + ('A' - 10)));

            Context->NameBuffer[(2 - Bias) + n] = Digit;

            Bits >>= 4;
        }

        Context->NameLength = (UCHAR)(6 - Bias);
        Context->LastIndexValue = 1;
        Context->ChecksumInserted = TRUE;
    }

    //
    //  Render the index in decimal, filling Tail from its end backwards,
    //  and note whether every digit is a 9 (the next index will then need
    //  one more digit).
    //

    TailChars = 1;
    n = Context->LastIndexValue;

    while ((TailChars <= 7) && (n > 0)) {

        WCHAR Digit = (WCHAR)(L'0' + (n % 10));

        Tail[8 - TailChars] = Digit;

        if (Digit != L'9') {
            TailIsAllNines = FALSE;
        }

        TailChars += 1;
        n /= 10;
    }

    //
    //  The tilde goes in front of the digits.
    //

    Tail[8 - TailChars] = L'~';

    //
    //  Assemble the result: basis, then "~N", then the extension if any.
    //

    OutChars = Context->NameLength;

    if (OutChars != 0) {
        RtlCopyMemory( &Name8dot3->Buffer[0], &Context->NameBuffer[0], OutChars * 2 );
    }

    RtlCopyMemory( &Name8dot3->Buffer[OutChars], &Tail[8 - TailChars], TailChars * 2 );
    OutChars += TailChars;

    if (Context->ExtensionLength != 0) {

        RtlCopyMemory( &Name8dot3->Buffer[OutChars],
                       &Context->ExtensionBuffer[0],
                       Context->ExtensionLength * 2 );

        OutChars += Context->ExtensionLength;
    }

    Name8dot3->Length = (USHORT)(OutChars * 2);

    //
    //  When the index just used was all 9s the next tail is one character
    //  longer, so shrink the basis now to keep the next name within eight
    //  characters.
    //

    if (TailIsAllNines) {

        if (MultiByteOem) {

            OemBytes = 0;

            for (n = 0; n < Context->NameLength; n++) {

                OemBytes += IsDbcsCharacter(Context->NameBuffer[n]) ? 2 : 1;

                if (OemBytes >= 8 - (TailChars + 1)) {
                    break;
                }
            }

            Context->NameLength = (UCHAR)n;

        } else {

            Context->NameLength -= 1;
        }
    }

    return;
}
|
|
|
|
|
|
BOOLEAN
RtlIsValidOemCharacter (
    IN PWCHAR Char
    )

/*++

Routine Description:

    Determines whether the best-fitted and upcased version of the input
    Unicode character is a valid Oem character.

Arguments:

    Char - Supplies the Unicode character.  When the routine returns TRUE it
        is overwritten with the best-fitted, upcased version; when the
        routine returns FALSE it is left untouched.

Return Value:

    TRUE if the character has an Oem mapping, FALSE if it maps to
    OemDefaultChar.

--*/

{
    WCHAR UniTmp;
    WCHAR OemChar;

    //
    //  Make a round trip Unicode -> Oem -> Unicode so that visual best fits
    //  are resolved, upcase the result, and convert it to Oem once more.
    //

    if (!NlsMbOemCodePageTag) {

        UniTmp = (WCHAR)NLS_UPCASE(NlsOemToUnicodeData[(UCHAR)NlsUnicodeToOemData[*Char]]);

        //
        //  NlsUnicodeToOemData is a PCH table.  Where its element type is a
        //  signed char, an Oem byte of 0x80 or above would be sign extended
        //  into 0xFFxx and could never compare equal to OemDefaultChar, so
        //  go through UCHAR exactly as the lookup above does.
        //

        OemChar = (UCHAR)NlsUnicodeToOemData[UniTmp];

    } else {

        OemChar = NlsUnicodeToMbOemData[ *Char ];

        if (NlsOemLeadByteInfo[HIBYTE(OemChar)]) {

            USHORT Entry;

            //
            //  Lead byte - translate the trail byte with the table that
            //  belongs to this lead byte.
            //

            Entry = NlsOemLeadByteInfo[HIBYTE(OemChar)];
            UniTmp = (WCHAR)NlsMbOemCodePageTables[ Entry + LOBYTE(OemChar) ];

        } else {

            //
            //  Single byte character.
            //

            UniTmp = NlsOemToUnicodeData[LOBYTE(OemChar)];
        }

        //
        //  Upcase the Unicode character and convert it back to Oem.
        //

        UniTmp = (WCHAR)NLS_UPCASE(UniTmp);
        OemChar = NlsUnicodeToMbOemData[UniTmp];
    }

    //
    //  Ending up on the default character means there was no mapping.
    //

    if (OemChar == OemDefaultChar) {
        return FALSE;
    }

    *Char = UniTmp;

    return TRUE;
}
|
|
|
|
|
|
// Local support routine
|
|
|
|
WCHAR GetNextWchar (IN PUNICODE_STRING Name, IN PULONG CurrentIndex, IN BOOLEAN SkipDots, IN BOOLEAN AllowExtendedCharacters)
|
|
/*++
|
|
Routine Description:
|
|
This routine scans the input name starting at the current index and returns the next valid character for the long name to 8.3 generation algorithm.
|
|
It also updates the current index to point to the next character to examine.
|
|
|
|
The user can specify if dots are skipped over or passed back.
|
|
The filtering done by the procedure is:
|
|
1. Skip characters less then blanks, and larger than 127 if AllowExtendedCharacters is FALSE
|
|
2. Optionally skip over dots
|
|
3. translate the special 7 characters : + , ; = [ ] into underscores
|
|
Arguments:
|
|
Name - Supplies the name being examined
|
|
CurrentIndex - Supplies the index to start our examination and also receives the index of one beyond the character we return.
|
|
SkipDots - Indicates whether this routine will also skip over periods
|
|
AllowExtendedCharacters - Tell whether charaacters >= 127 are valid.
|
|
Return Value:
|
|
WCHAR - returns the next wchar in the name string
|
|
--*/
|
|
{
|
|
WCHAR wc;
|
|
|
|
// Until we find out otherwise the character we are going to return is 0
|
|
wc = 0;
|
|
|
|
// Now loop through updating the current index until we either have a character to return or until we exhaust the name buffer
|
|
while (*CurrentIndex < (ULONG)(Name->Length/2)) {
|
|
// Get the next character in the buffer
|
|
wc = Name->Buffer[*CurrentIndex];
|
|
*CurrentIndex += 1;
|
|
|
|
// If the character is to be skipped over then reset wc to 0
|
|
if ((wc <= L' ') || ((wc >= 127) && (!AllowExtendedCharacters || !RtlIsValidOemCharacter(&wc))) || ((wc == L'.') && SkipDots)) {
|
|
wc = 0;
|
|
} else {
|
|
// We have a character to return, but first translate the character is necessary
|
|
if ((wc < 0x80) && (RtlFatIllegalTable[wc/32] & (1 << (wc%32)))) {
|
|
wc = L'_';
|
|
}
|
|
|
|
// Do an a-z upcase.
|
|
if ((wc >= L'a') && (wc <= L'z')) {
|
|
wc -= L'a' - L'A';
|
|
}
|
|
|
|
// And break out of the loop to return to our caller
|
|
break;
|
|
}
|
|
}
|
|
|
|
//DebugTrace( 0, Dbg, "GetNextWchar -> %08x\n", wc);
|
|
return wc;
|
|
}
|
|
|
|
|
|
// Internal support routine
|
|
USHORT RtlComputeLfnChecksum (PUNICODE_STRING Name)
|
|
/*++
|
|
Routine Description:
|
|
This routine computes the Chicago long file name checksum.
|
|
Arguments:
|
|
Name - Supplies the name to compute the checksum on. Note that one character names don't have interesting checksums.
|
|
Return Value:
|
|
The checksum.
|
|
--*/
|
|
{
|
|
ULONG i;
|
|
USHORT Checksum;
|
|
|
|
RTL_PAGED_CODE();
|
|
|
|
if (Name->Length == sizeof(WCHAR)) {
|
|
return Name->Buffer[0];
|
|
}
|
|
|
|
Checksum = ((Name->Buffer[0] << 8) + Name->Buffer[1]) & 0xffff;
|
|
|
|
// This checksum is kinda strange because we want to still have a good range even if all the characters are < 0x00ff.
|
|
for (i=2; i < Name->Length / sizeof(WCHAR); i+=2) {
|
|
Checksum = (Checksum & 1 ? 0x8000 : 0) + (Checksum >> 1) + (Name->Buffer[i] << 8);
|
|
|
|
// Be carefull to not walk off the end of the string.
|
|
if (i+1 < Name->Length / sizeof(WCHAR)) {
|
|
Checksum += Name->Buffer[i+1] & 0xffff;
|
|
}
|
|
}
|
|
|
|
return Checksum;
|
|
}
|
|
|
|
|
|
BOOLEAN
RtlIsNameLegalDOS8Dot3 (
    IN PUNICODE_STRING Name,
    IN OUT POEM_STRING OemName OPTIONAL,
    OUT PBOOLEAN NameContainsSpaces OPTIONAL
    )

/*++

Routine Description:

    Takes an input string and gives a definitive answer on whether the name
    can be used to create a file on the FAT file system.

    It can therefore also be used to decide whether a name is appropriate to
    hand back to a Win31 or DOS app, i.e. whether a downlevel app will
    understand the name.

    Note: an important part of this test is the mapping from UNICODE to Oem,
    which is why the input must be supplied in UNICODE.

Arguments:

    Name - The UNICODE name to test for conformance to 8.3 semantics.

    OemName - If specified, receives the upcased Oem name corresponding to
        Name.  Storage must be provided by the caller.  The contents are
        undefined if the routine returns FALSE.

    NameContainsSpaces - If specified and the routine returns TRUE, tells
        whether the name contains spaces.  Undefined if the routine returns
        FALSE.  In many instances the alternate name is more appropriate to
        use when spaces are present in the principal name, even if it is
        8.3 compliant.

Return Value:

    BOOLEAN - TRUE if the UNICODE name forms a valid 8.3 FAT name when
        upcased to the current Oem code page.  A name that converts to zero
        Oem bytes passes every check and also yields TRUE.

--*/

{
    ULONG Index;
    BOOLEAN ExtensionPresent = FALSE;
    BOOLEAN SpacesPresent = FALSE;
    OEM_STRING LocalOemName;
    UCHAR OemBuffer[12];

    //
    //  Char is examined after the scan loop.  Start it at 0 so that an
    //  empty Oem name, for which the loop never runs, is not judged on an
    //  uninitialized value.
    //

    UCHAR Char = 0;

    //
    //  If the name is more than 12 chars, bail.
    //

    if (Name->Length > 12*sizeof(WCHAR)) {
        return FALSE;
    }

    //
    //  Upcase the name to Oem, into a local buffer when the caller did not
    //  supply one.  If anything goes wrong, return FALSE.
    //

    if (!ARGUMENT_PRESENT(OemName)) {

        OemName = &LocalOemName;

        OemName->Buffer = &OemBuffer[0];
        OemName->Length = 0;
        OemName->MaximumLength = 12;
    }

    if (!NT_SUCCESS(RtlUpcaseUnicodeStringToCountedOemString(OemName, Name, FALSE))) {
        return FALSE;
    }

    //
    //  Special case . and ..
    //

    if (((OemName->Length == 1) && (OemName->Buffer[0] == '.')) ||
        ((OemName->Length == 2) && (OemName->Buffer[0] == '.') && (OemName->Buffer[1] == '.'))) {

        if (ARGUMENT_PRESENT(NameContainsSpaces)) {
            *NameContainsSpaces = FALSE;
        }

        return TRUE;
    }

    //
    //  Walk the string looking for illegal characters and/or incorrect
    //  syntax.
    //

    for ( Index = 0; Index < OemName->Length; Index += 1 ) {

        Char = OemName->Buffer[ Index ];

        //
        //  Skip over any Dbcs characters.
        //

        if (NlsMbOemCodePageTag && NlsOemLeadByteInfo[Char]) {

            //
            //  1) In the base part ( !ExtensionPresent ) a lead byte as the
            //     8th byte ( Index == 7 ) is an error, since its trail byte
            //     would not fit.  Anything beyond that ( Index > 7 ) is
            //     definitely an error.
            //
            //  2) A lead byte as the very last byte
            //     ( Index == OemName->Length - 1 ) is an error.
            //

            if ((!ExtensionPresent && (Index >= 7)) ||
                (Index == (ULONG)(OemName->Length - 1))) {

                return FALSE;
            }

            Index += 1;

            continue;
        }

        //
        //  Make sure this character is legal.  The mask is built from an
        //  unsigned one: '?' (63) tests bit 31, and shifting a signed 1
        //  that far is undefined.
        //

        if ((Char < 0x80) && (RtlFatIllegalTable[Char/32] & ((ULONG)1 << (Char%32)))) {
            return FALSE;
        }

        //
        //  Remember if there was a space.
        //

        if (Char == ' ') {
            SpacesPresent = TRUE;
        }

        if (Char == '.') {

            //
            //  We stepped onto a period.  We require the following things:
            //
            //      - There can only be one
            //      - It can't be the first character
            //      - The previous character can't be a space
            //      - There can't be more than 3 bytes following
            //

            if (ExtensionPresent ||
                (Index == 0) ||
                (OemName->Buffer[Index - 1] == ' ') ||
                (OemName->Length - (Index + 1) > 3)) {

                return FALSE;
            }

            ExtensionPresent = TRUE;
        }

        //
        //  The base part of the name can't be more than 8 characters long.
        //

        if ((Index >= 8) && !ExtensionPresent) {
            return FALSE;
        }
    }

    //
    //  The name cannot end in a space or a period.
    //

    if ((Char == ' ') || (Char == '.')) {
        return FALSE;
    }

    if (ARGUMENT_PRESENT(NameContainsSpaces)) {
        *NameContainsSpaces = SpacesPresent;
    }

    return TRUE;
}