WindowsXP-SP1/inetsrv/intlwb/kor/basemain.cpp

1104 lines
40 KiB
C++

// =========================================================================
// Copyright (C) 1997 - 1998, Microsoft Corporation. All Rights Reserved.
//
// File Name : BASEMAIN.CPP
// Function : BASE ENGINE Handler
// : NLP Base Engine
// =========================================================================
#include "basemain.hpp"
#include "convert.hpp"
#include "MainDict.h"
/*---------------------------------------------------------------------------
%%Function : GetStemEnding
%%Contact : dhyu
---------------------------------------------------------------------------*/
void BaseEngine::GetStemEnding (char *incode, char *stem, char *ending, int position)
{
int codelen = lstrlen (incode) - 1;
LMEPOS = position;
lstrcpy (stem, incode);
if (LMEPOS == -1)
{
ULSPOS = codelen;
ending [0] = NULLCHAR;
}
else
{
if (LMEPOS == codelen)
{
ULSPOS = -1;
stem [0] = NULLCHAR;
}
else
{
ULSPOS = lstrlen(incode) - LMEPOS - 2;
stem[ULSPOS+1] = NULLCHAR;
}
// ending have a reverse order.
for (int k = 0, j = lstrlen(incode) - 1; k <= LMEPOS; j--, k++)
ending [k] = incode [j];
ending [k] = NULLCHAR;
}
}
int BaseEngine::NLP_BASE_NOUN (LPCSTR d, char *rstrings)
{
char Act[10], ostem[80],
oending[40], incode [100], stem [100], ending [40];
int bt, sp[10];
CODECONVERT Conv;
wcount = 0;
memset(incode, NULLCHAR, 100);
memset(Act, NULLCHAR, 10);
memset(lrgsz, NULLCHAR, 400);
for (int i = 0; i < 10; i++) sp[i] = 0x0000;
if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
{ // KS -> Incode
return 99;
}
bt = NLP_Get_Ending(incode, Act, sp, TOSSI);
// for (i = bt - 1; i >= 0; i--)
for (i = 0; i < bt; i++)
{
GetStemEnding (incode, stem, ending, sp [i]);
ACT_C = GetBit(Act [i], 7); // consonant
ACT_V = GetBit(Act [i], 6); // vowel
ACT_N_V = GetBit(Act [i], 5);
ACT_P_A = GetBit(Act [i], 4);
ACT_N_E = GetBit(Act [i], 3);
memset(ostem, NULLCHAR, 80);
memset(oending, NULLCHAR, 40);
Conv.INR2HAN(ending, oending, codeWanSeong);
Conv.INS2HAN(stem, ostem, codeWanSeong); // incode -> ks
if(__IsDefEnd(LMEPOS, 1) == 1 &&
ending[LMEPOS] == __K_G && ending[LMEPOS-1] == __V_p)
{
if(NLP_Ge_Proc(stem) != BT)
{
lstrcat(lrgsz, ostem);
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
lstrcat(lrgsz, "\t");
vbuf[wcount++] = POS_PRONOUN;
vbuf[wcount++] = POS_TOSSI;
}
continue;
}
if (NLP_NCV_Proc(stem, ending) != NCV_VALID)
continue;
if (FindSilsaWord (ostem) & _NOUN)
{ // searching the noun dictionary
lstrcat(lrgsz, ostem);
vbuf[wcount++] = POS_NOUN;
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_TOSSI;
}
lstrcat(lrgsz, "\t");
}
if(i == 0 || ACT_P_A == 1)
{
if (NLP_Find_Pronoun (stem, ending) == VALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_TOSSI;
}
lstrcat(lrgsz, "\t");
}
}
if(i == 0 || ACT_N_E == 1)
{
if(NLP_Num_Proc(stem) != BT)
{
lstrcat(lrgsz, ostem);
vbuf[wcount++] = POS_NUMBER;
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_TOSSI;
}
lstrcat(lrgsz, "\t");
}
continue; // backtracking
}
}
lstrcpy (rstrings, lrgsz);
return wcount;
}
int BaseEngine::NLP_BASE_AFFIX (LPCSTR d, char *rstrings)
{
char Act[10], oending [40], incode [100], stem [100], ending [40];
int bt,
ret,
sp[10];
CODECONVERT Conv;
wcount = 0;
memset(incode, NULLCHAR, 100);
memset(Act, NULLCHAR, 10);
memset(lrgsz, NULLCHAR, 400);
for (int i = 0; i < 10; i++) sp[i] = 0x0000;
if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
{ // KS -> Incode
return 99;
}
bt = NLP_Get_Ending(incode, Act, sp, TOSSI);
// for (i = bt - 1; i >= 0; i--)
for (i = 0; i < bt; i++)
{
GetStemEnding (incode, stem, ending, sp [i]);
ACT_C = GetBit(Act [i], 7); // consonant
ACT_V = GetBit(Act [i], 6); // vowel
ACT_N_V = GetBit(Act [i], 5);
ACT_P_A = GetBit(Act [i], 4);
ACT_N_E = GetBit(Act [i], 3);
if (NLP_NCV_Proc(stem, ending) != NCV_VALID)
continue;
memset(oending, NULLCHAR, 40);
Conv.INR2HAN(ending, oending, codeWanSeong);
ret = NLP_Fix_Proc(stem, ending);
switch (ret)
{
case Deol_VALID :
case Pref_VALID :
case Suf_VALID :
case PreSuf_VALID :
if (lstrlen(oending) > 0)
{
lstrcat (lrgsz, "+");
lstrcat (lrgsz, oending);
vbuf [wcount++] = POS_TOSSI;
}
lstrcat(lrgsz, "\t");
case BT : continue; // backtracking
}
}
lstrcpy (rstrings, lrgsz);
return wcount;
}
int BaseEngine::NLP_BASE_ALONE(LPCSTR d, char *rstrings)
{
char incode [100];
CODECONVERT Conv;
memset(incode, NULLCHAR, 100);
memset(lrgsz, NULLCHAR, 400);
wcount = 0;
if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
{ // KS -> Incode
return 99;
}
// check whether input word is ADVERB, or not
if (FindSilsaWord (d) & _ALONE)
{
lstrcat(lrgsz, d);
lstrcat(lrgsz, "\t");
vbuf[wcount++] = POS_ADVERB;
}
lstrcpy (rstrings, lrgsz);
return wcount;
}
int BaseEngine::NLP_BASE_VERB (LPCSTR d, char *rstrings)
{
char index[1],
AUX_Flag, tmp[80],
Act[10], ostem[80],
oending[40], incode [100], stem [100], ending [40], rending [40];
int bt,
ret,
rt,
sp[10], temp, luls;
CODECONVERT Conv;
wcount = 0;
memset(Act, NULLCHAR, 10);
memset(incode, NULLCHAR, 100);
memset(lrgsz, NULLCHAR, 400);
for (int i = 0; i < 10; i++) sp[i] = 0x0000;
if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
{ // KS -> Incode
return 99;
}
bt = NLP_Get_Ending(incode, Act, sp, END);
int codelen = lstrlen(incode) - 1;
for (i = bt-1; i >= 0; i--)
{
memset(ostem, NULLCHAR, 80);
memset(oending, NULLCHAR, 40);
GetStemEnding (incode, stem, ending, sp [i]);
if (lstrlen (stem) == 0)
continue;
ACT_C = GetBit(Act[i], 7);
ACT_V = GetBit(Act[i], 6);
ACT_N_V = GetBit(Act[i], 5);
ACT_P_A = GetBit(Act[i], 4);
ACT_N_E = GetBit(Act[i], 3);
ACT_SS = GetBit(Act[i], 2);
ACT_KE = GetBit(Act[i], 1);
RestoreEnding (ending, rending);
Conv.INR2HAN(rending, oending, codeWanSeong);
Conv.INS2HAN(stem, ostem, codeWanSeong); // incode -> ks
lstrcpy(tmp, stem);
luls = ULSPOS;
if(ACT_SS == 1)
{
if((ret = NLP_SS_Proc(stem, ending)) < INVALID) // VALID
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf [wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
}
continue;
}
if(i == 0)
{
break;
}
if(ACT_KE == 1)
{
if((ret = NLP_KTC_Proc(stem, ending)) == BT) // backtracking
{
continue;
}
}
ret = NLP_VCV_Check (stem, ending);
if(ret < INVALID)
{
AUX_Flag = 0;
if(ACT_N_V == 1)
{
if (FindSilsaWord (ostem) & _VERB)
{
lstrcat(lrgsz, ostem);
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
lstrcat(lrgsz, "\t");
vbuf[wcount++] = POS_VERB;
vbuf[wcount++] = POS_ENDING;
AUX_Flag = 1;
}
}
if(ACT_P_A == 1)
{
if (FindSilsaWord (ostem) & _ADJECTIVE)
{
lstrcat(lrgsz, ostem);
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
lstrcat(lrgsz, "\t");
vbuf[wcount++] = POS_ADJECTIVE;
vbuf[wcount++] = POS_ENDING;
AUX_Flag = 1;
}
if(NLP_Dap_Proc(stem) == Dap_VALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
AUX_Flag = 1;
}
if(NLP_Gop_Proc(stem) == Gop_VALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
AUX_Flag = 1;
}
if((rt = NLP_Manha_Proc(stem)) < INVALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
AUX_Flag = 1;
}
if((rt = NLP_Manhaeci_Proc(stem)) == Manhaeci_VALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
AUX_Flag = 1;
}
if((rt = NLP_Cikha_Proc(stem)) < INVALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
}
}
// AUX_FLOW
if(AUX_Flag == 0)
{
if(ACT_N_V == 1)
{
if((rt = NLP_AUX_Find(stem, 0)) < INVALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
}
}
if(ACT_P_A == 1)
{
if((rt = NLP_AUX_Find(stem, 1)) < INVALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
}
}
}
}
else if (ret != MORECHECK)
continue; // against consonant-vowel harmony
if(ACT_N_E == 1)
{
if(strcmp(stem, TempIkNl) == 0)
{
lstrcat(lrgsz, ostem);
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
lstrcat(lrgsz, "\t");
vbuf[wcount++] = POS_VERB; //Jap_VALID;
vbuf[wcount++] = POS_ENDING;
}
if(__IsDefStem(ULSPOS, 1) == 1 &&
stem[ULSPOS-1] == __K_I && stem[ULSPOS] == __V_l)
{
if(__IsDefStem(ULSPOS, 2) == 1 && stem[ULSPOS-2] == __K_M)
{
sp[i] = LMEPOS+3;
Act[i] = 0x70; // action:01-110-00-0
i++;
if(__IsDefStem(ULSPOS, 4) == 1 &&
stem[ULSPOS-4] == __K_I && stem[ULSPOS-3] == __V_m)
{
sp[i] = LMEPOS+5;
Act[i] = (unsigned char)0xB0; // action:10 110 00 0
i++;
}
}
temp = ULSPOS;
__DelStem2(stem, &temp);
ULSPOS = temp;
if((ret = NLP_Machine_T(stem, ending)) < INVALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
}
temp = ULSPOS;
__AddStem2(stem, &temp, __K_I, __V_l);
ULSPOS = temp;
if(__IsDefEnd(LMEPOS, 1) == 1 &&
ending[LMEPOS] == __K_I && ending[LMEPOS-1] == __V_j)
{
for (int i = 0; i < 3; i++)
{
if(strcmp(stem, TempJap[i]) == 0)
{
lstrcat(lrgsz, ostem);
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
lstrcat(lrgsz, "\t");
vbuf[wcount++] = POS_VERB; //VERB_VALID;
vbuf[wcount++] = POS_ENDING;
}
}
continue;
}
index[0] = 'm';
if (FindSilsaWord (ostem) & _NOUN)
{
lstrcat(lrgsz, ostem);
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
lstrcat(lrgsz, "\t");
vbuf[wcount++] = POS_NOUN; //Jap_NOUN_VALID;
vbuf[wcount++] = POS_ENDING;
}
if((ret = NLP_Fix_Proc(stem, ending)) < INVALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
}
if (NLP_Find_Pronoun (stem, ending) == VALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_TOSSI;
}
lstrcat(lrgsz, "\t");
}
if((ret = NLP_Num_Proc(stem)) < INVALID)
{
lstrcat(lrgsz, ostem);
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
lstrcat(lrgsz, "\t");
vbuf[wcount++] = POS_NUMBER; //Jap_NUM_VALID;
vbuf[wcount++] = POS_ENDING;
}
continue; // backtracking
}
else if( /*ACT_Z != 1 && */ // ACT_Z != 1
ULS >= __V_k &&
!(__IsDefEnd(LMEPOS, 1) == 1 &&
ending[LMEPOS] == __K_I && ending[LMEPOS-1] == __V_j))
{
if((ret = NLP_Machine_T(stem, ending)) < INVALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
}
}
}
lstrcpy(tmp, stem);
luls = ULSPOS;
if(ACT_C == 0 && ACT_V == 1)
{
// by hjw : 95/3/6
if((ret = NLP_Irr_01(stem, ending)) < INVALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
continue;
}
}
lstrcpy (stem, tmp);
ret = BT;
switch(LME)
{
case __K_N :
if((ret = NLP_Irr_KN(stem, ending)) == Irr_KN_Vl)
{
ret = NLP_Irr_KN_Vl(stem);
}
if(ret == Irr_OPS)
{
if((ret = NLP_Irr_OPS(stem, ending)) == SS)
{
if((ret = NLP_SS_Proc(stem, ending)) == BT)
{
continue;
}
if(ret < INVALID)
{
ret += Irr_SS;
}
}
}
break;
case __K_B :
ret = NLP_Machine_A(stem, ending);
break;
// hjw : 95/3/17
case __K_S :
if(ACT_C == 1) // ATC_C == 1
{
ret = NLP_Irr_KS(stem, ending);
}
else if(ULS >= __V_k)
{
ret = NLP_Machine_A(stem, ending);
}
break;
case __K_M :
ret = NLP_Irr_KM(stem);
break;
case __K_R :
if(__IsDefEnd(LMEPOS, 1) == 0 || ending[LMEPOS-1] < __V_k)
{
ret = NLP_Machine_A(stem,ending);
if (ret != BT)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
}
}
if(ACT_P_A == 1)
{
ret = NLP_Irr_KRadj(stem, ending);
if (ret != BT)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
}
}
if(ACT_N_V == 1)
{
if((ret = NLP_Irr_KRvb(stem, ending)) == SS)
{
if((ret = NLP_SS_Proc(stem, ending)) == BT)
{
continue;
}
if(ret < INVALID)
{
ret += Irr_SS;
}
}
}
break;
case __K_I :
if(__IsDefEnd(LMEPOS, 1) == 1 && (ending[LMEPOS-1] == __V_h ||
ending[LMEPOS-1] == __V_hl || ending[LMEPOS-1] == __V_l ))
{
if(ULS >= __V_k)
{
ret = NLP_Irr_KI(stem,rending);
}
else
{
continue;
}
}
if(ULS == __K_R)
{
ret = NLP_Irr_KI_KR(stem, ending);
}
if(ULS >= __V_k)
{
ret = NLP_Irr_KI_V(stem, ending);
}
break;
default :
continue;
}
if(ret >= INVALID)
{
continue;
}
if(ret >= VALID)
{
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_ENDING;
}
lstrcat(lrgsz, "\t");
}
}
lstrcpy (rstrings, lrgsz);
return wcount;
}
// made by dhyu 1996. 2
// look into mrfgen01.txt to know details
void BaseEngine::RestoreEnding (char *ending, char *rending)
{
int len = lstrlen (ending); // ending has reverse order.
lstrcpy (rending, ending);
if (lstrlen (ending) == 0)
return;
if (ACT_SS) // insert "IEUNG, EO" to the first of ending
{
rending [len] = __V_j;
rending [len+1] = __K_I;
rending [len+2] = '\0';
return;
}
if (ACT_C && ACT_V) //CV == 11
{
if (ending [len - 1] == __K_I && ending [len - 2] == __V_k)
rending [len - 2] = __V_j;
return;
}
if (!ACT_C && ACT_V == TRUE) // CV == 01
{
switch (ending [len - 1])
{
case __K_B :
rending [len] = __V_m; // insert "SIOS, EU" to the first of ending
rending [len+1] = __K_S;
rending [len+2] = '\0';
return ;
case __K_R :
if (len == 2 && ending [0] == __V_k) // if ending is "ra" , insert "IEUNG EO" to the first of ending
{
rending [len] = __V_j;
rending [len+1] = __K_I;
rending [len+2] = '\0';
return;
}
break;
case __K_I :
if (ending [len - 2] == __V_y) // if ending is "yo",
{
rending [len] = __V_j;
rending [len+1] = __K_I;
rending [len+2] = '\0';
return;
}
break;
case __K_N :
if (!ACT_C && ACT_V && ACT_N_V && !ACT_P_A && !ACT_P_A && !ACT_N_E && !ACT_SS && !ACT_KE)
{
rending [len] = __V_m;
rending [len+1] = __K_N;
rending [len+2] = '\0';
return;
}
}
rending [len] = __V_m; // insert "IEUNG, EU" to the first of ending
rending [len+1] = __K_I;
rending [len+2] = '\0';
return;
}
// "KE-TO-CHI" ending and copula is processed with stem together.
return;
}
// To process compound noun, we use the window which size is 4 characters.
// We decrease the size until we found noun.
// However we don't decrease it less than 2 characters.
// made by dhyu --- 1996. 3
int BaseEngine::NLP_BASE_COMPOUND (LPCSTR d, char *rstrings)
{
char Act[10], ostem[80], oending[40],
incode [100], stem [100], ending [40];
int bt,
sp[10];
BOOL found;
CODECONVERT Conv;
memset(incode, NULLCHAR, 100);
memset(Act, NULLCHAR, 10);
for (int i = 0; i < 10; i++) sp[i] = 0x0000;
if(Conv.HAN2INS((char *)d, incode, codeWanSeong) != SUCCESS)
return 0;
bt = NLP_Get_Ending(incode, Act, sp, TOSSI);
for (i = bt-1; i >= bt-3 && i >= 0; i--)
//for (i = 0; i < bt; i++)
{
GetStemEnding (incode, stem, ending, sp [i]);
ACT_C = GetBit(Act [i], 7); // consonant
ACT_V = GetBit(Act [i], 6); // vowel
ACT_N_V = GetBit(Act [i], 5);
ACT_P_A = GetBit(Act [i], 4);
ACT_N_E = GetBit(Act [i], 3);
if (NLP_NCV_Proc(stem, ending) == NCV_VALID)
{
memset(ostem, NULLCHAR, 80);
memset(oending, NULLCHAR, 40);
Conv.INR2HAN(ending, oending, codeWanSeong);
Conv.INS2HAN(stem, ostem, codeWanSeong); // incode -> ks
wcount = 0;
memset (lrgsz, NULLCHAR, 400);
// Window size is 4 charaters (8 byte)
char window [9], inwindow [25];
memset (window, '\0', 9);
char *next = ostem;
found = TRUE;
while (lstrlen (next) > 8)
{
found = FALSE;
memcpy (window, next, 8);
for (int j = 7; j >= 3; j -= 2)
{
if (FindSilsaWord (window) & _NOUN)
{ // searching the noun dictionary
lstrcat(lrgsz, window);
vbuf[wcount++] = POS_NOUN;
lstrcat(lrgsz, "+");
found = TRUE;
break;
}
window [j] = '\0';
window [j-1] = '\0';
}
if (!found)
{
// if "GYEOM" is the first character in window
Conv.HAN2INS (next, inwindow, codeWanSeong);
if ((inwindow [0] == __K_G && inwindow [1] == __V_u && inwindow [2] == __K_M) ||
(inwindow [0] == __K_M && inwindow [1] == __V_l && inwindow [2] == __K_C))
{
memcpy (window, next, 2);
window [2] = '\0';
lstrcat(lrgsz, window);
vbuf [wcount++] = POS_ADVERB;
lstrcat(lrgsz, "+");
found = TRUE;
next += 2;
}
else
break;
}
else
next += (j+1);
}
if (!found)
continue;
else
{
if (FindSilsaWord (next) & _NOUN)
{
lstrcat (lrgsz, next);
vbuf[wcount++] = POS_NOUN;
}
else
{
switch (lstrlen(next))
{
case 8 :
// if the size of last winow is 4, we divide it into same size two.
memcpy (window, next, 4);
window [4] = '\0';
Conv.HAN2INS (window, inwindow, codeWanSeong);
found = FALSE;
if (FindSilsaWord (window) & _NOUN)
{ // searching the noun dictionary
Conv.HAN2INS(next+4, inwindow, codeWanSeong);
if (FindSilsaWord (next+4) & _NOUN)
{
lstrcat(lrgsz, window);
vbuf[wcount++] = POS_NOUN;
lstrcat(lrgsz, "+");
lstrcat(lrgsz, next+4);
vbuf[wcount++] = POS_NOUN;
found = TRUE;
}
}
if (!found)
{
// if "GYEOM" is the first character in window
if ((inwindow [0] == __K_G && inwindow [1] == __V_u && inwindow [2] == __K_M) ||
(inwindow [0] == __K_M && inwindow [1] == __V_l && inwindow [2] == __K_C))
{
memcpy (window, next, 8);
window [9] = '\0';
if (FindSilsaWord (window) & _NOUN)
{
memcpy (window, next, 2);
window [2] = '\0';
lstrcat(lrgsz, window);
vbuf [wcount++] = POS_ADVERB;
lstrcat(lrgsz, "+");
lstrcat(lrgsz, next+2);
vbuf [wcount++] = POS_NOUN;
}
}
else
{
// if "DEUNG" is the last character
Conv.HAN2INS (next+6, inwindow, codeWanSeong);
if ((inwindow [0] == __K_D && inwindow [1] == __V_m && inwindow [2] == __K_I) ||
(inwindow [0] == __K_G && inwindow [1] == __V_k && inwindow [2] == __K_M) ||
(inwindow [0] == __K_G && inwindow [1] == __V_k && inwindow [2] == __K_B && inwindow [3] == __K_S) ||
(inwindow [0] == __K_G && inwindow [1] == __V_P) ||
(inwindow [0] == __K_C && inwindow [1] == __V_o && inwindow [2] == __K_G))
{
memcpy (window, next, 6);
window [6] = '\0';
if (FindSilsaWord (window) & _NOUN)
{
lstrcat (lrgsz, window);
vbuf [wcount++] = POS_NOUN;
lstrcat (lrgsz, "+");
lstrcat (lrgsz, next+6);
vbuf [wcount++] = POS_NOUN;
}
else
{
// if "DEUNG,DEUNG" is the part
Conv.HAN2INS (next+4, inwindow, codeWanSeong);
if (inwindow [0] == __K_D && inwindow [1] == __V_m && inwindow [2] == __K_I)
{
memcpy (window, next, 4);
window [4] = '\0';
if (FindSilsaWord (window) & _NOUN)
{
lstrcat (lrgsz, window);
vbuf [wcount++] = POS_NOUN;
lstrcat (lrgsz, "+");
lstrcat (lrgsz, next+4);
vbuf [wcount++] = POS_NOUN;
}
else
continue;
}
else
continue;
}
}
else
continue;
}
}
break;
case 6 :
Conv.HAN2INS (next, inwindow, codeWanSeong);
/*
if (FindSilsaWord (next) & _NOUN)
{
lstrcat (lrgsz, next);
vbuf[wcount++] = POS_NOUN;
}
else
{
*/
// if "GYEOM" is the first character in window
if ((inwindow [0] == __K_G && inwindow [1] == __V_u && inwindow [2] == __K_M) ||
(inwindow [0] == __K_M && inwindow [1] == __V_l && inwindow [2] == __K_C))
{
if (FindSilsaWord (next+2) & _NOUN)
{
memcpy (window, next, 2);
window [2] = '\0';
lstrcat(lrgsz, window);
vbuf [wcount++] = POS_ADVERB;
lstrcat(lrgsz, "+");
lstrcat(lrgsz, next+2);
vbuf [wcount++] = POS_NOUN;
}
else
continue;
}
else
{
// if "DEUNG" is the last character
Conv.HAN2INS (next+4, inwindow, codeWanSeong);
if (inwindow [0] == __K_D && inwindow [1] == __V_m && inwindow [2] == __K_I)
{
memcpy (window, next, 4);
window [4] = '\0';
if (FindSilsaWord (window) & _NOUN)
{
lstrcat (lrgsz, window);
vbuf [wcount++] = POS_NOUN;
lstrcat (lrgsz, "+");
lstrcat (lrgsz, next+4);
vbuf [wcount++] = POS_NOUN;
}
else
continue;
}
else
continue;
}
//}
break;
/*
case 4 :
if (FindSilsaWord (next) & _NOUN)
{
lstrcat (lrgsz, next);
vbuf[wcount++] = POS_NOUN;
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_TOSSI;
}
}
else
continue;
break;
*/
default :
continue;
}
}
if (lstrlen (oending) > 0)
{
lstrcat(lrgsz, "+");
lstrcat(lrgsz, oending);
vbuf[wcount++] = POS_TOSSI;
}
lstrcat(lrgsz, "\t");
lstrcpy (rstrings, lrgsz);
return wcount;
}
}
}
lstrcpy (rstrings, "\0");
return 0;
}