やまてです。本当はそんな暇はないはずなんだけど。
> $B:dK\$G$9!#(B
> >> conv$B$O!">uBVA+0\5!3#E}9g%"%k%4%j%:%`$r=q$3$&$H$7$F!"%a%b%j$r?)$$2a$.(B
> >> $B$k$H$$$&$N$,$o$+$C$FGK4~$7$?$H$3$m$G$9!#;d$N(B4$B;~4V!D!D(B
> $B;~4V$,$"$l$P$d$C$F$_$^$9!#$I$C$A$_$A=q$/$7!#(B
code check$B%W%m%0%i%`$r=q$$$F$_$^$7$?!#(Beuc, sjis, jis$B$H%U%!%$%k$,JL$K(B
$B$J$C$F$$$^$9!#$"$^$j;H$$9~$s$G$b$$$J$$$7!"C/$K$G$b$O$*$9$9$a$G$-$^$;$s!#(B
> ＃ frame 御願いします
$B$3$C$A$NJ}$,FqBj$J$s$G$9$h$M$'!#$$$:$l(Bread_token$B%Y!<%9$G=q$-D>$5$J$$(B
$B$H!#Ev:B$O(Bstrcmp$B$N(Bloop$B$G(B<sTRing>$BLdBj$@$1$G$bJRIU$1$J$$$H%@%a$G$9$M!#(B
$B$D$$$G$K!#$6$C$H8zN($K$D$$$F!"(Bprofile$B$r<h$C$FD4$Y$^$7$?!#(Bread_token
$B$d(BHTMLlineproc1$B$J$I$,$d$d=E$a$G$7$?$,!"0lHV=E$$$N$,(BGC_mark$B$J$s$H$+(B ($B$=(B
$B$l$G$b(B10%$B$[$I(B) $B$J$N$G!"5$$K$9$k$[$I8zN($,0-$$$H$3$m$OL5$5$=$&$G$9!#(B
# $BIi2Y;n83$G!"(Bw3m$B$K(Bmule$B$rFI$^$;$F(B"v"$B$r2!$7$?$i(Bslashing$B!D!D(B
--- <錨>は受けが悪かったようだ やまて
diff -bBwrNU3 -x .* -x gc -x #* -x *.[aol] -x *.out -x config.* -x *.core -x *.orig -x *.rej -x XXMakefile -x mktable -x tagtable.c -x zzz* -x w3m -x w3mbookmark -x w3mhelperpanel -x w3m.prof w3m.current/XMakefile w3m.new/XMakefile --- w3m.current/XMakefile Tue May 23 21:15:53 2000 +++ w3m.new/XMakefile Tue May 23 22:10:17 2000 @@ -4,8 +4,10 @@ OBJS=main.o file.o buffer.o display.o etc.o search.o linein.o table.o local.o\ form.o map.o frame.o rc.o menu.o mailcap.o\ func.o cookie.o history.o $(KEYBIND_OBJ) -LSRCS=terms.c conv.c url.c ftp.c anchor.c mimehead.c hash.c tagtable.c -LOBJS=terms.o conv.o url.o ftp.o anchor.o mimehead.o hash.o tagtable.o +CSRCS=conv.c conv_jis.c conv_sjis.c conv_euc.c +COBJS=conv.o conv_jis.o conv_sjis.o conv_euc.o +LSRCS=terms.c $(CSRCS) url.c ftp.c anchor.c mimehead.c hash.c tagtable.c +LOBJS=terms.o $(COBJS) url.o ftp.o anchor.o mimehead.o hash.o tagtable.o LLOBJS=version.o ALIBOBJS=Str.o indep.o regex.o textlist.o parsetag.o ALIB=libindep.a @@ -35,6 +37,8 @@ $(RANLIB) $(ALIB) $(OBJS): fm.h funcname1.h + +$(COBJS): conv.h tagtable.c: html.h tagtable.tab mktable$(EXT) ./mktable 100 tagtable.tab > tagtable.c diff -bBwrNU3 -x .* -x gc -x #* -x *.[aol] -x *.out -x config.* -x *.core -x *.orig -x *.rej -x XXMakefile -x mktable -x tagtable.c -x zzz* -x w3m -x w3mbookmark -x w3mhelperpanel -x w3m.prof w3m.current/conv.c w3m.new/conv.c --- w3m.current/conv.c Tue May 23 21:15:58 2000 +++ w3m.new/conv.c Tue May 23 21:17:07 2000 @@ -1,6 +1,7 @@ #include <stdio.h> #include <string.h> #include "fm.h" +#include "conv.h" #ifdef JP_CHARSET #include "Str.h" @@ -10,7 +11,6 @@ #endif /* DEBUG */ #define BF_SIZ 4096 -#define uchar unsigned char #define true 1 #define false 0 #define CODES 7 /* Number of supported Kanji code */ @@ -30,14 +30,12 @@ static char *SIcode, *SOcode; -static uchar *cOption; -Str cConvSE(unsigned char *is); -Str cConvSJ(unsigned char *is); +Str cConvSE(uchar *is); +Str cConvSJ(uchar *is); Str cConvJS(char *is); Str cConvEE(char 
*is); Str cConvES(char *is); -char checkShiftCode(char *buf, char); static char *han2zen_tab[] = { @@ -148,14 +146,14 @@ } static void -putSFT(unsigned char **pptr, unsigned char *code) +putSFT(uchar **pptr, uchar *code) { /* Put Shift in/out code */ while (*code != '\0') *((*pptr)++) = *(code++); } static uchar -getSLb(unsigned char *ptr, unsigned char *ub) +getSLb(uchar *ptr, uchar *ub) { /* Get Shift-JIS Lower byte */ uchar c = *ptr; @@ -174,7 +172,7 @@ /* Convert Shift-JIS to JIS/real JIS/EUC */ static Str -_cConvSJ(unsigned char *is, int euc) +_cConvSJ(uchar *is, int euc) { Str os = Strnew_size(strlen(is) * 3 / 2); uchar c, @@ -241,19 +239,19 @@ } Str -cConvSE(unsigned char *is) +cConvSE(uchar *is) { return _cConvSJ(is, true); } Str -cConvSJ(unsigned char *is) +cConvSJ(uchar *is) { return _cConvSJ(is, false); } static void -put_js(unsigned char ub, unsigned char lb, Str op) +put_js(uchar ub, uchar lb, Str op) { ub -= 0x20; lb -= 0x20; @@ -380,149 +378,60 @@ return op; } -static unsigned short -sjis_shift[8] = { 0x7fff, 0xffff, 0x0, 0x0, 0x0, 0x0, 0xffff, 0x0 }; -static unsigned short -sjis_second[16] = { 0x0, 0x0, 0x0, 0x0, 0xffff, 0xffff, 0xffff, 0xfffe, - 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xfff8 }; +/* current code list */ +static struct code *(code[]) = { + &code_jis, + &code_sjis, + &code_euc, + NULL, +}; char -checkShiftCode(char *buf, char hint) +checkShiftCode(char *buf, CCS hint) { - register unsigned char *p, + register uchar *p, c; - unsigned char si = '\0', - so = '\0'; - char euc, /* euc parse state */ -#define EUC_ERROR 0 -#define EUC_NOSTATE 1 -#define EUC_REQ1 2 -#define EUC_REQ2 3 - sjis, /* sjis parse state */ -#define SJIS_ERROR 0 -#define SJIS_NOSTATE 1 -#define SJIS_SHIFT 2 - euc_or_sjis; /* 0x01: CS uses right half (&0x80 == 0x80). 
- 0x02: found right half ctrl char */ - - euc = EUC_NOSTATE; - sjis = SJIS_NOSTATE; - euc_or_sjis = 0; - - p = buf; - while (!((si && so) || - (euc_or_sjis && (!euc || !sjis)) || - (c = *p) == '\0')) { - /* check JIS shift code */ - switch (c) { - case '\033': - switch (*(p + 1)) { - case '$': /* Shift-in */ - si = *(p + 2); - break; - case '(': /* Shift-out ')' */ - so = *(p + 2); - break; - default: - break; - } - break; - } - /* transition EUC state */ - if ((unsigned char) 0xa1 <= c && c <= (unsigned char) 0xff) { - switch (euc) { - case EUC_NOSTATE: - euc = EUC_REQ1; - break; - case EUC_REQ1: - euc_or_sjis |= 1; - euc = EUC_NOSTATE; - break; - case EUC_REQ2: - euc = EUC_REQ1; - break; - case EUC_ERROR: - break; - } - } else { - if (euc == EUC_NOSTATE) { - if (c >= 0x80) { - switch (c) { - case 0x8E: - euc = EUC_REQ1; - break; - case 0x8F: - euc = EUC_REQ2; - break; - default: - euc_or_sjis |= 2; - } - } - } else { - euc = EUC_ERROR; - } - } - /* transition SJIS state */ - switch (sjis) { - case SJIS_NOSTATE: - if (c & 0x80 && - sjis_shift[(c & 0x70) >> 4] & (0x8000 >> (c & 0x0f))) - sjis = SJIS_SHIFT; - break; - case SJIS_SHIFT: - if (sjis_second[c >> 4] & (0x8000 >> (c & 0x0f))) { - euc_or_sjis |= 1; - sjis = SJIS_NOSTATE; - } else { - sjis = SJIS_ERROR; + struct code_state *state_list = NULL, /* list of automata */ + *best = NULL; /* best automaton */ + register struct code_state **s; + int i, + score, + ts; + + /* initialize automata */ + score = 0; + s = &state_list; + for (i = 0; code[i]; i++) { + *s = code[i]->init_state(hint); + if ((ts = (*s)->value()) >= score) { + best = *s; + score = ts; } - break; - case SJIS_ERROR: - break; + s = &(*s)->next; } + + p = buf; + while ((c = *p) && state_list && state_list->next) { p++; + score = 0; + s = &state_list; + while (*s) { + ts = (*s)->transit(c); + if (ts >= score) { + /* change best automaton */ + best = *s; + score = ts; + } + if (ts & 0xfffffff0) { + /* alive */ + s = &(*s)->next; + } else { + /* knock 
down */ + *s = (*s)->next; } - if (euc_or_sjis) { - if (euc && !sjis) - return 'E'; - if (sjis && (!euc || (euc_or_sjis & 2))) - return 'S'; - /* confused */ - return hint; - } - switch (si) { - case '@': - switch (so) { - case 'H': - return 'J'; - case 'J': - return 'j'; - case 'B': - return 'm'; - default: /* confused */ - return hint; - } - case 'B': - switch (so) { - case 'J': - return 'N'; - case 'B': - return 'n'; - default: /* confused */ - return hint; - } - default: /* no kanji shift */ - switch (so) { - case 'H': - return 'J'; - case 'J': - return 'j'; - case 'B': - return 'm'; - default: - /* no kanji is used in this line */ - return hint; } } + /* return that best automaton guesses */ + return best->best_code(); } #endif /* JP_CHARSET */ diff -bBwrNU3 -x .* -x gc -x #* -x *.[aol] -x *.out -x config.* -x *.core -x *.orig -x *.rej -x XXMakefile -x mktable -x tagtable.c -x zzz* -x w3m -x w3mbookmark -x w3mhelperpanel -x w3m.prof w3m.current/conv.h w3m.new/conv.h --- w3m.current/conv.h Thu Jan 1 09:00:00 1970 +++ w3m.new/conv.h Tue May 23 22:31:19 2000 @@ -0,0 +1,21 @@ +#define uchar unsigned char +typedef unsigned char CCS; /* character code set identifier */ + +struct code_state { + struct code_state *next; + int (*value)(); /* return state score */ + /* score is 31bit unsigned integer */ + /* initial score is 0x40 (no hint) */ + /* machine knocked down under 0x10 */ + int (*transit)(uchar); /* transit machine */ + CCS (*best_code)(); /* return best CCS */ +}; + +struct code { + const char *name; /* CCS name */ + struct code_state *(*init_state)(CCS); /* return initialized machine */ +}; + +extern struct code code_jis; +extern struct code code_sjis; +extern struct code code_euc; diff -bBwrNU3 -x .* -x gc -x #* -x *.[aol] -x *.out -x config.* -x *.core -x *.orig -x *.rej -x XXMakefile -x mktable -x tagtable.c -x zzz* -x w3m -x w3mbookmark -x w3mhelperpanel -x w3m.prof w3m.current/conv_euc.c w3m.new/conv_euc.c --- w3m.current/conv_euc.c Thu Jan 1 09:00:00 
1970 +++ w3m.new/conv_euc.c Tue May 23 22:54:13 2000 @@ -0,0 +1,92 @@ +#include "fm.h" +#include "conv.h" +#ifdef JP_CHARSET + +static struct { + struct code_state cs; + int bias; + int succeed; + int fail; + char state; +#define EUC_NOSTATE 0 +#define EUC_REQ1 1 +#define EUC_REQ2 2 +} euc_state; + +static int +euc_value() +{ + return (euc_state.succeed << euc_state.bias) / euc_state.fail; +} + +static int +euc_transit(uchar c) +{ + if (0xa1 <= c && c <= 0xff) { + switch(euc_state.state) { + case EUC_NOSTATE: + euc_state.state = EUC_REQ1; + break; + case EUC_REQ1: + euc_state.succeed++; + euc_state.state = EUC_NOSTATE; + break; + case EUC_REQ2: + euc_state.state = EUC_REQ1; + break; + default: + break; + } + } else { + if (euc_state.state == EUC_NOSTATE) { + if (c >= 0x80) { + switch (c) { + case 0x8E: + euc_state.state = EUC_REQ1; + break; + case 0x8F: + euc_state.state = EUC_REQ2; + break; + default: + euc_state.fail++; + } + } + } else { + euc_state.state = EUC_NOSTATE; + euc_state.fail++; + } + } + return euc_value(); +} + +static CCS +euc_best_code () +{ + return 'E'; +} + +static struct code_state * +euc_init_state(CCS hint) +{ + + euc_state.cs.value = euc_value; + euc_state.cs.transit = euc_transit; + euc_state.cs.best_code = euc_best_code; + + if (hint == 'E') + euc_state.bias = 8; + else + euc_state.bias = 6; + + euc_state.succeed = 1; + euc_state.fail = 1; + euc_state.state = EUC_NOSTATE; + + return &(euc_state.cs); +} + +struct code code_euc = { + "EUC", + euc_init_state, +}; +#endif /* JP_CHARSET */ diff -bBwrNU3 -x .* -x gc -x #* -x *.[aol] -x *.out -x config.* -x *.core -x *.orig -x *.rej -x XXMakefile -x mktable -x tagtable.c -x zzz* -x w3m -x w3mbookmark -x w3mhelperpanel -x w3m.prof w3m.current/conv_euc.c~ w3m.new/conv_euc.c~ --- w3m.current/conv_euc.c~ Thu Jan 1 09:00:00 1970 +++ w3m.new/conv_euc.c~ Tue May 23 21:17:07 2000 @@ -0,0 +1,89 @@ +#include "conv.h" + +static struct { + struct code_state cs; + int bias; + int succeed; + int fail; + char 
state; +#define EUC_NOSTATE 0 +#define EUC_REQ1 1 +#define EUC_REQ2 2 +} euc_state; + +static int +euc_value() +{ + return (euc_state.succeed << euc_state.bias) / euc_state.fail; +} + +static int +euc_transit(uchar c) +{ + if (0xa1 <= c && c <= 0xff) { + switch(euc_state.state) { + case EUC_NOSTATE: + euc_state.state = EUC_REQ1; + break; + case EUC_REQ1: + euc_state.succeed++; + euc_state.state = EUC_NOSTATE; + break; + case EUC_REQ2: + euc_state.state = EUC_REQ1; + break; + default: + break; + } + } else { + if (euc_state.state == EUC_NOSTATE) { + if (c >= 0x80) { + switch (c) { + case 0x8E: + euc_state.state = EUC_REQ1; + break; + case 0x8F: + euc_state.state = EUC_REQ2; + break; + default: + euc_state.fail++; + } + } + } else { + euc_state.state = EUC_NOSTATE; + euc_state.fail++; + } + } + return euc_value(); +} + +static CCS +euc_best_code () +{ + return 'E'; +} + +static struct code_state * +euc_init_state(CCS hint) +{ + + euc_state.cs.value = euc_value; + euc_state.cs.transit = euc_transit; + euc_state.cs.best_code = euc_best_code; + + if (hint == 'E') + euc_state.bias = 8; + else + euc_state.bias = 6; + + euc_state.succeed = 1; + euc_state.fail = 1; + euc_state.state = EUC_NOSTATE; + + return &(euc_state.cs); +} + +struct code code_euc = { + "EUC", + euc_init_state, +}; diff -bBwrNU3 -x .* -x gc -x #* -x *.[aol] -x *.out -x config.* -x *.core -x *.orig -x *.rej -x XXMakefile -x mktable -x tagtable.c -x zzz* -x w3m -x w3mbookmark -x w3mhelperpanel -x w3m.prof w3m.current/conv_jis.c w3m.new/conv_jis.c --- w3m.current/conv_jis.c Thu Jan 1 09:00:00 1970 +++ w3m.new/conv_jis.c Tue May 23 22:54:34 2000 @@ -0,0 +1,200 @@ +#include "fm.h" +#include "conv.h" +#ifdef JP_CHARSET + +static struct { + struct code_state cs; + int bias; + int succeed; + int fail; + short S1_B; + short S1_H; + short S1_J; + short S2_AT; + short S2_B; + char state; +#define JIS_NOSTATE 0 +#define JIS_ESC 1 +#define JIS_S1 2 +#define JIS_S2 3 +} jis_state; + +static char *(jis_ccs[]) = { + 
"JH@", + "jJ@", + "mB@", + "NJB", + "nBB", + 0, +}; + +static int +jis_value() +{ + return (jis_state.succeed << jis_state.bias) / jis_state.fail; +} + +static int +jis_transit(uchar c) +{ + if (c & 0x80) { + /* right half */ + jis_state.fail += jis_state.state == JIS_NOSTATE ? 1 : 8; + jis_state.state = JIS_NOSTATE; + } else { + switch(jis_state.state) { + case JIS_NOSTATE: + if (c == '\033') { + jis_nostate: + jis_state.state = JIS_ESC; + } + break; + case JIS_ESC: + switch (c) { + case '(': + jis_state.state = JIS_S1; + break; + case '$': + jis_state.state = JIS_S2; + break; + case '\033': + jis_state.fail += 8; + goto jis_nostate; + break; + } + break; + case JIS_S1: + jis_state.state = JIS_NOSTATE; + switch (c) { + case 'B': + jis_state.S1_B++; + jis_state.succeed += 4; + break; + case 'H': + jis_state.S1_H++; + jis_state.succeed += 4; + break; + case 'J': + jis_state.S1_J++; + jis_state.succeed += 4; + break; + case '\033': + jis_state.fail += 8; + goto jis_nostate; + break; + } + break; + case JIS_S2: + jis_state.state = JIS_NOSTATE; + switch (c) { + case '@': + jis_state.S2_AT++; + jis_state.succeed += 4; + break; + case 'B': + jis_state.S2_B++; + jis_state.succeed += 4; + break; + case '\033': + jis_state.fail += 8; + goto jis_nostate; + break; + } + break; + default: + return *((int *)0); + } + } + return jis_value(); +} + +static CCS +jis_best_code () +{ + int i, + score = 0; + CCS c = 'J'; + + for (i = 0; jis_ccs[i]; i++) { + int cscore = 0; + switch (jis_ccs[i][1]) { + case 'B': + cscore += jis_state.S1_B; + break; + case 'J': + cscore += jis_state.S1_J; + break; + case 'H': + cscore += jis_state.S1_H; + break; + } + switch (jis_ccs[i][2]) { + case '@': + cscore += jis_state.S2_AT; + break; + case 'B': + cscore += jis_state.S1_B; + break; + } + if (cscore >= score) { + c = jis_ccs[i][0]; + } + } + return c; +} + +struct code_state * +jis_init_state(CCS hint) +{ + int i; + + jis_state.cs.value = jis_value; + jis_state.cs.transit = jis_transit; + 
jis_state.cs.best_code = jis_best_code; + + jis_state.bias = 8; + + jis_state.succeed = 1; + jis_state.fail = 4; + jis_state.state = JIS_NOSTATE; + + jis_state.S1_B = 0; + jis_state.S1_H = 0; + jis_state.S1_J = 0; + jis_state.S2_AT = 0; + jis_state.S2_B = 0; + + for (i = 0; jis_ccs[i]; i++) { + if (jis_ccs[i][0] == hint) { + switch (jis_ccs[i][1]) { + case 'B': + jis_state.S1_B = 4; + break; + case 'J': + jis_state.S1_J = 4; + break; + case 'H': + jis_state.S1_H = 4; + break; + } + switch (jis_ccs[i][2]) { + case '@': + jis_state.S2_AT = 4; + break; + case 'B': + jis_state.S1_B = 4; + break; + } + jis_state.bias = 10; + break; + } + } + + return &(jis_state.cs); +} + +struct code code_jis = { + "JIS", + jis_init_state, +}; +#endif /* JP_CHARSET */ diff -bBwrNU3 -x .* -x gc -x #* -x *.[aol] -x *.out -x config.* -x *.core -x *.orig -x *.rej -x XXMakefile -x mktable -x tagtable.c -x zzz* -x w3m -x w3mbookmark -x w3mhelperpanel -x w3m.prof w3m.current/conv_jis.c~ w3m.new/conv_jis.c~ --- w3m.current/conv_jis.c~ Thu Jan 1 09:00:00 1970 +++ w3m.new/conv_jis.c~ Tue May 23 21:17:07 2000 @@ -0,0 +1,197 @@ +#include "conv.h" + +static struct { + struct code_state cs; + int bias; + int succeed; + int fail; + short S1_B; + short S1_H; + short S1_J; + short S2_AT; + short S2_B; + char state; +#define JIS_NOSTATE 0 +#define JIS_ESC 1 +#define JIS_S1 2 +#define JIS_S2 3 +} jis_state; + +static char *(jis_ccs[]) = { + "JH@", + "jJ@", + "mB@", + "NJB", + "nBB", + 0, +}; + +static int +jis_value() +{ + return (jis_state.succeed << jis_state.bias) / jis_state.fail; +} + +static int +jis_transit(uchar c) +{ + if (c & 0x80) { + /* right half */ + jis_state.fail += jis_state.state == JIS_NOSTATE ? 
1 : 8; + jis_state.state = JIS_NOSTATE; + } else { + switch(jis_state.state) { + case JIS_NOSTATE: + if (c == '\033') { + jis_nostate: + jis_state.state = JIS_ESC; + } + break; + case JIS_ESC: + switch (c) { + case '(': + jis_state.state = JIS_S1; + break; + case '$': + jis_state.state = JIS_S2; + break; + case '\033': + jis_state.fail += 8; + goto jis_nostate; + break; + } + break; + case JIS_S1: + jis_state.state = JIS_NOSTATE; + switch (c) { + case 'B': + jis_state.S1_B++; + jis_state.succeed += 4; + break; + case 'H': + jis_state.S1_H++; + jis_state.succeed += 4; + break; + case 'J': + jis_state.S1_J++; + jis_state.succeed += 4; + break; + case '\033': + jis_state.fail += 8; + goto jis_nostate; + break; + } + break; + case JIS_S2: + jis_state.state = JIS_NOSTATE; + switch (c) { + case '@': + jis_state.S2_AT++; + jis_state.succeed += 4; + break; + case 'B': + jis_state.S2_B++; + jis_state.succeed += 4; + break; + case '\033': + jis_state.fail += 8; + goto jis_nostate; + break; + } + break; + default: + return *((int *)0); + } + } + return jis_value(); +} + +static CCS +jis_best_code () +{ + int i, + score = 0; + CCS c = 'J'; + + for (i = 0; jis_ccs[i]; i++) { + int cscore = 0; + switch (jis_ccs[i][1]) { + case 'B': + cscore += jis_state.S1_B; + break; + case 'J': + cscore += jis_state.S1_J; + break; + case 'H': + cscore += jis_state.S1_H; + break; + } + switch (jis_ccs[i][2]) { + case '@': + cscore += jis_state.S2_AT; + break; + case 'B': + cscore += jis_state.S1_B; + break; + } + if (cscore >= score) { + c = jis_ccs[i][0]; + } + } + return c; +} + +struct code_state * +jis_init_state(CCS hint) +{ + int i; + + jis_state.cs.value = jis_value; + jis_state.cs.transit = jis_transit; + jis_state.cs.best_code = jis_best_code; + + jis_state.bias = 8; + + jis_state.succeed = 1; + jis_state.fail = 4; + jis_state.state = JIS_NOSTATE; + + jis_state.S1_B = 0; + jis_state.S1_H = 0; + jis_state.S1_J = 0; + jis_state.S2_AT = 0; + jis_state.S2_B = 0; + + for (i = 0; jis_ccs[i]; 
i++) { + if (jis_ccs[i][0] == hint) { + switch (jis_ccs[i][1]) { + case 'B': + jis_state.S1_B = 4; + break; + case 'J': + jis_state.S1_J = 4; + break; + case 'H': + jis_state.S1_H = 4; + break; + } + switch (jis_ccs[i][2]) { + case '@': + jis_state.S2_AT = 4; + break; + case 'B': + jis_state.S1_B = 4; + break; + } + jis_state.bias = 10; + break; + } + } + + return &(jis_state.cs); +} + +struct code code_jis = { + "JIS", + jis_init_state, +}; diff -bBwrNU3 -x .* -x gc -x #* -x *.[aol] -x *.out -x config.* -x *.core -x *.orig -x *.rej -x XXMakefile -x mktable -x tagtable.c -x zzz* -x w3m -x w3mbookmark -x w3mhelperpanel -x w3m.prof w3m.current/conv_sjis.c w3m.new/conv_sjis.c --- w3m.current/conv_sjis.c Thu Jan 1 09:00:00 1970 +++ w3m.new/conv_sjis.c Tue May 23 22:54:28 2000 @@ -0,0 +1,82 @@ +#include "fm.h" +#include "conv.h" +#ifdef JP_CHARSET + +static struct { + struct code_state cs; + int bias; + int succeed; + int fail; + char state; +#define SJIS_NOSTATE 0 +#define SJIS_SHIFT 1 +} sjis_state; + +static int +sjis_value() +{ + return (sjis_state.succeed << sjis_state.bias) / sjis_state.fail; +} + +static unsigned short +sjis_shift[8] = { 0x7fff, 0xffff, 0x0, 0x0, 0x0, 0x0, 0xffff, 0x0 }; +static unsigned short +sjis_second[16] = { 0x0, 0x0, 0x0, 0x0, 0xffff, 0xffff, 0xffff, 0xfffe, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xfff8 }; + + +static int +sjis_transit(uchar c) +{ + switch(sjis_state.state) { + sjis_nostate: + case SJIS_NOSTATE: + if (c & 0x80 && + sjis_shift[(c & 0x70) >> 4] & (0x8000 >> (c & 0x0f))) + sjis_state.state = SJIS_SHIFT; + break; + case SJIS_SHIFT: + if (sjis_second[c >> 4] & (0x8000 >> (c & 0x0f))) { + sjis_state.succeed++; + } else { + sjis_state.fail++; + goto sjis_nostate; + } + sjis_state.state = SJIS_NOSTATE; + break; + default: + break; + } + return sjis_value(); +} + +static CCS +sjis_best_code () +{ + return 'S'; +} + +static struct code_state * +sjis_init_state(CCS hint) +{ + sjis_state.cs.value = sjis_value; + 
sjis_state.cs.transit = sjis_transit; + sjis_state.cs.best_code = sjis_best_code; + + if (hint == 'S') + sjis_state.bias = 7; + else + sjis_state.bias = 5; + + sjis_state.succeed = 2; + sjis_state.fail = 1; + sjis_state.state = SJIS_NOSTATE; + + return &(sjis_state.cs); +} + +struct code code_sjis = { + "SJIS", + sjis_init_state, +}; +#endif /* JP_CHARSET */ diff -bBwrNU3 -x .* -x gc -x #* -x *.[aol] -x *.out -x config.* -x *.core -x *.orig -x *.rej -x XXMakefile -x mktable -x tagtable.c -x zzz* -x w3m -x w3mbookmark -x w3mhelperpanel -x w3m.prof w3m.current/conv_sjis.c~ w3m.new/conv_sjis.c~ --- w3m.current/conv_sjis.c~ Thu Jan 1 09:00:00 1970 +++ w3m.new/conv_sjis.c~ Tue May 23 21:17:07 2000 @@ -0,0 +1,79 @@ +#include "conv.h" + +static struct { + struct code_state cs; + int bias; + int succeed; + int fail; + char state; +#define SJIS_NOSTATE 0 +#define SJIS_SHIFT 1 +} sjis_state; + +static int +sjis_value() +{ + return (sjis_state.succeed << sjis_state.bias) / sjis_state.fail; +} + +static unsigned short +sjis_shift[8] = { 0x7fff, 0xffff, 0x0, 0x0, 0x0, 0x0, 0xffff, 0x0 }; +static unsigned short +sjis_second[16] = { 0x0, 0x0, 0x0, 0x0, 0xffff, 0xffff, 0xffff, 0xfffe, + 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xfff8 }; + + +static int +sjis_transit(uchar c) +{ + switch(sjis_state.state) { + sjis_nostate: + case SJIS_NOSTATE: + if (c & 0x80 && + sjis_shift[(c & 0x70) >> 4] & (0x8000 >> (c & 0x0f))) + sjis_state.state = SJIS_SHIFT; + break; + case SJIS_SHIFT: + if (sjis_second[c >> 4] & (0x8000 >> (c & 0x0f))) { + sjis_state.succeed++; + } else { + sjis_state.fail++; + goto sjis_nostate; + } + sjis_state.state = SJIS_NOSTATE; + break; + default: + break; + } + return sjis_value(); +} + +static CCS +sjis_best_code () +{ + return 'S'; +} + +static struct code_state * +sjis_init_state(CCS hint) +{ + sjis_state.cs.value = sjis_value; + sjis_state.cs.transit = sjis_transit; + sjis_state.cs.best_code = sjis_best_code; + + if (hint == 'S') + sjis_state.bias 
= 7; + else + sjis_state.bias = 5; + + sjis_state.succeed = 2; + sjis_state.fail = 1; + sjis_state.state = SJIS_NOSTATE; + + return &(sjis_state.cs); +} + +struct code code_sjis = { + "SJIS", + sjis_init_state, +}; diff -bBwrNU3 -x .* -x gc -x #* -x *.[aol] -x *.out -x config.* -x *.core -x *.orig -x *.rej -x XXMakefile -x mktable -x tagtable.c -x zzz* -x w3m -x w3mbookmark -x w3mhelperpanel -x w3m.prof w3m.current/proto.h w3m.new/proto.h --- w3m.current/proto.h Tue May 23 21:15:51 2000 +++ w3m.new/proto.h Tue May 23 21:17:07 2000 @@ -322,7 +322,7 @@ extern char *GetSICode(char key); extern char *GetSOCode(char key); extern Str conv(char *is, char fc, char tc); -extern char checkShiftCode(char *buf, char hint); +extern char checkShiftCode(char *buf, unsigned char hint); #endif /* JP_CHARSET */ extern ParsedURL *baseURL(Buffer * buf); extern int openSocket(char *hostname, char *remoteport_name, unsigned short remoteport_num);
This archive was generated by hypermail 2b29 : Wed Jul 19 2000 - 10:30:56 CDT