Mercurial > hg > Applications > mh
comparison nkf-utf8/nkf.c @ 10:a6481689f99c
*** empty log message ***
author | kono |
---|---|
date | Wed, 06 Dec 2006 03:17:53 +0900 |
parents | |
children | 441a2190cfae |
comparison
equal
deleted
inserted
replaced
9:52d0fa25c554 | 10:a6481689f99c |
---|---|
1 /** Network Kanji Filter. (PDS Version) | |
2 ************************************************************************ | |
3 ** Copyright (C) 1987, Fujitsu LTD. (Itaru ICHIKAWA) | |
4 ** $BO"Mm@h!'(B $B!J3t!KIY;NDL8&5f=j!!%=%U%H#38&!!;T@n!!;j(B | |
5 ** $B!J(BE-Mail Address: ichikawa@flab.fujitsu.co.jp$B!K(B | |
6 ** Copyright (C) 1996,1998 | |
7 ** Copyright (C) 2002 | |
8 ** $BO"Mm@h!'(B $BN05eBg3X>pJs9)3X2J(B $B2OLn(B $B??<#(B mime/X0208 support | |
9 ** $B!J(BE-Mail Address: kono@ie.u-ryukyu.ac.jp$B!K(B | |
10 ** $BO"Mm@h!'(B COW for DOS & Win16 & Win32 & OS/2 | |
11 ** $B!J(BE-Mail Address: GHG00637@niftyserve.or.p$B!K(B | |
12 ** | |
13 ** $B$3$N%=!<%9$N$$$+$J$kJ#<L!$2~JQ!$=$@5$b5vBz$7$^$9!#$?$@$7!"(B | |
14 ** $B$=$N:]$K$O!"C/$,9W8%$7$?$r<($9$3$NItJ,$r;D$9$3$H!#(B | |
15 ** $B:FG[I[$d;(;o$NIUO?$J$I$NLd$$9g$o$;$bI,MW$"$j$^$;$s!#(B | |
16 ** $B1DMxMxMQ$b>e5-$KH?$7$J$$HO0O$G5v2D$7$^$9!#(B | |
17 ** $B%P%$%J%j$NG[I[$N:]$K$O(Bversion message$B$rJ]B8$9$k$3$H$r>r7o$H$7$^$9!#(B | |
18 ** $B$3$N%W%m%0%i%`$K$D$$$F$OFC$K2?$NJ]>Z$b$7$J$$!"0-$7$+$i$:!#(B | |
19 ** | |
20 ** Everyone is permitted to do anything on this program | |
21 ** including copying, modifying, improving, | |
22 ** as long as you don't try to pretend that you wrote it. | |
23 ** i.e., the above copyright notice has to appear in all copies. | |
24 ** Binary distribution requires original version messages. | |
25 ** You don't have to ask before copying, redistribution or publishing. | |
26 ** THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE. | |
27 ***********************************************************************/ | |
28 | |
29 /*********************************************************************** | |
30 ** UTF-8 $B%5%]!<%H$K$D$$$F(B | |
31 ** $B=>Mh$N(B nkf $B$HF~$l$+$($F$=$N$^$^;H$($k$h$&$K$J$C$F$$$^$9(B | |
32 ** nkf -e $B$J$I$H$7$F5/F0$9$k$H!"<+F0H=JL$G(B UTF-8 $B$HH=Dj$5$l$l$P!"(B | |
33 ** $B$=$N$^$^(B euc-jp $B$KJQ49$5$l$^$9(B | |
34 ** | |
35 ** $B$^$@%P%0$,$"$k2DG=@-$,9b$$$G$9!#(B | |
36 ** ($BFC$K<+F0H=JL!"%3!<%I:.:_!"%(%i!<=hM}7O(B) | |
37 ** | |
38 ** $B2?$+LdBj$r8+$D$1$?$i!"(B | |
39 ** E-Mail: furukawa@tcp-ip.or.jp | |
40 ** $B$^$G8fO"Mm$r$*4j$$$7$^$9!#(B | |
41 ***********************************************************************/ | |
42 /* $Id$ */ | |
43 #define NKF_VERSION "2.0.5" | |
44 #define NKF_RELEASE_DATE "2005-04-10" | |
45 #include "config.h" | |
46 | |
47 static char *CopyRight = | |
48 "Copyright (C) 1987, FUJITSU LTD. (I.Ichikawa),2000 S. Kono, COW, 2002-2005 Kono, Furukawa, Naruse"; | |
49 | |
50 | |
51 /* | |
52 ** | |
53 ** | |
54 ** | |
55 ** USAGE: nkf [flags] [file] | |
56 ** | |
57 ** Flags: | |
58 ** b Output is buffered (DEFAULT) | |
59 ** u Output is unbuffered | |
60 ** | |
61 ** t no operation | |
62 ** | |
63 ** j Output code is JIS 7 bit (DEFAULT SELECT) | |
64 ** s Output code is MS Kanji (DEFAULT SELECT) | |
65 ** e Output code is AT&T JIS (DEFAULT SELECT) | |
66 ** w Output code is UTF-8 (DEFAULT SELECT) | |
67 ** l Output code is JIS 7bit and ISO8859-1 Latin-1 | |
68 ** | |
69 ** m MIME conversion for ISO-2022-JP | |
70 ** I Convert non ISO-2022-JP character to GETA by Pekoe <pekoe@lair.net> | |
71 ** i_ Output sequence to designate JIS-kanji (DEFAULT_J) | |
72 ** o_ Output sequence to designate single-byte roman characters (DEFAULT_R) | |
73 ** M MIME output conversion | |
74 ** | |
75 ** r {de/en}crypt ROT13/47 | |
76 ** | |
77 ** v display Version | |
78 ** | |
79 ** T Text mode output (for MS-DOS) | |
80 ** | |
81 ** x Do not convert X0201 kana into X0208 | |
82 ** Z Convert X0208 alphabet to ASCII | |
83 ** | |
84 ** f60 fold option | |
85 ** | |
86 ** m MIME decode | |
87 ** B try to fix broken JIS, missing Escape | |
88 ** B[1-9] broken level | |
89 ** | |
90 ** O Output to 'nkf.out' file or last file name | |
91 ** d Delete \r in line feed | |
92 ** c Add \r in line feed | |
93 ** -- other long option | |
94 ** -- ignore following option (don't use with -O ) | |
95 ** | |
96 **/ | |
97 | |
98 #if (defined(__TURBOC__) || defined(_MSC_VER) || defined(LSI_C) || defined(__MINGW32__)) && !defined(MSDOS) | |
99 #define MSDOS | |
100 #if (defined(__Win32__) || defined(_WIN32)) && !defined(__WIN32__) | |
101 #define __WIN32__ | |
102 #endif | |
103 #endif | |
104 | |
105 #ifdef PERL_XS | |
106 #undef OVERWRITE | |
107 #endif | |
108 | |
109 #ifndef PERL_XS | |
110 #include <stdio.h> | |
111 #endif | |
112 | |
113 #include <stdlib.h> | |
114 | |
115 #if defined(MSDOS) || defined(__OS2__) | |
116 #include <fcntl.h> | |
117 #include <io.h> | |
118 #endif | |
119 | |
120 #ifdef MSDOS | |
121 #ifdef LSI_C | |
122 #define setbinmode(fp) fsetbin(fp) | |
123 #else /* Microsoft C, Turbo C */ | |
124 #define setbinmode(fp) setmode(fileno(fp), O_BINARY) | |
125 #endif | |
126 #else /* UNIX,OS/2 */ | |
127 #define setbinmode(fp) | |
128 #endif | |
129 | |
130 #ifdef _IOFBF /* SysV and MSDOS, Windows */ | |
131 #define setvbuffer(fp, buf, size) setvbuf(fp, buf, _IOFBF, size) | |
132 #else /* BSD */ | |
133 #define setvbuffer(fp, buf, size) setbuffer(fp, buf, size) | |
134 #endif | |
135 | |
136 /*Borland C++ 4.5 EasyWin*/ | |
137 #if defined(__TURBOC__) && defined(_Windows) && !defined(__WIN32__) /*Easy Win */ | |
138 #define EASYWIN | |
139 #ifndef __WIN16__ | |
140 #define __WIN16__ | |
141 #endif | |
142 #include <windows.h> | |
143 #endif | |
144 | |
145 #ifdef OVERWRITE | |
146 /* added by satoru@isoternet.org */ | |
147 #include <string.h> | |
148 #include <sys/stat.h> | |
149 #ifndef MSDOS /* UNIX, OS/2 */ | |
150 #include <unistd.h> | |
151 #include <utime.h> | |
152 #else /* defined(MSDOS) */ | |
153 #ifdef __WIN32__ | |
154 #ifdef __BORLANDC__ /* BCC32 */ | |
155 #include <utime.h> | |
156 #else /* !defined(__BORLANDC__) */ | |
157 #include <sys/utime.h> | |
158 #endif /* (__BORLANDC__) */ | |
159 #else /* !defined(__WIN32__) */ | |
160 #if defined(_MSC_VER) || defined(__MINGW32__) /* VC++, MinGW */ | |
161 #include <sys/utime.h> | |
162 #elif defined(__TURBOC__) /* BCC */ | |
163 #include <utime.h> | |
164 #elif defined(LSI_C) /* LSI C */ | |
165 #endif /* (__WIN32__) */ | |
166 #endif | |
167 #endif | |
168 #endif | |
169 | |
170 #ifdef INT_IS_SHORT | |
171 #define int long | |
172 #endif | |
173 | |
174 #define FALSE 0 | |
175 #define TRUE 1 | |
176 | |
177 /* state of output_mode and input_mode | |
178 | |
179 c2 0 means ASCII | |
180 X0201 | |
181 ISO8859_1 | |
182 X0208 | |
183 EOF all termination | |
184 c1 32bit data | |
185 | |
186 */ | |
187 | |
188 #define ASCII 0 | |
189 #define X0208 1 | |
190 #define X0201 2 | |
191 #define ISO8859_1 8 | |
192 #define NO_X0201 3 | |
193 #define X0212 16 | |
194 | |
195 /* Input Assumption */ | |
196 | |
197 #define JIS_INPUT 4 | |
198 #define SJIS_INPUT 5 | |
199 #define LATIN1_INPUT 6 | |
200 #define FIXED_MIME 7 | |
201 #define STRICT_MIME 8 | |
202 | |
203 /* MIME ENCODE */ | |
204 | |
205 #define ISO2022JP 9 | |
206 #define JAPANESE_EUC 10 | |
207 #define SHIFT_JIS 11 | |
208 | |
209 #define UTF8 12 | |
210 #define UTF8_INPUT 13 | |
211 #define UTF16BE_INPUT 14 | |
212 #define UTF16LE_INPUT 15 | |
213 | |
214 #define WISH_TRUE 15 | |
215 | |
216 /* ASCII CODE */ | |
217 | |
218 #define BS 0x08 | |
219 #define TAB 0x09 | |
220 #define NL 0x0a | |
221 #define CR 0x0d | |
222 #define ESC 0x1b | |
223 #define SPACE 0x20 | |
224 #define AT 0x40 | |
225 #define SSP 0xa0 | |
226 #define DEL 0x7f | |
227 #define SI 0x0f | |
228 #define SO 0x0e | |
229 #define SSO 0x8e | |
230 #define SS3 0x8f | |
231 | |
/*
 * is_alnum(c): nonzero when c is an ASCII letter ('a'-'z', 'A'-'Z') or an
 * ASCII digit ('0'-'9'), zero otherwise.
 *
 * Every use of the argument is parenthesized so that an expression argument
 * (for example a conditional expression) is tested as a whole.  The argument
 * is still evaluated more than once, so it must not have side effects.
 */
#define is_alnum(c)  \
    (('a'<=(c) && (c)<='z')||('A'<=(c) && (c)<='Z')||('0'<=(c) && (c)<='9'))
234 | |
235 #define HOLD_SIZE 1024 | |
236 #define IOBUF_SIZE 16384 | |
237 | |
238 #define DEFAULT_J 'B' | |
239 #define DEFAULT_R 'B' | |
240 | |
241 #define SJ0162 0x00e1 /* 01 - 62 ku offset */ | |
242 #define SJ6394 0x0161 /* 63 - 94 ku offset */ | |
243 | |
244 #define RANGE_NUM_MAX 18 | |
245 #define GETA1 0x22 | |
246 #define GETA2 0x2e | |
247 | |
248 | |
249 #if defined( UTF8_OUTPUT_ENABLE ) || defined( UTF8_INPUT_ENABLE ) | |
250 #define sizeof_euc_utf8 94 | |
251 #define sizeof_euc_to_utf8_1byte 94 | |
252 #define sizeof_euc_to_utf8_2bytes 94 | |
253 #define sizeof_utf8_to_euc_C2 64 | |
254 #define sizeof_utf8_to_euc_E5B8 64 | |
255 #define sizeof_utf8_to_euc_2bytes 112 | |
256 #define sizeof_utf8_to_euc_3bytes 112 | |
257 #endif | |
258 | |
259 /* MIME preprocessor */ | |
260 | |
261 | |
262 #ifdef EASYWIN /*Easy Win */ | |
263 extern POINT _BufferSize; | |
264 #endif | |
265 | |
266 /* function prototype */ | |
267 | |
268 #ifdef ANSI_C_PROTOTYPE | |
269 #define PROTO(x) x | |
270 #define STATIC static | |
271 #else | |
272 #define PROTO(x) () | |
273 #define STATIC | |
274 #endif | |
275 | |
/* One entry of input_code_list: a candidate input encoding together with the
   callbacks and scratch fields kept for it. */
276 struct input_code{ | |
277 char *name; /* encoding label, e.g. "EUC-JP" in input_code_list */ | |
278 int stat; /* NOTE(review): presumably the status_func state - confirm */ | |
279 int score; /* NOTE(review): presumably maintained by code_score() - confirm */ | |
280 int index; /* NOTE(review): presumably the fill level of buf[] - confirm */ | |
281 int buf[3]; | |
282 void (*status_func)PROTO((struct input_code *, int)); /* e.g. e_status, s_status */ | |
283 int (*iconv_func)PROTO((int c2, int c1, int c0)); /* e.g. e_iconv, s_iconv */ | |
284 int _file_stat; | |
285 }; | |
286 | |
287 STATIC char *input_codename = ""; | |
288 | |
289 STATIC int noconvert PROTO((FILE *f)); | |
290 STATIC int kanji_convert PROTO((FILE *f)); | |
291 STATIC int h_conv PROTO((FILE *f,int c2,int c1)); | |
292 STATIC int push_hold_buf PROTO((int c2)); | |
293 STATIC void set_iconv PROTO((int f, int (*iconv_func)(int c2,int c1,int c0))); | |
294 STATIC int s_iconv PROTO((int c2,int c1,int c0)); | |
295 STATIC int s2e_conv PROTO((int c2, int c1, int *p2, int *p1)); | |
296 STATIC int e_iconv PROTO((int c2,int c1,int c0)); | |
297 #ifdef UTF8_INPUT_ENABLE | |
298 STATIC int w2e_conv PROTO((int c2,int c1,int c0,int *p2,int *p1)); | |
299 STATIC int w_iconv PROTO((int c2,int c1,int c0)); | |
300 STATIC int w_iconv16 PROTO((int c2,int c1,int c0)); | |
301 STATIC int w_iconv_common PROTO((int c1,int c0,unsigned short **pp,int psize,int *p2,int *p1)); | |
302 STATIC int ww16_conv PROTO((int c2, int c1, int c0)); | |
303 #endif | |
304 #ifdef UTF8_OUTPUT_ENABLE | |
305 STATIC int e2w_conv PROTO((int c2,int c1)); | |
306 STATIC void w_oconv PROTO((int c2,int c1)); | |
307 STATIC void w_oconv16 PROTO((int c2,int c1)); | |
308 #endif | |
309 STATIC void e_oconv PROTO((int c2,int c1)); | |
310 STATIC int e2s_conv PROTO((int c2, int c1, int *p2, int *p1)); | |
311 STATIC void s_oconv PROTO((int c2,int c1)); | |
312 STATIC void j_oconv PROTO((int c2,int c1)); | |
313 STATIC void fold_conv PROTO((int c2,int c1)); | |
314 STATIC void cr_conv PROTO((int c2,int c1)); | |
315 STATIC void z_conv PROTO((int c2,int c1)); | |
316 STATIC void rot_conv PROTO((int c2,int c1)); | |
317 STATIC void hira_conv PROTO((int c2,int c1)); | |
318 STATIC void base64_conv PROTO((int c2,int c1)); | |
319 STATIC void iso2022jp_check_conv PROTO((int c2,int c1)); | |
320 STATIC void no_connection PROTO((int c2,int c1)); | |
321 STATIC int no_connection2 PROTO((int c2,int c1,int c0)); | |
322 | |
323 STATIC void code_score PROTO((struct input_code *ptr)); | |
324 STATIC void code_status PROTO((int c)); | |
325 | |
326 STATIC void std_putc PROTO((int c)); | |
327 STATIC int std_getc PROTO((FILE *f)); | |
328 STATIC int std_ungetc PROTO((int c,FILE *f)); | |
329 | |
330 STATIC int broken_getc PROTO((FILE *f)); | |
331 STATIC int broken_ungetc PROTO((int c,FILE *f)); | |
332 | |
333 STATIC int mime_begin PROTO((FILE *f)); | |
334 STATIC int mime_getc PROTO((FILE *f)); | |
335 STATIC int mime_ungetc PROTO((int c,FILE *f)); | |
336 | |
337 STATIC int mime_begin_strict PROTO((FILE *f)); | |
338 STATIC int mime_getc_buf PROTO((FILE *f)); | |
339 STATIC int mime_ungetc_buf PROTO((int c,FILE *f)); | |
340 STATIC int mime_integrity PROTO((FILE *f,unsigned char *p)); | |
341 | |
342 STATIC int base64decode PROTO((int c)); | |
343 STATIC void mime_prechar PROTO((int c2, int c1)); | |
344 STATIC void mime_putc PROTO((int c)); | |
345 STATIC void open_mime PROTO((int c)); | |
346 STATIC void close_mime PROTO(()); | |
347 STATIC void usage PROTO(()); | |
348 STATIC void version PROTO(()); | |
349 STATIC void options PROTO((unsigned char *c)); | |
350 #if defined(PERL_XS) || defined(WIN32DLL) | |
351 STATIC void reinit PROTO(()); | |
352 #endif | |
353 | |
354 /* buffers */ | |
355 | |
356 static unsigned char stdibuf[IOBUF_SIZE]; | |
357 static unsigned char stdobuf[IOBUF_SIZE]; | |
358 static unsigned char hold_buf[HOLD_SIZE*2]; | |
359 static int hold_count; | |
360 | |
361 /* MIME preprocessor fifo */ | |
362 | |
363 #define MIME_BUF_SIZE (1024) /* 2^n ring buffer */ | |
364 #define MIME_BUF_MASK (MIME_BUF_SIZE-1) | |
365 #define Fifo(n) mime_buf[(n)&MIME_BUF_MASK] | |
366 static unsigned char mime_buf[MIME_BUF_SIZE]; | |
367 static unsigned int mime_top = 0; | |
368 static unsigned int mime_last = 0; /* decoded */ | |
369 static unsigned int mime_input = 0; /* undecoded */ | |
370 | |
371 /* flags */ | |
372 static int unbuf_f = FALSE; | |
373 static int estab_f = FALSE; | |
374 static int nop_f = FALSE; | |
375 static int binmode_f = TRUE; /* binary mode */ | |
376 static int rot_f = FALSE; /* rot13/47 mode */ | |
377 static int hira_f = FALSE; /* hira/kata henkan */ | |
378 static int input_f = FALSE; /* non fixed input code */ | |
379 static int alpha_f = FALSE; /* convert JIS X0208 alphabet to ASCII */ | |
380 static int mime_f = STRICT_MIME; /* convert MIME B base64 or Q */ | |
381 static int mime_decode_f = FALSE; /* mime decode is explicitly on */ | |
382 static int mimebuf_f = FALSE; /* MIME buffered input */ | |
383 static int broken_f = FALSE; /* convert ESC-less broken JIS */ | |
384 static int iso8859_f = FALSE; /* ISO8859 through */ | |
385 static int mimeout_f = FALSE; /* base64 mode */ | |
386 #if defined(MSDOS) || defined(__OS2__) | |
387 static int x0201_f = TRUE; /* Assume JISX0201 kana */ | |
388 #else | |
389 static int x0201_f = NO_X0201; /* Assume NO JISX0201 */ | |
390 #endif | |
391 static int iso2022jp_f = FALSE; /* convert ISO-2022-JP */ | |
392 #ifdef UTF8_OUTPUT_ENABLE | |
393 static int unicode_bom_f= 0; /* Output Unicode BOM */ | |
394 static int w_oconv16_LE = 0; /* utf-16 little endian */ | |
395 static int ms_ucs_map_f = FALSE; /* Microsoft UCS Mapping Compatible */ | |
396 #endif | |
397 | |
398 | |
399 #ifdef NUMCHAR_OPTION | |
400 | |
401 #define CLASS_MASK 0x0f000000 | |
402 #define CLASS_UTF16 0x01000000 | |
403 #endif | |
404 | |
405 #ifdef INPUT_OPTION | |
406 static int cap_f = FALSE; | |
407 static int (*i_cgetc)PROTO((FILE *)) = std_getc; /* input of cgetc */ | |
408 static int (*i_cungetc)PROTO((int c ,FILE *f)) = std_ungetc; | |
409 STATIC int cap_getc PROTO((FILE *f)); | |
410 STATIC int cap_ungetc PROTO((int c,FILE *f)); | |
411 | |
412 static int url_f = FALSE; | |
413 static int (*i_ugetc)PROTO((FILE *)) = std_getc; /* input of ugetc */ | |
414 static int (*i_uungetc)PROTO((int c ,FILE *f)) = std_ungetc; | |
415 STATIC int url_getc PROTO((FILE *f)); | |
416 STATIC int url_ungetc PROTO((int c,FILE *f)); | |
417 | |
418 static int numchar_f = FALSE; | |
419 static int (*i_ngetc)PROTO((FILE *)) = std_getc; /* input of ngetc */ | |
420 static int (*i_nungetc)PROTO((int c ,FILE *f)) = std_ungetc; | |
421 STATIC int numchar_getc PROTO((FILE *f)); | |
422 STATIC int numchar_ungetc PROTO((int c,FILE *f)); | |
423 #endif | |
424 | |
425 #ifdef CHECK_OPTION | |
426 static int noout_f = FALSE; | |
427 STATIC void no_putc PROTO((int c)); | |
428 static int debug_f = FALSE; | |
429 STATIC void debug PROTO((char *str)); | |
430 #endif | |
431 | |
432 static int guess_f = FALSE; | |
433 STATIC void print_guessed_code PROTO((char *filename)); | |
434 STATIC void set_input_codename PROTO((char *codename)); | |
435 static int is_inputcode_mixed = FALSE; | |
436 static int is_inputcode_set = FALSE; | |
437 | |
438 #ifdef EXEC_IO | |
439 static int exec_f = 0; | |
440 #endif | |
441 | |
442 #ifdef SHIFTJIS_CP932 | |
443 STATIC int cp932_f = TRUE; | |
444 #define CP932_TABLE_BEGIN (0xfa) | |
445 #define CP932_TABLE_END (0xfc) | |
446 | |
447 STATIC int cp932inv_f = TRUE; | |
448 #define CP932INV_TABLE_BEGIN (0xed) | |
449 #define CP932INV_TABLE_END (0xee) | |
450 | |
451 /* STATIC int cp932_conv PROTO((int c2, int c1)); */ | |
452 #endif /* SHIFTJIS_CP932 */ | |
453 | |
454 #ifdef X0212_ENABLE | |
455 STATIC int x0212_f = FALSE; | |
456 static int x0212_shift PROTO((int c)); | |
457 static int x0212_unshift PROTO((int c)); | |
458 #endif | |
459 | |
460 STATIC unsigned char prefix_table[256]; | |
461 | |
462 STATIC void e_status PROTO((struct input_code *, int)); | |
463 STATIC void s_status PROTO((struct input_code *, int)); | |
464 | |
465 #ifdef UTF8_INPUT_ENABLE | |
466 STATIC void w_status PROTO((struct input_code *, int)); | |
467 STATIC void w16_status PROTO((struct input_code *, int)); | |
468 static int utf16_mode = UTF16BE_INPUT; | |
469 #endif | |
470 | |
471 struct input_code input_code_list[] = { | |
472 {"EUC-JP", 0, 0, 0, {0, 0, 0}, e_status, e_iconv, 0}, | |
473 {"Shift_JIS", 0, 0, 0, {0, 0, 0}, s_status, s_iconv, 0}, | |
474 {"UTF-8", 0, 0, 0, {0, 0, 0}, w_status, w_iconv, 0}, | |
475 {"UTF-16", 0, 0, 0, {0, 0, 0}, w16_status, w_iconv16, 0}, | |
476 {0} | |
477 }; | |
478 | |
479 static int mimeout_mode = 0; | |
480 static int base64_count = 0; | |
481 | |
482 /* X0208 -> ASCII converter */ | |
483 | |
484 /* fold parameter */ | |
485 static int f_line = 0; /* chars in line */ | |
486 static int f_prev = 0; | |
487 static int fold_preserve_f = FALSE; /* preserve new lines */ | |
488 static int fold_f = FALSE; | |
489 static int fold_len = 0; | |
490 | |
491 /* options */ | |
492 static unsigned char kanji_intro = DEFAULT_J; | |
493 static unsigned char ascii_intro = DEFAULT_R; | |
494 | |
495 /* Folding */ | |
496 | |
497 #define FOLD_MARGIN 10 | |
498 #define DEFAULT_FOLD 60 | |
499 | |
500 static int fold_margin = FOLD_MARGIN; | |
501 | |
502 /* converters */ | |
503 | |
504 #ifdef DEFAULT_CODE_JIS | |
505 # define DEFAULT_CONV j_oconv | |
506 #endif | |
507 #ifdef DEFAULT_CODE_SJIS | |
508 # define DEFAULT_CONV s_oconv | |
509 #endif | |
510 #ifdef DEFAULT_CODE_EUC | |
511 # define DEFAULT_CONV e_oconv | |
512 #endif | |
513 #ifdef DEFAULT_CODE_UTF8 | |
514 # define DEFAULT_CONV w_oconv | |
515 #endif | |
516 | |
517 /* process default */ | |
518 static void (*output_conv)PROTO((int c2,int c1)) = DEFAULT_CONV; | |
519 | |
520 static void (*oconv)PROTO((int c2,int c1)) = no_connection; | |
521 /* s_iconv or oconv */ | |
522 static int (*iconv)PROTO((int c2,int c1,int c0)) = no_connection2; | |
523 | |
524 static void (*o_zconv)PROTO((int c2,int c1)) = no_connection; | |
525 static void (*o_fconv)PROTO((int c2,int c1)) = no_connection; | |
526 static void (*o_crconv)PROTO((int c2,int c1)) = no_connection; | |
527 static void (*o_rot_conv)PROTO((int c2,int c1)) = no_connection; | |
528 static void (*o_hira_conv)PROTO((int c2,int c1)) = no_connection; | |
529 static void (*o_base64conv)PROTO((int c2,int c1)) = no_connection; | |
530 static void (*o_iso2022jp_check_conv)PROTO((int c2,int c1)) = no_connection; | |
531 | |
532 /* static redirections */ | |
533 | |
534 static void (*o_putc)PROTO((int c)) = std_putc; | |
535 | |
536 static int (*i_getc)PROTO((FILE *f)) = std_getc; /* general input */ | |
537 static int (*i_ungetc)PROTO((int c,FILE *f)) =std_ungetc; | |
538 | |
539 static int (*i_bgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */ | |
540 static int (*i_bungetc)PROTO((int c ,FILE *f)) = std_ungetc; | |
541 | |
542 static void (*o_mputc)PROTO((int c)) = std_putc ; /* output of mputc */ | |
543 | |
544 static int (*i_mgetc)PROTO((FILE *)) = std_getc; /* input of mgetc */ | |
545 static int (*i_mungetc)PROTO((int c ,FILE *f)) = std_ungetc; | |
546 | |
547 /* for strict mime */ | |
548 static int (*i_mgetc_buf)PROTO((FILE *)) = std_getc; /* input of mgetc_buf */ | |
549 static int (*i_mungetc_buf)PROTO((int c,FILE *f)) = std_ungetc; | |
550 | |
551 /* Global states */ | |
552 static int output_mode = ASCII, /* output kanji mode */ | |
553 input_mode = ASCII, /* input kanji mode */ | |
554 shift_mode = FALSE; /* TRUE shift out, or X0201 */ | |
555 static int mime_decode_mode = FALSE; /* MIME mode B base64, Q hex */ | |
556 | |
557 /* X0201 / X0208 conversion tables */ | |
558 | |
559 /* X0201 kana conversion table */ | |
560 /* 90-9F A0-DF */ | |
/* NOTE(review): the values look like two-byte pairs (64 pairs followed by a
   0x00,0x00 pair) - confirm against the code that indexes this table. */
561 static | |
562 unsigned char cv[]= { | |
563 0x21,0x21,0x21,0x23,0x21,0x56,0x21,0x57, | |
564 0x21,0x22,0x21,0x26,0x25,0x72,0x25,0x21, | |
565 0x25,0x23,0x25,0x25,0x25,0x27,0x25,0x29, | |
566 0x25,0x63,0x25,0x65,0x25,0x67,0x25,0x43, | |
567 0x21,0x3c,0x25,0x22,0x25,0x24,0x25,0x26, | |
568 0x25,0x28,0x25,0x2a,0x25,0x2b,0x25,0x2d, | |
569 0x25,0x2f,0x25,0x31,0x25,0x33,0x25,0x35, | |
570 0x25,0x37,0x25,0x39,0x25,0x3b,0x25,0x3d, | |
571 0x25,0x3f,0x25,0x41,0x25,0x44,0x25,0x46, | |
572 0x25,0x48,0x25,0x4a,0x25,0x4b,0x25,0x4c, | |
573 0x25,0x4d,0x25,0x4e,0x25,0x4f,0x25,0x52, | |
574 0x25,0x55,0x25,0x58,0x25,0x5b,0x25,0x5e, | |
575 0x25,0x5f,0x25,0x60,0x25,0x61,0x25,0x62, | |
576 0x25,0x64,0x25,0x66,0x25,0x68,0x25,0x69, | |
577 0x25,0x6a,0x25,0x6b,0x25,0x6c,0x25,0x6d, | |
578 0x25,0x6f,0x25,0x73,0x21,0x2b,0x21,0x2c, | |
579 0x00,0x00}; | |
580 | |
581 | |
582 /* X0201 kana conversion table for dakuten */ | |
583 /* 90-9F A0-DF */ | |
/* Same length as cv[].  NOTE(review): a 0x00,0x00 pair presumably means the
   kana at that position has no combined form in this table - confirm. */
584 static | |
585 unsigned char dv[]= { | |
586 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
587 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
588 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
589 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
590 0x00,0x00,0x00,0x00,0x00,0x00,0x25,0x74, | |
591 0x00,0x00,0x00,0x00,0x25,0x2c,0x25,0x2e, | |
592 0x25,0x30,0x25,0x32,0x25,0x34,0x25,0x36, | |
593 0x25,0x38,0x25,0x3a,0x25,0x3c,0x25,0x3e, | |
594 0x25,0x40,0x25,0x42,0x25,0x45,0x25,0x47, | |
595 0x25,0x49,0x00,0x00,0x00,0x00,0x00,0x00, | |
596 0x00,0x00,0x00,0x00,0x25,0x50,0x25,0x53, | |
597 0x25,0x56,0x25,0x59,0x25,0x5c,0x00,0x00, | |
598 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
599 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
600 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
601 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
602 0x00,0x00}; | |
603 | |
604 /* X0201 kana conversion table for han-dakuten */ | |
605 /* 90-9F A0-DF */ | |
/* Same length as cv[] and dv[]; only five pairs are non-zero.
   NOTE(review): a 0x00,0x00 pair presumably means "no combined form" - confirm. */
606 static | |
607 unsigned char ev[]= { | |
608 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
609 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
610 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
611 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
612 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
613 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
614 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
615 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
616 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
617 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
618 0x00,0x00,0x00,0x00,0x25,0x51,0x25,0x54, | |
619 0x25,0x57,0x25,0x5a,0x25,0x5d,0x00,0x00, | |
620 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
621 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
622 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
623 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
624 0x00,0x00}; | |
625 | |
626 | |
627 /* X0208 kigou conversion table */ | |
628 /* 0x8140 - 0x819e */ | |
/* 96 single-byte entries.  NOTE(review): non-zero values look like the ASCII
   counterpart of each symbol and 0x00 like "no ASCII counterpart" - confirm
   against the code that reads this table. */
629 static | |
630 unsigned char fv[] = { | |
631 | |
632 0x00,0x00,0x00,0x00,0x2c,0x2e,0x00,0x3a, | |
633 0x3b,0x3f,0x21,0x00,0x00,0x27,0x60,0x00, | |
634 0x5e,0x00,0x5f,0x00,0x00,0x00,0x00,0x00, | |
635 0x00,0x00,0x00,0x00,0x00,0x2d,0x00,0x2f, | |
636 0x5c,0x00,0x00,0x7c,0x00,0x00,0x60,0x27, | |
637 0x22,0x22,0x28,0x29,0x00,0x00,0x5b,0x5d, | |
638 0x7b,0x7d,0x3c,0x3e,0x00,0x00,0x00,0x00, | |
639 0x00,0x00,0x00,0x00,0x2b,0x2d,0x00,0x00, | |
640 0x00,0x3d,0x00,0x3c,0x3e,0x00,0x00,0x00, | |
641 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, | |
642 0x24,0x00,0x00,0x25,0x23,0x26,0x2a,0x40, | |
643 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 | |
644 } ; | |
645 | |
646 | |
647 #define CRLF 1 | |
648 | |
649 static int file_out = FALSE; | |
650 #ifdef OVERWRITE | |
651 static int overwrite = FALSE; | |
652 #endif | |
653 | |
654 static int crmode_f = 0; /* CR, NL, CRLF */ | |
655 #ifdef EASYWIN /*Easy Win */ | |
656 static int end_check; | |
657 #endif /*Easy Win */ | |
658 | |
659 #define STD_GC_BUFSIZE (256) | |
660 int std_gc_buf[STD_GC_BUFSIZE]; | |
661 int std_gc_ndx; | |
662 | |
663 #ifdef WIN32DLL | |
664 #include "nkf32dll.c" | |
665 #elif defined(PERL_XS) | |
666 #else /* WIN32DLL */ | |
667 int | |
668 main(argc, argv) | |
669 int argc; | |
670 char **argv; | |
671 { | |
672 FILE *fin; | |
673 unsigned char *cp; | |
674 | |
675 char *outfname = NULL; | |
676 char *origfname; | |
677 | |
678 #ifdef EASYWIN /*Easy Win */ | |
679 _BufferSize.y = 400;/*Set Scroll Buffer Size*/ | |
680 #endif | |
681 | |
682 for (argc--,argv++; (argc > 0) && **argv == '-'; argc--, argv++) { | |
683 cp = (unsigned char *)*argv; | |
684 options(cp); | |
685 #ifdef EXEC_IO | |
686 if (exec_f){ | |
687 int fds[2], pid; | |
688 if (pipe(fds) < 0 || (pid = fork()) < 0){ | |
689 abort(); | |
690 } | |
691 if (pid == 0){ | |
692 if (exec_f > 0){ | |
693 close(fds[0]); | |
694 dup2(fds[1], 1); | |
695 }else{ | |
696 close(fds[1]); | |
697 dup2(fds[0], 0); | |
698 } | |
699 execvp(argv[1], &argv[1]); | |
700 } | |
701 if (exec_f > 0){ | |
702 close(fds[1]); | |
703 dup2(fds[0], 0); | |
704 }else{ | |
705 close(fds[0]); | |
706 dup2(fds[1], 1); | |
707 } | |
708 argc = 0; | |
709 break; | |
710 } | |
711 #endif | |
712 } | |
713 if(x0201_f == WISH_TRUE) | |
714 x0201_f = ((!iso2022jp_f)? TRUE : NO_X0201); | |
715 | |
716 if (binmode_f == TRUE) | |
717 #ifdef __OS2__ | |
718 if (freopen("","wb",stdout) == NULL) | |
719 return (-1); | |
720 #else | |
721 setbinmode(stdout); | |
722 #endif | |
723 | |
724 if (unbuf_f) | |
725 setbuf(stdout, (char *) NULL); | |
726 else | |
727 setvbuffer(stdout, stdobuf, IOBUF_SIZE); | |
728 | |
729 if (argc == 0) { | |
730 if (binmode_f == TRUE) | |
731 #ifdef __OS2__ | |
732 if (freopen("","rb",stdin) == NULL) return (-1); | |
733 #else | |
734 setbinmode(stdin); | |
735 #endif | |
736 setvbuffer(stdin, stdibuf, IOBUF_SIZE); | |
737 if (nop_f) | |
738 noconvert(stdin); | |
739 else { | |
740 kanji_convert(stdin); | |
741 if (guess_f) print_guessed_code(NULL); | |
742 } | |
743 } else { | |
744 int nfiles = argc; | |
745 while (argc--) { | |
746 is_inputcode_mixed = FALSE; | |
747 is_inputcode_set = FALSE; | |
748 input_codename = ""; | |
749 if ((fin = fopen((origfname = *argv++), "r")) == NULL) { | |
750 perror(*--argv); | |
751 return(-1); | |
752 } else { | |
753 #ifdef OVERWRITE | |
754 int fd = 0; | |
755 int fd_backup = 0; | |
756 #endif | |
757 | |
758 /* reopen file for stdout */ | |
759 if (file_out == TRUE) { | |
760 #ifdef OVERWRITE | |
761 if (overwrite){ | |
762 outfname = malloc(strlen(origfname) | |
763 + strlen(".nkftmpXXXXXX") | |
764 + 1); | |
765 if (!outfname){ | |
766 perror(origfname); | |
767 return -1; | |
768 } | |
769 strcpy(outfname, origfname); | |
770 #ifdef MSDOS | |
771 { | |
772 int i; | |
773 for (i = strlen(outfname); i; --i){ | |
774 if (outfname[i - 1] == '/' | |
775 || outfname[i - 1] == '\\'){ | |
776 break; | |
777 } | |
778 } | |
779 outfname[i] = '\0'; | |
780 } | |
781 strcat(outfname, "ntXXXXXX"); | |
782 mktemp(outfname); | |
783 fd = open(outfname, O_WRONLY | O_CREAT | O_TRUNC, | |
784 S_IREAD | S_IWRITE); | |
785 #else | |
786 strcat(outfname, ".nkftmpXXXXXX"); | |
787 fd = mkstemp(outfname); | |
788 #endif | |
789 if (fd < 0 | |
790 || (fd_backup = dup(fileno(stdout))) < 0 | |
791 || dup2(fd, fileno(stdout)) < 0 | |
792 ){ | |
793 perror(origfname); | |
794 return -1; | |
795 } | |
796 }else | |
797 #endif | |
798 if(argc == 1 ) { | |
799 outfname = *argv++; | |
800 argc--; | |
801 } else { | |
802 outfname = "nkf.out"; | |
803 } | |
804 | |
805 if(freopen(outfname, "w", stdout) == NULL) { | |
806 perror (outfname); | |
807 return (-1); | |
808 } | |
809 if (binmode_f == TRUE) { | |
810 #ifdef __OS2__ | |
811 if (freopen("","wb",stdout) == NULL) | |
812 return (-1); | |
813 #else | |
814 setbinmode(stdout); | |
815 #endif | |
816 } | |
817 } | |
818 if (binmode_f == TRUE) | |
819 #ifdef __OS2__ | |
820 if (freopen("","rb",fin) == NULL) | |
821 return (-1); | |
822 #else | |
823 setbinmode(fin); | |
824 #endif | |
825 setvbuffer(fin, stdibuf, IOBUF_SIZE); | |
826 if (nop_f) | |
827 noconvert(fin); | |
828 else { | |
829 char *filename = NULL; | |
830 kanji_convert(fin); | |
831 if (nfiles > 1) filename = origfname; | |
832 if (guess_f) print_guessed_code(filename); | |
833 } | |
834 fclose(fin); | |
835 #ifdef OVERWRITE | |
836 if (overwrite) { | |
837 struct stat sb; | |
838 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) | |
839 time_t tb[2]; | |
840 #else | |
841 struct utimbuf tb; | |
842 #endif | |
843 | |
844 fflush(stdout); | |
845 close(fd); | |
846 if (dup2(fd_backup, fileno(stdout)) < 0){ | |
847 perror("dup2"); | |
848 } | |
849 if (stat(origfname, &sb)) { | |
850 fprintf(stderr, "Can't stat %s\n", origfname); | |
851 } | |
852 /* $B%Q!<%_%C%7%g%s$rI|85(B */ | |
853 if (chmod(outfname, sb.st_mode)) { | |
854 fprintf(stderr, "Can't set permission %s\n", outfname); | |
855 } | |
856 | |
857 /* $B%?%$%`%9%?%s%W$rI|85(B */ | |
858 #if defined(MSDOS) && !defined(__MINGW32__) && !defined(__WIN32__) | |
859 tb[0] = tb[1] = sb.st_mtime; | |
860 if (utime(outfname, tb)) { | |
861 fprintf(stderr, "Can't set timestamp %s\n", outfname); | |
862 } | |
863 #else | |
864 tb.actime = sb.st_atime; | |
865 tb.modtime = sb.st_mtime; | |
866 if (utime(outfname, &tb)) { | |
867 fprintf(stderr, "Can't set timestamp %s\n", outfname); | |
868 } | |
869 #endif | |
870 #ifdef MSDOS | |
871 if (unlink(origfname)){ | |
872 perror(origfname); | |
873 } | |
874 #endif | |
875 if (rename(outfname, origfname)) { | |
876 perror(origfname); | |
877 fprintf(stderr, "Can't rename %s to %s\n", | |
878 outfname, origfname); | |
879 } | |
880 free(outfname); | |
881 } | |
882 #endif | |
883 } | |
884 } | |
885 } | |
886 #ifdef EASYWIN /*Easy Win */ | |
887 if (file_out == FALSE) | |
888 scanf("%d",&end_check); | |
889 else | |
890 fclose(stdout); | |
891 #else /* for Other OS */ | |
892 if (file_out == TRUE) | |
893 fclose(stdout); | |
894 #endif /*Easy Win */ | |
895 return (0); | |
896 } | |
897 #endif /* WIN32DLL */ | |
898 | |
/* Long option table used by options() for arguments of the form "--name".
   `name` is the text after "--"; `alias` is the short-flag string that
   options() continues parsing in its place.  An empty alias marks an option
   that options() recognizes by comparing `name` directly.  A name ending in
   '=' is matched only up to the '='. */
899 static | |
900 struct { | |
901 char *name; | |
902 char *alias; | |
903 } long_option[] = { | |
904 {"base64","jMB"}, | |
905 {"euc","e"}, | |
906 {"euc-input","E"}, | |
907 {"fj","jm"}, | |
908 {"help","v"}, | |
909 {"jis","j"}, | |
910 {"jis-input","J"}, | |
911 {"mac","sLm"}, | |
912 {"mime","jM"}, | |
913 {"mime-input","m"}, | |
914 {"msdos","sLw"}, | |
915 {"sjis","s"}, | |
916 {"sjis-input","S"}, | |
917 {"unix","eLu"}, | |
918 {"version","V"}, | |
919 {"windows","sLw"}, | |
920 {"hiragana","h1"}, | |
921 {"katakana","h2"}, | |
922 {"katakana-hiragana","h3"}, | |
923 {"guess", "g"}, | |
924 {"cp932", ""}, | |
925 {"no-cp932", ""}, | |
926 #ifdef X0212_ENABLE | |
927 {"x0212", ""}, | |
928 #endif | |
929 #ifdef UTF8_OUTPUT_ENABLE | |
930 {"utf8", "w"}, | |
931 {"utf16", "w16"}, | |
932 {"ms-ucs-map", ""}, | |
933 #endif | |
934 #ifdef UTF8_INPUT_ENABLE | |
935 {"utf8-input", "W"}, | |
936 {"utf16-input", "W16"}, | |
937 #endif | |
938 #ifdef OVERWRITE | |
939 {"overwrite", ""}, | |
940 #endif | |
941 #ifdef INPUT_OPTION | |
942 {"cap-input", ""}, | |
943 {"url-input", ""}, | |
944 #endif | |
945 #ifdef NUMCHAR_OPTION | |
946 {"numchar-input", ""}, | |
947 #endif | |
948 #ifdef CHECK_OPTION | |
949 {"no-output", ""}, | |
950 {"debug", ""}, | |
951 #endif | |
952 #ifdef SHIFTJIS_CP932 | |
953 {"cp932inv", ""}, | |
954 #endif | |
955 #ifdef EXEC_IO | |
956 {"exec-in", ""}, | |
957 {"exec-out", ""}, | |
958 #endif | |
959 {"prefix=", ""}, | |
960 }; | |
961 | |
962 static int option_mode = 0; | |
963 | |
964 void | |
965 options(cp) | |
966 unsigned char *cp; | |
967 { | |
968 int i; | |
969 unsigned char *p = NULL; | |
970 | |
971 if (option_mode==1) | |
972 return; | |
973 if (*cp++ != '-') | |
974 return; | |
975 while (*cp) { | |
976 if (p && !*cp) { | |
977 cp = p; | |
978 p = 0; | |
979 } | |
980 switch (*cp++) { | |
981 case '-': /* literal options */ | |
982 if (!*cp) { /* ignore the rest of arguments */ | |
983 option_mode = 1; | |
984 return; | |
985 } | |
986 for (i=0;i<sizeof(long_option)/sizeof(long_option[0]);i++) { | |
987 int j; | |
988 p = (unsigned char *)long_option[i].name; | |
989 for (j=0;*p && (*p != '=') && *p == cp[j];p++, j++); | |
990 if (*p == cp[j]){ | |
991 p = &cp[j]; | |
992 break; | |
993 } | |
994 p = 0; | |
995 } | |
996 if (p == 0) return; | |
997 cp = (unsigned char *)long_option[i].alias; | |
998 if (!*cp){ | |
999 #ifdef OVERWRITE | |
1000 if (strcmp(long_option[i].name, "overwrite") == 0){ | |
1001 file_out = TRUE; | |
1002 overwrite = TRUE; | |
1003 continue; | |
1004 } | |
1005 #endif | |
1006 #ifdef INPUT_OPTION | |
1007 if (strcmp(long_option[i].name, "cap-input") == 0){ | |
1008 cap_f = TRUE; | |
1009 continue; | |
1010 } | |
1011 if (strcmp(long_option[i].name, "url-input") == 0){ | |
1012 url_f = TRUE; | |
1013 continue; | |
1014 } | |
1015 #endif | |
1016 #ifdef NUMCHAR_OPTION | |
1017 if (strcmp(long_option[i].name, "numchar-input") == 0){ | |
1018 numchar_f = TRUE; | |
1019 continue; | |
1020 } | |
1021 #endif | |
1022 #ifdef CHECK_OPTION | |
1023 if (strcmp(long_option[i].name, "no-output") == 0){ | |
1024 noout_f = TRUE; | |
1025 continue; | |
1026 } | |
1027 if (strcmp(long_option[i].name, "debug") == 0){ | |
1028 debug_f = TRUE; | |
1029 continue; | |
1030 } | |
1031 #endif | |
1032 if (strcmp(long_option[i].name, "cp932") == 0){ | |
1033 #ifdef SHIFTJIS_CP932 | |
1034 cp932_f = TRUE; | |
1035 cp932inv_f = TRUE; | |
1036 #endif | |
1037 #ifdef UTF8_OUTPUT_ENABLE | |
1038 ms_ucs_map_f = TRUE; | |
1039 #endif | |
1040 continue; | |
1041 } | |
1042 if (strcmp(long_option[i].name, "no-cp932") == 0){ | |
1043 #ifdef SHIFTJIS_CP932 | |
1044 cp932_f = FALSE; | |
1045 cp932inv_f = FALSE; | |
1046 #endif | |
1047 #ifdef UTF8_OUTPUT_ENABLE | |
1048 ms_ucs_map_f = FALSE; | |
1049 #endif | |
1050 continue; | |
1051 } | |
1052 #ifdef SHIFTJIS_CP932 | |
1053 if (strcmp(long_option[i].name, "cp932inv") == 0){ | |
1054 cp932inv_f = TRUE; | |
1055 continue; | |
1056 } | |
1057 #endif | |
1058 | |
1059 #ifdef X0212_ENABLE | |
1060 if (strcmp(long_option[i].name, "x0212") == 0){ | |
1061 x0212_f = TRUE; | |
1062 continue; | |
1063 } | |
1064 #endif | |
1065 | |
1066 #ifdef EXEC_IO | |
1067 if (strcmp(long_option[i].name, "exec-in") == 0){ | |
1068 exec_f = 1; | |
1069 return; | |
1070 } | |
1071 if (strcmp(long_option[i].name, "exec-out") == 0){ | |
1072 exec_f = -1; | |
1073 return; | |
1074 } | |
1075 #endif | |
1076 #ifdef UTF8_OUTPUT_ENABLE | |
1077 if (strcmp(long_option[i].name, "ms-ucs-map") == 0){ | |
1078 ms_ucs_map_f = TRUE; | |
1079 continue; | |
1080 } | |
1081 #endif | |
1082 if (strcmp(long_option[i].name, "prefix=") == 0){ | |
1083 if (*p == '=' && ' ' < p[1] && p[1] < 128){ | |
1084 for (i = 2; ' ' < p[i] && p[i] < 128; i++){ | |
1085 prefix_table[p[i]] = p[1]; | |
1086 } | |
1087 } | |
1088 continue; | |
1089 } | |
1090 } | |
1091 continue; | |
1092 case 'b': /* buffered mode */ | |
1093 unbuf_f = FALSE; | |
1094 continue; | |
1095 case 'u': /* non bufferd mode */ | |
1096 unbuf_f = TRUE; | |
1097 continue; | |
1098 case 't': /* transparent mode */ | |
1099 nop_f = TRUE; | |
1100 continue; | |
1101 case 'j': /* JIS output */ | |
1102 case 'n': | |
1103 output_conv = j_oconv; | |
1104 continue; | |
1105 case 'e': /* AT&T EUC output */ | |
1106 output_conv = e_oconv; | |
1107 continue; | |
1108 case 's': /* SJIS output */ | |
1109 output_conv = s_oconv; | |
1110 continue; | |
1111 case 'l': /* ISO8859 Latin-1 support, no conversion */ | |
1112 iso8859_f = TRUE; /* Only compatible with ISO-2022-JP */ | |
1113 input_f = LATIN1_INPUT; | |
1114 continue; | |
1115 case 'i': /* Kanji IN ESC-$-@/B */ | |
1116 if (*cp=='@'||*cp=='B') | |
1117 kanji_intro = *cp++; | |
1118 continue; | |
1119 case 'o': /* ASCII IN ESC-(-J/B */ | |
1120 if (*cp=='J'||*cp=='B'||*cp=='H') | |
1121 ascii_intro = *cp++; | |
1122 continue; | |
1123 case 'h': | |
1124 /* | |
1125 bit:1 hira -> kata | |
1126 bit:2 kata -> hira | |
1127 */ | |
1128 if ('9'>= *cp && *cp>='0') | |
1129 hira_f |= (*cp++ -'0'); | |
1130 else | |
1131 hira_f |= 1; | |
1132 continue; | |
1133 case 'r': | |
1134 rot_f = TRUE; | |
1135 continue; | |
1136 #if defined(MSDOS) || defined(__OS2__) | |
1137 case 'T': | |
1138 binmode_f = FALSE; | |
1139 continue; | |
1140 #endif | |
1141 #ifndef PERL_XS | |
1142 case 'V': | |
1143 version(); | |
1144 exit(1); | |
1145 break; | |
1146 case 'v': | |
1147 usage(); | |
1148 exit(1); | |
1149 break; | |
1150 #endif | |
1151 #ifdef UTF8_OUTPUT_ENABLE | |
1152 case 'w': /* UTF-8 output */ | |
1153 if ('1'== cp[0] && '6'==cp[1]) { | |
1154 output_conv = w_oconv16; cp+=2; | |
1155 if (cp[0]=='L') { | |
1156 unicode_bom_f=2; cp++; | |
1157 w_oconv16_LE = 1; | |
1158 if (cp[0] == '0'){ | |
1159 unicode_bom_f=1; cp++; | |
1160 } | |
1161 } else if (cp[0] == 'B') { | |
1162 unicode_bom_f=2; cp++; | |
1163 if (cp[0] == '0'){ | |
1164 unicode_bom_f=1; cp++; | |
1165 } | |
1166 } | |
1167 } else if (cp[0] == '8') { | |
1168 output_conv = w_oconv; cp++; | |
1169 unicode_bom_f=2; | |
1170 if (cp[0] == '0'){ | |
1171 unicode_bom_f=1; cp++; | |
1172 } | |
1173 } else | |
1174 output_conv = w_oconv; | |
1175 continue; | |
1176 #endif | |
1177 #ifdef UTF8_INPUT_ENABLE | |
1178 case 'W': /* UTF-8 input */ | |
1179 if ('1'== cp[0] && '6'==cp[1]) { | |
1180 input_f = UTF16BE_INPUT; | |
1181 utf16_mode = UTF16BE_INPUT; | |
1182 cp += 2; | |
1183 if (cp[0]=='L') { | |
1184 cp++; | |
1185 input_f = UTF16LE_INPUT; | |
1186 utf16_mode = UTF16LE_INPUT; | |
1187 } else if (cp[0] == 'B') { | |
1188 cp++; | |
1189 input_f = UTF16BE_INPUT; | |
1190 utf16_mode = UTF16BE_INPUT; | |
1191 } | |
1192 } else if (cp[0] == '8') { | |
1193 cp++; | |
1194 input_f = UTF8_INPUT; | |
1195 } else | |
1196 input_f = UTF8_INPUT; | |
1197 continue; | |
1198 #endif | |
1199 /* Input code assumption */ | |
1200 case 'J': /* JIS input */ | |
1201 case 'E': /* AT&T EUC input */ | |
1202 input_f = JIS_INPUT; | |
1203 continue; | |
1204 case 'S': /* MS Kanji input */ | |
1205 input_f = SJIS_INPUT; | |
1206 if (x0201_f==NO_X0201) x0201_f=TRUE; | |
1207 continue; | |
1208 case 'Z': /* Convert X0208 alphabet to asii */ | |
1209 /* bit:0 Convert X0208 | |
1210 bit:1 Convert Kankaku to one space | |
1211 bit:2 Convert Kankaku to two spaces | |
1212 bit:3 Convert HTML Entity | |
1213 */ | |
1214 if ('9'>= *cp && *cp>='0') | |
1215 alpha_f |= 1<<(*cp++ -'0'); | |
1216 else | |
1217 alpha_f |= TRUE; | |
1218 continue; | |
1219 case 'x': /* Convert X0201 kana to X0208 or X0201 Conversion */ | |
1220 x0201_f = FALSE; /* No X0201->X0208 conversion */ | |
1221 /* accept X0201 | |
1222 ESC-(-I in JIS, EUC, MS Kanji | |
1223 SI/SO in JIS, EUC, MS Kanji | |
1224 SSO in EUC, JIS, not in MS Kanji | |
1225 MS Kanji (0xa0-0xdf) | |
1226 output X0201 | |
1227 ESC-(-I in JIS (0x20-0x5f) | |
1228 SSO in EUC (0xa0-0xdf) | |
1229 0xa0-0xd in MS Kanji (0xa0-0xdf) | |
1230 */ | |
1231 continue; | |
1232 case 'X': /* Assume X0201 kana */ | |
1233 /* Default value is NO_X0201 for EUC/MS-Kanji mix */ | |
1234 x0201_f = TRUE; | |
1235 continue; | |
1236 case 'F': /* prserve new lines */ | |
1237 fold_preserve_f = TRUE; | |
1238 case 'f': /* folding -f60 or -f */ | |
1239 fold_f = TRUE; | |
1240 fold_len = 0; | |
1241 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */ | |
1242 fold_len *= 10; | |
1243 fold_len += *cp++ - '0'; | |
1244 } | |
1245 if (!(0<fold_len && fold_len<BUFSIZ)) | |
1246 fold_len = DEFAULT_FOLD; | |
1247 if (*cp=='-') { | |
1248 fold_margin = 0; | |
1249 cp++; | |
1250 while('0'<= *cp && *cp <='9') { /* we don't use atoi here */ | |
1251 fold_margin *= 10; | |
1252 fold_margin += *cp++ - '0'; | |
1253 } | |
1254 } | |
1255 continue; | |
1256 case 'm': /* MIME support */ | |
1257 /* mime_decode_f = TRUE; */ /* this has too large side effects... */ | |
1258 if (*cp=='B'||*cp=='Q') { | |
1259 mime_decode_mode = *cp++; | |
1260 mimebuf_f = FIXED_MIME; | |
1261 } else if (*cp=='N') { | |
1262 mime_f = TRUE; cp++; | |
1263 } else if (*cp=='S') { | |
1264 mime_f = STRICT_MIME; cp++; | |
1265 } else if (*cp=='0') { | |
1266 mime_decode_f = FALSE; | |
1267 mime_f = FALSE; cp++; | |
1268 } | |
1269 continue; | |
1270 case 'M': /* MIME output */ | |
1271 if (*cp=='B') { | |
1272 mimeout_mode = 'B'; | |
1273 mimeout_f = FIXED_MIME; cp++; | |
1274 } else if (*cp=='Q') { | |
1275 mimeout_mode = 'Q'; | |
1276 mimeout_f = FIXED_MIME; cp++; | |
1277 } else { | |
1278 mimeout_f = TRUE; | |
1279 } | |
1280 continue; | |
1281 case 'B': /* Broken JIS support */ | |
1282 /* bit:0 no ESC JIS | |
1283 bit:1 allow any x on ESC-(-x or ESC-$-x | |
1284 bit:2 reset to ascii on NL | |
1285 */ | |
1286 if ('9'>= *cp && *cp>='0') | |
1287 broken_f |= 1<<(*cp++ -'0'); | |
1288 else | |
1289 broken_f |= TRUE; | |
1290 continue; | |
1291 #ifndef PERL_XS | |
1292 case 'O':/* for Output file */ | |
1293 file_out = TRUE; | |
1294 continue; | |
1295 #endif | |
1296 case 'c':/* add cr code */ | |
1297 crmode_f = CRLF; | |
1298 continue; | |
1299 case 'd':/* delete cr code */ | |
1300 crmode_f = NL; | |
1301 continue; | |
1302 case 'I': /* ISO-2022-JP output */ | |
1303 iso2022jp_f = TRUE; | |
1304 continue; | |
1305 case 'L': /* line mode */ | |
1306 if (*cp=='u') { /* unix */ | |
1307 crmode_f = NL; cp++; | |
1308 } else if (*cp=='m') { /* mac */ | |
1309 crmode_f = CR; cp++; | |
1310 } else if (*cp=='w') { /* windows */ | |
1311 crmode_f = CRLF; cp++; | |
1312 } else if (*cp=='0') { /* no conversion */ | |
1313 crmode_f = 0; cp++; | |
1314 } | |
1315 continue; | |
1316 case 'g': | |
1317 #ifndef PERL_XS | |
1318 guess_f = TRUE; | |
1319 #endif | |
1320 continue; | |
1321 case ' ': | |
1322 /* module muliple options in a string are allowed for Perl moudle */ | |
1323 while(*cp && *cp!='-') cp++; | |
1324 if(*cp=='-') cp++; | |
1325 continue; | |
1326 default: | |
1327 /* bogus option but ignored */ | |
1328 continue; | |
1329 } | |
1330 } | |
1331 } | |
1332 | |
1333 #ifdef ANSI_C_PROTOTYPE | |
1334 struct input_code * find_inputcode_byfunc(int (*iconv_func)(int c2,int c1,int c0)) | |
1335 #else | |
1336 struct input_code * find_inputcode_byfunc(iconv_func) | |
1337 int (*iconv_func)(); | |
1338 #endif | |
1339 { | |
1340 if (iconv_func){ | |
1341 struct input_code *p = input_code_list; | |
1342 while (p->name){ | |
1343 if (iconv_func == p->iconv_func){ | |
1344 return p; | |
1345 } | |
1346 p++; | |
1347 } | |
1348 } | |
1349 return 0; | |
1350 } | |
1351 | |
1352 #ifdef CHECK_OPTION | |
1353 static int (*iconv_for_check)() = 0; | |
1354 #endif | |
1355 | |
1356 #ifdef ANSI_C_PROTOTYPE | |
1357 void set_iconv(int f, int (*iconv_func)(int c2,int c1,int c0)) | |
1358 #else | |
1359 void set_iconv(f, iconv_func) | |
1360 int f; | |
1361 int (*iconv_func)(); | |
1362 #endif | |
1363 { | |
1364 #ifdef INPUT_CODE_FIX | |
1365 if (f || !input_f) | |
1366 #endif | |
1367 if (estab_f != f){ | |
1368 estab_f = f; | |
1369 } | |
1370 | |
1371 if (iconv_func | |
1372 #ifdef INPUT_CODE_FIX | |
1373 && (f == -TRUE || !input_f) /* -TRUE means "FORCE" */ | |
1374 #endif | |
1375 ){ | |
1376 iconv = iconv_func; | |
1377 } | |
1378 #ifdef CHECK_OPTION | |
1379 if (estab_f && iconv_for_check != iconv){ | |
1380 struct input_code *p = find_inputcode_byfunc(iconv); | |
1381 if (p){ | |
1382 set_input_codename(p->name); | |
1383 debug(input_codename); | |
1384 } | |
1385 iconv_for_check = iconv; | |
1386 } | |
1387 #endif | |
1388 } | |
1389 | |
/*
 * Score bits accumulated in input_code.score while guessing the input
 * encoding.  Each flag is the previous one shifted left by one bit.
 */
#define SCORE_L2       (1)                      /* JIS level-2 kanji */
#define SCORE_KANA     (SCORE_L2 << 1)          /* so-called half-width kana */
#define SCORE_DEPEND   (SCORE_KANA << 1)        /* machine-dependent characters */
#ifdef SHIFTJIS_CP932
#define SCORE_CP932    (SCORE_DEPEND << 1)      /* remapped through CP932 */
#define SCORE_NO_EXIST (SCORE_CP932 << 1)       /* character does not exist */
#else
#define SCORE_NO_EXIST (SCORE_DEPEND << 1)      /* character does not exist */
#endif
#define SCORE_iMIME    (SCORE_NO_EXIST << 1)    /* specified by MIME */
#define SCORE_ERROR    (SCORE_iMIME << 1)       /* error */

/* score given to a detector by status_reset() */
#define SCORE_INIT (SCORE_iMIME)

/* Scores indexed by the low nibble of a first byte whose bits 4-6 are 0x20
   (see code_score()). */
int score_table_A0[] = {
    /* 0 - 3 */
    0,
    0,
    0,
    0,
    /* 4 - 7 */
    0,
    0,
    0,
    0,
    /* 8 - b */
    0,
    SCORE_DEPEND,
    SCORE_DEPEND,
    SCORE_DEPEND,
    /* c - f */
    SCORE_DEPEND,
    SCORE_DEPEND,
    SCORE_DEPEND,
    SCORE_NO_EXIST,
};

/* Scores indexed by the low nibble of a first byte whose bits 4-6 are 0x70
   (see code_score()). */
int score_table_F0[] = {
    /* 0 - 3 */
    SCORE_L2,
    SCORE_L2,
    SCORE_L2,
    SCORE_L2,
    /* 4 - 7 */
    SCORE_L2,
    SCORE_DEPEND,
    SCORE_NO_EXIST,
    SCORE_NO_EXIST,
    /* 8 - b */
    SCORE_DEPEND,
    SCORE_DEPEND,
    SCORE_DEPEND,
    SCORE_DEPEND,
    /* c - f */
    SCORE_DEPEND,
    SCORE_NO_EXIST,
    SCORE_NO_EXIST,
    SCORE_ERROR,
};
1417 | |
1418 void set_code_score(ptr, score) | |
1419 struct input_code *ptr; | |
1420 int score; | |
1421 { | |
1422 if (ptr){ | |
1423 ptr->score |= score; | |
1424 } | |
1425 } | |
1426 | |
1427 void clr_code_score(ptr, score) | |
1428 struct input_code *ptr; | |
1429 int score; | |
1430 { | |
1431 if (ptr){ | |
1432 ptr->score &= ~score; | |
1433 } | |
1434 } | |
1435 | |
1436 void code_score(ptr) | |
1437 struct input_code *ptr; | |
1438 { | |
1439 int c2 = ptr->buf[0]; | |
1440 int c1 = ptr->buf[1]; | |
1441 if (c2 < 0){ | |
1442 set_code_score(ptr, SCORE_ERROR); | |
1443 }else if (c2 == SSO){ | |
1444 set_code_score(ptr, SCORE_KANA); | |
1445 #ifdef UTF8_OUTPUT_ENABLE | |
1446 }else if (!e2w_conv(c2, c1)){ | |
1447 set_code_score(ptr, SCORE_NO_EXIST); | |
1448 #endif | |
1449 }else if ((c2 & 0x70) == 0x20){ | |
1450 set_code_score(ptr, score_table_A0[c2 & 0x0f]); | |
1451 }else if ((c2 & 0x70) == 0x70){ | |
1452 set_code_score(ptr, score_table_F0[c2 & 0x0f]); | |
1453 }else if ((c2 & 0x70) >= 0x50){ | |
1454 set_code_score(ptr, SCORE_L2); | |
1455 } | |
1456 } | |
1457 | |
1458 void status_disable(ptr) | |
1459 struct input_code *ptr; | |
1460 { | |
1461 ptr->stat = -1; | |
1462 ptr->buf[0] = -1; | |
1463 code_score(ptr); | |
1464 if (iconv == ptr->iconv_func) set_iconv(FALSE, 0); | |
1465 } | |
1466 | |
1467 void status_push_ch(ptr, c) | |
1468 struct input_code *ptr; | |
1469 int c; | |
1470 { | |
1471 ptr->buf[ptr->index++] = c; | |
1472 } | |
1473 | |
1474 void status_clear(ptr) | |
1475 struct input_code *ptr; | |
1476 { | |
1477 ptr->stat = 0; | |
1478 ptr->index = 0; | |
1479 } | |
1480 | |
1481 void status_reset(ptr) | |
1482 struct input_code *ptr; | |
1483 { | |
1484 status_clear(ptr); | |
1485 ptr->score = SCORE_INIT; | |
1486 } | |
1487 | |
1488 void status_reinit(ptr) | |
1489 struct input_code *ptr; | |
1490 { | |
1491 status_reset(ptr); | |
1492 ptr->_file_stat = 0; | |
1493 } | |
1494 | |
1495 void status_check(ptr, c) | |
1496 struct input_code *ptr; | |
1497 int c; | |
1498 { | |
1499 if (c <= DEL && estab_f){ | |
1500 status_reset(ptr); | |
1501 } | |
1502 } | |
1503 | |
1504 void s_status(ptr, c) | |
1505 struct input_code *ptr; | |
1506 int c; | |
1507 { | |
1508 switch(ptr->stat){ | |
1509 case -1: | |
1510 status_check(ptr, c); | |
1511 break; | |
1512 case 0: | |
1513 if (c <= DEL){ | |
1514 break; | |
1515 #ifdef NUMCHAR_OPTION | |
1516 }else if ((c & CLASS_MASK) == CLASS_UTF16){ | |
1517 break; | |
1518 #endif | |
1519 }else if (0xa1 <= c && c <= 0xdf){ | |
1520 status_push_ch(ptr, SSO); | |
1521 status_push_ch(ptr, c); | |
1522 code_score(ptr); | |
1523 status_clear(ptr); | |
1524 }else if ((0x81 <= c && c < 0xa0) || (0xe0 <= c && c <= 0xef)){ | |
1525 ptr->stat = 1; | |
1526 status_push_ch(ptr, c); | |
1527 #ifdef SHIFTJIS_CP932 | |
1528 }else if (cp932_f | |
1529 && CP932_TABLE_BEGIN <= c && c <= CP932_TABLE_END){ | |
1530 ptr->stat = 2; | |
1531 status_push_ch(ptr, c); | |
1532 #endif /* SHIFTJIS_CP932 */ | |
1533 #ifdef X0212_ENABLE | |
1534 }else if (x0212_f && 0xf0 <= c && c <= 0xfc){ | |
1535 ptr->stat = 1; | |
1536 status_push_ch(ptr, c); | |
1537 #endif /* X0212_ENABLE */ | |
1538 }else{ | |
1539 status_disable(ptr); | |
1540 } | |
1541 break; | |
1542 case 1: | |
1543 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){ | |
1544 status_push_ch(ptr, c); | |
1545 s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]); | |
1546 code_score(ptr); | |
1547 status_clear(ptr); | |
1548 }else{ | |
1549 status_disable(ptr); | |
1550 } | |
1551 break; | |
1552 case 2: | |
1553 #ifdef SHIFTJIS_CP932 | |
1554 if ((0x40 <= c && c <= 0x7e) || (0x80 <= c && c <= 0xfc)){ | |
1555 status_push_ch(ptr, c); | |
1556 if (s2e_conv(ptr->buf[0], ptr->buf[1], &ptr->buf[0], &ptr->buf[1]) == 0){ | |
1557 set_code_score(ptr, SCORE_CP932); | |
1558 status_clear(ptr); | |
1559 break; | |
1560 } | |
1561 } | |
1562 #endif /* SHIFTJIS_CP932 */ | |
1563 #ifndef X0212_ENABLE | |
1564 status_disable(ptr); | |
1565 break; | |
1566 #endif | |
1567 } | |
1568 } | |
1569 | |
1570 void e_status(ptr, c) | |
1571 struct input_code *ptr; | |
1572 int c; | |
1573 { | |
1574 switch (ptr->stat){ | |
1575 case -1: | |
1576 status_check(ptr, c); | |
1577 break; | |
1578 case 0: | |
1579 if (c <= DEL){ | |
1580 break; | |
1581 #ifdef NUMCHAR_OPTION | |
1582 }else if ((c & CLASS_MASK) == CLASS_UTF16){ | |
1583 break; | |
1584 #endif | |
1585 }else if (SSO == c || (0xa1 <= c && c <= 0xfe)){ | |
1586 ptr->stat = 1; | |
1587 status_push_ch(ptr, c); | |
1588 #ifdef X0212_ENABLE | |
1589 }else if (0x8f == c){ | |
1590 ptr->stat = 2; | |
1591 status_push_ch(ptr, c); | |
1592 #endif /* X0212_ENABLE */ | |
1593 }else{ | |
1594 status_disable(ptr); | |
1595 } | |
1596 break; | |
1597 case 1: | |
1598 if (0xa1 <= c && c <= 0xfe){ | |
1599 status_push_ch(ptr, c); | |
1600 code_score(ptr); | |
1601 status_clear(ptr); | |
1602 }else{ | |
1603 status_disable(ptr); | |
1604 } | |
1605 break; | |
1606 #ifdef X0212_ENABLE | |
1607 case 2: | |
1608 if (0xa1 <= c && c <= 0xfe){ | |
1609 ptr->stat = 1; | |
1610 status_push_ch(ptr, c); | |
1611 }else{ | |
1612 status_disable(ptr); | |
1613 } | |
1614 #endif /* X0212_ENABLE */ | |
1615 } | |
1616 } | |
1617 | |
1618 #ifdef UTF8_INPUT_ENABLE | |
1619 void w16_status(ptr, c) | |
1620 struct input_code *ptr; | |
1621 int c; | |
1622 { | |
1623 switch (ptr->stat){ | |
1624 case -1: | |
1625 break; | |
1626 case 0: | |
1627 if (ptr->_file_stat == 0){ | |
1628 if (c == 0xfe || c == 0xff){ | |
1629 ptr->stat = c; | |
1630 status_push_ch(ptr, c); | |
1631 ptr->_file_stat = 1; | |
1632 }else{ | |
1633 status_disable(ptr); | |
1634 ptr->_file_stat = -1; | |
1635 } | |
1636 }else if (ptr->_file_stat > 0){ | |
1637 ptr->stat = 1; | |
1638 status_push_ch(ptr, c); | |
1639 }else if (ptr->_file_stat < 0){ | |
1640 status_disable(ptr); | |
1641 } | |
1642 break; | |
1643 | |
1644 case 1: | |
1645 if (c == EOF){ | |
1646 status_disable(ptr); | |
1647 ptr->_file_stat = -1; | |
1648 }else{ | |
1649 status_push_ch(ptr, c); | |
1650 status_clear(ptr); | |
1651 } | |
1652 break; | |
1653 | |
1654 case 0xfe: | |
1655 case 0xff: | |
1656 if (ptr->stat != c && (c == 0xfe || c == 0xff)){ | |
1657 status_push_ch(ptr, c); | |
1658 status_clear(ptr); | |
1659 }else{ | |
1660 status_disable(ptr); | |
1661 ptr->_file_stat = -1; | |
1662 } | |
1663 break; | |
1664 } | |
1665 } | |
1666 | |
1667 void w_status(ptr, c) | |
1668 struct input_code *ptr; | |
1669 int c; | |
1670 { | |
1671 switch (ptr->stat){ | |
1672 case -1: | |
1673 status_check(ptr, c); | |
1674 break; | |
1675 case 0: | |
1676 if (c <= DEL){ | |
1677 break; | |
1678 #ifdef NUMCHAR_OPTION | |
1679 }else if ((c & CLASS_MASK) == CLASS_UTF16){ | |
1680 break; | |
1681 #endif | |
1682 }else if (0xc0 <= c && c <= 0xdf){ | |
1683 ptr->stat = 1; | |
1684 status_push_ch(ptr, c); | |
1685 }else if (0xe0 <= c && c <= 0xef){ | |
1686 ptr->stat = 2; | |
1687 status_push_ch(ptr, c); | |
1688 }else{ | |
1689 status_disable(ptr); | |
1690 } | |
1691 break; | |
1692 case 1: | |
1693 case 2: | |
1694 if (0x80 <= c && c <= 0xbf){ | |
1695 status_push_ch(ptr, c); | |
1696 if (ptr->index > ptr->stat){ | |
1697 int bom = (ptr->buf[0] == 0xef && ptr->buf[1] == 0xbb | |
1698 && ptr->buf[2] == 0xbf); | |
1699 w2e_conv(ptr->buf[0], ptr->buf[1], ptr->buf[2], | |
1700 &ptr->buf[0], &ptr->buf[1]); | |
1701 if (!bom){ | |
1702 code_score(ptr); | |
1703 } | |
1704 status_clear(ptr); | |
1705 } | |
1706 }else{ | |
1707 status_disable(ptr); | |
1708 } | |
1709 break; | |
1710 } | |
1711 } | |
1712 #endif | |
1713 | |
1714 void | |
1715 code_status(c) | |
1716 int c; | |
1717 { | |
1718 int action_flag = 1; | |
1719 struct input_code *result = 0; | |
1720 struct input_code *p = input_code_list; | |
1721 while (p->name){ | |
1722 (p->status_func)(p, c); | |
1723 if (p->stat > 0){ | |
1724 action_flag = 0; | |
1725 }else if(p->stat == 0){ | |
1726 if (result){ | |
1727 action_flag = 0; | |
1728 }else{ | |
1729 result = p; | |
1730 } | |
1731 } | |
1732 ++p; | |
1733 } | |
1734 | |
1735 if (action_flag){ | |
1736 if (result && !estab_f){ | |
1737 set_iconv(TRUE, result->iconv_func); | |
1738 }else if (c <= DEL){ | |
1739 struct input_code *ptr = input_code_list; | |
1740 while (ptr->name){ | |
1741 status_reset(ptr); | |
1742 ++ptr; | |
1743 } | |
1744 } | |
1745 } | |
1746 } | |
1747 | |
1748 #ifndef WIN32DLL | |
1749 int | |
1750 std_getc(f) | |
1751 FILE *f; | |
1752 { | |
1753 if (std_gc_ndx){ | |
1754 return std_gc_buf[--std_gc_ndx]; | |
1755 } | |
1756 return getc(f); | |
1757 } | |
1758 #endif /*WIN32DLL*/ | |
1759 | |
1760 int | |
1761 std_ungetc(c,f) | |
1762 int c; | |
1763 FILE *f; | |
1764 { | |
1765 if (std_gc_ndx == STD_GC_BUFSIZE){ | |
1766 return EOF; | |
1767 } | |
1768 std_gc_buf[std_gc_ndx++] = c; | |
1769 return c; | |
1770 } | |
1771 | |
#ifndef WIN32DLL
/* Write c to standard output; an EOF value is silently dropped. */
void
std_putc(c)
     int c;
{
    if (c == EOF)
        return;
    putchar(c);
}
#endif /*WIN32DLL*/
1781 | |
1782 int | |
1783 noconvert(f) | |
1784 FILE *f; | |
1785 { | |
1786 int c; | |
1787 | |
1788 while ((c = (*i_getc)(f)) != EOF) | |
1789 (*o_putc)(c); | |
1790 return 1; | |
1791 } | |
1792 | |
1793 | |
1794 void | |
1795 module_connection() | |
1796 { | |
1797 oconv = output_conv; | |
1798 o_putc = std_putc; | |
1799 | |
1800 /* replace continucation module, from output side */ | |
1801 | |
1802 /* output redicrection */ | |
1803 #ifdef CHECK_OPTION | |
1804 if (noout_f || guess_f){ | |
1805 o_putc = no_putc; | |
1806 } | |
1807 #endif | |
1808 if (mimeout_f) { | |
1809 o_mputc = o_putc; | |
1810 o_putc = mime_putc; | |
1811 if (mimeout_f == TRUE) { | |
1812 o_base64conv = oconv; oconv = base64_conv; | |
1813 } | |
1814 /* base64_count = 0; */ | |
1815 } | |
1816 | |
1817 if (crmode_f) { | |
1818 o_crconv = oconv; oconv = cr_conv; | |
1819 } | |
1820 if (rot_f) { | |
1821 o_rot_conv = oconv; oconv = rot_conv; | |
1822 } | |
1823 if (iso2022jp_f) { | |
1824 o_iso2022jp_check_conv = oconv; oconv = iso2022jp_check_conv; | |
1825 } | |
1826 if (hira_f) { | |
1827 o_hira_conv = oconv; oconv = hira_conv; | |
1828 } | |
1829 if (fold_f) { | |
1830 o_fconv = oconv; oconv = fold_conv; | |
1831 f_line = 0; | |
1832 } | |
1833 if (alpha_f || x0201_f) { | |
1834 o_zconv = oconv; oconv = z_conv; | |
1835 } | |
1836 | |
1837 i_getc = std_getc; | |
1838 i_ungetc = std_ungetc; | |
1839 /* input redicrection */ | |
1840 #ifdef INPUT_OPTION | |
1841 if (cap_f){ | |
1842 i_cgetc = i_getc; i_getc = cap_getc; | |
1843 i_cungetc = i_ungetc; i_ungetc= cap_ungetc; | |
1844 } | |
1845 if (url_f){ | |
1846 i_ugetc = i_getc; i_getc = url_getc; | |
1847 i_uungetc = i_ungetc; i_ungetc= url_ungetc; | |
1848 } | |
1849 #endif | |
1850 #ifdef NUMCHAR_OPTION | |
1851 if (numchar_f){ | |
1852 i_ngetc = i_getc; i_getc = numchar_getc; | |
1853 i_nungetc = i_ungetc; i_ungetc= numchar_ungetc; | |
1854 } | |
1855 #endif | |
1856 if (mime_f && mimebuf_f==FIXED_MIME) { | |
1857 i_mgetc = i_getc; i_getc = mime_getc; | |
1858 i_mungetc = i_ungetc; i_ungetc = mime_ungetc; | |
1859 } | |
1860 if (broken_f & 1) { | |
1861 i_bgetc = i_getc; i_getc = broken_getc; | |
1862 i_bungetc = i_ungetc; i_ungetc = broken_ungetc; | |
1863 } | |
1864 if (input_f == JIS_INPUT || input_f == LATIN1_INPUT) { | |
1865 set_iconv(-TRUE, e_iconv); | |
1866 } else if (input_f == SJIS_INPUT) { | |
1867 set_iconv(-TRUE, s_iconv); | |
1868 #ifdef UTF8_INPUT_ENABLE | |
1869 } else if (input_f == UTF8_INPUT) { | |
1870 set_iconv(-TRUE, w_iconv); | |
1871 } else if (input_f == UTF16BE_INPUT) { | |
1872 set_iconv(-TRUE, w_iconv16); | |
1873 } else if (input_f == UTF16LE_INPUT) { | |
1874 set_iconv(-TRUE, w_iconv16); | |
1875 #endif | |
1876 } else { | |
1877 set_iconv(FALSE, e_iconv); | |
1878 } | |
1879 | |
1880 { | |
1881 struct input_code *p = input_code_list; | |
1882 while (p->name){ | |
1883 status_reinit(p++); | |
1884 } | |
1885 } | |
1886 } | |
1887 | |
1888 /* | |
1889 Conversion main loop. Code detection only. | |
1890 */ | |
1891 | |
1892 int | |
1893 kanji_convert(f) | |
1894 FILE *f; | |
1895 { | |
1896 int c1, | |
1897 c2, c3; | |
1898 int is_8bit = FALSE; | |
1899 | |
1900 module_connection(); | |
1901 c2 = 0; | |
1902 | |
1903 | |
1904 input_mode = ASCII; | |
1905 output_mode = ASCII; | |
1906 shift_mode = FALSE; | |
1907 | |
1908 #define NEXT continue /* no output, get next */ | |
1909 #define SEND ; /* output c1 and c2, get next */ | |
1910 #define LAST break /* end of loop, go closing */ | |
1911 | |
1912 while ((c1 = (*i_getc)(f)) != EOF) { | |
1913 code_status(c1); | |
1914 if (c2) { | |
1915 /* second byte */ | |
1916 if (c2 > DEL) { | |
1917 /* in case of 8th bit is on */ | |
1918 if (!estab_f&&!mime_decode_mode) { | |
1919 /* in case of not established yet */ | |
1920 /* It is still ambiguious */ | |
1921 if (h_conv(f, c2, c1)==EOF) | |
1922 LAST; | |
1923 else | |
1924 c2 = 0; | |
1925 NEXT; | |
1926 } else | |
1927 /* in case of already established */ | |
1928 if (c1 < AT) { | |
1929 /* ignore bogus code */ | |
1930 c2 = 0; | |
1931 NEXT; | |
1932 } else | |
1933 SEND; | |
1934 } else | |
1935 /* second byte, 7 bit code */ | |
1936 /* it might be kanji shitfted */ | |
1937 if ((c1 == DEL) || (c1 <= SPACE)) { | |
1938 /* ignore bogus first code */ | |
1939 c2 = 0; | |
1940 NEXT; | |
1941 } else | |
1942 SEND; | |
1943 } else { | |
1944 /* first byte */ | |
1945 if ( | |
1946 #ifdef UTF8_INPUT_ENABLE | |
1947 iconv == w_iconv16 | |
1948 #else | |
1949 0 | |
1950 #endif | |
1951 ) { | |
1952 c2 = c1; | |
1953 c1 = (*i_getc)(f); | |
1954 SEND; | |
1955 #ifdef NUMCHAR_OPTION | |
1956 } else if ((c1 & CLASS_MASK) == CLASS_UTF16){ | |
1957 SEND; | |
1958 #endif | |
1959 } else if (c1 > DEL) { | |
1960 /* 8 bit code */ | |
1961 if (!estab_f && !iso8859_f) { | |
1962 /* not established yet */ | |
1963 if (!is_8bit) is_8bit = TRUE; | |
1964 c2 = c1; | |
1965 NEXT; | |
1966 } else { /* estab_f==TRUE */ | |
1967 if (iso8859_f) { | |
1968 c2 = ISO8859_1; | |
1969 c1 &= 0x7f; | |
1970 SEND; | |
1971 } else if (SSP<=c1 && c1<0xe0 && iconv == s_iconv) { | |
1972 /* SJIS X0201 Case... */ | |
1973 if(iso2022jp_f && x0201_f==NO_X0201) { | |
1974 (*oconv)(GETA1, GETA2); | |
1975 NEXT; | |
1976 } else { | |
1977 c2 = X0201; | |
1978 c1 &= 0x7f; | |
1979 SEND; | |
1980 } | |
1981 } else if (c1==SSO && iconv != s_iconv) { | |
1982 /* EUC X0201 Case */ | |
1983 c1 = (*i_getc)(f); /* skip SSO */ | |
1984 code_status(c1); | |
1985 if (SSP<=c1 && c1<0xe0) { | |
1986 if(iso2022jp_f && x0201_f==NO_X0201) { | |
1987 (*oconv)(GETA1, GETA2); | |
1988 NEXT; | |
1989 } else { | |
1990 c2 = X0201; | |
1991 c1 &= 0x7f; | |
1992 SEND; | |
1993 } | |
1994 } else { /* bogus code, skip SSO and one byte */ | |
1995 NEXT; | |
1996 } | |
1997 } else { | |
1998 /* already established */ | |
1999 c2 = c1; | |
2000 NEXT; | |
2001 } | |
2002 } | |
2003 } else if ((c1 > SPACE) && (c1 != DEL)) { | |
2004 /* in case of Roman characters */ | |
2005 if (shift_mode) { | |
2006 /* output 1 shifted byte */ | |
2007 if (iso8859_f) { | |
2008 c2 = ISO8859_1; | |
2009 SEND; | |
2010 } else if (SPACE<=c1 && c1<(0xe0&0x7f) ){ | |
2011 /* output 1 shifted byte */ | |
2012 if(iso2022jp_f && x0201_f==NO_X0201) { | |
2013 (*oconv)(GETA1, GETA2); | |
2014 NEXT; | |
2015 } else { | |
2016 c2 = X0201; | |
2017 SEND; | |
2018 } | |
2019 } else { | |
2020 /* look like bogus code */ | |
2021 NEXT; | |
2022 } | |
2023 } else if (input_mode == X0208) { | |
2024 /* in case of Kanji shifted */ | |
2025 c2 = c1; | |
2026 NEXT; | |
2027 } else if (c1 == '=' && mime_f && !mime_decode_mode ) { | |
2028 /* Check MIME code */ | |
2029 if ((c1 = (*i_getc)(f)) == EOF) { | |
2030 (*oconv)(0, '='); | |
2031 LAST; | |
2032 } else if (c1 == '?') { | |
2033 /* =? is mime conversion start sequence */ | |
2034 if(mime_f == STRICT_MIME) { | |
2035 /* check in real detail */ | |
2036 if (mime_begin_strict(f) == EOF) | |
2037 LAST; | |
2038 else | |
2039 NEXT; | |
2040 } else if (mime_begin(f) == EOF) | |
2041 LAST; | |
2042 else | |
2043 NEXT; | |
2044 } else { | |
2045 (*oconv)(0, '='); | |
2046 (*i_ungetc)(c1,f); | |
2047 NEXT; | |
2048 } | |
2049 } else { | |
2050 /* normal ASCII code */ | |
2051 SEND; | |
2052 } | |
2053 } else if (c1 == SI) { | |
2054 shift_mode = FALSE; | |
2055 NEXT; | |
2056 } else if (c1 == SO) { | |
2057 shift_mode = TRUE; | |
2058 NEXT; | |
2059 } else if (c1 == ESC ) { | |
2060 if ((c1 = (*i_getc)(f)) == EOF) { | |
2061 /* (*oconv)(0, ESC); don't send bogus code */ | |
2062 LAST; | |
2063 } else if (c1 == '$') { | |
2064 if ((c1 = (*i_getc)(f)) == EOF) { | |
2065 /* | |
2066 (*oconv)(0, ESC); don't send bogus code | |
2067 (*oconv)(0, '$'); */ | |
2068 LAST; | |
2069 } else if (c1 == '@'|| c1 == 'B') { | |
2070 /* This is kanji introduction */ | |
2071 input_mode = X0208; | |
2072 shift_mode = FALSE; | |
2073 set_input_codename("ISO-2022-JP"); | |
2074 debug(input_codename); | |
2075 NEXT; | |
2076 } else if (c1 == '(') { | |
2077 if ((c1 = (*i_getc)(f)) == EOF) { | |
2078 /* don't send bogus code | |
2079 (*oconv)(0, ESC); | |
2080 (*oconv)(0, '$'); | |
2081 (*oconv)(0, '('); | |
2082 */ | |
2083 LAST; | |
2084 } else if (c1 == '@'|| c1 == 'B') { | |
2085 /* This is kanji introduction */ | |
2086 input_mode = X0208; | |
2087 shift_mode = FALSE; | |
2088 NEXT; | |
2089 #ifdef X0212_ENABLE | |
2090 } else if (c1 == 'D'){ | |
2091 input_mode = X0212; | |
2092 shift_mode = FALSE; | |
2093 NEXT; | |
2094 #endif /* X0212_ENABLE */ | |
2095 } else { | |
2096 /* could be some special code */ | |
2097 (*oconv)(0, ESC); | |
2098 (*oconv)(0, '$'); | |
2099 (*oconv)(0, '('); | |
2100 (*oconv)(0, c1); | |
2101 NEXT; | |
2102 } | |
2103 } else if (broken_f&0x2) { | |
2104 /* accept any ESC-(-x as broken code ... */ | |
2105 input_mode = X0208; | |
2106 shift_mode = FALSE; | |
2107 NEXT; | |
2108 } else { | |
2109 (*oconv)(0, ESC); | |
2110 (*oconv)(0, '$'); | |
2111 (*oconv)(0, c1); | |
2112 NEXT; | |
2113 } | |
2114 } else if (c1 == '(') { | |
2115 if ((c1 = (*i_getc)(f)) == EOF) { | |
2116 /* don't send bogus code | |
2117 (*oconv)(0, ESC); | |
2118 (*oconv)(0, '('); */ | |
2119 LAST; | |
2120 } else { | |
2121 if (c1 == 'I') { | |
2122 /* This is X0201 kana introduction */ | |
2123 input_mode = X0201; shift_mode = X0201; | |
2124 NEXT; | |
2125 } else if (c1 == 'B' || c1 == 'J' || c1 == 'H') { | |
2126 /* This is X0208 kanji introduction */ | |
2127 input_mode = ASCII; shift_mode = FALSE; | |
2128 NEXT; | |
2129 } else if (broken_f&0x2) { | |
2130 input_mode = ASCII; shift_mode = FALSE; | |
2131 NEXT; | |
2132 } else { | |
2133 (*oconv)(0, ESC); | |
2134 (*oconv)(0, '('); | |
2135 /* maintain various input_mode here */ | |
2136 SEND; | |
2137 } | |
2138 } | |
2139 } else if ( c1 == 'N' || c1 == 'n' ){ | |
2140 /* SS2 */ | |
2141 c3 = (*i_getc)(f); /* skip SS2 */ | |
2142 if ( (SPACE<=c3 && c3 < 0x60) || (0xa0<=c3 && c3 < 0xe0)){ | |
2143 c1 = c3; | |
2144 c2 = X0201; | |
2145 SEND; | |
2146 }else{ | |
2147 (*i_ungetc)(c3, f); | |
2148 /* lonely ESC */ | |
2149 (*oconv)(0, ESC); | |
2150 SEND; | |
2151 } | |
2152 } else { | |
2153 /* lonely ESC */ | |
2154 (*oconv)(0, ESC); | |
2155 SEND; | |
2156 } | |
2157 } else if ((c1 == NL || c1 == CR) && broken_f&4) { | |
2158 input_mode = ASCII; set_iconv(FALSE, 0); | |
2159 SEND; | |
2160 } else if (c1 == NL && mime_decode_f && !mime_decode_mode ) { | |
2161 if ((c1=(*i_getc)(f))!=EOF && c1 == SPACE) { | |
2162 i_ungetc(SPACE,f); | |
2163 continue; | |
2164 } else { | |
2165 i_ungetc(c1,f); | |
2166 } | |
2167 c1 = NL; | |
2168 SEND; | |
2169 } else if (c1 == CR && mime_decode_f && !mime_decode_mode ) { | |
2170 if ((c1=(*i_getc)(f))!=EOF) { | |
2171 if (c1==SPACE) { | |
2172 i_ungetc(SPACE,f); | |
2173 continue; | |
2174 } else if (c1 == NL && (c1=(*i_getc)(f))!=EOF && c1 == SPACE) { | |
2175 i_ungetc(SPACE,f); | |
2176 continue; | |
2177 } else { | |
2178 i_ungetc(c1,f); | |
2179 } | |
2180 i_ungetc(NL,f); | |
2181 } else { | |
2182 i_ungetc(c1,f); | |
2183 } | |
2184 c1 = CR; | |
2185 SEND; | |
2186 } else | |
2187 SEND; | |
2188 } | |
2189 /* send: */ | |
2190 if (input_mode == X0208) | |
2191 (*oconv)(c2, c1); /* this is JIS, not SJIS/EUC case */ | |
2192 #ifdef X0212_ENABLE | |
2193 else if (input_mode == X0212) | |
2194 (*oconv)((0x8f << 8) | c2, c1); | |
2195 #endif /* X0212_ENABLE */ | |
2196 else if (input_mode) | |
2197 (*oconv)(input_mode, c1); /* other special case */ | |
2198 else if ((*iconv)(c2, c1, 0) < 0){ /* can be EUC/SJIS */ | |
2199 int c0 = (*i_getc)(f); | |
2200 if (c0 != EOF){ | |
2201 code_status(c0); | |
2202 (*iconv)(c2, c1, c0); | |
2203 } | |
2204 } | |
2205 | |
2206 c2 = 0; | |
2207 continue; | |
2208 /* goto next_word */ | |
2209 } | |
2210 | |
2211 /* epilogue */ | |
2212 (*iconv)(EOF, 0, 0); | |
2213 if (!is_inputcode_set) | |
2214 { | |
2215 if (is_8bit) { | |
2216 struct input_code *p = input_code_list; | |
2217 struct input_code *result = p; | |
2218 while (p->name){ | |
2219 if (p->score < result->score) result = p; | |
2220 ++p; | |
2221 } | |
2222 set_input_codename(result->name); | |
2223 } | |
2224 } | |
2225 return 1; | |
2226 } | |
2227 | |
2228 int | |
2229 h_conv(f, c2, c1) | |
2230 FILE *f; | |
2231 int c1, | |
2232 c2; | |
2233 { | |
2234 int wc,c3; | |
2235 | |
2236 | |
2237 /** it must NOT be in the kanji shifte sequence */ | |
2238 /** it must NOT be written in JIS7 */ | |
2239 /** and it must be after 2 byte 8bit code */ | |
2240 | |
2241 hold_count = 0; | |
2242 push_hold_buf(c2); | |
2243 push_hold_buf(c1); | |
2244 c2 = 0; | |
2245 | |
2246 while ((c1 = (*i_getc)(f)) != EOF) { | |
2247 if (c1 == ESC){ | |
2248 (*i_ungetc)(c1,f); | |
2249 break; | |
2250 } | |
2251 code_status(c1); | |
2252 if (push_hold_buf(c1) == EOF || estab_f){ | |
2253 break; | |
2254 } | |
2255 } | |
2256 | |
2257 if (!estab_f){ | |
2258 struct input_code *p = input_code_list; | |
2259 struct input_code *result = p; | |
2260 if (c1 == EOF){ | |
2261 code_status(c1); | |
2262 } | |
2263 while (p->name){ | |
2264 if (p->score < result->score){ | |
2265 result = p; | |
2266 } | |
2267 ++p; | |
2268 } | |
2269 set_iconv(FALSE, result->iconv_func); | |
2270 } | |
2271 | |
2272 | |
2273 /** now, | |
2274 ** 1) EOF is detected, or | |
2275 ** 2) Code is established, or | |
2276 ** 3) Buffer is FULL (but last word is pushed) | |
2277 ** | |
2278 ** in 1) and 3) cases, we continue to use | |
2279 ** Kanji codes by oconv and leave estab_f unchanged. | |
2280 **/ | |
2281 | |
2282 c3=c1; | |
2283 wc = 0; | |
2284 while (wc < hold_count){ | |
2285 c2 = hold_buf[wc++]; | |
2286 if (c2 <= DEL | |
2287 #ifdef NUMCHAR_OPTION | |
2288 || (c2 & CLASS_MASK) == CLASS_UTF16 | |
2289 #endif | |
2290 ){ | |
2291 (*iconv)(0, c2, 0); | |
2292 continue; | |
2293 }else if (iconv == s_iconv && 0xa1 <= c2 && c2 <= 0xdf){ | |
2294 (*iconv)(X0201, c2, 0); | |
2295 continue; | |
2296 } | |
2297 if (wc < hold_count){ | |
2298 c1 = hold_buf[wc++]; | |
2299 }else{ | |
2300 c1 = (*i_getc)(f); | |
2301 if (c1 == EOF){ | |
2302 c3 = EOF; | |
2303 break; | |
2304 } | |
2305 code_status(c1); | |
2306 } | |
2307 if ((*iconv)(c2, c1, 0) < 0){ | |
2308 int c0; | |
2309 if (wc < hold_count){ | |
2310 c0 = hold_buf[wc++]; | |
2311 }else{ | |
2312 c0 = (*i_getc)(f); | |
2313 if (c0 == EOF){ | |
2314 c3 = EOF; | |
2315 break; | |
2316 } | |
2317 code_status(c0); | |
2318 } | |
2319 (*iconv)(c2, c1, c0); | |
2320 c1 = c0; | |
2321 } | |
2322 } | |
2323 return c3; | |
2324 } | |
2325 | |
2326 | |
2327 | |
2328 int | |
2329 push_hold_buf(c2) | |
2330 int c2; | |
2331 { | |
2332 if (hold_count >= HOLD_SIZE*2) | |
2333 return (EOF); | |
2334 hold_buf[hold_count++] = c2; | |
2335 return ((hold_count >= HOLD_SIZE*2) ? EOF : hold_count); | |
2336 } | |
2337 | |
2338 int s2e_conv(c2, c1, p2, p1) | |
2339 int c2, c1; | |
2340 int *p2, *p1; | |
2341 { | |
2342 int val; | |
2343 #ifdef SHIFTJIS_CP932 | |
2344 if (cp932_f && CP932_TABLE_BEGIN <= c2 && c2 <= CP932_TABLE_END){ | |
2345 extern unsigned short shiftjis_cp932[3][189]; | |
2346 val = shiftjis_cp932[c2 - CP932_TABLE_BEGIN][c1 - 0x40]; | |
2347 if (val){ | |
2348 c2 = val >> 8; | |
2349 c1 = val & 0xff; | |
2350 } | |
2351 } | |
2352 #endif /* SHIFTJIS_CP932 */ | |
2353 #ifdef X0212_ENABLE | |
2354 if (x0212_f && 0xfa <= c2 && c2 <= 0xfc){ | |
2355 extern unsigned short shiftjis_x0212[3][189]; | |
2356 val = shiftjis_x0212[c2 - 0xfa][c1 - 0x40]; | |
2357 if (val){ | |
2358 if (val & 0x8000){ | |
2359 c2 = (0x8f << 8) | (val >> 8); | |
2360 c1 = val & 0xff; | |
2361 }else{ | |
2362 c2 = val >> 8; | |
2363 c1 = val & 0xff; | |
2364 } | |
2365 if (p2) *p2 = c2; | |
2366 if (p1) *p1 = c1; | |
2367 return 0; | |
2368 } | |
2369 } | |
2370 #endif | |
2371 c2 = c2 + c2 - ((c2 <= 0x9f) ? SJ0162 : SJ6394); | |
2372 if (c1 < 0x9f) | |
2373 c1 = c1 - ((c1 > DEL) ? SPACE : 0x1f); | |
2374 else { | |
2375 c1 = c1 - 0x7e; | |
2376 c2++; | |
2377 } | |
2378 | |
2379 #ifdef X0212_ENABLE | |
2380 if (x0212_f){ | |
2381 c2 = x0212_unshift(c2); | |
2382 } | |
2383 #endif | |
2384 if (p2) *p2 = c2; | |
2385 if (p1) *p1 = c1; | |
2386 return 0; | |
2387 } | |
2388 | |
2389 int | |
2390 s_iconv(c2, c1, c0) | |
2391 int c2, | |
2392 c1, c0; | |
2393 { | |
2394 if (c2 == X0201) { | |
2395 c1 &= 0x7f; | |
2396 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) { | |
2397 /* NOP */ | |
2398 } else { | |
2399 int ret = s2e_conv(c2, c1, &c2, &c1); | |
2400 if (ret) return ret; | |
2401 } | |
2402 (*oconv)(c2, c1); | |
2403 return 0; | |
2404 } | |
2405 | |
2406 int | |
2407 e_iconv(c2, c1, c0) | |
2408 int c2, | |
2409 c1, c0; | |
2410 { | |
2411 if (c2 == X0201) { | |
2412 c1 &= 0x7f; | |
2413 #ifdef X0212_ENABLE | |
2414 }else if (c2 == 0x8f){ | |
2415 if (c0 == 0){ | |
2416 return -1; | |
2417 } | |
2418 c2 = (c2 << 8) | (c1 & 0x7f); | |
2419 c1 = c0 & 0x7f; | |
2420 #ifdef SHIFTJIS_CP932 | |
2421 if (cp932_f){ | |
2422 int s2, s1; | |
2423 if (e2s_conv(c2, c1, &s2, &s1) == 0){ | |
2424 s2e_conv(s2, s1, &c2, &c1); | |
2425 if ((c2 & 0xff00) == 0){ | |
2426 c1 &= 0x7f; | |
2427 c2 &= 0x7f; | |
2428 } | |
2429 } | |
2430 } | |
2431 #endif /* SHIFTJIS_CP932 */ | |
2432 #endif /* X0212_ENABLE */ | |
2433 } else if (c2 == SSO){ | |
2434 c2 = X0201; | |
2435 c1 &= 0x7f; | |
2436 } else if ((c2 == EOF) || (c2 == 0) || c2 < SPACE) { | |
2437 /* NOP */ | |
2438 } else { | |
2439 c1 &= 0x7f; | |
2440 c2 &= 0x7f; | |
2441 } | |
2442 (*oconv)(c2, c1); | |
2443 return 0; | |
2444 } | |
2445 | |
2446 #ifdef UTF8_INPUT_ENABLE | |
2447 int | |
2448 w2e_conv(c2, c1, c0, p2, p1) | |
2449 int c2, c1, c0; | |
2450 int *p2, *p1; | |
2451 { | |
2452 extern unsigned short * utf8_to_euc_2bytes[]; | |
2453 extern unsigned short ** utf8_to_euc_3bytes[]; | |
2454 int ret = 0; | |
2455 | |
2456 if (0xc0 <= c2 && c2 <= 0xef) { | |
2457 unsigned short **pp; | |
2458 | |
2459 if (0xe0 <= c2) { | |
2460 if (c0 == 0) return -1; | |
2461 pp = utf8_to_euc_3bytes[c2 - 0x80]; | |
2462 ret = w_iconv_common(c1, c0, pp, sizeof_utf8_to_euc_C2, p2, p1); | |
2463 } else { | |
2464 ret = w_iconv_common(c2, c1, utf8_to_euc_2bytes, sizeof_utf8_to_euc_2bytes, p2, p1); | |
2465 } | |
2466 #ifdef NUMCHAR_OPTION | |
2467 if (ret){ | |
2468 if (p2) *p2 = 0; | |
2469 if (p1) *p1 = CLASS_UTF16 | ww16_conv(c2, c1, c0); | |
2470 ret = 0; | |
2471 } | |
2472 #endif | |
2473 return ret; | |
2474 } else if (c2 == X0201) { | |
2475 c1 &= 0x7f; | |
2476 } | |
2477 if (p2) *p2 = c2; | |
2478 if (p1) *p1 = c1; | |
2479 return ret; | |
2480 } | |
2481 | |
2482 int | |
2483 w_iconv(c2, c1, c0) | |
2484 int c2, | |
2485 c1, c0; | |
2486 { | |
2487 int ret = w2e_conv(c2, c1, c0, &c2, &c1); | |
2488 if (ret == 0){ | |
2489 (*oconv)(c2, c1); | |
2490 } | |
2491 return ret; | |
2492 } | |
2493 | |
/*
 * w16w_conv -- encode a 16-bit code value as UTF-8.
 *
 *   *p2  first byte
 *   *p1  second byte, or 0 for a one-byte sequence
 *   *p0  third byte, or 0 for a one- or two-byte sequence
 */
void
w16w_conv(val, p2, p1, p0)
     unsigned short val;
     int *p2, *p1, *p0;
{
    int lead;
    int second = 0;
    int third = 0;

    if (val >= 0x800) {
        lead = 0xe0 | (val >> 12);
        second = 0x80 | ((val >> 6) & 0x3f);
        third = 0x80 | (val & 0x3f);
    } else if (val >= 0x80) {
        lead = 0xc0 | (val >> 6);
        second = 0x80 | (val & 0x3f);
    } else {
        lead = val;
    }
    *p2 = lead;
    *p1 = second;
    *p0 = third;
}
2513 | |
/*
 * ww16_conv -- decode a UTF-8 sequence of up to three bytes into a 16-bit
 * code value.  The sequence length is taken from the lead byte c2:
 * >= 0xe0 three bytes, >= 0xc0 two bytes, otherwise c2 is returned as is
 * (truncated to 16 bits).
 */
int
ww16_conv(c2, c1, c0)
     int c2, c1, c0;
{
    unsigned short code;

    if (c2 < 0xc0) {
        code = c2;
    } else if (c2 < 0xe0) {
        code = (c2 & 0x1f) << 6;
        code |= (c1 & 0x3f);
    } else {
        code = (c2 & 0x0f) << 12;
        code |= (c1 & 0x3f) << 6;
        code |= (c0 & 0x3f);
    }
    return code;
}
2531 | |
2532 int | |
2533 w16e_conv(val, p2, p1) | |
2534 unsigned short val; | |
2535 int *p2, *p1; | |
2536 { | |
2537 extern unsigned short * utf8_to_euc_2bytes[]; | |
2538 extern unsigned short ** utf8_to_euc_3bytes[]; | |
2539 int c2, c1, c0; | |
2540 unsigned short **pp; | |
2541 int psize; | |
2542 int ret = 0; | |
2543 | |
2544 w16w_conv(val, &c2, &c1, &c0); | |
2545 if (c1){ | |
2546 if (c0){ | |
2547 pp = utf8_to_euc_3bytes[c2 - 0x80]; | |
2548 psize = sizeof_utf8_to_euc_C2; | |
2549 ret = w_iconv_common(c1, c0, pp, psize, p2, p1); | |
2550 }else{ | |
2551 pp = utf8_to_euc_2bytes; | |
2552 psize = sizeof_utf8_to_euc_2bytes; | |
2553 ret = w_iconv_common(c2, c1, pp, psize, p2, p1); | |
2554 } | |
2555 #ifdef NUMCHAR_OPTION | |
2556 if (ret){ | |
2557 *p2 = 0; | |
2558 *p1 = CLASS_UTF16 | val; | |
2559 ret = 0; | |
2560 } | |
2561 #endif | |
2562 }else{ | |
2563 *p2 = 0; | |
2564 *p1 = c2; | |
2565 } | |
2566 return ret; | |
2567 } | |
2568 | |
2569 int | |
2570 w_iconv16(c2, c1, c0) | |
2571 int c2, c1,c0; | |
2572 { | |
2573 int ret; | |
2574 | |
2575 if (c2==0376 && c1==0377){ | |
2576 utf16_mode = UTF16BE_INPUT; | |
2577 return 0; | |
2578 } else if (c2==0377 && c1==0376){ | |
2579 utf16_mode = UTF16LE_INPUT; | |
2580 return 0; | |
2581 } | |
2582 if (c2 != EOF && utf16_mode == UTF16LE_INPUT) { | |
2583 int tmp; | |
2584 tmp=c1; c1=c2; c2=tmp; | |
2585 } | |
2586 if ((c2==0 && c1 < 0x80) || c2==EOF) { | |
2587 (*oconv)(c2, c1); | |
2588 return 0; | |
2589 } | |
2590 ret = w16e_conv(((c2<<8)&0xff00) + c1, &c2, &c1); | |
2591 if (ret) return ret; | |
2592 (*oconv)(c2, c1); | |
2593 return 0; | |
2594 } | |
2595 | |
2596 int | |
2597 w_iconv_common(c1, c0, pp, psize, p2, p1) | |
2598 int c1,c0; | |
2599 unsigned short **pp; | |
2600 int psize; | |
2601 int *p2, *p1; | |
2602 { | |
2603 int c2; | |
2604 unsigned short *p ; | |
2605 unsigned short val; | |
2606 | |
2607 if (pp == 0) return 1; | |
2608 | |
2609 c1 -= 0x80; | |
2610 if (c1 < 0 || psize <= c1) return 1; | |
2611 p = pp[c1]; | |
2612 if (p == 0) return 1; | |
2613 | |
2614 c0 -= 0x80; | |
2615 if (c0 < 0 || sizeof_utf8_to_euc_E5B8 <= c0) return 1; | |
2616 val = p[c0]; | |
2617 if (val == 0) return 1; | |
2618 | |
2619 c2 = val >> 8; | |
2620 if (val & 0x8000){ | |
2621 c2 &= 0x7f; | |
2622 c2 |= 0x8f00; | |
2623 } | |
2624 if (c2 == SO) c2 = X0201; | |
2625 c1 = val & 0x7f; | |
2626 if (p2) *p2 = c2; | |
2627 if (p1) *p1 = c1; | |
2628 return 0; | |
2629 } | |
2630 | |
2631 #endif | |
2632 | |
2633 #ifdef UTF8_OUTPUT_ENABLE | |
2634 int | |
2635 e2w_conv(c2, c1) | |
2636 int c2, c1; | |
2637 { | |
2638 extern unsigned short euc_to_utf8_1byte[]; | |
2639 extern unsigned short * euc_to_utf8_2bytes[]; | |
2640 extern unsigned short * euc_to_utf8_2bytes_ms[]; | |
2641 unsigned short *p; | |
2642 | |
2643 if (c2 == X0201) { | |
2644 p = euc_to_utf8_1byte; | |
2645 #ifdef X0212_ENABLE | |
2646 } else if (c2 >> 8 == 0x8f){ | |
2647 extern unsigned short * x0212_to_utf8_2bytes[]; | |
2648 c2 = (c2&0x7f) - 0x21; | |
2649 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes) | |
2650 p = x0212_to_utf8_2bytes[c2]; | |
2651 else | |
2652 return 0; | |
2653 #endif | |
2654 } else { | |
2655 c2 &= 0x7f; | |
2656 c2 = (c2&0x7f) - 0x21; | |
2657 if (0<=c2 && c2<sizeof_euc_to_utf8_2bytes) | |
2658 p = ms_ucs_map_f ? euc_to_utf8_2bytes_ms[c2] : euc_to_utf8_2bytes[c2]; | |
2659 else | |
2660 return 0; | |
2661 } | |
2662 if (!p) return 0; | |
2663 c1 = (c1 & 0x7f) - 0x21; | |
2664 if (0<=c1 && c1<sizeof_euc_to_utf8_1byte) | |
2665 return p[c1]; | |
2666 return 0; | |
2667 } | |
2668 | |
2669 void | |
2670 w_oconv(c2, c1) | |
2671 int c2, | |
2672 c1; | |
2673 { | |
2674 int c0; | |
2675 if (c2 == EOF) { | |
2676 (*o_putc)(EOF); | |
2677 return; | |
2678 } | |
2679 | |
2680 if (unicode_bom_f==2) { | |
2681 (*o_putc)('\357'); | |
2682 (*o_putc)('\273'); | |
2683 (*o_putc)('\277'); | |
2684 unicode_bom_f=1; | |
2685 } | |
2686 | |
2687 #ifdef NUMCHAR_OPTION | |
2688 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){ | |
2689 w16w_conv(c1, &c2, &c1, &c0); | |
2690 (*o_putc)(c2); | |
2691 if (c1){ | |
2692 (*o_putc)(c1); | |
2693 if (c0) (*o_putc)(c0); | |
2694 } | |
2695 return; | |
2696 } | |
2697 #endif | |
2698 | |
2699 if (c2 == 0) { | |
2700 output_mode = ASCII; | |
2701 (*o_putc)(c1); | |
2702 } else if (c2 == ISO8859_1) { | |
2703 output_mode = ISO8859_1; | |
2704 (*o_putc)(c1 | 0x080); | |
2705 } else { | |
2706 unsigned short val; | |
2707 output_mode = UTF8; | |
2708 val = e2w_conv(c2, c1); | |
2709 if (val){ | |
2710 w16w_conv(val, &c2, &c1, &c0); | |
2711 (*o_putc)(c2); | |
2712 if (c1){ | |
2713 (*o_putc)(c1); | |
2714 if (c0) (*o_putc)(c0); | |
2715 } | |
2716 } | |
2717 } | |
2718 } | |
2719 | |
2720 void | |
2721 w_oconv16(c2, c1) | |
2722 int c2, | |
2723 c1; | |
2724 { | |
2725 if (c2 == EOF) { | |
2726 (*o_putc)(EOF); | |
2727 return; | |
2728 } | |
2729 | |
2730 if (unicode_bom_f==2) { | |
2731 if (w_oconv16_LE){ | |
2732 (*o_putc)((unsigned char)'\377'); | |
2733 (*o_putc)('\376'); | |
2734 }else{ | |
2735 (*o_putc)('\376'); | |
2736 (*o_putc)((unsigned char)'\377'); | |
2737 } | |
2738 unicode_bom_f=1; | |
2739 } | |
2740 | |
2741 if (c2 == ISO8859_1) { | |
2742 c2 = 0; | |
2743 c1 |= 0x80; | |
2744 #ifdef NUMCHAR_OPTION | |
2745 } else if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16) { | |
2746 c2 = (c1 >> 8) & 0xff; | |
2747 c1 &= 0xff; | |
2748 #endif | |
2749 } else if (c2) { | |
2750 unsigned short val = e2w_conv(c2, c1); | |
2751 c2 = (val >> 8) & 0xff; | |
2752 c1 = val & 0xff; | |
2753 } | |
2754 if (w_oconv16_LE){ | |
2755 (*o_putc)(c1); | |
2756 (*o_putc)(c2); | |
2757 }else{ | |
2758 (*o_putc)(c2); | |
2759 (*o_putc)(c1); | |
2760 } | |
2761 } | |
2762 | |
2763 #endif | |
2764 | |
2765 void | |
2766 e_oconv(c2, c1) | |
2767 int c2, | |
2768 c1; | |
2769 { | |
2770 #ifdef NUMCHAR_OPTION | |
2771 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){ | |
2772 w16e_conv(c1, &c2, &c1); | |
2773 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){ | |
2774 return; | |
2775 } | |
2776 } | |
2777 #endif | |
2778 if (c2 == EOF) { | |
2779 (*o_putc)(EOF); | |
2780 return; | |
2781 } else if (c2 == 0) { | |
2782 output_mode = ASCII; | |
2783 (*o_putc)(c1); | |
2784 } else if (c2 == X0201) { | |
2785 output_mode = JAPANESE_EUC; | |
2786 (*o_putc)(SSO); (*o_putc)(c1|0x80); | |
2787 } else if (c2 == ISO8859_1) { | |
2788 output_mode = ISO8859_1; | |
2789 (*o_putc)(c1 | 0x080); | |
2790 #ifdef X0212_ENABLE | |
2791 } else if ((c2 & 0xff00) >> 8 == 0x8f){ | |
2792 output_mode = JAPANESE_EUC; | |
2793 #ifdef SHIFTJIS_CP932 | |
2794 if (cp932_f){ | |
2795 int s2, s1; | |
2796 if (e2s_conv(c2, c1, &s2, &s1) == 0){ | |
2797 s2e_conv(s2, s1, &c2, &c1); | |
2798 } | |
2799 } | |
2800 #endif | |
2801 if ((c2 & 0xff00) >> 8 == 0x8f){ | |
2802 if (x0212_f){ | |
2803 (*o_putc)(0x8f); | |
2804 (*o_putc)((c2 & 0x7f) | 0x080); | |
2805 (*o_putc)(c1 | 0x080); | |
2806 } | |
2807 }else{ | |
2808 (*o_putc)((c2 & 0x7f) | 0x080); | |
2809 (*o_putc)(c1 | 0x080); | |
2810 } | |
2811 #endif | |
2812 } else { | |
2813 if ((c1<0x21 || 0x7e<c1) || | |
2814 (c2<0x21 || 0x7e<c2)) { | |
2815 set_iconv(FALSE, 0); | |
2816 return; /* too late to rescue this char */ | |
2817 } | |
2818 output_mode = JAPANESE_EUC; | |
2819 (*o_putc)(c2 | 0x080); | |
2820 (*o_putc)(c1 | 0x080); | |
2821 } | |
2822 } | |
2823 | |
2824 #ifdef X0212_ENABLE | |
/*
 * x0212_shift -- remap rows 0x75..0x7f to a value above the 7-bit range.
 *
 * Only the low seven bits of c select the row.  When they are in
 * 0x75..0x7f, values tagged 0x8f00 map to 0x109 + (row - 0x75) and all
 * other values to 0x113 + (row - 0x75).  Anything else is returned
 * unchanged.
 */
int x0212_shift(c)
     int c;
{
    int row = c & 0x7f;

    if (row < 0x75)
        return c;
    if ((c & 0xff00) == 0x8f00)
        return row + (0x109 - 0x75);
    return row + (0x113 - 0x75);
}
2841 | |
2842 | |
/*
 * x0212_unshift -- map row values 0x7f..0x92 back to rows 0x75..0x7e.
 *
 *   0x7f..0x88  ->  0x75..0x7e
 *   0x89..0x92  ->  0x8f00 | 0x80 | (0x75..0x7e)
 *
 * Any other value is returned unchanged.
 */
int x0212_unshift(c)
     int c;
{
    if (0x7f <= c && c <= 0x88)
        return c + (0x75 - 0x7f);
    if (0x89 <= c && c <= 0x92)
        return (0x8f << 8) | 0x80 | (c + (0x75 - 0x89));
    return c;
}
2854 #endif /* X0212_ENABLE */ | |
2855 | |
/*
 * e2s_conv -- convert an internal row/cell pair to Shift_JIS bytes.
 *
 *   c2, c1  internal high and low value (c2 may carry the 0x8f00 tag)
 *   p2, p1  receive the Shift_JIS lead and trail byte; either may be null
 *
 * Returns 0 on success and 1 when a 0x8f00-tagged character cannot be
 * represented.
 *
 * In X0212 builds a tagged character is first looked up in
 * x0212_shiftjis; without a table entry its row goes through
 * x0212_shift.  The cell index is validated the same way as the row index
 * (0x21..0x7e) before it is used; previously a low byte below 0x21
 * produced a negative table index.
 * NOTE(review): this assumes each x0212_shiftjis row covers cells
 * 0x21..0x7e -- confirm against the table definition.
 */
int
e2s_conv(c2, c1, p2, p1)
     int c2, c1, *p2, *p1;
{
#ifdef X0212_ENABLE
    extern unsigned short *x0212_shiftjis[];

    if ((c2 & 0xff00) == 0x8f00){
        int ndx = c2 & 0x7f;
        int cell = c1 & 0x7f;

        if (0x21 <= ndx && ndx <= 0x7e){
            unsigned short *ptr = x0212_shiftjis[ndx - 0x21];
            int val = 0;

            if (ptr && 0x21 <= cell && cell <= 0x7e){
                val = ptr[cell - 0x21];
            }
            if (val){
                c2 = val >> 8;
                c1 = val & 0xff;
                if (p2) *p2 = c2;
                if (p1) *p1 = c1;
                return 0;
            }
        }
        c2 = x0212_shift(c2);
    }
#endif /* X0212_ENABLE */
    if ((c2 & 0xff00) == 0x8f00){
        return 1;
    }
    /* Arithmetic JIS row/cell -> Shift_JIS conversion. */
    if (p2) *p2 = ((c2 - 1) >> 1) + ((c2 <= 0x5e) ? 0x71 : 0xb1);
    if (p1) *p1 = c1 + ((c2 & 1) ? ((c1 < 0x60) ? 0x1f : 0x20) : 0x7e);
    return 0;
}
2890 | |
2891 void | |
2892 s_oconv(c2, c1) | |
2893 int c2, | |
2894 c1; | |
2895 { | |
2896 #ifdef NUMCHAR_OPTION | |
2897 if (c2 == 0 && (c1 & CLASS_MASK) == CLASS_UTF16){ | |
2898 w16e_conv(c1, &c2, &c1); | |
2899 } | |
2900 #endif | |
2901 if (c2 == EOF) { | |
2902 (*o_putc)(EOF); | |
2903 return; | |
2904 } else if (c2 == 0) { | |
2905 output_mode = ASCII; | |
2906 (*o_putc)(c1); | |
2907 } else if (c2 == X0201) { | |
2908 output_mode = SHIFT_JIS; | |
2909 (*o_putc)(c1|0x80); | |
2910 } else if (c2 == ISO8859_1) { | |
2911 output_mode = ISO8859_1; | |
2912 (*o_putc)(c1 | 0x080); | |
2913 #ifdef X0212_ENABLE | |
2914 } else if ((c2 & 0xff00) >> 8 == 0x8f){ | |
2915 output_mode = SHIFT_JIS; | |
2916 if (e2s_conv(c2, c1, &c2, &c1) == 0){ | |
2917 (*o_putc)(c2); | |
2918 (*o_putc)(c1); | |
2919 } | |
2920 #endif | |
2921 } else { | |
2922 if ((c1<0x20 || 0x7e<c1) || | |
2923 (c2<0x20 || 0x7e<c2)) { | |
2924 set_iconv(FALSE, 0); | |
2925 return; /* too late to rescue this char */ | |
2926 } | |
2927 output_mode = SHIFT_JIS; | |
2928 e2s_conv(c2, c1, &c2, &c1); | |
2929 | |
2930 #ifdef SHIFTJIS_CP932 | |
2931 if (cp932inv_f | |
2932 && CP932INV_TABLE_BEGIN <= c2 && c2 <= CP932INV_TABLE_END){ | |
2933 extern unsigned short cp932inv[2][189]; | |
2934 int c = cp932inv[c2 - CP932INV_TABLE_BEGIN][c1 - 0x40]; | |
2935 if (c){ | |
2936 c2 = c >> 8; | |
2937 c1 = c & 0xff; | |
2938 } | |
2939 } | |
2940 #endif /* SHIFTJIS_CP932 */ | |
2941 | |
2942 (*o_putc)(c2); | |
2943 if (prefix_table[(unsigned char)c1]){ | |
2944 (*o_putc)(prefix_table[(unsigned char)c1]); | |
2945 } | |
2946 (*o_putc)(c1); | |
2947 } | |
2948 } | |
2949 | |
2950 void | |
2951 j_oconv(c2, c1) | |
2952 int c2, | |
2953 c1; | |
2954 { | |
2955 #ifdef NUMCHAR_OPTION | |
2956 if ((c1 & CLASS_MASK) == CLASS_UTF16){ | |
2957 w16e_conv(c1, &c2, &c1); | |
2958 } | |
2959 #endif | |
2960 if (c2 == EOF) { | |
2961 if (output_mode !=ASCII && output_mode!=ISO8859_1) { | |
2962 (*o_putc)(ESC); | |
2963 (*o_putc)('('); | |
2964 (*o_putc)(ascii_intro); | |
2965 output_mode = ASCII; | |
2966 } | |
2967 (*o_putc)(EOF); | |
2968 #ifdef X0212_ENABLE | |
2969 } else if ((c2 & 0xff00) >> 8 == 0x8f){ | |
2970 if (output_mode!=X0212) { | |
2971 output_mode = X0212; | |
2972 (*o_putc)(ESC); | |
2973 (*o_putc)('$'); | |
2974 (*o_putc)('('); | |
2975 (*o_putc)('D'); | |
2976 } | |
2977 (*o_putc)(c2 & 0x7f); | |
2978 (*o_putc)(c1); | |
2979 #endif | |
2980 } else if (c2==X0201) { | |
2981 if (output_mode!=X0201) { | |
2982 output_mode = X0201; | |
2983 (*o_putc)(ESC); | |
2984 (*o_putc)('('); | |
2985 (*o_putc)('I'); | |
2986 } | |
2987 (*o_putc)(c1); | |
2988 } else if (c2==ISO8859_1) { | |
2989 /* iso8859 introduction, or 8th bit on */ | |
2990 /* Can we convert in 7bit form using ESC-'-'-A ? | |
2991 Is this popular? */ | |
2992 output_mode = ISO8859_1; | |
2993 (*o_putc)(c1|0x80); | |
2994 } else if (c2 == 0) { | |
2995 if (output_mode !=ASCII && output_mode!=ISO8859_1) { | |
2996 (*o_putc)(ESC); | |
2997 (*o_putc)('('); | |
2998 (*o_putc)(ascii_intro); | |
2999 output_mode = ASCII; | |
3000 } | |
3001 (*o_putc)(c1); | |
3002 } else { | |
3003 if (output_mode != X0208) { | |
3004 output_mode = X0208; | |
3005 (*o_putc)(ESC); | |
3006 (*o_putc)('$'); | |
3007 (*o_putc)(kanji_intro); | |
3008 } | |
3009 if (c1<0x20 || 0x7e<c1) | |
3010 return; | |
3011 if (c2<0x20 || 0x7e<c2) | |
3012 return; | |
3013 (*o_putc)(c2); | |
3014 (*o_putc)(c1); | |
3015 } | |
3016 } | |
3017 | |
3018 void | |
3019 base64_conv(c2, c1) | |
3020 int c2, | |
3021 c1; | |
3022 { | |
3023 mime_prechar(c2, c1); | |
3024 (*o_base64conv)(c2,c1); | |
3025 } | |
3026 | |
3027 | |
3028 static int broken_buf[3]; | |
3029 static int broken_counter = 0; | |
3030 static int broken_last = 0; | |
3031 int | |
3032 broken_getc(f) | |
3033 FILE *f; | |
3034 { | |
3035 int c,c1; | |
3036 | |
3037 if (broken_counter>0) { | |
3038 return broken_buf[--broken_counter]; | |
3039 } | |
3040 c= (*i_bgetc)(f); | |
3041 if (c=='$' && broken_last != ESC | |
3042 && (input_mode==ASCII || input_mode==X0201)) { | |
3043 c1= (*i_bgetc)(f); | |
3044 broken_last = 0; | |
3045 if (c1=='@'|| c1=='B') { | |
3046 broken_buf[0]=c1; broken_buf[1]=c; | |
3047 broken_counter=2; | |
3048 return ESC; | |
3049 } else { | |
3050 (*i_bungetc)(c1,f); | |
3051 return c; | |
3052 } | |
3053 } else if (c=='(' && broken_last != ESC | |
3054 && (input_mode==X0208 || input_mode==X0201)) { /* ) */ | |
3055 c1= (*i_bgetc)(f); | |
3056 broken_last = 0; | |
3057 if (c1=='J'|| c1=='B') { | |
3058 broken_buf[0]=c1; broken_buf[1]=c; | |
3059 broken_counter=2; | |
3060 return ESC; | |
3061 } else { | |
3062 (*i_bungetc)(c1,f); | |
3063 return c; | |
3064 } | |
3065 } else { | |
3066 broken_last = c; | |
3067 return c; | |
3068 } | |
3069 } | |
3070 | |
3071 int | |
3072 broken_ungetc(c,f) | |
3073 int c; | |
3074 FILE *f; | |
3075 { | |
3076 if (broken_counter<2) | |
3077 broken_buf[broken_counter++]=c; | |
3078 return c; | |
3079 } | |
3080 | |
3081 static int prev_cr = 0; | |
3082 | |
3083 void | |
3084 cr_conv(c2,c1) | |
3085 int c2,c1; | |
3086 { | |
3087 if (prev_cr) { | |
3088 prev_cr = 0; | |
3089 if (! (c2==0&&c1==NL) ) { | |
3090 cr_conv(0,'\n'); | |
3091 } | |
3092 } | |
3093 if (c2) { | |
3094 (*o_crconv)(c2,c1); | |
3095 } else if (c1=='\r') { | |
3096 prev_cr = c1; | |
3097 } else if (c1=='\n') { | |
3098 if (crmode_f==CRLF) { | |
3099 (*o_crconv)(0,'\r'); | |
3100 } else if (crmode_f==CR) { | |
3101 (*o_crconv)(0,'\r'); | |
3102 return; | |
3103 } | |
3104 (*o_crconv)(0,NL); | |
3105 } else if (c1!='\032' || crmode_f!=NL){ | |
3106 (*o_crconv)(c2,c1); | |
3107 } | |
3108 } | |
3109 | |
3110 /* | |
3111 Return value of fold_conv() | |
3112 | |
3113 \n add newline and output char | |
3114 \r add newline and output nothing | |
3115 ' ' space | |
3116 0 skip | |
3117 1 (or else) normal output | |
3118 | |
3119 fold state in prev (previous character) | |
3120 | |
3121 >0x80 Japanese (X0208/X0201) | |
3122 <0x80 ASCII | |
3123 \n new line | |
3124 ' ' space | |
3125 | |
3126 This fold algorithm does not preserve leading spaces in a line. | |
3127 This is the main difference from fmt. | |
3128 */ | |
3129 | |
3130 #define char_size(c2,c1) (c2?2:1) | |
3131 | |
3132 void | |
3133 fold_conv(c2,c1) | |
3134 int c2,c1; | |
3135 { | |
3136 int prev0; | |
3137 int fold_state=0; | |
3138 | |
3139 if (c1== '\r' && !fold_preserve_f) { | |
3140 fold_state=0; /* ignore cr */ | |
3141 }else if (c1== '\n'&&f_prev=='\r' && fold_preserve_f) { | |
3142 f_prev = '\n'; | |
3143 fold_state=0; /* ignore cr */ | |
3144 } else if (c1== BS) { | |
3145 if (f_line>0) f_line--; | |
3146 fold_state = 1; | |
3147 } else if (c2==EOF && f_line != 0) { /* close open last line */ | |
3148 fold_state = '\n'; | |
3149 } else if ((c1=='\n' && !fold_preserve_f) | |
3150 || ((c1=='\r'||(c1=='\n'&&f_prev!='\r')) | |
3151 && fold_preserve_f)) { | |
3152 /* new line */ | |
3153 if (fold_preserve_f) { | |
3154 f_prev = c1; | |
3155 f_line = 0; | |
3156 fold_state = '\r'; | |
3157 } else if ((f_prev == c1 && !fold_preserve_f) | |
3158 || (f_prev == '\n' && fold_preserve_f) | |
3159 ) { /* duplicate newline */ | |
3160 if (f_line) { | |
3161 f_line = 0; | |
3162 fold_state = '\n'; /* output two newline */ | |
3163 } else { | |
3164 f_line = 0; | |
3165 fold_state = 1; | |
3166 } | |
3167 } else { | |
3168 if (f_prev&0x80) { /* Japanese? */ | |
3169 f_prev = c1; | |
3170 fold_state = 0; /* ignore given single newline */ | |
3171 } else if (f_prev==' ') { | |
3172 fold_state = 0; | |
3173 } else { | |
3174 f_prev = c1; | |
3175 if (++f_line<=fold_len) | |
3176 fold_state = ' '; | |
3177 else { | |
3178 f_line = 0; | |
3179 fold_state = '\r'; /* fold and output nothing */ | |
3180 } | |
3181 } | |
3182 } | |
3183 } else if (c1=='\f') { | |
3184 f_prev = '\n'; | |
3185 if (f_line==0) | |
3186 fold_state = 1; | |
3187 f_line = 0; | |
3188 fold_state = '\n'; /* output newline and clear */ | |
3189 } else if ( (c2==0 && c1==' ')|| | |
3190 (c2==0 && c1=='\t')|| | |
3191 (c2=='!'&& c1=='!')) { | |
3192 /* X0208 kankaku or ascii space */ | |
3193 if (f_prev == ' ') { | |
3194 fold_state = 0; /* remove duplicate spaces */ | |
3195 } else { | |
3196 f_prev = ' '; | |
3197 if (++f_line<=fold_len) | |
3198 fold_state = ' '; /* output ASCII space only */ | |
3199 else { | |
3200 f_prev = ' '; f_line = 0; | |
3201 fold_state = '\r'; /* fold and output nothing */ | |
3202 } | |
3203 } | |
3204 } else { | |
3205 prev0 = f_prev; /* we still need this one... , but almost done */ | |
3206 f_prev = c1; | |
3207 if (c2 || c2==X0201) | |
3208 f_prev |= 0x80; /* this is Japanese */ | |
3209 f_line += char_size(c2,c1); | |
3210 if (f_line<=fold_len) { /* normal case */ | |
3211 fold_state = 1; | |
3212 } else { | |
3213 if (f_line>=fold_len+fold_margin) { /* too many kinsou suspension */ | |
3214 f_line = char_size(c2,c1); | |
3215 fold_state = '\n'; /* We can't wait, do fold now */ | |
3216 } else if (c2==X0201) { | |
3217 /* simple kinsoku rules return 1 means no folding */ | |
3218 if (c1==(0xde&0x7f)) fold_state = 1; /* $B!+(B*/ | |
3219 else if (c1==(0xdf&0x7f)) fold_state = 1; /* $B!,(B*/ | |
3220 else if (c1==(0xa4&0x7f)) fold_state = 1; /* $B!#(B*/ | |
3221 else if (c1==(0xa3&0x7f)) fold_state = 1; /* $B!$(B*/ | |
3222 else if (c1==(0xa1&0x7f)) fold_state = 1; /* $B!W(B*/ | |
3223 else if (c1==(0xb0&0x7f)) fold_state = 1; /* - */ | |
3224 else if (SPACE<=c1 && c1<=(0xdf&0x7f)) { /* X0201 */ | |
3225 f_line = 1; | |
3226 fold_state = '\n';/* add one new f_line before this character */ | |
3227 } else { | |
3228 f_line = 1; | |
3229 fold_state = '\n';/* add one new f_line before this character */ | |
3230 } | |
3231 } else if (c2==0) { | |
3232 /* kinsoku point in ASCII */ | |
3233 if ( c1==')'|| /* { [ ( */ | |
3234 c1==']'|| | |
3235 c1=='}'|| | |
3236 c1=='.'|| | |
3237 c1==','|| | |
3238 c1=='!'|| | |
3239 c1=='?'|| | |
3240 c1=='/'|| | |
3241 c1==':'|| | |
3242 c1==';' ) { | |
3243 fold_state = 1; | |
3244 /* just after special */ | |
3245 } else if (!is_alnum(prev0)) { | |
3246 f_line = char_size(c2,c1); | |
3247 fold_state = '\n'; | |
3248 } else if ((prev0==' ') || /* ignored new f_line */ | |
3249 (prev0=='\n')|| /* ignored new f_line */ | |
3250 (prev0&0x80)) { /* X0208 - ASCII */ | |
3251 f_line = char_size(c2,c1); | |
3252 fold_state = '\n';/* add one new f_line before this character */ | |
3253 } else { | |
3254 fold_state = 1; /* default no fold in ASCII */ | |
3255 } | |
3256 } else { | |
3257 if (c2=='!') { | |
3258 if (c1=='"') fold_state = 1; /* $B!"(B */ | |
3259 else if (c1=='#') fold_state = 1; /* $B!#(B */ | |
3260 else if (c1=='W') fold_state = 1; /* $B!W(B */ | |
3261 else if (c1=='K') fold_state = 1; /* $B!K(B */ | |
3262 else if (c1=='$') fold_state = 1; /* $B!$(B */ | |
3263 else if (c1=='%') fold_state = 1; /* $B!%(B */ | |
3264 else if (c1=='\'') fold_state = 1; /* $B!\(B */ | |
3265 else if (c1=='(') fold_state = 1; /* $B!((B */ | |
3266 else if (c1==')') fold_state = 1; /* $B!)(B */ | |
3267 else if (c1=='*') fold_state = 1; /* $B!*(B */ | |
3268 else if (c1=='+') fold_state = 1; /* $B!+(B */ | |
3269 else if (c1==',') fold_state = 1; /* $B!,(B */ | |
3270 /* default no fold in kinsoku */ | |
3271 else { | |
3272 fold_state = '\n'; | |
3273 f_line = char_size(c2,c1); | |
3274 /* add one new f_line before this character */ | |
3275 } | |
3276 } else { | |
3277 f_line = char_size(c2,c1); | |
3278 fold_state = '\n'; | |
3279 /* add one new f_line before this character */ | |
3280 } | |
3281 } | |
3282 } | |
3283 } | |
3284 /* terminator process */ | |
3285 switch(fold_state) { | |
3286 case '\n': | |
3287 (*o_fconv)(0,'\n'); | |
3288 (*o_fconv)(c2,c1); | |
3289 break; | |
3290 case 0: | |
3291 return; | |
3292 case '\r': | |
3293 (*o_fconv)(0,'\n'); | |
3294 break; | |
3295 case '\t': | |
3296 case ' ': | |
3297 (*o_fconv)(0,' '); | |
3298 break; | |
3299 default: | |
3300 (*o_fconv)(c2,c1); | |
3301 } | |
3302 } | |
3303 | |
3304 int z_prev2=0,z_prev1=0; | |
3305 | |
3306 void | |
3307 z_conv(c2,c1) | |
3308 int c2,c1; | |
3309 { | |
3310 | |
3311 /* if (c2) c1 &= 0x7f; assertion */ | |
3312 | |
3313 if (x0201_f && z_prev2==X0201) { /* X0201 */ | |
3314 if (c1==(0xde&0x7f)) { /* $BByE@(B */ | |
3315 z_prev2=0; | |
3316 (*o_zconv)(dv[(z_prev1-SPACE)*2],dv[(z_prev1-SPACE)*2+1]); | |
3317 return; | |
3318 } else if (c1==(0xdf&0x7f)&&ev[(z_prev1-SPACE)*2]) { /* $BH>ByE@(B */ | |
3319 z_prev2=0; | |
3320 (*o_zconv)(ev[(z_prev1-SPACE)*2],ev[(z_prev1-SPACE)*2+1]); | |
3321 return; | |
3322 } else { | |
3323 z_prev2=0; | |
3324 (*o_zconv)(cv[(z_prev1-SPACE)*2],cv[(z_prev1-SPACE)*2+1]); | |
3325 } | |
3326 } | |
3327 | |
3328 if (c2==EOF) { | |
3329 (*o_zconv)(c2,c1); | |
3330 return; | |
3331 } | |
3332 | |
3333 if (x0201_f && c2==X0201) { | |
3334 if (dv[(c1-SPACE)*2]||ev[(c1-SPACE)*2]) { | |
3335 /* wait for $BByE@(B or $BH>ByE@(B */ | |
3336 z_prev1 = c1; z_prev2 = c2; | |
3337 return; | |
3338 } else { | |
3339 (*o_zconv)(cv[(c1-SPACE)*2],cv[(c1-SPACE)*2+1]); | |
3340 return; | |
3341 } | |
3342 } | |
3343 | |
3344 /* JISX0208 Alphabet */ | |
3345 if (alpha_f && c2 == 0x23 ) { | |
3346 c2 = 0; | |
3347 } else if (alpha_f && c2 == 0x21 ) { | |
3348 /* JISX0208 Kigou */ | |
3349 if (0x21==c1) { | |
3350 if (alpha_f&0x2) { | |
3351 c1 = ' '; | |
3352 c2 = 0; | |
3353 } else if (alpha_f&0x4) { | |
3354 (*o_zconv)(0,' '); | |
3355 (*o_zconv)(0,' '); | |
3356 return; | |
3357 } | |
3358 } else if (0x20<c1 && c1<0x7f && fv[c1-0x20]) { | |
3359 c1 = fv[c1-0x20]; | |
3360 c2 = 0; | |
3361 if (alpha_f&0x8) { | |
3362 char *entity = 0; | |
3363 switch (c1){ | |
3364 case '>': entity = ">"; break; | |
3365 case '<': entity = "<"; break; | |
3366 case '\"': entity = """; break; | |
3367 case '&': entity = "&"; break; | |
3368 } | |
3369 if (entity){ | |
3370 while (*entity) (*o_zconv)(0, *entity++); | |
3371 return; | |
3372 } | |
3373 } | |
3374 } | |
3375 } | |
3376 (*o_zconv)(c2,c1); | |
3377 } | |
3378 | |
3379 | |
/*
 * rot13(c): rotate an ASCII letter by 13 places; any other value is
 * returned unchanged.
 * rot47(c): rotate a value in '!'..'~' by 47 places; any other value is
 * returned unchanged.
 *
 * Both macros evaluate their argument several times, so it must be free of
 * side effects.  Every use of the argument is parenthesized so that an
 * expression argument (e.g. rot13(x & 0x7f)) is handled correctly.
 */
#define rot13(c)  ( \
      ((c) <  'A') ? (c) : \
      ((c) <= 'M') ? ((c) + 13) : \
      ((c) <= 'Z') ? ((c) - 13) : \
      ((c) <  'a') ? (c) : \
      ((c) <= 'm') ? ((c) + 13) : \
      ((c) <= 'z') ? ((c) - 13) : \
      (c) \
)

#define  rot47(c) ( \
      ((c) <  '!') ? (c) : \
      ((c) <= 'O') ? ((c) + 47) : \
      ((c) <= '~') ? ((c) - 47) : \
      (c) \
)
3396 | |
3397 void | |
3398 rot_conv(c2,c1) | |
3399 int c2,c1; | |
3400 { | |
3401 if (c2==0 || c2==X0201 || c2==ISO8859_1) { | |
3402 c1 = rot13(c1); | |
3403 } else if (c2) { | |
3404 c1 = rot47(c1); | |
3405 c2 = rot47(c2); | |
3406 } | |
3407 (*o_rot_conv)(c2,c1); | |
3408 } | |
3409 | |
3410 void | |
3411 hira_conv(c2,c1) | |
3412 int c2,c1; | |
3413 { | |
3414 if ((hira_f & 1) && c2==0x25 && 0x20<c1 && c1<0x74) { | |
3415 c2 = 0x24; | |
3416 } else if ((hira_f & 2) && c2==0x24 && 0x20<c1 && c1<0x74) { | |
3417 c2 = 0x25; | |
3418 } | |
3419 (*o_hira_conv)(c2,c1); | |
3420 } | |
3421 | |
3422 | |
3423 void | |
3424 iso2022jp_check_conv(c2,c1) | |
3425 int c2, c1; | |
3426 { | |
3427 static int range[RANGE_NUM_MAX][2] = { | |
3428 {0x222f, 0x2239,}, | |
3429 {0x2242, 0x2249,}, | |
3430 {0x2251, 0x225b,}, | |
3431 {0x226b, 0x2271,}, | |
3432 {0x227a, 0x227d,}, | |
3433 {0x2321, 0x232f,}, | |
3434 {0x233a, 0x2340,}, | |
3435 {0x235b, 0x2360,}, | |
3436 {0x237b, 0x237e,}, | |
3437 {0x2474, 0x247e,}, | |
3438 {0x2577, 0x257e,}, | |
3439 {0x2639, 0x2640,}, | |
3440 {0x2659, 0x267e,}, | |
3441 {0x2742, 0x2750,}, | |
3442 {0x2772, 0x277e,}, | |
3443 {0x2841, 0x287e,}, | |
3444 {0x4f54, 0x4f7e,}, | |
3445 {0x7425, 0x747e}, | |
3446 }; | |
3447 int i; | |
3448 int start, end, c; | |
3449 | |
3450 if(c2 >= 0x00 && c2 <= 0x20 && c1 >= 0x7f && c1 <= 0xff) { | |
3451 c2 = GETA1; | |
3452 c1 = GETA2; | |
3453 } | |
3454 if((c2 >= 0x29 && c2 <= 0x2f) || (c2 >= 0x75 && c2 <= 0x7e)) { | |
3455 c2 = GETA1; | |
3456 c1 = GETA2; | |
3457 } | |
3458 | |
3459 for (i = 0; i < RANGE_NUM_MAX; i++) { | |
3460 start = range[i][0]; | |
3461 end = range[i][1]; | |
3462 c = (c2 << 8) + c1; | |
3463 if (c >= start && c <= end) { | |
3464 c2 = GETA1; | |
3465 c1 = GETA2; | |
3466 } | |
3467 } | |
3468 (*o_iso2022jp_check_conv)(c2,c1); | |
3469 } | |
3470 | |
3471 | |
/* This converts  =?ISO-2022-JP?B?HOGE HOGE?= */

/*
 * Encoded-word prefixes, terminated by NULL.  The leading '=' is spelled
 * \075 in every entry.  Entry i belongs together with
 * mime_priority_func[i], mime_encode[i] and mime_encode_method[i].
 */
unsigned char *mime_pattern[] = {
    (unsigned char *)"\075?EUC-JP?B?",          /* 0 */
    (unsigned char *)"\075?SHIFT_JIS?B?",       /* 1 */
    (unsigned char *)"\075?ISO-8859-1?Q?",      /* 2 */
    (unsigned char *)"\075?ISO-8859-1?B?",      /* 3 */
    (unsigned char *)"\075?ISO-2022-JP?B?",     /* 4 */
    (unsigned char *)"\075?ISO-2022-JP?Q?",     /* 5 */
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
    (unsigned char *)"\075?UTF-8?B?",
    (unsigned char *)"\075?UTF-8?Q?",
#endif
    (unsigned char *)"\075?US-ASCII?Q?",
    NULL
};
3488 | |
3489 | |
3490 /* $B3:Ev$9$k%3!<%I$NM%@hEY$r>e$2$k$?$a$NL\0u(B */ | |
3491 int (*mime_priority_func[])PROTO((int c2, int c1, int c0)) = { | |
3492 e_iconv, s_iconv, 0, 0, 0, 0, | |
3493 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) | |
3494 w_iconv, w_iconv, | |
3495 #endif | |
3496 0, | |
3497 }; | |
3498 | |
3499 int mime_encode[] = { | |
3500 JAPANESE_EUC, SHIFT_JIS,ISO8859_1, ISO8859_1, X0208, X0201, | |
3501 #if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE) | |
3502 UTF8, UTF8, | |
3503 #endif | |
3504 ASCII, | |
3505 0 | |
3506 }; | |
3507 | |
/*
 * Transfer encoding letter ('B' or 'Q') of each mime_pattern[] entry,
 * terminated by 0.
 */
int mime_encode_method[] = {
    'B',    /* EUC-JP      */
    'B',    /* SHIFT_JIS   */
    'Q',    /* ISO-8859-1  */
    'B',    /* ISO-8859-1  */
    'B',    /* ISO-2022-JP */
    'Q',    /* ISO-2022-JP */
#if defined(UTF8_INPUT_ENABLE) || defined(UTF8_OUTPUT_ENABLE)
    'B',    /* UTF-8       */
    'Q',    /* UTF-8       */
#endif
    'Q',    /* US-ASCII    */
    0
};
3516 | |
3517 | |
/* Size of the recovery buffer used while matching a MIME preamble. */
#define MAXRECOVER 20

/*
 * I don't trust portability of toupper.
 *
 * Character class helpers for plain int character codes.  Each macro
 * evaluates its argument more than once, so the argument must be free of
 * side effects; every use is parenthesized so that expression arguments
 * are handled correctly.
 */
#define nkf_toupper(c)  (('a'<=(c) && (c)<='z')?((c)-('a'-'A')):(c))
#define nkf_isdigit(c)  ('0'<=(c) && (c)<='9')
#define nkf_isxdigit(c)  (nkf_isdigit(c) || ('a'<=(c) && (c)<='f') || ('A'<=(c) && (c)<='F'))
#define nkf_isblank(c) ((c) == SPACE || (c) == TAB)
#define nkf_isspace(c) (nkf_isblank(c) || (c) == CR || (c) == NL)
#define nkf_isalpha(c) (('a' <= (c) && (c) <= 'z') || ('A' <= (c) && (c) <= 'Z'))
#define nkf_isalnum(c) (nkf_isdigit(c) || nkf_isalpha(c))
3528 | |
3529 void | |
3530 switch_mime_getc() | |
3531 { | |
3532 if (i_getc!=mime_getc) { | |
3533 i_mgetc = i_getc; i_getc = mime_getc; | |
3534 i_mungetc = i_ungetc; i_ungetc = mime_ungetc; | |
3535 if(mime_f==STRICT_MIME) { | |
3536 i_mgetc_buf = i_mgetc; i_mgetc = mime_getc_buf; | |
3537 i_mungetc_buf = i_mungetc; i_mungetc = mime_ungetc_buf; | |
3538 } | |
3539 } | |
3540 } | |
3541 | |
3542 void | |
3543 unswitch_mime_getc() | |
3544 { | |
3545 if(mime_f==STRICT_MIME) { | |
3546 i_mgetc = i_mgetc_buf; | |
3547 i_mungetc = i_mungetc_buf; | |
3548 } | |
3549 i_getc = i_mgetc; | |
3550 i_ungetc = i_mungetc; | |
3551 } | |
3552 | |
3553 int | |
3554 mime_begin_strict(f) | |
3555 FILE *f; | |
3556 { | |
3557 int c1 = 0; | |
3558 int i,j,k; | |
3559 unsigned char *p,*q; | |
3560 int r[MAXRECOVER]; /* recovery buffer, max mime pattern lenght */ | |
3561 | |
3562 mime_decode_mode = FALSE; | |
3563 /* =? has been checked */ | |
3564 j = 0; | |
3565 p = mime_pattern[j]; | |
3566 r[0]='='; r[1]='?'; | |
3567 | |
3568 for(i=2;p[i]>' ';i++) { /* start at =? */ | |
3569 if ( ((r[i] = c1 = (*i_getc)(f))==EOF) || nkf_toupper(c1) != p[i] ) { | |
3570 /* pattern fails, try next one */ | |
3571 q = p; | |
3572 while ((p = mime_pattern[++j])) { | |
3573 for(k=2;k<i;k++) /* assume length(p) > i */ | |
3574 if (p[k]!=q[k]) break; | |
3575 if (k==i && nkf_toupper(c1)==p[k]) break; | |
3576 } | |
3577 if (p) continue; /* found next one, continue */ | |
3578 /* all fails, output from recovery buffer */ | |
3579 (*i_ungetc)(c1,f); | |
3580 for(j=0;j<i;j++) { | |
3581 (*oconv)(0,r[j]); | |
3582 } | |
3583 return c1; | |
3584 } | |
3585 } | |
3586 mime_decode_mode = p[i-2]; | |
3587 | |
3588 set_iconv(FALSE, mime_priority_func[j]); | |
3589 clr_code_score(find_inputcode_byfunc(mime_priority_func[j]), SCORE_iMIME); | |
3590 | |
3591 if (mime_decode_mode=='B') { | |
3592 mimebuf_f = unbuf_f; | |
3593 if (!unbuf_f) { | |
3594 /* do MIME integrity check */ | |
3595 return mime_integrity(f,mime_pattern[j]); | |
3596 } | |
3597 } | |
3598 switch_mime_getc(); | |
3599 mimebuf_f = TRUE; | |
3600 return c1; | |
3601 } | |
3602 | |
3603 int | |
3604 mime_getc_buf(f) | |
3605 FILE *f; | |
3606 { | |
3607 /* we don't keep eof of Fifo, becase it contains ?= as | |
3608 a terminator. It was checked in mime_integrity. */ | |
3609 return ((mimebuf_f)? | |
3610 (*i_mgetc_buf)(f):Fifo(mime_input++)); | |
3611 } | |
3612 | |
3613 int | |
3614 mime_ungetc_buf(c,f) | |
3615 FILE *f; | |
3616 int c; | |
3617 { | |
3618 if (mimebuf_f) | |
3619 (*i_mungetc_buf)(c,f); | |
3620 else | |
3621 Fifo(--mime_input)=c; | |
3622 return c; | |
3623 } | |
3624 | |
3625 int | |
3626 mime_begin(f) | |
3627 FILE *f; | |
3628 { | |
3629 int c1; | |
3630 int i,k; | |
3631 | |
3632 /* In NONSTRICT mode, only =? is checked. In case of failure, we */ | |
3633 /* re-read and convert again from mime_buffer. */ | |
3634 | |
3635 /* =? has been checked */ | |
3636 k = mime_last; | |
3637 Fifo(mime_last++)='='; Fifo(mime_last++)='?'; | |
3638 for(i=2;i<MAXRECOVER;i++) { /* start at =? */ | |
3639 /* We accept any character type even if it is breaked by new lines */ | |
3640 c1 = (*i_getc)(f); Fifo(mime_last++)= c1 ; | |
3641 if (c1=='\n'||c1==' '||c1=='\r'|| | |
3642 c1=='-'||c1=='_'||is_alnum(c1) ) continue; | |
3643 if (c1=='=') { | |
3644 /* Failed. But this could be another MIME preemble */ | |
3645 (*i_ungetc)(c1,f); | |
3646 mime_last--; | |
3647 break; | |
3648 } | |
3649 if (c1!='?') break; | |
3650 else { | |
3651 /* c1=='?' */ | |
3652 c1 = (*i_getc)(f); Fifo(mime_last++) = c1; | |
3653 if (!(++i<MAXRECOVER) || c1==EOF) break; | |
3654 if (c1=='b'||c1=='B') { | |
3655 mime_decode_mode = 'B'; | |
3656 } else if (c1=='q'||c1=='Q') { | |
3657 mime_decode_mode = 'Q'; | |
3658 } else { | |
3659 break; | |
3660 } | |
3661 c1 = (*i_getc)(f); Fifo(mime_last++) = c1; | |
3662 if (!(++i<MAXRECOVER) || c1==EOF) break; | |
3663 if (c1!='?') { | |
3664 mime_decode_mode = FALSE; | |
3665 } | |
3666 break; | |
3667 } | |
3668 } | |
3669 switch_mime_getc(); | |
3670 if (!mime_decode_mode) { | |
3671 /* false MIME premble, restart from mime_buffer */ | |
3672 mime_decode_mode = 1; /* no decode, but read from the mime_buffer */ | |
3673 /* Since we are in MIME mode until buffer becomes empty, */ | |
3674 /* we never go into mime_begin again for a while. */ | |
3675 return c1; | |
3676 } | |
3677 /* discard mime preemble, and goto MIME mode */ | |
3678 mime_last = k; | |
3679 /* do no MIME integrity check */ | |
3680 return c1; /* used only for checking EOF */ | |
3681 } | |
3682 | |
#ifdef CHECK_OPTION
/* no_putc: output routine that discards its argument. */
void
no_putc(c)
int c;
{
    /* nothing to do */
}

/* debug: print str followed by a newline on stderr when debug_f is set. */
void debug(str)
char *str;
{
    if (!debug_f) {
        return;
    }
    fprintf(stderr, "%s\n", str);
}
#endif
3699 | |
3700 void | |
3701 set_input_codename (codename) | |
3702 char *codename; | |
3703 { | |
3704 if (guess_f && | |
3705 is_inputcode_set && | |
3706 strcmp(codename, "") != 0 && | |
3707 strcmp(codename, input_codename) != 0) | |
3708 { | |
3709 is_inputcode_mixed = TRUE; | |
3710 } | |
3711 input_codename = codename; | |
3712 is_inputcode_set = TRUE; | |
3713 } | |
3714 | |
3715 #ifndef WIN32DLL | |
3716 void | |
3717 print_guessed_code (filename) | |
3718 char *filename; | |
3719 { | |
3720 char *codename = "BINARY"; | |
3721 if (!is_inputcode_mixed) { | |
3722 if (strcmp(input_codename, "") == 0) { | |
3723 codename = "ASCII"; | |
3724 } else { | |
3725 codename = input_codename; | |
3726 } | |
3727 } | |
3728 if (filename != NULL) printf("%s:", filename); | |
3729 printf("%s\n", codename); | |
3730 } | |
3731 #endif /*WIN32DLL*/ | |
3732 | |
/*
 * hex2bin: value of the hexadecimal digit x ('0'-'9', 'a'-'f', 'A'-'F').
 * The argument is not validated; callers check it beforehand.
 */
int
hex2bin(x)
int x;
{
    if ('0' <= x && x <= '9') {
        return x - '0';
    }
    if ('a' <= x && x <= 'z') {
        x -= 'a' - 'A';         /* fold to upper case */
    }
    return x - 'A' + 10;
}
3740 | |
3741 #ifdef INPUT_OPTION | |
3742 | |
/*
 * hex_getc: read one character through g, decoding an escape of the form
 * <ch><hex><hex> into the byte it denotes.
 *
 *   ch  escape introducer (':' for cap_getc, '%' for url_getc)
 *   f   input stream handed to g and u
 *   g   getc-like routine used to read
 *   u   ungetc-like routine used to push characters back
 *
 * If the character read is not ch it is returned as is.  If ch is not
 * followed by two hexadecimal digits, the characters read after it are
 * pushed back and ch itself is returned.
 */
#ifdef ANSI_C_PROTOTYPE
int hex_getc(int ch, FILE *f, int (*g)(FILE *f), int (*u)(int c, FILE *f))
#else
int
hex_getc(ch, f, g, u)
     int ch;
     FILE *f;
     int (*g)();
     int (*u)();
#endif
{
    int c1, c2, c3;
    c1 = (*g)(f);
    if (c1 != ch){
        return c1;
    }
    c2 = (*g)(f);
    if (!nkf_isxdigit(c2)){
        (*u)(c2, f);
        return c1;
    }
    c3 = (*g)(f);
    if (!nkf_isxdigit(c3)){
        /* Push back in reverse order of reading: the push-back routines
           return the most recently pushed character first, so c3 must go
           in before c2 for the input to be re-read as c2, c3. */
        (*u)(c3, f);
        (*u)(c2, f);
        return c1;
    }
    return (hex2bin(c2) << 4) | hex2bin(c3);
}
3772 | |
3773 int | |
3774 cap_getc(f) | |
3775 FILE *f; | |
3776 { | |
3777 return hex_getc(':', f, i_cgetc, i_cungetc); | |
3778 } | |
3779 | |
3780 int | |
3781 cap_ungetc(c, f) | |
3782 int c; | |
3783 FILE *f; | |
3784 { | |
3785 return (*i_cungetc)(c, f); | |
3786 } | |
3787 | |
3788 int | |
3789 url_getc(f) | |
3790 FILE *f; | |
3791 { | |
3792 return hex_getc('%', f, i_ugetc, i_uungetc); | |
3793 } | |
3794 | |
3795 int | |
3796 url_ungetc(c, f) | |
3797 int c; | |
3798 FILE *f; | |
3799 { | |
3800 return (*i_uungetc)(c, f); | |
3801 } | |
3802 #endif | |
3803 | |
3804 #ifdef NUMCHAR_OPTION | |
3805 int | |
3806 numchar_getc(f) | |
3807 FILE *f; | |
3808 { | |
3809 int (*g)() = i_ngetc; | |
3810 int (*u)() = i_nungetc; | |
3811 int i = 0, j; | |
3812 int buf[8]; | |
3813 long c = -1; | |
3814 | |
3815 buf[i] = (*g)(f); | |
3816 if (buf[i] == '&'){ | |
3817 buf[++i] = (*g)(f); | |
3818 if (buf[i] == '#'){ | |
3819 c = 0; | |
3820 buf[++i] = (*g)(f); | |
3821 if (buf[i] == 'x' || buf[i] == 'X'){ | |
3822 for (j = 0; j < 5; j++){ | |
3823 buf[++i] = (*g)(f); | |
3824 if (!nkf_isxdigit(buf[i])){ | |
3825 if (buf[i] != ';'){ | |
3826 c = -1; | |
3827 } | |
3828 break; | |
3829 } | |
3830 c <<= 4; | |
3831 c |= hex2bin(buf[i]); | |
3832 } | |
3833 }else{ | |
3834 for (j = 0; j < 6; j++){ | |
3835 if (j){ | |
3836 buf[++i] = (*g)(f); | |
3837 } | |
3838 if (!nkf_isdigit(buf[i])){ | |
3839 if (buf[i] != ';'){ | |
3840 c = -1; | |
3841 } | |
3842 break; | |
3843 } | |
3844 c *= 10; | |
3845 c += hex2bin(buf[i]); | |
3846 } | |
3847 } | |
3848 } | |
3849 } | |
3850 if (c != -1){ | |
3851 return CLASS_UTF16 | c; | |
3852 } | |
3853 while (i > 0){ | |
3854 (*u)(buf[i], f); | |
3855 --i; | |
3856 } | |
3857 return buf[0]; | |
3858 } | |
3859 | |
3860 int | |
3861 numchar_ungetc(c, f) | |
3862 int c; | |
3863 FILE *f; | |
3864 { | |
3865 return (*i_nungetc)(c, f); | |
3866 } | |
3867 #endif | |
3868 | |
3869 | |
3870 int | |
3871 mime_getc(f) | |
3872 FILE *f; | |
3873 { | |
3874 int c1, c2, c3, c4, cc; | |
3875 int t1, t2, t3, t4, mode, exit_mode; | |
3876 int lwsp_count; | |
3877 char *lwsp_buf; | |
3878 char *lwsp_buf_new; | |
3879 int lwsp_size = 128; | |
3880 | |
3881 if (mime_top != mime_last) { /* Something is in FIFO */ | |
3882 return Fifo(mime_top++); | |
3883 } | |
3884 if (mime_decode_mode==1 ||mime_decode_mode==FALSE) { | |
3885 mime_decode_mode=FALSE; | |
3886 unswitch_mime_getc(); | |
3887 return (*i_getc)(f); | |
3888 } | |
3889 | |
3890 if (mimebuf_f == FIXED_MIME) | |
3891 exit_mode = mime_decode_mode; | |
3892 else | |
3893 exit_mode = FALSE; | |
3894 if (mime_decode_mode == 'Q') { | |
3895 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF); | |
3896 restart_mime_q: | |
3897 if (c1=='_') return ' '; | |
3898 if (c1!='=' && c1!='?') { | |
3899 return c1; | |
3900 } | |
3901 | |
3902 mime_decode_mode = exit_mode; /* prepare for quit */ | |
3903 if (c1<=' ') return c1; | |
3904 if ((c2 = (*i_mgetc)(f)) == EOF) return (EOF); | |
3905 if (c1=='?'&&c2=='=' && mimebuf_f != FIXED_MIME) { | |
3906 /* end Q encoding */ | |
3907 input_mode = exit_mode; | |
3908 lwsp_count = 0; | |
3909 lwsp_buf = malloc((lwsp_size+5)*sizeof(char)); | |
3910 if (lwsp_buf==NULL) { | |
3911 perror("can't malloc"); | |
3912 return -1; | |
3913 } | |
3914 while ((c1=(*i_getc)(f))!=EOF) { | |
3915 switch (c1) { | |
3916 case NL: | |
3917 case CR: | |
3918 if (c1==NL) { | |
3919 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) { | |
3920 i_ungetc(SPACE,f); | |
3921 continue; | |
3922 } else { | |
3923 i_ungetc(c1,f); | |
3924 } | |
3925 c1 = NL; | |
3926 } else { | |
3927 if ((c1=(*i_getc)(f))!=EOF && c1 == NL) { | |
3928 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) { | |
3929 i_ungetc(SPACE,f); | |
3930 continue; | |
3931 } else { | |
3932 i_ungetc(c1,f); | |
3933 } | |
3934 i_ungetc(NL,f); | |
3935 } else { | |
3936 i_ungetc(c1,f); | |
3937 } | |
3938 c1 = CR; | |
3939 } | |
3940 break; | |
3941 case SPACE: | |
3942 case TAB: | |
3943 lwsp_buf[lwsp_count] = c1; | |
3944 if (lwsp_count++>lwsp_size){ | |
3945 lwsp_size *= 2; | |
3946 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char)); | |
3947 if (lwsp_buf_new==NULL) { | |
3948 free(lwsp_buf); | |
3949 lwsp_buf = NULL; | |
3950 perror("can't realloc"); | |
3951 return -1; | |
3952 } | |
3953 lwsp_buf = lwsp_buf_new; | |
3954 } | |
3955 continue; | |
3956 } | |
3957 break; | |
3958 } | |
3959 if (lwsp_count > 0) { | |
3960 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) { | |
3961 lwsp_count = 0; | |
3962 } else { | |
3963 i_ungetc(c1,f); | |
3964 for(lwsp_count--;lwsp_count>0;lwsp_count--) | |
3965 i_ungetc(lwsp_buf[lwsp_count],f); | |
3966 c1 = lwsp_buf[0]; | |
3967 } | |
3968 } | |
3969 free(lwsp_buf); | |
3970 lwsp_buf = NULL; | |
3971 return c1; | |
3972 } | |
3973 if (c1=='='&&c2<' ') { /* this is soft wrap */ | |
3974 while((c1 = (*i_mgetc)(f)) <=' ') { | |
3975 if ((c1 = (*i_mgetc)(f)) == EOF) return (EOF); | |
3976 } | |
3977 mime_decode_mode = 'Q'; /* still in MIME */ | |
3978 goto restart_mime_q; | |
3979 } | |
3980 if (c1=='?') { | |
3981 mime_decode_mode = 'Q'; /* still in MIME */ | |
3982 (*i_mungetc)(c2,f); | |
3983 return c1; | |
3984 } | |
3985 if ((c3 = (*i_mgetc)(f)) == EOF) return (EOF); | |
3986 if (c2<=' ') return c2; | |
3987 mime_decode_mode = 'Q'; /* still in MIME */ | |
3988 #define hex(c) (('0'<=c&&c<='9')?(c-'0'):\ | |
3989 ('A'<=c&&c<='F')?(c-'A'+10):('a'<=c&&c<='f')?(c-'a'+10):0) | |
3990 return ((hex(c2)<<4) + hex(c3)); | |
3991 } | |
3992 | |
3993 if (mime_decode_mode != 'B') { | |
3994 mime_decode_mode = FALSE; | |
3995 return (*i_mgetc)(f); | |
3996 } | |
3997 | |
3998 | |
3999 /* Base64 encoding */ | |
4000 /* | |
4001 MIME allows line break in the middle of | |
4002 Base64, but we are very pessimistic in decoding | |
4003 in unbuf mode because MIME encoded code may broken by | |
4004 less or editor's control sequence (such as ESC-[-K in unbuffered | |
4005 mode. ignore incomplete MIME. | |
4006 */ | |
4007 mode = mime_decode_mode; | |
4008 mime_decode_mode = exit_mode; /* prepare for quit */ | |
4009 | |
4010 while ((c1 = (*i_mgetc)(f))<=' ') { | |
4011 if (c1==EOF) | |
4012 return (EOF); | |
4013 } | |
4014 mime_c2_retry: | |
4015 if ((c2 = (*i_mgetc)(f))<=' ') { | |
4016 if (c2==EOF) | |
4017 return (EOF); | |
4018 if (mime_f != STRICT_MIME) goto mime_c2_retry; | |
4019 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII; | |
4020 return c2; | |
4021 } | |
4022 if ((c1 == '?') && (c2 == '=')) { | |
4023 input_mode = ASCII; | |
4024 lwsp_count = 0; | |
4025 lwsp_buf = malloc((lwsp_size+5)*sizeof(char)); | |
4026 if (lwsp_buf==NULL) { | |
4027 perror("can't malloc"); | |
4028 return -1; | |
4029 } | |
4030 while ((c1=(*i_getc)(f))!=EOF) { | |
4031 switch (c1) { | |
4032 case NL: | |
4033 case CR: | |
4034 if (c1==NL) { | |
4035 if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) { | |
4036 i_ungetc(SPACE,f); | |
4037 continue; | |
4038 } else { | |
4039 i_ungetc(c1,f); | |
4040 } | |
4041 c1 = NL; | |
4042 } else { | |
4043 if ((c1=(*i_getc)(f))!=EOF) { | |
4044 if (c1==SPACE) { | |
4045 i_ungetc(SPACE,f); | |
4046 continue; | |
4047 } else if ((c1=(*i_getc)(f))!=EOF && (c1==SPACE||c1==TAB)) { | |
4048 i_ungetc(SPACE,f); | |
4049 continue; | |
4050 } else { | |
4051 i_ungetc(c1,f); | |
4052 } | |
4053 i_ungetc(NL,f); | |
4054 } else { | |
4055 i_ungetc(c1,f); | |
4056 } | |
4057 c1 = CR; | |
4058 } | |
4059 break; | |
4060 case SPACE: | |
4061 case TAB: | |
4062 lwsp_buf[lwsp_count] = c1; | |
4063 if (lwsp_count++>lwsp_size){ | |
4064 lwsp_size *= 2; | |
4065 lwsp_buf_new = realloc(lwsp_buf, (lwsp_size+5)*sizeof(char)); | |
4066 if (lwsp_buf_new==NULL) { | |
4067 free(lwsp_buf); | |
4068 lwsp_buf = NULL; | |
4069 perror("can't realloc"); | |
4070 return -1; | |
4071 } | |
4072 lwsp_buf = lwsp_buf_new; | |
4073 } | |
4074 continue; | |
4075 } | |
4076 break; | |
4077 } | |
4078 if (lwsp_count > 0) { | |
4079 if (c1=='=' && (lwsp_buf[lwsp_count-1]==SPACE||lwsp_buf[lwsp_count-1]==TAB)) { | |
4080 lwsp_count = 0; | |
4081 } else { | |
4082 i_ungetc(c1,f); | |
4083 for(lwsp_count--;lwsp_count>0;lwsp_count--) | |
4084 i_ungetc(lwsp_buf[lwsp_count],f); | |
4085 c1 = lwsp_buf[0]; | |
4086 } | |
4087 } | |
4088 free(lwsp_buf); | |
4089 lwsp_buf = NULL; | |
4090 return c1; | |
4091 } | |
4092 mime_c3_retry: | |
4093 if ((c3 = (*i_mgetc)(f))<=' ') { | |
4094 if (c3==EOF) | |
4095 return (EOF); | |
4096 if (mime_f != STRICT_MIME) goto mime_c3_retry; | |
4097 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII; | |
4098 return c3; | |
4099 } | |
4100 mime_c4_retry: | |
4101 if ((c4 = (*i_mgetc)(f))<=' ') { | |
4102 if (c4==EOF) | |
4103 return (EOF); | |
4104 if (mime_f != STRICT_MIME) goto mime_c4_retry; | |
4105 if (mimebuf_f!=FIXED_MIME) input_mode = ASCII; | |
4106 return c4; | |
4107 } | |
4108 | |
4109 mime_decode_mode = mode; /* still in MIME sigh... */ | |
4110 | |
4111 /* BASE 64 decoding */ | |
4112 | |
4113 t1 = 0x3f & base64decode(c1); | |
4114 t2 = 0x3f & base64decode(c2); | |
4115 t3 = 0x3f & base64decode(c3); | |
4116 t4 = 0x3f & base64decode(c4); | |
4117 cc = ((t1 << 2) & 0x0fc) | ((t2 >> 4) & 0x03); | |
4118 if (c2 != '=') { | |
4119 Fifo(mime_last++) = cc; | |
4120 cc = ((t2 << 4) & 0x0f0) | ((t3 >> 2) & 0x0f); | |
4121 if (c3 != '=') { | |
4122 Fifo(mime_last++) = cc; | |
4123 cc = ((t3 << 6) & 0x0c0) | (t4 & 0x3f); | |
4124 if (c4 != '=') | |
4125 Fifo(mime_last++) = cc; | |
4126 } | |
4127 } else { | |
4128 return c1; | |
4129 } | |
4130 return Fifo(mime_top++); | |
4131 } | |
4132 | |
4133 int | |
4134 mime_ungetc(c,f) | |
4135 int c; | |
4136 FILE *f; | |
4137 { | |
4138 Fifo(--mime_top) = c; | |
4139 return c; | |
4140 } | |
4141 | |
4142 int | |
4143 mime_integrity(f,p) | |
4144 FILE *f; | |
4145 unsigned char *p; | |
4146 { | |
4147 int c,d; | |
4148 unsigned int q; | |
4149 /* In buffered mode, read until =? or NL or buffer full | |
4150 */ | |
4151 mime_input = mime_top; | |
4152 mime_last = mime_top; | |
4153 while(*p) Fifo(mime_input++) = *p++; | |
4154 d = 0; | |
4155 q = mime_input; | |
4156 while((c=(*i_getc)(f))!=EOF) { | |
4157 if (((mime_input-mime_top)&MIME_BUF_MASK)==0) { | |
4158 break; /* buffer full */ | |
4159 } | |
4160 if (c=='=' && d=='?') { | |
4161 /* checked. skip header, start decode */ | |
4162 Fifo(mime_input++) = c; | |
4163 /* mime_last_input = mime_input; */ | |
4164 mime_input = q; | |
4165 switch_mime_getc(); | |
4166 return 1; | |
4167 } | |
4168 if (!( (c=='+'||c=='/'|| c=='=' || c=='?' || is_alnum(c)))) | |
4169 break; | |
4170 /* Should we check length mod 4? */ | |
4171 Fifo(mime_input++) = c; | |
4172 d=c; | |
4173 } | |
4174 /* In case of Incomplete MIME, no MIME decode */ | |
4175 Fifo(mime_input++) = c; | |
4176 mime_last = mime_input; /* point undecoded buffer */ | |
4177 mime_decode_mode = 1; /* no decode on Fifo last in mime_getc */ | |
4178 switch_mime_getc(); /* anyway we need buffered getc */ | |
4179 return 1; | |
4180 } | |
4181 | |
/*
 * base64decode: map a Base64 alphabet character to its 6-bit value
 * (A-Z 0..25, a-z 26..51, 0-9 52..61, '+' 62, '/' 63).
 * The argument is not validated; callers mask the result with 0x3f.
 */
int
base64decode(c)
    int            c;
{
    if (c > '@') {
        /* 'A'..'Z' map to 0..25; above that, c - 'G' is c - 'a' + 26 */
        return (c < '[') ? (c - 'A') : (c - 'G');
    }
    if (c > '/') {
        return c - '0' + '4';   /* '4' is 52 */
    }
    if (c == '+') {
        return '>';             /* 62 */
    }
    return '?';                 /* 63 */
}
4202 | |
/* Base64 alphabet, indexed by 6-bit value. */
static char basis_64[] =
   "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Previous input byte of the Base64 group being written. */
static int b64c;
/* Capacity of the look-behind buffer used by the MIME output routines. */
#define MIMEOUT_BUF_LENGTH (60)
char mimeout_buf[MIMEOUT_BUF_LENGTH+1];
int mimeout_buf_count = 0;
int mimeout_preserve_space = 0;
/* Upper-case hexadecimal digit for a value in 0..15.  The argument is
   evaluated more than once and is parenthesized at every use. */
#define itoh4(c)   ((c)>=10?(c)+'A'-10:(c)+'0')
4212 | |
4213 void | |
4214 open_mime(mode) | |
4215 int mode; | |
4216 { | |
4217 unsigned char *p; | |
4218 int i; | |
4219 int j; | |
4220 p = mime_pattern[0]; | |
4221 for(i=0;mime_encode[i];i++) { | |
4222 if (mode == mime_encode[i]) { | |
4223 p = mime_pattern[i]; | |
4224 break; | |
4225 } | |
4226 } | |
4227 mimeout_mode = mime_encode_method[i]; | |
4228 | |
4229 i = 0; | |
4230 if (base64_count>45) { | |
4231 if (mimeout_buf_count>0 && nkf_isblank(mimeout_buf[i])){ | |
4232 (*o_mputc)(mimeout_buf[i]); | |
4233 i++; | |
4234 } | |
4235 (*o_mputc)(NL); | |
4236 (*o_mputc)(SPACE); | |
4237 base64_count = 1; | |
4238 if (!mimeout_preserve_space && mimeout_buf_count>0 | |
4239 && (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB | |
4240 || mimeout_buf[i]==CR || mimeout_buf[i]==NL )) { | |
4241 i++; | |
4242 } | |
4243 } | |
4244 if (!mimeout_preserve_space) { | |
4245 for (;i<mimeout_buf_count;i++) { | |
4246 if (mimeout_buf[i]==SPACE || mimeout_buf[i]==TAB | |
4247 || mimeout_buf[i]==CR || mimeout_buf[i]==NL ) { | |
4248 (*o_mputc)(mimeout_buf[i]); | |
4249 base64_count ++; | |
4250 } else { | |
4251 break; | |
4252 } | |
4253 } | |
4254 } | |
4255 mimeout_preserve_space = FALSE; | |
4256 | |
4257 while(*p) { | |
4258 (*o_mputc)(*p++); | |
4259 base64_count ++; | |
4260 } | |
4261 j = mimeout_buf_count; | |
4262 mimeout_buf_count = 0; | |
4263 for (;i<j;i++) { | |
4264 mime_putc(mimeout_buf[i]); | |
4265 } | |
4266 } | |
4267 | |
4268 void | |
4269 close_mime() | |
4270 { | |
4271 (*o_mputc)('?'); | |
4272 (*o_mputc)('='); | |
4273 base64_count += 2; | |
4274 mimeout_mode = 0; | |
4275 } | |
4276 | |
4277 void | |
4278 eof_mime() | |
4279 { | |
4280 switch(mimeout_mode) { | |
4281 case 'Q': | |
4282 case 'B': | |
4283 break; | |
4284 case 2: | |
4285 (*o_mputc)(basis_64[((b64c & 0x3)<< 4)]); | |
4286 (*o_mputc)('='); | |
4287 (*o_mputc)('='); | |
4288 base64_count += 3; | |
4289 break; | |
4290 case 1: | |
4291 (*o_mputc)(basis_64[((b64c & 0xF) << 2)]); | |
4292 (*o_mputc)('='); | |
4293 base64_count += 2; | |
4294 break; | |
4295 } | |
4296 if (mimeout_mode) { | |
4297 if (mimeout_f!=FIXED_MIME) { | |
4298 close_mime(); | |
4299 } else if (mimeout_mode != 'Q') | |
4300 mimeout_mode = 'B'; | |
4301 } | |
4302 } | |
4303 | |
4304 void | |
4305 mimeout_addchar(c) | |
4306 int c; | |
4307 { | |
4308 switch(mimeout_mode) { | |
4309 case 'Q': | |
4310 if(c==SPACE){ | |
4311 (*o_mputc)('_'); | |
4312 base64_count++; | |
4313 } else if (c==CR||c==NL) { | |
4314 (*o_mputc)(c); | |
4315 base64_count = 0; | |
4316 } else if(c<SPACE||c=='='||c=='?'||c=='_'||DEL<=c) { | |
4317 (*o_mputc)('='); | |
4318 (*o_mputc)(itoh4(((c>>4)&0xf))); | |
4319 (*o_mputc)(itoh4((c&0xf))); | |
4320 base64_count += 3; | |
4321 } else { | |
4322 (*o_mputc)(c); | |
4323 base64_count++; | |
4324 } | |
4325 break; | |
4326 case 'B': | |
4327 b64c=c; | |
4328 (*o_mputc)(basis_64[c>>2]); | |
4329 mimeout_mode=2; | |
4330 base64_count ++; | |
4331 break; | |
4332 case 2: | |
4333 (*o_mputc)(basis_64[((b64c & 0x3)<< 4) | ((c & 0xF0) >> 4)]); | |
4334 b64c=c; | |
4335 mimeout_mode=1; | |
4336 base64_count ++; | |
4337 break; | |
4338 case 1: | |
4339 (*o_mputc)(basis_64[((b64c & 0xF) << 2) | ((c & 0xC0) >>6)]); | |
4340 (*o_mputc)(basis_64[c & 0x3F]); | |
4341 mimeout_mode='B'; | |
4342 base64_count += 2; | |
4343 break; | |
4344 default: | |
4345 (*o_mputc)(c); | |
4346 base64_count++; | |
4347 break; | |
4348 } | |
4349 } | |
4350 | |
4351 int mime_lastchar2, mime_lastchar1; | |
4352 | |
4353 void mime_prechar(c2, c1) | |
4354 int c2, c1; | |
4355 { | |
4356 if (mimeout_mode){ | |
4357 if (c2){ | |
4358 if (base64_count + mimeout_buf_count/3*4> 66){ | |
4359 (*o_base64conv)(EOF,0); | |
4360 (*o_base64conv)(0,NL); | |
4361 (*o_base64conv)(0,SPACE); | |
4362 } | |
4363 }/*else if (mime_lastchar2){ | |
4364 if (c1 <=DEL && !nkf_isspace(c1)){ | |
4365 (*o_base64conv)(0,SPACE); | |
4366 } | |
4367 }*/ | |
4368 }/*else{ | |
4369 if (c2 && mime_lastchar2 == 0 | |
4370 && mime_lastchar1 && !nkf_isspace(mime_lastchar1)){ | |
4371 (*o_base64conv)(0,SPACE); | |
4372 } | |
4373 }*/ | |
4374 mime_lastchar2 = c2; | |
4375 mime_lastchar1 = c1; | |
4376 } | |
4377 | |
4378 void | |
4379 mime_putc(c) | |
4380 int c; | |
4381 { | |
4382 int i = 0; | |
4383 int j = 0; | |
4384 int lastchar; | |
4385 | |
4386 if (mimeout_f == FIXED_MIME){ | |
4387 if (mimeout_mode == 'Q'){ | |
4388 if (base64_count > 71){ | |
4389 if (c!=CR && c!=NL) { | |
4390 (*o_mputc)('='); | |
4391 (*o_mputc)(NL); | |
4392 } | |
4393 base64_count = 0; | |
4394 } | |
4395 }else{ | |
4396 if (base64_count > 71){ | |
4397 eof_mime(); | |
4398 (*o_mputc)(NL); | |
4399 base64_count = 0; | |
4400 } | |
4401 if (c == EOF) { /* c==EOF */ | |
4402 eof_mime(); | |
4403 } | |
4404 } | |
4405 if (c != EOF) { /* c==EOF */ | |
4406 mimeout_addchar(c); | |
4407 } | |
4408 return; | |
4409 } | |
4410 | |
4411 /* mimeout_f != FIXED_MIME */ | |
4412 | |
4413 if (c == EOF) { /* c==EOF */ | |
4414 j = mimeout_buf_count; | |
4415 mimeout_buf_count = 0; | |
4416 i = 0; | |
4417 for (;i<j;i++) { | |
4418 /*if (nkf_isspace(mimeout_buf[i])){ | |
4419 break; | |
4420 }*/ | |
4421 mimeout_addchar(mimeout_buf[i]); | |
4422 } | |
4423 eof_mime(); | |
4424 for (;i<j;i++) { | |
4425 (*o_mputc)(mimeout_buf[i]); | |
4426 base64_count++; | |
4427 } | |
4428 return; | |
4429 } | |
4430 | |
4431 if (mimeout_mode=='Q') { | |
4432 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) { | |
4433 if (c <= SPACE) { | |
4434 close_mime(); | |
4435 (*o_mputc)(SPACE); | |
4436 base64_count++; | |
4437 } | |
4438 (*o_mputc)(c); | |
4439 base64_count++; | |
4440 } | |
4441 return; | |
4442 } | |
4443 | |
4444 if (mimeout_buf_count > 0){ | |
4445 lastchar = mimeout_buf[mimeout_buf_count - 1]; | |
4446 }else{ | |
4447 lastchar = -1; | |
4448 } | |
4449 | |
4450 if (!mimeout_mode) { | |
4451 if (c <= DEL && (output_mode==ASCII ||output_mode == ISO8859_1)) { | |
4452 if (nkf_isspace(c)) { | |
4453 if (c==CR || c==NL) { | |
4454 base64_count=0; | |
4455 } | |
4456 for (i=0;i<mimeout_buf_count;i++) { | |
4457 (*o_mputc)(mimeout_buf[i]); | |
4458 if (mimeout_buf[i] == CR || mimeout_buf[i] == NL){ | |
4459 base64_count = 0; | |
4460 }else{ | |
4461 base64_count++; | |
4462 } | |
4463 } | |
4464 mimeout_buf[0] = c; | |
4465 mimeout_buf_count = 1; | |
4466 }else{ | |
4467 if (base64_count > 1 | |
4468 && base64_count + mimeout_buf_count > 76){ | |
4469 (*o_mputc)(NL); | |
4470 base64_count = 0; | |
4471 if (!nkf_isspace(mimeout_buf[0])){ | |
4472 (*o_mputc)(SPACE); | |
4473 base64_count++; | |
4474 } | |
4475 } | |
4476 mimeout_buf[mimeout_buf_count++] = c; | |
4477 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) { | |
4478 open_mime(output_mode); | |
4479 } | |
4480 } | |
4481 return; | |
4482 }else{ | |
4483 if (lastchar==CR || lastchar == NL){ | |
4484 for (i=0;i<mimeout_buf_count;i++) { | |
4485 (*o_mputc)(mimeout_buf[i]); | |
4486 } | |
4487 base64_count = 0; | |
4488 mimeout_buf_count = 0; | |
4489 } | |
4490 if (lastchar==SPACE) { | |
4491 for (i=0;i<mimeout_buf_count-1;i++) { | |
4492 (*o_mputc)(mimeout_buf[i]); | |
4493 base64_count++; | |
4494 } | |
4495 mimeout_buf[0] = SPACE; | |
4496 mimeout_buf_count = 1; | |
4497 } | |
4498 open_mime(output_mode); | |
4499 } | |
4500 }else{ | |
4501 /* mimeout_mode == 'B', 1, 2 */ | |
4502 if ( c<=DEL && (output_mode==ASCII ||output_mode == ISO8859_1 ) ) { | |
4503 if (lastchar == CR || lastchar == NL){ | |
4504 if (nkf_isblank(c)) { | |
4505 for (i=0;i<mimeout_buf_count;i++) { | |
4506 mimeout_addchar(mimeout_buf[i]); | |
4507 } | |
4508 mimeout_buf_count = 0; | |
4509 } else if (SPACE<c && c<DEL) { | |
4510 eof_mime(); | |
4511 for (i=0;i<mimeout_buf_count;i++) { | |
4512 (*o_mputc)(mimeout_buf[i]); | |
4513 } | |
4514 base64_count = 0; | |
4515 mimeout_buf_count = 0; | |
4516 } | |
4517 } | |
4518 if (c==SPACE || c==TAB || c==CR || c==NL) { | |
4519 for (i=0;i<mimeout_buf_count;i++) { | |
4520 if (SPACE<mimeout_buf[i] && mimeout_buf[i]<DEL) { | |
4521 eof_mime(); | |
4522 for (i=0;i<mimeout_buf_count;i++) { | |
4523 (*o_mputc)(mimeout_buf[i]); | |
4524 base64_count++; | |
4525 } | |
4526 mimeout_buf_count = 0; | |
4527 } | |
4528 } | |
4529 mimeout_buf[mimeout_buf_count++] = c; | |
4530 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) { | |
4531 eof_mime(); | |
4532 for (i=0;i<mimeout_buf_count;i++) { | |
4533 (*o_mputc)(mimeout_buf[i]); | |
4534 base64_count++; | |
4535 } | |
4536 mimeout_buf_count = 0; | |
4537 } | |
4538 return; | |
4539 } | |
4540 if (mimeout_buf_count>0 && SPACE<c && c!='=') { | |
4541 mimeout_buf[mimeout_buf_count++] = c; | |
4542 if (mimeout_buf_count>MIMEOUT_BUF_LENGTH) { | |
4543 j = mimeout_buf_count; | |
4544 mimeout_buf_count = 0; | |
4545 for (i=0;i<j;i++) { | |
4546 mimeout_addchar(mimeout_buf[i]); | |
4547 } | |
4548 } | |
4549 return; | |
4550 } | |
4551 } | |
4552 } | |
4553 if (mimeout_buf_count>0) { | |
4554 j = mimeout_buf_count; | |
4555 mimeout_buf_count = 0; | |
4556 for (i=0;i<j;i++) { | |
4557 if (mimeout_buf[i]==CR || mimeout_buf[i]==NL) | |
4558 break; | |
4559 mimeout_addchar(mimeout_buf[i]); | |
4560 } | |
4561 if (i<j) { | |
4562 eof_mime(); | |
4563 base64_count=0; | |
4564 for (;i<j;i++) { | |
4565 (*o_mputc)(mimeout_buf[i]); | |
4566 } | |
4567 open_mime(output_mode); | |
4568 } | |
4569 } | |
4570 mimeout_addchar(c); | |
4571 } | |
4572 | |
4573 | |
4574 #if defined(PERL_XS) || defined(WIN32DLL) | |
4575 void | |
4576 reinit() | |
4577 { | |
4578 { | |
4579 struct input_code *p = input_code_list; | |
4580 while (p->name){ | |
4581 status_reinit(p++); | |
4582 } | |
4583 } | |
4584 unbuf_f = FALSE; | |
4585 estab_f = FALSE; | |
4586 nop_f = FALSE; | |
4587 binmode_f = TRUE; | |
4588 rot_f = FALSE; | |
4589 hira_f = FALSE; | |
4590 input_f = FALSE; | |
4591 alpha_f = FALSE; | |
4592 mime_f = STRICT_MIME; | |
4593 mime_decode_f = FALSE; | |
4594 mimebuf_f = FALSE; | |
4595 broken_f = FALSE; | |
4596 iso8859_f = FALSE; | |
4597 mimeout_f = FALSE; | |
4598 #if defined(MSDOS) || defined(__OS2__) | |
4599 x0201_f = TRUE; | |
4600 #else | |
4601 x0201_f = NO_X0201; | |
4602 #endif | |
4603 iso2022jp_f = FALSE; | |
4604 #ifdef UTF8_OUTPUT_ENABLE | |
4605 unicode_bom_f = 0; | |
4606 w_oconv16_LE = 0; | |
4607 ms_ucs_map_f = FALSE; | |
4608 #endif | |
4609 #ifdef INPUT_OPTION | |
4610 cap_f = FALSE; | |
4611 url_f = FALSE; | |
4612 numchar_f = FALSE; | |
4613 #endif | |
4614 #ifdef CHECK_OPTION | |
4615 noout_f = FALSE; | |
4616 debug_f = FALSE; | |
4617 #endif | |
4618 guess_f = FALSE; | |
4619 is_inputcode_mixed = FALSE; | |
4620 is_inputcode_set = FALSE; | |
4621 #ifdef EXEC_IO | |
4622 exec_f = 0; | |
4623 #endif | |
4624 #ifdef SHIFTJIS_CP932 | |
4625 cp932_f = TRUE; | |
4626 cp932inv_f = TRUE; | |
4627 #endif | |
4628 { | |
4629 int i; | |
4630 for (i = 0; i < 256; i++){ | |
4631 prefix_table[i] = 0; | |
4632 } | |
4633 } | |
4634 #ifdef UTF8_INPUT_ENABLE | |
4635 utf16_mode = UTF16BE_INPUT; | |
4636 #endif | |
4637 mimeout_buf_count = 0; | |
4638 mimeout_mode = 0; | |
4639 base64_count = 0; | |
4640 f_line = 0; | |
4641 f_prev = 0; | |
4642 fold_preserve_f = FALSE; | |
4643 fold_f = FALSE; | |
4644 fold_len = 0; | |
4645 kanji_intro = DEFAULT_J; | |
4646 ascii_intro = DEFAULT_R; | |
4647 fold_margin = FOLD_MARGIN; | |
4648 output_conv = DEFAULT_CONV; | |
4649 oconv = DEFAULT_CONV; | |
4650 o_zconv = no_connection; | |
4651 o_fconv = no_connection; | |
4652 o_crconv = no_connection; | |
4653 o_rot_conv = no_connection; | |
4654 o_hira_conv = no_connection; | |
4655 o_base64conv = no_connection; | |
4656 o_iso2022jp_check_conv = no_connection; | |
4657 o_putc = std_putc; | |
4658 i_getc = std_getc; | |
4659 i_ungetc = std_ungetc; | |
4660 i_bgetc = std_getc; | |
4661 i_bungetc = std_ungetc; | |
4662 o_mputc = std_putc; | |
4663 i_mgetc = std_getc; | |
4664 i_mungetc = std_ungetc; | |
4665 i_mgetc_buf = std_getc; | |
4666 i_mungetc_buf = std_ungetc; | |
4667 output_mode = ASCII; | |
4668 input_mode = ASCII; | |
4669 shift_mode = FALSE; | |
4670 mime_decode_mode = FALSE; | |
4671 file_out = FALSE; | |
4672 crmode_f = 0; | |
4673 option_mode = 0; | |
4674 broken_counter = 0; | |
4675 broken_last = 0; | |
4676 z_prev2=0,z_prev1=0; | |
4677 #ifdef CHECK_OPTION | |
4678 iconv_for_check = 0; | |
4679 #endif | |
4680 input_codename = ""; | |
4681 #ifdef WIN32DLL | |
4682 reinitdll(); | |
4683 #endif /*WIN32DLL*/ | |
4684 } | |
4685 #endif | |
4686 | |
/*
 * no_connection - two-argument front end for no_connection2().
 *
 * Passes c2 and c1 through unchanged and supplies 0 as the third
 * argument; the callee's return value is discarded.
 */
void
no_connection(c2,c1)
    int c2,c1;
{
    (void)no_connection2(c2, c1, 0);
}
4693 | |
/*
 * no_connection2 - report an internal connection failure and quit.
 *
 * Writes a fixed diagnostic to stderr and terminates the process with
 * exit status 1.  The three arguments are not used.
 */
int
no_connection2(c2,c1,c0)
    int c2,c1,c0;
{
    fprintf(stderr,"nkf internal module connection failure.\n");
    exit(1);
    /* NOTREACHED: the return below only exists to keep lint quiet */
    return 0;
}
4702 | |
4703 #ifndef PERL_XS | |
4704 #ifdef WIN32DLL | |
4705 #define fprintf dllprintf | |
4706 #endif | |
/*
 * usage - print the command-line help text to stderr, then the version
 * banner via version().
 *
 * Which "j,s,e,w" line is printed depends on the DEFAULT_CODE_* macro in
 * effect; several other lines are only printed when the corresponding
 * feature macro is defined.  In WIN32DLL builds fprintf is redefined to
 * dllprintf just above this function, so every line must keep going
 * through fprintf.
 *
 * Spelling in the help text is corrected here ("Output", "hiragana",
 * "character"); the wording is otherwise unchanged.
 */
void
usage()
{
    fprintf(stderr,"USAGE: nkf(nkf32,wnkf,nkf2) -[flags] [in file] .. [out file for -O flag]\n");
    fprintf(stderr,"Flags:\n");
    fprintf(stderr,"b,u Output is buffered (DEFAULT),Output is unbuffered\n");
#ifdef DEFAULT_CODE_SJIS
    fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS (DEFAULT), AT&T JIS (EUC), UTF-8\n");
#endif
#ifdef DEFAULT_CODE_JIS
    fprintf(stderr,"j,s,e,w Output code is JIS 7 bit (DEFAULT), Shift JIS, AT&T JIS (EUC), UTF-8\n");
#endif
#ifdef DEFAULT_CODE_EUC
    fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, AT&T JIS (EUC) (DEFAULT), UTF-8\n");
#endif
#ifdef DEFAULT_CODE_UTF8
    fprintf(stderr,"j,s,e,w Output code is JIS 7 bit, Shift JIS, AT&T JIS (EUC), UTF-8 (DEFAULT)\n");
#endif
#ifdef UTF8_OUTPUT_ENABLE
    fprintf(stderr," After 'w' you can add more options. (80?|16((B|L)0?)?) \n");
#endif
    fprintf(stderr,"J,S,E,W Input assumption is JIS 7 bit , Shift JIS, AT&T JIS (EUC), UTF-8\n");
#ifdef UTF8_INPUT_ENABLE
    fprintf(stderr," After 'W' you can add more options. (8|16(B|L)?) \n");
#endif
    fprintf(stderr,"t no conversion\n");
    fprintf(stderr,"i_/o_ Output sequence to designate JIS-kanji/ASCII (DEFAULT B)\n");
    fprintf(stderr,"r {de/en}crypt ROT13/47\n");
    /* NOTE(review): only the spelling was fixed on the next line; confirm
       the 1/2 direction against the -h option handling. */
    fprintf(stderr,"h 1 hiragana->katakana, 2 katakana->hiragana,3 both\n");
    fprintf(stderr,"v Show this usage. V: show version\n");
    fprintf(stderr,"m[BQN0] MIME decode [B:base64,Q:quoted,N:non-strict,0:no decode]\n");
    fprintf(stderr,"M[BQ] MIME encode [B:base64 Q:quoted]\n");
    fprintf(stderr,"l ISO8859-1 (Latin-1) support\n");
    fprintf(stderr,"f/F Folding: -f60 or -f or -f60-10 (fold margin 10) F preserve nl\n");
    fprintf(stderr,"Z[0-3] Convert X0208 alphabet to ASCII 1: Kankaku to space,2: 2 spaces,\n");
    fprintf(stderr," 3: Convert HTML Entity\n");
    fprintf(stderr,"X,x Assume X0201 kana in MS-Kanji, -x preserves X0201\n");
    fprintf(stderr,"B[0-2] Broken input 0: missing ESC,1: any X on ESC-[($]-X,2: ASCII on NL\n");
#ifdef MSDOS
    fprintf(stderr,"T Text mode output\n");
#endif
    fprintf(stderr,"O Output to File (DEFAULT 'nkf.out')\n");
    fprintf(stderr,"d,c Delete \\r in line feed and \\032, Add \\r in line feed\n");
    fprintf(stderr,"I Convert non ISO-2022-JP character to GETA\n");
    fprintf(stderr,"-L[uwm] line mode u:LF w:CRLF m:CR (DEFAULT noconversion)\n");
    fprintf(stderr,"long name options\n");
    fprintf(stderr," --fj,--unix,--mac,--windows convert for the system\n");
    fprintf(stderr," --jis,--euc,--sjis,--utf8,--utf16,--mime,--base64 convert for the code\n");
    fprintf(stderr," --hiragana, --katakana Hiragana/Katakana Conversion\n");
    fprintf(stderr," --x0212 Convert JISX0212\n");
    fprintf(stderr," --cp932, --no-cp932 CP932 compatibility\n");
#ifdef INPUT_OPTION
    /* "%%" is a format escape: a single '%' is printed */
    fprintf(stderr," --cap-input, --url-input Convert hex after ':' or '%%'\n");
#endif
#ifdef NUMCHAR_OPTION
    fprintf(stderr," --numchar-input Convert Unicode Character Reference\n");
#endif
#ifdef UTF8_OUTPUT_ENABLE
    fprintf(stderr," --ms-ucs-map Microsoft UCS Mapping Compatible\n");
#endif
#ifdef OVERWRITE
    fprintf(stderr," --overwrite Overwrite original listed files by filtered result\n");
#endif
    fprintf(stderr," -g, --guess Guess the input code\n");
    fprintf(stderr," --help,--version\n");
    version();
}
4774 | |
4775 void | |
4776 version() | |
4777 { | |
4778 fprintf(stderr,"Network Kanji Filter Version %s (%s) " | |
4779 #if defined(MSDOS) && !defined(__WIN32__) && !defined(__WIN16__) | |
4780 "for DOS" | |
4781 #endif | |
4782 #if defined(MSDOS) && defined(__WIN16__) | |
4783 "for Win16" | |
4784 #endif | |
4785 #if defined(MSDOS) && defined(__WIN32__) | |
4786 "for Win32" | |
4787 #endif | |
4788 #ifdef __OS2__ | |
4789 "for OS/2" | |
4790 #endif | |
4791 ,NKF_VERSION,NKF_RELEASE_DATE); | |
4792 fprintf(stderr,"\n%s\n",CopyRight); | |
4793 } | |
4794 #endif /*PERL_XS*/ | |
4795 | |
/**
 ** Patch contributors
 **  void@merope.pleiades.or.jp (Kusakabe Youichi)
 **  NIDE Naoyuki <nide@ics.nara-wu.ac.jp>
 **  ohta@src.ricoh.co.jp (Junn Ohta)
 **  inouet@strl.nhk.or.jp (Tomoyuki Inoue)
 **  kiri@pulser.win.or.jp (Tetsuaki Kiriyama)
 **  Kimihiko Sato <sato@sail.t.u-tokyo.ac.jp>
 **  a_kuroe@kuroe.aoba.yokohama.jp (Akihiko Kuroe)
 **  kono@ie.u-ryukyu.ac.jp (Shinji Kono)
 **  GHG00637@nifty-serve.or.jp (COW)
 **
 **/
4809 | |
4810 /* end */ |