Mercurial > hg > Applications > mh
view sbr/ml_codeconv.c @ 0:bce86c4163a3
Initial revision
author | kono |
---|---|
date | Mon, 18 Apr 2005 23:46:02 +0900 |
parents | |
children | 442dbbf0ac7d |
line wrap: on
line source
/* ml_codeconv.c - (multilingual) code conversion */
/* by takada@seraph.ntt.jp */
/* arranged by MH-plus project */

#ifdef JAPAN

#include "../h/mh.h"
#include <ctype.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>

/* coding system */
typedef int coding_system_t;
#define CS_DEFAULT   0
#define CS_JIS7      1
#define CS_JEUC      2
#define CS_SJIS      3
#define CS_NOCONV   99

/* coding system list: one entry per kind of stream */
#define CSL_SIZE 3
#define CSL_DISPLAY(csl) ((csl)[0])
#define CSL_FILE(csl)    ((csl)[1])
#define CSL_PROCESS(csl) ((csl)[2])

/* codeset status (which character set is currently designated) */
#define ASCII    0
#define JISX0208 1

/*
 * Byte-range predicates.  Each one evaluates its argument more than once,
 * so only pass expressions without side effects.
 */
#define IS_JIS7(c)  ((0x21 <= (c)) && ((c) <= 0x7e))
#define IS_JEUC(c)  ((0xa1 <= (c)) && ((c) <= 0xfe))
#define IS_SJIS1(c) (((0x81 <= (c)) && ((c) <= 0x9f)) || \
		     ((0xe0 <= (c)) && ((c) <= 0xfc)))
#define IS_SJIS2(c) ((0x40 <= (c)) && ((c) <= 0xfc) && ((c) != 0x7f))

/* hold coding system information */
static coding_system_t ml_coding_info[CSL_SIZE] = {
    CS_DEFAULT, CS_DEFAULT, CS_DEFAULT
};

/* 0 when the file coding is CS_NOCONV, 1 otherwise (set by read_profile) */
int japan_environ;

/* private functions */
static void read_profile();
static coding_system_t coding_to_cs();
static coding_system_t select_coding_system();
static void ml_fputs_sbr();
static void jeuc_fputs();
static void sjis_fputs();
static void jis7_fputs();
static void cntrl_putc();
static void ml_conv_sbr();


/*
 * Initialize: fill ml_coding_info (and japan_environ) from the
 * environment and the profile.
 */
void
ml_init()
{
    read_profile(ml_coding_info);
}

/*
 * Get the coding system list from environment variables and the profile.
 *
 * For each of display / file / process, the environment variable
 * (MH_DISPLAY_CODING, MH_FILE_CODING, MH_PROCESS_CODING) is consulted
 * first, then the profile entry looked up through m_find(), and finally
 * the empty string, which coding_to_cs() maps to CS_DEFAULT.
 *
 * csl: array of CSL_SIZE entries to fill.
 * Side effect: sets japan_environ to 0 when the file coding stored in
 * csl is CS_NOCONV, and to 1 otherwise.
 */
static void
read_profile(csl)
     coding_system_t csl[];
{
    char *s, *default_coding;

#if 0 /* We won't refer $LANG nor $LC_CTYPE */
    if ((default_coding = getenv("LC_CTYPE")) == NULL)
	default_coding = getenv("LANG");
#else
    default_coding = ""; /* for CS_DEFAULT */
#endif

    if ((s = getenv("MH_DISPLAY_CODING")) == NULL)
	if ((s = m_find("display-coding")) == NULL)
	    s = default_coding;
    CSL_DISPLAY(csl) = coding_to_cs(s);

    if ((s = getenv("MH_FILE_CODING")) == NULL)
	if ((s = m_find("file-coding")) == NULL)
	    s = default_coding;
    CSL_FILE(csl) = coding_to_cs(s);

    if ((s = getenv("MH_PROCESS_CODING")) == NULL)
	if ((s = m_find("process-coding")) == NULL)
	    s = default_coding;
    CSL_PROCESS(csl) = coding_to_cs(s);

    /*
     * Decide from the list that was just filled in (the argument), not
     * from the global array, so the function is consistent for any csl.
     */
    japan_environ = (CSL_FILE(csl) == CS_NOCONV) ? 0 : 1;
}

/*
 * Map a coding name to a coding system constant.
 *
 * coding: non-NULL name; compared with uleq().
 * Returns CS_DEFAULT for the empty string and for any unrecognized name.
 */
static coding_system_t
coding_to_cs(coding)
     char *coding;
{
    if (*coding == '\0')
	return CS_DEFAULT;

    if (uleq(coding, "ja_JP.JIS7"))
	return CS_JIS7;
    if (uleq(coding, "ja_JP.EUC"))
	return CS_JEUC;
    if (uleq(coding, "ja_JP.SJIS"))
	return CS_SJIS;
    if (uleq(coding, "C"))
	return CS_NOCONV;

    /* for backward compatibility */
    if (uleq(coding, "japanese")
	    || uleq(coding, "ja_JP.jis8")
	    || uleq(coding, "ja_JP.pjis")
	    || uleq(coding, "ja_JP.jis")
	    || uleq(coding, "wr_WR.ct")
	    || uleq(coding, "wr_WR.junet"))
	return CS_JIS7;
    if (uleq(coding, "ja_JP.ujis"))
	return CS_JEUC;
    if (uleq(coding, "ja_JP.mscode"))
	return CS_SJIS;
    if (uleq(coding, "noconv"))
	return CS_NOCONV;

    return CS_DEFAULT;
}

/*
 * Choose the coding system for an output stream from the type of the
 * underlying file descriptor:
 *   regular file          -> file coding
 *   FIFO, or file type 0  -> process coding
 *   anything else         -> display coding
 * Calls adios() if fstat() fails.
 */
static coding_system_t
select_coding_system(stream)
     FILE *stream;
{
    struct stat buf;

    if (fstat(fileno(stream), &buf))
	adios (NULLCP, "unable to fstat stream");

    switch (buf.st_mode & S_IFMT) {
    case S_IFREG:
	return(CSL_FILE(ml_coding_info));
    case S_IFIFO:
    case 0:
	/* some system returns zero-filled stat for pipe */
	return(CSL_PROCESS(ml_coding_info));
    case S_IFCHR:
    default:
	return(CSL_DISPLAY(ml_coding_info));
    }
}


/*
 * Character classification
 */

/*
 * Return non-zero when conversion is enabled (japan_environ) and c lies
 * in the range accepted by IS_JEUC (0xa1..0xfe); otherwise 0.
 */
int
ml_ismlchar(c)
     unsigned char c;
{
    return japan_environ ? IS_JEUC(c) : 0;
}

/*
 * Return non-zero when conversion is enabled and both p[0] and p[1] lie
 * in the IS_JEUC range.  p[1] is only read when p[0] is in range.
 */
int
ml_ismlptr(p)
     unsigned char *p;
{
    return japan_environ ? (IS_JEUC(*p) && IS_JEUC(*(p+1))) : 0;
}


/*
 * Output:
 */

/* Write scanlk to stream in the stream's coding system, bytes as-is. */
void
ml_fputs(scanlk, stream)
     char *scanlk;
     FILE *stream;
{
    ml_fputs_sbr(scanlk, stream, 0);
}

/* Like ml_fputs(), but control characters are written via cntrl_putc(). */
void
ml_pretty_fputs(scanlk, stream)
     char *scanlk;
     FILE *stream;
{
    ml_fputs_sbr(scanlk, stream, 1);
}

/* Write scanlk through jis7_fputs() regardless of the stream type. */
void
junet_fputs(scanlk, stream)
     char *scanlk;
     FILE *stream;
{
    jis7_fputs(scanlk, stream, 0);
}

/*
 * Dispatch to the writer matching select_coding_system(stream).
 * CS_NOCONV writes the string untouched; CS_JIS7, CS_DEFAULT and any
 * unknown value all use jis7_fputs().
 */
static void
ml_fputs_sbr(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    switch (select_coding_system(stream)) {
    case CS_NOCONV:
	fputs(scanlk, stream);
	break;
    case CS_SJIS:
	sjis_fputs(scanlk, stream, pretty);
	break;
    case CS_JEUC:
	jeuc_fputs(scanlk, stream, pretty);
	break;
    case CS_JIS7:
    case CS_DEFAULT:
    default:
	jis7_fputs(scanlk, stream, pretty);
	break;
    }
}


/*
 * Output routines with code conversion.
 *
 * All three scan the string the same way: two consecutive bytes in the
 * IS_JEUC range form a pair that is written (converted as needed); any
 * other byte with the high bit set is replaced by a space; when `pretty'
 * is non-zero, control characters go through cntrl_putc(); everything
 * else is written unchanged.
 */

static void
jeuc_fputs(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    unsigned char u1, u2;

    while ((u1 = *scanlk++) != '\0') {
	if (IS_JEUC(u1)) {
	    u2 = *scanlk;
	    if (IS_JEUC(u2)) {
		scanlk++;
		putc(u1, stream);
		putc(u2, stream);
		continue;
	    }
	}
	/* IS_JEUC(u1) implies (u1 & 0x80), so a lone lead byte lands here */
	if (u1 & 0x80)
	    putc(' ', stream);
	else if (pretty && iscntrl(u1))
	    cntrl_putc(u1, stream);
	else
	    putc(u1, stream);
    }
}

/*
 * Convert the byte pair (i1, i2) accepted by IS_JEUC into the pair
 * (o1, o2) written by sjis_fputs().  i1 and i2 are modified: their high
 * bit is cleared.  All four arguments must be plain lvalues.
 */
#define E2S(i1, i2, o1, o2) do {\
    (i1) &= 0x7f;\
    (i2) &= 0x7f;\
    (o1) = ((i1) - 0x21) / 2 + 0x81;\
    if ((o1) > 0x9f) { (o1) += (0xe0 - 0xa0); }\
    if ((i1) & 1) {\
	(o2) = (i2) + (0x40 - 0x21);\
	if ((o2) > 0x7e) (o2)++;\
    } else {\
	(o2) = (i2) + (0xfc - 0x7e);\
    }\
} while (0)

static void
sjis_fputs(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    unsigned char u1, u2, s1, s2;

    while ((u1 = *scanlk++) != '\0') {
	if (IS_JEUC(u1)) {
	    u2 = *scanlk;
	    if (IS_JEUC(u2)) {
		scanlk++;
		E2S(u1, u2, s1, s2);
		putc(s1, stream);
		putc(s2, stream);
		continue;
	    }
	}
	if (u1 & 0x80)
	    putc(' ', stream);
	else if (pretty && iscntrl(u1))
	    cntrl_putc(u1, stream);
	else
	    putc(u1, stream);
    }
}

/*
 * Emit the escape sequence that switches the output to JIS X 0208
 * (ESC $ B) or back to ASCII (ESC ( B), but only when `status' says the
 * other set is currently designated; `status' is updated accordingly.
 * `status' must be an lvalue holding ASCII or JISX0208.
 */
#define DSGNT_JISX0208(stream, status) do {\
    if ((status) == ASCII) {\
	fputs("\033$B", (stream)); (status) = JISX0208;\
    }\
} while (0)

#define DSGNT_ASCII(stream, status) do {\
    if ((status) == JISX0208) {\
	fputs("\033(B", (stream)); (status) = ASCII;\
    }\
} while (0)

static void
jis7_fputs(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    int kanji_pos;	/* ASCII or JISX0208 */
    unsigned char u1, u2;

    kanji_pos = ASCII;
    while ((u1 = *scanlk++) != '\0') {
	if (IS_JEUC(u1)) {
	    u2 = *scanlk;
	    if (IS_JEUC(u2)) {
		scanlk++;
		DSGNT_JISX0208(stream, kanji_pos);
		putc(u1 & 0x7f, stream);
		putc(u2 & 0x7f, stream);
		continue;
	    }
	}
	/* every non-pair byte is written with ASCII designated */
	DSGNT_ASCII(stream, kanji_pos);
	if (u1 & 0x80)
	    putc(' ', stream);
	else if (pretty && iscntrl(u1))
	    cntrl_putc(u1, stream);
	else
	    putc(u1, stream);
    }
    /* always leave the stream with ASCII designated */
    DSGNT_ASCII(stream, kanji_pos);
}

/*
 * Write a printable representation of control character c:
 * \b \f \n \r \t as backslash escapes, anything else as '^' followed by
 * (c ^ 0x40).
 */
static void
cntrl_putc(c, stream)
     char c;
     FILE *stream;
{
    switch (c) {
    case '\b': putc('\\', stream); putc('b', stream); break;
    case '\f': putc('\\', stream); putc('f', stream); break;
    case '\n': putc('\\', stream); putc('n', stream); break;
    case '\r': putc('\\', stream); putc('r', stream); break;
    case '\t': putc('\\', stream); putc('t', stream); break;
    default:
	putc('^', stream);
	putc(c ^ 0x40, stream);
	break;
    }
}


/*
 * Input:
 */

/*
 * Convert the string s in place according to the configured file coding
 * and return s.  s is returned untouched when it is NULL or when the
 * file coding is CS_NOCONV.  The converted text is never longer than the
 * input.
 */
char *
ml_conv(s)
     char *s;
{
    coding_system_t coding;

    if ((s == NULL) || ((coding = CSL_FILE(ml_coding_info)) == CS_NOCONV))
	return(s);
    ml_conv_sbr(s, coding);
    return(s);
}

#define ESC '\033'
#define SO  '\016'
#define SI  '\017'
#define SS2 '\216'

/*
 * Byte-pair conversions used by ml_conv_sbr().  The output arguments
 * must be plain lvalues; S2E also modifies its input arguments.
 */
#define E2E(i1, i2, o1, o2) do { (o1) = (i1); (o2) = (i2); } while (0)
#define I2E(i1, i2, o1, o2) do {\
    (o1) = ((i1) | 0x80); (o2) = ((i2) | 0x80);\
} while (0)
#define S2E(i1, i2, o1, o2) do {\
    if ((i1) >= 0xe0) { (i1) -= (0xe0 - 0xa0); }\
    if ((i2) >= 0x80) { (i2)--; }\
    if ((i2) < 0x40 + 94) {\
	(o1) = (((((i1) - 0x81) * 2) + 0x21) | 0x80);\
	(o2) = (((i2) - (0x40 - 0x21)) | 0x80);\
    } else {\
	(o1) = (((((i1) - 0x81) * 2) + 0x21 + 1) | 0x80);\
	(o2) = (((i2) - (0x9e - 0x21)) | 0x80);\
    }\
} while (0)

/*
 * In-place conversion worker for ml_conv().
 *
 * in: NUL-terminated string, overwritten with the result.
 * cs: coding system; CS_SJIS selects the S2E path for 8-bit pairs, any
 *     other value the E2E path.
 *
 * Recognized escape sequences only switch kanji_pos and are dropped.
 * For an unrecognized sequence the ESC itself is dropped and the bytes
 * examined after it are copied through.  While kanji_pos is JISX0208,
 * pairs of IS_JIS7 bytes are stored with the high bit set.  8-bit bytes
 * that do not form a valid pair, and SI/SO, are dropped.
 */
static void
ml_conv_sbr(in, cs)
     char *in;
     coding_system_t cs;
{
    char *out = in;		/* write position; never passes `in' */
    int kanji_pos = ASCII;
    unsigned char c1, c2;
    unsigned char e1, e2;	/* converted pair */

    while ((c1 = *in++) != '\0') {
	if (c1 == ESC) {
	    char *cp = in;	/* first byte after the ESC */

	    if ((c1 = *in++) == '$') {
		if ((c1 = *in++) == 'B' || c1 == '@') {
		    kanji_pos = JISX0208;
		    continue;
		} else if (c1 == '(') {
		    if ((c1 = *in++) == 'B' || c1 == '@' || c1 == 'O') {
			/* special case: compound text */
			/* "ESC $ ( O" is for JIS X 0213-2000 */
			kanji_pos = JISX0208;
			continue;
		    }
		}
	    } else if (c1 == '(') {
		if ((c1 = *in++) == 'B' || c1 == 'J' || c1 == 'H') {
		    /* "ESC ( H" is an old wrong implementation */
		    kanji_pos = ASCII;
		    continue;
		}
	    } else if (c1 == '&') {
		if ((c1 = *in++) == '@'
			&& (c1 = *in++) == ESC
			&& (c1 = *in++) == '$'
			&& (c1 = *in++) == 'B') {
		    /* special case: JIS X 0208-1990 */
		    kanji_pos = JISX0208;
		    continue;
		}
	    }

	    /*
	     * Unrecognized sequence.  Give back the last byte read unless
	     * it is an IS_JIS7 byte; this keeps a terminating NUL or a
	     * following ESC for the main loop to see.
	     */
	    if (! IS_JIS7(c1))
		in--;
	    while (cp < in)
		*out++ = *cp++;
	    continue;	/* invalid ESC is ignored. */
	}

	if (kanji_pos == JISX0208 && IS_JIS7(c1)) {
	    c2 = *in;
	    if (IS_JIS7(c2)) {
		I2E(c1, c2, e1, e2);
		*out++ = e1;
		*out++ = e2;
		in++;
		continue;
	    }
	}

	if (c1 & 0x80) {
	    if (cs == CS_SJIS) {
		if (IS_SJIS1(c1)) {
		    c2 = *in;
		    if (IS_SJIS2(c2)) {
			S2E(c1, c2, e1, e2);
			*out++ = e1;
			*out++ = e2;
			in++;
		    }
		}
	    } else {
		if (IS_JEUC(c1)) {
		    c2 = *in;
		    if (IS_JEUC(c2)) {
			E2E(c1, c2, e1, e2);
			*out++ = e1;
			*out++ = e2;
			in++;
		    }
		} else if (c1 == (unsigned char) SS2) {
		    c2 = *in;
		    if (IS_JEUC(c2))
			in++;	/* skip */
		}
	    }
	    continue;	/* invalid 8bit code is ignored. */
	}

	if (c1 == SI || c1 == SO)
	    continue;
	    /* ISO-2022-JP cannot include SI, SO. (cf. RFC-1468) */

	*out++ = c1;
    }
    *out = '\0';
}

#endif /* JAPAN */