view sbr/ml_codeconv.c @ 0:bce86c4163a3

Initial revision
author kono
date Mon, 18 Apr 2005 23:46:02 +0900
parents
children 442dbbf0ac7d
line wrap: on
line source

/* ml_codeconv.c - (multilingual) code conversion */
/*			by takada@seraph.ntt.jp   */
/*			arranged by MH-plus project */

#ifdef JAPAN

#include "../h/mh.h"
#include <ctype.h>
#include <stdio.h>
#include <sys/types.h>
#include <sys/stat.h>

/*
 * coding system
 *
 * Identifiers for the encodings this module converts between.
 * CS_DEFAULT is what coding_to_cs() yields for an empty or unrecognized
 * name; on output it is handled like CS_JIS7.  CS_NOCONV disables
 * conversion (output is written as is, ml_conv() returns its argument
 * untouched).
 */
typedef int coding_system_t;
#define CS_DEFAULT	 0
#define CS_JIS7		 1
#define CS_JEUC		 2
#define CS_SJIS		 3
#define CS_NOCONV	99

/*
 * coding system list
 *
 * A list is an array of CSL_SIZE coding systems; the accessors below
 * name its three slots (display, file, process).
 */
#define CSL_SIZE	3
#define CSL_DISPLAY(csl)	((csl)[0])
#define CSL_FILE(csl)		((csl)[1])
#define CSL_PROCESS(csl)	((csl)[2])

/* codeset status: which character set is currently designated */
#define ASCII 		0
#define JISX0208	1

/*
 * Byte classification.  Each macro evaluates its argument more than
 * once, so pass an expression without side effects.
 *   IS_JIS7  - 0x21..0x7e
 *   IS_JEUC  - 0xa1..0xfe
 *   IS_SJIS1 - 0x81..0x9f or 0xe0..0xfc (tested on the first byte of a pair)
 *   IS_SJIS2 - 0x40..0xfc except 0x7f   (tested on the second byte of a pair)
 */
#define IS_JIS7(c)	((0x21 <= (c)) && ((c) <= 0x7e))
#define IS_JEUC(c)	((0xa1 <= (c)) && ((c) <= 0xfe))
#define IS_SJIS1(c)	(((0x81 <= (c)) && ((c) <= 0x9f)) || \
			 ((0xe0 <= (c)) && ((c) <= 0xfc)))
#define IS_SJIS2(c)	((0x40 <= (c)) && ((c) <= 0xfc) && ((c) != 0x7f))
/*
 * hold coding system information
 *
 * ml_coding_info is the module's coding system list; it starts out as
 * all CS_DEFAULT and is filled in by read_profile() (via ml_init()).
 * japan_environ is set by read_profile(): 0 when the file coding is
 * CS_NOCONV, 1 otherwise.  ml_ismlchar()/ml_ismlptr() return 0 while
 * it is 0.
 */
static coding_system_t ml_coding_info[CSL_SIZE] =
       { CS_DEFAULT, CS_DEFAULT, CS_DEFAULT };
int japan_environ;

/*
 * private functions
 *
 * Old-style (empty parameter list) declarations: the definitions below
 * are written in K&R form, so argument types are not checked here.
 */
/* configuration */
static void read_profile();
static coding_system_t coding_to_cs();
static coding_system_t select_coding_system();
/* output side */
static void ml_fputs_sbr();
static void jeuc_fputs();
static void sjis_fputs();
static void jis7_fputs();
static void cntrl_putc();
/* input side */
static void ml_conv_sbr();


/*
 * Initialize: holding coding system information
 */
void
ml_init()
{
    read_profile(ml_coding_info);
}

/*
 * lookup_coding - choose the coding name for one slot: the environment
 * variable `envname' if set, else the profile entry `entry', else
 * `fallback'.
 */
static char *
lookup_coding(envname, entry, fallback)
     char *envname;
     char *entry;
     char *fallback;
{
    char *s;

    if ((s = getenv(envname)) != NULL)
	return s;
    if ((s = m_find(entry)) != NULL)
	return s;
    return fallback;
}

/*
 * read_profile - get coding system list from profile and environment
 * variable.
 *
 * csl: coding system list to fill in (CSL_SIZE entries).
 *
 * For each of the display, file and process slots the coding name is
 * taken from MH_DISPLAY_CODING / MH_FILE_CODING / MH_PROCESS_CODING in
 * the environment, then from the display-coding / file-coding /
 * process-coding profile entry, and finally from the default (the empty
 * string, which coding_to_cs() maps to CS_DEFAULT).
 *
 * Also sets japan_environ: 0 if the file coding just stored in `csl'
 * is CS_NOCONV, 1 otherwise.
 */
static void
read_profile(csl)
     coding_system_t csl[];
{
    char *default_coding;

#if 0 /* We won't refer $LANG nor $LC_CTYPE */
    if ((default_coding = getenv("LC_CTYPE")) == NULL)
      default_coding = getenv("LANG");
#else
    default_coding = ""; /* for CS_DEFAULT */
#endif

    CSL_DISPLAY(csl) = coding_to_cs(
	lookup_coding("MH_DISPLAY_CODING", "display-coding", default_coding));
    CSL_FILE(csl) = coding_to_cs(
	lookup_coding("MH_FILE_CODING", "file-coding", default_coding));
    CSL_PROCESS(csl) = coding_to_cs(
	lookup_coding("MH_PROCESS_CODING", "process-coding", default_coding));

    /* Judge by the list we were asked to fill, not by the global one. */
    japan_environ = (CSL_FILE(csl) != CS_NOCONV);
}

/*
 * coding_to_cs - translate a coding name into a coding system id.
 *
 * coding: name to look up; must not be NULL.
 *
 * An empty name, or one that matches no table entry, yields
 * CS_DEFAULT.  Names are compared with uleq(), in table order.
 */
static coding_system_t
coding_to_cs(coding)
     char *coding;
{
    static struct {
	char *name;
	coding_system_t cs;
    } known[] = {
	{ "ja_JP.JIS7",		CS_JIS7 },
	{ "ja_JP.EUC",		CS_JEUC },
	{ "ja_JP.SJIS",		CS_SJIS },
	{ "C",			CS_NOCONV },

	/* for backward compatibility */
	{ "japanese",		CS_JIS7 },
	{ "ja_JP.jis8",		CS_JIS7 },
	{ "ja_JP.pjis",		CS_JIS7 },
	{ "ja_JP.jis",		CS_JIS7 },
	{ "wr_WR.ct",		CS_JIS7 },
	{ "wr_WR.junet",	CS_JIS7 },
	{ "ja_JP.ujis",		CS_JEUC },
	{ "ja_JP.mscode",	CS_SJIS },
	{ "noconv",		CS_NOCONV },
    };
    int i;
    int count = (int) (sizeof known / sizeof known[0]);

    if (*coding == '\0')
	return CS_DEFAULT;

    for (i = 0; i < count; i++) {
	if (uleq(coding, known[i].name))
	    return known[i].cs;
    }
    return CS_DEFAULT;
}

/*
 * select_coding_system - choose the coding system for writing to `stream'.
 *
 * The stream's descriptor is examined with fstat(): a regular file gets
 * the file coding, a FIFO (or a file type of 0) gets the process coding,
 * and everything else, character devices included, gets the display
 * coding.  Calls adios() if fstat() fails.
 */
static coding_system_t
select_coding_system(stream)
     FILE *stream;
{
    struct stat st;
    int ftype;

    if (fstat(fileno(stream), &st) != 0)
	adios (NULLCP, "unable to fstat stream");

    ftype = st.st_mode & S_IFMT;

    if (ftype == S_IFREG)
	return CSL_FILE(ml_coding_info);

    /* some system returns zero-filled stat for pipe */
    if (ftype == S_IFIFO || ftype == 0)
	return CSL_PROCESS(ml_coding_info);

    /* S_IFCHR and anything else */
    return CSL_DISPLAY(ml_coding_info);
}


/*
 * Multilingual (EUC) character tests:
 */
/*
 * ml_ismlchar - does byte `c' lie in the EUC range (0xa1..0xfe)?
 *
 * Returns 1 if so, 0 otherwise; always 0 while japan_environ is 0.
 */
int
ml_ismlchar(c)
     unsigned char c;
{
    if (!japan_environ)
	return 0;
    return IS_JEUC(c);
}

/*
 * ml_ismlptr - does `p' point at two consecutive EUC-range bytes?
 *
 * Returns 1 if so, 0 otherwise; always 0 while japan_environ is 0.
 * The second byte is only examined when the first one qualifies.
 */
int
ml_ismlptr(p)
     unsigned char *p;
{
    if (!japan_environ)
	return 0;
    if (!IS_JEUC(p[0]))
	return 0;
    return IS_JEUC(p[1]);
}


/*
 * Output:
 */
/*
 * ml_fputs - write `scanlk' to `stream' in the coding system selected
 * for that stream, without the "pretty" control-character rendering.
 */
void
ml_fputs(scanlk, stream)
     char *scanlk;
     FILE *stream;
{
    int pretty = 0;

    ml_fputs_sbr(scanlk, stream, pretty);
}

/*
 * ml_pretty_fputs - like ml_fputs(), but with the "pretty" flag set, so
 * the converting writers render control characters through cntrl_putc().
 */
void
ml_pretty_fputs(scanlk, stream)
     char *scanlk;
     FILE *stream;
{
    int pretty = 1;

    ml_fputs_sbr(scanlk, stream, pretty);
}

/*
 * junet_fputs - write `scanlk' to `stream' with the JIS7 writer,
 * whatever kind of stream it is, and without "pretty" rendering.
 */
void
junet_fputs(scanlk, stream)
     char *scanlk;
     FILE *stream;
{
    int pretty = 0;

    jis7_fputs(scanlk, stream, pretty);
}


static void
ml_fputs_sbr(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    switch (select_coding_system(stream)) {
      case CS_NOCONV:
	fputs(scanlk, stream);
	break;
      case CS_SJIS:
	sjis_fputs(scanlk, stream, pretty);
	break;
      case CS_JEUC:
	jeuc_fputs(scanlk, stream, pretty);
	break;
      case CS_JIS7:
      case CS_DEFAULT:
      default:
	jis7_fputs(scanlk, stream, pretty);
	break;
    }
}


/*
 * Output routines with code conversion
 */
/*
 * jeuc_fputs - write an EUC string to `stream' as EUC.
 *
 * A pair of EUC-range bytes is written unchanged.  Any other byte with
 * the high bit set (including an EUC-range byte that is not followed by
 * a second one) is replaced by a space.  7-bit bytes are written as is,
 * except that control characters go through cntrl_putc() when `pretty'
 * is nonzero.
 */
static void
jeuc_fputs(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    unsigned char lead, trail;
    int paired;

    for (;;) {
	lead = *scanlk++;
	if (lead == '\0')
	    break;

	/* peek at the next byte only when the first one qualifies */
	trail = 0;
	paired = 0;
	if (IS_JEUC(lead)) {
	    trail = *scanlk;
	    paired = IS_JEUC(trail);
	}

	if (paired) {
	    scanlk++;
	    putc(lead, stream);
	    putc(trail, stream);
	} else if (lead & 0x80) {
	    putc(' ', stream);
	} else if (pretty && iscntrl(lead)) {
	    cntrl_putc(lead, stream);
	} else {
	    putc(lead, stream);
	}
    }
}

/*
 * E2S - convert one two-byte EUC character to Shift_JIS.
 *
 * i1, i2: the EUC bytes; both must be modifiable lvalues and are
 *         clobbered (their high bit is stripped).
 * o1, o2: lvalues that receive the Shift_JIS first and second byte.
 *
 * Arguments are evaluated more than once, so they must not have side
 * effects.  The body is wrapped in do { } while (0) so that the macro
 * behaves as a single statement, e.g. in an unbraced if/else.
 */
#define E2S(i1, i2, o1, o2) do {\
    (i1) &= 0x7f;\
    (i2) &= 0x7f;\
    (o1) = ((i1) - 0x21) / 2 + 0x81;\
    if ((o1) > 0x9f) { (o1) += (0xe0 - 0xa0); }\
    if ((i1) & 1) {\
	(o2) = (i2) + (0x40 - 0x21);\
	if ((o2) > 0x7e) { (o2)++; }\
    } else {\
	(o2) = (i2) + (0xfc - 0x7e);\
    }\
} while (0)

/*
 * sjis_fputs - write an EUC string to `stream' as Shift_JIS.
 *
 * A pair of EUC-range bytes is converted with E2S and written as two
 * bytes.  Any other byte with the high bit set (including an EUC-range
 * byte that is not followed by a second one) is replaced by a space.
 * 7-bit bytes are written as is, except that control characters go
 * through cntrl_putc() when `pretty' is nonzero.
 */
static void
sjis_fputs(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    unsigned char lead, trail, s1, s2;
    int paired;

    for (;;) {
	lead = *scanlk++;
	if (lead == '\0')
	    break;

	/* peek at the next byte only when the first one qualifies */
	trail = 0;
	paired = 0;
	if (IS_JEUC(lead)) {
	    trail = *scanlk;
	    paired = IS_JEUC(trail);
	}

	if (paired) {
	    scanlk++;
	    E2S(lead, trail, s1, s2);
	    putc(s1, stream);
	    putc(s2, stream);
	} else if (lead & 0x80) {
	    putc(' ', stream);
	} else if (pretty && iscntrl(lead)) {
	    cntrl_putc(lead, stream);
	} else {
	    putc(lead, stream);
	}
    }
}

/*
 * DSGNT_JISX0208 - if `status' is ASCII, write "ESC $ B" to `stream'
 * and set `status' to JISX0208; otherwise do nothing.
 *
 * `status' must be a modifiable lvalue without side effects.  The test
 * is made on `status' itself, so the caller's variable may have any
 * name.
 */
#define DSGNT_JISX0208(stream, status) do {\
    if ((status) == ASCII) {\
	fputs("\033$B", (stream)); (status) = JISX0208;\
    }\
} while (0)

/*
 * DSGNT_ASCII - if `status' is JISX0208, write "ESC ( B" to `stream'
 * and set `status' to ASCII; otherwise do nothing.
 */
#define DSGNT_ASCII(stream, status) do {\
    if ((status) == JISX0208) {\
	fputs("\033(B", (stream)); (status) = ASCII;\
    }\
} while (0)

/*
 * jis7_fputs - write an EUC string to `stream' as 7-bit JIS.
 *
 * A pair of EUC-range bytes is written with the high bits stripped,
 * preceded by "ESC $ B" if the output is currently in ASCII.  Every
 * other byte is written in ASCII state (preceded by "ESC ( B" when
 * needed): bytes with the high bit set become a space, control
 * characters go through cntrl_putc() when `pretty' is nonzero, and the
 * rest are written as is.  The output is always left in ASCII state.
 */
static void
jis7_fputs(scanlk, stream, pretty)
     char *scanlk;
     FILE *stream;
     int pretty;
{
    int kanji_pos = ASCII;	/* ASCII or JISX0208 */
    unsigned char lead, trail;
    int paired;

    for (;;) {
	lead = *scanlk++;
	if (lead == '\0')
	    break;

	/* peek at the next byte only when the first one qualifies */
	trail = 0;
	paired = 0;
	if (IS_JEUC(lead)) {
	    trail = *scanlk;
	    paired = IS_JEUC(trail);
	}

	if (paired) {
	    scanlk++;
	    DSGNT_JISX0208(stream, kanji_pos);
	    putc(lead & 0x7f, stream);
	    putc(trail & 0x7f, stream);
	    continue;
	}

	/* everything that is not a kanji pair is emitted in ASCII */
	DSGNT_ASCII(stream, kanji_pos);
	if (lead & 0x80) {
	    putc(' ', stream);
	} else if (pretty && iscntrl(lead)) {
	    cntrl_putc(lead, stream);
	} else {
	    putc(lead, stream);
	}
    }

    DSGNT_ASCII(stream, kanji_pos);
}

/*
 * cntrl_putc - write a two-character rendering of control character `c'.
 *
 * Backspace, form feed, newline, carriage return and tab are written
 * as a backslash followed by b, f, n, r or t.  Anything else is written
 * as '^' followed by `c' with bit 0x40 flipped.
 */
static void
cntrl_putc(c, stream)
     char c;
     FILE *stream;
{
    int first = '\\';
    int second;

    if (c == '\b') {
	second = 'b';
    } else if (c == '\f') {
	second = 'f';
    } else if (c == '\n') {
	second = 'n';
    } else if (c == '\r') {
	second = 'r';
    } else if (c == '\t') {
	second = 't';
    } else {
	first = '^';
	second = c ^ 0x40;
    }

    putc(first, stream);
    putc(second, stream);
}


/*
 * Input:
 */
/*
 * ml_conv - convert string `s' in place with ml_conv_sbr().
 *
 * Nothing is done when `s' is NULL or when the file coding is
 * CS_NOCONV.  Returns `s' in every case.
 */
char *
ml_conv(s)
     char *s;
{
    coding_system_t file_cs;

    if (s != NULL) {
	file_cs = CSL_FILE(ml_coding_info);
	if (file_cs != CS_NOCONV)
	    ml_conv_sbr(s, file_cs);
    }
    return(s);
}

/* control bytes recognized on input */
#define ESC	'\033'
#define SO	'\016'
#define SI	'\017'
#define SS2	'\216'

/*
 * Two-byte conversions into EUC.  i1/i2 are the input bytes, o1/o2 the
 * lvalues receiving the result.  Each output is assigned exactly once
 * per use, so an expression such as `*out++' is acceptable for o1/o2;
 * the inputs must be free of side effects.  All three are wrapped in
 * do { } while (0) so that they behave as single statements.
 */

/* E2E - EUC to EUC: plain copy. */
#define E2E(i1, i2, o1, o2) do { (o1) = (i1); (o2) = (i2); } while (0)

/* I2E - 7-bit JIS to EUC: set the high bit of both bytes. */
#define I2E(i1, i2, o1, o2) \
    do { (o1) = ((i1) | 0x80); (o2) = ((i2) | 0x80); } while (0)

/*
 * S2E - Shift_JIS to EUC.  i1 and i2 must be modifiable lvalues; they
 * are clobbered during the conversion.
 */
#define S2E(i1, i2, o1, o2) do {\
    if ((i1) >= 0xe0) { (i1) -= (0xe0 - 0xa0); }\
    if ((i2) >= 0x80) { (i2)--; }\
    if ((i2) < 0x40 + 94) {\
	(o1) = (((((i1) - 0x81) * 2) + 0x21) | 0x80);\
	(o2) = (((i2) - (0x40 - 0x21)) | 0x80);\
    } else {\
	(o1) = (((((i1) - 0x81) * 2) + 0x21 + 1) | 0x80);\
	(o2) = (((i2) - (0x9e - 0x21)) | 0x80);\
    }\
} while (0)

/*
 * ml_conv_sbr - convert the NUL-terminated string `in' to EUC, in place.
 *
 * in: string to convert; the result is written back over it, starting
 *     at the same address, and is NUL-terminated.
 * cs: coding system used to interpret bytes with the high bit set:
 *     CS_SJIS treats them as Shift_JIS, any other value as EUC.
 *
 * Independently of `cs', the escape sequences listed in the body switch
 * between ASCII and JIS X 0208 state and are removed from the output;
 * while in JIS X 0208 state, pairs of bytes in 0x21..0x7e are turned
 * into EUC.  No branch writes more bytes than it has read.
 */
static void
ml_conv_sbr(in, cs)
     char *in;
     coding_system_t cs;
{
    char *out = in;
    int kanji_pos = ASCII;
    unsigned char c1, c2;
    
    while (c1 = *in++) {
	if (c1 == ESC) {
	    /* cp marks the first byte after ESC, for the fallback copy */
	    char *cp = in;
	    if ((c1 = *in++) == '$') {
		/* ESC $ B, ESC $ @ */
		if ((c1 = *in++) == 'B' || c1 == '@') {
		    kanji_pos = JISX0208;
		    continue;
		} else if (c1 == '(') {
		    if ((c1 = *in++) == 'B' || c1 == '@' || c1 == 'O') {
			/* special case: compound text */
			/* "ESC $ ( O" is for JIS X 0213-2000 */
			kanji_pos = JISX0208;
			continue;
		    }
		}
	    } else if (c1 == '(') {
		/* ESC ( B, ESC ( J, ESC ( H */
		if ((c1 = *in++) == 'B' || c1 == 'J' || c1 == 'H') {
		    /* "ESC ( H" is an old wrong implementation */
		    kanji_pos = ASCII;
		    continue;
		}
	    } else if (c1 == '&') {
		/* ESC & @ ESC $ B */
		if ((c1 = *in++) == '@' && (c1 = *in++) == ESC
		    && (c1 = *in++) == '$' && (c1 = *in++) == 'B') {
		    /* special case: JIS X 0208-1990 */
		    kanji_pos = JISX0208;
		    continue;
		}
	    }
	    /*
	     * Not a recognized sequence.  If the last byte read is not in
	     * 0x21..0x7e (this includes the terminating NUL and a further
	     * ESC), push it back so the main loop sees it again.
	     */
	    if (! IS_JIS7(c1))
		in--;
	    /* copy what followed the ESC; the ESC itself is dropped */
	    while (cp < in)
		*out++ = *cp++;
	    continue; /* invalid ESC is ignored. */
	}
	if (kanji_pos == JISX0208 && IS_JIS7(c1)) {
	    /* in JIS X 0208 state: a pair of 7-bit bytes becomes EUC */
	    c2 = *in;
	    if (IS_JIS7(c2)) {
		I2E(c1, c2, *out++, *out++);
		in++;
		continue;
	    }
	    /* no valid second byte: c1 falls through and is copied as is */
	}
	if (c1 & 0x80) {
	    if (cs == CS_SJIS) {
		/* a valid Shift_JIS pair is converted to EUC */
		if (IS_SJIS1(c1)) {
		    c2 = *in;
		    if (IS_SJIS2(c2)) {
			S2E(c1, c2, *out++, *out++);
			in++;
		    }
		}
	    } else {
		if (IS_JEUC(c1)) {
		    /* a valid EUC pair is kept unchanged */
		    c2 = *in;
		    if (IS_JEUC(c2)) {
			E2E(c1, c2, *out++, *out++);
			in++;
		    }
		} else if (c1 == (unsigned char) SS2) {
		    /* SS2 and the byte it introduces are both dropped */
		    c2 = *in;
		    if (IS_JEUC(c2))
			in++; /* skip */
		}
	    }
	    continue; /* invalid 8bit code is ignored. */
	}
	if (c1 == SI || c1 == SO)
	    continue; /* ISO-2022-JP cannot include SI, SO. (cf. RFC-1468) */
	*out++ = c1;
    }
    *out = '\0';
}
#endif /* JAPAN */