changeset 7:c20e4181370f

Assume UTF-8 input for base64/quoted-printable bodies when Content-Type declares charset=utf-8
author kono
date Sun, 04 Dec 2005 02:30:39 +0900
parents d802748a597d
children 77780b728543
files config/version config/version.c doc/version.rf sbr/ml_codeconv.c uip/mhlsbr.c
diffstat 5 files changed, 76 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/config/version	Sat Dec 03 17:50:08 2005 +0900
+++ b/config/version	Sun Dec 04 02:30:39 2005 +0900
@@ -1,1 +1,1 @@
-6.8.3.67
+6.8.3.70
--- a/config/version.c	Sat Dec 03 17:50:08 2005 +0900
+++ b/config/version.c	Sun Dec 04 02:30:39 2005 +0900
@@ -1,5 +1,5 @@
 #ifndef JAPAN
-char *version = "@(#)MH 6.8.3 #67[UCI] (leo.ie.u-ryukyu.ac.jp) of Sat Dec 3 17:48:52 JST 2005";
+char *version = "@(#)MH 6.8.3 #70[UCI] (leo.ie.u-ryukyu.ac.jp) of Sun Dec 4 02:28:42 JST 2005";
 #else /* JAPAN */
-char *version = "@(#)MH 6.8.3.JP-pre3.05-20010615 #67[UCI] (leo.ie.u-ryukyu.ac.jp) of Sat Dec 3 17:48:53 JST 2005";
+char *version = "@(#)MH 6.8.3.JP-pre3.05-20010615 #70[UCI] (leo.ie.u-ryukyu.ac.jp) of Sun Dec 4 02:28:42 JST 2005";
 #endif /* JAPAN */
--- a/doc/version.rf	Sat Dec 03 17:50:08 2005 +0900
+++ b/doc/version.rf	Sun Dec 04 02:30:39 2005 +0900
@@ -1,1 +1,1 @@
-.ds MH 6.8.3 #67[UCI]
+.ds MH 6.8.3 #70[UCI]
--- a/sbr/ml_codeconv.c	Sat Dec 03 17:50:08 2005 +0900
+++ b/sbr/ml_codeconv.c	Sun Dec 04 02:30:39 2005 +0900
@@ -416,21 +416,21 @@
     if ((s == NULL) || ((coding = CSL_FILE(ml_coding_info)) == CS_NOCONV)) 
       return(s);
     
-    ml_conv_sbr(s, coding, CE_UNKNOWN);
+    ml_conv_sbr(s, coding, CE_UNKNOWN, 0);
     return(s);
 }
 
 char *
-ml_conv_decode(s,encode)
+ml_conv_decode(s,encode,charset)
      char *s;
-     int encode;
+     int encode,charset;
 {
     coding_system_t coding;
     
     if ((s == NULL) || ((coding = CSL_FILE(ml_coding_info)) == CS_NOCONV)) 
       return(s);
     
-    ml_conv_sbr(s, coding, encode);
+    ml_conv_sbr(s, coding, encode, charset);
     return(s);
 }
 
@@ -453,11 +453,17 @@
 } 
 
 static char *
-cs_input_opt(int cs, int encode)
+cs_input_opt(int cs, int encode, int input_charset)
 {
     switch(encode) {
-    case CE_BASE64: return "-emB";
-    case CE_QUOTED: return "-emQ";
+    case CE_BASE64: 
+	if (input_charset==CS_UTF8)
+	    return "-emBW8";
+	return "-emB";
+    case CE_QUOTED: 
+	if (input_charset==CS_UTF8)
+	    return "-emQW8";
+	return "-emQ";
     }
     switch(cs) {
     case CS_JIS7: return "-Je";
@@ -470,12 +476,12 @@
 } 
 
 static void
-ml_conv_sbr(in, cs, encode)
+ml_conv_sbr(in, cs, encode, input_charset)
      char *in;
      coding_system_t cs;
-     int encode;
+     int encode, input_charset;
 {
-    char *opt = cs_input_opt(cs,encode);
+    char *opt = cs_input_opt(cs,encode,input_charset);
     int len = strlen(in);
     nkf_open((unsigned char *)opt,(unsigned char *)in,len,(unsigned char *)in,len,extend,0);
     nkf_end();
--- a/uip/mhlsbr.c	Sat Dec 03 17:50:08 2005 +0900
+++ b/uip/mhlsbr.c	Sun Dec 04 02:30:39 2005 +0900
@@ -263,6 +263,24 @@
     {NULL,               CE_UNKNOWN},
 };  
 
+#define CS_DEFAULT       0
+#define CS_JIS7          1
+#define CS_JEUC          2
+#define CS_SJIS          3
+#define CS_UTF8          4
+#define CS_NOCONV       99
+
+static int     content_type;         /* internal form */
+#define TYPE_FIELD  "Content-Type"
+static struct str2init str2charset[] = {	/* charset name -> CS_* code */
+    {"utf-8",            CS_UTF8},
+    {"iso-2022-jp",      CS_JIS7},
+    {"euc-jp",           CS_JEUC},
+    {"xsjis",            CS_SJIS},
+    {"shift_jis",        CS_SJIS},
+    {NULL,               CS_DEFAULT},	/* end of table; value for unmatched "X-" names */
+    {NULL,               CS_DEFAULT},	/* value for any other unmatched name */
+};
 #define istoken(c) \
         (!isspace (c) \
             && !iscntrl (c) \
@@ -963,6 +981,9 @@
 	    case FLDPLUS: 
 #ifdef ENCODING_FIELD
 		/* form mhn */
+		/*   We had better check Content-Type, because for */
+		/*   short utf-8 text, nkf may fail to detect the */
+		/*   correct code */
                 if (uleq (name, ENCODING_FIELD)) {
                     char   *cp,
                            *dp;
@@ -998,6 +1019,39 @@
                         s2i++;
                     *dp = c;
                     content_encoding = s2i -> mhnsi_value;
+		} else if (uleq (name, TYPE_FIELD)) {
+                    char   *cp,
+                           *dp;
+                    char    c;
+                    register struct str2init *s2i;
+
+                    cp = add (buf, NULLCP);
+                    while (state == FLDPLUS) {
+                        state = m_getfld (state, name, buf, sizeof buf, fp);
+                        cp = add (buf, cp);
+                    }
+                    while (isspace (*cp))
+                        cp++;
+		    while(cp) {
+			cp = index (cp, 'c');
+			if (!strncasecmp(cp,"charset",7)) {
+			    cp+=7;
+			    break;
+			} else {
+			    cp++;
+			}
+		    }
+                    while (isspace (*cp)||*cp=='=') cp++;
+		    
+                    for (dp = cp; istoken (*dp); dp++);
+                    c = *dp, *dp = '\0';
+                    for (s2i = str2charset; s2i -> mhnsi_key; s2i++)
+                        if (uleq (cp, s2i -> mhnsi_key))
+                            break;
+                    if (!s2i -> mhnsi_key && !uprf (cp, "X-"))
+                        s2i++;
+                    *dp = c;
+                    content_type = s2i -> mhnsi_value;
 		}
 #endif
 		for (ip = ignores; *ip; ip++)
@@ -1078,7 +1132,7 @@
 			    } else {
 				strcpy(buf, ++cp);
 				*cp = 0;
-				(void) ml_conv_decode(holder.c_text,content_encoding);
+				(void) ml_conv_decode(holder.c_text,content_encoding,content_type);
 				/* putcomp() may change the address
 				   of holder.c_text */
 				putcomp(c1, &holder, BODYCOMP);
@@ -1095,7 +1149,7 @@
 						 holder.c_text + buflen - cp,
 						 fp);
 			}
-			(void) ml_conv_decode(holder.c_text,content_encoding);
+			(void) ml_conv_decode(holder.c_text,content_encoding,content_type);
 			putcomp(c1, &holder, BODYCOMP);
 #else /* JAPAN */
 			while (state == BODY) {