/*
 * hanmail version 2.1 --- Hangul mail recovering program
 *
 * This program may be helpful when Hangul mail is encoded.
 * Currently, 6 types of encoding problem are handled.
 *
 *   Type 1. QP type --- hexa code sequence like =B0=A1=C7=B4...
 *   Type 2. base64 type --- each line has NO BLANK, and it is an ASCII
 *           character sequence of A-Z, a-z, 0-9, +, /
 *   Type 3. uuencode type --- each line begins with 'M'.
 *   Type 4. Just like a binary file and Hangul is enclosed by ctrl-N, ctrl-O.
 *   Type 5. Strange character sequence as if binary file
 *   Type 6. MIME type --- same as base64, and a binary file (not Hangul
 *           text) attached by internet mail
 *
 * This program is not perfect because the Hangul mail was decoded by a
 * trial-and-error method.
 *
 *   1997. 10. 17.  Kang, Seung-Shik at Kookmin University
 *   H.R.C --- Hangul engineering & Research Center
 *
 * Build note: define HANMAIL_NO_MAIN to compile the decoders without the
 * command-line driver (e.g. to link them into a test program).
 */

#include <ctype.h>
#include <stdio.h>
#include <string.h>

#define LINESIZE 999

/* Shared line buffers: 'line' holds the current input line, 'line2' the
   converted line produced by setMSB(). */
char line[LINESIZE], line2[LINESIZE];

/* Length of 's' without its trailing line terminator (LF, CR or CR LF). */
static size_t text_length(const char *s)
{
    size_t n = strlen(s);

    if (n > 0 && s[n - 1] == '\n')
        n--;
    if (n > 0 && s[n - 1] == '\r')
        n--;
    return n;
}

/*----------------------- 7-bit type begins -----------------------*/
/*
 * Recover Hangul mail when the MSB is reset.
 *
 * Use this when the Hangul text cannot be read and only strange characters
 * are displayed: the Hangul flag (MSB) was reset and each 8-bit Hangul byte
 * was reduced to 7-bit ASCII.  The MSB is set again on byte pairs that form
 * a plausible Hangul character.  The result is not perfect because two
 * adjacent ASCII characters may or may not have been Hangul.
 *
 *   1997. 1. 4.  Kang, Seung-Shik
 *   H.R.C --- Hangul engineering & Research Center
 */

/* High byte range of a KS C 5601 Hangul syllable: 0xB0 <--> 0xC8. */
int isKSC5601H(unsigned char ch)
{
    return (0xB0 <= ch && ch <= 0xC8);
}

/* Low byte range of a KS C 5601 character: 0xA1 <--> 0xFE. */
int isKSC5601L(unsigned char ch)
{
    return (0xA1 <= ch && ch <= 0xFE);
}

/*
 * Copy the NUL-terminated string 's' to 't', setting the MSB on byte pairs
 * that look like Hangul while 'hanflag' is on.
 *
 * ctrl-N (0x0e) switches 'hanflag' on, ctrl-O (0x0f) switches it off; both
 * are dropped from the output.  't' must have room for strlen(s)+1 bytes.
 */
void setMSB(unsigned char *s, unsigned char *t, int hanflag)
{
    int c, d;

    while (*s && *(s + 1)) {
        if (*s == 0x0e) {           /* ctrl-N */
            hanflag = 1;
            s++;
            continue;
        }
        if (*s == 0x0f) {           /* ctrl-O */
            hanflag = 0;
            s++;
            continue;
        }
        if (hanflag) {              /* added 1998. 5. 27. */
            c = *s | 0x80;
            d = *(s + 1) | 0x80;
            if (isKSC5601H(c) && isKSC5601L(d)) {
                *t++ = c;
                *t++ = d;
                s += 2;
                continue;
            }
        }
        *t++ = *s++;
    }

    /* Last single byte (usually LF).  A shift character left over at the
       very end is dropped like every other shift character. */
    if (*s && *s != 0x0e && *s != 0x0f)
        *t++ = *s;
    *t = '\0';
}

/* Apply setMSB() to every line of 'fpin' and write the result to 'fpout'.
   'hanflag' is the initial Hangul state of each line. */
void set_msb(FILE *fpin, FILE *fpout, int hanflag)
{
    while (fgets(line, LINESIZE, fpin)) {
        /* set MSB to 1 for plausible Hangul characters */
        setMSB((unsigned char *)line, (unsigned char *)line2, hanflag);
        fputs(line2, fpout);        /* Hangulized sentence */
    }
}
/*----------------------- 7-bit type ended -----------------------*/

/*----------------------- uudecode type begins -----------------------*/
/*
 * Convert Hangul e-mail of 'uuencode' type.
 * Use this when each line of the text begins with 'M' followed by
 * 60 strange characters.
 */

/* Decode one uuencoded ASCII character to its 6-bit value. */
int uu_conv(char ch)
{
    return (ch - 32) & 0x3f;
}

/*
 * Decode one uuencoded line and write the bytes to 'fpout'.
 *
 * The first character gives the number of decoded bytes (96, the back
 * quote, means zero).  Symbols missing at the end of a short line are
 * taken as zero instead of being read from beyond the line.
 */
void uudecode_line(char *line, FILE *fpout)
{
    size_t len = text_length(line);
    size_t i, k;
    int sym[4];                     /* 4 uuencoded ASCII characters */
    int n;                          /* bytes still to be written */

    n = (line[0] == 96) ? 0 : line[0] - 32;
    for (i = 1; n > 0; i += 4) {
        for (k = 0; k < 4; k++)
            sym[k] = (i + k < len) ? uu_conv(line[i + k]) : 0;

        /* derive 3 bytes from 4 six-bit values */
        putc(((sym[0] << 2) | (sym[1] >> 4)) & 0xff, fpout);
        if (!--n) break;
        putc(((sym[1] << 4) | (sym[2] >> 2)) & 0xff, fpout);
        if (!--n) break;
        putc(((sym[2] << 6) | sym[3]) & 0xff, fpout);
        if (!--n) break;
    }
}

/*
 * Copy 'fpin' to 'fpout', decoding every section enclosed by a
 * "begin ..." line and an "end" line.  Lines outside such a section are
 * copied unchanged.
 */
void uudecode(FILE *fpin, FILE *fpout)
{
    while (fgets(line, LINESIZE, fpin)) {
        /* "begin " with the blank: a text line such as "beginning ..."
           must not start a decoding section */
        if (strncmp(line, "begin ", 6) != 0) {
            fputs(line, fpout);     /* maybe it is an English line */
            continue;
        }
        while (fgets(line, LINESIZE, fpin)) {
            if (strncmp(line, "end", 3) == 0)
                break;
            uudecode_line(line, fpout);
        }
    }
}
/*----------------------- uudecode type ended -----------------------*/

/*----------------------- hexacode type begins -----------------------*/
/*
 * Read Hangul e-mail of QP type.
 * Use this when Hangul e-mail is displayed as the symbol '=' and hexa
 * code, like '=B0=A1'.  In this case each byte of Hangul (KS C 5601)
 * was converted into hexa code.
 */

/* Check two upper-case hexa digits against the high byte range
   0xB0 <--> 0xC8. */
int isKSC5601H2(int c1, int c2)
{
    if (c1 == 'B' && (('0' <= c2 && c2 <= '9') || ('A' <= c2 && c2 <= 'F')))
        return 1;
    else if (c1 == 'C' && ('0' <= c2 && c2 <= '8'))
        return 1;
    else
        return 0;
}

/* Check two upper-case hexa digits against the low byte range.  As
   written it accepts every pair from "A0" to "FF". */
int isKSC5601L2(int c1, int c2)
{
    if (('A' <= c1 && c1 <= 'F') &&
        (('0' <= c2 && c2 <= '9') || ('A' <= c2 && c2 <= 'F')))
        return 1;
    else
        return 0;
}

/* Value of one hexa digit character; the caller guarantees it is one. */
static int hexa_digit(int c)
{
    c = toupper(c);
    return isdigit(c) ? c - '0' : c - 'A' + 10;
}

/* Convert 2 hexa digit characters to their value.
   E.g. "B0" is converted to 0xB0, "20" to 0x20. */
int tohexa(int c1, int c2)
{
    return (hexa_digit(c1) << 4) | hexa_digit(c2);
}

/*
 * Hexa type encoded sequence is converted to KS C 5601 Hangul code.
 *
 * Decoded:   "=XX" in the Hangul high byte range, a following "=XX" in
 *            the low byte range, and "=20" (blank).
 * Dropped:   '=' at the end of a line (soft line break), together with
 *            the line terminator.
 * Unchanged: everything else, including any other "=XX".
 */
void hexacode(FILE *fpin, FILE *fpout)
{
    int ch, c1, c2, u1, u2, nx;
    int want_low = 0;       /* a high byte was just written */

    ch = getc(fpin);
    while (ch != EOF) {
        if (ch != '=') {            /* ASCII character or blank */
            putc(ch, fpout);
            want_low = 0;
            ch = getc(fpin);
            continue;
        }

        c1 = getc(fpin);
        if (c1 == EOF) {
            putc('=', fpout);
            break;
        }
        if (c1 == '\n' || c1 == '\r') {     /* soft line break */
            if (c1 == '\r') {
                nx = getc(fpin);
                if (nx != '\n' && nx != EOF)
                    ungetc(nx, fpin);
            }
            /* 'want_low' is kept: the low byte may follow on the next line */
            ch = getc(fpin);
            continue;
        }
        if (c1 == '=') {            /* "==": the 2nd '=' may start a code */
            putc('=', fpout);
            want_low = 0;
            ch = c1;
            continue;
        }

        c2 = getc(fpin);
        if (c2 == EOF) {
            putc('=', fpout);
            putc(c1, fpout);
            break;
        }

        u1 = toupper(c1);
        u2 = toupper(c2);
        if (want_low && isKSC5601L2(u1, u2)) {  /* KSC low byte */
            putc(tohexa(c1, c2), fpout);
            want_low = 0;
        } else if (isKSC5601H2(u1, u2)) {       /* KSC high byte */
            putc(tohexa(c1, c2), fpout);
            want_low = 1;
        } else if (c1 == '2' && c2 == '0') {    /* '=20' --> blank */
            putc(' ', fpout);
            want_low = 0;
        } else {            /* English text, not of "=B0" type */
            putc('=', fpout);
            putc(c1, fpout);
            want_low = 0;
            if (c2 == '=') {        /* it may start the next code */
                ch = c2;
                continue;
            }
            putc(c2, fpout);
        }
        ch = getc(fpin);
    }
}
/*----------------------- hexacode type ended -----------------------*/

/*----------------------- base64 type begins -----------------------*/
/*
 * Convert Hangul e-mail of 'base64' type.
 * Use this when Hangul e-mail is displayed as a sequence of 'A-Z', 'a-z',
 * '0-9', '+', '/', and '=' (at the end).
 */

/* Decode one base64 character to its 6-bit value.
   '=' (padding) gives -1, any other character gives 0. */
int conv(int ch)
{
    if ('A' <= ch && ch <= 'Z') return ch - 'A';
    else if ('a' <= ch && ch <= 'z') return ch - 'a' + 0x1a;
    else if ('0' <= ch && ch <= '9') return ch + 4;
    else if (ch == '+') return 0x3e;
    else if (ch == '/') return 0x3f;
    else if (ch == '=') return -1;
    else return 0;
}

/* Nonzero when 'ch' belongs to the base64 alphabet (padding excluded). */
int base64char(char ch)
{
    return (isalnum((unsigned char)ch) || ch == '+' || ch == '/');
}

/*
 * Check if 'line' is a base64 type or not.
 *
 * Every character up to the line terminator must belong to the base64
 * alphabet; at most two '=' are accepted, at the very end only.
 * Empty lines and lines containing a blank are rejected.
 */
int is_base64_type(char *line)
{
    size_t n = text_length(line);
    size_t i;
    int pad;

    for (pad = 0; pad < 2 && n > 0 && line[n - 1] == '='; pad++)
        n--;
    if (n == 0)
        return 0;
    for (i = 0; i < n; i++) {
        if (!base64char(line[i]))
            return 0;
    }
    return 1;
}

/*
 * Decode one base64 line and write the bytes to 'fpout'.
 *
 * Three bytes are derived from four base64 characters:
 *   1. 6 bits are generated from each base64 character.
 *   2. 6x4 = 24 bits are rearranged to make 8x3 = 24 bits.
 * The line terminator is ignored.  Padding, or the end of the line inside
 * a group, ends that group after the bytes that are complete.
 */
void base64line(char *line, FILE *fpout)
{
    size_t leng = text_length(line);
    size_t i, k;
    int sym[4];                     /* 4 base64 characters, -1 if absent */

    for (i = 0; i < leng; i += 4) {
        for (k = 0; k < 4; k++)
            sym[k] = (i + k < leng) ? conv((unsigned char)line[i + k]) : -1;

        if (sym[0] < 0 || sym[1] < 0)
            continue;               /* not even one complete byte */
        putc(((sym[0] << 2) | (sym[1] >> 4)) & 0xff, fpout);
        if (sym[2] < 0)
            continue;
        putc(((sym[1] << 4) | (sym[2] >> 2)) & 0xff, fpout);
        if (sym[3] < 0)
            continue;
        putc(((sym[2] << 6) | sym[3]) & 0xff, fpout);
    }
}

/* Copy 'fpin' to 'fpout', decoding each line that is of base64 type. */
void base64(FILE *fpin, FILE *fpout)
{
    while (fgets(line, LINESIZE, fpin)) {
        if (is_base64_type(line))   /* check if 'line' is a base64 type */
            base64line(line, fpout);
        else
            fputs(line, fpout);     /* maybe it is an English sentence */
    }
}
/*----------------------- base64 type ended -----------------------*/

/*----------------------- MIME type begins -----------------------*/
/*
 * MIME type decoding program --> attached file by netscape
 *
 *   1997. 7. 2.  Kang, Seung-Shik at Kookmin University
 *   H.R.C --- Hangul engineering & Research Center
 */

/*
 * Search 'substr' in 'line'.  When it is not there and the next input
 * line is a folded header line (it begins with a blank or a tab), that
 * line is read into 'line' (a buffer of LINESIZE bytes) and searched
 * instead.  A line that is not a continuation is left unread.
 * If 'echo' is not NULL, a continuation line that was read is copied
 * to it.
 */
static char *find_in_header(char *line, const char *substr,
                            FILE *fp, FILE *echo)
{
    char *p = strstr(line, substr);
    int next;

    if (p)
        return p;
    next = getc(fp);
    if (next == EOF)
        return NULL;
    ungetc(next, fp);
    if (next != ' ' && next != '\t')
        return NULL;
    if (!fgets(line, LINESIZE, fp))
        return NULL;
    if (echo)
        fputs(line, echo);
    return strstr(line, substr);
}

/* If 'substr' is a substring of current 'line', then return it.
   Otherwise, check if 'substr' is a substring of the folded next line. */
char *my_strstr(char *line, char *substr, FILE *fp)
{
    return find_in_header(line, substr, fp, NULL);
}

/*
 * Copy the file name that begins at 'value' (the text following
 * "filename=") to 'out'.  Quotes are removed and only the last path
 * component is kept, so that a mail cannot choose the directory the
 * file is written to.  'out' becomes empty when no name is found.
 */
static void attachment_name(const char *value, char *out, size_t outsize)
{
    const char *base;
    size_t len, i;

    if (outsize == 0)
        return;
    if (*value == '"') {
        value++;
        len = strcspn(value, "\"\r\n");
    } else
        len = strcspn(value, "; \t\r\n");

    base = value;
    for (i = 0; i < len; i++) {
        if (value[i] == '/' || value[i] == '\\')
            base = value + i + 1;
    }
    len -= (size_t)(base - value);
    if (len >= outsize)
        len = outsize - 1;
    memcpy(out, base, len);
    out[len] = '\0';
}

/*
 * Copy 'fpin' to 'fpout' and retrieve base64 attachments.
 *
 * A header block that has a "Content-Type" with "name=", a
 * "Content-Transfer-Encoding: base64" and a "Content-Disposition" with
 * "filename=" announces an attachment.  The base64 lines after the blank
 * line that ends the block are decoded into a new file of that name in
 * the current directory; they are not copied to 'fpout'.
 */
void mime(FILE *fpin, FILE *fpout)
{
    char filename[999], *p;
    int c1 = 0, c2 = 0, c3 = 0;
    FILE *newfp;

    filename[0] = '\0';
    while (fgets(line, LINESIZE, fpin)) {
        fputs(line, fpout);

        if (text_length(line) == 0) {       /* end of a header block */
            if (c1 && c2 && c3 && filename[0]) {
                /* extract attached file */
                newfp = fopen(filename, "wb");
                if (!newfp)
                    fprintf(stderr, "Cannot create file <%s>.\n", filename);
                else {
                    while (fgets(line, LINESIZE, fpin)) {
                        if (!is_base64_type(line)) {
                            fputs(line, fpout);  /* maybe, it is a plain text */
                            break;
                        }
                        base64line(line, newfp);
                    }
                    fclose(newfp);
                    fprintf(stderr, "File <%s> was retrieved.\n", filename);
                }
            }
            /* the checks belong to one header block only */
            c1 = c2 = c3 = 0;
            filename[0] = '\0';
        }
        else if (strstr(line, "Content-Type") &&
                 find_in_header(line, "name=", fpin, fpout))
            c1 = 1;     /* 1st check */
        else if (strstr(line, "Content-Transfer-Encoding: base64"))
            c2 = 1;     /* 2nd check of base64 */
        else if (strstr(line, "Content-Disposition") &&
                 (p = find_in_header(line, "filename=", fpin, fpout)) != NULL) {
            /* copy the name now: 'line' is overwritten by the next read */
            attachment_name(p + 9, filename, sizeof filename);
            c3 = 1;     /* 3rd check */
        }
    }

    puts("+====================================================================+");
    puts("| CHECK IF NEW FILE(S) ARE RETRIEVED FROM MIME TYPED FILE!!! |");
    puts("| FILE NAME(S) ARE THE SAME AS IN MIME TYPED ORGINAL FILE!!! |");
    puts("+====================================================================+");
}
/*----------------------- MIME type ended -----------------------*/

/*----------------------- mail type detection begins -----------------------*/

/*
 * Check if 'line' is mime type or not.
 * The three attachment headers must follow each other directly; the
 * lines after 'line' are read from 'fpin' into 'line'.
 */
int is_mime_type(char *line, FILE *fpin)
{
    /* 1st check */
    if (!strstr(line, "Content-Type") || !my_strstr(line, "name=", fpin))
        return 0;

    /* 2nd check */
    if (!fgets(line, LINESIZE, fpin))
        return 0;
    if (!strstr(line, "Content-Transfer-Encoding: base64"))
        return 0;

    /* 3rd check of base64 */
    if (!fgets(line, LINESIZE, fpin))
        return 0;
    return strstr(line, "Content-Disposition") != NULL &&
           my_strstr(line, "filename=", fpin) != NULL;
}

/* Check if 'line' is a 7-bit type: more than 2 characters below the
   blank, the line terminator not counted.  'line' is not modified. */
int is_7bit_type(char *line)
{
    size_t n = text_length(line);
    size_t k;
    int i = 0;

    for (k = 0; k < n; k++) {
        if ((unsigned char)line[k] < ' ')
            i++;
    }
    return (i > 2 ? 1 : 0);
}

/*
 * Check if 'line' is QP type or not: somewhere in the line a Hangul
 * high byte code is directly followed by a low byte code, like "=B0=A1".
 */
int is_hexa_type(char *line)
{
    const char *p;

    for (p = strchr(line, '='); p; p = strchr(p + 1, '=')) {
        /* the tests stop at the terminating NUL of a short line */
        if (p[1] && p[2] && p[3] == '=' && p[4] && p[5] &&
            isKSC5601H2(toupper((unsigned char)p[1]),
                        toupper((unsigned char)p[2])) &&  /* high byte */
            isKSC5601L2(toupper((unsigned char)p[4]),
                        toupper((unsigned char)p[5])))    /* low byte */
            return 1;
    }
    return 0;
}

/* Check if 'line' is uuencode type or not: 'M' followed by 60 characters
   and no blank, the line terminator not counted.  'line' is not
   modified. */
int is_uuencode_type(char *line)
{
    size_t n = text_length(line);

    return (line[0] == 'M' && n == 61 && !memchr(line, ' ', n));
}

/* Check if 'line' has Hangul enclosed by ctrl-N and ctrl-O: both occur,
   and equally often. */
int is_ctrl_NO_type(char *line)
{
    int ctrl_N = 0, ctrl_O = 0;

    while (*line) {
        if (*line == 14) ctrl_N++;
        else if (*line == 15) ctrl_O++;
        line++;
    }
    return (ctrl_N && ctrl_N == ctrl_O);
}

/*
 * Automatic detection of mailtype.
 * Reads 'fp' until one line decides the type, then rewinds 'fp';
 * 'fp' must therefore be seekable.  Returns 1..6, base64 (2) by default.
 */
int getmailtype(FILE *fp)
{
    int mailtype = 2;   /* default: base64 type */
    int detected = 0;
    size_t len;

    while (!detected && fgets(line, LINESIZE, fp)) {
        if (is_mime_type(line, fp)) {
            mailtype = 6;
            detected = 1;
            continue;
        }
        len = text_length(line);    /* is_mime_type may have read on */
        detected = 1;
        if (len > 10 && is_uuencode_type(line))
            mailtype = 3;
        else if (len > 30 && is_base64_type(line))
            mailtype = 2;
        else if (strstr(line, "\x1B$)C") ||
                 (len > 10 && is_ctrl_NO_type(line)))
            mailtype = 4;
        else if (len > 20 && is_7bit_type(line))
            mailtype = 5;
        else if (len > 10 && is_hexa_type(line))
            mailtype = 1;
        else
            detected = 0;
    }

    rewind(fp);
    return mailtype;
}
/*----------------------- mail type detection end -----------------------*/

/*----------------------- driver routine begins -----------------------*/

/* Print the usage message to the standard output. */
void synopsis(void)
{
    puts("\n$ hanmail [-mailtype] [infile] [outfile]\n");
    puts(" No I/O files are specified. ---> works for standard I/O device.");
    puts(" Mail-type can be given manually like \"hanmail -2 infile outfile\".");
    puts(" Manual specification of mail type should be one of the following.\n");
    puts(" -1 --- QP type : KSC5601 hexa code sequence like =B0=A1=C7=B4...");
    puts(" -2 --- base64 : NO BLANKS and ASCII sequence of A-Z,a-z,0-9,+,/");
    puts(" -3 --- uudecode : each line begins with 'M' and line-size is 61");
    puts(" -4 --- 7bit code: Hangul is enclosed by ctrl-N and ctrl-O");
    puts(" -5 --- 7bit code: strange character sequence as if binary file");
    puts(" -6 --- MIME type: attached files by INTERNET MAIL are retrieved\n");
    puts(" You may use it at 'elm' by pipeline command '|', then 'hanmail | more'.");
    puts(" You may get C source file of this program at ftp://ham.hansung.ac.kr/.\n");
    puts(" Seung-Shik Kang, Kookmin univ. sskang@cs.kookmin.ac.kr, Tel.02-910-4800\n");
}

/* Nonzero when the first argument asks for the usage message. */
static int is_help_request(int argc, char *argv[])
{
    return argc > 1 &&
           (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help"));
}

/*
 * Specify input/output file ptr. from 'argc' and 'argv'.
 *
 * An optional first argument "-N" (N a digit) is stored in '*mailtype'.
 * Without file names the standard input and output are used.  An output
 * file that already exists is never overwritten.
 * Returns 1 on success.  Returns 0 after printing the usage message or
 * an error message; no file is left open in that case.
 */
int set_iofile(FILE **fpin, FILE **fpout, int argc, char *argv[],
               int *mailtype)
{
    int nargs = argc;
    FILE *probe;

    if (is_help_request(argc, argv)) {
        synopsis();
        return 0;
    }
    if (nargs > 1 && argv[1][0] == '-' &&
        isdigit((unsigned char)argv[1][1])) {
        *mailtype = argv[1][1] - '0';
        nargs--;
    }

    switch (nargs) {    /* no. of command-line arguments */
    case 1:
        *fpin = stdin;
        *fpout = stdout;
        break;
    case 2:
        if (!(*fpin = fopen(argv[argc-1], "rb"))) {
            printf("No such file <%s>.\n", argv[argc-1]);
            return 0;
        }
        *fpout = stdout;
        break;
    case 3:
        if (!(*fpin = fopen(argv[argc-2], "rb"))) {
            printf("No such file <%s>.\n", argv[argc-2]);
            return 0;
        }
        probe = fopen(argv[argc-1], "rb");
        if (probe) {
            fclose(probe);
            fclose(*fpin);
            printf("File <%s> exists. Remove it first.\n", argv[argc-1]);
            return 0;
        }
        if (!(*fpout = fopen(argv[argc-1], "wb"))) {
            fclose(*fpin);
            printf("Cannot create file <%s>.\n", argv[argc-1]);
            return 0;
        }
        break;
    default:
        synopsis();
        return 0;
    }

    return 1;
}

#ifndef HANMAIL_NO_MAIN
/*
 * Return a stream with the contents of 'fp' that can be rewound.
 * 'fp' itself is returned when it is seekable; otherwise (a pipe) its
 * contents are copied to a temporary file.  Returns NULL when no
 * temporary file can be made.
 */
static FILE *make_seekable(FILE *fp)
{
    FILE *tmp;
    int ch;

    if (fseek(fp, 0L, SEEK_CUR) == 0)
        return fp;
    tmp = tmpfile();
    if (!tmp)
        return NULL;
    while ((ch = getc(fp)) != EOF)
        putc(ch, tmp);
    rewind(tmp);
    return tmp;
}

/* Driver: exit status is 0 on success (or after "-h"), 1 on error. */
int main(int argc, char *argv[])
{
    FILE *fpin, *fpout, *seekable;
    int mailtype = 0;
    int status = 0;

    if (!set_iofile(&fpin, &fpout, argc, argv, &mailtype))
        return is_help_request(argc, argv) ? 0 : 1;

    if (!mailtype) {
        /* detection reads ahead and rewinds, which a pipe cannot do */
        seekable = make_seekable(fpin);
        if (!seekable) {
            fprintf(stderr, "Cannot buffer the input for mail type detection.\n");
            if (fpin != stdin) fclose(fpin);
            if (fpout != stdout) fclose(fpout);
            return 1;
        }
        fpin = seekable;
        mailtype = getmailtype(fpin);
    }

    switch (mailtype) {
    case 1:     /* QP-type: hexacode encoding */
        hexacode(fpin, fpout);
        break;
    case 2:     /* base64 encoding */
        base64(fpin, fpout);
        break;
    case 3:     /* uuencoded file at UNIX system */
        uudecode(fpin, fpout);
        break;
    case 4:     /* MSB is reset inside of ctrl-N and ctrl-O */
        set_msb(fpin, fpout, 0);
        break;
    case 5:     /* MSB is reset */
        set_msb(fpin, fpout, 1);
        break;
    case 6:     /* MIME type of attached files by internet mail */
        mime(fpin, fpout);
        break;
    default:
        synopsis();
        break;
    }

    if (fpin != stdin)
        fclose(fpin);
    if (fpout != stdout && fclose(fpout) != 0)
        status = 1;     /* the output could not be written completely */
    return status;
}
#endif /* HANMAIL_NO_MAIN */
/*----------------------- driver routine ended -----------------------*/