1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* Copyright (c) 2000, Sun Microsystems, Inc. All rights reserved. */ 22 23 #pragma ident "@(#)lcl_mimehead.c 1.2 00/01/06 SMI" 24 25 /*//////////////////////////////////////////////////////////////////////// 26 Copyright (c) 1992 Electrotechnical Laboratry (ETL) 27 28 Permission to use, copy, modify, and distribute this material for any 29 purpose and without fee is hereby granted, provided that the above 30 copyright notice and this permission notice appear in all copies, and 31 that the name of ETL not be used in advertising or publicity pertaining 32 to this material without the specific, prior written permission of an 33 authorized representative of ETL. 34 ETL MAKES NO REPRESENTATIONS ABOUT THE ACCURACY OR SUITABILITY OF THIS 35 MATERIAL FOR ANY PURPOSE. IT IS PROVIDED "AS IS", WITHOUT ANY EXPRESS 36 OR IMPLIED WARRANTIES. 
/////////////////////////////////////////////////////////////////////////
Content-Type:	program/C; charset=US-ASCII
Program:	mimehead.c  (MIME header encoder/decoder)
Author:		Yutaka Sato <ysato (at) etl.go.jp>
Description:
    MIME PartII (RFC1522) encoder/decoder for multibyte ISO-2022 charsets
-----------------------------------------------------------------------
  EN_FGETC ->[ Encode ->encode_word <= encode_one ]->EN_FPUTC ->EN_FPUTC1
	     [        ->noencode_word             ]

  DE_FGETC ->[ Decode ->decord_word <= scan_eword ]
	     [                      -> disp_word  ]
	     [        ->nodecode_word             ]->DE_FPUTC ->DE_FPUTC1
-----------------------------------------------------------------------
Bugs:
    Any linear-white-space between encoded-words should be ignored
    Should support any charsets & encodings in ISO-2022 or ISO-8859
    960620 adoption to lcl library.
///////////////////////////////////////////////////////////////////////*/

#include <stdio.h>
#include <stdlib.h>	/* malloc, free, getenv */
#include <string.h>	/* strlen, strcpy, strcat, strncpy, strchr, strstr */
#include "lcl_str_stdio.h"
FILE *tmpfile();
FILE *str_fopen();
/*
 * strchr() and getenv() used to be declared by hand here; they now come
 * from <string.h> and <stdlib.h>.
 */
#define MAX_LNSIZE	1024
typedef char MsgLine[MAX_LNSIZE];

#define DEBUG(a)


#define ES_NONE	0
#define ES_IN	1	/* type-A encoding (not supported yet...) */
#define ES_OUT	2	/* type-B encoding */

int MIME_SPACE_ENCODING = ES_OUT;

/*//////////////////////////////////////////////////////////////////////*/

#define MAXCOL			72
#define DISPCOLS		80

#define SPACECTL_LENG		4
#define SWCODE_LENG		4 /*length(encoded(charset sw ESC seq))*/
#define MIN_ENCODEDLEN		4 /* minimum length of encoded text(base64)*/

#define ENCODE_BEGIN		"=?"
#define CHARSET_DONE		'?'
#define ENCODING_DONE		'?'
86 #define ENCODE_DONE "?=" 87 88 #define LF '\n' 89 #define NL '\n' 90 #define CR '\r' 91 #define TAB '\t' 92 #define SPACE ' ' 93 #define FORMFEED '\f' 94 95 #define LWSP_CHAR(ch) (ch == TAB || ch == SPACE) 96 #define FOLD_CHAR SPACE 97 #define SPECIALS "()<>@,;:\\\".[]" 98 #define DELIMITER(ch) (ch==LF || LWSP_CHAR(ch) || strchr(SPECIALS,ch)) 99 #define IS_PRE_DELIMITER(ch) DELIMITER(ch) 100 #define IS_POST_DELIMITER(ch) (ch==EOF || DELIMITER(ch)) 101 102 #define NLNL 0x80000001 103 #define XC_EN_FOLD 0x80000002 104 #define XC_DE_UNFOLD 0x80000003 105 #define XC_DE_CATENATE 0x80000004 106 #define XC_DE_EWORD_WAS_PUT 0x80000005 107 #define XC_DE_FORMFEED 0x80000006 108 #define XC_DE_DEL_LWSP 0x80000007 109 #define XC_DE_IGN_LWSP 0x80000008 110 #define XC_DE_OFLUSH 0x80000009 111 112 #define ENCODE_NONE 0 113 #define ENCODE_BASE64 "B" 114 #define ENCODE_QP "Q" 115 116 /* 117 * ISO-2022 LOCAL 118 */ 119 #define DELSP_PRE 1 /* delete prefixed LWSP */ 120 #define DELSP_POST 2 /* delete postfixed LWSP */ 121 #define DELSP_BOTH 3 /* delete bothside LWSP */ 122 123 /*//////////////////////////////////////////////////////////////////////*/ 124 /* character sets 125 */ 126 127 /* 128 * ISO-2022 character set switch sequences 129 */ 130 #define ESC 033 131 #define GOTO_1BCODE '(' 132 #define GOTO_2BCODE '$' 133 134 /* 135 * basic charset 136 */ 137 typedef struct { 138 char iso2022[8]; 139 char *name; 140 int delspace; /* as indication code for space deletion */ 141 } Charset; 142 143 144 #define B_US_ASCII 1 145 #define B_JP_ASCII 2 146 #define B_JP_KANJI1 3 147 #define B_JP_KANJI2 4 148 149 static Charset BasicCharsets[] = { 150 {"", "UNKNOWN" }, 151 {"\033(B", "US_ASCII" }, 152 {"\033(J", "JISX0201-R", DELSP_BOTH}, 153 {"\033$@", "JISX0208-1", DELSP_PRE }, 154 {"\033$B", "JISX0208-2", DELSP_POST}, 155 0 156 }; 157 #define CODESW_SEQ(bset) BasicCharsets[bset].iso2022 158 #define GOTO_ASCII_SEQ BasicCharsets[B_US_ASCII].iso2022 159 #define GOTO_ASCII_SEQ_LEN 
strlen(GOTO_ASCII_SEQ) 160 161 static char *DELSP_SEQUENCE[8] = { 162 0, 163 BasicCharsets[B_JP_KANJI1].iso2022, 164 BasicCharsets[B_JP_KANJI2].iso2022, 165 BasicCharsets[B_JP_ASCII].iso2022, 166 0 167 }; 168 #define DELSP_OP(bset) BasicCharsets[bset].delspace 169 #define DELSP_SEQ(delop) DELSP_SEQUENCE[delop] 170 171 172 /* 173 * MIME charset (may include encoding system and several charsets) 174 */ 175 char M_US_ASCII[] = "US-ASCII"; 176 char M_ISO_8859_8[] = "ISO-8859-8"; 177 char M_ISO_2022_JP[] = "ISO-2022-JP"; 178 179 typedef struct { 180 int local; 181 char codesw; 182 char *mcharset; 183 int basic_charset; 184 char *encoding; 185 } MimeCharset; 186 187 static MimeCharset Codes1[16] = { 188 {1, 'B', M_US_ASCII, B_US_ASCII, ENCODE_NONE }, 189 {1, 'J', M_US_ASCII, B_JP_ASCII, ENCODE_NONE }, 190 0 191 }; 192 static MimeCharset Codes2[16] = { 193 {1, '@', M_ISO_2022_JP, B_JP_KANJI1, ENCODE_BASE64 }, 194 {1, 'B', M_ISO_2022_JP, B_JP_KANJI2, ENCODE_BASE64 }, 195 0 196 }; 197 198 MIME_localCharset(mcharset) 199 char *mcharset; 200 { int csi; 201 char *cs; 202 203 for(csi = 0; cs = Codes1[csi].mcharset; csi++) 204 if( lcl_strcasecmp(cs,mcharset) == 0 ) 205 return Codes1[csi].local; 206 207 for(csi = 0; cs = Codes2[csi].mcharset; csi++) 208 if( lcl_strcasecmp(cs,mcharset) == 0 ) 209 return Codes2[csi].local; 210 return 0; 211 } 212 213 /*//////////////////////////////////////////////////////////////////////*/ 214 /* 215 */ 216 typedef struct { 217 int c_ch; /* character code value */ 218 char* c_mcharset; /* MIME charset */ 219 int c_bcharset; /* basic charset */ 220 } CHAR; 221 static CHAR NULL_CHAR = { 0, M_US_ASCII, B_US_ASCII }; 222 223 /* 224 * ROUND ROBBIN BUFFER 225 */ 226 #define RRBUFF_SIZE 8 227 typedef struct { 228 CHAR b_BUFF[RRBUFF_SIZE]; 229 int b_putx; 230 int b_getx; 231 } RRBUFF; 232 static RRBUFF NULL_RRBUFF = {0}; 233 234 typedef struct { 235 FILE *in_file; 236 int in_column; 237 char *in_mcharset; /* current MIME charset */ 238 int in_bcharset; /* 
current basic charset */ 239 int in_bcharset_got; /* bcharset was got explicitly */ 240 char *in_encoding; /* B or Q */ 241 int in_prevch; /* EN_FGETC() local */ 242 RRBUFF in_BUFF; /* EN_FGETC() local */ 243 RRBUFF in_PUSHED; /* EN_UNGETC() -> EN_FGETC() */ 244 245 FILE *out_file; 246 int out_column; 247 int out_lastputch; /* EN_FPUTC() -> encode_word() */ 248 int out_whichASCII; /* disp_word() -> DE_FPUTC(),C1() */ 249 int out_enLWSP; /* EN_FPUTC() local */ 250 CHAR out_prevCHAR; /* EN_FPUTC1() local */ 251 CHAR out_deLWSP[4]; /* DE_FPUTCX() local */ 252 int out_prev_bcharset; /* DE_FPUTC1X() local */ 253 254 union { int all; struct { unsigned int 255 MIMEencoded :1, /* I: this field is MIME encoded */ 256 end_CRLF :1, /* I: line terminates with CRLF */ 257 eat_SPACE :2, /* I: decoder space eraser */ 258 ext_SPACE :1, /* O: external space encoding */ 259 unfolding :1, /* O: unfold decoder output */ 260 ign_SPACE :1, /* O: ignore postfix space */ 261 gen_SPACE :1, /* O: just after space was generated */ 262 after_eword :1; /* O: just after eword has put */ 263 } mode; } io_MODES; 264 } INOUT; 265 #define ENCODE_EXT io_MODES.mode.ext_SPACE 266 267 static INOUT_init(io,in,out) 268 INOUT *io; 269 FILE *in,*out; 270 { 271 io->in_file = in; 272 io->in_column = 0; 273 io->in_mcharset = M_US_ASCII; 274 io->in_bcharset = B_US_ASCII; 275 io->in_bcharset_got = 0; 276 io->in_encoding = ENCODE_NONE; 277 io->in_prevch = EOF; 278 io->in_BUFF = NULL_RRBUFF; 279 io->in_PUSHED = NULL_RRBUFF; 280 281 io->out_file = out; 282 io->out_column = 0; 283 io->out_lastputch = 0; 284 io->out_whichASCII = B_US_ASCII; 285 io->out_enLWSP = 0; 286 io->out_prevCHAR = NULL_CHAR; 287 io->out_deLWSP[0] = NULL_CHAR; 288 io->out_prev_bcharset = B_US_ASCII; 289 290 io->io_MODES.all = 0; 291 io->ENCODE_EXT = MIME_SPACE_ENCODING == ES_OUT; 292 } 293 294 #define in_CODESW_SEQ(io) CODESW_SEQ(io->in_bcharset) 295 #define MIME_ENCODED io_MODES.mode.MIMEencoded 296 #define EAT_SPACE io_MODES.mode.eat_SPACE 
297 #define UNFOLD_LINE io_MODES.mode.unfolding 298 #define IGN_POST_SPACE io_MODES.mode.ign_SPACE 299 #define SPACE_WAS_GEN io_MODES.mode.gen_SPACE 300 #define EWORD_WAS_PUT io_MODES.mode.after_eword 301 302 303 #define NEXT_RRBUFF(BP) (\ 304 ((RRBUFF_SIZE <= ++(BP)->b_putx) ? ((BP)->b_putx = 0):0), \ 305 &(BP)->b_BUFF[(BP)->b_putx] \ 306 ) 307 308 #define PUT_RRBUFF(BP,CH) (\ 309 /* must check full here */ \ 310 ((RRBUFF_SIZE <= ++(BP)->b_putx) ? ((BP)->b_putx = 0):0), \ 311 ((BP)->b_BUFF[(BP)->b_putx] = *CH) \ 312 ) 313 314 #define GET_RRBUFF(BP,CH) (\ 315 ((BP)->b_putx == (BP)->b_getx) ? 0 : (\ 316 ((RRBUFF_SIZE <= ++(BP)->b_getx) ? ((BP)->b_getx = 0):0), \ 317 (*CH = (BP)->b_BUFF[(BP)->b_getx]), \ 318 &(BP)->b_BUFF[(BP)->b_getx] \ 319 )) 320 321 322 static int end_CRLF; 323 static NLfgetc(in) 324 FILE *in; 325 { int ch; 326 327 ch = fgetc(in); 328 if( ch == CR ){ 329 ch = fgetc(in); 330 if( ch == LF ){ 331 end_CRLF = 1; 332 ch = NL; 333 }else{ 334 if( ch != EOF ) 335 ungetc(ch,in); 336 ch = CR; 337 } 338 }else 339 if( ch == LF ){ 340 end_CRLF = 0; 341 ch = NL; 342 } 343 return ch; 344 } 345 static NLfputc(ch,out) 346 FILE *out; 347 { 348 if( ch == NL && end_CRLF ) 349 fputc(CR,out); 350 fputc(ch,out); 351 } 352 353 static EN_UNGETC(CH,io) 354 INOUT *io; 355 CHAR *CH; 356 { 357 PUT_RRBUFF(&io->in_PUSHED,CH); 358 } 359 360 static CHAR *EN_FGETC(io) 361 INOUT *io; 362 { int ch; 363 MimeCharset *csw; 364 int ci; 365 char *mcharset; 366 FILE *infile = io->in_file; 367 CHAR *CH; 368 RRBUFF *BP; 369 370 BP = &io->in_BUFF; 371 CH = NEXT_RRBUFF(BP); 372 373 BP = &io->in_PUSHED; 374 if( GET_RRBUFF(BP,CH) ){ 375 ch = CH->c_ch; 376 goto EXIT; 377 } 378 379 *CH = NULL_CHAR; 380 GET1: 381 ch = NLfgetc(infile); 382 GOT1: 383 if( ch != ESC ) 384 goto exit; 385 386 /* got ESC character */ 387 if( (ch = fgetc(infile)) == EOF ) 388 goto exit; 389 390 if( io->in_prevch == NL ) 391 if( LWSP_CHAR(ch) ) 392 goto GET1; 393 394 switch( ch ){ 395 default: goto exit; 396 case 
GOTO_1BCODE: csw = Codes1; break; 397 case GOTO_2BCODE: csw = Codes2; break; 398 } 399 if( (ch = fgetc(infile)) == EOF ) 400 goto exit; 401 402 for( ci = 0; mcharset = csw[ci].mcharset; ci++ ){ 403 if( ch == csw[ci].codesw ){ 404 io->in_mcharset = mcharset; 405 io->in_encoding = csw[ci].encoding; 406 407 if( io->in_column == 0 && io->in_bcharset_got ) 408 io->EAT_SPACE = DELSP_OP(io->in_bcharset); 409 410 io->in_bcharset_got = 1; 411 io->in_bcharset = csw[ci].basic_charset; 412 break; 413 } 414 } 415 416 ch = NLfgetc(infile); 417 if( ch == ESC ) 418 goto GOT1; 419 exit: 420 CH->c_ch = ch; 421 CH->c_mcharset = io->in_mcharset; 422 CH->c_bcharset = io->in_bcharset; 423 424 EXIT: 425 io->in_prevch = ch; 426 io->in_column++; 427 if( ch == NL || ch == NLNL ){ 428 io->in_column = 0; 429 io->EAT_SPACE = 0; 430 io->MIME_ENCODED = 0; 431 } 432 433 return CH; 434 } 435 436 static ew_overhead(charset,encoding) 437 char *charset,*encoding; 438 { MsgLine overhead; 439 440 sprintf(overhead,"=?%s?%s?x?= ",charset,encoding); 441 return strlen(overhead) - 1; 442 } 443 444 static EN_FPUTC0(ch,io) 445 INOUT *io; 446 { 447 if( ch == NL ){ 448 io->SPACE_WAS_GEN = 0; 449 io->out_column = 0; 450 }else io->out_column += 1; 451 NLfputc(ch,io->out_file); 452 } 453 454 /* 455 * extra folding before a lengthy encoded-word 456 * put =?charset?encoding? 
at the beginning of non-ASCII 457 * put SPACE before it if the previous char is not DELIMITER 458 * 459 * put ?= at the end of non-ASCII 460 * put SPACE after it if the next char is not DELIMITER 461 */ 462 static EN_FPUTC1(ch,io,charset,encoding) 463 INOUT *io; 464 char *charset,*encoding; 465 { char *cp; 466 MsgLine line; 467 468 if( charset != io->out_prevCHAR.c_mcharset ){ 469 470 /* AT THE END OF A ENCODED WORD */ 471 if( io->out_prevCHAR.c_mcharset != M_US_ASCII ){ 472 for( cp = ENCODE_DONE; *cp; cp++ ) 473 EN_FPUTC0(*cp,io); 474 475 if( !DELIMITER(ch) ){ 476 EN_FPUTC0(SPACE,io); 477 io->SPACE_WAS_GEN = 1; 478 } 479 } 480 481 /* AT THE BEGINNING OF A ENCODED WORD */ 482 if( charset != M_US_ASCII ){ 483 int reqlen,remlen; 484 485 if( !DELIMITER(io->out_prevCHAR.c_ch) ) 486 EN_FPUTC0(SPACE,io); 487 488 reqlen = ew_overhead(charset,encoding); 489 remlen = MAXCOL - (io->out_column + reqlen); 490 491 if( (remlen-SWCODE_LENG) < MIN_ENCODEDLEN ){ 492 EN_FPUTC0(NL,io); 493 EN_FPUTC0(FOLD_CHAR,io); 494 } 495 sprintf(line,"=?%s?%s?",charset,encoding); 496 for( cp = line; *cp; cp++ ) 497 EN_FPUTC0(*cp,io); 498 io->MIME_ENCODED = 1; 499 } 500 } 501 502 if( ch != EOF ){ 503 if( ch != NL ){ 504 /* split at LWSP_CHAR ... 
*/ 505 if( !encoding ) 506 if( io->MIME_ENCODED ) 507 if( MAXCOL <= io->out_column ) 508 if( LWSP_CHAR(ch) ) 509 EN_FPUTC0(NL,io); 510 } 511 EN_FPUTC0(ch,io); 512 } 513 io->out_prevCHAR.c_mcharset = charset; 514 io->out_prevCHAR.c_ch = ch; 515 } 516 517 #define PENDING_LWSP out_enLWSP 518 519 static EN_FPUTC(ch,io,charset,encoding) 520 INOUT *io; 521 char *charset,*encoding; 522 { int lwsp; 523 524 if( (ch & 0xFF) == ch ) 525 io->out_lastputch = ch; 526 527 if( ch == XC_EN_FOLD ){ 528 if( lwsp = io->PENDING_LWSP ) 529 io->PENDING_LWSP = 0; 530 else{ 531 lwsp = SPACE; 532 io->SPACE_WAS_GEN = 1; 533 } 534 EN_FPUTC1(NL,io,M_US_ASCII,ENCODE_NONE); 535 EN_FPUTC1(lwsp,io,M_US_ASCII,ENCODE_NONE); 536 }else{ 537 if( lwsp = io->PENDING_LWSP ){ 538 EN_FPUTC1(lwsp,io,M_US_ASCII,ENCODE_NONE); 539 io->PENDING_LWSP = 0; 540 } 541 if(LWSP_CHAR(ch)&& charset==M_US_ASCII&& encoding==ENCODE_NONE) 542 io->PENDING_LWSP = ch; 543 else EN_FPUTC1(ch,io,charset,encoding); 544 } 545 } 546 547 /* 548 * PASS THROUGH AN ASCII WORD 549 */ 550 static noencode_word(io) 551 INOUT *io; 552 { CHAR *CH,*NCH; 553 int ch,inx; 554 int canbe_folded; 555 MsgLine line; 556 557 canbe_folded = io->MIME_ENCODED; 558 for(inx = 0; inx <= MAXCOL; inx++){ 559 CH = EN_FGETC(io); 560 ch = CH->c_ch; 561 562 if( io->in_mcharset != M_US_ASCII ){ 563 EN_UNGETC(CH,io); 564 break; 565 } 566 if( ch == EOF ) 567 break; 568 if( ch == NL ){ 569 line[inx++] = NL; 570 NCH = EN_FGETC(io); 571 switch( NCH->c_ch ){ 572 case NL: ch = NLNL; break; 573 case EOF: break; 574 default: EN_UNGETC(NCH,io); break; 575 } 576 break; 577 } 578 /* 579 if( canbe_folded ) 580 if( DELIMITER(ch) ) 581 */ 582 /* might be harmful for tools don't treat unfolding properly */ 583 if( LWSP_CHAR(ch) ) 584 { 585 line[inx++] = ch; 586 break; 587 } 588 line[inx] = ch; 589 } 590 line[inx] = 0; 591 592 if( line[0] != NL ) 593 if( canbe_folded )/* safety for non-MIMEr like inews/Cnews */ 594 if( MAXCOL+2 < io->out_column+inx ) 595 
EN_FPUTC(XC_EN_FOLD,io,M_US_ASCII,ENCODE_NONE); 596 597 { int ch,ci; 598 for( ci = 0; ch = line[ci]; ci++ ) 599 EN_FPUTC(ch,io,M_US_ASCII,ENCODE_NONE); 600 } 601 return ch; 602 } 603 static encode_one(encoding,ins,ilen,outs,osize) 604 char *encoding,*ins,*outs; 605 { int len; 606 607 if( lcl_strcasecmp(encoding,ENCODE_QP) == 0 ) 608 len = str_toqp(ins,ilen,outs,osize); 609 else 610 if( lcl_strcasecmp(encoding,ENCODE_BASE64) == 0 ) 611 len = str_to64(ins,ilen,outs,osize); 612 else{ 613 strncpy(outs,ins,ilen); 614 len = ilen; 615 } 616 outs[len] = 0; 617 return len; 618 } 619 620 static encode_word(io) 621 INOUT *io; 622 { char *charset; /* charset of this encoded-word */ 623 char *encoding; /* encoding of this encoded-word */ 624 MsgLine ins,outs; 625 int inx,outx,prefold; 626 int char_bytes,nchar,reqlen,remlen,outlen; 627 char ch,encoded_ch; 628 int prech = 0; 629 int postch = 0; 630 int delop = 0; 631 CHAR *CH; 632 633 charset = io->in_mcharset; 634 char_bytes = 2; 635 encoding = io->in_encoding; 636 reqlen = ew_overhead(charset,encoding); 637 638 /* 639 * firstly, add the code switch sequence in a encoded format 640 */ 641 strcpy(ins,in_CODESW_SEQ(io)); 642 inx = strlen(ins); 643 644 if( io->ENCODE_EXT ){ 645 strcat(ins,in_CODESW_SEQ(io)); 646 outlen = encode_one(encoding,ins,strlen(ins),outs,sizeof(outs)); 647 }else outlen = encode_one(encoding,ins,inx,outs,sizeof(outs)); 648 649 /* 650 * if remaining length is not enough, fold the line 651 */ 652 remlen = MAXCOL - (io->out_column + reqlen); 653 if( (remlen-outlen) <= MIN_ENCODEDLEN ){ 654 remlen = MAXCOL - (1 + reqlen); 655 prefold = 1; 656 }else prefold = 0; 657 658 /* 659 * scan a word to be encoded expanding byte by byte. 
660 * every encoded-texts end with the switch to M_US_ASCII 661 */ 662 for(nchar = 0; ;nchar++){ 663 strcpy(&ins[inx],GOTO_ASCII_SEQ); 664 outlen = encode_one(encoding,ins,inx+GOTO_ASCII_SEQ_LEN, 665 outs,sizeof(outs)); 666 667 CH = EN_FGETC(io); 668 ch = CH->c_ch; 669 if( ch == EOF || ch == NL || CH->c_mcharset != charset ){ 670 if( io->in_mcharset == M_US_ASCII ) 671 strcpy(&ins[inx],in_CODESW_SEQ(io)); 672 /* ASCII family like JIS-X0201-Roman */ 673 674 675 /* ENCODE A LWSP BETWEEN ENCODED-WORDS */ 676 /*if( 4 <= (remlen-outlen) )*/ 677 if( LWSP_CHAR(ch) ){ 678 CHAR *NCH; 679 680 NCH = EN_FGETC(io); 681 if( NCH->c_mcharset == charset ){ 682 EN_UNGETC(NCH,io); 683 inx += strlen(&ins[inx]); 684 ins[inx++] = ch; 685 ins[inx] = 0; 686 if( 12 <= (remlen-outlen) ){ 687 /* TO BE CATENATED */ 688 strcpy(&ins[inx],CODESW_SEQ(NCH->c_bcharset)); 689 inx = strlen(ins); 690 continue; 691 } 692 /* TO BE SPLITTED */ 693 postch = ch; 694 break; 695 } 696 EN_UNGETC(NCH,io); 697 EN_UNGETC(CH,io); 698 break; 699 } 700 if( ch != EOF ) 701 EN_UNGETC(CH,io); 702 703 postch = ch; 704 break; 705 } 706 if( nchar % char_bytes == 0 && remlen <= outlen ){ 707 EN_UNGETC(CH,io); 708 break; 709 } 710 711 ins[inx++] = ch; 712 ins[inx] = 0; 713 } 714 inx += strlen(&ins[inx]); 715 716 if( nchar == 0 ) 717 return ch; 718 719 if( prefold ) 720 EN_FPUTC(XC_EN_FOLD,io,M_US_ASCII,ENCODE_NONE); 721 722 /* 723 * output the scanned word 724 */ 725 if( io->ENCODE_EXT ){ /* external space encoding for ISO-2022-JP */ 726 delop = 0; 727 prech = io->out_lastputch; 728 729 /* if pre-SPACE will be inserted... X-), or was inserted */ 730 if( !IS_PRE_DELIMITER(prech) || io->SPACE_WAS_GEN && !prefold ) 731 delop |= DELSP_PRE; 732 io->SPACE_WAS_GEN = 0; 733 734 /* if post SPACE will be inserted... 
*/ 735 if( !IS_POST_DELIMITER(postch) && postch != 0 ) 736 delop |= DELSP_POST; 737 738 if( delop ){ 739 MsgLine tmp; 740 741 strcpy(tmp,ins); 742 strcpy(ins,DELSP_SEQ(delop)); 743 strcat(ins,tmp); 744 inx = strlen(ins); 745 } 746 } 747 748 outlen = encode_one(encoding,ins,inx,outs,sizeof(outs)); 749 for(outx = 0; outx < outlen; outx++){ 750 encoded_ch = outs[outx]; 751 if( encoded_ch == NL ) 752 continue; 753 EN_FPUTC(encoded_ch,io,charset,encoding); 754 } 755 return ch; 756 } 757 /* it may be desirable to fold before an encoded-word, which length is 758 * shorter than MAXCOL, but will be splitted in current line. */ 759 760 761 MIME_headerEncode0(in,out) 762 FILE *in,*out; 763 { char *ip,*op; 764 INOUT iob,*io = &iob; 765 CHAR *CH; 766 int ch; 767 768 INOUT_init(io,in,out); 769 for(;;){ 770 CH = EN_FGETC(io); 771 EN_UNGETC(CH,io); 772 773 ch = CH->c_ch; 774 if( CH->c_mcharset == M_US_ASCII ){ 775 ch = noencode_word(io); 776 if( ch == EOF ) 777 break; 778 if( ch == NLNL ) 779 break; 780 }else{ 781 for(;;){ 782 ch = encode_word(io); 783 if( io->in_mcharset == M_US_ASCII ) 784 break; 785 } 786 if( ch == EOF ) 787 break; 788 } 789 } 790 if( ch == EOF ) 791 EN_FPUTC(ch,io,M_US_ASCII,ENCODE_NONE); 792 return ch; 793 } 794 MIME_headerEncode(in,out) 795 FILE *in,*out; 796 { int ch; 797 MsgLine line; 798 799 ch = MIME_headerEncode0(in,out); 800 if( ch != EOF ){ 801 NLfputc(NL,out); 802 while( fgets(line,sizeof(line),in) != NULL ) 803 fputs(line,out); 804 } 805 } 806 807 808 /* 809 * FINAL OUTPUT WITH ISO-2022 CHARACTER SET SWITCH SEQUENCE 810 */ 811 static DE_FPUTC1X(CH,Out) 812 CHAR *CH; 813 INOUT *Out; 814 { FILE *out; 815 int cset; 816 int ch; 817 818 out = Out->out_file; 819 ch = CH->c_ch; 820 cset = CH->c_bcharset; 821 822 if( cset != Out->out_prev_bcharset ){ 823 fputs(CODESW_SEQ(cset),out); 824 Out->out_prev_bcharset = cset; 825 } 826 Out->EWORD_WAS_PUT = 0; 827 828 switch( ch ){ 829 case EOF: return(0); 830 case XC_DE_OFLUSH: return(0); 831 case NL: 
Out->out_column = 0; break; 832 default: Out->out_column++; break; 833 } 834 NLfputc(ch,out); 835 } 836 837 /* 838 * PUT ASCII (or CONTROL) CHARACTER IN A CURRENT ASCII FAMILY 839 */ 840 static DE_FPUTC1(ch,io) 841 INOUT *io; 842 { CHAR CH; 843 844 CH.c_bcharset = io->out_whichASCII; 845 CH.c_ch = ch; 846 DE_FPUTC1X(&CH,io); 847 } 848 849 /* 850 * PUT CHARACTER CONTROLLING "LWSP" AND UNFOLDING 851 */ 852 #define CLEAR_LWSP(io) (io->out_deLWSP[0].c_ch = 0) 853 854 static DE_FPUTCX(CH,io) 855 CHAR *CH; 856 INOUT *io; 857 { int ch; 858 CHAR PCH; 859 860 if( io == 0 ) 861 return(0); 862 863 ch = CH->c_ch; 864 865 if( ch == XC_DE_DEL_LWSP ){ 866 CLEAR_LWSP(io); 867 return(0); 868 } 869 if( ch == XC_DE_IGN_LWSP ){ 870 io->IGN_POST_SPACE = 1; 871 return(0); 872 } 873 if( io->IGN_POST_SPACE ){ 874 if( LWSP_CHAR(ch) ) 875 return(0); 876 877 if( (ch & 0xFF) == ch ) 878 io->IGN_POST_SPACE = 0; 879 } 880 if( ch == XC_DE_EWORD_WAS_PUT ){ 881 io->EWORD_WAS_PUT = 1; 882 return(0); 883 } 884 if( ch == XC_DE_CATENATE ){ 885 /* REMOVE PENDING SPACE IF EXISTS */ 886 if( ! 
io->EWORD_WAS_PUT ){ 887 PCH = io->out_deLWSP[0]; 888 if( PCH.c_ch == NL ){ 889 /* discard the NEWLINE */ 890 PCH = io->out_deLWSP[1]; 891 } 892 if( PCH.c_ch ) 893 DE_FPUTC1X(&PCH,io); 894 } 895 CLEAR_LWSP(io); 896 return(0); 897 } 898 if( ch == XC_DE_UNFOLD ){ 899 if( io->out_deLWSP[0].c_ch == NL ){ 900 PCH = io->out_deLWSP[1]; 901 if( PCH.c_ch ){ 902 DE_FPUTC1X(&PCH,io); 903 CLEAR_LWSP(io); 904 } 905 } 906 return(0); 907 } 908 if( ch == XC_DE_FORMFEED ){ 909 PCH = io->out_deLWSP[0]; 910 if( PCH.c_ch ){ 911 DE_FPUTC1X(&PCH,io); 912 PCH = io->out_deLWSP[1]; 913 if( PCH.c_ch ) 914 DE_FPUTC1X(&PCH,io); 915 CLEAR_LWSP(io); 916 } 917 DE_FPUTC1(FORMFEED,io); 918 DE_FPUTC1(NL,io); 919 fflush(io->out_file); 920 return(0); 921 } 922 923 /* FLUSH LWSP */ 924 PCH = io->out_deLWSP[0]; 925 if( PCH.c_ch ){ 926 if( PCH.c_ch == NL ){ 927 if( LWSP_CHAR(ch) ){ /* linear-white-space */ 928 io->out_deLWSP[1] = *CH; 929 io->out_deLWSP[2].c_ch = 0; 930 return(0); 931 } 932 DE_FPUTC1(NL,io); 933 PCH = io->out_deLWSP[1]; 934 if( PCH.c_ch ) 935 DE_FPUTC1X(&PCH,io); 936 }else DE_FPUTC1X(&PCH,io); 937 CLEAR_LWSP(io); 938 } 939 940 /* ENBUFFER LWSP */ 941 if( io->UNFOLD_LINE ){ 942 /* 943 if( ch == NL || io->EWORD_WAS_PUT && LWSP_CHAR(ch)){ 944 */ 945 if( ch == NL || LWSP_CHAR(ch)){ 946 io->out_deLWSP[0] = *CH; 947 io->out_deLWSP[1].c_ch = 0; 948 return(0); 949 } 950 } 951 DE_FPUTC1X(CH,io); 952 } 953 954 /* 955 * PUT ASCII (or CONTROL) CHARACTER IN A CURRENT ASCII FAMILY 956 */ 957 static DE_FPUTC(ch,io) 958 INOUT *io; 959 { CHAR CH; 960 961 CH.c_bcharset = io->out_whichASCII; 962 CH.c_ch = ch; 963 DE_FPUTCX(&CH,io); 964 } 965 966 967 static scan_eword(in,reads,charset,encoding,text) 968 FILE *in; 969 char *reads,*charset,*encoding,*text; 970 { int i,cs; 971 972 for(i = 0; ;i++){ 973 cs = NLfgetc(in); 974 if(cs==NL || cs==EOF) goto error; 975 *reads++ = cs; 976 if(cs==CHARSET_DONE) break; 977 charset[i] = cs; 978 charset[i+1] = 0; 979 } 980 for(i = 0; ;i++){ 981 cs = NLfgetc(in); 982 
if(cs==NL || cs==EOF) goto error; 983 *reads++ = cs; 984 if(cs==ENCODING_DONE) break; 985 encoding[i] = cs; 986 encoding[i+1] = 0; 987 } 988 for(i = 0; i < 80; i++ ){ 989 cs = NLfgetc(in); 990 if(cs==NL || cs==EOF) goto error; 991 *reads++ = cs; 992 if(cs == ENCODE_DONE[0]){ 993 cs = NLfgetc(in); 994 if(cs==NL || cs==EOF) goto error; 995 *reads++ = cs; 996 if( cs == ENCODE_DONE[1] ){ 997 text[i] = 0; 998 break; 999 } 1000 ungetc(cs,in); 1001 cs = ENCODE_DONE[0]; 1002 } 1003 text[i] = cs; 1004 text[i+1] = 0; 1005 } 1006 return 0; 1007 error: 1008 *reads = 0; 1009 return cs; 1010 } 1011 1012 static disp_word(Out,dtext,len) 1013 INOUT *Out; 1014 char *dtext; 1015 { FILE *DecodedText; 1016 INOUT tmpInb,*tmpIn = &tmpInb; 1017 int dch; 1018 int sdlen,dlen; 1019 int eat_space = 0; 1020 CHAR *CH; 1021 1022 if( len <= 0 ) 1023 return 0; 1024 1025 if( Out ) 1026 sdlen = disp_word(0,dtext,len); 1027 1028 DecodedText = str_fopen(dtext,len); 1029 INOUT_init(tmpIn,DecodedText,NULL); 1030 1031 dlen = 0; 1032 for(;;){ 1033 CH = EN_FGETC(tmpIn); 1034 if( (dch = CH->c_ch) == EOF ) 1035 break; 1036 1037 if( Out && dlen == 0 ){ 1038 if( Out->ENCODE_EXT ) 1039 eat_space = tmpIn->EAT_SPACE; 1040 else eat_space = 0; 1041 1042 if( eat_space & DELSP_PRE ){ 1043 DE_FPUTC(XC_DE_DEL_LWSP,Out); 1044 DEBUG(DE_FPUTC('{',Out)); 1045 }else{ 1046 if( Out->out_column + sdlen < DISPCOLS ) 1047 DE_FPUTC(XC_DE_CATENATE,Out); 1048 else DE_FPUTC(XC_DE_OFLUSH,Out); 1049 } 1050 } 1051 if( Out ) 1052 DE_FPUTCX(CH,Out); 1053 dlen++; 1054 } 1055 str_fclose(DecodedText); 1056 1057 if(Out){ 1058 DE_FPUTC(XC_DE_EWORD_WAS_PUT,Out); 1059 Out->MIME_ENCODED = 1; 1060 Out->out_whichASCII = CH->c_bcharset; /* CH == EOF */ 1061 1062 if( eat_space & DELSP_POST ){ 1063 DEBUG(DE_FPUTC(XC_DE_OFLUSH,Out)); 1064 DEBUG(DE_FPUTC('}',Out)); 1065 DE_FPUTC(XC_DE_IGN_LWSP,Out); 1066 } 1067 } 1068 return dlen; 1069 } 1070 1071 /* 2.0 add ret_charset parameter */ 1072 static decode_word(io, ret_charset) 1073 INOUT *io; 1074 char 
**ret_charset; 1075 { MsgLine reads,charset,encoding,itext,dtext; 1076 int ilen,dsize,len,pad,dlen; 1077 int eow; 1078 1079 *charset = *encoding = *itext = 0; 1080 eow = scan_eword(io->in_file,reads,charset,encoding,itext); 1081 1082 if(charset[0]){ 1083 if(*ret_charset) 1084 free(*ret_charset); 1085 *ret_charset = (char *)malloc(strlen(charset) + 1); 1086 if(*ret_charset) 1087 strcpy(*ret_charset, charset); 1088 } 1089 1090 if( eow == NL || eow == EOF ){ 1091 DE_FPUTC(XC_DE_OFLUSH,io); 1092 fprintf(io->out_file,"=?%s",reads); 1093 if( eow != EOF ) 1094 ungetc(eow,io->in_file); 1095 return eow; 1096 } 1097 1098 /* 1099 if( !MIME_localCharset(charset) ){ 1100 DE_FPUTC(XC_DE_OFLUSH,io); 1101 fprintf(io->out_file,"=?%s?%s?%s?=",charset,encoding,itext); 1102 if( eow ) 1103 fprintf(io->out_file,"%c",eow); 1104 return 0; 1105 } 1106 */ 1107 1108 ilen = strlen(itext); 1109 dsize = sizeof(dtext); 1110 if( lcl_strcasecmp(encoding,ENCODE_QP) == 0 ) 1111 len = str_fromqp(itext,ilen,dtext,dsize); 1112 else 1113 if( lcl_strcasecmp(encoding,ENCODE_BASE64) == 0 ) 1114 len = str_from64(itext,ilen,dtext,dsize); 1115 else{ 1116 strcpy(dtext,itext); 1117 len = ilen; 1118 } 1119 disp_word(io,dtext,len); 1120 return 0; 1121 } 1122 1123 1124 static nodecode_word(io,ch) 1125 INOUT *io; 1126 { 1127 if( io->MIME_ENCODED ){ 1128 /* if the next noencoded-word ends before DISPCOLS ...*/ 1129 if( io->out_column < MAXCOL ) 1130 DE_FPUTC(XC_DE_UNFOLD,io); 1131 else{ 1132 /* the following is experimental */ 1133 if( LWSP_CHAR(ch) ){ 1134 DE_FPUTC(XC_DE_OFLUSH,io); 1135 if( MAXCOL <= io->out_column ){ 1136 DE_FPUTC(NL,io); 1137 1138 /* this ch shuld be put if the next character is not LWSP_CHAR... (?) 
*/ 1139 DE_FPUTC(ch,io); 1140 return(0); 1141 } 1142 } 1143 } 1144 } 1145 DE_FPUTC(ch,io); 1146 } 1147 1148 static DE_FGETC(io) 1149 INOUT *io; 1150 { FILE *in; 1151 int ch; 1152 1153 in = io->in_file; 1154 ch = NLfgetc(in); 1155 if( ch == FORMFEED ){ 1156 ch = NLfgetc(in); 1157 if( ch == NL ) 1158 ch = XC_DE_FORMFEED; 1159 else{ 1160 if( ch != EOF ) 1161 ungetc(ch,in); 1162 ch = FORMFEED; 1163 } 1164 io->MIME_ENCODED = 0; 1165 }else 1166 if( ch == NL ){ 1167 ch = NLfgetc(in); 1168 if( !LWSP_CHAR(ch) )/* at the top of a filed */ 1169 io->MIME_ENCODED = 0; 1170 1171 if( ch == NL ) 1172 ch = NLNL; 1173 else{ 1174 if( ch != EOF ) 1175 ungetc(ch,in); 1176 ch = NL; 1177 } 1178 } 1179 return ch; 1180 } 1181 /* 2.0 add charset parameter */ 1182 /* type bodytoo to int */ 1183 MIME_headerDecode(in,out,bodytoo, charset) 1184 FILE *in,*out; 1185 int bodytoo; 1186 char **charset; 1187 { int ch,next_ch; 1188 INOUT iob,*io = &iob; 1189 1190 INOUT_init(io,in,out); 1191 io->UNFOLD_LINE = 1; 1192 1193 *charset = (char *)NULL; 1194 1195 for(;;){ 1196 ch = DE_FGETC(io); 1197 if( ch == EOF ) 1198 break; 1199 1200 if( ch == ENCODE_BEGIN[0] ){ 1201 ch = NLfgetc(in); 1202 if( ch == EOF ) 1203 break; 1204 if( ch == ENCODE_BEGIN[1] ){ 1205 if( decode_word(io, charset) == EOF ) 1206 break; 1207 }else{ 1208 DE_FPUTC(ENCODE_BEGIN[0],io); 1209 ungetc(ch,in); 1210 } 1211 }else{ 1212 if( ch == NLNL ){ 1213 io->UNFOLD_LINE = 0; 1214 DE_FPUTC(NL,io); 1215 DE_FPUTC(NL,io); 1216 break; 1217 } 1218 nodecode_word(io,ch); 1219 } 1220 } 1221 io->UNFOLD_LINE = 0; 1222 if( ch != EOF && bodytoo ) 1223 while( (ch = NLfgetc(in)) != EOF ) 1224 DE_FPUTC(ch,io); 1225 DE_FPUTC(EOF,io); 1226 } 1227 1228 /* 2.0 add charset parameter */ 1229 /* type osize to int */ 1230 MIME_strHeaderDecode(ins,outs,osize,charset) 1231 char *ins,*outs; 1232 int osize; 1233 char **charset; 1234 { FILE *In,*Out; 1235 int oi; 1236 1237 In = str_fopen(ins,strlen(ins)); 1238 Out = str_fopen(outs,osize); 1239 
MIME_headerDecode(In,Out,1,charset); 1240 fflush(Out); 1241 for(oi = 0; outs[oi]; oi++) 1242 if((outs[oi] & 0xFF) == 0xFF) 1243 strcpy(&outs[oi],&outs[oi+1]); 1244 str_fclose(In); 1245 str_fclose(Out); 1246 } 1247 MIME_strHeaderEncode(ins,outs,osize) 1248 char *ins,*outs; 1249 { FILE *In,*Out; 1250 1251 In = str_fopen(ins,strlen(ins)); 1252 Out = str_fopen(outs,osize); 1253 MIME_headerEncode(In,Out); 1254 fflush(Out); 1255 str_fclose(In); 1256 str_fclose(Out); 1257 } 1258 1259 is_MIME_header(fp) 1260 FILE *fp; 1261 { MsgLine line; 1262 int off; 1263 1264 off = ftell(fp); 1265 while( fgets(line,sizeof(line),fp) != NULL ){ 1266 if( *line == NL ) 1267 break; 1268 if( *line == CR && line[1] == NL ) 1269 break; 1270 1271 if( strstr(line,ENCODE_BEGIN) ){ 1272 fseek(fp,off,0); 1273 return 1; 1274 } 1275 } 1276 fseek(fp,off,0); 1277 return 0; 1278 } 1279 1280 FILE * 1281 MIME_tmpHeaderEncode(fp,savFILE) 1282 FILE *fp,savFILE; 1283 { FILE *tin,*tfp; 1284 MsgLine line; 1285 int ch; 1286 1287 if( fp == NULL ) 1288 return(0); 1289 tin = tmpfile(); 1290 while( fgets(line,sizeof(line),fp) != NULL ){ 1291 fputs(line,tin); 1292 if(strcmp(line,".\n")==0 || strcmp(line,".\r\n")==0) 1293 break; 1294 } 1295 fflush(tin); 1296 fseek(tin,0,0); 1297 1298 tfp = tmpfile(); 1299 ch = MIME_headerEncode0(tin,tfp); 1300 if( ch == NLNL ){ 1301 fputs("\r\n",tfp); 1302 while( fgets(line,sizeof(line),tin) != NULL ) 1303 fputs(line,tfp); 1304 } 1305 fputs(".\r\n",tfp); 1306 fflush(tfp); 1307 fseek(tfp,0,0); 1308 1309 fclose(tin); 1310 return tfp; 1311 } 1312 1313 /*//////////////////////////////////////////////////////////////////////*/ 1314 MIME_localStrColumns(str) 1315 char *str; 1316 { INOUT iob,*io = &iob; 1317 FILE *sfp; 1318 int len; 1319 CHAR *CH; 1320 1321 sfp = str_fopen(str,strlen(str)); 1322 INOUT_init(io,sfp,NULL); 1323 1324 len = 0; 1325 1326 for(;;){ 1327 CH = EN_FGETC(io); 1328 if( CH->c_ch == EOF ) 1329 break; 1330 len++; 1331 } 1332 1333 str_fclose(sfp); 1334 return len; 1335 } 1336