1 /* 2 * Copyright (c) 1994 Eric P. Allman 3 * Copyright (c) 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * %sccs.include.redist.c% 7 */ 8 9 # include "sendmail.h" 10 # include <string.h> 11 12 #ifndef lint 13 static char sccsid[] = "@(#)mime.c 8.2 (Berkeley) 07/23/94"; 14 #endif /* not lint */ 15 16 /* 17 ** MIME support. 18 ** 19 ** I am indebted to John Beck of Hewlett-Packard, who contributed 20 ** his code to me for inclusion. As it turns out, I did not use 21 ** his code since he used a "minimum change" approach that used 22 ** several temp files, and I wanted a "minimum impact" approach 23 ** that would avoid copying. However, looking over his code 24 ** helped me cement my understanding of the problem. 25 ** 26 ** I also looked at, but did not directly use, Nathaniel 27 ** Borenstein's "code.c" module. Again, it functioned as 28 ** a file-to-file translator, which did not fit within my 29 ** design bounds, but it was a useful base for understanding 30 ** the problem. 31 */ 32 33 34 /* character set for hex and base64 encoding */ 35 char Base16Code[] = "0123456789ABCDEF"; 36 char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 37 38 /* types of MIME boundaries */ 39 #define MBT_SYNTAX 0 /* syntax error */ 40 #define MBT_NOTSEP 1 /* not a boundary */ 41 #define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */ 42 #define MBT_FINAL 3 /* final boundary (trailing -- included) */ 43 44 static int MimeBoundaryType; /* internal linkage */ 45 /* 46 ** MIME8TO7 -- output 8 bit body in 7 bit format 47 ** 48 ** The header has already been output -- this has to do the 49 ** 8 to 7 bit conversion. It would be easy if we didn't have 50 ** to deal with nested formats (multipart/xxx and message/rfc822). 51 ** 52 ** We won't be called if we don't have to do a conversion, and 53 ** appropriate MIME-Version: and Content-Type: fields have been 54 ** output. Any Content-Transfer-Encoding: field has not been 55 ** output, and we can add it here. 56 ** 57 ** Parameters: 58 ** mci -- mailer connection information. 59 ** header -- the header for this body part. 60 ** e -- envelope. 61 ** boundary -- the message boundary -- NULL if we are 62 ** processing the outer portion. 63 ** 64 ** Returns: 65 ** An indicator of what terminated the message part: 66 ** MBT_FINAL -- the final boundary 67 ** MBT_INTERMED -- an intermediate boundary 68 ** MBT_NOTSEP -- an end of file 69 */ 70 71 int 72 mime8to7(mci, header, e, boundary) 73 register MCI *mci; 74 HDR *header; 75 register ENVELOPE *e; 76 char *boundary; 77 { 78 register char *p; 79 int linelen; 80 int bt; 81 off_t offset; 82 size_t sectionsize, sectionhighbits; 83 char bbuf[128]; 84 char buf[MAXLINE]; 85 extern char *hvalue(); 86 87 if (tTd(43, 1)) 88 { 89 printf("mime8to7: boundary=%s\n", 90 boundary == NULL ? "<none>" : boundary); 91 } 92 p = hvalue("Content-Type", header); 93 if (p != NULL && strncasecmp(p, "multipart/", 10) == 0) 94 { 95 register char *q; 96 97 /* oh dear -- this part is hard */ 98 p = strstr(p, "boundary="); /*XXX*/ 99 if (p == NULL) 100 { 101 syserr("mime8to7: Content-Type: %s missing boundary", p); 102 p = "---"; 103 } 104 else 105 p += 9; 106 if (*p == '"') 107 q = strchr(p, '"'); 108 else 109 q = strchr(p, ','); 110 if (q == NULL) 111 q = p + strlen(p); 112 if (q - p > sizeof bbuf - 1) 113 { 114 syserr("mime8to7: multipart boundary \"%.*s\" too long", 115 q - p, p); 116 q = p + sizeof bbuf - 1; 117 } 118 strncpy(bbuf, p, q - p); 119 bbuf[q - p] = '\0'; 120 if (tTd(43, 1)) 121 { 122 printf("mime8to7: multipart boundary \"%s\"\n", bbuf); 123 } 124 125 /* skip the early "comment" prologue */ 126 bt = MBT_FINAL; 127 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 128 { 129 bt = mimeboundary(buf, bbuf); 130 if (bt != MBT_NOTSEP) 131 break; 132 putline(buf, mci); 133 } 134 while (bt != MBT_FINAL) 135 { 136 auto HDR *hdr = NULL; 137 138 sprintf(buf, "--%s", bbuf); 139 putline(buf, mci); 140 collect(e->e_dfp, FALSE, FALSE, &hdr, e); 141 putheader(mci, hdr, e); 142 bt = mime8to7(mci, hdr, e, bbuf); 143 } 144 sprintf(buf, "--%s--", bbuf); 145 putline(buf, mci); 146 147 /* skip the late "comment" epilogue */ 148 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 149 { 150 putline(buf, mci); 151 bt = mimeboundary(buf, boundary); 152 if (bt != MBT_NOTSEP) 153 break; 154 } 155 return bt; 156 } 157 158 /* 159 ** Non-compound body type 160 ** 161 ** Compute the ratio of seven to eight bit characters; 162 ** use that as a heuristic to decide how to do the 163 ** encoding. 164 */ 165 166 /* remember where we were */ 167 offset = ftell(e->e_dfp); 168 if (offset == -1) 169 syserr("mime8to7: cannot ftell on %s", e->e_df); 170 171 /* do a scan of this body type to count character types */ 172 sectionsize = sectionhighbits = 0; 173 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 174 { 175 bt = mimeboundary(buf, boundary); 176 if (bt != MBT_NOTSEP) 177 break; 178 for (p = buf; *p != '\0'; p++) 179 { 180 /* count bytes with the high bit set */ 181 /* XXX should this count any character that will */ 182 /* XXX have to be encoded in quoted-printable? */ 183 sectionsize++; 184 if (bitset(0200, *p)) 185 sectionhighbits++; 186 } 187 188 /* 189 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 190 ** assume base64. This heuristic avoids double-reading 191 ** large graphics or video files. 192 */ 193 194 if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4) 195 break; 196 } 197 if (feof(e->e_dfp)) 198 bt = MBT_FINAL; 199 200 /* return to the original offset for processing */ 201 /* XXX use relative seeks to handle >31 bit file sizes? */ 202 if (fseek(e->e_dfp, offset, SEEK_SET) < 0) 203 syserr("mime8to7: cannot fseek on %s", e->e_df); 204 205 /* 206 ** Heuristically determine encoding method. 207 ** If more than 1/8 of the total characters have the 208 ** eighth bit set, use base64; else use quoted-printable. 209 */ 210 211 if (tTd(43, 8)) 212 { 213 printf("mime8to7: %ld high bits in %ld bytes\n", 214 sectionhighbits, sectionsize); 215 } 216 if (sectionsize / 8 < sectionhighbits) 217 { 218 /* use base64 encoding */ 219 int c1, c2; 220 221 putline("Content-Transfer-Encoding: base64", mci); 222 putline("", mci); 223 mci->mci_flags &= ~MCIF_INHEADER; 224 linelen = 0; 225 while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) 226 { 227 if (linelen > 71) 228 { 229 fputs(mci->mci_mailer->m_eol, mci->mci_out); 230 linelen = 0; 231 } 232 linelen += 4; 233 fputc(Base64Code[c1 >> 2], mci->mci_out); 234 c1 = (c1 & 0x03) << 4; 235 c2 = mime_getchar(e->e_dfp, boundary); 236 if (c2 == EOF) 237 { 238 fputc(Base64Code[c1], mci->mci_out); 239 fputc('=', mci->mci_out); 240 fputc('=', mci->mci_out); 241 break; 242 } 243 c1 |= (c2 >> 4) & 0x0f; 244 fputc(Base64Code[c1], mci->mci_out); 245 c1 = (c2 & 0x0f) << 2; 246 c2 = mime_getchar(e->e_dfp, boundary); 247 if (c2 == EOF) 248 { 249 fputc(Base64Code[c1], mci->mci_out); 250 fputc('=', mci->mci_out); 251 break; 252 } 253 c1 |= (c2 >> 6) & 0x03; 254 fputc(Base64Code[c1], mci->mci_out); 255 fputc(Base64Code[c2 & 0x3f], mci->mci_out); 256 } 257 } 258 else 259 { 260 /* use quoted-printable encoding */ 261 int c1, c2; 262 263 putline("Content-Transfer-Encoding: quoted-printable", mci); 264 putline("", mci); 265 mci->mci_flags &= ~MCIF_INHEADER; 266 linelen = 0; 267 c2 = EOF; 268 while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) 269 { 270 if (c1 == '\n') 271 { 272 if (c2 == ' ' || c2 == '\t') 273 { 274 fputc('=', mci->mci_out); 275 fputs(mci->mci_mailer->m_eol, mci->mci_out); 276 } 277 fputs(mci->mci_mailer->m_eol, mci->mci_out); 278 linelen = 0; 279 c2 = c1; 280 continue; 281 } 282 if (linelen > 72) 283 { 284 fputc('=', mci->mci_out); 285 fputs(mci->mci_mailer->m_eol, mci->mci_out); 286 linelen = 0; 287 } 288 if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=') 289 { 290 fputc('=', mci->mci_out); 291 fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out); 292 fputc(Base16Code[c1 & 0x0f], mci->mci_out); 293 linelen += 3; 294 } 295 else 296 { 297 fputc(c1, mci->mci_out); 298 linelen++; 299 } 300 c2 = c1; 301 } 302 } 303 if (linelen > 0) 304 fputs(mci->mci_mailer->m_eol, mci->mci_out); 305 return MimeBoundaryType; 306 } 307 308 309 int 310 mime_getchar(fp, boundary) 311 register FILE *fp; 312 char *boundary; 313 { 314 int c; 315 static char *bp = NULL; 316 static int buflen = 0; 317 static bool atbol = TRUE; /* at beginning of line */ 318 static char buf[128]; /* need not be a full line */ 319 320 if (buflen > 0) 321 { 322 buflen--; 323 return *bp++; 324 } 325 c = fgetc(fp); 326 if (atbol && c == '-' && boundary != NULL) 327 { 328 /* check for a message boundary */ 329 bp = buf; 330 c = fgetc(fp); 331 if (c != '-') 332 { 333 if (c != EOF) 334 { 335 *bp = c; 336 buflen++; 337 } 338 return '-'; 339 } 340 341 /* got "--", now check for rest of separator */ 342 *bp++ = '-'; 343 *bp++ = '-'; 344 while (bp < &buf[sizeof buf - 1] && 345 (c = fgetc(fp)) != EOF && c != '\n') 346 { 347 *bp++ = c; 348 } 349 *bp = '\0'; 350 MimeBoundaryType = mimeboundary(buf, boundary); 351 switch (MimeBoundaryType) 352 { 353 case MBT_FINAL: 354 case MBT_INTERMED: 355 /* we have a message boundary */ 356 buflen = 0; 357 return EOF; 358 } 359 360 atbol = c == '\n'; 361 if (c != EOF) 362 *bp++ = c; 363 buflen = bp - buf - 1; 364 bp = buf; 365 return *bp++; 366 } 367 else if (atbol && c == '.') 368 { 369 /* implement hidden dot algorithm */ 370 bp = buf; 371 *bp = c; 372 buflen = 1; 373 c = fgetc(fp); 374 if (c != '\n') 375 return '.'; 376 atbol = TRUE; 377 buf[0] = '.'; 378 buf[1] = '\n'; 379 buflen = 2; 380 return '.'; 381 } 382 383 atbol = c == '\n'; 384 return c; 385 } 386 /* 387 ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 388 ** 389 ** Parameters: 390 ** line -- the input line. 391 ** boundary -- the expected boundary. 392 ** 393 ** Returns: 394 ** MBT_NOTSEP -- if this is not a separator line 395 ** MBT_INTERMED -- if this is an intermediate separator 396 ** MBT_FINAL -- if this is a final boundary 397 ** MBT_SYNTAX -- if this is a boundary for the wrong 398 ** enclosure -- i.e., a syntax error. 399 */ 400 401 int 402 mimeboundary(line, boundary) 403 register char *line; 404 char *boundary; 405 { 406 int type; 407 int i; 408 409 if (line[0] != '-' || line[1] != '-' || boundary == NULL) 410 return MBT_NOTSEP; 411 if (tTd(43, 5)) 412 printf("mimeboundary: bound=\"%s\", line=\"%s\"... ", 413 boundary, line); 414 i = strlen(line); 415 if (line[i - 1] == '\n') 416 i--; 417 if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 418 { 419 type = MBT_FINAL; 420 i -= 2; 421 } 422 else 423 type = MBT_INTERMED; 424 425 /* XXX should check for improper nesting here */ 426 if (strncmp(boundary, &line[2], i - 2) != 0 || 427 strlen(boundary) != i - 2) 428 type = MBT_NOTSEP; 429 if (tTd(43, 5)) 430 printf("%d\n", type); 431 return type; 432 } 433