1 /* 2 * Copyright (c) 1994 Eric P. Allman 3 * Copyright (c) 1994 4 * The Regents of the University of California. All rights reserved. 5 * 6 * %sccs.include.redist.c% 7 */ 8 9 # include "sendmail.h" 10 # include <string.h> 11 12 #ifndef lint 13 static char sccsid[] = "@(#)mime.c 8.4 (Berkeley) 08/15/94"; 14 #endif /* not lint */ 15 16 /* 17 ** MIME support. 18 ** 19 ** I am indebted to John Beck of Hewlett-Packard, who contributed 20 ** his code to me for inclusion. As it turns out, I did not use 21 ** his code since he used a "minimum change" approach that used 22 ** several temp files, and I wanted a "minimum impact" approach 23 ** that would avoid copying. However, looking over his code 24 ** helped me cement my understanding of the problem. 25 ** 26 ** I also looked at, but did not directly use, Nathaniel 27 ** Borenstein's "code.c" module. Again, it functioned as 28 ** a file-to-file translator, which did not fit within my 29 ** design bounds, but it was a useful base for understanding 30 ** the problem. 31 */ 32 33 34 /* character set for hex and base64 encoding */ 35 char Base16Code[] = "0123456789ABCDEF"; 36 char Base64Code[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 37 38 /* types of MIME boundaries */ 39 #define MBT_SYNTAX 0 /* syntax error */ 40 #define MBT_NOTSEP 1 /* not a boundary */ 41 #define MBT_INTERMED 2 /* intermediate boundary (no trailing --) */ 42 #define MBT_FINAL 3 /* final boundary (trailing -- included) */ 43 44 static int MimeBoundaryType; /* internal linkage */ 45 /* 46 ** MIME8TO7 -- output 8 bit body in 7 bit format 47 ** 48 ** The header has already been output -- this has to do the 49 ** 8 to 7 bit conversion. It would be easy if we didn't have 50 ** to deal with nested formats (multipart/xxx and message/rfc822). 51 ** 52 ** We won't be called if we don't have to do a conversion, and 53 ** appropriate MIME-Version: and Content-Type: fields have been 54 ** output. Any Content-Transfer-Encoding: field has not been 55 ** output, and we can add it here. 56 ** 57 ** Parameters: 58 ** mci -- mailer connection information. 59 ** header -- the header for this body part. 60 ** e -- envelope. 61 ** boundary -- the message boundary -- NULL if we are 62 ** processing the outer portion. 63 ** 64 ** Returns: 65 ** An indicator of what terminated the message part: 66 ** MBT_FINAL -- the final boundary 67 ** MBT_INTERMED -- an intermediate boundary 68 ** MBT_NOTSEP -- an end of file 69 */ 70 71 int 72 mime8to7(mci, header, e, boundary) 73 register MCI *mci; 74 HDR *header; 75 register ENVELOPE *e; 76 char *boundary; 77 { 78 register char *p; 79 int linelen; 80 int bt; 81 off_t offset; 82 size_t sectionsize, sectionhighbits; 83 char bbuf[128]; 84 char buf[MAXLINE]; 85 86 if (tTd(43, 1)) 87 { 88 printf("mime8to7: boundary=%s\n", 89 boundary == NULL ? "<none>" : boundary); 90 } 91 p = hvalue("Content-Type", header); 92 if (p != NULL && strncasecmp(p, "multipart/", 10) == 0) 93 { 94 register char *q; 95 96 /* oh dear -- this part is hard */ 97 p = strstr(p, "boundary="); /*XXX*/ 98 if (p == NULL) 99 { 100 syserr("mime8to7: Content-Type: %s missing boundary", p); 101 p = "---"; 102 } 103 else 104 p += 9; 105 if (*p == '"') 106 q = strchr(p, '"'); 107 else 108 q = strchr(p, ','); 109 if (q == NULL) 110 q = p + strlen(p); 111 if (q - p > sizeof bbuf - 1) 112 { 113 syserr("mime8to7: multipart boundary \"%.*s\" too long", 114 q - p, p); 115 q = p + sizeof bbuf - 1; 116 } 117 strncpy(bbuf, p, q - p); 118 bbuf[q - p] = '\0'; 119 if (tTd(43, 1)) 120 { 121 printf("mime8to7: multipart boundary \"%s\"\n", bbuf); 122 } 123 124 /* skip the early "comment" prologue */ 125 bt = MBT_FINAL; 126 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 127 { 128 bt = mimeboundary(buf, bbuf); 129 if (bt != MBT_NOTSEP) 130 break; 131 putline(buf, mci); 132 } 133 while (bt != MBT_FINAL) 134 { 135 auto HDR *hdr = NULL; 136 137 sprintf(buf, "--%s", bbuf); 138 putline(buf, mci); 139 collect(e->e_dfp, FALSE, FALSE, &hdr, e); 140 putheader(mci, hdr, e); 141 bt = mime8to7(mci, hdr, e, bbuf); 142 } 143 sprintf(buf, "--%s--", bbuf); 144 putline(buf, mci); 145 146 /* skip the late "comment" epilogue */ 147 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 148 { 149 putline(buf, mci); 150 bt = mimeboundary(buf, boundary); 151 if (bt != MBT_NOTSEP) 152 break; 153 } 154 return bt; 155 } 156 157 /* 158 ** Non-compound body type 159 ** 160 ** Compute the ratio of seven to eight bit characters; 161 ** use that as a heuristic to decide how to do the 162 ** encoding. 163 */ 164 165 /* remember where we were */ 166 offset = ftell(e->e_dfp); 167 if (offset == -1) 168 syserr("mime8to7: cannot ftell on %s", e->e_df); 169 170 /* do a scan of this body type to count character types */ 171 sectionsize = sectionhighbits = 0; 172 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 173 { 174 bt = mimeboundary(buf, boundary); 175 if (bt != MBT_NOTSEP) 176 break; 177 for (p = buf; *p != '\0'; p++) 178 { 179 /* count bytes with the high bit set */ 180 sectionsize++; 181 if (bitset(0200, *p)) 182 sectionhighbits++; 183 } 184 185 /* 186 ** Heuristic: if 1/4 of the first 4K bytes are 8-bit, 187 ** assume base64. This heuristic avoids double-reading 188 ** large graphics or video files. 189 */ 190 191 if (sectionsize >= 4096 && sectionhighbits > sectionsize / 4) 192 break; 193 } 194 if (feof(e->e_dfp)) 195 bt = MBT_FINAL; 196 197 /* return to the original offset for processing */ 198 /* XXX use relative seeks to handle >31 bit file sizes? */ 199 if (fseek(e->e_dfp, offset, SEEK_SET) < 0) 200 syserr("mime8to7: cannot fseek on %s", e->e_df); 201 202 /* 203 ** Heuristically determine encoding method. 204 ** If more than 1/8 of the total characters have the 205 ** eighth bit set, use base64; else use quoted-printable. 206 */ 207 208 if (tTd(43, 8)) 209 { 210 printf("mime8to7: %ld high bits in %ld bytes\n", 211 sectionhighbits, sectionsize); 212 } 213 if (sectionhighbits == 0) 214 { 215 /* no encoding necessary */ 216 putline("", mci); 217 mci->mci_flags &= ~MCIF_INHEADER; 218 while (fgets(buf, sizeof buf, e->e_dfp) != NULL) 219 { 220 bt = mimeboundary(buf, boundary); 221 if (bt != MBT_NOTSEP) 222 break; 223 if (buf[0] == 'F' && 224 bitnset(M_ESCFROM, mci->mci_mailer->m_flags) && 225 strncmp(buf, "From ", 5) == 0) 226 (void) putc('>', mci->mci_out); 227 putline(buf, mci); 228 } 229 } 230 else if (sectionsize / 8 < sectionhighbits) 231 { 232 /* use base64 encoding */ 233 int c1, c2; 234 235 putline("Content-Transfer-Encoding: base64", mci); 236 putline("", mci); 237 mci->mci_flags &= ~MCIF_INHEADER; 238 linelen = 0; 239 while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) 240 { 241 if (linelen > 71) 242 { 243 fputs(mci->mci_mailer->m_eol, mci->mci_out); 244 linelen = 0; 245 } 246 linelen += 4; 247 fputc(Base64Code[c1 >> 2], mci->mci_out); 248 c1 = (c1 & 0x03) << 4; 249 c2 = mime_getchar(e->e_dfp, boundary); 250 if (c2 == EOF) 251 { 252 fputc(Base64Code[c1], mci->mci_out); 253 fputc('=', mci->mci_out); 254 fputc('=', mci->mci_out); 255 break; 256 } 257 c1 |= (c2 >> 4) & 0x0f; 258 fputc(Base64Code[c1], mci->mci_out); 259 c1 = (c2 & 0x0f) << 2; 260 c2 = mime_getchar(e->e_dfp, boundary); 261 if (c2 == EOF) 262 { 263 fputc(Base64Code[c1], mci->mci_out); 264 fputc('=', mci->mci_out); 265 break; 266 } 267 c1 |= (c2 >> 6) & 0x03; 268 fputc(Base64Code[c1], mci->mci_out); 269 fputc(Base64Code[c2 & 0x3f], mci->mci_out); 270 } 271 } 272 else 273 { 274 /* use quoted-printable encoding */ 275 int c1, c2; 276 277 putline("Content-Transfer-Encoding: quoted-printable", mci); 278 putline("", mci); 279 mci->mci_flags &= ~MCIF_INHEADER; 280 linelen = 0; 281 c2 = '\n'; 282 while ((c1 = mime_getchar(e->e_dfp, boundary)) != EOF) 283 { 284 if (c1 == '\n') 285 { 286 if (c2 == ' ' || c2 == '\t') 287 { 288 fputc('=', mci->mci_out); 289 fputs(mci->mci_mailer->m_eol, mci->mci_out); 290 } 291 fputs(mci->mci_mailer->m_eol, mci->mci_out); 292 linelen = 0; 293 c2 = c1; 294 continue; 295 } 296 else if (c2 == '\n' && c1 == '.' && 297 bitnset(M_XDOT, mci->mci_mailer->m_flags)) 298 { 299 fputc('.', mci->mci_out); 300 linelen++; 301 } 302 if (linelen > 72) 303 { 304 fputc('=', mci->mci_out); 305 fputs(mci->mci_mailer->m_eol, mci->mci_out); 306 linelen = 0; 307 c2 = '\n'; 308 } 309 if ((c1 < 0x20 && c1 != '\t') || c1 >= 0x7f || c1 == '=') 310 { 311 fputc('=', mci->mci_out); 312 fputc(Base16Code[(c1 >> 4) & 0x0f], mci->mci_out); 313 fputc(Base16Code[c1 & 0x0f], mci->mci_out); 314 linelen += 3; 315 } 316 else 317 { 318 fputc(c1, mci->mci_out); 319 linelen++; 320 } 321 c2 = c1; 322 } 323 } 324 if (linelen > 0) 325 fputs(mci->mci_mailer->m_eol, mci->mci_out); 326 return MimeBoundaryType; 327 } 328 329 330 int 331 mime_getchar(fp, boundary) 332 register FILE *fp; 333 char *boundary; 334 { 335 int c; 336 static char *bp = NULL; 337 static int buflen = 0; 338 static bool atbol = TRUE; /* at beginning of line */ 339 static char buf[128]; /* need not be a full line */ 340 341 if (buflen > 0) 342 { 343 buflen--; 344 return *bp++; 345 } 346 c = fgetc(fp); 347 if (atbol && c == '-' && boundary != NULL) 348 { 349 /* check for a message boundary */ 350 bp = buf; 351 c = fgetc(fp); 352 if (c != '-') 353 { 354 if (c != EOF) 355 { 356 *bp = c; 357 buflen++; 358 } 359 return '-'; 360 } 361 362 /* got "--", now check for rest of separator */ 363 *bp++ = '-'; 364 *bp++ = '-'; 365 while (bp < &buf[sizeof buf - 1] && 366 (c = fgetc(fp)) != EOF && c != '\n') 367 { 368 *bp++ = c; 369 } 370 *bp = '\0'; 371 MimeBoundaryType = mimeboundary(buf, boundary); 372 switch (MimeBoundaryType) 373 { 374 case MBT_FINAL: 375 case MBT_INTERMED: 376 /* we have a message boundary */ 377 buflen = 0; 378 return EOF; 379 } 380 381 atbol = c == '\n'; 382 if (c != EOF) 383 *bp++ = c; 384 buflen = bp - buf - 1; 385 bp = buf; 386 return *bp++; 387 } 388 389 atbol = c == '\n'; 390 return c; 391 } 392 /* 393 ** MIMEBOUNDARY -- determine if this line is a MIME boundary & its type 394 ** 395 ** Parameters: 396 ** line -- the input line. 397 ** boundary -- the expected boundary. 398 ** 399 ** Returns: 400 ** MBT_NOTSEP -- if this is not a separator line 401 ** MBT_INTERMED -- if this is an intermediate separator 402 ** MBT_FINAL -- if this is a final boundary 403 ** MBT_SYNTAX -- if this is a boundary for the wrong 404 ** enclosure -- i.e., a syntax error. 405 */ 406 407 int 408 mimeboundary(line, boundary) 409 register char *line; 410 char *boundary; 411 { 412 int type; 413 int i; 414 415 if (line[0] != '-' || line[1] != '-' || boundary == NULL) 416 return MBT_NOTSEP; 417 if (tTd(43, 5)) 418 printf("mimeboundary: bound=\"%s\", line=\"%s\"... ", 419 boundary, line); 420 i = strlen(line); 421 if (line[i - 1] == '\n') 422 i--; 423 if (i > 2 && strncmp(&line[i - 2], "--", 2) == 0) 424 { 425 type = MBT_FINAL; 426 i -= 2; 427 } 428 else 429 type = MBT_INTERMED; 430 431 /* XXX should check for improper nesting here */ 432 if (strncmp(boundary, &line[2], i - 2) != 0 || 433 strlen(boundary) != i - 2) 434 type = MBT_NOTSEP; 435 if (tTd(43, 5)) 436 printf("%d\n", type); 437 return type; 438 } 439