1 /* $NetBSD: encoding.c,v 1.4 2015/01/02 21:15:32 christos Exp $ */ 2 3 /* 4 * Copyright (c) Ian F. Darwin 1986-1995. 5 * Software written by Ian F. Darwin and others; 6 * maintained 1995-present by Christos Zoulas and others. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice immediately at the beginning of the file, without modification, 13 * this list of conditions, and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 /* 31 * Encoding -- determine the character encoding of a text file. 32 * 33 * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit 34 * international characters. 35 */ 36 37 #include "file.h" 38 39 #ifndef lint 40 #if 0 41 FILE_RCSID("@(#)$File: encoding.c,v 1.10 2014/09/11 12:08:52 christos Exp $") 42 #else 43 __RCSID("$NetBSD: encoding.c,v 1.4 2015/01/02 21:15:32 christos Exp $"); 44 #endif 45 #endif /* lint */ 46 47 #include "magic.h" 48 #include <string.h> 49 #include <memory.h> 50 #include <stdlib.h> 51 52 53 private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *); 54 private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *, 55 size_t *); 56 private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *); 57 private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *); 58 private int looks_extended(const unsigned char *, size_t, unichar *, size_t *); 59 private void from_ebcdic(const unsigned char *, size_t, unsigned char *); 60 61 #ifdef DEBUG_ENCODING 62 #define DPRINTF(a) printf a 63 #else 64 #define DPRINTF(a) 65 #endif 66 67 /* 68 * Try to determine whether text is in some character code we can 69 * identify. Each of these tests, if it succeeds, will leave 70 * the text converted into one-unichar-per-character Unicode in 71 * ubuf, and the number of characters converted in ulen. 72 */ 73 protected int 74 file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar **ubuf, size_t *ulen, const char **code, const char **code_mime, const char **type) 75 { 76 size_t mlen; 77 int rv = 1, ucs_type; 78 unsigned char *nbuf = NULL; 79 80 *type = "text"; 81 *ulen = 0; 82 *code = "unknown"; 83 *code_mime = "binary"; 84 85 mlen = (nbytes + 1) * sizeof((*ubuf)[0]); 86 if ((*ubuf = CAST(unichar *, calloc((size_t)1, mlen))) == NULL) { 87 file_oomem(ms, mlen); 88 goto done; 89 } 90 mlen = (nbytes + 1) * sizeof(nbuf[0]); 91 if ((nbuf = CAST(unsigned char *, calloc((size_t)1, mlen))) == NULL) { 92 file_oomem(ms, mlen); 93 goto done; 94 } 95 96 if (looks_ascii(buf, nbytes, *ubuf, ulen)) { 97 DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen)); 98 *code = "ASCII"; 99 *code_mime = "us-ascii"; 100 } else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) { 101 DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen)); 102 *code = "UTF-8 Unicode (with BOM)"; 103 *code_mime = "utf-8"; 104 } else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) { 105 DPRINTF(("utf8 %" SIZE_T_FORMAT "u\n", *ulen)); 106 *code = "UTF-8 Unicode"; 107 *code_mime = "utf-8"; 108 } else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) { 109 if (ucs_type == 1) { 110 *code = "Little-endian UTF-16 Unicode"; 111 *code_mime = "utf-16le"; 112 } else { 113 *code = "Big-endian UTF-16 Unicode"; 114 *code_mime = "utf-16be"; 115 } 116 DPRINTF(("ucs16 %" SIZE_T_FORMAT "u\n", *ulen)); 117 } else if (looks_latin1(buf, nbytes, *ubuf, ulen)) { 118 DPRINTF(("latin1 %" SIZE_T_FORMAT "u\n", *ulen)); 119 *code = "ISO-8859"; 120 *code_mime = "iso-8859-1"; 121 } else if (looks_extended(buf, nbytes, *ubuf, ulen)) { 122 DPRINTF(("extended %" SIZE_T_FORMAT "u\n", *ulen)); 123 *code = "Non-ISO extended-ASCII"; 124 *code_mime = "unknown-8bit"; 125 } else { 126 from_ebcdic(buf, nbytes, nbuf); 127 128 if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) { 129 DPRINTF(("ebcdic %" SIZE_T_FORMAT "u\n", *ulen)); 130 *code = "EBCDIC"; 131 *code_mime = "ebcdic"; 132 } else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) { 133 DPRINTF(("ebcdic/international %" SIZE_T_FORMAT "u\n", 134 *ulen)); 135 *code = "International EBCDIC"; 136 *code_mime = "ebcdic"; 137 } else { /* Doesn't look like text at all */ 138 DPRINTF(("binary\n")); 139 rv = 0; 140 *type = "binary"; 141 } 142 } 143 144 done: 145 free(nbuf); 146 147 return rv; 148 } 149 150 /* 151 * This table reflects a particular philosophy about what constitutes 152 * "text," and there is room for disagreement about it. 153 * 154 * Version 3.31 of the file command considered a file to be ASCII if 155 * each of its characters was approved by either the isascii() or 156 * isalpha() function. On most systems, this would mean that any 157 * file consisting only of characters in the range 0x00 ... 0x7F 158 * would be called ASCII text, but many systems might reasonably 159 * consider some characters outside this range to be alphabetic, 160 * so the file command would call such characters ASCII. It might 161 * have been more accurate to call this "considered textual on the 162 * local system" than "ASCII." 163 * 164 * It considered a file to be "International language text" if each 165 * of its characters was either an ASCII printing character (according 166 * to the real ASCII standard, not the above test), a character in 167 * the range 0x80 ... 0xFF, or one of the following control characters: 168 * backspace, tab, line feed, vertical tab, form feed, carriage return, 169 * escape. No attempt was made to determine the language in which files 170 * of this type were written. 171 * 172 * 173 * The table below considers a file to be ASCII if all of its characters 174 * are either ASCII printing characters (again, according to the X3.4 175 * standard, not isascii()) or any of the following controls: bell, 176 * backspace, tab, line feed, form feed, carriage return, esc, nextline. 177 * 178 * I include bell because some programs (particularly shell scripts) 179 * use it literally, even though it is rare in normal text. I exclude 180 * vertical tab because it never seems to be used in real text. I also 181 * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85), 182 * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline 183 * character to. It might be more appropriate to include it in the 8859 184 * set instead of the ASCII set, but it's got to be included in *something* 185 * we recognize or EBCDIC files aren't going to be considered textual. 186 * Some old Unix source files use SO/SI (^N/^O) to shift between Greek 187 * and Latin characters, so these should possibly be allowed. But they 188 * make a real mess on VT100-style displays if they're not paired properly, 189 * so we are probably better off not calling them text. 190 * 191 * A file is considered to be ISO-8859 text if its characters are all 192 * either ASCII, according to the above definition, or printing characters 193 * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF. 194 * 195 * Finally, a file is considered to be international text from some other 196 * character code if its characters are all either ISO-8859 (according to 197 * the above definition) or characters in the range 0x80 ... 0x9F, which 198 * ISO-8859 considers to be control characters but the IBM PC and Macintosh 199 * consider to be printing characters. 200 */ 201 202 #define F 0 /* character never appears in text */ 203 #define T 1 /* character appears in plain ASCII text */ 204 #define I 2 /* character appears in ISO-8859 text */ 205 #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */ 206 207 private char text_chars[256] = { 208 /* BEL BS HT LF FF CR */ 209 F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */ 210 /* ESC */ 211 F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ 212 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ 213 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ 214 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ 215 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ 216 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ 217 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ 218 /* NEL */ 219 X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */ 220 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */ 221 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */ 222 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */ 223 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */ 224 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */ 225 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */ 226 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */ 227 }; 228 229 private int 230 looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf, 231 size_t *ulen) 232 { 233 size_t i; 234 235 *ulen = 0; 236 237 for (i = 0; i < nbytes; i++) { 238 int t = text_chars[buf[i]]; 239 240 if (t != T) 241 return 0; 242 243 ubuf[(*ulen)++] = buf[i]; 244 } 245 246 return 1; 247 } 248 249 private int 250 looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) 251 { 252 size_t i; 253 254 *ulen = 0; 255 256 for (i = 0; i < nbytes; i++) { 257 int t = text_chars[buf[i]]; 258 259 if (t != T && t != I) 260 return 0; 261 262 ubuf[(*ulen)++] = buf[i]; 263 } 264 265 return 1; 266 } 267 268 private int 269 looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf, 270 size_t *ulen) 271 { 272 size_t i; 273 274 *ulen = 0; 275 276 for (i = 0; i < nbytes; i++) { 277 int t = text_chars[buf[i]]; 278 279 if (t != T && t != I && t != X) 280 return 0; 281 282 ubuf[(*ulen)++] = buf[i]; 283 } 284 285 return 1; 286 } 287 288 /* 289 * Decide whether some text looks like UTF-8. Returns: 290 * 291 * -1: invalid UTF-8 292 * 0: uses odd control characters, so doesn't look like text 293 * 1: 7-bit text 294 * 2: definitely UTF-8 text (valid high-bit set bytes) 295 * 296 * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen; 297 * ubuf must be big enough! 298 */ 299 protected int 300 file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) 301 { 302 size_t i; 303 int n; 304 unichar c; 305 int gotone = 0, ctrl = 0; 306 307 if (ubuf) 308 *ulen = 0; 309 310 for (i = 0; i < nbytes; i++) { 311 if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ 312 /* 313 * Even if the whole file is valid UTF-8 sequences, 314 * still reject it if it uses weird control characters. 315 */ 316 317 if (text_chars[buf[i]] != T) 318 ctrl = 1; 319 320 if (ubuf) 321 ubuf[(*ulen)++] = buf[i]; 322 } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */ 323 return -1; 324 } else { /* 11xxxxxx begins UTF-8 */ 325 int following; 326 327 if ((buf[i] & 0x20) == 0) { /* 110xxxxx */ 328 c = buf[i] & 0x1f; 329 following = 1; 330 } else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */ 331 c = buf[i] & 0x0f; 332 following = 2; 333 } else if ((buf[i] & 0x08) == 0) { /* 11110xxx */ 334 c = buf[i] & 0x07; 335 following = 3; 336 } else if ((buf[i] & 0x04) == 0) { /* 111110xx */ 337 c = buf[i] & 0x03; 338 following = 4; 339 } else if ((buf[i] & 0x02) == 0) { /* 1111110x */ 340 c = buf[i] & 0x01; 341 following = 5; 342 } else 343 return -1; 344 345 for (n = 0; n < following; n++) { 346 i++; 347 if (i >= nbytes) 348 goto done; 349 350 if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40)) 351 return -1; 352 353 c = (c << 6) + (buf[i] & 0x3f); 354 } 355 356 if (ubuf) 357 ubuf[(*ulen)++] = c; 358 gotone = 1; 359 } 360 } 361 done: 362 return ctrl ? 0 : (gotone ? 2 : 1); 363 } 364 365 /* 366 * Decide whether some text looks like UTF-8 with BOM. If there is no 367 * BOM, return -1; otherwise return the result of looks_utf8 on the 368 * rest of the text. 369 */ 370 private int 371 looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf, 372 size_t *ulen) 373 { 374 if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf) 375 return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen); 376 else 377 return -1; 378 } 379 380 private int 381 looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf, 382 size_t *ulen) 383 { 384 int bigend; 385 size_t i; 386 387 if (nbytes < 2) 388 return 0; 389 390 if (buf[0] == 0xff && buf[1] == 0xfe) 391 bigend = 0; 392 else if (buf[0] == 0xfe && buf[1] == 0xff) 393 bigend = 1; 394 else 395 return 0; 396 397 *ulen = 0; 398 399 for (i = 2; i + 1 < nbytes; i += 2) { 400 /* XXX fix to properly handle chars > 65536 */ 401 402 if (bigend) 403 ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i]; 404 else 405 ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1]; 406 407 if (ubuf[*ulen - 1] == 0xfffe) 408 return 0; 409 if (ubuf[*ulen - 1] < 128 && 410 text_chars[(size_t)ubuf[*ulen - 1]] != T) 411 return 0; 412 } 413 414 return 1 + bigend; 415 } 416 417 #undef F 418 #undef T 419 #undef I 420 #undef X 421 422 /* 423 * This table maps each EBCDIC character to an (8-bit extended) ASCII 424 * character, as specified in the rationale for the dd(1) command in 425 * draft 11.2 (September, 1991) of the POSIX P1003.2 standard. 426 * 427 * Unfortunately it does not seem to correspond exactly to any of the 428 * five variants of EBCDIC documented in IBM's _Enterprise Systems 429 * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh 430 * Edition, July, 1999, pp. I-1 - I-4. 431 * 432 * Fortunately, though, all versions of EBCDIC, including this one, agree 433 * on most of the printing characters that also appear in (7-bit) ASCII. 434 * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all. 435 * 436 * Fortunately too, there is general agreement that codes 0x00 through 437 * 0x3F represent control characters, 0x41 a nonbreaking space, and the 438 * remainder printing characters. 439 * 440 * This is sufficient to allow us to identify EBCDIC text and to distinguish 441 * between old-style and internationalized examples of text. 442 */ 443 444 private unsigned char ebcdic_to_ascii[] = { 445 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, 446 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, 447 128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, 448 144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, 449 ' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|', 450 '&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~', 451 '-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?', 452 186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"', 453 195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201, 454 202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208, 455 209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215, 456 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231, 457 '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237, 458 '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243, 459 '\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249, 460 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255 461 }; 462 463 #ifdef notdef 464 /* 465 * The following EBCDIC-to-ASCII table may relate more closely to reality, 466 * or at least to modern reality. It comes from 467 * 468 * http://ftp.s390.ibm.com/products/oe/bpxqp9.html 469 * 470 * and maps the characters of EBCDIC code page 1047 (the code used for 471 * Unix-derived software on IBM's 390 systems) to the corresponding 472 * characters from ISO 8859-1. 473 * 474 * If this table is used instead of the above one, some of the special 475 * cases for the NEL character can be taken out of the code. 476 */ 477 478 private unsigned char ebcdic_1047_to_8859[] = { 479 0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, 480 0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, 481 0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, 482 0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, 483 0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C, 484 0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E, 485 0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F, 486 0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, 487 0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1, 488 0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4, 489 0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE, 490 0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7, 491 0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5, 492 0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF, 493 0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, 494 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F 495 }; 496 #endif 497 498 /* 499 * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII. 500 */ 501 private void 502 from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out) 503 { 504 size_t i; 505 506 for (i = 0; i < nbytes; i++) { 507 out[i] = ebcdic_to_ascii[buf[i]]; 508 } 509 } 510