/*	$NetBSD: encoding.c,v 1.2 2014/09/11 13:30:04 christos Exp $	*/
/*
 * Copyright (c) Ian F. Darwin 1986-1995.
 * Software written by Ian F. Darwin and others;
 * maintained 1995-present by Christos Zoulas and others.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Encoding -- determine the character encoding of a text file.
 *
 * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
 * international characters.
34 */ 35 36 #include "file.h" 37 38 #ifndef lint 39 #if 0 40 FILE_RCSID("@(#)$File: encoding.c,v 1.9 2013/11/19 20:45:50 christos Exp $") 41 #else 42 __RCSID("$NetBSD: encoding.c,v 1.2 2014/09/11 13:30:04 christos Exp $"); 43 #endif 44 #endif /* lint */ 45 46 #include "magic.h" 47 #include <string.h> 48 #include <memory.h> 49 #include <stdlib.h> 50 51 52 private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *); 53 private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *, 54 size_t *); 55 private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *); 56 private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *); 57 private int looks_extended(const unsigned char *, size_t, unichar *, size_t *); 58 private void from_ebcdic(const unsigned char *, size_t, unsigned char *); 59 60 #ifdef DEBUG_ENCODING 61 #define DPRINTF(a) printf a 62 #else 63 #define DPRINTF(a) 64 #endif 65 66 /* 67 * Try to determine whether text is in some character code we can 68 * identify. Each of these tests, if it succeeds, will leave 69 * the text converted into one-unichar-per-character Unicode in 70 * ubuf, and the number of characters converted in ulen. 
71 */ 72 protected int 73 file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar **ubuf, size_t *ulen, const char **code, const char **code_mime, const char **type) 74 { 75 size_t mlen; 76 int rv = 1, ucs_type; 77 unsigned char *nbuf = NULL; 78 79 *type = "text"; 80 *ulen = 0; 81 *code = "unknown"; 82 *code_mime = "binary"; 83 84 mlen = (nbytes + 1) * sizeof((*ubuf)[0]); 85 if ((*ubuf = CAST(unichar *, calloc((size_t)1, mlen))) == NULL) { 86 file_oomem(ms, mlen); 87 goto done; 88 } 89 mlen = (nbytes + 1) * sizeof(nbuf[0]); 90 if ((nbuf = CAST(unsigned char *, calloc((size_t)1, mlen))) == NULL) { 91 file_oomem(ms, mlen); 92 goto done; 93 } 94 95 if (looks_ascii(buf, nbytes, *ubuf, ulen)) { 96 DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen)); 97 *code = "ASCII"; 98 *code_mime = "us-ascii"; 99 } else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) { 100 DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen)); 101 *code = "UTF-8 Unicode (with BOM)"; 102 *code_mime = "utf-8"; 103 } else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) { 104 DPRINTF(("utf8 %" SIZE_T_FORMAT "u\n", *ulen)); 105 *code = "UTF-8 Unicode"; 106 *code_mime = "utf-8"; 107 } else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) { 108 if (ucs_type == 1) { 109 *code = "Little-endian UTF-16 Unicode"; 110 *code_mime = "utf-16le"; 111 } else { 112 *code = "Big-endian UTF-16 Unicode"; 113 *code_mime = "utf-16be"; 114 } 115 DPRINTF(("ucs16 %" SIZE_T_FORMAT "u\n", *ulen)); 116 } else if (looks_latin1(buf, nbytes, *ubuf, ulen)) { 117 DPRINTF(("latin1 %" SIZE_T_FORMAT "u\n", *ulen)); 118 *code = "ISO-8859"; 119 *code_mime = "iso-8859-1"; 120 } else if (looks_extended(buf, nbytes, *ubuf, ulen)) { 121 DPRINTF(("extended %" SIZE_T_FORMAT "u\n", *ulen)); 122 *code = "Non-ISO extended-ASCII"; 123 *code_mime = "unknown-8bit"; 124 } else { 125 from_ebcdic(buf, nbytes, nbuf); 126 127 if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) { 128 DPRINTF(("ebcdic %" SIZE_T_FORMAT 
"u\n", *ulen)); 129 *code = "EBCDIC"; 130 *code_mime = "ebcdic"; 131 } else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) { 132 DPRINTF(("ebcdic/international %" SIZE_T_FORMAT "u\n", 133 *ulen)); 134 *code = "International EBCDIC"; 135 *code_mime = "ebcdic"; 136 } else { /* Doesn't look like text at all */ 137 DPRINTF(("binary\n")); 138 rv = 0; 139 *type = "binary"; 140 } 141 } 142 143 done: 144 free(nbuf); 145 146 return rv; 147 } 148 149 /* 150 * This table reflects a particular philosophy about what constitutes 151 * "text," and there is room for disagreement about it. 152 * 153 * Version 3.31 of the file command considered a file to be ASCII if 154 * each of its characters was approved by either the isascii() or 155 * isalpha() function. On most systems, this would mean that any 156 * file consisting only of characters in the range 0x00 ... 0x7F 157 * would be called ASCII text, but many systems might reasonably 158 * consider some characters outside this range to be alphabetic, 159 * so the file command would call such characters ASCII. It might 160 * have been more accurate to call this "considered textual on the 161 * local system" than "ASCII." 162 * 163 * It considered a file to be "International language text" if each 164 * of its characters was either an ASCII printing character (according 165 * to the real ASCII standard, not the above test), a character in 166 * the range 0x80 ... 0xFF, or one of the following control characters: 167 * backspace, tab, line feed, vertical tab, form feed, carriage return, 168 * escape. No attempt was made to determine the language in which files 169 * of this type were written. 170 * 171 * 172 * The table below considers a file to be ASCII if all of its characters 173 * are either ASCII printing characters (again, according to the X3.4 174 * standard, not isascii()) or any of the following controls: bell, 175 * backspace, tab, line feed, form feed, carriage return, esc, nextline. 
 *
 * I include bell because some programs (particularly shell scripts)
 * use it literally, even though it is rare in normal text. I exclude
 * vertical tab because it never seems to be used in real text. I also
 * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
 * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
 * character to. It might be more appropriate to include it in the 8859
 * set instead of the ASCII set, but it's got to be included in *something*
 * we recognize or EBCDIC files aren't going to be considered textual.
 * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
 * and Latin characters, so these should possibly be allowed. But they
 * make a real mess on VT100-style displays if they're not paired properly,
 * so we are probably better off not calling them text.
 *
 * A file is considered to be ISO-8859 text if its characters are all
 * either ASCII, according to the above definition, or printing characters
 * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
 *
 * Finally, a file is considered to be international text from some other
 * character code if its characters are all either ISO-8859 (according to
 * the above definition) or characters in the range 0x80 ... 0x9F, which
 * ISO-8859 considers to be control characters but the IBM PC and Macintosh
 * consider to be printing characters.
199 */ 200 201 #define F 0 /* character never appears in text */ 202 #define T 1 /* character appears in plain ASCII text */ 203 #define I 2 /* character appears in ISO-8859 text */ 204 #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */ 205 206 private char text_chars[256] = { 207 /* BEL BS HT LF FF CR */ 208 F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F, /* 0x0X */ 209 /* ESC */ 210 F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ 211 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ 212 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ 213 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ 214 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ 215 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ 216 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ 217 /* NEL */ 218 X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */ 219 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */ 220 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */ 221 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */ 222 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */ 223 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */ 224 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */ 225 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */ 226 }; 227 228 private int 229 looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf, 230 size_t *ulen) 231 { 232 size_t i; 233 234 *ulen = 0; 235 236 for (i = 0; i < nbytes; i++) { 237 int t = text_chars[buf[i]]; 238 239 if (t != T) 240 return 0; 241 242 ubuf[(*ulen)++] = buf[i]; 243 } 244 245 return 1; 246 } 247 248 private int 249 looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) 250 { 251 size_t i; 252 253 *ulen = 0; 254 255 for (i = 0; i < nbytes; i++) { 256 int t = text_chars[buf[i]]; 257 258 if (t != T && t != I) 259 return 0; 260 261 ubuf[(*ulen)++] = buf[i]; 262 } 263 
264 return 1; 265 } 266 267 private int 268 looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf, 269 size_t *ulen) 270 { 271 size_t i; 272 273 *ulen = 0; 274 275 for (i = 0; i < nbytes; i++) { 276 int t = text_chars[buf[i]]; 277 278 if (t != T && t != I && t != X) 279 return 0; 280 281 ubuf[(*ulen)++] = buf[i]; 282 } 283 284 return 1; 285 } 286 287 /* 288 * Decide whether some text looks like UTF-8. Returns: 289 * 290 * -1: invalid UTF-8 291 * 0: uses odd control characters, so doesn't look like text 292 * 1: 7-bit text 293 * 2: definitely UTF-8 text (valid high-bit set bytes) 294 * 295 * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen; 296 * ubuf must be big enough! 297 */ 298 protected int 299 file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) 300 { 301 size_t i; 302 int n; 303 unichar c; 304 int gotone = 0, ctrl = 0; 305 306 if (ubuf) 307 *ulen = 0; 308 309 for (i = 0; i < nbytes; i++) { 310 if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ 311 /* 312 * Even if the whole file is valid UTF-8 sequences, 313 * still reject it if it uses weird control characters. 
314 */ 315 316 if (text_chars[buf[i]] != T) 317 ctrl = 1; 318 319 if (ubuf) 320 ubuf[(*ulen)++] = buf[i]; 321 } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */ 322 return -1; 323 } else { /* 11xxxxxx begins UTF-8 */ 324 int following; 325 326 if ((buf[i] & 0x20) == 0) { /* 110xxxxx */ 327 c = buf[i] & 0x1f; 328 following = 1; 329 } else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */ 330 c = buf[i] & 0x0f; 331 following = 2; 332 } else if ((buf[i] & 0x08) == 0) { /* 11110xxx */ 333 c = buf[i] & 0x07; 334 following = 3; 335 } else if ((buf[i] & 0x04) == 0) { /* 111110xx */ 336 c = buf[i] & 0x03; 337 following = 4; 338 } else if ((buf[i] & 0x02) == 0) { /* 1111110x */ 339 c = buf[i] & 0x01; 340 following = 5; 341 } else 342 return -1; 343 344 for (n = 0; n < following; n++) { 345 i++; 346 if (i >= nbytes) 347 goto done; 348 349 if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40)) 350 return -1; 351 352 c = (c << 6) + (buf[i] & 0x3f); 353 } 354 355 if (ubuf) 356 ubuf[(*ulen)++] = c; 357 gotone = 1; 358 } 359 } 360 done: 361 return ctrl ? 0 : (gotone ? 2 : 1); 362 } 363 364 /* 365 * Decide whether some text looks like UTF-8 with BOM. If there is no 366 * BOM, return -1; otherwise return the result of looks_utf8 on the 367 * rest of the text. 
368 */ 369 private int 370 looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf, 371 size_t *ulen) 372 { 373 if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf) 374 return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen); 375 else 376 return -1; 377 } 378 379 private int 380 looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf, 381 size_t *ulen) 382 { 383 int bigend; 384 size_t i; 385 386 if (nbytes < 2) 387 return 0; 388 389 if (buf[0] == 0xff && buf[1] == 0xfe) 390 bigend = 0; 391 else if (buf[0] == 0xfe && buf[1] == 0xff) 392 bigend = 1; 393 else 394 return 0; 395 396 *ulen = 0; 397 398 for (i = 2; i + 1 < nbytes; i += 2) { 399 /* XXX fix to properly handle chars > 65536 */ 400 401 if (bigend) 402 ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i]; 403 else 404 ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1]; 405 406 if (ubuf[*ulen - 1] == 0xfffe) 407 return 0; 408 if (ubuf[*ulen - 1] < 128 && 409 text_chars[(size_t)ubuf[*ulen - 1]] != T) 410 return 0; 411 } 412 413 return 1 + bigend; 414 } 415 416 #undef F 417 #undef T 418 #undef I 419 #undef X 420 421 /* 422 * This table maps each EBCDIC character to an (8-bit extended) ASCII 423 * character, as specified in the rationale for the dd(1) command in 424 * draft 11.2 (September, 1991) of the POSIX P1003.2 standard. 425 * 426 * Unfortunately it does not seem to correspond exactly to any of the 427 * five variants of EBCDIC documented in IBM's _Enterprise Systems 428 * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh 429 * Edition, July, 1999, pp. I-1 - I-4. 430 * 431 * Fortunately, though, all versions of EBCDIC, including this one, agree 432 * on most of the printing characters that also appear in (7-bit) ASCII. 433 * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all. 
434 * 435 * Fortunately too, there is general agreement that codes 0x00 through 436 * 0x3F represent control characters, 0x41 a nonbreaking space, and the 437 * remainder printing characters. 438 * 439 * This is sufficient to allow us to identify EBCDIC text and to distinguish 440 * between old-style and internationalized examples of text. 441 */ 442 443 private unsigned char ebcdic_to_ascii[] = { 444 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, 445 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, 446 128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, 447 144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, 448 ' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|', 449 '&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~', 450 '-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?', 451 186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"', 452 195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201, 453 202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208, 454 209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215, 455 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231, 456 '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237, 457 '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243, 458 '\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249, 459 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255 460 }; 461 462 #ifdef notdef 463 /* 464 * The following EBCDIC-to-ASCII table may relate more closely to reality, 465 * or at least to modern reality. 
It comes from 466 * 467 * http://ftp.s390.ibm.com/products/oe/bpxqp9.html 468 * 469 * and maps the characters of EBCDIC code page 1047 (the code used for 470 * Unix-derived software on IBM's 390 systems) to the corresponding 471 * characters from ISO 8859-1. 472 * 473 * If this table is used instead of the above one, some of the special 474 * cases for the NEL character can be taken out of the code. 475 */ 476 477 private unsigned char ebcdic_1047_to_8859[] = { 478 0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, 479 0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, 480 0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, 481 0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, 482 0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C, 483 0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E, 484 0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F, 485 0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, 486 0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1, 487 0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4, 488 0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE, 489 0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7, 490 0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5, 491 0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF, 492 0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, 493 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F 494 }; 495 #endif 496 497 /* 498 * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII. 
499 */ 500 private void 501 from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out) 502 { 503 size_t i; 504 505 for (i = 0; i < nbytes; i++) { 506 out[i] = ebcdic_to_ascii[buf[i]]; 507 } 508 } 509