1 /* $NetBSD: encoding.c,v 1.5 2017/02/10 17:53:24 christos Exp $ */ 2 3 /* 4 * Copyright (c) Ian F. Darwin 1986-1995. 5 * Software written by Ian F. Darwin and others; 6 * maintained 1995-present by Christos Zoulas and others. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice immediately at the beginning of the file, without modification, 13 * this list of conditions, and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 22 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 /* 31 * Encoding -- determine the character encoding of a text file. 32 * 33 * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit 34 * international characters. 
35 */ 36 37 #include "file.h" 38 39 #ifndef lint 40 #if 0 41 FILE_RCSID("@(#)$File: encoding.c,v 1.13 2015/06/04 19:16:28 christos Exp $") 42 #else 43 __RCSID("$NetBSD: encoding.c,v 1.5 2017/02/10 17:53:24 christos Exp $"); 44 #endif 45 #endif /* lint */ 46 47 #include "magic.h" 48 #include <string.h> 49 #include <memory.h> 50 #include <stdlib.h> 51 52 53 private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *); 54 private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *, 55 size_t *); 56 private int looks_utf7(const unsigned char *, size_t, unichar *, size_t *); 57 private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *); 58 private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *); 59 private int looks_extended(const unsigned char *, size_t, unichar *, size_t *); 60 private void from_ebcdic(const unsigned char *, size_t, unsigned char *); 61 62 #ifdef DEBUG_ENCODING 63 #define DPRINTF(a) printf a 64 #else 65 #define DPRINTF(a) 66 #endif 67 68 /* 69 * Try to determine whether text is in some character code we can 70 * identify. Each of these tests, if it succeeds, will leave 71 * the text converted into one-unichar-per-character Unicode in 72 * ubuf, and the number of characters converted in ulen. 
73 */ 74 protected int 75 file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar **ubuf, size_t *ulen, const char **code, const char **code_mime, const char **type) 76 { 77 size_t mlen; 78 int rv = 1, ucs_type; 79 unsigned char *nbuf = NULL; 80 81 *type = "text"; 82 *ulen = 0; 83 *code = "unknown"; 84 *code_mime = "binary"; 85 86 mlen = (nbytes + 1) * sizeof((*ubuf)[0]); 87 if ((*ubuf = CAST(unichar *, calloc((size_t)1, mlen))) == NULL) { 88 file_oomem(ms, mlen); 89 goto done; 90 } 91 mlen = (nbytes + 1) * sizeof(nbuf[0]); 92 if ((nbuf = CAST(unsigned char *, calloc((size_t)1, mlen))) == NULL) { 93 file_oomem(ms, mlen); 94 goto done; 95 } 96 97 if (looks_ascii(buf, nbytes, *ubuf, ulen)) { 98 if (looks_utf7(buf, nbytes, *ubuf, ulen) > 0) { 99 DPRINTF(("utf-7 %" SIZE_T_FORMAT "u\n", *ulen)); 100 *code = "UTF-7 Unicode"; 101 *code_mime = "utf-7"; 102 } else { 103 DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen)); 104 *code = "ASCII"; 105 *code_mime = "us-ascii"; 106 } 107 } else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) { 108 DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen)); 109 *code = "UTF-8 Unicode (with BOM)"; 110 *code_mime = "utf-8"; 111 } else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) { 112 DPRINTF(("utf8 %" SIZE_T_FORMAT "u\n", *ulen)); 113 *code = "UTF-8 Unicode"; 114 *code_mime = "utf-8"; 115 } else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) { 116 if (ucs_type == 1) { 117 *code = "Little-endian UTF-16 Unicode"; 118 *code_mime = "utf-16le"; 119 } else { 120 *code = "Big-endian UTF-16 Unicode"; 121 *code_mime = "utf-16be"; 122 } 123 DPRINTF(("ucs16 %" SIZE_T_FORMAT "u\n", *ulen)); 124 } else if (looks_latin1(buf, nbytes, *ubuf, ulen)) { 125 DPRINTF(("latin1 %" SIZE_T_FORMAT "u\n", *ulen)); 126 *code = "ISO-8859"; 127 *code_mime = "iso-8859-1"; 128 } else if (looks_extended(buf, nbytes, *ubuf, ulen)) { 129 DPRINTF(("extended %" SIZE_T_FORMAT "u\n", *ulen)); 130 *code = "Non-ISO 
extended-ASCII"; 131 *code_mime = "unknown-8bit"; 132 } else { 133 from_ebcdic(buf, nbytes, nbuf); 134 135 if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) { 136 DPRINTF(("ebcdic %" SIZE_T_FORMAT "u\n", *ulen)); 137 *code = "EBCDIC"; 138 *code_mime = "ebcdic"; 139 } else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) { 140 DPRINTF(("ebcdic/international %" SIZE_T_FORMAT "u\n", 141 *ulen)); 142 *code = "International EBCDIC"; 143 *code_mime = "ebcdic"; 144 } else { /* Doesn't look like text at all */ 145 DPRINTF(("binary\n")); 146 rv = 0; 147 *type = "binary"; 148 } 149 } 150 151 done: 152 free(nbuf); 153 154 return rv; 155 } 156 157 /* 158 * This table reflects a particular philosophy about what constitutes 159 * "text," and there is room for disagreement about it. 160 * 161 * Version 3.31 of the file command considered a file to be ASCII if 162 * each of its characters was approved by either the isascii() or 163 * isalpha() function. On most systems, this would mean that any 164 * file consisting only of characters in the range 0x00 ... 0x7F 165 * would be called ASCII text, but many systems might reasonably 166 * consider some characters outside this range to be alphabetic, 167 * so the file command would call such characters ASCII. It might 168 * have been more accurate to call this "considered textual on the 169 * local system" than "ASCII." 170 * 171 * It considered a file to be "International language text" if each 172 * of its characters was either an ASCII printing character (according 173 * to the real ASCII standard, not the above test), a character in 174 * the range 0x80 ... 0xFF, or one of the following control characters: 175 * backspace, tab, line feed, vertical tab, form feed, carriage return, 176 * escape. No attempt was made to determine the language in which files 177 * of this type were written. 
 *
 *
 * The table below considers a file to be ASCII if all of its characters
 * are either ASCII printing characters (again, according to the X3.4
 * standard, not isascii()) or any of the following controls: bell,
 * backspace, tab, line feed, vertical tab, form feed, carriage return,
 * esc, nextline.
 *
 * I include bell because some programs (particularly shell scripts)
 * use it literally, even though it is rare in normal text.  Vertical
 * tab never seems to be used in real text, but the table below
 * nevertheless marks it (0x0B) as text.  I also
 * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
 * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
 * character to.  It might be more appropriate to include it in the 8859
 * set instead of the ASCII set, but it's got to be included in *something*
 * we recognize or EBCDIC files aren't going to be considered textual.
 * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
 * and Latin characters, so these should possibly be allowed.  But they
 * make a real mess on VT100-style displays if they're not paired properly,
 * so we are probably better off not calling them text.
 *
 * A file is considered to be ISO-8859 text if its characters are all
 * either ASCII, according to the above definition, or printing characters
 * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
 *
 * Finally, a file is considered to be international text from some other
 * character code if its characters are all either ISO-8859 (according to
 * the above definition) or characters in the range 0x80 ... 0x9F, which
 * ISO-8859 considers to be control characters but the IBM PC and Macintosh
 * consider to be printing characters.
207 */ 208 209 #define F 0 /* character never appears in text */ 210 #define T 1 /* character appears in plain ASCII text */ 211 #define I 2 /* character appears in ISO-8859 text */ 212 #define X 3 /* character appears in non-ISO extended ASCII (Mac, IBM PC) */ 213 214 private char text_chars[256] = { 215 /* BEL BS HT LF VT FF CR */ 216 F, F, F, F, F, F, F, T, T, T, T, T, T, T, F, F, /* 0x0X */ 217 /* ESC */ 218 F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F, /* 0x1X */ 219 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x2X */ 220 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x3X */ 221 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x4X */ 222 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x5X */ 223 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, /* 0x6X */ 224 T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F, /* 0x7X */ 225 /* NEL */ 226 X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X, /* 0x8X */ 227 X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, /* 0x9X */ 228 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xaX */ 229 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xbX */ 230 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xcX */ 231 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xdX */ 232 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, /* 0xeX */ 233 I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I /* 0xfX */ 234 }; 235 236 private int 237 looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf, 238 size_t *ulen) 239 { 240 size_t i; 241 242 *ulen = 0; 243 244 for (i = 0; i < nbytes; i++) { 245 int t = text_chars[buf[i]]; 246 247 if (t != T) 248 return 0; 249 250 ubuf[(*ulen)++] = buf[i]; 251 } 252 253 return 1; 254 } 255 256 private int 257 looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) 258 { 259 size_t i; 260 261 *ulen = 0; 262 263 for (i = 0; i < nbytes; i++) { 264 int t = text_chars[buf[i]]; 265 266 if (t != T && t != I) 267 return 0; 268 269 ubuf[(*ulen)++] = buf[i]; 270 } 271 
272 return 1; 273 } 274 275 private int 276 looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf, 277 size_t *ulen) 278 { 279 size_t i; 280 281 *ulen = 0; 282 283 for (i = 0; i < nbytes; i++) { 284 int t = text_chars[buf[i]]; 285 286 if (t != T && t != I && t != X) 287 return 0; 288 289 ubuf[(*ulen)++] = buf[i]; 290 } 291 292 return 1; 293 } 294 295 /* 296 * Decide whether some text looks like UTF-8. Returns: 297 * 298 * -1: invalid UTF-8 299 * 0: uses odd control characters, so doesn't look like text 300 * 1: 7-bit text 301 * 2: definitely UTF-8 text (valid high-bit set bytes) 302 * 303 * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen; 304 * ubuf must be big enough! 305 */ 306 protected int 307 file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) 308 { 309 size_t i; 310 int n; 311 unichar c; 312 int gotone = 0, ctrl = 0; 313 314 if (ubuf) 315 *ulen = 0; 316 317 for (i = 0; i < nbytes; i++) { 318 if ((buf[i] & 0x80) == 0) { /* 0xxxxxxx is plain ASCII */ 319 /* 320 * Even if the whole file is valid UTF-8 sequences, 321 * still reject it if it uses weird control characters. 
322 */ 323 324 if (text_chars[buf[i]] != T) 325 ctrl = 1; 326 327 if (ubuf) 328 ubuf[(*ulen)++] = buf[i]; 329 } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */ 330 return -1; 331 } else { /* 11xxxxxx begins UTF-8 */ 332 int following; 333 334 if ((buf[i] & 0x20) == 0) { /* 110xxxxx */ 335 c = buf[i] & 0x1f; 336 following = 1; 337 } else if ((buf[i] & 0x10) == 0) { /* 1110xxxx */ 338 c = buf[i] & 0x0f; 339 following = 2; 340 } else if ((buf[i] & 0x08) == 0) { /* 11110xxx */ 341 c = buf[i] & 0x07; 342 following = 3; 343 } else if ((buf[i] & 0x04) == 0) { /* 111110xx */ 344 c = buf[i] & 0x03; 345 following = 4; 346 } else if ((buf[i] & 0x02) == 0) { /* 1111110x */ 347 c = buf[i] & 0x01; 348 following = 5; 349 } else 350 return -1; 351 352 for (n = 0; n < following; n++) { 353 i++; 354 if (i >= nbytes) 355 goto done; 356 357 if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40)) 358 return -1; 359 360 c = (c << 6) + (buf[i] & 0x3f); 361 } 362 363 if (ubuf) 364 ubuf[(*ulen)++] = c; 365 gotone = 1; 366 } 367 } 368 done: 369 return ctrl ? 0 : (gotone ? 2 : 1); 370 } 371 372 /* 373 * Decide whether some text looks like UTF-8 with BOM. If there is no 374 * BOM, return -1; otherwise return the result of looks_utf8 on the 375 * rest of the text. 
376 */ 377 private int 378 looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf, 379 size_t *ulen) 380 { 381 if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf) 382 return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen); 383 else 384 return -1; 385 } 386 387 private int 388 looks_utf7(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen) 389 { 390 if (nbytes > 4 && buf[0] == '+' && buf[1] == '/' && buf[2] == 'v') 391 switch (buf[3]) { 392 case '8': 393 case '9': 394 case '+': 395 case '/': 396 if (ubuf) 397 *ulen = 0; 398 return 1; 399 default: 400 return -1; 401 } 402 else 403 return -1; 404 } 405 406 private int 407 looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf, 408 size_t *ulen) 409 { 410 int bigend; 411 size_t i; 412 413 if (nbytes < 2) 414 return 0; 415 416 if (buf[0] == 0xff && buf[1] == 0xfe) 417 bigend = 0; 418 else if (buf[0] == 0xfe && buf[1] == 0xff) 419 bigend = 1; 420 else 421 return 0; 422 423 *ulen = 0; 424 425 for (i = 2; i + 1 < nbytes; i += 2) { 426 /* XXX fix to properly handle chars > 65536 */ 427 428 if (bigend) 429 ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i]; 430 else 431 ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1]; 432 433 if (ubuf[*ulen - 1] == 0xfffe) 434 return 0; 435 if (ubuf[*ulen - 1] < 128 && 436 text_chars[(size_t)ubuf[*ulen - 1]] != T) 437 return 0; 438 } 439 440 return 1 + bigend; 441 } 442 443 #undef F 444 #undef T 445 #undef I 446 #undef X 447 448 /* 449 * This table maps each EBCDIC character to an (8-bit extended) ASCII 450 * character, as specified in the rationale for the dd(1) command in 451 * draft 11.2 (September, 1991) of the POSIX P1003.2 standard. 452 * 453 * Unfortunately it does not seem to correspond exactly to any of the 454 * five variants of EBCDIC documented in IBM's _Enterprise Systems 455 * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh 456 * Edition, July, 1999, pp. I-1 - I-4. 
457 * 458 * Fortunately, though, all versions of EBCDIC, including this one, agree 459 * on most of the printing characters that also appear in (7-bit) ASCII. 460 * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all. 461 * 462 * Fortunately too, there is general agreement that codes 0x00 through 463 * 0x3F represent control characters, 0x41 a nonbreaking space, and the 464 * remainder printing characters. 465 * 466 * This is sufficient to allow us to identify EBCDIC text and to distinguish 467 * between old-style and internationalized examples of text. 468 */ 469 470 private unsigned char ebcdic_to_ascii[] = { 471 0, 1, 2, 3, 156, 9, 134, 127, 151, 141, 142, 11, 12, 13, 14, 15, 472 16, 17, 18, 19, 157, 133, 8, 135, 24, 25, 146, 143, 28, 29, 30, 31, 473 128, 129, 130, 131, 132, 10, 23, 27, 136, 137, 138, 139, 140, 5, 6, 7, 474 144, 145, 22, 147, 148, 149, 150, 4, 152, 153, 154, 155, 20, 21, 158, 26, 475 ' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|', 476 '&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~', 477 '-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?', 478 186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"', 479 195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201, 480 202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208, 481 209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215, 482 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231, 483 '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237, 484 '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243, 485 '\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249, 486 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255 487 }; 488 489 #ifdef notdef 490 /* 491 * The following 
EBCDIC-to-ASCII table may relate more closely to reality, 492 * or at least to modern reality. It comes from 493 * 494 * http://ftp.s390.ibm.com/products/oe/bpxqp9.html 495 * 496 * and maps the characters of EBCDIC code page 1047 (the code used for 497 * Unix-derived software on IBM's 390 systems) to the corresponding 498 * characters from ISO 8859-1. 499 * 500 * If this table is used instead of the above one, some of the special 501 * cases for the NEL character can be taken out of the code. 502 */ 503 504 private unsigned char ebcdic_1047_to_8859[] = { 505 0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F, 506 0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F, 507 0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07, 508 0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A, 509 0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C, 510 0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E, 511 0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F, 512 0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22, 513 0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1, 514 0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4, 515 0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE, 516 0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7, 517 0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5, 518 0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF, 519 0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5, 520 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F 521 }; 522 #endif 523 524 /* 525 * Copy buf[0 ... 
nbytes-1] into out[], translating EBCDIC to ASCII.
 */
private void
from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
{
	size_t pos = 0;

	/* Each input byte is simply an index into ebcdic_to_ascii[]. */
	while (pos < nbytes) {
		out[pos] = ebcdic_to_ascii[buf[pos]];
		pos++;
	}
}