xref: /netbsd-src/external/bsd/file/dist/src/encoding.c (revision 6cf6fe02a981b55727c49c3d37b0d8191a98c0ee)
1 /*	$NetBSD: encoding.c,v 1.2 2014/09/11 13:30:04 christos Exp $	*/
2 /*
3  * Copyright (c) Ian F. Darwin 1986-1995.
4  * Software written by Ian F. Darwin and others;
5  * maintained 1995-present by Christos Zoulas and others.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice immediately at the beginning of the file, without modification,
12  *    this list of conditions, and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
21  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 /*
30  * Encoding -- determine the character encoding of a text file.
31  *
32  * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
33  * international characters.
34  */
35 
36 #include "file.h"
37 
38 #ifndef	lint
39 #if 0
40 FILE_RCSID("@(#)$File: encoding.c,v 1.9 2013/11/19 20:45:50 christos Exp $")
41 #else
42 __RCSID("$NetBSD: encoding.c,v 1.2 2014/09/11 13:30:04 christos Exp $");
43 #endif
44 #endif	/* lint */
45 
46 #include "magic.h"
47 #include <string.h>
48 #include <memory.h>
49 #include <stdlib.h>
50 
51 
52 private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
53 private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
54     size_t *);
55 private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
56 private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
57 private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
58 private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
59 
60 #ifdef DEBUG_ENCODING
61 #define DPRINTF(a) printf a
62 #else
63 #define DPRINTF(a)
64 #endif
65 
66 /*
67  * Try to determine whether text is in some character code we can
68  * identify.  Each of these tests, if it succeeds, will leave
69  * the text converted into one-unichar-per-character Unicode in
70  * ubuf, and the number of characters converted in ulen.
71  */
72 protected int
73 file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar **ubuf, size_t *ulen, const char **code, const char **code_mime, const char **type)
74 {
75 	size_t mlen;
76 	int rv = 1, ucs_type;
77 	unsigned char *nbuf = NULL;
78 
79 	*type = "text";
80 	*ulen = 0;
81 	*code = "unknown";
82 	*code_mime = "binary";
83 
84 	mlen = (nbytes + 1) * sizeof((*ubuf)[0]);
85 	if ((*ubuf = CAST(unichar *, calloc((size_t)1, mlen))) == NULL) {
86 		file_oomem(ms, mlen);
87 		goto done;
88 	}
89 	mlen = (nbytes + 1) * sizeof(nbuf[0]);
90 	if ((nbuf = CAST(unsigned char *, calloc((size_t)1, mlen))) == NULL) {
91 		file_oomem(ms, mlen);
92 		goto done;
93 	}
94 
95 	if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
96 		DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
97 		*code = "ASCII";
98 		*code_mime = "us-ascii";
99 	} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
100 		DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
101 		*code = "UTF-8 Unicode (with BOM)";
102 		*code_mime = "utf-8";
103 	} else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
104 		DPRINTF(("utf8 %" SIZE_T_FORMAT "u\n", *ulen));
105 		*code = "UTF-8 Unicode";
106 		*code_mime = "utf-8";
107 	} else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
108 		if (ucs_type == 1) {
109 			*code = "Little-endian UTF-16 Unicode";
110 			*code_mime = "utf-16le";
111 		} else {
112 			*code = "Big-endian UTF-16 Unicode";
113 			*code_mime = "utf-16be";
114 		}
115 		DPRINTF(("ucs16 %" SIZE_T_FORMAT "u\n", *ulen));
116 	} else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
117 		DPRINTF(("latin1 %" SIZE_T_FORMAT "u\n", *ulen));
118 		*code = "ISO-8859";
119 		*code_mime = "iso-8859-1";
120 	} else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
121 		DPRINTF(("extended %" SIZE_T_FORMAT "u\n", *ulen));
122 		*code = "Non-ISO extended-ASCII";
123 		*code_mime = "unknown-8bit";
124 	} else {
125 		from_ebcdic(buf, nbytes, nbuf);
126 
127 		if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
128 			DPRINTF(("ebcdic %" SIZE_T_FORMAT "u\n", *ulen));
129 			*code = "EBCDIC";
130 			*code_mime = "ebcdic";
131 		} else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
132 			DPRINTF(("ebcdic/international %" SIZE_T_FORMAT "u\n",
133 			    *ulen));
134 			*code = "International EBCDIC";
135 			*code_mime = "ebcdic";
136 		} else { /* Doesn't look like text at all */
137 			DPRINTF(("binary\n"));
138 			rv = 0;
139 			*type = "binary";
140 		}
141 	}
142 
143  done:
144 	free(nbuf);
145 
146 	return rv;
147 }
148 
149 /*
150  * This table reflects a particular philosophy about what constitutes
151  * "text," and there is room for disagreement about it.
152  *
153  * Version 3.31 of the file command considered a file to be ASCII if
154  * each of its characters was approved by either the isascii() or
155  * isalpha() function.  On most systems, this would mean that any
156  * file consisting only of characters in the range 0x00 ... 0x7F
157  * would be called ASCII text, but many systems might reasonably
158  * consider some characters outside this range to be alphabetic,
159  * so the file command would call such characters ASCII.  It might
160  * have been more accurate to call this "considered textual on the
161  * local system" than "ASCII."
162  *
163  * It considered a file to be "International language text" if each
164  * of its characters was either an ASCII printing character (according
165  * to the real ASCII standard, not the above test), a character in
166  * the range 0x80 ... 0xFF, or one of the following control characters:
167  * backspace, tab, line feed, vertical tab, form feed, carriage return,
168  * escape.  No attempt was made to determine the language in which files
169  * of this type were written.
170  *
171  *
172  * The table below considers a file to be ASCII if all of its characters
173  * are either ASCII printing characters (again, according to the X3.4
174  * standard, not isascii()) or any of the following controls: bell,
175  * backspace, tab, line feed, form feed, carriage return, esc, nextline.
176  *
177  * I include bell because some programs (particularly shell scripts)
178  * use it literally, even though it is rare in normal text.  I exclude
179  * vertical tab because it never seems to be used in real text.  I also
180  * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
181  * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
182  * character to.  It might be more appropriate to include it in the 8859
183  * set instead of the ASCII set, but it's got to be included in *something*
184  * we recognize or EBCDIC files aren't going to be considered textual.
185  * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
186  * and Latin characters, so these should possibly be allowed.  But they
187  * make a real mess on VT100-style displays if they're not paired properly,
188  * so we are probably better off not calling them text.
189  *
190  * A file is considered to be ISO-8859 text if its characters are all
191  * either ASCII, according to the above definition, or printing characters
192  * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
193  *
194  * Finally, a file is considered to be international text from some other
195  * character code if its characters are all either ISO-8859 (according to
196  * the above definition) or characters in the range 0x80 ... 0x9F, which
197  * ISO-8859 considers to be control characters but the IBM PC and Macintosh
198  * consider to be printing characters.
199  */
200 
201 #define F 0   /* character never appears in text */
202 #define T 1   /* character appears in plain ASCII text */
203 #define I 2   /* character appears in ISO-8859 text */
204 #define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
205 
206 private char text_chars[256] = {
207 	/*                  BEL BS HT LF    FF CR    */
208 	F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
209 	/*                              ESC          */
210 	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
211 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
212 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
213 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
214 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
215 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
216 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
217 	/*            NEL                            */
218 	X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
219 	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
220 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
221 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
222 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
223 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
224 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
225 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
226 };
227 
228 private int
229 looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
230     size_t *ulen)
231 {
232 	size_t i;
233 
234 	*ulen = 0;
235 
236 	for (i = 0; i < nbytes; i++) {
237 		int t = text_chars[buf[i]];
238 
239 		if (t != T)
240 			return 0;
241 
242 		ubuf[(*ulen)++] = buf[i];
243 	}
244 
245 	return 1;
246 }
247 
248 private int
249 looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
250 {
251 	size_t i;
252 
253 	*ulen = 0;
254 
255 	for (i = 0; i < nbytes; i++) {
256 		int t = text_chars[buf[i]];
257 
258 		if (t != T && t != I)
259 			return 0;
260 
261 		ubuf[(*ulen)++] = buf[i];
262 	}
263 
264 	return 1;
265 }
266 
267 private int
268 looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
269     size_t *ulen)
270 {
271 	size_t i;
272 
273 	*ulen = 0;
274 
275 	for (i = 0; i < nbytes; i++) {
276 		int t = text_chars[buf[i]];
277 
278 		if (t != T && t != I && t != X)
279 			return 0;
280 
281 		ubuf[(*ulen)++] = buf[i];
282 	}
283 
284 	return 1;
285 }
286 
287 /*
288  * Decide whether some text looks like UTF-8. Returns:
289  *
290  *     -1: invalid UTF-8
291  *      0: uses odd control characters, so doesn't look like text
292  *      1: 7-bit text
293  *      2: definitely UTF-8 text (valid high-bit set bytes)
294  *
295  * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen;
296  * ubuf must be big enough!
297  */
298 protected int
299 file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
300 {
301 	size_t i;
302 	int n;
303 	unichar c;
304 	int gotone = 0, ctrl = 0;
305 
306 	if (ubuf)
307 		*ulen = 0;
308 
309 	for (i = 0; i < nbytes; i++) {
310 		if ((buf[i] & 0x80) == 0) {	   /* 0xxxxxxx is plain ASCII */
311 			/*
312 			 * Even if the whole file is valid UTF-8 sequences,
313 			 * still reject it if it uses weird control characters.
314 			 */
315 
316 			if (text_chars[buf[i]] != T)
317 				ctrl = 1;
318 
319 			if (ubuf)
320 				ubuf[(*ulen)++] = buf[i];
321 		} else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
322 			return -1;
323 		} else {			   /* 11xxxxxx begins UTF-8 */
324 			int following;
325 
326 			if ((buf[i] & 0x20) == 0) {		/* 110xxxxx */
327 				c = buf[i] & 0x1f;
328 				following = 1;
329 			} else if ((buf[i] & 0x10) == 0) {	/* 1110xxxx */
330 				c = buf[i] & 0x0f;
331 				following = 2;
332 			} else if ((buf[i] & 0x08) == 0) {	/* 11110xxx */
333 				c = buf[i] & 0x07;
334 				following = 3;
335 			} else if ((buf[i] & 0x04) == 0) {	/* 111110xx */
336 				c = buf[i] & 0x03;
337 				following = 4;
338 			} else if ((buf[i] & 0x02) == 0) {	/* 1111110x */
339 				c = buf[i] & 0x01;
340 				following = 5;
341 			} else
342 				return -1;
343 
344 			for (n = 0; n < following; n++) {
345 				i++;
346 				if (i >= nbytes)
347 					goto done;
348 
349 				if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
350 					return -1;
351 
352 				c = (c << 6) + (buf[i] & 0x3f);
353 			}
354 
355 			if (ubuf)
356 				ubuf[(*ulen)++] = c;
357 			gotone = 1;
358 		}
359 	}
360 done:
361 	return ctrl ? 0 : (gotone ? 2 : 1);
362 }
363 
364 /*
365  * Decide whether some text looks like UTF-8 with BOM. If there is no
366  * BOM, return -1; otherwise return the result of looks_utf8 on the
367  * rest of the text.
368  */
369 private int
370 looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
371     size_t *ulen)
372 {
373 	if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)
374 		return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
375 	else
376 		return -1;
377 }
378 
379 private int
380 looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
381     size_t *ulen)
382 {
383 	int bigend;
384 	size_t i;
385 
386 	if (nbytes < 2)
387 		return 0;
388 
389 	if (buf[0] == 0xff && buf[1] == 0xfe)
390 		bigend = 0;
391 	else if (buf[0] == 0xfe && buf[1] == 0xff)
392 		bigend = 1;
393 	else
394 		return 0;
395 
396 	*ulen = 0;
397 
398 	for (i = 2; i + 1 < nbytes; i += 2) {
399 		/* XXX fix to properly handle chars > 65536 */
400 
401 		if (bigend)
402 			ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i];
403 		else
404 			ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1];
405 
406 		if (ubuf[*ulen - 1] == 0xfffe)
407 			return 0;
408 		if (ubuf[*ulen - 1] < 128 &&
409 		    text_chars[(size_t)ubuf[*ulen - 1]] != T)
410 			return 0;
411 	}
412 
413 	return 1 + bigend;
414 }
415 
416 #undef F
417 #undef T
418 #undef I
419 #undef X
420 
421 /*
422  * This table maps each EBCDIC character to an (8-bit extended) ASCII
423  * character, as specified in the rationale for the dd(1) command in
424  * draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
425  *
426  * Unfortunately it does not seem to correspond exactly to any of the
427  * five variants of EBCDIC documented in IBM's _Enterprise Systems
428  * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
429  * Edition, July, 1999, pp. I-1 - I-4.
430  *
431  * Fortunately, though, all versions of EBCDIC, including this one, agree
432  * on most of the printing characters that also appear in (7-bit) ASCII.
433  * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
434  *
435  * Fortunately too, there is general agreement that codes 0x00 through
436  * 0x3F represent control characters, 0x41 a nonbreaking space, and the
437  * remainder printing characters.
438  *
439  * This is sufficient to allow us to identify EBCDIC text and to distinguish
440  * between old-style and internationalized examples of text.
441  */
442 
443 private unsigned char ebcdic_to_ascii[] = {
444   0,   1,   2,   3, 156,   9, 134, 127, 151, 141, 142,  11,  12,  13,  14,  15,
445  16,  17,  18,  19, 157, 133,   8, 135,  24,  25, 146, 143,  28,  29,  30,  31,
446 128, 129, 130, 131, 132,  10,  23,  27, 136, 137, 138, 139, 140,   5,   6,   7,
447 144, 145,  22, 147, 148, 149, 150,   4, 152, 153, 154, 155,  20,  21, 158,  26,
448 ' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
449 '&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
450 '-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
451 186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
452 195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
453 202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
454 209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
455 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
456 '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
457 '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
458 '\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
459 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
460 };
461 
462 #ifdef notdef
463 /*
464  * The following EBCDIC-to-ASCII table may relate more closely to reality,
465  * or at least to modern reality.  It comes from
466  *
467  *   http://ftp.s390.ibm.com/products/oe/bpxqp9.html
468  *
469  * and maps the characters of EBCDIC code page 1047 (the code used for
470  * Unix-derived software on IBM's 390 systems) to the corresponding
471  * characters from ISO 8859-1.
472  *
473  * If this table is used instead of the above one, some of the special
474  * cases for the NEL character can be taken out of the code.
475  */
476 
477 private unsigned char ebcdic_1047_to_8859[] = {
478 0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
479 0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
480 0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
481 0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
482 0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
483 0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
484 0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
485 0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
486 0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
487 0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
488 0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
489 0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
490 0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
491 0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
492 0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
493 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
494 };
495 #endif
496 
497 /*
498  * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII.
499  */
500 private void
501 from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
502 {
503 	size_t i;
504 
505 	for (i = 0; i < nbytes; i++) {
506 		out[i] = ebcdic_to_ascii[buf[i]];
507 	}
508 }
509