xref: /minix3/external/bsd/file/dist/src/encoding.c (revision f14fb602092e015ff630df58e17c2a9cd57d29b3)
1 /*	$NetBSD: encoding.c,v 1.1.1.2 2011/05/12 20:46:52 christos Exp $	*/
2 
3 /*
4  * Copyright (c) Ian F. Darwin 1986-1995.
5  * Software written by Ian F. Darwin and others;
6  * maintained 1995-present by Christos Zoulas and others.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice immediately at the beginning of the file, without modification,
13  *    this list of conditions, and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 /*
31  * Encoding -- determine the character encoding of a text file.
32  *
33  * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
34  * international characters.
35  */
36 
37 #include "file.h"
38 
39 #ifndef	lint
40 #if 0
41 FILE_RCSID("@(#)$File: encoding.c,v 1.5 2010/07/21 16:47:17 christos Exp $")
42 #else
43 __RCSID("$NetBSD: encoding.c,v 1.1.1.2 2011/05/12 20:46:52 christos Exp $");
44 #endif
45 #endif	/* lint */
46 
47 #include "magic.h"
48 #include <string.h>
49 #include <memory.h>
50 #include <stdlib.h>
51 
52 
53 private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
54 private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
55     size_t *);
56 private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
57 private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
58 private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
59 private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
60 
/*
 * Debug tracing.  DPRINTF takes one argument: a complete, parenthesised
 * printf argument list, e.g. DPRINTF(("n=%d\n", n)).  It expands to a
 * printf call when DEBUG_ENCODING is defined and to nothing otherwise,
 * in which case the arguments are not evaluated.
 */
#if defined(DEBUG_ENCODING)
#define DPRINTF(a) printf a
#else
#define DPRINTF(a)
#endif
66 
67 /*
68  * Try to determine whether text is in some character code we can
69  * identify.  Each of these tests, if it succeeds, will leave
70  * the text converted into one-unichar-per-character Unicode in
71  * ubuf, and the number of characters converted in ulen.
72  */
73 protected int
74 file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar **ubuf, size_t *ulen, const char **code, const char **code_mime, const char **type)
75 {
76 	size_t mlen;
77 	int rv = 1, ucs_type;
78 	unsigned char *nbuf = NULL;
79 
80 	mlen = (nbytes + 1) * sizeof(nbuf[0]);
81 	if ((nbuf = CAST(unsigned char *, calloc((size_t)1, mlen))) == NULL) {
82 		file_oomem(ms, mlen);
83 		goto done;
84 	}
85 	mlen = (nbytes + 1) * sizeof((*ubuf)[0]);
86 	if ((*ubuf = CAST(unichar *, calloc((size_t)1, mlen))) == NULL) {
87 		file_oomem(ms, mlen);
88 		goto done;
89 	}
90 
91 	*type = "text";
92 	if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
93 		DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
94 		*code = "ASCII";
95 		*code_mime = "us-ascii";
96 	} else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
97 		DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
98 		*code = "UTF-8 Unicode (with BOM)";
99 		*code_mime = "utf-8";
100 	} else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
101 		DPRINTF(("utf8 %" SIZE_T_FORMAT "u\n", *ulen));
102 		*code = "UTF-8 Unicode (with BOM)";
103 		*code = "UTF-8 Unicode";
104 		*code_mime = "utf-8";
105 	} else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
106 		if (ucs_type == 1) {
107 			*code = "Little-endian UTF-16 Unicode";
108 			*code_mime = "utf-16le";
109 		} else {
110 			*code = "Big-endian UTF-16 Unicode";
111 			*code_mime = "utf-16be";
112 		}
113 		DPRINTF(("ucs16 %" SIZE_T_FORMAT "u\n", *ulen));
114 	} else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
115 		DPRINTF(("latin1 %" SIZE_T_FORMAT "u\n", *ulen));
116 		*code = "ISO-8859";
117 		*code_mime = "iso-8859-1";
118 	} else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
119 		DPRINTF(("extended %" SIZE_T_FORMAT "u\n", *ulen));
120 		*code = "Non-ISO extended-ASCII";
121 		*code_mime = "unknown-8bit";
122 	} else {
123 		from_ebcdic(buf, nbytes, nbuf);
124 
125 		if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
126 			DPRINTF(("ebcdic %" SIZE_T_FORMAT "u\n", *ulen));
127 			*code = "EBCDIC";
128 			*code_mime = "ebcdic";
129 		} else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
130 			DPRINTF(("ebcdic/international %" SIZE_T_FORMAT "u\n",
131 			    *ulen));
132 			*code = "International EBCDIC";
133 			*code_mime = "ebcdic";
134 		} else { /* Doesn't look like text at all */
135 			DPRINTF(("binary\n"));
136 			rv = 0;
137 			*type = "binary";
138 		}
139 	}
140 
141  done:
142 	if (nbuf)
143 		free(nbuf);
144 
145 	return rv;
146 }
147 
148 /*
149  * This table reflects a particular philosophy about what constitutes
150  * "text," and there is room for disagreement about it.
151  *
152  * Version 3.31 of the file command considered a file to be ASCII if
153  * each of its characters was approved by either the isascii() or
154  * isalpha() function.  On most systems, this would mean that any
155  * file consisting only of characters in the range 0x00 ... 0x7F
156  * would be called ASCII text, but many systems might reasonably
157  * consider some characters outside this range to be alphabetic,
158  * so the file command would call such characters ASCII.  It might
159  * have been more accurate to call this "considered textual on the
160  * local system" than "ASCII."
161  *
162  * It considered a file to be "International language text" if each
163  * of its characters was either an ASCII printing character (according
164  * to the real ASCII standard, not the above test), a character in
165  * the range 0x80 ... 0xFF, or one of the following control characters:
166  * backspace, tab, line feed, vertical tab, form feed, carriage return,
167  * escape.  No attempt was made to determine the language in which files
168  * of this type were written.
169  *
170  *
171  * The table below considers a file to be ASCII if all of its characters
172  * are either ASCII printing characters (again, according to the X3.4
173  * standard, not isascii()) or any of the following controls: bell,
174  * backspace, tab, line feed, form feed, carriage return, esc, nextline.
175  *
176  * I include bell because some programs (particularly shell scripts)
177  * use it literally, even though it is rare in normal text.  I exclude
178  * vertical tab because it never seems to be used in real text.  I also
179  * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
180  * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
181  * character to.  It might be more appropriate to include it in the 8859
182  * set instead of the ASCII set, but it's got to be included in *something*
183  * we recognize or EBCDIC files aren't going to be considered textual.
184  * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
185  * and Latin characters, so these should possibly be allowed.  But they
186  * make a real mess on VT100-style displays if they're not paired properly,
187  * so we are probably better off not calling them text.
188  *
189  * A file is considered to be ISO-8859 text if its characters are all
190  * either ASCII, according to the above definition, or printing characters
191  * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
192  *
193  * Finally, a file is considered to be international text from some other
194  * character code if its characters are all either ISO-8859 (according to
195  * the above definition) or characters in the range 0x80 ... 0x9F, which
196  * ISO-8859 considers to be control characters but the IBM PC and Macintosh
197  * consider to be printing characters.
198  */
199 
200 #define F 0   /* character never appears in text */
201 #define T 1   /* character appears in plain ASCII text */
202 #define I 2   /* character appears in ISO-8859 text */
203 #define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
204 
205 private char text_chars[256] = {
206 	/*                  BEL BS HT LF    FF CR    */
207 	F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
208 	/*                              ESC          */
209 	F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
210 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
211 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
212 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
213 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
214 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
215 	T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
216 	/*            NEL                            */
217 	X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
218 	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
219 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
220 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
221 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
222 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
223 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
224 	I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
225 };
226 
227 private int
228 looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
229     size_t *ulen)
230 {
231 	size_t i;
232 
233 	*ulen = 0;
234 
235 	for (i = 0; i < nbytes; i++) {
236 		int t = text_chars[buf[i]];
237 
238 		if (t != T)
239 			return 0;
240 
241 		ubuf[(*ulen)++] = buf[i];
242 	}
243 
244 	return 1;
245 }
246 
247 private int
248 looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
249 {
250 	size_t i;
251 
252 	*ulen = 0;
253 
254 	for (i = 0; i < nbytes; i++) {
255 		int t = text_chars[buf[i]];
256 
257 		if (t != T && t != I)
258 			return 0;
259 
260 		ubuf[(*ulen)++] = buf[i];
261 	}
262 
263 	return 1;
264 }
265 
266 private int
267 looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
268     size_t *ulen)
269 {
270 	size_t i;
271 
272 	*ulen = 0;
273 
274 	for (i = 0; i < nbytes; i++) {
275 		int t = text_chars[buf[i]];
276 
277 		if (t != T && t != I && t != X)
278 			return 0;
279 
280 		ubuf[(*ulen)++] = buf[i];
281 	}
282 
283 	return 1;
284 }
285 
286 /*
287  * Decide whether some text looks like UTF-8. Returns:
288  *
289  *     -1: invalid UTF-8
290  *      0: uses odd control characters, so doesn't look like text
291  *      1: 7-bit text
292  *      2: definitely UTF-8 text (valid high-bit set bytes)
293  *
294  * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen;
295  * ubuf must be big enough!
296  */
297 protected int
298 file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
299 {
300 	size_t i;
301 	int n;
302 	unichar c;
303 	int gotone = 0, ctrl = 0;
304 
305 	if (ubuf)
306 		*ulen = 0;
307 
308 	for (i = 0; i < nbytes; i++) {
309 		if ((buf[i] & 0x80) == 0) {	   /* 0xxxxxxx is plain ASCII */
310 			/*
311 			 * Even if the whole file is valid UTF-8 sequences,
312 			 * still reject it if it uses weird control characters.
313 			 */
314 
315 			if (text_chars[buf[i]] != T)
316 				ctrl = 1;
317 
318 			if (ubuf)
319 				ubuf[(*ulen)++] = buf[i];
320 		} else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
321 			return -1;
322 		} else {			   /* 11xxxxxx begins UTF-8 */
323 			int following;
324 
325 			if ((buf[i] & 0x20) == 0) {		/* 110xxxxx */
326 				c = buf[i] & 0x1f;
327 				following = 1;
328 			} else if ((buf[i] & 0x10) == 0) {	/* 1110xxxx */
329 				c = buf[i] & 0x0f;
330 				following = 2;
331 			} else if ((buf[i] & 0x08) == 0) {	/* 11110xxx */
332 				c = buf[i] & 0x07;
333 				following = 3;
334 			} else if ((buf[i] & 0x04) == 0) {	/* 111110xx */
335 				c = buf[i] & 0x03;
336 				following = 4;
337 			} else if ((buf[i] & 0x02) == 0) {	/* 1111110x */
338 				c = buf[i] & 0x01;
339 				following = 5;
340 			} else
341 				return -1;
342 
343 			for (n = 0; n < following; n++) {
344 				i++;
345 				if (i >= nbytes)
346 					goto done;
347 
348 				if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
349 					return -1;
350 
351 				c = (c << 6) + (buf[i] & 0x3f);
352 			}
353 
354 			if (ubuf)
355 				ubuf[(*ulen)++] = c;
356 			gotone = 1;
357 		}
358 	}
359 done:
360 	return ctrl ? 0 : (gotone ? 2 : 1);
361 }
362 
363 /*
364  * Decide whether some text looks like UTF-8 with BOM. If there is no
365  * BOM, return -1; otherwise return the result of looks_utf8 on the
366  * rest of the text.
367  */
368 private int
369 looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
370     size_t *ulen)
371 {
372 	if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)
373 		return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
374 	else
375 		return -1;
376 }
377 
378 private int
379 looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
380     size_t *ulen)
381 {
382 	int bigend;
383 	size_t i;
384 
385 	if (nbytes < 2)
386 		return 0;
387 
388 	if (buf[0] == 0xff && buf[1] == 0xfe)
389 		bigend = 0;
390 	else if (buf[0] == 0xfe && buf[1] == 0xff)
391 		bigend = 1;
392 	else
393 		return 0;
394 
395 	*ulen = 0;
396 
397 	for (i = 2; i + 1 < nbytes; i += 2) {
398 		/* XXX fix to properly handle chars > 65536 */
399 
400 		if (bigend)
401 			ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i];
402 		else
403 			ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1];
404 
405 		if (ubuf[*ulen - 1] == 0xfffe)
406 			return 0;
407 		if (ubuf[*ulen - 1] < 128 &&
408 		    text_chars[(size_t)ubuf[*ulen - 1]] != T)
409 			return 0;
410 	}
411 
412 	return 1 + bigend;
413 }
414 
415 #undef F
416 #undef T
417 #undef I
418 #undef X
419 
420 /*
421  * This table maps each EBCDIC character to an (8-bit extended) ASCII
422  * character, as specified in the rationale for the dd(1) command in
423  * draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
424  *
425  * Unfortunately it does not seem to correspond exactly to any of the
426  * five variants of EBCDIC documented in IBM's _Enterprise Systems
427  * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
428  * Edition, July, 1999, pp. I-1 - I-4.
429  *
430  * Fortunately, though, all versions of EBCDIC, including this one, agree
431  * on most of the printing characters that also appear in (7-bit) ASCII.
432  * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
433  *
434  * Fortunately too, there is general agreement that codes 0x00 through
435  * 0x3F represent control characters, 0x41 a nonbreaking space, and the
436  * remainder printing characters.
437  *
438  * This is sufficient to allow us to identify EBCDIC text and to distinguish
439  * between old-style and internationalized examples of text.
440  */
441 
442 private unsigned char ebcdic_to_ascii[] = {
443   0,   1,   2,   3, 156,   9, 134, 127, 151, 141, 142,  11,  12,  13,  14,  15,
444  16,  17,  18,  19, 157, 133,   8, 135,  24,  25, 146, 143,  28,  29,  30,  31,
445 128, 129, 130, 131, 132,  10,  23,  27, 136, 137, 138, 139, 140,   5,   6,   7,
446 144, 145,  22, 147, 148, 149, 150,   4, 152, 153, 154, 155,  20,  21, 158,  26,
447 ' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
448 '&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
449 '-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
450 186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
451 195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
452 202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
453 209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
454 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
455 '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
456 '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
457 '\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
458 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
459 };
460 
461 #ifdef notdef
462 /*
463  * The following EBCDIC-to-ASCII table may relate more closely to reality,
464  * or at least to modern reality.  It comes from
465  *
466  *   http://ftp.s390.ibm.com/products/oe/bpxqp9.html
467  *
468  * and maps the characters of EBCDIC code page 1047 (the code used for
469  * Unix-derived software on IBM's 390 systems) to the corresponding
470  * characters from ISO 8859-1.
471  *
472  * If this table is used instead of the above one, some of the special
473  * cases for the NEL character can be taken out of the code.
474  */
475 
/*
 * Alternative translation table, indexed by EBCDIC byte value, 16
 * entries per row.  It sits inside "#ifdef notdef", so it is compiled
 * only if notdef is defined, and nothing visible in this file refers
 * to it.
 */
private unsigned char ebcdic_1047_to_8859[] = {
0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
};
494 #endif
495 
496 /*
497  * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII.
498  */
499 private void
500 from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
501 {
502 	size_t i;
503 
504 	for (i = 0; i < nbytes; i++) {
505 		out[i] = ebcdic_to_ascii[buf[i]];
506 	}
507 }
508