xref: /plan9/sys/src/cmd/tcs/tcs.c (revision 853458f38e7eb3a48cfa3a36aefdb799375e398a)
1 #ifndef PLAN9
2 #include	<sys/types.h>
3 #include	<stdio.h>
4 #include	<unistd.h>
5 #include	<stdlib.h>
6 #include	<fcntl.h>
7 #include	<string.h>
8 #include	<errno.h>
9 #include	"plan9.h"
10 #else /* PLAN9 */
11 #include	<u.h>
12 #include	<libc.h>
13 #include	<bio.h>
14 #endif /* PLAN9 */
15 #include	"cyrillic.h"
16 #include	"misc.h"
17 #include	"ms.h"
18 #include	"8859.h"
19 #include	"big5.h"
20 #include	"gb.h"
21 #include	"hdr.h"
22 #include	"conv.h"
23 
24 void usage(void);
25 void list(void);
26 int squawk = 1;
27 int clean = 0;
28 int verbose = 0;
29 long ninput, noutput, nrunes, nerrors;
30 char *file = "stdin";
31 char *argv0;
32 Rune runes[N];
33 char obuf[UTFmax*N];	/* maximum bloat from N runes */
34 long tab[NRUNE];
35 #ifndef	PLAN9
36 extern char version[];
37 #endif
38 
39 void intable(int, long *, struct convert *);
40 void unicode_in(int, long *, struct convert *);
41 void unicode_out(Rune *, int, long *);
42 
43 int
main(int argc,char ** argv)44 main(int argc, char **argv)
45 {
46 	char *from = "utf";
47 	char *to = "utf";
48 	int fd;
49 	int listem = 0;
50 	struct convert *t, *f;
51 
52 	ARGBEGIN {
53 	case 'c':
54 		clean = 1;
55 		break;
56 	case 'f':
57 		from = EARGF(usage());
58 		break;
59 	case 'l':
60 		listem = 1;
61 		break;
62 	case 's':
63 		squawk = 0;
64 		break;
65 	case 't':
66 		to = EARGF(usage());
67 		break;
68 	case 'v':
69 		verbose = 1;
70 		break;
71 	default:
72 		usage();
73 		break;
74 	} ARGEND
75 
76 	USED(argc);
77 	if(verbose)
78 		squawk = 1;
79 	if(listem){
80 		list();
81 		EXIT(0, 0);
82 	}
83 	if(!from || !to)
84 		usage();
85 	f = conv(from, 1);
86 	t = conv(to, 0);
87 #define	PROC	{if(f->flags&Table)\
88 			intable(fd, (long *)f->data, t);\
89 		else\
90 			((Infn)(f->fn))(fd, (long *)0, t);}
91 	if(*argv){
92 		while(*argv){
93 			file = *argv;
94 #ifndef PLAN9
95 			if((fd = open(*argv, 0)) < 0){
96 				EPR "%s: %s: %s\n", argv0, *argv, strerror(errno));
97 #else /* PLAN9 */
98 			if((fd = open(*argv, OREAD)) < 0){
99 				EPR "%s: %s: %r\n", argv0, *argv);
100 #endif /* PLAN9 */
101 				EXIT(1, "open failure");
102 			}
103 			PROC
104 			close(fd);
105 			argv++;
106 		}
107 	} else {
108 		fd = 0;
109 		PROC
110 	}
111 	if(verbose)
112 		EPR "%s: %ld input bytes, %ld runes, %ld output bytes (%ld errors)\n", argv0,
113 			ninput, nrunes, noutput, nerrors);
114 	EXIT(((nerrors && squawk)? 1:0), ((nerrors && squawk)? "conversion error":0));
115 	return(0);	/* shut up compiler */
116 }
117 
118 void
119 usage(void)
120 {
121 	EPR "Usage: %s [-slv] [-f cs] [-t cs] [file ...]\n", argv0);
122 	verbose = 1;
123 	list();
124 	EXIT(1, "usage");
125 }
126 
127 void
128 list(void)
129 {
130 	struct convert *c;
131 	char ch = verbose?'\t':' ';
132 
133 #ifndef	PLAN9
134 	EPR "%s version = '%s'\n", argv0, version);
135 #endif
136 	if(verbose)
137 		EPR "character sets:\n");
138 	else
139 		EPR "cs:");
140 	for(c = convert; c->name; c++){
141 		if((c->flags&From) && c[1].name && (strcmp(c[1].name, c->name) == 0)){
142 			EPR "%c%s", ch, c->name);
143 			c++;
144 		} else if(c->flags&Table)
145 			EPR "%c%s", ch, c->name);
146 		else if(c->flags&From)
147 			EPR "%c%s(from)", ch, c->name);
148 		else
149 			EPR "%c%s(to)", ch, c->name);
150 		if(verbose)
151 			EPR "\t%s\n", c->chatter);
152 	}
153 	if(!verbose)
154 		EPR "\n");
155 }
156 
157 
158 struct convert *
159 conv(char *name, int from)
160 {
161 	struct convert *c;
162 
163 	for(c = convert; c->name; c++){
164 		if(cistrcmp(c->name, name) != 0)
165 			continue;
166 		if(c->flags&Table)
167 			return(c);
168 		if(((c->flags&From) == 0) == (from == 0))
169 			return(c);
170 	}
171 	EPR "%s: charset `%s' unknown\n", argv0, name);
172 	EXIT(1, "unknown character set");
173 	return(0);	/* just shut the compiler up */
174 }
175 
/*
 * Swap the two bytes of every complete pair in b[0..n-1], in place.
 * If n is odd the final unpaired byte is left untouched, so nothing at
 * or beyond b[n] is ever read or written.
 */
void
swab2(char *b, int n)
{
	char *e, p;

	/* n & ~1 rounds the length down to a whole number of pairs */
	for(e = b + (n & ~1); b < e; b += 2){
		p = b[0];
		b[0] = b[1];
		b[1] = p;
	}
}
187 
188 void
189 unicode_in(int fd, long *notused, struct convert *out)
190 {
191 	Rune buf[N];
192 	int n;
193 	int swabme;
194 
195 	USED(notused);
196 	if(read(fd, (char *)buf, 2) != 2)
197 		return;
198 	ninput += 2;
199 	switch(buf[0])
200 	{
201 	default:
202 		OUT(out, buf, 1);
203 	case 0xFEFF:
204 		swabme = 0;
205 		break;
206 	case 0xFFFE:
207 		swabme = 1;
208 		break;
209 	}
210 	while((n = read(fd, (char *)buf, 2*N)) > 0){
211 		ninput += n;
212 		if(swabme)
213 			swab2((char *)buf, n);
214 		if(n&1){
215 			if(squawk)
216 				EPR "%s: odd byte count in %s\n", argv0, file);
217 			nerrors++;
218 			if(clean)
219 				n--;
220 			else
221 				buf[n++/2] = Runeerror;
222 		}
223 		OUT(out, buf, n/2);
224 	}
225 	OUT(out, buf, 0);
226 }
227 
228 void
229 unicode_in_be(int fd, long *notused, struct convert *out)
230 {
231 	int i, n;
232 	Rune buf[N], r;
233 	uchar *p;
234 
235 	USED(notused);
236 	while((n = read(fd, (char *)buf, 2*N)) > 0){
237 		ninput += n;
238 		p = (uchar*)buf;
239 		for(i=0; i<n/2; i++){
240 			r = *p++<<8;
241 			r |= *p++;
242 			buf[i] = r;
243 		}
244 		if(n&1){
245 			if(squawk)
246 				EPR "%s: odd byte count in %s\n", argv0, file);
247 			nerrors++;
248 			if(clean)
249 				n--;
250 			else
251 				buf[n++/2] = Runeerror;
252 		}
253 		OUT(out, buf, n/2);
254 	}
255 	OUT(out, buf, 0);
256 }
257 
258 void
259 unicode_in_le(int fd, long *notused, struct convert *out)
260 {
261 	int i, n;
262 	Rune buf[N], r;
263 	uchar *p;
264 
265 	USED(notused);
266 	while((n = read(fd, (char *)buf, 2*N)) > 0){
267 		ninput += n;
268 		p = (uchar*)buf;
269 		for(i=0; i<n/2; i++){
270 			r = *p++;
271 			r |= *p++<<8;
272 			buf[i] = r;
273 		}
274 		if(n&1){
275 			if(squawk)
276 				EPR "%s: odd byte count in %s\n", argv0, file);
277 			nerrors++;
278 			if(clean)
279 				n--;
280 			else
281 				buf[n++/2] = Runeerror;
282 		}
283 		OUT(out, buf, n/2);
284 	}
285 	OUT(out, buf, 0);
286 }
287 
288 void
289 unicode_out(Rune *base, int n, long *notused)
290 {
291 	static int first = 1;
292 
293 	USED(notused);
294 	nrunes += n;
295 	if(first){
296 		unsigned short x = 0xFEFF;
297 		noutput += 2;
298 		write(1, (char *)&x, 2);
299 		first = 0;
300 	}
301 	noutput += 2*n;
302 	write(1, (char *)base, 2*n);
303 }
304 
305 void
306 unicode_out_be(Rune *base, int n, long *notused)
307 {
308 	int i;
309 	uchar *p;
310 	Rune r;
311 
312 	USED(notused);
313 	p = (uchar*)base;
314 	for(i=0; i<n; i++){
315 		r = base[i];
316 		*p++ = r>>8;
317 		*p++ = r;
318 	}
319 	nrunes += n;
320 	noutput += 2*n;
321 	write(1, (char *)base, 2*n);
322 }
323 
324 void
325 unicode_out_le(Rune *base, int n, long *notused)
326 {
327 	int i;
328 	uchar *p;
329 	Rune r;
330 
331 	USED(notused);
332 	p = (uchar*)base;
333 	for(i=0; i<n; i++){
334 		r = base[i];
335 		*p++ = r;
336 		*p++ = r>>8;
337 	}
338 	nrunes += n;
339 	noutput += 2*n;
340 	write(1, (char *)base, 2*n);
341 }
342 
343 void
344 intable(int fd, long *table, struct convert *out)
345 {
346 	uchar buf[N];
347 	uchar *p, *e;
348 	Rune *r;
349 	int n;
350 	long c;
351 
352 	while((n = read(fd, (char *)buf, N)) > 0){
353 		ninput += n;
354 		r = runes;
355 		for(p = buf, e = buf+n; p < e; p++){
356 			c = table[*p];
357 			if(c < 0){
358 				if(squawk)
359 					EPR "%s: bad char 0x%x near byte %ld in %s\n", argv0, *p, ninput+(p-e), file);
360 				nerrors++;
361 				if(clean)
362 					continue;
363 				c = BADMAP;
364 			}
365 			*r++ = c;
366 		}
367 		OUT(out, runes, r-runes);
368 	}
369 	OUT(out, runes, 0);
370 	if(n < 0){
371 #ifdef	PLAN9
372 		EPR "%s: input read: %r\n", argv0);
373 #else
374 		EPR "%s: input read: %s\n", argv0, strerror(errno));
375 #endif
376 		EXIT(1, "input read error");
377 	}
378 }
379 
380 void
381 outtable(Rune *base, int n, long *map)
382 {
383 	long c;
384 	char *p;
385 	int i;
386 
387 	nrunes += n;
388 	for(i = 0; i < NRUNE; i++)
389 		tab[i] = -1;
390 	for(i = 0; i < 256; i++)
391 		if(map[i] >= 0)
392 			tab[map[i]] = i;
393 	for(i = 0, p = obuf; i < n; i++){
394 		c = tab[base[i]];
395 		if(c < 0){
396 			if(squawk)
397 				EPR "%s: rune 0x%x not in output cs\n", argv0, base[i]);
398 			nerrors++;
399 			if(clean)
400 				continue;
401 			c = BADMAP;
402 		}
403 		*p++ = c;
404 	}
405 	noutput += p-obuf;
406 	write(1, obuf, p-obuf);
407 }
408 
/*
 * Byte -> rune table for "ascii" and "us-ascii": bytes 0x00-0x7f map
 * to themselves, bytes 0x80-0xff are invalid (-1).
 */
long tabascii[256] =
{
/* 0x00 */	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
/* 0x08 */	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
/* 0x10 */	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
/* 0x18 */	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
/* 0x20 */	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
/* 0x28 */	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
/* 0x30 */	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
/* 0x38 */	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
/* 0x40 */	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
/* 0x48 */	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
/* 0x50 */	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
/* 0x58 */	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
/* 0x60 */	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
/* 0x68 */	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
/* 0x70 */	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
/* 0x78 */	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
/* 0x80 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0x88 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0x90 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0x98 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0xa0 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0xa8 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0xb0 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0xb8 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0xc0 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0xc8 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0xd0 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0xd8 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0xe0 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0xe8 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0xf0 */	-1, -1, -1, -1, -1, -1, -1, -1,
/* 0xf8 */	-1, -1, -1, -1, -1, -1, -1, -1,
};
428 
/*
 * Byte -> rune table for the IBM PC character set: 0x00-0x7f map to
 * themselves, 0x80-0xff to accented letters, box-drawing forms, Greek
 * and mathematical symbols.
 * The "msdos" entry of convert[] in this file uses tabcp437, not this
 * table.
 */
long tabmsdos[256] =	/* from jhelling@cs.ruu.nl (Jeroen Hellingman) */
{
/* 0x00 */	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
/* 0x08 */	0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
/* 0x10 */	0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
/* 0x18 */	0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
/* 0x20 */	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
/* 0x28 */	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
/* 0x30 */	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
/* 0x38 */	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
/* 0x40 */	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
/* 0x48 */	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
/* 0x50 */	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
/* 0x58 */	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
/* 0x60 */	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
/* 0x68 */	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
/* 0x70 */	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
/* 0x78 */	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
/* 0x80 */	0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, /* latin */
/* 0x88 */	0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
/* 0x90 */	0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
/* 0x98 */	0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192,
/* 0xa0 */	0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
/* 0xa8 */	0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
/* 0xb0 */	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, /* forms */
/* 0xb8 */	0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
/* 0xc0 */	0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
/* 0xc8 */	0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
/* 0xd0 */	0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b,
/* 0xd8 */	0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
/* 0xe0 */	0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, /* greek */
/* 0xe8 */	0x03a6, 0x0398, 0x2126, 0x03b4, 0x221e, 0x2205, 0x2208, 0x2229,
/* 0xf0 */	0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, /* math */
/* 0xf8 */	0x00b0, 0x2022, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x220e, 0x00a0,
};
/*
 * Byte -> rune table behind the "msdos2" entry of convert[]: like
 * tabmsdos above, except that bytes 0x01-0x1f map to graphic symbols
 * instead of to themselves.
 */
long tabmsdos2[256] =	/* from jhelling@cs.ruu.nl (Jeroen Hellingman) */
{
/* 0x00 */	0x0000, 0x263a, 0x263b, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
/* 0x08 */	0x25d8, 0x25cb, 0x25d9, 0x2642, 0x2640, 0x266a, 0x266b, 0x263c,
/* 0x10 */	0x25b6, 0x25c0, 0x2195, 0x203c, 0x00b6, 0x00a7, 0x2043, 0x21a8,
/* 0x18 */	0x2191, 0x2193, 0x2192, 0x2190, 0x2319, 0x2194, 0x25b2, 0x25bc,
/* 0x20 */	0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
/* 0x28 */	0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
/* 0x30 */	0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
/* 0x38 */	0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
/* 0x40 */	0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
/* 0x48 */	0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f,
/* 0x50 */	0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
/* 0x58 */	0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
/* 0x60 */	0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
/* 0x68 */	0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
/* 0x70 */	0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
/* 0x78 */	0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
/* 0x80 */	0x00c7, 0x00fc, 0x00e9, 0x00e2, 0x00e4, 0x00e0, 0x00e5, 0x00e7, /* latin */
/* 0x88 */	0x00ea, 0x00eb, 0x00e8, 0x00ef, 0x00ee, 0x00ec, 0x00c4, 0x00c5,
/* 0x90 */	0x00c9, 0x00e6, 0x00c6, 0x00f4, 0x00f6, 0x00f2, 0x00fb, 0x00f9,
/* 0x98 */	0x00ff, 0x00d6, 0x00dc, 0x00a2, 0x00a3, 0x00a5, 0x20a7, 0x0192,
/* 0xa0 */	0x00e1, 0x00ed, 0x00f3, 0x00fa, 0x00f1, 0x00d1, 0x00aa, 0x00ba,
/* 0xa8 */	0x00bf, 0x2310, 0x00ac, 0x00bd, 0x00bc, 0x00a1, 0x00ab, 0x00bb,
/* 0xb0 */	0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556, /* forms */
/* 0xb8 */	0x2555, 0x2563, 0x2551, 0x2557, 0x255d, 0x255c, 0x255b, 0x2510,
/* 0xc0 */	0x2514, 0x2534, 0x252c, 0x251c, 0x2500, 0x253c, 0x255e, 0x255f,
/* 0xc8 */	0x255a, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256c, 0x2567,
/* 0xd0 */	0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256b,
/* 0xd8 */	0x256a, 0x2518, 0x250c, 0x2588, 0x2584, 0x258c, 0x2590, 0x2580,
/* 0xe0 */	0x03b1, 0x00df, 0x0393, 0x03c0, 0x03a3, 0x03c3, 0x00b5, 0x03c4, /* greek */
/* 0xe8 */	0x03a6, 0x0398, 0x2126, 0x03b4, 0x221e, 0x2205, 0x2208, 0x2229,
/* 0xf0 */	0x2261, 0x00b1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00f7, 0x2248, /* math */
/* 0xf8 */	0x00b0, 0x2022, 0x00b7, 0x221a, 0x207f, 0x00b2, 0x220e, 0x00a0,
};
485 struct convert convert[] =
486 {	/* if two entries have the same name, put the from one first */
487 	{ "8859-1", "Latin-1 (Western and Northern Europe including Italian)", Table, (void *)tab8859_1 },
488 	{ "8859-2", "Latin-2 (Eastern Europe except Turkey and the Baltic countries)", Table, (void *)tab8859_2 },
489 	{ "8859-3", "Latin-3 (Mediterranean, South Africa, Esperanto)", Table, (void *)tab8859_3 },
490 	{ "8859-4", "Latin-4 (Scandinavia and the Baltic countries; obsolete)", Table, (void *)tab8859_4 },
491 	{ "8859-5", "Part 5 (Cyrillic)", Table, (void *)tab8859_5 },
492 	{ "8859-6", "Part 6 (Arabic)", Table, (void *)tab8859_6 },
493 	{ "8859-7", "Part 7 (Greek)", Table, (void *)tab8859_7 },
494 	{ "8859-8", "Part 8 (Hebrew)", Table, (void *)tab8859_8 },
495 	{ "8859-9", "Latin-5 (Turkey, Western Europe except Icelandic and Faroese)", Table, (void *)tab8859_9 },
496 	{ "8859-10", "Latin-6 (Northern Europe)", Table, (void *)tab8859_10 },
497 	{ "8859-15", "Latin-9 (Western Europe)", Table, (void *)tab8859_15 },
498 	{ "ascii", "7-bit ASCII", Table, (void *)tabascii },
499 	{ "atari", "ATARI-ST character set", Table, (void *)tabatari },
500 	{ "av", "Alternativnyj Variant", Table, (void *)tabav },
501 	{ "big5", "Big 5 (HKU)", From|Func, 0, (Fnptr)big5_in },
502 	{ "big5", "Big 5 (HKU)", Func, 0, (Fnptr)big5_out },
503 	{ "ebcdic", "EBCDIC", Table, (void *)tabebcdic },	/* 6f is recommended bad map */
504 	{ "euc-k", "Korean EUC: ASCII+KS C 5601 1987", From|Func, 0, (Fnptr)uksc_in },
505 	{ "euc-k", "Korean EUC: ASCII+KS C 5601 1987", Func, 0, (Fnptr)uksc_out },
506  	{ "euc-kr", "Korean EUC: ASCII+KS C 5601 1987", From|Func, 0, (Fnptr)uksc_in },
507  	{ "euc-kr", "Korean EUC: ASCII+KS C 5601 1987", Func, 0, (Fnptr)uksc_out },
508  	{ "ks_c_5601-1987", "Korean EUC: ASCII+KS C 5601 1987", From|Func, 0, (Fnptr)uksc_in },
509  	{ "ks_c_5601-1987", "Korean EUC: ASCII+KS C 5601 1987", Func, 0, (Fnptr)uksc_out },
510 	{ "gb2312", "GB2312-80 (Chinese)", From|Func, 0, (Fnptr)gb_in },
511 	{ "gb2312", "GB2312-80 (Chinese)", Func, 0, (Fnptr)gb_out },
512 	{ "gbk", "GBK (Chinese)", From|Func, 0, (Fnptr)gbk_in },
513 	{ "gbk", "GBK (Chinese)", Func, 0, (Fnptr)gbk_out },
514 	{ "html", "HTML", From|Func, 0, (Fnptr)html_in },
515 	{ "html", "HTML", Func, 0, (Fnptr)html_out },
516 	{ "ibm437", "IBM Code Page 437 (US)", Table, (void*)tabcp437 },
517 	{ "ibm720", "IBM Code Page 720 (Arabic)", Table, (void*)tabcp720 },
518 	{ "ibm737", "IBM Code Page 737 (Greek)", Table, (void*)tabcp737 },
519 	{ "ibm775", "IBM Code Page 775 (Baltic)", Table, (void*)tabcp775 },
520 	{ "ibm850", "IBM Code Page 850 (Multilingual Latin I)", Table, (void*)tabcp850 },
521 	{ "ibm852", "IBM Code Page 852 (Latin II)", Table, (void*)tabcp852 },
522 	{ "ibm855", "IBM Code Page 855 (Cyrillic)", Table, (void*)tabcp855 },
523 	{ "ibm857", "IBM Code Page 857 (Turkish)", Table, (void*)tabcp857 },
524 	{ "ibm858", "IBM Code Page 858 (Multilingual Latin I+Euro)", Table, (void*)tabcp858 },
525 	{ "ibm862", "IBM Code Page 862 (Hebrew)", Table, (void*)tabcp862 },
526 	{ "ibm866", "IBM Code Page 866 (Russian)", Table, (void*)tabcp866 },
527 	{ "ibm874", "IBM Code Page 874 (Thai)", Table, (void*)tabcp874 },
528 	{ "iso-2022-jp", "alias for jis-kanji (MIME)", From|Func, 0, (Fnptr)jisjis_in },
529 	{ "iso-2022-jp", "alias for jis-kanji (MIME)", Func, 0, (Fnptr)jisjis_out },
530 	{ "iso-8859-1", "alias for 8859-1 (MIME)", Table, (void *)tab8859_1 },
531 	{ "iso-8859-2", "alias for 8859-2 (MIME)", Table, (void *)tab8859_2 },
532 	{ "iso-8859-3", "alias for 8859-3 (MIME)", Table, (void *)tab8859_3 },
533 	{ "iso-8859-4", "alias for 8859-4 (MIME)", Table, (void *)tab8859_4 },
534 	{ "iso-8859-5", "alias for 8859-5 (MIME)", Table, (void *)tab8859_5 },
535 	{ "iso-8859-6", "alias for 8859-6 (MIME)", Table, (void *)tab8859_6 },
536 	{ "iso-8859-7", "alias for 8859-7 (MIME)", Table, (void *)tab8859_7 },
537 	{ "iso-8859-8", "alias for 8859-8 (MIME)", Table, (void *)tab8859_8 },
538 	{ "iso-8859-9", "alias for 8859-9 (MIME)", Table, (void *)tab8859_9 },
539 	{ "iso-8859-10", "alias for 8859-10 (MIME)", Table, (void *)tab8859_10 },
540 	{ "iso-8859-15", "alias for 8859-15 (MIME)", Table, (void *)tab8859_15 },
541 	{ "jis", "guesses at the JIS encoding", From|Func, 0, (Fnptr)jis_in },
542 	{ "jis-kanji", "ISO 2022-JP (Japanese)", From|Func, 0, (Fnptr)jisjis_in },
543 	{ "jis-kanji", "ISO 2022-JP (Japanese)", Func, 0, (Fnptr)jisjis_out },
544 	{ "koi8", "KOI-8 (GOST 19769-74)", Table, (void *)tabkoi8 },
545 	{ "koi8-r", "alias for koi8 (MIME)", Table, (void *)tabkoi8 },
546 	{ "latin1", "alias for 8859-1", Table, (void *)tab8859_1 },
547 	{ "macrom", "Macintosh Standard Roman character set", Table, (void *)tabmacroman },
548 	{ "microsoft", "alias for windows1252", Table, (void *)tabcp1252 },
549 	{ "ms-kanji", "Microsoft, or Shift-JIS", From|Func, 0, (Fnptr)msjis_in },
550 	{ "ms-kanji", "Microsoft, or Shift-JIS", Func, 0, (Fnptr)msjis_out },
551 	{ "msdos", "IBM PC (alias for ibm437)", Table, (void *)tabcp437 },
552 	{ "msdos2", "IBM PC (ibm437 with graphics in C0)", Table, (void *)tabmsdos2 },
553 	{ "next", "NEXTSTEP character set", Table, (void *)tabnextstep },
554 	{ "ov", "Osnovnoj Variant", Table, (void *)tabov },
555 	{ "ps2", "IBM PS/2: (alias for ibm850)", Table, (void *)tabcp850 },
556 	{ "sf1", "ISO-646: Finnish/Swedish SF-1 variant", Table, (void *)tabsf1 },
557 	{ "sf2", "ISO-646: Finnish/Swedish SF-2 variant (recommended)", Table, (void *)tabsf2 },
558 	{ "tis-620", "Thai+ASCII (TIS 620-1986)", Table, (void *)tabtis620 },
559 	{ "tune", "TUNE (Tamil)", From|Func, 0, (Fnptr)tune_in },
560 	{ "tune", "TUNE (Tamil)", Func, 0, (Fnptr)tune_out },
561 	{ "ucode", "Russian U-code", Table, (void *)tabucode },
562 	{ "ujis", "EUC-JX: JIS 0208", From|Func, 0, (Fnptr)ujis_in },
563 	{ "ujis", "EUC-JX: JIS 0208", Func, 0, (Fnptr)ujis_out },
564 	{ "unicode", "Unicode 1.1", From|Func, 0, (Fnptr)unicode_in },
565 	{ "unicode", "Unicode 1.1", Func, 0, (Fnptr)unicode_out },
566 	{ "unicode-be", "Unicode 1.1 big-endian", From|Func, 0, (Fnptr)unicode_in_be },
567 	{ "unicode-be", "Unicode 1.1 big-endian", Func, 0, (Fnptr)unicode_out_be },
568 	{ "unicode-le", "Unicode 1.1 little-endian", From|Func, 0, (Fnptr)unicode_in_le },
569 	{ "unicode-le", "Unicode 1.1 little-endian", Func, 0, (Fnptr)unicode_out_le },
570 	{ "us-ascii", "alias for ascii (MIME)", Table, (void *)tabascii },
571 	{ "utf", "FSS-UTF a.k.a. UTF-8", From|Func, 0, (Fnptr)utf_in },
572 	{ "utf", "FSS-UTF a.k.a. UTF-8", Func, 0, (Fnptr)utf_out },
573 	{ "utf1", "UTF-1 (ISO 10646 Annex A)", From|Func, 0, (Fnptr)isoutf_in },
574 	{ "utf1", "UTF-1 (ISO 10646 Annex A)", Func, 0, (Fnptr)isoutf_out },
575 	{ "utf-8", "alias for utf (MIME)", From|Func, 0, (Fnptr)utf_in },
576 	{ "utf-8", "alias for utf (MIME)", Func, 0, (Fnptr)utf_out },
577 	{ "utf-16", "alias for unicode (MIME)", From|Func, 0, (Fnptr)unicode_in },
578 	{ "utf-16", "alias for unicode (MIME)", Func, 0, (Fnptr)unicode_out },
579 	{ "utf-16be", "alias for unicode-be (MIME)", From|Func, 0, (Fnptr)unicode_in_be },
580 	{ "utf-16be", "alias for unicode-be (MIME)", Func, 0, (Fnptr)unicode_out_be },
581 	{ "utf-16le", "alias for unicode-le (MIME)", From|Func, 0, (Fnptr)unicode_in_le },
582 	{ "utf-16le", "alias for unicode-le (MIME)", Func, 0, (Fnptr)unicode_out_le },
583 	{ "viet1", "Vietnamese VSCII-1 (1993)", Table, (void *)tabviet1 },
584 	{ "viet2", "Vietnamese VSCII-2 (1993)", Table, (void *)tabviet2 },
585 	{ "vscii", "Vietnamese VISCII 1.1 (1992)", Table, (void *)tabviscii },
586 	{ "windows-1250", "Windows Code Page 1250 (Central Europe)", Table, (void *)tabcp1250 },
587 	{ "windows-1251", "Windows Code Page 1251 (Cyrillic)", Table, (void *)tabcp1251 },
588 	{ "windows-1252", "Windows Code Page 1252 (Latin I)", Table, (void *)tabcp1252 },
589 	{ "windows-1253", "Windows Code Page 1253 (Greek)", Table, (void *)tabcp1253 },
590 	{ "windows-1254", "Windows Code Page 1254 (Turkish)", Table, (void *)tabcp1254 },
591 	{ "windows-1255", "Windows Code Page 1255 (Hebrew)", Table, (void *)tabcp1255 },
592 	{ "windows-1256", "Windows Code Page 1256 (Arabic)", Table, (void *)tabcp1256 },
593 	{ "windows-1257", "Windows Code Page 1257 (Baltic)", Table, (void *)tabcp1257 },
594 	{ "windows-1258", "Windows Code Page 1258 (Vietnam)", Table, (void *)tabcp1258 },
595 	{ 0 },
596 };
597