xref: /netbsd-src/lib/libc/citrus/modules/citrus_iso2022.c (revision d710132b4b8ce7f7cccaaf660cb16aa16b4077a0)
1 /*	$NetBSD: citrus_iso2022.c,v 1.7 2003/06/25 09:51:44 tshiozak Exp $	*/
2 
3 /*-
4  * Copyright (c)1999, 2002 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  *	$Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
29  */
30 
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.7 2003/06/25 09:51:44 tshiozak Exp $");
34 #endif /* LIBC_SCCS and not lint */
35 
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stddef.h>
42 #include <locale.h>
43 #include <wchar.h>
44 #include <sys/types.h>
45 #include <limits.h>
46 
47 #include "citrus_namespace.h"
48 #include "citrus_types.h"
49 #include "citrus_module.h"
50 #include "citrus_ctype.h"
51 #include "citrus_stdenc.h"
52 #include "citrus_iso2022.h"
53 
54 
55 /* ----------------------------------------------------------------------
56  * private stuffs used by templates
57  */
58 
59 
60 /*
61  * wchar_t mappings:
62  * ASCII (ESC ( B)		00000000 00000000 00000000 0xxxxxxx
63  * iso-8859-1 (ESC , A)		00000000 00000000 00000000 1xxxxxxx
64  * 94 charset (ESC ( F)		0fffffff 00000000 00000000 0xxxxxxx
65  * 94 charset (ESC ( M F)	0fffffff 1mmmmmmm 00000000 0xxxxxxx
66  * 96 charset (ESC , F)		0fffffff 00000000 00000000 1xxxxxxx
67  * 96 charset (ESC , M F)	0fffffff 1mmmmmmm 00000000 1xxxxxxx
68  * 94x94 charset (ESC $ ( F)	0fffffff 00000000 0xxxxxxx 0xxxxxxx
69  * 96x96 charset (ESC $ , F)	0fffffff 00000000 0xxxxxxx 1xxxxxxx
70  * 94x94 charset (ESC & V ESC $ ( F)
71  *				0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
72  * 94x94x94 charset (ESC $ ( F)	0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
73  * 96x96x96 charset (ESC $ , F)	0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
74  */
75 
/*
 * Identity of one designated character set: its class (94 or 96
 * characters, single- or multi-byte) plus the bytes of the designating
 * escape sequence that tell it apart from other sets of that class.
 */
typedef struct {
	u_char	type;		/* one of the CS* class codes below */
#define	CS94		(0U)
#define	CS96		(1U)
#define	CS94MULTI	(2U)
#define	CS96MULTI	(3U)

	u_char	final;		/* final byte of the designation */
	u_char	interm;		/* extra intermediate byte, '\0' if none */
	u_char	vers;		/* version byte (ESC & V), '\0' if none */
} _ISO2022Charset;
87 
88 typedef struct {
89 	_ISO2022Charset	g[4];
90 	/* need 3 bits to hold -1, 0, ..., 3 */
91 	int	gl:3,
92 		gr:3,
93 		singlegl:3,
94 		singlegr:3;
95 	char ch[7];	/* longest escape sequence (ESC & V ESC $ ( F) */
96 	int chlen;
97 	int flags;
98 #define _ISO2022STATE_FLAG_INITIALIZED	1
99 } _ISO2022State;
100 
/*
 * Per-encoding configuration, filled in from the VARIABLE string by
 * _citrus_ISO2022_parse_variable().
 */
typedef struct {
	/* per G-set list of charsets recommended for that plane ("N=..." tokens) */
	_ISO2022Charset	*recommend[4];
	/* number of entries in each recommend[] list */
	size_t	recommendsize[4];
	/* charsets designated to G0..G3 in the initial state ("INITN=..." tokens) */
	_ISO2022Charset	initg[4];
	/* value of the MAX1/MAX2/MAX3 token, 0 if none was given */
	int	maxcharset;
	/* F_* bits below; F_SI/F_LS0 and F_SO/F_LS1 share the same bit */
	int	flags;
#define	F_8BIT	0x0001
#define	F_NOOLD	0x0002
#define	F_SI	0x0010	/*0F*/
#define	F_SO	0x0020	/*0E*/
#define	F_LS0	0x0010	/*0F*/
#define	F_LS1	0x0020	/*0E*/
#define	F_LS2	0x0040	/*ESC n*/
#define	F_LS3	0x0080	/*ESC o*/
#define	F_LS1R	0x0100	/*ESC ~*/
#define	F_LS2R	0x0200	/*ESC }*/
#define	F_LS3R	0x0400	/*ESC |*/
#define	F_SS2	0x0800	/*ESC N*/
#define	F_SS3	0x1000	/*ESC O*/
#define	F_SS2R	0x2000	/*8E*/
#define	F_SS3R	0x4000	/*8F*/
} _ISO2022EncodingInfo;
/*
 * Encoding information bundled with one conversion state per stateful
 * multibyte function; the states are reached through _CEI_TO_STATE().
 */
typedef struct {
	_ISO2022EncodingInfo ei;
	struct {
		/* for future multi-locale facility */
		_ISO2022State	s_mblen;
		_ISO2022State	s_mbrlen;
		_ISO2022State	s_mbrtowc;
		_ISO2022State	s_mbtowc;
		_ISO2022State	s_mbsrtowcs;
		_ISO2022State	s_wcrtomb;
		_ISO2022State	s_wcsrtombs;
		_ISO2022State	s_wctomb;
	} states;
} _ISO2022CTypeInfo;
137 
/* accessors into _ISO2022CTypeInfo */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* names and properties of this encoding, as seen by the template headers */
#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1
/* a state is usable only after _citrus_ISO2022_init_state() has marked it */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	\
    (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))


/*
 * "no character yet" marker returned by the decoder.  Fully
 * parenthesized so that it stays a single operand in any expression.
 */
#define _ISO2022INVALID ((wchar_t)-1)
152 
/*
 * Byte classification helpers.  Each returns 1 when x belongs to the
 * class and 0 otherwise.
 */

/* C0 control: 0x00..0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x <= 0x1f);
}

/* C1 control: 0x80..0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* any control byte: C0, C1 or DEL */
static __inline int
iscntl(__uint8_t x)
{
	return (x == 0x7f || isc0(x) || isc1(x));
}

/* position inside a 94-character set: 0x21..0x7e */
static __inline int
is94(__uint8_t x)
{
	return (x > 0x20 && x < 0x7f);
}

/* position inside a 96-character set: 0x20..0x7f */
static __inline int
is96(__uint8_t x)
{
	return (x >= 0x20 && x < 0x80);
}

/* byte accepted as the final byte of a designation: 0x30..0x7f */
static __inline int
isecma(__uint8_t x)
{
	return (x >= 0x30 && x < 0x80);
}

/* intermediate byte of an escape sequence: 0x20..0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* final byte in 0x60..0x6f, which selects a three-byte multibyte set */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
161 
162 static __inline int
163 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
164 {
165 
166 	_DIAGASSERT(p != NULL);
167 	_DIAGASSERT(cs != NULL);
168 
169 	if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
170 		cs->final = (u_char)(p[3] & 0xff);
171 		cs->interm = '\0';
172 		cs->vers = '\0';
173 		cs->type = CS94MULTI;
174 	} else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
175 		cs->final = (u_char)(p[3] & 0xff);
176 		cs->interm = '\0';
177 		cs->vers = '\0';
178 		cs->type = CS96MULTI;
179 	} else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
180 		cs->final = (u_char)(p[2] & 0xff);
181 		cs->interm = '\0';
182 		cs->vers = '\0';
183 		cs->type = CS94;
184 	} else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
185 		cs->final = (u_char )(p[2] & 0xff);
186 		cs->interm = '\0';
187 		cs->vers = '\0';
188 		cs->type = CS96;
189 	} else {
190 		return 1;
191 	}
192 
193 	return 0;
194 }
195 
196 
/* result codes shared by the VARIABLE token parsers (get_*) */
#define _NOTMATCH	0	/* token is not of the kind this parser handles */
#define _MATCH		1	/* token recognized and applied */
#define _PARSEFAIL	2	/* token recognized but malformed, or out of memory */
200 
201 static __inline int
202 get_recommend(_ISO2022EncodingInfo * __restrict ei,
203 	      const char * __restrict token)
204 {
205 	int i;
206 	_ISO2022Charset cs;
207 
208 	if (!strchr("0123", token[0]) || token[1] != '=')
209 		return (_NOTMATCH);
210 
211 	if (getcs(&token[2], &cs) == 0)
212 		;
213 	else if (!strcmp(&token[2], "94")) {
214 		cs.final = (u_char)(token[4]);
215 		cs.interm = '\0';
216 		cs.vers = '\0';
217 		cs.type = CS94;
218 	} else if (!strcmp(&token[2], "96")) {
219 		cs.final = (u_char)(token[4]);
220 		cs.interm = '\0';
221 		cs.vers = '\0';
222 		cs.type = CS96;
223 	} else if (!strcmp(&token[2], "94$")) {
224 		cs.final = (u_char)(token[5]);
225 		cs.interm = '\0';
226 		cs.vers = '\0';
227 		cs.type = CS94MULTI;
228 	} else if (!strcmp(&token[2], "96$")) {
229 		cs.final = (u_char)(token[5]);
230 		cs.interm = '\0';
231 		cs.vers = '\0';
232 		cs.type = CS96MULTI;
233 	} else {
234 		return (_PARSEFAIL);
235 	}
236 
237 	i = token[0] - '0';
238 	ei->recommendsize[i] += 1;
239 	if (!ei->recommend[i]) {
240 		ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
241 	} else {
242 		ei->recommend[i] =
243 		    realloc(ei->recommend[i],
244 			    sizeof(_ISO2022Charset)* (ei->recommendsize[i]));
245 	}
246 	if (!ei->recommend[i])
247 		return (_PARSEFAIL);
248 
249 	(ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
250 	(ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
251 	(ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
252 	(ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
253 
254 	return (_MATCH);
255 }
256 
257 static __inline int
258 get_initg(_ISO2022EncodingInfo * __restrict ei,
259 	  const char * __restrict token)
260 {
261 	_ISO2022Charset cs;
262 
263 	if (strncmp("INIT", &token[0], 4) ||
264 	    !strchr("0123", token[4]) ||
265 	    token[5] != '=')
266 		return (_NOTMATCH);
267 
268 	if (getcs(&token[6], &cs) != 0)
269 		return (_PARSEFAIL);
270 
271 	ei->initg[token[4] - '0'].type = cs.type;
272 	ei->initg[token[4] - '0'].final = cs.final;
273 	ei->initg[token[4] - '0'].interm = cs.interm;
274 	ei->initg[token[4] - '0'].vers = cs.vers;
275 
276 	return (_MATCH);
277 }
278 
279 static __inline int
280 get_max(_ISO2022EncodingInfo * __restrict ei,
281 	const char * __restrict token)
282 {
283 	if (!strcmp(token, "MAX1")) {
284 		ei->maxcharset = 1;
285 	} else if (!strcmp(token, "MAX2")) {
286 		ei->maxcharset = 2;
287 	} else if (!strcmp(token, "MAX3")) {
288 		ei->maxcharset = 3;
289 	} else
290 		return (_NOTMATCH);
291 
292 	return (_MATCH);
293 }
294 
295 
296 static __inline int
297 get_flags(_ISO2022EncodingInfo * __restrict ei,
298 	  const char * __restrict token)
299 {
300 	int i;
301 	static struct {
302 		const char	*tag;
303 		int		flag;
304 	} const tags[] = {
305 		{ "DUMMY",	0	},
306 		{ "8BIT",	F_8BIT	},
307 		{ "NOOLD",	F_NOOLD	},
308 		{ "SI",		F_SI	},
309 		{ "SO",		F_SO	},
310 		{ "LS0",	F_LS0	},
311 		{ "LS1",	F_LS1	},
312 		{ "LS2",	F_LS2	},
313 		{ "LS3",	F_LS3	},
314 		{ "LS1R",	F_LS1R	},
315 		{ "LS2R",	F_LS2R	},
316 		{ "LS3R",	F_LS3R	},
317 		{ "SS2",	F_SS2	},
318 		{ "SS3",	F_SS3	},
319 		{ "SS2R",	F_SS2R	},
320 		{ "SS3R",	F_SS3R	},
321 		{ NULL,		0 }
322 	};
323 
324 	for (i = 0; tags[i].tag; i++) {
325 		if (!strcmp(token, tags[i].tag)) {
326 			ei->flags |= tags[i].flag;
327 			return (_MATCH);
328 		}
329 	}
330 
331 	return (_NOTMATCH);
332 }
333 
334 
335 static __inline int
336 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
337 			       const void * __restrict var, size_t lenvar)
338 {
339 	char const *v, *e;
340 	char buf[20];
341 	int i, len, ret;
342 
343 	_DIAGASSERT(ei != NULL);
344 
345 
346 	/*
347 	 * parse VARIABLE section.
348 	 */
349 
350 	if (!var)
351 		return (EFTYPE);
352 
353 	v = (const char *) var;
354 
355 	/* initialize structure */
356 	ei->maxcharset = 0;
357 	for (i = 0; i < 4; i++) {
358 		ei->recommend[i] = NULL;
359 		ei->recommendsize[i] = 0;
360 	}
361 	ei->flags = 0;
362 
363 	while (*v) {
364 		while (*v == ' ' || *v == '\t')
365 			++v;
366 
367 		/* find the token */
368 		e = v;
369 		while (*e && *e != ' ' && *e != '\t')
370 			++e;
371 
372 		len = e-v;
373 		if (len == 0)
374 			break;
375 		if (len>=sizeof(buf))
376 			goto parsefail;
377 		sprintf(buf, "%.*s", len, v);
378 
379 		if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
380 			;
381 		else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
382 			;
383 		else if ((ret = get_max(ei, buf)) != _NOTMATCH)
384 			;
385 		else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
386 			;
387 		else
388 			ret = _PARSEFAIL;
389 		if (ret==_PARSEFAIL)
390 			goto parsefail;
391 		v = e;
392 
393 	}
394 
395 	return (0);
396 
397 parsefail:
398 	free(ei->recommend[0]);
399 	free(ei->recommend[1]);
400 	free(ei->recommend[2]);
401 	free(ei->recommend[3]);
402 
403 	return (EFTYPE);
404 }
405 
406 static __inline void
407 /*ARGSUSED*/
408 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
409 			   _ISO2022State * __restrict s)
410 {
411 	int i;
412 
413 	memset(s, 0, sizeof(*s));
414 	s->gl = 0;
415 	s->gr = (ei->flags & F_8BIT) ? 1 : -1;
416 
417 	for (i = 0; i < 4; i++) {
418 		if (ei->initg[i].final) {
419 			s->g[i].type = ei->initg[i].type;
420 			s->g[i].final = ei->initg[i].final;
421 			s->g[i].interm = ei->initg[i].interm;
422 		}
423 	}
424 	s->singlegl = s->singlegr = -1;
425 	s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
426 }
427 
428 static __inline void
429 /*ARGSUSED*/
430 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
431 			   void * __restrict pspriv,
432 			   const _ISO2022State * __restrict s)
433 {
434 	memcpy(pspriv, (const void *)s, sizeof(*s));
435 }
436 
437 static __inline void
438 /*ARGSUSED*/
439 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
440 			     _ISO2022State * __restrict s,
441 			     const void * __restrict pspriv)
442 {
443 	memcpy((void *)s, pspriv, sizeof(*s));
444 }
445 
446 static int
447 /*ARGSUSED*/
448 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei,
449 				     const void * __restrict var,
450 				     size_t lenvar)
451 {
452 
453 	_DIAGASSERT(ei != NULL);
454 
455 	return _citrus_ISO2022_parse_variable(ei, var, lenvar);
456 }
457 
458 static void
459 /*ARGSUSED*/
460 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei)
461 {
462 }
463 
/*
 * Escape sequence patterns.
 *
 * chars[] describes one sequence byte by byte.  Besides literal bytes
 * it may hold the wildcards ECMA (any byte accepted by isecma()),
 * INTERM (any byte accepted by isinterm()), OECMA (one of "@AB"), and
 * the class codes CS94 / CS96, which stand for a designator byte out
 * of "()*+" / ",-./" respectively (see seqmatch()).
 */
#define	ESC	'\033'
#define	ECMA	-1
#define	INTERM	-2
#define	OECMA	-3
/*
 * type		charset class being designated, -1 for shift sequences
 * csoff	index of the designator byte, -1 when G0 is implied
 * finaloff	index of the final byte, -1 if none
 * intermoff	index of the extra intermediate byte, -1 if none
 * versoff	index of the version byte, -1 if none
 * len		number of bytes in the sequence; 0 ends the table
 */
static struct seqtable {
	int type;
	int csoff;
	int finaloff;
	int intermoff;
	int versoff;
	int len;
	int chars[10];
} seqtable[] = {
	/* G0 94MULTI special */
	{ CS94MULTI, -1, 2, -1, -1,	3, { ESC, '$', OECMA }, },
	/* G0 94MULTI special with version identification */
	{ CS94MULTI, -1, 5, -1, 2,	6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
	/* G? 94 */
	{ CS94, 1, 2, -1, -1,		3, { ESC, CS94, ECMA, }, },
	/* G? 94 with 2nd intermediate char */
	{ CS94, 1, 3, 2, -1,		4, { ESC, CS94, INTERM, ECMA, }, },
	/* G? 96 */
	{ CS96, 1, 2, -1, -1,		3, { ESC, CS96, ECMA, }, },
	/* G? 96 with 2nd intermediate char */
	{ CS96, 1, 3, 2, -1,		4, { ESC, CS96, INTERM, ECMA, }, },
	/* G? 94MULTI */
	{ CS94MULTI, 2, 3, -1, -1,	4, { ESC, '$', CS94, ECMA, }, },
	/* G? 96MULTI */
	{ CS96MULTI, 2, 3, -1, -1,	4, { ESC, '$', CS96, ECMA, }, },
	/* G? 94MULTI with version specification */
	{ CS94MULTI, 5, 6, -1, 2,	7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
	/* LS2/3 */
	{ -1, -1, -1, -1, -1,		2, { ESC, 'n', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, 'o', }, },
	/* LS1/2/3R */
	{ -1, -1, -1, -1, -1,		2, { ESC, '~', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, /*{*/ '}', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, '|', }, },
	/* SS2/3 */
	{ -1, -1, -1, -1, -1,		2, { ESC, 'N', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, 'O', }, },
	/* end of records */
	{ 0, }
};
508 
509 static int
510 seqmatch(const char * __restrict s, size_t n,
511 	 const struct seqtable * __restrict sp)
512 {
513 	const int *p;
514 
515 	_DIAGASSERT(s != NULL);
516 	_DIAGASSERT(sp != NULL);
517 
518 	p = sp->chars;
519 	while (p - sp->chars < n && p - sp->chars < sp->len) {
520 		switch (*p) {
521 		case ECMA:
522 			if (!isecma(*s))
523 				goto terminate;
524 			break;
525 		case OECMA:
526 			if (*s && strchr("@AB", *s))
527 				break;
528 			else
529 				goto terminate;
530 		case INTERM:
531 			if (!isinterm(*s))
532 				goto terminate;
533 			break;
534 		case CS94:
535 			if (*s && strchr("()*+", *s))
536 				break;
537 			else
538 				goto terminate;
539 		case CS96:
540 			if (*s && strchr(",-./", *s))
541 				break;
542 			else
543 				goto terminate;
544 		default:
545 			if (*s != *p)
546 				goto terminate;
547 			break;
548 		}
549 
550 		p++;
551 		s++;
552 	}
553 
554 terminate:
555 	return p - sp->chars;
556 }
557 
558 static wchar_t
559 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
560 			  const char * __restrict string, size_t n,
561 			  const char ** __restrict result,
562 			  _ISO2022State * __restrict psenc)
563 {
564 	wchar_t wchar = 0;
565 	int cur;
566 	struct seqtable *sp;
567 	int nmatch;
568 	int i;
569 
570 	_DIAGASSERT(ei != NULL);
571 	_DIAGASSERT(state != NULL);
572 	_DIAGASSERT(string != NULL);
573 	/* result may be NULL */
574 
575 	while (1) {
576 		/* SI/SO */
577 		if (1 <= n && string[0] == '\017') {
578 			psenc->gl = 0;
579 			string++;
580 			n--;
581 			continue;
582 		}
583 		if (1 <= n && string[0] == '\016') {
584 			psenc->gl = 1;
585 			string++;
586 			n--;
587 			continue;
588 		}
589 
590 		/* SS2/3R */
591 		if (1 <= n && string[0] && strchr("\217\216", string[0])) {
592 			psenc->singlegl = psenc->singlegr =
593 			    (string[0] - '\216') + 2;
594 			string++;
595 			n--;
596 			continue;
597 		}
598 
599 		/* eat the letter if this is not ESC */
600 		if (1 <= n && string[0] != '\033')
601 			break;
602 
603 		/* look for a perfect match from escape sequences */
604 		for (sp = &seqtable[0]; sp->len; sp++) {
605 			nmatch = seqmatch(string, n, sp);
606 			if (sp->len == nmatch && n >= sp->len)
607 				break;
608 		}
609 
610 		if (!sp->len)
611 			goto notseq;
612 
613 		if (sp->type != -1) {
614 			if (sp->csoff == -1)
615 				i = 0;
616 			else {
617 				switch (sp->type) {
618 				case CS94:
619 				case CS94MULTI:
620 					i = string[sp->csoff] - '(';
621 					break;
622 				case CS96:
623 				case CS96MULTI:
624 					i = string[sp->csoff] - ',';
625 					break;
626 				}
627 			}
628 			psenc->g[i].type = sp->type;
629 			psenc->g[i].final = '\0';
630 			psenc->g[i].interm = '\0';
631 			psenc->g[i].vers = '\0';
632 			/* sp->finaloff must not be -1 */
633 			if (sp->finaloff != -1)
634 				psenc->g[i].final = string[sp->finaloff];
635 			if (sp->intermoff != -1)
636 				psenc->g[i].interm = string[sp->intermoff];
637 			if (sp->versoff != -1)
638 				psenc->g[i].vers = string[sp->versoff];
639 
640 			string += sp->len;
641 			n -= sp->len;
642 			continue;
643 		}
644 
645 		/* LS2/3 */
646 		if (2 <= n && string[0] == '\033'
647 		 && string[1] && strchr("no", string[1])) {
648 			psenc->gl = string[1] - 'n' + 2;
649 			string += 2;
650 			n -= 2;
651 			continue;
652 		}
653 
654 		/* LS1/2/3R */
655 			/* XXX: { for vi showmatch */
656 		if (2 <= n && string[0] == '\033'
657 		 && string[1] && strchr("~}|", string[1])) {
658 			psenc->gr = 3 - (string[1] - '|');
659 			string += 2;
660 			n -= 2;
661 			continue;
662 		}
663 
664 		/* SS2/3 */
665 		if (2 <= n && string[0] == '\033'
666 		 && string[1] && strchr("NO", string[1])) {
667 			psenc->singlegl = (string[1] - 'N') + 2;
668 			string += 2;
669 			n -= 2;
670 			continue;
671 		}
672 
673 	notseq:
674 		/*
675 		 * if we've got an unknown escape sequence, eat the ESC at the
676 		 * head.  otherwise, wait till full escape sequence comes.
677 		 */
678 		for (sp = &seqtable[0]; sp->len; sp++) {
679 			nmatch = seqmatch(string, n, sp);
680 			if (!nmatch)
681 				continue;
682 
683 			/*
684 			 * if we are in the middle of escape sequence,
685 			 * we still need to wait for more characters to come
686 			 */
687 			if (n < sp->len) {
688 				if (nmatch == n) {
689 					if (result)
690 						*result = string;
691 					return (_ISO2022INVALID);
692 				}
693 			} else {
694 				if (nmatch == sp->len) {
695 					/* this case should not happen */
696 					goto eat;
697 				}
698 			}
699 		}
700 
701 		break;
702 	}
703 
704 eat:
705 	/* no letter to eat */
706 	if (n < 1) {
707 		if (result)
708 			*result = string;
709 		return (_ISO2022INVALID);
710 	}
711 
712 	/* normal chars.  always eat C0/C1 as is. */
713 	if (iscntl(*string & 0xff))
714 		cur = -1;
715 	else if (*string & 0x80) {
716 		cur = (psenc->singlegr == -1)
717 			? psenc->gr : psenc->singlegr;
718 	} else {
719 		cur = (psenc->singlegl == -1)
720 			? psenc->gl : psenc->singlegl;
721 	}
722 
723 	if (cur == -1) {
724 asis:
725 		wchar = *string++ & 0xff;
726 		if (result)
727 			*result = string;
728 		/* reset single shift state */
729 		psenc->singlegr = psenc->singlegl = -1;
730 		return wchar;
731 	}
732 
733 	/* length error check */
734 	switch (psenc->g[cur].type) {
735 	case CS94MULTI:
736 	case CS96MULTI:
737 		if (!isthree(psenc->g[cur].final)) {
738 			if (2 <= n
739 			 && (string[0] & 0x80) == (string[1] & 0x80))
740 				break;
741 		} else {
742 			if (3 <= n
743 			 && (string[0] & 0x80) == (string[1] & 0x80)
744 			 && (string[0] & 0x80) == (string[2] & 0x80))
745 				break;
746 		}
747 
748 		/* we still need to wait for more characters to come */
749 		if (result)
750 			*result = string;
751 		return (_ISO2022INVALID);
752 
753 	case CS94:
754 	case CS96:
755 		if (1 <= n)
756 			break;
757 
758 		/* we still need to wait for more characters to come */
759 		if (result)
760 			*result = string;
761 		return (_ISO2022INVALID);
762 	}
763 
764 	/* range check */
765 	switch (psenc->g[cur].type) {
766 	case CS94:
767 		if (!(is94(string[0] & 0x7f)))
768 			goto asis;
769 	case CS96:
770 		if (!(is96(string[0] & 0x7f)))
771 			goto asis;
772 		break;
773 	case CS94MULTI:
774 		if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
775 			goto asis;
776 		break;
777 	case CS96MULTI:
778 		if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
779 			goto asis;
780 		break;
781 	}
782 
783 	/* extract the character. */
784 	switch (psenc->g[cur].type) {
785 	case CS94:
786 		/* special case for ASCII. */
787 		if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
788 			wchar = *string++;
789 			wchar &= 0x7f;
790 			break;
791 		}
792 		wchar = psenc->g[cur].final;
793 		wchar = (wchar << 8);
794 		wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
795 		wchar = (wchar << 8);
796 		wchar = (wchar << 8) | (*string++ & 0x7f);
797 		break;
798 	case CS96:
799 		/* special case for ISO-8859-1. */
800 		if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
801 			wchar = *string++;
802 			wchar &= 0x7f;
803 			wchar |= 0x80;
804 			break;
805 		}
806 		wchar = psenc->g[cur].final;
807 		wchar = (wchar << 8);
808 		wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
809 		wchar = (wchar << 8);
810 		wchar = (wchar << 8) | (*string++ & 0x7f);
811 		wchar |= 0x80;
812 		break;
813 	case CS94MULTI:
814 	case CS96MULTI:
815 		wchar = psenc->g[cur].final;
816 		wchar = (wchar << 8);
817 		if (isthree(psenc->g[cur].final))
818 			wchar |= (*string++ & 0x7f);
819 		wchar = (wchar << 8) | (*string++ & 0x7f);
820 		wchar = (wchar << 8) | (*string++ & 0x7f);
821 		if (psenc->g[cur].type == CS96MULTI)
822 			wchar |= 0x80;
823 		break;
824 	}
825 
826 	if (result)
827 		*result = string;
828 	/* reset single shift state */
829 	psenc->singlegr = psenc->singlegl = -1;
830 	return wchar;
831 }
832 
833 
834 
835 static int
836 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
837 			     wchar_t * __restrict pwc,
838 			     const char ** __restrict s,
839 			     size_t n, _ISO2022State * __restrict psenc,
840 			     size_t * __restrict nresult)
841 {
842 	wchar_t wchar;
843 	const char *s0, *p, *result;
844 	int c;
845 	int chlenbak;
846 
847 	_DIAGASSERT(nresult != 0);
848 	_DIAGASSERT(ei != NULL);
849 	_DIAGASSERT(psenc != NULL);
850 	_DIAGASSERT(s != NULL);
851 
852 	s0 = *s;
853 	c = 0;
854 	chlenbak = psenc->chlen;
855 
856 	/*
857 	 * if we have something in buffer, use that.
858 	 * otherwise, skip here
859 	 */
860 	if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
861 		/* illgeal state */
862 		_citrus_ISO2022_init_state(ei, psenc);
863 		goto encoding_error;
864 	}
865 	if (psenc->chlen == 0)
866 		goto emptybuf;
867 
868 	/* buffer is not empty */
869 	p = psenc->ch;
870 	while (psenc->chlen < sizeof(psenc->ch) && n >= 0) {
871 		if (n > 0) {
872 			psenc->ch[psenc->chlen++] = *s0++;
873 			n--;
874 		}
875 
876 		wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
877 					   &result, psenc);
878 		if (wchar != _ISO2022INVALID) {
879 			c += result - p;
880 			if (psenc->chlen > c)
881 				memmove(psenc->ch, result, psenc->chlen - c);
882 			if (psenc->chlen < c)
883 				psenc->chlen = 0;
884 			else
885 				psenc->chlen -= c;
886 			goto output;
887 		}
888 
889 		c += result - p;
890 		p = result;
891 
892 		if (n == 0)
893 			goto restart;
894 	}
895 
896 	/* escape sequence too long? */
897 	goto encoding_error;
898 
899 emptybuf:
900 	wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
901 	if (wchar != _ISO2022INVALID) {
902 		c += result - s0;
903 		psenc->chlen = 0;
904 		s0 = result;
905 		goto output;
906 	}
907 	if (result > s0 && n > result - s0) {
908 		c += (result - s0);
909 		n -= (result - s0);
910 		s0 = result;
911 		goto emptybuf;
912 	}
913 	n += c;
914 	if (n < sizeof(psenc->ch)) {
915 		memcpy(psenc->ch, s0 - c, n);
916 		psenc->chlen = n;
917 		s0 = result;
918 		goto restart;
919 	}
920 
921 	/* escape sequence too long? */
922 
923 encoding_error:
924 	psenc->chlen = 0;
925 	*nresult = (size_t)-1;
926 	return (EILSEQ);
927 
928 output:
929 	*s = s0;
930 	if (pwc)
931 		*pwc = wchar;
932 
933 	if (!wchar)
934 		*nresult = 0;
935 	else
936 		*nresult = c - chlenbak;
937 
938 	return (0);
939 
940 restart:
941 	*s = s0;
942 	*nresult = (size_t)-2;
943 
944 	return (0);
945 }
946 
947 static int
948 recommendation(_ISO2022EncodingInfo * __restrict ei,
949 	       _ISO2022Charset * __restrict cs)
950 {
951 	int i, j;
952 	_ISO2022Charset *recommend;
953 
954 	_DIAGASSERT(ei != NULL);
955 	_DIAGASSERT(cs != NULL);
956 
957 	/* first, try a exact match. */
958 	for (i = 0; i < 4; i++) {
959 		recommend = ei->recommend[i];
960 		for (j = 0; j < ei->recommendsize[i]; j++) {
961 			if (cs->type != recommend[j].type)
962 				continue;
963 			if (cs->final != recommend[j].final)
964 				continue;
965 			if (cs->interm != recommend[j].interm)
966 				continue;
967 
968 			return i;
969 		}
970 	}
971 
972 	/* then, try a wildcard match over final char. */
973 	for (i = 0; i < 4; i++) {
974 		recommend = ei->recommend[i];
975 		for (j = 0; j < ei->recommendsize[i]; j++) {
976 			if (cs->type != recommend[j].type)
977 				continue;
978 			if (cs->final && (cs->final != recommend[j].final))
979 				continue;
980 			if (cs->interm && (cs->interm != recommend[j].interm))
981 				continue;
982 
983 			return i;
984 		}
985 	}
986 
987 	/* there's no recommendation. make a guess. */
988 	if (ei->maxcharset == 0) {
989 		return 0;
990 	} else {
991 		switch (cs->type) {
992 		case CS94:
993 		case CS94MULTI:
994 			return 0;
995 		case CS96:
996 		case CS96MULTI:
997 			return 1;
998 		}
999 	}
1000 	return 0;
1001 }
1002 
1003 static int
1004 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc,
1005 		   char * __restrict string, size_t n,
1006 		   char ** __restrict result,
1007 		   _ISO2022State * __restrict psenc)
1008 {
1009 	int i = 0, len;
1010 	_ISO2022Charset cs;
1011 	char *p;
1012 	char tmp[MB_LEN_MAX];
1013 	int target;
1014 	u_char mask;
1015 	int bit8;
1016 
1017 	_DIAGASSERT(ei != NULL);
1018 	_DIAGASSERT(string != NULL);
1019 	/* result may be NULL */
1020 	/* state appears to be unused */
1021 
1022 	if (iscntl(wc & 0xff)) {
1023 		/* go back to ASCII on control chars */
1024 		cs.type = CS94;
1025 		cs.final = 'B';
1026 		cs.interm = '\0';
1027 	} else if (!(wc & ~0xff)) {
1028 		if (wc & 0x80) {
1029 			/* special treatment for ISO-8859-1 */
1030 			cs.type = CS96;
1031 			cs.final = 'A';
1032 			cs.interm = '\0';
1033 		} else {
1034 			/* special treatment for ASCII */
1035 			cs.type = CS94;
1036 			cs.final = 'B';
1037 			cs.interm = '\0';
1038 		}
1039 	} else {
1040 		cs.final = (wc >> 24) & 0x7f;
1041 		if ((wc >> 16) & 0x80)
1042 			cs.interm = (wc >> 16) & 0x7f;
1043 		else
1044 			cs.interm = '\0';
1045 		if (wc & 0x80)
1046 			cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96;
1047 		else
1048 			cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94;
1049 	}
1050 	target = recommendation(ei, &cs);
1051 	p = tmp;
1052 	bit8 = ei->flags & F_8BIT;
1053 
1054 	/* designate the charset onto the target plane(G0/1/2/3). */
1055 	if (psenc->g[target].type == cs.type
1056 	 && psenc->g[target].final == cs.final
1057 	 && psenc->g[target].interm == cs.interm)
1058 		goto planeok;
1059 
1060 	*p++ = '\033';
1061 	if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1062 		*p++ = '$';
1063 	if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
1064 	 && !cs.interm && !(ei->flags & F_NOOLD))
1065 		;
1066 	else if (cs.type == CS94 || cs.type == CS94MULTI)
1067 		*p++ = "()*+"[target];
1068 	else
1069 		*p++ = ",-./"[target];
1070 	if (cs.interm)
1071 		*p++ = cs.interm;
1072 	*p++ = cs.final;
1073 
1074 	psenc->g[target].type = cs.type;
1075 	psenc->g[target].final = cs.final;
1076 	psenc->g[target].interm = cs.interm;
1077 
1078 planeok:
1079 	/* invoke the plane onto GL or GR. */
1080 	if (psenc->gl == target)
1081 		goto sideok;
1082 	if (bit8 && psenc->gr == target)
1083 		goto sideok;
1084 
1085 	if (target == 0 && (ei->flags & F_LS0)) {
1086 		*p++ = '\017';
1087 		psenc->gl = 0;
1088 	} else if (target == 1 && (ei->flags & F_LS1)) {
1089 		*p++ = '\016';
1090 		psenc->gl = 1;
1091 	} else if (target == 2 && (ei->flags & F_LS2)) {
1092 		*p++ = '\033';
1093 		*p++ = 'n';
1094 		psenc->gl = 2;
1095 	} else if (target == 3 && (ei->flags & F_LS3)) {
1096 		*p++ = '\033';
1097 		*p++ = 'o';
1098 		psenc->gl = 3;
1099 	} else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1100 		*p++ = '\033';
1101 		*p++ = '~';
1102 		psenc->gr = 1;
1103 	} else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1104 		*p++ = '\033';
1105 		/*{*/
1106 		*p++ = '}';
1107 		psenc->gr = 2;
1108 	} else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1109 		*p++ = '\033';
1110 		*p++ = '|';
1111 		psenc->gr = 3;
1112 	} else if (target == 2 && (ei->flags & F_SS2)) {
1113 		*p++ = '\033';
1114 		*p++ = 'N';
1115 		psenc->singlegl = 2;
1116 	} else if (target == 3 && (ei->flags & F_SS3)) {
1117 		*p++ = '\033';
1118 		*p++ = 'O';
1119 		psenc->singlegl = 3;
1120 	} else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1121 		*p++ = '\216';
1122 		*p++ = 'N';
1123 		psenc->singlegl = psenc->singlegr = 2;
1124 	} else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1125 		*p++ = '\217';
1126 		*p++ = 'O';
1127 		psenc->singlegl = psenc->singlegr = 3;
1128 	} else
1129 		abort();
1130 
1131 sideok:
1132 	if (psenc->singlegl == target)
1133 		mask = 0x00;
1134 	else if (psenc->singlegr == target)
1135 		mask = 0x80;
1136 	else if (psenc->gl == target)
1137 		mask = 0x00;
1138 	else if ((ei->flags & F_8BIT) && psenc->gr == target)
1139 		mask = 0x80;
1140 	else
1141 		abort();
1142 
1143 	switch (cs.type) {
1144 	case CS94:
1145 	case CS96:
1146 		i = 1;
1147 		break;
1148 	case CS94MULTI:
1149 	case CS96MULTI:
1150 		i = isthree(cs.final) ? 3 : 2;
1151 		break;
1152 	}
1153 	if (wc != 0)
1154 		while (i-- > 0)
1155 			*p++ = ((wc >> (i << 3)) & 0x7f) | mask;
1156 
1157 	/* reset single shift state */
1158 	psenc->singlegl = psenc->singlegr = -1;
1159 
1160 	len = p - tmp;
1161 	if (n < len) {
1162 		if (result)
1163 			*result = (char *)0;
1164 	} else {
1165 		if (result)
1166 			*result = string + len;
1167 		memcpy(string, tmp, len);
1168 	}
1169 	return len;
1170 }
1171 
1172 static int
1173 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1174 			     char * __restrict s, size_t n, wchar_t wc,
1175 			     _ISO2022State * __restrict psenc,
1176 			     size_t * __restrict nresult)
1177 {
1178 	char buf[MB_LEN_MAX];
1179 	char *result;
1180 	int len, ret;
1181 
1182 	_DIAGASSERT(ei != NULL);
1183 	_DIAGASSERT(nresult != 0);
1184 	_DIAGASSERT(s != NULL);
1185 
1186 	/* XXX state will be modified after this operation... */
1187 	len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc);
1188 	if (sizeof(buf) < len || n < len) {
1189 		/* XXX should recover state? */
1190 		ret = E2BIG;
1191 		goto err;
1192 	}
1193 
1194 	memcpy(s, buf, len);
1195 	*nresult = (size_t)len;
1196 	return (0);
1197 
1198 err:
1199 	/* bound check failure */
1200 	*nresult = (size_t)-1;
1201 	return ret;
1202 }
1203 
1204 static __inline int
1205 /*ARGSUSED*/
1206 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei,
1207 			      _csid_t * __restrict csid,
1208 			      _index_t * __restrict idx, wchar_t wc)
1209 {
1210 	wchar_t m, nm;
1211 
1212 	_DIAGASSERT(csid != NULL && idx != NULL);
1213 
1214 	m = wc & 0x7FFF8080;
1215 	nm = wc & 0x007F7F7F;
1216 	if (m & 0x00800000) {
1217 		nm &= 0x00007F7F;
1218 	} else {
1219 		m &= 0x7F008080;
1220 	}
1221 	if (nm & 0x007F0000) {
1222 		/* ^3 mark */
1223 		m |= 0x007F0000;
1224 	} else if (nm & 0x00007F00) {
1225 		/* ^2 mark */
1226 		m |= 0x00007F00;
1227 	}
1228 	*csid = (_csid_t)m;
1229 	*idx  = (_index_t)nm;
1230 
1231 	return (0);
1232 }
1233 
1234 static __inline int
1235 /*ARGSUSED*/
1236 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei,
1237 			      wchar_t * __restrict wc,
1238 			      _csid_t csid, _index_t idx)
1239 {
1240 
1241 	_DIAGASSERT(ei != NULL && wc != NULL);
1242 
1243 	*wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx;
1244 
1245 	return (0);
1246 }
1247 
1248 /* ----------------------------------------------------------------------
1249  * public interface for ctype
1250  */
1251 
/*
 * The template header is included after the _FUNCNAME/_ENCODING_*
 * macros and the static helpers above have been defined.
 */
_CITRUS_CTYPE_DECLS(ISO2022);
_CITRUS_CTYPE_DEF_OPS(ISO2022);

#include "citrus_ctype_template.h"

/* ----------------------------------------------------------------------
 * public interface for stdenc
 */

_CITRUS_STDENC_DECLS(ISO2022);
_CITRUS_STDENC_DEF_OPS(ISO2022);

#include "citrus_stdenc_template.h"
1265