xref: /netbsd-src/lib/libc/citrus/modules/citrus_iso2022.c (revision e5548b402ae4c44fb816de42c7bba9581ce23ef5)
1 /*	$NetBSD: citrus_iso2022.c,v 1.14 2005/10/29 18:02:04 tshiozak Exp $	*/
2 
3 /*-
4  * Copyright (c)1999, 2002 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  *	$Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
29  */
30 
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.14 2005/10/29 18:02:04 tshiozak Exp $");
34 #endif /* LIBC_SCCS and not lint */
35 
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stddef.h>
42 #include <locale.h>
43 #include <wchar.h>
44 #include <sys/types.h>
45 #include <limits.h>
46 
47 #include "citrus_namespace.h"
48 #include "citrus_types.h"
49 #include "citrus_module.h"
50 #include "citrus_ctype.h"
51 #include "citrus_stdenc.h"
52 #include "citrus_iso2022.h"
53 
54 
55 /* ----------------------------------------------------------------------
56  * private stuffs used by templates
57  */
58 
59 
60 /*
61  * wchar_t mappings:
62  * ASCII (ESC ( B)		00000000 00000000 00000000 0xxxxxxx
63  * iso-8859-1 (ESC , A)		00000000 00000000 00000000 1xxxxxxx
64  * 94 charset (ESC ( F)		0fffffff 00000000 00000000 0xxxxxxx
65  * 94 charset (ESC ( M F)	0fffffff 1mmmmmmm 00000000 0xxxxxxx
66  * 96 charset (ESC , F)		0fffffff 00000000 00000000 1xxxxxxx
67  * 96 charset (ESC , M F)	0fffffff 1mmmmmmm 00000000 1xxxxxxx
68  * 94x94 charset (ESC $ ( F)	0fffffff 00000000 0xxxxxxx 0xxxxxxx
69  * 96x96 charset (ESC $ , F)	0fffffff 00000000 0xxxxxxx 1xxxxxxx
70  * 94x94 charset (ESC & V ESC $ ( F)
71  *				0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
72  * 94x94x94 charset (ESC $ ( F)	0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
73  * 96x96x96 charset (ESC $ , F)	0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
74  * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit)
75  *				1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
76  */
77 
78 typedef struct {
79 	u_char	type;
80 #define	CS94		(0U)
81 #define	CS96		(1U)
82 #define	CS94MULTI	(2U)
83 #define	CS96MULTI	(3U)
84 
85 	u_char	final;
86 	u_char	interm;
87 	u_char	vers;
88 } _ISO2022Charset;
89 
90 typedef struct {
91 	_ISO2022Charset	g[4];
92 	/* need 3 bits to hold -1, 0, ..., 3 */
93 	int	gl:3,
94 		gr:3,
95 		singlegl:3,
96 		singlegr:3;
97 	char ch[7];	/* longest escape sequence (ESC & V ESC $ ( F) */
98 	int chlen;
99 	int flags;
100 #define _ISO2022STATE_FLAG_INITIALIZED	1
101 } _ISO2022State;
102 
103 typedef struct {
104 	_ISO2022Charset	*recommend[4];
105 	size_t	recommendsize[4];
106 	_ISO2022Charset	initg[4];
107 	int	maxcharset;
108 	int	flags;
109 #define	F_8BIT	0x0001
110 #define	F_NOOLD	0x0002
111 #define	F_SI	0x0010	/*0F*/
112 #define	F_SO	0x0020	/*0E*/
113 #define	F_LS0	0x0010	/*0F*/
114 #define	F_LS1	0x0020	/*0E*/
115 #define	F_LS2	0x0040	/*ESC n*/
116 #define	F_LS3	0x0080	/*ESC o*/
117 #define	F_LS1R	0x0100	/*ESC ~*/
118 #define	F_LS2R	0x0200	/*ESC }*/
119 #define	F_LS3R	0x0400	/*ESC |*/
120 #define	F_SS2	0x0800	/*ESC N*/
121 #define	F_SS3	0x1000	/*ESC O*/
122 #define	F_SS2R	0x2000	/*8E*/
123 #define	F_SS3R	0x4000	/*8F*/
124 } _ISO2022EncodingInfo;
/*
 * Encoding description bundled with one conversion state per stateful
 * ctype entry point.  The member names carry the s_ prefix because
 * _CEI_TO_STATE() builds them by token pasting.
 */
typedef struct {
	_ISO2022EncodingInfo ei;
	struct {
		/* for future multi-locale facility */
		_ISO2022State	s_mblen;
		_ISO2022State	s_mbrlen;
		_ISO2022State	s_mbrtowc;
		_ISO2022State	s_mbtowc;
		_ISO2022State	s_mbsrtowcs;
		_ISO2022State	s_wcrtomb;
		_ISO2022State	s_wcsrtombs;
		_ISO2022State	s_wctomb;
	} states;
} _ISO2022CTypeInfo;
139 
/* address of the encoding description inside a _ISO2022CTypeInfo */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
/* the state member belonging to _func_, e.g. states.s_mbrtowc */
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/*
 * Names the templates included at the end of this file refer to.
 * NOTE(review): how each one is used is decided by the template headers,
 * which are not part of this file -- confirm there before changing any.
 */
#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1
/* true until _citrus_ISO2022_init_state() has set the INITIALIZED flag */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	\
    (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))


/* returned by _ISO2022_sgetwchar() when no character could be produced */
#define _ISO2022INVALID (wchar_t)-1
154 
/* C0 control byte: 0x00 - 0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x <= 0x1f);
}

/* C1 control byte: 0x80 - 0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* any control byte: C0, C1 or DEL (0x7f) */
static __inline int
iscntl(__uint8_t x)
{
	if (x == 0x7f)
		return (1);
	return (isc0(x) || isc1(x));
}

/* byte of a 94-character set: 0x21 - 0x7e */
static __inline int
is94(__uint8_t x)
{
	return (!(x < 0x21 || x > 0x7e));
}

/* byte of a 96-character set: 0x20 - 0x7f */
static __inline int
is96(__uint8_t x)
{
	return (x >= 0x20 && x < 0x80);
}

/* byte accepted where seqtable says ECMA: 0x30 - 0x7f */
static __inline int
isecma(__uint8_t x)
{
	return (x >= 0x30 && x < 0x80);
}

/* intermediate byte of an escape sequence: 0x20 - 0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* final byte of a three-byte multibyte set: 0x60 - 0x6f */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
163 
164 static __inline int
165 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
166 {
167 
168 	_DIAGASSERT(p != NULL);
169 	_DIAGASSERT(cs != NULL);
170 
171 	if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
172 		cs->final = (u_char)(p[3] & 0xff);
173 		cs->interm = '\0';
174 		cs->vers = '\0';
175 		cs->type = CS94MULTI;
176 	} else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
177 		cs->final = (u_char)(p[3] & 0xff);
178 		cs->interm = '\0';
179 		cs->vers = '\0';
180 		cs->type = CS96MULTI;
181 	} else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
182 		cs->final = (u_char)(p[2] & 0xff);
183 		cs->interm = '\0';
184 		cs->vers = '\0';
185 		cs->type = CS94;
186 	} else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
187 		cs->final = (u_char )(p[2] & 0xff);
188 		cs->interm = '\0';
189 		cs->vers = '\0';
190 		cs->type = CS96;
191 	} else {
192 		return 1;
193 	}
194 
195 	return 0;
196 }
197 
198 
/*
 * Result codes of the get_*() token parsers below:
 * token is not of this kind / token accepted / token of this kind but bad.
 */
#define _NOTMATCH	0
#define _MATCH		1
#define _PARSEFAIL	2
202 
203 static __inline int
204 get_recommend(_ISO2022EncodingInfo * __restrict ei,
205 	      const char * __restrict token)
206 {
207 	int i;
208 	_ISO2022Charset cs, *p;
209 
210 	if (!strchr("0123", token[0]) || token[1] != '=')
211 		return (_NOTMATCH);
212 
213 	if (getcs(&token[2], &cs) == 0)
214 		;
215 	else if (!strcmp(&token[2], "94")) {
216 		cs.final = (u_char)(token[4]);
217 		cs.interm = '\0';
218 		cs.vers = '\0';
219 		cs.type = CS94;
220 	} else if (!strcmp(&token[2], "96")) {
221 		cs.final = (u_char)(token[4]);
222 		cs.interm = '\0';
223 		cs.vers = '\0';
224 		cs.type = CS96;
225 	} else if (!strcmp(&token[2], "94$")) {
226 		cs.final = (u_char)(token[5]);
227 		cs.interm = '\0';
228 		cs.vers = '\0';
229 		cs.type = CS94MULTI;
230 	} else if (!strcmp(&token[2], "96$")) {
231 		cs.final = (u_char)(token[5]);
232 		cs.interm = '\0';
233 		cs.vers = '\0';
234 		cs.type = CS96MULTI;
235 	} else {
236 		return (_PARSEFAIL);
237 	}
238 
239 	i = token[0] - '0';
240 	if (!ei->recommend[i]) {
241 		ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
242 	} else {
243 		p = realloc(ei->recommend[i],
244 		    sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1));
245 		if (!p)
246 			return (_PARSEFAIL);
247 		ei->recommend[i] = p;
248 	}
249 	if (!ei->recommend[i])
250 		return (_PARSEFAIL);
251 	ei->recommendsize[i]++;
252 
253 	(ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
254 	(ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
255 	(ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
256 	(ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
257 
258 	return (_MATCH);
259 }
260 
261 static __inline int
262 get_initg(_ISO2022EncodingInfo * __restrict ei,
263 	  const char * __restrict token)
264 {
265 	_ISO2022Charset cs;
266 
267 	if (strncmp("INIT", &token[0], 4) ||
268 	    !strchr("0123", token[4]) ||
269 	    token[5] != '=')
270 		return (_NOTMATCH);
271 
272 	if (getcs(&token[6], &cs) != 0)
273 		return (_PARSEFAIL);
274 
275 	ei->initg[token[4] - '0'].type = cs.type;
276 	ei->initg[token[4] - '0'].final = cs.final;
277 	ei->initg[token[4] - '0'].interm = cs.interm;
278 	ei->initg[token[4] - '0'].vers = cs.vers;
279 
280 	return (_MATCH);
281 }
282 
283 static __inline int
284 get_max(_ISO2022EncodingInfo * __restrict ei,
285 	const char * __restrict token)
286 {
287 	if (!strcmp(token, "MAX1")) {
288 		ei->maxcharset = 1;
289 	} else if (!strcmp(token, "MAX2")) {
290 		ei->maxcharset = 2;
291 	} else if (!strcmp(token, "MAX3")) {
292 		ei->maxcharset = 3;
293 	} else
294 		return (_NOTMATCH);
295 
296 	return (_MATCH);
297 }
298 
299 
300 static __inline int
301 get_flags(_ISO2022EncodingInfo * __restrict ei,
302 	  const char * __restrict token)
303 {
304 	int i;
305 	static struct {
306 		const char	*tag;
307 		int		flag;
308 	} const tags[] = {
309 		{ "DUMMY",	0	},
310 		{ "8BIT",	F_8BIT	},
311 		{ "NOOLD",	F_NOOLD	},
312 		{ "SI",		F_SI	},
313 		{ "SO",		F_SO	},
314 		{ "LS0",	F_LS0	},
315 		{ "LS1",	F_LS1	},
316 		{ "LS2",	F_LS2	},
317 		{ "LS3",	F_LS3	},
318 		{ "LS1R",	F_LS1R	},
319 		{ "LS2R",	F_LS2R	},
320 		{ "LS3R",	F_LS3R	},
321 		{ "SS2",	F_SS2	},
322 		{ "SS3",	F_SS3	},
323 		{ "SS2R",	F_SS2R	},
324 		{ "SS3R",	F_SS3R	},
325 		{ NULL,		0 }
326 	};
327 
328 	for (i = 0; tags[i].tag; i++) {
329 		if (!strcmp(token, tags[i].tag)) {
330 			ei->flags |= tags[i].flag;
331 			return (_MATCH);
332 		}
333 	}
334 
335 	return (_NOTMATCH);
336 }
337 
338 
339 static __inline int
340 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
341 			       const void * __restrict var, size_t lenvar)
342 {
343 	char const *v, *e;
344 	char buf[20];
345 	int i, len, ret;
346 
347 	_DIAGASSERT(ei != NULL);
348 
349 
350 	/*
351 	 * parse VARIABLE section.
352 	 */
353 
354 	if (!var)
355 		return (EFTYPE);
356 
357 	v = (const char *) var;
358 
359 	/* initialize structure */
360 	ei->maxcharset = 0;
361 	for (i = 0; i < 4; i++) {
362 		ei->recommend[i] = NULL;
363 		ei->recommendsize[i] = 0;
364 	}
365 	ei->flags = 0;
366 
367 	while (*v) {
368 		while (*v == ' ' || *v == '\t')
369 			++v;
370 
371 		/* find the token */
372 		e = v;
373 		while (*e && *e != ' ' && *e != '\t')
374 			++e;
375 
376 		len = e-v;
377 		if (len == 0)
378 			break;
379 		if (len>=sizeof(buf))
380 			goto parsefail;
381 		snprintf(buf, sizeof(buf), "%.*s", len, v);
382 
383 		if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
384 			;
385 		else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
386 			;
387 		else if ((ret = get_max(ei, buf)) != _NOTMATCH)
388 			;
389 		else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
390 			;
391 		else
392 			ret = _PARSEFAIL;
393 		if (ret==_PARSEFAIL)
394 			goto parsefail;
395 		v = e;
396 
397 	}
398 
399 	return (0);
400 
401 parsefail:
402 	free(ei->recommend[0]);
403 	free(ei->recommend[1]);
404 	free(ei->recommend[2]);
405 	free(ei->recommend[3]);
406 
407 	return (EFTYPE);
408 }
409 
410 static __inline void
411 /*ARGSUSED*/
412 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
413 			   _ISO2022State * __restrict s)
414 {
415 	int i;
416 
417 	memset(s, 0, sizeof(*s));
418 	s->gl = 0;
419 	s->gr = (ei->flags & F_8BIT) ? 1 : -1;
420 
421 	for (i = 0; i < 4; i++) {
422 		if (ei->initg[i].final) {
423 			s->g[i].type = ei->initg[i].type;
424 			s->g[i].final = ei->initg[i].final;
425 			s->g[i].interm = ei->initg[i].interm;
426 		}
427 	}
428 	s->singlegl = s->singlegr = -1;
429 	s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
430 }
431 
432 static __inline void
433 /*ARGSUSED*/
434 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
435 			   void * __restrict pspriv,
436 			   const _ISO2022State * __restrict s)
437 {
438 	memcpy(pspriv, (const void *)s, sizeof(*s));
439 }
440 
441 static __inline void
442 /*ARGSUSED*/
443 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
444 			     _ISO2022State * __restrict s,
445 			     const void * __restrict pspriv)
446 {
447 	memcpy((void *)s, pspriv, sizeof(*s));
448 }
449 
450 static int
451 /*ARGSUSED*/
452 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei,
453 				     const void * __restrict var,
454 				     size_t lenvar)
455 {
456 
457 	_DIAGASSERT(ei != NULL);
458 
459 	return _citrus_ISO2022_parse_variable(ei, var, lenvar);
460 }
461 
462 static void
463 /*ARGSUSED*/
464 _citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei)
465 {
466 }
467 
/*
 * Escape sequence patterns.
 *
 * An element of chars[] is either a literal byte or a marker matched by
 * seqmatch(): ECMA (a byte accepted by isecma()), INTERM (a byte accepted
 * by isinterm()), OECMA (one of "@AB"), CS94 (one of "()*+") or CS96 (one
 * of ",-./").  CS94 and CS96 are the charset type constants 0 and 1 reused
 * as markers.
 */
#define	ESC	'\033'
#define	ECMA	-1
#define	INTERM	-2
#define	OECMA	-3
static const struct seqtable {
	int type;	/* charset type designated, or -1 for a shift */
	int csoff;	/* offset of the plane selecting byte, -1 means G0 */
	int finaloff;	/* offset of the final byte, -1 if none */
	int intermoff;	/* offset of the extra intermediate byte, -1 if none */
	int versoff;	/* offset of the version byte, -1 if none */
	int len;	/* length of the sequence; 0 ends the table */
	int chars[10];	/* the pattern itself */
} seqtable[] = {
	/* G0 94MULTI special */
	{ CS94MULTI, -1, 2, -1, -1,	3, { ESC, '$', OECMA }, },
	/* G0 94MULTI special with version identification */
	{ CS94MULTI, -1, 5, -1, 2,	6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
	/* G? 94 */
	{ CS94, 1, 2, -1, -1,		3, { ESC, CS94, ECMA, }, },
	/* G? 94 with 2nd intermediate char */
	{ CS94, 1, 3, 2, -1,		4, { ESC, CS94, INTERM, ECMA, }, },
	/* G? 96 */
	{ CS96, 1, 2, -1, -1,		3, { ESC, CS96, ECMA, }, },
	/* G? 96 with 2nd intermediate char */
	{ CS96, 1, 3, 2, -1,		4, { ESC, CS96, INTERM, ECMA, }, },
	/* G? 94MULTI */
	{ CS94MULTI, 2, 3, -1, -1,	4, { ESC, '$', CS94, ECMA, }, },
	/* G? 96MULTI */
	{ CS96MULTI, 2, 3, -1, -1,	4, { ESC, '$', CS96, ECMA, }, },
	/* G? 94MULTI with version specification */
	{ CS94MULTI, 5, 6, -1, 2,	7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
	/* LS2/3 */
	{ -1, -1, -1, -1, -1,		2, { ESC, 'n', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, 'o', }, },
	/* LS1/2/3R */
	{ -1, -1, -1, -1, -1,		2, { ESC, '~', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, /*{*/ '}', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, '|', }, },
	/* SS2/3 */
	{ -1, -1, -1, -1, -1,		2, { ESC, 'N', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, 'O', }, },
	/* end of records */
	{ 0, }
};
512 
513 static int
514 seqmatch(const char * __restrict s, size_t n,
515 	 const struct seqtable * __restrict sp)
516 {
517 	const int *p;
518 
519 	_DIAGASSERT(s != NULL);
520 	_DIAGASSERT(sp != NULL);
521 
522 	p = sp->chars;
523 	while (p - sp->chars < n && p - sp->chars < sp->len) {
524 		switch (*p) {
525 		case ECMA:
526 			if (!isecma(*s))
527 				goto terminate;
528 			break;
529 		case OECMA:
530 			if (*s && strchr("@AB", *s))
531 				break;
532 			else
533 				goto terminate;
534 		case INTERM:
535 			if (!isinterm(*s))
536 				goto terminate;
537 			break;
538 		case CS94:
539 			if (*s && strchr("()*+", *s))
540 				break;
541 			else
542 				goto terminate;
543 		case CS96:
544 			if (*s && strchr(",-./", *s))
545 				break;
546 			else
547 				goto terminate;
548 		default:
549 			if (*s != *p)
550 				goto terminate;
551 			break;
552 		}
553 
554 		p++;
555 		s++;
556 	}
557 
558 terminate:
559 	return p - sp->chars;
560 }
561 
/*
 * _ISO2022_sgetwchar:
 *	Decode one wide character from the n bytes at string.
 *
 *	Shift functions (SI, SO, the 8bit single shifts, ESC n/o, ESC ~ } |,
 *	ESC N/O) and the designation sequences listed in seqtable that lead
 *	the input are consumed first and applied to *psenc.
 *
 *	Returns the wide character and, when result is not NULL, sets
 *	*result just past the bytes used.  Returns _ISO2022INVALID when the
 *	input ends before a character is complete; *result then points at
 *	the first byte that has not been consumed, which may lie beyond
 *	string if shift sequences were processed.
 *
 *	ei is only checked by the assertion.
 */
static wchar_t
_ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
		const char * __restrict string, size_t n,
		const char ** __restrict result,
		_ISO2022State * __restrict psenc)
{
	wchar_t wchar = 0;
	int cur;
	const struct seqtable *sp;
	int nmatch;
	int i;

	_DIAGASSERT(ei != NULL);
	_DIAGASSERT(psenc != NULL);
	_DIAGASSERT(string != NULL);
	/* result may be NULL */

	/* each pass consumes one shift/designation, or leaves the loop */
	while (1) {
		/* SI/SO */
		if (1 <= n && string[0] == '\017') {
			psenc->gl = 0;
			string++;
			n--;
			continue;
		}
		if (1 <= n && string[0] == '\016') {
			psenc->gl = 1;
			string++;
			n--;
			continue;
		}

		/* SS2/3R */
		if (1 <= n && string[0] && strchr("\217\216", string[0])) {
			/* 0x8e selects G2, 0x8f selects G3, for one character */
			psenc->singlegl = psenc->singlegr =
			    (string[0] - '\216') + 2;
			string++;
			n--;
			continue;
		}

		/* eat the letter if this is not ESC */
		if (1 <= n && string[0] != '\033')
			break;

		/* look for a perfect match from escape sequences */
		for (sp = &seqtable[0]; sp->len; sp++) {
			nmatch = seqmatch(string, n, sp);
			if (sp->len == nmatch && n >= sp->len)
				break;
		}

		if (!sp->len)
			goto notseq;

		/* a designation: record the charset in the selected plane */
		if (sp->type != -1) {
			if (sp->csoff == -1)
				i = 0;
			else {
				/* the selecting byte is consecutive from G0 */
				switch (sp->type) {
				case CS94:
				case CS94MULTI:
					i = string[sp->csoff] - '(';
					break;
				case CS96:
				case CS96MULTI:
					i = string[sp->csoff] - ',';
					break;
				}
			}
			psenc->g[i].type = sp->type;
			psenc->g[i].final = '\0';
			psenc->g[i].interm = '\0';
			psenc->g[i].vers = '\0';
			/* sp->finaloff must not be -1 */
			if (sp->finaloff != -1)
				psenc->g[i].final = string[sp->finaloff];
			if (sp->intermoff != -1)
				psenc->g[i].interm = string[sp->intermoff];
			if (sp->versoff != -1)
				psenc->g[i].vers = string[sp->versoff];

			string += sp->len;
			n -= sp->len;
			continue;
		}

		/* LS2/3 */
		if (2 <= n && string[0] == '\033'
		 && string[1] && strchr("no", string[1])) {
			psenc->gl = string[1] - 'n' + 2;
			string += 2;
			n -= 2;
			continue;
		}

		/* LS1/2/3R */
			/* XXX: { for vi showmatch */
		if (2 <= n && string[0] == '\033'
		 && string[1] && strchr("~}|", string[1])) {
			/* '|' gives 3, '}' gives 2, '~' gives 1 */
			psenc->gr = 3 - (string[1] - '|');
			string += 2;
			n -= 2;
			continue;
		}

		/* SS2/3 */
		if (2 <= n && string[0] == '\033'
		 && string[1] && strchr("NO", string[1])) {
			psenc->singlegl = (string[1] - 'N') + 2;
			string += 2;
			n -= 2;
			continue;
		}

	notseq:
		/*
		 * if we've got an unknown escape sequence, eat the ESC at the
		 * head.  otherwise, wait till full escape sequence comes.
		 */
		for (sp = &seqtable[0]; sp->len; sp++) {
			nmatch = seqmatch(string, n, sp);
			if (!nmatch)
				continue;

			/*
			 * if we are in the middle of escape sequence,
			 * we still need to wait for more characters to come
			 */
			if (n < sp->len) {
				if (nmatch == n) {
					if (result)
						*result = string;
					return (_ISO2022INVALID);
				}
			} else {
				if (nmatch == sp->len) {
					/* this case should not happen */
					goto eat;
				}
			}
		}

		break;
	}

eat:
	/* no letter to eat */
	if (n < 1) {
		if (result)
			*result = string;
		return (_ISO2022INVALID);
	}

	/* normal chars.  always eat C0/C1 as is. */
	if (iscntl(*string & 0xff))
		cur = -1;
	else if (*string & 0x80) {
		/* 8bit byte: use GR unless a single shift is pending */
		cur = (psenc->singlegr == -1)
			? psenc->gr : psenc->singlegr;
	} else {
		/* 7bit byte: use GL unless a single shift is pending */
		cur = (psenc->singlegl == -1)
			? psenc->gl : psenc->singlegl;
	}

	if (cur == -1) {
asis:
		/* hand the byte back unchanged as the wide character */
		wchar = *string++ & 0xff;
		if (result)
			*result = string;
		/* reset single shift state */
		psenc->singlegr = psenc->singlegl = -1;
		return wchar;
	}

	/* length error check */
	switch (psenc->g[cur].type) {
	case CS94MULTI:
	case CS96MULTI:
		/* all bytes of one character must come from the same half */
		if (!isthree(psenc->g[cur].final)) {
			if (2 <= n
			 && (string[0] & 0x80) == (string[1] & 0x80))
				break;
		} else {
			if (3 <= n
			 && (string[0] & 0x80) == (string[1] & 0x80)
			 && (string[0] & 0x80) == (string[2] & 0x80))
				break;
		}

		/* we still need to wait for more characters to come */
		if (result)
			*result = string;
		return (_ISO2022INVALID);

	case CS94:
	case CS96:
		if (1 <= n)
			break;

		/* we still need to wait for more characters to come */
		if (result)
			*result = string;
		return (_ISO2022INVALID);
	}

	/* range check */
	switch (psenc->g[cur].type) {
	case CS94:
		if (!(is94(string[0] & 0x7f)))
			goto asis;
		/*
		 * no break here: control continues into the CS96 test,
		 * which accepts every byte that passed is94()
		 */
	case CS96:
		if (!(is96(string[0] & 0x7f)))
			goto asis;
		break;
	case CS94MULTI:
		/* only the first two bytes are checked, also for 3-byte sets */
		if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
			goto asis;
		break;
	case CS96MULTI:
		if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
			goto asis;
		break;
	}

	/* extract the character. */
	switch (psenc->g[cur].type) {
	case CS94:
		/* special case for ASCII. */
		if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
			wchar = *string++;
			wchar &= 0x7f;
			break;
		}
		/* final byte, marked intermediate byte, zero, character */
		wchar = psenc->g[cur].final;
		wchar = (wchar << 8);
		wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
		wchar = (wchar << 8);
		wchar = (wchar << 8) | (*string++ & 0x7f);
		break;
	case CS96:
		/* special case for ISO-8859-1. */
		if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
			wchar = *string++;
			wchar &= 0x7f;
			wchar |= 0x80;
			break;
		}
		wchar = psenc->g[cur].final;
		wchar = (wchar << 8);
		wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
		wchar = (wchar << 8);
		wchar = (wchar << 8) | (*string++ & 0x7f);
		/* bit 7 of the low byte marks a 96 charset */
		wchar |= 0x80;
		break;
	case CS94MULTI:
	case CS96MULTI:
		/* final byte on top, then two or three character bytes */
		wchar = psenc->g[cur].final;
		wchar = (wchar << 8);
		if (isthree(psenc->g[cur].final))
			wchar |= (*string++ & 0x7f);
		wchar = (wchar << 8) | (*string++ & 0x7f);
		wchar = (wchar << 8) | (*string++ & 0x7f);
		if (psenc->g[cur].type == CS96MULTI)
			wchar |= 0x80;
		break;
	}

	if (result)
		*result = string;
	/* reset single shift state */
	psenc->singlegr = psenc->singlegl = -1;
	return wchar;
}
836 
837 
838 
/*
 * _citrus_ISO2022_mbrtowc_priv:
 *	Restartable multibyte to wide character conversion on top of
 *	_ISO2022_sgetwchar().  Bytes of a sequence that is not complete yet
 *	are kept in psenc->ch (psenc->chlen of them) between calls.
 *
 *	Outcomes:
 *	- a character was decoded: returns 0, stores it through pwc when pwc
 *	  is not NULL, advances *s, and sets *nresult to 0 for the null
 *	  character or else to the consumed byte count less the bytes that
 *	  were already buffered on entry;
 *	- more input is needed: returns 0 with *nresult == (size_t)-2 and
 *	  *s advanced;
 *	- the state is corrupt or the pending sequence does not fit into
 *	  psenc->ch: returns EILSEQ with *nresult == (size_t)-1.
 */
static int
_citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
			     wchar_t * __restrict pwc,
			     const char ** __restrict s,
			     size_t n, _ISO2022State * __restrict psenc,
			     size_t * __restrict nresult)
{
	wchar_t wchar;
	const char *s0, *p, *result;
	int c;
	int chlenbak;

	_DIAGASSERT(nresult != 0);
	_DIAGASSERT(ei != NULL);
	_DIAGASSERT(psenc != NULL);
	_DIAGASSERT(s != NULL);

	s0 = *s;
	c = 0;			/* bytes consumed so far */
	chlenbak = psenc->chlen;	/* bytes buffered before this call */

	/*
	 * if we have something in buffer, use that.
	 * otherwise, skip here
	 */
	if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
		/* illegal state */
		_citrus_ISO2022_init_state(ei, psenc);
		goto encoding_error;
	}
	if (psenc->chlen == 0)
		goto emptybuf;

	/* buffer is not empty */
	p = psenc->ch;
	/* n is unsigned, so only the room left in ch[] limits this loop */
	while (psenc->chlen < sizeof(psenc->ch) && n >= 0) {
		if (n > 0) {
			/* append one more input byte to the pending ones */
			psenc->ch[psenc->chlen++] = *s0++;
			n--;
		}

		wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
					   &result, psenc);
		c += result - p;
		if (wchar != _ISO2022INVALID) {
			/* keep whatever follows the decoded character */
			if (psenc->chlen > c)
				memmove(psenc->ch, result, psenc->chlen - c);
			if (psenc->chlen < c)
				psenc->chlen = 0;
			else
				psenc->chlen -= c;
			goto output;
		}

		if (n == 0) {
			if ((result - p) == psenc->chlen)
				/* complete shift sequence. */
				psenc->chlen = 0;
			goto restart;
		}

		p = result;
	}

	/* escape sequence too long? */
	goto encoding_error;

emptybuf:
	/* nothing buffered: decode straight from the caller's bytes */
	wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
	if (wchar != _ISO2022INVALID) {
		c += result - s0;
		psenc->chlen = 0;
		s0 = result;
		goto output;
	}
	if (result > s0) {
		/* only shift sequences were consumed; go on behind them */
		c += (result - s0);
		n -= (result - s0);
		s0 = result;
		if (n>0)
			goto emptybuf;
		/* complete shift sequence. */
		goto restart;
	}
	/* save the input seen in this call, consumed shifts included */
	n += c;
	if (n < sizeof(psenc->ch)) {
		memcpy(psenc->ch, s0 - c, n);
		psenc->chlen = n;
		s0 = result;
		goto restart;
	}

	/* escape sequence too long? */

encoding_error:
	psenc->chlen = 0;
	*nresult = (size_t)-1;
	return (EILSEQ);

output:
	*s = s0;
	if (pwc)
		*pwc = wchar;

	if (!wchar)
		*nresult = 0;
	else
		*nresult = c - chlenbak;

	return (0);

restart:
	*s = s0;
	*nresult = (size_t)-2;

	return (0);
}
956 
957 static int
958 recommendation(_ISO2022EncodingInfo * __restrict ei,
959 	       _ISO2022Charset * __restrict cs)
960 {
961 	int i, j;
962 	_ISO2022Charset *recommend;
963 
964 	_DIAGASSERT(ei != NULL);
965 	_DIAGASSERT(cs != NULL);
966 
967 	/* first, try a exact match. */
968 	for (i = 0; i < 4; i++) {
969 		recommend = ei->recommend[i];
970 		for (j = 0; j < ei->recommendsize[i]; j++) {
971 			if (cs->type != recommend[j].type)
972 				continue;
973 			if (cs->final != recommend[j].final)
974 				continue;
975 			if (cs->interm != recommend[j].interm)
976 				continue;
977 
978 			return i;
979 		}
980 	}
981 
982 	/* then, try a wildcard match over final char. */
983 	for (i = 0; i < 4; i++) {
984 		recommend = ei->recommend[i];
985 		for (j = 0; j < ei->recommendsize[i]; j++) {
986 			if (cs->type != recommend[j].type)
987 				continue;
988 			if (cs->final && (cs->final != recommend[j].final))
989 				continue;
990 			if (cs->interm && (cs->interm != recommend[j].interm))
991 				continue;
992 
993 			return i;
994 		}
995 	}
996 
997 	/* there's no recommendation. make a guess. */
998 	if (ei->maxcharset == 0) {
999 		return 0;
1000 	} else {
1001 		switch (cs->type) {
1002 		case CS94:
1003 		case CS94MULTI:
1004 			return 0;
1005 		case CS96:
1006 		case CS96MULTI:
1007 			return 1;
1008 		}
1009 	}
1010 	return 0;
1011 }
1012 
1013 static int
1014 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc,
1015 		   char * __restrict string, size_t n,
1016 		   char ** __restrict result,
1017 		   _ISO2022State * __restrict psenc)
1018 {
1019 	int i = 0, len;
1020 	_ISO2022Charset cs;
1021 	char *p;
1022 	char tmp[MB_LEN_MAX];
1023 	int target;
1024 	u_char mask;
1025 	int bit8;
1026 
1027 	_DIAGASSERT(ei != NULL);
1028 	_DIAGASSERT(string != NULL);
1029 	/* result may be NULL */
1030 	/* state appears to be unused */
1031 
1032 	if (iscntl(wc & 0xff)) {
1033 		/* go back to ASCII on control chars */
1034 		cs.type = CS94;
1035 		cs.final = 'B';
1036 		cs.interm = '\0';
1037 	} else if (!(wc & ~0xff)) {
1038 		if (wc & 0x80) {
1039 			/* special treatment for ISO-8859-1 */
1040 			cs.type = CS96;
1041 			cs.final = 'A';
1042 			cs.interm = '\0';
1043 		} else {
1044 			/* special treatment for ASCII */
1045 			cs.type = CS94;
1046 			cs.final = 'B';
1047 			cs.interm = '\0';
1048 		}
1049 	} else {
1050 		cs.final = (wc >> 24) & 0x7f;
1051 		if ((wc >> 16) & 0x80)
1052 			cs.interm = (wc >> 16) & 0x7f;
1053 		else
1054 			cs.interm = '\0';
1055 		if (wc & 0x80)
1056 			cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96;
1057 		else
1058 			cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94;
1059 	}
1060 	target = recommendation(ei, &cs);
1061 	p = tmp;
1062 	bit8 = ei->flags & F_8BIT;
1063 
1064 	/* designate the charset onto the target plane(G0/1/2/3). */
1065 	if (psenc->g[target].type == cs.type
1066 	 && psenc->g[target].final == cs.final
1067 	 && psenc->g[target].interm == cs.interm)
1068 		goto planeok;
1069 
1070 	*p++ = '\033';
1071 	if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1072 		*p++ = '$';
1073 	if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
1074 	 && !cs.interm && !(ei->flags & F_NOOLD))
1075 		;
1076 	else if (cs.type == CS94 || cs.type == CS94MULTI)
1077 		*p++ = "()*+"[target];
1078 	else
1079 		*p++ = ",-./"[target];
1080 	if (cs.interm)
1081 		*p++ = cs.interm;
1082 	*p++ = cs.final;
1083 
1084 	psenc->g[target].type = cs.type;
1085 	psenc->g[target].final = cs.final;
1086 	psenc->g[target].interm = cs.interm;
1087 
1088 planeok:
1089 	/* invoke the plane onto GL or GR. */
1090 	if (psenc->gl == target)
1091 		goto sideok;
1092 	if (bit8 && psenc->gr == target)
1093 		goto sideok;
1094 
1095 	if (target == 0 && (ei->flags & F_LS0)) {
1096 		*p++ = '\017';
1097 		psenc->gl = 0;
1098 	} else if (target == 1 && (ei->flags & F_LS1)) {
1099 		*p++ = '\016';
1100 		psenc->gl = 1;
1101 	} else if (target == 2 && (ei->flags & F_LS2)) {
1102 		*p++ = '\033';
1103 		*p++ = 'n';
1104 		psenc->gl = 2;
1105 	} else if (target == 3 && (ei->flags & F_LS3)) {
1106 		*p++ = '\033';
1107 		*p++ = 'o';
1108 		psenc->gl = 3;
1109 	} else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1110 		*p++ = '\033';
1111 		*p++ = '~';
1112 		psenc->gr = 1;
1113 	} else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1114 		*p++ = '\033';
1115 		/*{*/
1116 		*p++ = '}';
1117 		psenc->gr = 2;
1118 	} else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1119 		*p++ = '\033';
1120 		*p++ = '|';
1121 		psenc->gr = 3;
1122 	} else if (target == 2 && (ei->flags & F_SS2)) {
1123 		*p++ = '\033';
1124 		*p++ = 'N';
1125 		psenc->singlegl = 2;
1126 	} else if (target == 3 && (ei->flags & F_SS3)) {
1127 		*p++ = '\033';
1128 		*p++ = 'O';
1129 		psenc->singlegl = 3;
1130 	} else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1131 		*p++ = '\216';
1132 		*p++ = 'N';
1133 		psenc->singlegl = psenc->singlegr = 2;
1134 	} else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1135 		*p++ = '\217';
1136 		*p++ = 'O';
1137 		psenc->singlegl = psenc->singlegr = 3;
1138 	} else
1139 		abort();
1140 
1141 sideok:
1142 	if (psenc->singlegl == target)
1143 		mask = 0x00;
1144 	else if (psenc->singlegr == target)
1145 		mask = 0x80;
1146 	else if (psenc->gl == target)
1147 		mask = 0x00;
1148 	else if ((ei->flags & F_8BIT) && psenc->gr == target)
1149 		mask = 0x80;
1150 	else
1151 		abort();
1152 
1153 	switch (cs.type) {
1154 	case CS94:
1155 	case CS96:
1156 		i = 1;
1157 		break;
1158 	case CS94MULTI:
1159 	case CS96MULTI:
1160 		i = isthree(cs.final) ? 3 : 2;
1161 		break;
1162 	}
1163 	while (i-- > 0)
1164 		*p++ = ((wc >> (i << 3)) & 0x7f) | mask;
1165 
1166 	/* reset single shift state */
1167 	psenc->singlegl = psenc->singlegr = -1;
1168 
1169 	len = p - tmp;
1170 	if (n < len) {
1171 		if (result)
1172 			*result = (char *)0;
1173 	} else {
1174 		if (result)
1175 			*result = string + len;
1176 		memcpy(string, tmp, len);
1177 	}
1178 	return len;
1179 }
1180 
1181 static int
1182 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei,
1183 				char * __restrict s, size_t n,
1184 				_ISO2022State * __restrict psenc,
1185 				size_t * __restrict nresult)
1186 {
1187 	char buf[MB_LEN_MAX];
1188 	char *result;
1189 	int len, ret;
1190 
1191 	_DIAGASSERT(ei != NULL);
1192 	_DIAGASSERT(nresult != 0);
1193 	_DIAGASSERT(s != NULL);
1194 
1195 	/* XXX state will be modified after this operation... */
1196 	len = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc);
1197 	if (len==0) {
1198 		ret = EINVAL;
1199 		goto err;
1200 	}
1201 	if (sizeof(buf) < len || n < len-1) {
1202 		/* XXX should recover state? */
1203 		ret = E2BIG;
1204 		goto err;
1205 	}
1206 
1207 	memcpy(s, buf, len-1);
1208 	*nresult = (size_t)(len-1);
1209 	return (0);
1210 
1211 err:
1212 	/* bound check failure */
1213 	*nresult = (size_t)-1;
1214 	return ret;
1215 }
1216 
1217 static int
1218 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1219 			     char * __restrict s, size_t n, wchar_t wc,
1220 			     _ISO2022State * __restrict psenc,
1221 			     size_t * __restrict nresult)
1222 {
1223 	char buf[MB_LEN_MAX];
1224 	char *result;
1225 	int len, ret;
1226 
1227 	_DIAGASSERT(ei != NULL);
1228 	_DIAGASSERT(nresult != 0);
1229 	_DIAGASSERT(s != NULL);
1230 
1231 	/* XXX state will be modified after this operation... */
1232 	len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc);
1233 	if (sizeof(buf) < len || n < len) {
1234 		/* XXX should recover state? */
1235 		ret = E2BIG;
1236 		goto err;
1237 	}
1238 
1239 	memcpy(s, buf, len);
1240 	*nresult = (size_t)len;
1241 	return (0);
1242 
1243 err:
1244 	/* bound check failure */
1245 	*nresult = (size_t)-1;
1246 	return ret;
1247 }
1248 
1249 static __inline int
1250 /*ARGSUSED*/
1251 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei,
1252 			      _csid_t * __restrict csid,
1253 			      _index_t * __restrict idx, wchar_t wc)
1254 {
1255 	wchar_t m, nm;
1256 
1257 	_DIAGASSERT(csid != NULL && idx != NULL);
1258 
1259 	m = wc & 0x7FFF8080;
1260 	nm = wc & 0x007F7F7F;
1261 	if (m & 0x00800000) {
1262 		nm &= 0x00007F7F;
1263 	} else {
1264 		m &= 0x7F008080;
1265 	}
1266 	if (nm & 0x007F0000) {
1267 		/* ^3 mark */
1268 		m |= 0x007F0000;
1269 	} else if (nm & 0x00007F00) {
1270 		/* ^2 mark */
1271 		m |= 0x00007F00;
1272 	}
1273 	*csid = (_csid_t)m;
1274 	*idx  = (_index_t)nm;
1275 
1276 	return (0);
1277 }
1278 
1279 static __inline int
1280 /*ARGSUSED*/
1281 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei,
1282 			      wchar_t * __restrict wc,
1283 			      _csid_t csid, _index_t idx)
1284 {
1285 
1286 	_DIAGASSERT(ei != NULL && wc != NULL);
1287 
1288 	*wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx;
1289 
1290 	return (0);
1291 }
1292 
1293 static __inline int
1294 /*ARGSUSED*/
1295 _citrus_ISO2022_stdenc_get_state_desc_generic(_ISO2022EncodingInfo * __restrict ei,
1296 					      _ISO2022State * __restrict psenc,
1297 					      int * __restrict rstate)
1298 {
1299 
1300 	if (psenc->chlen == 0) {
1301 		/* XXX: it should distinguish initial and stable. */
1302 		*rstate = _STDENC_SDGEN_STABLE;
1303 	} else {
1304 		if (psenc->ch[0] == '\033')
1305 			*rstate = _STDENC_SDGEN_INCOMPLETE_SHIFT;
1306 		else
1307 			*rstate = _STDENC_SDGEN_INCOMPLETE_CHAR;
1308 	}
1309 
1310 	return 0;
1311 }
1312 
1313 /* ----------------------------------------------------------------------
1314  * public interface for ctype
1315  */
1316 
/*
 * NOTE(review): presumably these declare the ctype entry points and their
 * operations table for this module, with the template supplying bodies
 * built from the _FUNCNAME() functions and _ENCODING_* macros defined in
 * this file -- confirm against citrus_ctype.h and the template header.
 */
_CITRUS_CTYPE_DECLS(ISO2022);
_CITRUS_CTYPE_DEF_OPS(ISO2022);

#include "citrus_ctype_template.h"
1321 
1322 /* ----------------------------------------------------------------------
1323  * public interface for stdenc
1324  */
1325 
/*
 * NOTE(review): presumably the stdenc counterpart of the ctype block:
 * declarations and operations table for this module, with bodies coming
 * from the template -- confirm against citrus_stdenc.h and the template
 * header.
 */
_CITRUS_STDENC_DECLS(ISO2022);
_CITRUS_STDENC_DEF_OPS(ISO2022);

#include "citrus_stdenc_template.h"
1330