xref: /netbsd-src/lib/libc/citrus/modules/citrus_iso2022.c (revision fd5cb0acea84d278e04e640d37ca2398f894991f)
1 /*	$NetBSD: citrus_iso2022.c,v 1.12 2004/12/21 11:25:43 yamt Exp $	*/
2 
3 /*-
4  * Copyright (c)1999, 2002 Citrus Project,
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  *	$Citrus: xpg4dl/FreeBSD/lib/libc/locale/iso2022.c,v 1.23 2001/06/21 01:51:44 yamt Exp $
29  */
30 
31 #include <sys/cdefs.h>
32 #if defined(LIBC_SCCS) && !defined(lint)
33 __RCSID("$NetBSD: citrus_iso2022.c,v 1.12 2004/12/21 11:25:43 yamt Exp $");
34 #endif /* LIBC_SCCS and not lint */
35 
36 #include <assert.h>
37 #include <errno.h>
38 #include <string.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stddef.h>
42 #include <locale.h>
43 #include <wchar.h>
44 #include <sys/types.h>
45 #include <limits.h>
46 
47 #include "citrus_namespace.h"
48 #include "citrus_types.h"
49 #include "citrus_module.h"
50 #include "citrus_ctype.h"
51 #include "citrus_stdenc.h"
52 #include "citrus_iso2022.h"
53 
54 
/* ----------------------------------------------------------------------
 * private stuff used by templates
 */
58 
59 
60 /*
61  * wchar_t mappings:
62  * ASCII (ESC ( B)		00000000 00000000 00000000 0xxxxxxx
63  * iso-8859-1 (ESC , A)		00000000 00000000 00000000 1xxxxxxx
64  * 94 charset (ESC ( F)		0fffffff 00000000 00000000 0xxxxxxx
65  * 94 charset (ESC ( M F)	0fffffff 1mmmmmmm 00000000 0xxxxxxx
66  * 96 charset (ESC , F)		0fffffff 00000000 00000000 1xxxxxxx
67  * 96 charset (ESC , M F)	0fffffff 1mmmmmmm 00000000 1xxxxxxx
68  * 94x94 charset (ESC $ ( F)	0fffffff 00000000 0xxxxxxx 0xxxxxxx
69  * 96x96 charset (ESC $ , F)	0fffffff 00000000 0xxxxxxx 1xxxxxxx
70  * 94x94 charset (ESC & V ESC $ ( F)
71  *				0fffffff 1vvvvvvv 0xxxxxxx 0xxxxxxx
72  * 94x94x94 charset (ESC $ ( F)	0fffffff 0xxxxxxx 0xxxxxxx 0xxxxxxx
73  * 96x96x96 charset (ESC $ , F)	0fffffff 0xxxxxxx 0xxxxxxx 1xxxxxxx
74  * reserved for UCS4 co-existence (UCS4 is 31bit encoding thanks to mohta bit)
75  *				1xxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx
76  */
77 
78 typedef struct {
79 	u_char	type;
80 #define	CS94		(0U)
81 #define	CS96		(1U)
82 #define	CS94MULTI	(2U)
83 #define	CS96MULTI	(3U)
84 
85 	u_char	final;
86 	u_char	interm;
87 	u_char	vers;
88 } _ISO2022Charset;
89 
90 typedef struct {
91 	_ISO2022Charset	g[4];
92 	/* need 3 bits to hold -1, 0, ..., 3 */
93 	int	gl:3,
94 		gr:3,
95 		singlegl:3,
96 		singlegr:3;
97 	char ch[7];	/* longest escape sequence (ESC & V ESC $ ( F) */
98 	int chlen;
99 	int flags;
100 #define _ISO2022STATE_FLAG_INITIALIZED	1
101 } _ISO2022State;
102 
103 typedef struct {
104 	_ISO2022Charset	*recommend[4];
105 	size_t	recommendsize[4];
106 	_ISO2022Charset	initg[4];
107 	int	maxcharset;
108 	int	flags;
109 #define	F_8BIT	0x0001
110 #define	F_NOOLD	0x0002
111 #define	F_SI	0x0010	/*0F*/
112 #define	F_SO	0x0020	/*0E*/
113 #define	F_LS0	0x0010	/*0F*/
114 #define	F_LS1	0x0020	/*0E*/
115 #define	F_LS2	0x0040	/*ESC n*/
116 #define	F_LS3	0x0080	/*ESC o*/
117 #define	F_LS1R	0x0100	/*ESC ~*/
118 #define	F_LS2R	0x0200	/*ESC }*/
119 #define	F_LS3R	0x0400	/*ESC |*/
120 #define	F_SS2	0x0800	/*ESC N*/
121 #define	F_SS3	0x1000	/*ESC O*/
122 #define	F_SS2R	0x2000	/*8E*/
123 #define	F_SS3R	0x4000	/*8F*/
124 } _ISO2022EncodingInfo;
/*
 * Per-locale ctype data: the parsed encoding parameters plus one
 * conversion state per slot, selected by name through _CEI_TO_STATE().
 * The slot names presumably match the libc functions that keep an
 * internal state (confirm against citrus_ctype_template.h).
 */
typedef struct {
	_ISO2022EncodingInfo ei;
	struct {
		/* for future multi-locale facility */
		_ISO2022State	s_mblen;
		_ISO2022State	s_mbrlen;
		_ISO2022State	s_mbrtowc;
		_ISO2022State	s_mbtowc;
		_ISO2022State	s_mbsrtowcs;
		_ISO2022State	s_wcrtomb;
		_ISO2022State	s_wcsrtombs;
		_ISO2022State	s_wctomb;
	} states;
} _ISO2022CTypeInfo;
139 
/*
 * Glue consumed by the citrus ctype/stdenc templates included at the
 * end of this file.
 */

/* accessors into _ISO2022CTypeInfo */
#define _CEI_TO_EI(_cei_)		(&(_cei_)->ei)
#define _CEI_TO_STATE(_cei_, _func_)	(_cei_)->states.s_##_func_

/* names and types the templates are instantiated with */
#define _FUNCNAME(m)			_citrus_ISO2022_##m
#define _ENCODING_INFO			_ISO2022EncodingInfo
#define _CTYPE_INFO			_ISO2022CTypeInfo
#define _ENCODING_STATE			_ISO2022State
#define _ENCODING_MB_CUR_MAX(_ei_)	MB_LEN_MAX
#define _ENCODING_IS_STATE_DEPENDENT	1
/* a state is unusable until _citrus_ISO2022_init_state() has set the flag */
#define _STATE_NEEDS_EXPLICIT_INIT(_ps_)	\
    (!((_ps_)->flags & _ISO2022STATE_FLAG_INITIALIZED))


/*
 * Returned by _ISO2022_sgetwchar() when no character could be decoded.
 * Fully parenthesized so that it behaves as a single operand wherever
 * it is expanded.
 */
#define _ISO2022INVALID ((wchar_t)-1)
154 
/*
 * Byte classifiers.  Each returns 1 when x lies in the named range and
 * 0 otherwise.
 */

/* C0 control: 0x00..0x1f */
static __inline int
isc0(__uint8_t x)
{
	return (x <= 0x1f);
}

/* C1 control: 0x80..0x9f */
static __inline int
isc1(__uint8_t x)
{
	return ((x & 0xe0) == 0x80);
}

/* any control: C0, C1 or DEL (0x7f) */
static __inline int
iscntl(__uint8_t x)
{
	return (x == 0x7f || isc0(x) || isc1(x));
}

/* member of a 94-character set: 0x21..0x7e */
static __inline int
is94(__uint8_t x)
{
	return (!(x < 0x21 || x > 0x7e));
}

/* member of a 96-character set: 0x20..0x7f */
static __inline int
is96(__uint8_t x)
{
	return (!(x < 0x20 || x > 0x7f));
}

/* byte accepted where seqtable has ECMA: 0x30..0x7f */
static __inline int
isecma(__uint8_t x)
{
	return (!(x < 0x30 || x > 0x7f));
}

/* intermediate byte of an escape sequence: 0x20..0x2f */
static __inline int
isinterm(__uint8_t x)
{
	return ((x & 0xf0) == 0x20);
}

/* final byte 0x60..0x6f: marks a multibyte set as three bytes wide */
static __inline int
isthree(__uint8_t x)
{
	return ((x & 0xf0) == 0x60);
}
163 
164 static __inline int
165 getcs(const char * __restrict p, _ISO2022Charset * __restrict cs)
166 {
167 
168 	_DIAGASSERT(p != NULL);
169 	_DIAGASSERT(cs != NULL);
170 
171 	if (!strncmp(p, "94$", 3) && p[3] && !p[4]) {
172 		cs->final = (u_char)(p[3] & 0xff);
173 		cs->interm = '\0';
174 		cs->vers = '\0';
175 		cs->type = CS94MULTI;
176 	} else if (!strncmp(p, "96$", 3) && p[3] && !p[4]) {
177 		cs->final = (u_char)(p[3] & 0xff);
178 		cs->interm = '\0';
179 		cs->vers = '\0';
180 		cs->type = CS96MULTI;
181 	} else if (!strncmp(p, "94", 2) && p[2] && !p[3]) {
182 		cs->final = (u_char)(p[2] & 0xff);
183 		cs->interm = '\0';
184 		cs->vers = '\0';
185 		cs->type = CS94;
186 	} else if (!strncmp(p, "96", 2) && p[2] && !p[3]) {
187 		cs->final = (u_char )(p[2] & 0xff);
188 		cs->interm = '\0';
189 		cs->vers = '\0';
190 		cs->type = CS96;
191 	} else {
192 		return 1;
193 	}
194 
195 	return 0;
196 }
197 
198 
/*
 * Results of the get_*() token parsers below.
 */
#define _NOTMATCH	0	/* token is not of this parser's form */
#define _MATCH		1	/* token recognized and applied to ei */
#define _PARSEFAIL	2	/* token rejected; parsing must stop */
202 
203 static __inline int
204 get_recommend(_ISO2022EncodingInfo * __restrict ei,
205 	      const char * __restrict token)
206 {
207 	int i;
208 	_ISO2022Charset cs, *p;
209 
210 	if (!strchr("0123", token[0]) || token[1] != '=')
211 		return (_NOTMATCH);
212 
213 	if (getcs(&token[2], &cs) == 0)
214 		;
215 	else if (!strcmp(&token[2], "94")) {
216 		cs.final = (u_char)(token[4]);
217 		cs.interm = '\0';
218 		cs.vers = '\0';
219 		cs.type = CS94;
220 	} else if (!strcmp(&token[2], "96")) {
221 		cs.final = (u_char)(token[4]);
222 		cs.interm = '\0';
223 		cs.vers = '\0';
224 		cs.type = CS96;
225 	} else if (!strcmp(&token[2], "94$")) {
226 		cs.final = (u_char)(token[5]);
227 		cs.interm = '\0';
228 		cs.vers = '\0';
229 		cs.type = CS94MULTI;
230 	} else if (!strcmp(&token[2], "96$")) {
231 		cs.final = (u_char)(token[5]);
232 		cs.interm = '\0';
233 		cs.vers = '\0';
234 		cs.type = CS96MULTI;
235 	} else {
236 		return (_PARSEFAIL);
237 	}
238 
239 	i = token[0] - '0';
240 	if (!ei->recommend[i]) {
241 		ei->recommend[i] = malloc(sizeof(_ISO2022Charset));
242 	} else {
243 		p = realloc(ei->recommend[i],
244 		    sizeof(_ISO2022Charset) * (ei->recommendsize[i] + 1));
245 		if (!p)
246 			return (_PARSEFAIL);
247 		ei->recommend[i] = p;
248 	}
249 	if (!ei->recommend[i])
250 		return (_PARSEFAIL);
251 	ei->recommendsize[i]++;
252 
253 	(ei->recommend[i] + (ei->recommendsize[i] - 1))->final = cs.final;
254 	(ei->recommend[i] + (ei->recommendsize[i] - 1))->interm = cs.interm;
255 	(ei->recommend[i] + (ei->recommendsize[i] - 1))->vers = cs.vers;
256 	(ei->recommend[i] + (ei->recommendsize[i] - 1))->type = cs.type;
257 
258 	return (_MATCH);
259 }
260 
261 static __inline int
262 get_initg(_ISO2022EncodingInfo * __restrict ei,
263 	  const char * __restrict token)
264 {
265 	_ISO2022Charset cs;
266 
267 	if (strncmp("INIT", &token[0], 4) ||
268 	    !strchr("0123", token[4]) ||
269 	    token[5] != '=')
270 		return (_NOTMATCH);
271 
272 	if (getcs(&token[6], &cs) != 0)
273 		return (_PARSEFAIL);
274 
275 	ei->initg[token[4] - '0'].type = cs.type;
276 	ei->initg[token[4] - '0'].final = cs.final;
277 	ei->initg[token[4] - '0'].interm = cs.interm;
278 	ei->initg[token[4] - '0'].vers = cs.vers;
279 
280 	return (_MATCH);
281 }
282 
283 static __inline int
284 get_max(_ISO2022EncodingInfo * __restrict ei,
285 	const char * __restrict token)
286 {
287 	if (!strcmp(token, "MAX1")) {
288 		ei->maxcharset = 1;
289 	} else if (!strcmp(token, "MAX2")) {
290 		ei->maxcharset = 2;
291 	} else if (!strcmp(token, "MAX3")) {
292 		ei->maxcharset = 3;
293 	} else
294 		return (_NOTMATCH);
295 
296 	return (_MATCH);
297 }
298 
299 
300 static __inline int
301 get_flags(_ISO2022EncodingInfo * __restrict ei,
302 	  const char * __restrict token)
303 {
304 	int i;
305 	static struct {
306 		const char	*tag;
307 		int		flag;
308 	} const tags[] = {
309 		{ "DUMMY",	0	},
310 		{ "8BIT",	F_8BIT	},
311 		{ "NOOLD",	F_NOOLD	},
312 		{ "SI",		F_SI	},
313 		{ "SO",		F_SO	},
314 		{ "LS0",	F_LS0	},
315 		{ "LS1",	F_LS1	},
316 		{ "LS2",	F_LS2	},
317 		{ "LS3",	F_LS3	},
318 		{ "LS1R",	F_LS1R	},
319 		{ "LS2R",	F_LS2R	},
320 		{ "LS3R",	F_LS3R	},
321 		{ "SS2",	F_SS2	},
322 		{ "SS3",	F_SS3	},
323 		{ "SS2R",	F_SS2R	},
324 		{ "SS3R",	F_SS3R	},
325 		{ NULL,		0 }
326 	};
327 
328 	for (i = 0; tags[i].tag; i++) {
329 		if (!strcmp(token, tags[i].tag)) {
330 			ei->flags |= tags[i].flag;
331 			return (_MATCH);
332 		}
333 	}
334 
335 	return (_NOTMATCH);
336 }
337 
338 
339 static __inline int
340 _citrus_ISO2022_parse_variable(_ISO2022EncodingInfo * __restrict ei,
341 			       const void * __restrict var, size_t lenvar)
342 {
343 	char const *v, *e;
344 	char buf[20];
345 	int i, len, ret;
346 
347 	_DIAGASSERT(ei != NULL);
348 
349 
350 	/*
351 	 * parse VARIABLE section.
352 	 */
353 
354 	if (!var)
355 		return (EFTYPE);
356 
357 	v = (const char *) var;
358 
359 	/* initialize structure */
360 	ei->maxcharset = 0;
361 	for (i = 0; i < 4; i++) {
362 		ei->recommend[i] = NULL;
363 		ei->recommendsize[i] = 0;
364 	}
365 	ei->flags = 0;
366 
367 	while (*v) {
368 		while (*v == ' ' || *v == '\t')
369 			++v;
370 
371 		/* find the token */
372 		e = v;
373 		while (*e && *e != ' ' && *e != '\t')
374 			++e;
375 
376 		len = e-v;
377 		if (len == 0)
378 			break;
379 		if (len>=sizeof(buf))
380 			goto parsefail;
381 		snprintf(buf, sizeof(buf), "%.*s", len, v);
382 
383 		if ((ret = get_recommend(ei, buf)) != _NOTMATCH)
384 			;
385 		else if ((ret = get_initg(ei, buf)) != _NOTMATCH)
386 			;
387 		else if ((ret = get_max(ei, buf)) != _NOTMATCH)
388 			;
389 		else if ((ret = get_flags(ei, buf)) != _NOTMATCH)
390 			;
391 		else
392 			ret = _PARSEFAIL;
393 		if (ret==_PARSEFAIL)
394 			goto parsefail;
395 		v = e;
396 
397 	}
398 
399 	return (0);
400 
401 parsefail:
402 	free(ei->recommend[0]);
403 	free(ei->recommend[1]);
404 	free(ei->recommend[2]);
405 	free(ei->recommend[3]);
406 
407 	return (EFTYPE);
408 }
409 
410 static __inline void
411 /*ARGSUSED*/
412 _citrus_ISO2022_init_state(_ISO2022EncodingInfo * __restrict ei,
413 			   _ISO2022State * __restrict s)
414 {
415 	int i;
416 
417 	memset(s, 0, sizeof(*s));
418 	s->gl = 0;
419 	s->gr = (ei->flags & F_8BIT) ? 1 : -1;
420 
421 	for (i = 0; i < 4; i++) {
422 		if (ei->initg[i].final) {
423 			s->g[i].type = ei->initg[i].type;
424 			s->g[i].final = ei->initg[i].final;
425 			s->g[i].interm = ei->initg[i].interm;
426 		}
427 	}
428 	s->singlegl = s->singlegr = -1;
429 	s->flags |= _ISO2022STATE_FLAG_INITIALIZED;
430 }
431 
432 static __inline void
433 /*ARGSUSED*/
434 _citrus_ISO2022_pack_state(_ISO2022EncodingInfo * __restrict ei,
435 			   void * __restrict pspriv,
436 			   const _ISO2022State * __restrict s)
437 {
438 	memcpy(pspriv, (const void *)s, sizeof(*s));
439 }
440 
441 static __inline void
442 /*ARGSUSED*/
443 _citrus_ISO2022_unpack_state(_ISO2022EncodingInfo * __restrict ei,
444 			     _ISO2022State * __restrict s,
445 			     const void * __restrict pspriv)
446 {
447 	memcpy((void *)s, pspriv, sizeof(*s));
448 }
449 
450 static int
451 /*ARGSUSED*/
452 _citrus_ISO2022_encoding_module_init(_ISO2022EncodingInfo * __restrict ei,
453 				     const void * __restrict var,
454 				     size_t lenvar)
455 {
456 
457 	_DIAGASSERT(ei != NULL);
458 
459 	return _citrus_ISO2022_parse_variable(ei, var, lenvar);
460 }
461 
/*
 * _citrus_ISO2022_encoding_module_uninit --
 *	Template hook called to tear down *ei.  Does nothing.
 *
 *	NOTE(review): the recommend[] arrays allocated by get_recommend()
 *	are not released here -- confirm whether that is intended.
 */
static void
/*ARGSUSED*/
_citrus_ISO2022_encoding_module_uninit(_ISO2022EncodingInfo *ei)
{
}
467 
/*
 * Pattern elements usable in seqtable[].chars besides literal bytes.
 * seqmatch() additionally treats CS94 and CS96 as pattern elements
 * matching one of "()*+" and one of ",-./" respectively.
 */
#define	ESC	'\033'
#define	ECMA	-1	/* any byte accepted by isecma() */
#define	INTERM	-2	/* any byte accepted by isinterm() */
#define	OECMA	-3	/* one of '@', 'A', 'B' */
/*
 * Escape sequences recognized by _ISO2022_sgetwchar().
 *
 * type      - charset type assigned on a match, or -1 for a sequence that
 *             is not a designation (locking/single shifts)
 * csoff     - offset of the byte selecting the target plane G0..G3,
 *             or -1 to always target G0
 * finaloff  - offset of the final byte
 * intermoff - offset of the 2nd intermediate byte, or -1 if none
 * versoff   - offset of the version byte, or -1 if none
 * len       - length of the sequence; 0 marks the end of the table
 * chars     - the pattern itself, len elements long
 */
static const struct seqtable {
	int type;
	int csoff;
	int finaloff;
	int intermoff;
	int versoff;
	int len;
	int chars[10];
} seqtable[] = {
	/* G0 94MULTI special */
	{ CS94MULTI, -1, 2, -1, -1,	3, { ESC, '$', OECMA }, },
	/* G0 94MULTI special with version identification */
	{ CS94MULTI, -1, 5, -1, 2,	6, { ESC, '&', ECMA, ESC, '$', OECMA }, },
	/* G? 94 */
	{ CS94, 1, 2, -1, -1,		3, { ESC, CS94, ECMA, }, },
	/* G? 94 with 2nd intermediate char */
	{ CS94, 1, 3, 2, -1,		4, { ESC, CS94, INTERM, ECMA, }, },
	/* G? 96 */
	{ CS96, 1, 2, -1, -1,		3, { ESC, CS96, ECMA, }, },
	/* G? 96 with 2nd intermediate char */
	{ CS96, 1, 3, 2, -1,		4, { ESC, CS96, INTERM, ECMA, }, },
	/* G? 94MULTI */
	{ CS94MULTI, 2, 3, -1, -1,	4, { ESC, '$', CS94, ECMA, }, },
	/* G? 96MULTI */
	{ CS96MULTI, 2, 3, -1, -1,	4, { ESC, '$', CS96, ECMA, }, },
	/* G? 94MULTI with version specification */
	{ CS94MULTI, 5, 6, -1, 2,	7, { ESC, '&', ECMA, ESC, '$', CS94, ECMA, }, },
	/* LS2/3 */
	{ -1, -1, -1, -1, -1,		2, { ESC, 'n', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, 'o', }, },
	/* LS1/2/3R */
	{ -1, -1, -1, -1, -1,		2, { ESC, '~', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, /*{*/ '}', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, '|', }, },
	/* SS2/3 */
	{ -1, -1, -1, -1, -1,		2, { ESC, 'N', }, },
	{ -1, -1, -1, -1, -1,		2, { ESC, 'O', }, },
	/* end of records */
	{ 0, }
};
512 
513 static int
514 seqmatch(const char * __restrict s, size_t n,
515 	 const struct seqtable * __restrict sp)
516 {
517 	const int *p;
518 
519 	_DIAGASSERT(s != NULL);
520 	_DIAGASSERT(sp != NULL);
521 
522 	p = sp->chars;
523 	while (p - sp->chars < n && p - sp->chars < sp->len) {
524 		switch (*p) {
525 		case ECMA:
526 			if (!isecma(*s))
527 				goto terminate;
528 			break;
529 		case OECMA:
530 			if (*s && strchr("@AB", *s))
531 				break;
532 			else
533 				goto terminate;
534 		case INTERM:
535 			if (!isinterm(*s))
536 				goto terminate;
537 			break;
538 		case CS94:
539 			if (*s && strchr("()*+", *s))
540 				break;
541 			else
542 				goto terminate;
543 		case CS96:
544 			if (*s && strchr(",-./", *s))
545 				break;
546 			else
547 				goto terminate;
548 		default:
549 			if (*s != *p)
550 				goto terminate;
551 			break;
552 		}
553 
554 		p++;
555 		s++;
556 	}
557 
558 terminate:
559 	return p - sp->chars;
560 }
561 
562 static wchar_t
563 _ISO2022_sgetwchar(_ISO2022EncodingInfo * __restrict ei,
564 			  const char * __restrict string, size_t n,
565 			  const char ** __restrict result,
566 			  _ISO2022State * __restrict psenc)
567 {
568 	wchar_t wchar = 0;
569 	int cur;
570 	const struct seqtable *sp;
571 	int nmatch;
572 	int i;
573 
574 	_DIAGASSERT(ei != NULL);
575 	_DIAGASSERT(state != NULL);
576 	_DIAGASSERT(string != NULL);
577 	/* result may be NULL */
578 
579 	while (1) {
580 		/* SI/SO */
581 		if (1 <= n && string[0] == '\017') {
582 			psenc->gl = 0;
583 			string++;
584 			n--;
585 			continue;
586 		}
587 		if (1 <= n && string[0] == '\016') {
588 			psenc->gl = 1;
589 			string++;
590 			n--;
591 			continue;
592 		}
593 
594 		/* SS2/3R */
595 		if (1 <= n && string[0] && strchr("\217\216", string[0])) {
596 			psenc->singlegl = psenc->singlegr =
597 			    (string[0] - '\216') + 2;
598 			string++;
599 			n--;
600 			continue;
601 		}
602 
603 		/* eat the letter if this is not ESC */
604 		if (1 <= n && string[0] != '\033')
605 			break;
606 
607 		/* look for a perfect match from escape sequences */
608 		for (sp = &seqtable[0]; sp->len; sp++) {
609 			nmatch = seqmatch(string, n, sp);
610 			if (sp->len == nmatch && n >= sp->len)
611 				break;
612 		}
613 
614 		if (!sp->len)
615 			goto notseq;
616 
617 		if (sp->type != -1) {
618 			if (sp->csoff == -1)
619 				i = 0;
620 			else {
621 				switch (sp->type) {
622 				case CS94:
623 				case CS94MULTI:
624 					i = string[sp->csoff] - '(';
625 					break;
626 				case CS96:
627 				case CS96MULTI:
628 					i = string[sp->csoff] - ',';
629 					break;
630 				}
631 			}
632 			psenc->g[i].type = sp->type;
633 			psenc->g[i].final = '\0';
634 			psenc->g[i].interm = '\0';
635 			psenc->g[i].vers = '\0';
636 			/* sp->finaloff must not be -1 */
637 			if (sp->finaloff != -1)
638 				psenc->g[i].final = string[sp->finaloff];
639 			if (sp->intermoff != -1)
640 				psenc->g[i].interm = string[sp->intermoff];
641 			if (sp->versoff != -1)
642 				psenc->g[i].vers = string[sp->versoff];
643 
644 			string += sp->len;
645 			n -= sp->len;
646 			continue;
647 		}
648 
649 		/* LS2/3 */
650 		if (2 <= n && string[0] == '\033'
651 		 && string[1] && strchr("no", string[1])) {
652 			psenc->gl = string[1] - 'n' + 2;
653 			string += 2;
654 			n -= 2;
655 			continue;
656 		}
657 
658 		/* LS1/2/3R */
659 			/* XXX: { for vi showmatch */
660 		if (2 <= n && string[0] == '\033'
661 		 && string[1] && strchr("~}|", string[1])) {
662 			psenc->gr = 3 - (string[1] - '|');
663 			string += 2;
664 			n -= 2;
665 			continue;
666 		}
667 
668 		/* SS2/3 */
669 		if (2 <= n && string[0] == '\033'
670 		 && string[1] && strchr("NO", string[1])) {
671 			psenc->singlegl = (string[1] - 'N') + 2;
672 			string += 2;
673 			n -= 2;
674 			continue;
675 		}
676 
677 	notseq:
678 		/*
679 		 * if we've got an unknown escape sequence, eat the ESC at the
680 		 * head.  otherwise, wait till full escape sequence comes.
681 		 */
682 		for (sp = &seqtable[0]; sp->len; sp++) {
683 			nmatch = seqmatch(string, n, sp);
684 			if (!nmatch)
685 				continue;
686 
687 			/*
688 			 * if we are in the middle of escape sequence,
689 			 * we still need to wait for more characters to come
690 			 */
691 			if (n < sp->len) {
692 				if (nmatch == n) {
693 					if (result)
694 						*result = string;
695 					return (_ISO2022INVALID);
696 				}
697 			} else {
698 				if (nmatch == sp->len) {
699 					/* this case should not happen */
700 					goto eat;
701 				}
702 			}
703 		}
704 
705 		break;
706 	}
707 
708 eat:
709 	/* no letter to eat */
710 	if (n < 1) {
711 		if (result)
712 			*result = string;
713 		return (_ISO2022INVALID);
714 	}
715 
716 	/* normal chars.  always eat C0/C1 as is. */
717 	if (iscntl(*string & 0xff))
718 		cur = -1;
719 	else if (*string & 0x80) {
720 		cur = (psenc->singlegr == -1)
721 			? psenc->gr : psenc->singlegr;
722 	} else {
723 		cur = (psenc->singlegl == -1)
724 			? psenc->gl : psenc->singlegl;
725 	}
726 
727 	if (cur == -1) {
728 asis:
729 		wchar = *string++ & 0xff;
730 		if (result)
731 			*result = string;
732 		/* reset single shift state */
733 		psenc->singlegr = psenc->singlegl = -1;
734 		return wchar;
735 	}
736 
737 	/* length error check */
738 	switch (psenc->g[cur].type) {
739 	case CS94MULTI:
740 	case CS96MULTI:
741 		if (!isthree(psenc->g[cur].final)) {
742 			if (2 <= n
743 			 && (string[0] & 0x80) == (string[1] & 0x80))
744 				break;
745 		} else {
746 			if (3 <= n
747 			 && (string[0] & 0x80) == (string[1] & 0x80)
748 			 && (string[0] & 0x80) == (string[2] & 0x80))
749 				break;
750 		}
751 
752 		/* we still need to wait for more characters to come */
753 		if (result)
754 			*result = string;
755 		return (_ISO2022INVALID);
756 
757 	case CS94:
758 	case CS96:
759 		if (1 <= n)
760 			break;
761 
762 		/* we still need to wait for more characters to come */
763 		if (result)
764 			*result = string;
765 		return (_ISO2022INVALID);
766 	}
767 
768 	/* range check */
769 	switch (psenc->g[cur].type) {
770 	case CS94:
771 		if (!(is94(string[0] & 0x7f)))
772 			goto asis;
773 	case CS96:
774 		if (!(is96(string[0] & 0x7f)))
775 			goto asis;
776 		break;
777 	case CS94MULTI:
778 		if (!(is94(string[0] & 0x7f) && is94(string[1] & 0x7f)))
779 			goto asis;
780 		break;
781 	case CS96MULTI:
782 		if (!(is96(string[0] & 0x7f) && is96(string[1] & 0x7f)))
783 			goto asis;
784 		break;
785 	}
786 
787 	/* extract the character. */
788 	switch (psenc->g[cur].type) {
789 	case CS94:
790 		/* special case for ASCII. */
791 		if (psenc->g[cur].final == 'B' && !psenc->g[cur].interm) {
792 			wchar = *string++;
793 			wchar &= 0x7f;
794 			break;
795 		}
796 		wchar = psenc->g[cur].final;
797 		wchar = (wchar << 8);
798 		wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
799 		wchar = (wchar << 8);
800 		wchar = (wchar << 8) | (*string++ & 0x7f);
801 		break;
802 	case CS96:
803 		/* special case for ISO-8859-1. */
804 		if (psenc->g[cur].final == 'A' && !psenc->g[cur].interm) {
805 			wchar = *string++;
806 			wchar &= 0x7f;
807 			wchar |= 0x80;
808 			break;
809 		}
810 		wchar = psenc->g[cur].final;
811 		wchar = (wchar << 8);
812 		wchar |= (psenc->g[cur].interm ? (0x80 | psenc->g[cur].interm) : 0);
813 		wchar = (wchar << 8);
814 		wchar = (wchar << 8) | (*string++ & 0x7f);
815 		wchar |= 0x80;
816 		break;
817 	case CS94MULTI:
818 	case CS96MULTI:
819 		wchar = psenc->g[cur].final;
820 		wchar = (wchar << 8);
821 		if (isthree(psenc->g[cur].final))
822 			wchar |= (*string++ & 0x7f);
823 		wchar = (wchar << 8) | (*string++ & 0x7f);
824 		wchar = (wchar << 8) | (*string++ & 0x7f);
825 		if (psenc->g[cur].type == CS96MULTI)
826 			wchar |= 0x80;
827 		break;
828 	}
829 
830 	if (result)
831 		*result = string;
832 	/* reset single shift state */
833 	psenc->singlegr = psenc->singlegl = -1;
834 	return wchar;
835 }
836 
837 
838 
839 static int
840 _citrus_ISO2022_mbrtowc_priv(_ISO2022EncodingInfo * __restrict ei,
841 			     wchar_t * __restrict pwc,
842 			     const char ** __restrict s,
843 			     size_t n, _ISO2022State * __restrict psenc,
844 			     size_t * __restrict nresult)
845 {
846 	wchar_t wchar;
847 	const char *s0, *p, *result;
848 	int c;
849 	int chlenbak;
850 
851 	_DIAGASSERT(nresult != 0);
852 	_DIAGASSERT(ei != NULL);
853 	_DIAGASSERT(psenc != NULL);
854 	_DIAGASSERT(s != NULL);
855 
856 	s0 = *s;
857 	c = 0;
858 	chlenbak = psenc->chlen;
859 
860 	/*
861 	 * if we have something in buffer, use that.
862 	 * otherwise, skip here
863 	 */
864 	if (psenc->chlen < 0 || psenc->chlen > sizeof(psenc->ch)) {
865 		/* illgeal state */
866 		_citrus_ISO2022_init_state(ei, psenc);
867 		goto encoding_error;
868 	}
869 	if (psenc->chlen == 0)
870 		goto emptybuf;
871 
872 	/* buffer is not empty */
873 	p = psenc->ch;
874 	while (psenc->chlen < sizeof(psenc->ch) && n >= 0) {
875 		if (n > 0) {
876 			psenc->ch[psenc->chlen++] = *s0++;
877 			n--;
878 		}
879 
880 		wchar = _ISO2022_sgetwchar(ei, p, psenc->chlen - (p-psenc->ch),
881 					   &result, psenc);
882 		if (wchar != _ISO2022INVALID) {
883 			c += result - p;
884 			if (psenc->chlen > c)
885 				memmove(psenc->ch, result, psenc->chlen - c);
886 			if (psenc->chlen < c)
887 				psenc->chlen = 0;
888 			else
889 				psenc->chlen -= c;
890 			goto output;
891 		}
892 
893 		c += result - p;
894 		p = result;
895 
896 		if (n == 0)
897 			goto restart;
898 	}
899 
900 	/* escape sequence too long? */
901 	goto encoding_error;
902 
903 emptybuf:
904 	wchar = _ISO2022_sgetwchar(ei, s0, n, &result, psenc);
905 	if (wchar != _ISO2022INVALID) {
906 		c += result - s0;
907 		psenc->chlen = 0;
908 		s0 = result;
909 		goto output;
910 	}
911 	if (result > s0 && n > result - s0) {
912 		c += (result - s0);
913 		n -= (result - s0);
914 		s0 = result;
915 		goto emptybuf;
916 	}
917 	n += c;
918 	if (n < sizeof(psenc->ch)) {
919 		memcpy(psenc->ch, s0 - c, n);
920 		psenc->chlen = n;
921 		s0 = result;
922 		goto restart;
923 	}
924 
925 	/* escape sequence too long? */
926 
927 encoding_error:
928 	psenc->chlen = 0;
929 	*nresult = (size_t)-1;
930 	return (EILSEQ);
931 
932 output:
933 	*s = s0;
934 	if (pwc)
935 		*pwc = wchar;
936 
937 	if (!wchar)
938 		*nresult = 0;
939 	else
940 		*nresult = c - chlenbak;
941 
942 	return (0);
943 
944 restart:
945 	*s = s0;
946 	*nresult = (size_t)-2;
947 
948 	return (0);
949 }
950 
951 static int
952 recommendation(_ISO2022EncodingInfo * __restrict ei,
953 	       _ISO2022Charset * __restrict cs)
954 {
955 	int i, j;
956 	_ISO2022Charset *recommend;
957 
958 	_DIAGASSERT(ei != NULL);
959 	_DIAGASSERT(cs != NULL);
960 
961 	/* first, try a exact match. */
962 	for (i = 0; i < 4; i++) {
963 		recommend = ei->recommend[i];
964 		for (j = 0; j < ei->recommendsize[i]; j++) {
965 			if (cs->type != recommend[j].type)
966 				continue;
967 			if (cs->final != recommend[j].final)
968 				continue;
969 			if (cs->interm != recommend[j].interm)
970 				continue;
971 
972 			return i;
973 		}
974 	}
975 
976 	/* then, try a wildcard match over final char. */
977 	for (i = 0; i < 4; i++) {
978 		recommend = ei->recommend[i];
979 		for (j = 0; j < ei->recommendsize[i]; j++) {
980 			if (cs->type != recommend[j].type)
981 				continue;
982 			if (cs->final && (cs->final != recommend[j].final))
983 				continue;
984 			if (cs->interm && (cs->interm != recommend[j].interm))
985 				continue;
986 
987 			return i;
988 		}
989 	}
990 
991 	/* there's no recommendation. make a guess. */
992 	if (ei->maxcharset == 0) {
993 		return 0;
994 	} else {
995 		switch (cs->type) {
996 		case CS94:
997 		case CS94MULTI:
998 			return 0;
999 		case CS96:
1000 		case CS96MULTI:
1001 			return 1;
1002 		}
1003 	}
1004 	return 0;
1005 }
1006 
1007 static int
1008 _ISO2022_sputwchar(_ISO2022EncodingInfo * __restrict ei, wchar_t wc,
1009 		   char * __restrict string, size_t n,
1010 		   char ** __restrict result,
1011 		   _ISO2022State * __restrict psenc)
1012 {
1013 	int i = 0, len;
1014 	_ISO2022Charset cs;
1015 	char *p;
1016 	char tmp[MB_LEN_MAX];
1017 	int target;
1018 	u_char mask;
1019 	int bit8;
1020 
1021 	_DIAGASSERT(ei != NULL);
1022 	_DIAGASSERT(string != NULL);
1023 	/* result may be NULL */
1024 	/* state appears to be unused */
1025 
1026 	if (iscntl(wc & 0xff)) {
1027 		/* go back to ASCII on control chars */
1028 		cs.type = CS94;
1029 		cs.final = 'B';
1030 		cs.interm = '\0';
1031 	} else if (!(wc & ~0xff)) {
1032 		if (wc & 0x80) {
1033 			/* special treatment for ISO-8859-1 */
1034 			cs.type = CS96;
1035 			cs.final = 'A';
1036 			cs.interm = '\0';
1037 		} else {
1038 			/* special treatment for ASCII */
1039 			cs.type = CS94;
1040 			cs.final = 'B';
1041 			cs.interm = '\0';
1042 		}
1043 	} else {
1044 		cs.final = (wc >> 24) & 0x7f;
1045 		if ((wc >> 16) & 0x80)
1046 			cs.interm = (wc >> 16) & 0x7f;
1047 		else
1048 			cs.interm = '\0';
1049 		if (wc & 0x80)
1050 			cs.type = (wc & 0x00007f00) ? CS96MULTI : CS96;
1051 		else
1052 			cs.type = (wc & 0x00007f00) ? CS94MULTI : CS94;
1053 	}
1054 	target = recommendation(ei, &cs);
1055 	p = tmp;
1056 	bit8 = ei->flags & F_8BIT;
1057 
1058 	/* designate the charset onto the target plane(G0/1/2/3). */
1059 	if (psenc->g[target].type == cs.type
1060 	 && psenc->g[target].final == cs.final
1061 	 && psenc->g[target].interm == cs.interm)
1062 		goto planeok;
1063 
1064 	*p++ = '\033';
1065 	if (cs.type == CS94MULTI || cs.type == CS96MULTI)
1066 		*p++ = '$';
1067 	if (target == 0 && cs.type == CS94MULTI && strchr("@AB", cs.final)
1068 	 && !cs.interm && !(ei->flags & F_NOOLD))
1069 		;
1070 	else if (cs.type == CS94 || cs.type == CS94MULTI)
1071 		*p++ = "()*+"[target];
1072 	else
1073 		*p++ = ",-./"[target];
1074 	if (cs.interm)
1075 		*p++ = cs.interm;
1076 	*p++ = cs.final;
1077 
1078 	psenc->g[target].type = cs.type;
1079 	psenc->g[target].final = cs.final;
1080 	psenc->g[target].interm = cs.interm;
1081 
1082 planeok:
1083 	/* invoke the plane onto GL or GR. */
1084 	if (psenc->gl == target)
1085 		goto sideok;
1086 	if (bit8 && psenc->gr == target)
1087 		goto sideok;
1088 
1089 	if (target == 0 && (ei->flags & F_LS0)) {
1090 		*p++ = '\017';
1091 		psenc->gl = 0;
1092 	} else if (target == 1 && (ei->flags & F_LS1)) {
1093 		*p++ = '\016';
1094 		psenc->gl = 1;
1095 	} else if (target == 2 && (ei->flags & F_LS2)) {
1096 		*p++ = '\033';
1097 		*p++ = 'n';
1098 		psenc->gl = 2;
1099 	} else if (target == 3 && (ei->flags & F_LS3)) {
1100 		*p++ = '\033';
1101 		*p++ = 'o';
1102 		psenc->gl = 3;
1103 	} else if (bit8 && target == 1 && (ei->flags & F_LS1R)) {
1104 		*p++ = '\033';
1105 		*p++ = '~';
1106 		psenc->gr = 1;
1107 	} else if (bit8 && target == 2 && (ei->flags & F_LS2R)) {
1108 		*p++ = '\033';
1109 		/*{*/
1110 		*p++ = '}';
1111 		psenc->gr = 2;
1112 	} else if (bit8 && target == 3 && (ei->flags & F_LS3R)) {
1113 		*p++ = '\033';
1114 		*p++ = '|';
1115 		psenc->gr = 3;
1116 	} else if (target == 2 && (ei->flags & F_SS2)) {
1117 		*p++ = '\033';
1118 		*p++ = 'N';
1119 		psenc->singlegl = 2;
1120 	} else if (target == 3 && (ei->flags & F_SS3)) {
1121 		*p++ = '\033';
1122 		*p++ = 'O';
1123 		psenc->singlegl = 3;
1124 	} else if (bit8 && target == 2 && (ei->flags & F_SS2R)) {
1125 		*p++ = '\216';
1126 		*p++ = 'N';
1127 		psenc->singlegl = psenc->singlegr = 2;
1128 	} else if (bit8 && target == 3 && (ei->flags & F_SS3R)) {
1129 		*p++ = '\217';
1130 		*p++ = 'O';
1131 		psenc->singlegl = psenc->singlegr = 3;
1132 	} else
1133 		abort();
1134 
1135 sideok:
1136 	if (psenc->singlegl == target)
1137 		mask = 0x00;
1138 	else if (psenc->singlegr == target)
1139 		mask = 0x80;
1140 	else if (psenc->gl == target)
1141 		mask = 0x00;
1142 	else if ((ei->flags & F_8BIT) && psenc->gr == target)
1143 		mask = 0x80;
1144 	else
1145 		abort();
1146 
1147 	switch (cs.type) {
1148 	case CS94:
1149 	case CS96:
1150 		i = 1;
1151 		break;
1152 	case CS94MULTI:
1153 	case CS96MULTI:
1154 		i = isthree(cs.final) ? 3 : 2;
1155 		break;
1156 	}
1157 	while (i-- > 0)
1158 		*p++ = ((wc >> (i << 3)) & 0x7f) | mask;
1159 
1160 	/* reset single shift state */
1161 	psenc->singlegl = psenc->singlegr = -1;
1162 
1163 	len = p - tmp;
1164 	if (n < len) {
1165 		if (result)
1166 			*result = (char *)0;
1167 	} else {
1168 		if (result)
1169 			*result = string + len;
1170 		memcpy(string, tmp, len);
1171 	}
1172 	return len;
1173 }
1174 
1175 static int
1176 _citrus_ISO2022_put_state_reset(_ISO2022EncodingInfo * __restrict ei,
1177 				char * __restrict s, size_t n,
1178 				_ISO2022State * __restrict psenc,
1179 				size_t * __restrict nresult)
1180 {
1181 	char buf[MB_LEN_MAX];
1182 	char *result;
1183 	int len, ret;
1184 
1185 	_DIAGASSERT(ei != NULL);
1186 	_DIAGASSERT(nresult != 0);
1187 	_DIAGASSERT(s != NULL);
1188 
1189 	/* XXX state will be modified after this operation... */
1190 	len = _ISO2022_sputwchar(ei, L'\0', buf, sizeof(buf), &result, psenc);
1191 	if (len==0) {
1192 		ret = EINVAL;
1193 		goto err;
1194 	}
1195 	if (sizeof(buf) < len || n < len-1) {
1196 		/* XXX should recover state? */
1197 		ret = E2BIG;
1198 		goto err;
1199 	}
1200 
1201 	memcpy(s, buf, len-1);
1202 	*nresult = (size_t)(len-1);
1203 	return (0);
1204 
1205 err:
1206 	/* bound check failure */
1207 	*nresult = (size_t)-1;
1208 	return ret;
1209 }
1210 
1211 static int
1212 _citrus_ISO2022_wcrtomb_priv(_ISO2022EncodingInfo * __restrict ei,
1213 			     char * __restrict s, size_t n, wchar_t wc,
1214 			     _ISO2022State * __restrict psenc,
1215 			     size_t * __restrict nresult)
1216 {
1217 	char buf[MB_LEN_MAX];
1218 	char *result;
1219 	int len, ret;
1220 
1221 	_DIAGASSERT(ei != NULL);
1222 	_DIAGASSERT(nresult != 0);
1223 	_DIAGASSERT(s != NULL);
1224 
1225 	/* XXX state will be modified after this operation... */
1226 	len = _ISO2022_sputwchar(ei, wc, buf, sizeof(buf), &result, psenc);
1227 	if (sizeof(buf) < len || n < len) {
1228 		/* XXX should recover state? */
1229 		ret = E2BIG;
1230 		goto err;
1231 	}
1232 
1233 	memcpy(s, buf, len);
1234 	*nresult = (size_t)len;
1235 	return (0);
1236 
1237 err:
1238 	/* bound check failure */
1239 	*nresult = (size_t)-1;
1240 	return ret;
1241 }
1242 
1243 static __inline int
1244 /*ARGSUSED*/
1245 _citrus_ISO2022_stdenc_wctocs(_ISO2022EncodingInfo * __restrict ei,
1246 			      _csid_t * __restrict csid,
1247 			      _index_t * __restrict idx, wchar_t wc)
1248 {
1249 	wchar_t m, nm;
1250 
1251 	_DIAGASSERT(csid != NULL && idx != NULL);
1252 
1253 	m = wc & 0x7FFF8080;
1254 	nm = wc & 0x007F7F7F;
1255 	if (m & 0x00800000) {
1256 		nm &= 0x00007F7F;
1257 	} else {
1258 		m &= 0x7F008080;
1259 	}
1260 	if (nm & 0x007F0000) {
1261 		/* ^3 mark */
1262 		m |= 0x007F0000;
1263 	} else if (nm & 0x00007F00) {
1264 		/* ^2 mark */
1265 		m |= 0x00007F00;
1266 	}
1267 	*csid = (_csid_t)m;
1268 	*idx  = (_index_t)nm;
1269 
1270 	return (0);
1271 }
1272 
1273 static __inline int
1274 /*ARGSUSED*/
1275 _citrus_ISO2022_stdenc_cstowc(_ISO2022EncodingInfo * __restrict ei,
1276 			      wchar_t * __restrict wc,
1277 			      _csid_t csid, _index_t idx)
1278 {
1279 
1280 	_DIAGASSERT(ei != NULL && wc != NULL);
1281 
1282 	*wc = (wchar_t)(csid & 0x7F808080) | (wchar_t)idx;
1283 
1284 	return (0);
1285 }
1286 
1287 /* ----------------------------------------------------------------------
1288  * public interface for ctype
1289  */
1290 
1291 _CITRUS_CTYPE_DECLS(ISO2022);
1292 _CITRUS_CTYPE_DEF_OPS(ISO2022);
1293 
1294 #include "citrus_ctype_template.h"
1295 
1296 /* ----------------------------------------------------------------------
1297  * public interface for stdenc
1298  */
1299 
1300 _CITRUS_STDENC_DECLS(ISO2022);
1301 _CITRUS_STDENC_DEF_OPS(ISO2022);
1302 
1303 #include "citrus_stdenc_template.h"
1304