xref: /openbsd-src/sys/dev/wscons/wsemul_subr.c (revision ca22e28b292ef38be4b4c64ba27ad85da6c5f1ee)
1*ca22e28bSmiod /*	$OpenBSD: wsemul_subr.c,v 1.2 2023/03/06 17:14:44 miod Exp $	*/
2ae56ac94Smiod 
3ae56ac94Smiod /*
4ae56ac94Smiod  * Copyright (c) 2007, 2013 Miodrag Vallat.
5ae56ac94Smiod  *
6ae56ac94Smiod  * Permission to use, copy, modify, and distribute this software for any
7ae56ac94Smiod  * purpose with or without fee is hereby granted, provided that the above
8ae56ac94Smiod  * copyright notice, this permission notice, and the disclaimer below
9ae56ac94Smiod  * appear in all copies.
10ae56ac94Smiod  *
11ae56ac94Smiod  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12ae56ac94Smiod  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13ae56ac94Smiod  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14ae56ac94Smiod  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15ae56ac94Smiod  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16ae56ac94Smiod  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17ae56ac94Smiod  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18ae56ac94Smiod  */
19ae56ac94Smiod 
20*ca22e28bSmiod /*
21*ca22e28bSmiod  * Part of the UTF-8 state machine logic borrowed from citrus_utf8.c
22*ca22e28bSmiod  * under the following licence:
23*ca22e28bSmiod  */
24*ca22e28bSmiod /*-
25*ca22e28bSmiod  * Copyright (c) 2002-2004 Tim J. Robbins
26*ca22e28bSmiod  * All rights reserved.
27*ca22e28bSmiod  *
28*ca22e28bSmiod  * Redistribution and use in source and binary forms, with or without
29*ca22e28bSmiod  * modification, are permitted provided that the following conditions
30*ca22e28bSmiod  * are met:
31*ca22e28bSmiod  * 1. Redistributions of source code must retain the above copyright
32*ca22e28bSmiod  *    notice, this list of conditions and the following disclaimer.
33*ca22e28bSmiod  * 2. Redistributions in binary form must reproduce the above copyright
34*ca22e28bSmiod  *    notice, this list of conditions and the following disclaimer in the
35*ca22e28bSmiod  *    documentation and/or other materials provided with the distribution.
36*ca22e28bSmiod  *
37*ca22e28bSmiod  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
38*ca22e28bSmiod  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
39*ca22e28bSmiod  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
40*ca22e28bSmiod  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
41*ca22e28bSmiod  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
42*ca22e28bSmiod  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
43*ca22e28bSmiod  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
44*ca22e28bSmiod  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
45*ca22e28bSmiod  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
46*ca22e28bSmiod  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
47*ca22e28bSmiod  * SUCH DAMAGE.
48*ca22e28bSmiod  */
49*ca22e28bSmiod 
50ae56ac94Smiod #include <sys/param.h>
51ae56ac94Smiod #include <sys/systm.h>
52ae56ac94Smiod #include <sys/errno.h>
53ae56ac94Smiod 
54ae56ac94Smiod #include <dev/wscons/wscons_features.h>
55ae56ac94Smiod #include <dev/wscons/wsconsio.h>
56ae56ac94Smiod #include <dev/wscons/wsdisplayvar.h>
57ae56ac94Smiod #include <dev/wscons/wsemulvar.h>
58ae56ac94Smiod #include <dev/wscons/wsksymdef.h>
59ae56ac94Smiod 
60ae56ac94Smiod int	wsemul_local_translate(u_int32_t, kbd_t, u_char *);
61ae56ac94Smiod 
62ae56ac94Smiod /*
63ae56ac94Smiod  * Get characters from an input stream and update the input state.
64ae56ac94Smiod  * Processing stops when the stream is empty, or a complete character
65ae56ac94Smiod  * sequence has been recognized, in which case it returns zero.
66ae56ac94Smiod  */
67ae56ac94Smiod int
wsemul_getchar(const u_char ** inbuf,u_int * inlen,struct wsemul_inputstate * state,int allow_utf8)68ae56ac94Smiod wsemul_getchar(const u_char **inbuf, u_int *inlen,
69ae56ac94Smiod     struct wsemul_inputstate *state, int allow_utf8)
70ae56ac94Smiod {
71ae56ac94Smiod 	u_int len = *inlen;
72ae56ac94Smiod 	const u_char *buf = *inbuf;
73*ca22e28bSmiod #ifdef HAVE_UTF8_SUPPORT
74*ca22e28bSmiod 	int rc;
75*ca22e28bSmiod 	u_int32_t tmpchar, lbound;
76*ca22e28bSmiod 	u_int mbleft;
77*ca22e28bSmiod #endif
78ae56ac94Smiod 
79ae56ac94Smiod 	if (len == 0)
80*ca22e28bSmiod 		return EAGAIN;
81ae56ac94Smiod 
82*ca22e28bSmiod #ifndef HAVE_UTF8_SUPPORT
83ae56ac94Smiod 	state->inchar = *buf++;
84ae56ac94Smiod 	state->mbleft = 0;
85ae56ac94Smiod 	len--;
86ae56ac94Smiod 	*inlen = len;
87ae56ac94Smiod 	*inbuf = buf;
88*ca22e28bSmiod 	return 0;
89ae56ac94Smiod #else
90ae56ac94Smiod 	/*
91ae56ac94Smiod 	 * If we do not allow multibyte sequences, process as quickly
92ae56ac94Smiod 	 * as possible.
93ae56ac94Smiod 	 */
94ae56ac94Smiod 	if (!allow_utf8) {
95ae56ac94Smiod 		state->inchar = *buf++;
96ae56ac94Smiod 		state->mbleft = 0;
97ae56ac94Smiod 		len--;
98ae56ac94Smiod 		*inlen = len;
99ae56ac94Smiod 		*inbuf = buf;
100*ca22e28bSmiod 		return 0;
101ae56ac94Smiod 	}
102ae56ac94Smiod 
103*ca22e28bSmiod 	rc = EAGAIN;
104ae56ac94Smiod 	tmpchar = state->inchar;
105*ca22e28bSmiod 	lbound = state->lbound;
106ae56ac94Smiod 	mbleft = state->mbleft;
107ae56ac94Smiod 
108ae56ac94Smiod 	while (len != 0) {
109ae56ac94Smiod 		u_int32_t frag = (u_int32_t)*buf++;
110ae56ac94Smiod 		len--;
111ae56ac94Smiod 
112ae56ac94Smiod 		/*
113ae56ac94Smiod 		 * If we are in the middle of a multibyte sequence, try
114ae56ac94Smiod 		 * to complete it.
115ae56ac94Smiod 		 */
116ae56ac94Smiod 
117ae56ac94Smiod 		if (mbleft != 0) {
118*ca22e28bSmiod 			if ((frag & 0xc0) != 0x80)
119*ca22e28bSmiod 				goto invalid;
120*ca22e28bSmiod 
121ae56ac94Smiod 			tmpchar = (tmpchar << 6) | (frag & 0x3f);
122ae56ac94Smiod 			mbleft--;
123ae56ac94Smiod 			if (mbleft == 0) {
124*ca22e28bSmiod 				if (tmpchar < lbound)
125*ca22e28bSmiod 					goto invalid;
126*ca22e28bSmiod 				if (tmpchar >= 0xd800 && tmpchar < 0xe000)
127*ca22e28bSmiod 					goto invalid;
128*ca22e28bSmiod 				if (tmpchar >= 0x110000)
129*ca22e28bSmiod 					goto invalid;
130ae56ac94Smiod 				rc = 0;
131ae56ac94Smiod 				break;
132ae56ac94Smiod 			}
133*ca22e28bSmiod 			continue;
134ae56ac94Smiod 		}
135ae56ac94Smiod 
136ae56ac94Smiod 		/*
137ae56ac94Smiod 		 * Otherwise let's decide if this is the start of a new
138ae56ac94Smiod 		 * multibyte sequence, or a 7-bit character.
139ae56ac94Smiod 		 */
140ae56ac94Smiod 
141ae56ac94Smiod 		if ((frag & 0x80) == 0) {
142ae56ac94Smiod 			tmpchar = frag;
143ae56ac94Smiod 			rc = 0;
144ae56ac94Smiod 			break;
145ae56ac94Smiod 		}
146ae56ac94Smiod 
147*ca22e28bSmiod 		if ((frag & 0xe0) == 0xc0) {
148*ca22e28bSmiod 			frag &= 0x1f;
149ae56ac94Smiod 			mbleft = 1;
150*ca22e28bSmiod 			lbound = 0x80;
151*ca22e28bSmiod 		} else if ((frag & 0xf0) == 0xe0) {
152*ca22e28bSmiod 			frag &= 0x0f;
153*ca22e28bSmiod 			mbleft = 2;
154*ca22e28bSmiod 			lbound = 0x800;
155*ca22e28bSmiod 		} else if ((frag & 0xf8) == 0xf0) {
156*ca22e28bSmiod 			frag &= 0x07;
157*ca22e28bSmiod 			mbleft = 3;
158*ca22e28bSmiod 			lbound = 0x10000;
159*ca22e28bSmiod 		} else {
160*ca22e28bSmiod 			goto invalid;
161ae56ac94Smiod 		}
162ae56ac94Smiod 
163ae56ac94Smiod 		tmpchar = frag;
164*ca22e28bSmiod 		state->lbound = lbound;
165*ca22e28bSmiod 		continue;
166*ca22e28bSmiod 
167*ca22e28bSmiod invalid:
168*ca22e28bSmiod 		/* Abort the ill-formed sequence and continue */
169*ca22e28bSmiod 		mbleft = 0;
170*ca22e28bSmiod 		tmpchar = 0;
171*ca22e28bSmiod 		rc = EILSEQ;
172ae56ac94Smiod 	}
173ae56ac94Smiod 
174ae56ac94Smiod 	state->inchar = tmpchar;
175ae56ac94Smiod 	state->mbleft = mbleft;
176ae56ac94Smiod 	*inlen = len;
177ae56ac94Smiod 	*inbuf = buf;
178*ca22e28bSmiod 	return rc;
179ae56ac94Smiod #endif
180ae56ac94Smiod }
181ae56ac94Smiod 
/*
 * Unicode Cyrillic to KOI8 translation table (starts at U+0400),
 * from RFC 2319.
 *
 * The table is indexed by (code point - 0x400) and covers U+0400 to
 * U+045F.  A zero entry means there is no mapping for that code point;
 * wsemul_local_translate() skips such entries.
 */
const u_int8_t cyrillic_to_koi8[] = {
	0x00,	/* IE grave */		/* 0400 */
	0xb3,	/* IO */
	0x00,	/* DJE */
	0x00,	/* GJE */
	0xb4,	/* UKR IE */
	0x00,	/* DZE */
	0xb6,	/* BYE/UKR I */
	0xb7,	/* YI */
	0x00,	/* JE */
	0x00,	/* LJE */
	0x00,	/* NJE */
	0x00,	/* TSHE */
	0x00,	/* KJE */
	0x00,	/* I grave */
	0x00,	/* short U */
	0x00,	/* DZHE */
	0xe1,	/* A */			/* 0410 */
	0xe2,	/* BE */
	0xf7,	/* VE */
	0xe7,	/* GHE */
	0xe4,	/* DE */
	0xe5,	/* IE */
	0xf6,	/* ZHE */
	0xfa,	/* ZE */
	0xe9,	/* I */
	0xea,	/* short I */
	0xeb,	/* KA */
	0xec,	/* EL */
	0xed,	/* EM */
	0xee,	/* EN */
	0xef,	/* O */
	0xf0,	/* PE */
	0xf2,	/* ER */		/* 0420 */
	0xf3,	/* ES */
	0xf4,	/* TE */
	0xf5,	/* U */
	0xe6,	/* EF */
	0xe8,	/* HA */
	0xe3,	/* TSE */
	0xfe,	/* CHE */
	0xfb,	/* SHA */
	0xfd,	/* SHCHA */
	0xff,	/* HARD SIGN */
	0xf9,	/* YERU */
	0xf8,	/* SOFT SIGN */
	0xfc,	/* E */
	0xe0,	/* YU */
	0xf1,	/* YA */
	0xc1,	/* a */			/* 0430 */
	0xc2,	/* be */
	0xd7,	/* ve */
	0xc7,	/* ghe */
	0xc4,	/* de */
	0xc5,	/* ie */
	0xd6,	/* zhe */
	0xda,	/* ze */
	0xc9,	/* i */
	0xca,	/* short i */
	0xcb,	/* ka */
	0xcc,	/* el */
	0xcd,	/* em */
	0xce,	/* en */
	0xcf,	/* o */
	0xd0,	/* pe */
	0xd2,	/* er */		/* 0440 */
	0xd3,	/* es */
	0xd4,	/* te */
	0xd5,	/* u */
	0xc6,	/* ef */
	0xc8,	/* ha */
	0xc3,	/* tse */
	0xde,	/* che */
	0xdb,	/* sha */
	0xdd,	/* shcha */
	0xdf,	/* hard sign */
	0xd9,	/* yeru */
	0xd8,	/* soft sign */
	0xdc,	/* e */
	0xc0,	/* yu */
	0xd1,	/* ya */
	0x00,	/* ie grave */		/* 0450 */
	0xa3,	/* io */
	0x00,	/* dje */
	0x00,	/* gje */
	0xa4,	/* UKR ie */
	0x00,	/* dze */
	0xa6,	/* bye/UKR i */
	0xa7,	/* yi */
	0x00,	/* je */
	0x00,	/* lje */
	0x00,	/* nje */
	0x00,	/* tshe */
	0x00,	/* kje */
	0x00,	/* i grave */
	0x00,	/* short u */
	0x00	/* dzhe */
};
284ae56ac94Smiod 
/*
 * Europe to Latin-2 translation table (starts at U+0100).
 *
 * The table is indexed by (code point - 0x100) and covers U+0100 to
 * U+017F.  A zero entry means there is no mapping for that code point;
 * wsemul_local_translate() skips such entries.
 */
const u_int8_t unicode_to_latin2[] = {
	0x00,	/* A macron */		/* 0100 */
	0x00,	/* a macron */
	0xc3,	/* A breve */
	0xe3,	/* a breve */
	0xa1,	/* A ogonek */
	0xb1,	/* a ogonek */
	0xc6,	/* C acute */
	0xe6,	/* c acute */
	0x00,	/* C circumflex */
	0x00,	/* c circumflex */
	0x00,	/* C abovering */
	0x00,	/* c abovering */
	0xc8,	/* C caron */
	0xe8,	/* c caron */
	0xcf,	/* D caron */
	0xef,	/* d caron */
	0xd0,	/* D stroke */		/* 0110 */
	0xf0,	/* d stroke */
	0x00,	/* E macron */
	0x00,	/* e macron */
	0x00,	/* E breve */
	0x00,	/* e breve */
	0x00,	/* E abovering */
	0x00,	/* e abovering */
	0xca,	/* E ogonek */
	0xea,	/* e ogonek */
	0xcc,	/* E caron */
	0xec,	/* e caron */
	0x00,	/* G circumflex */
	0x00,	/* g circumflex */
	0x00,	/* G breve */
	0x00,	/* g breve */
	0x00,	/* G abovering */	/* 0120 */
	0x00,	/* g abovering */
	0x00,	/* G cedilla */
	0x00,	/* g cedilla */
	0x00,	/* H circumflex */
	0x00,	/* h circumflex */
	0x00,	/* H stroke */
	0x00,	/* h stroke */
	0x00,	/* I tilde */
	0x00,	/* i tilde */
	0x00,	/* I macron */
	0x00,	/* i macron */
	0x00,	/* I breve */
	0x00,	/* i breve */
	0x00,	/* I ogonek */
	0x00,	/* i ogonek */
	0x00,	/* dotted I */		/* 0130 */
	0x00,	/* non-dotted i */
	0x00,	/* ligature IJ */
	0x00,	/* ligature ij */
	0x00,	/* J circumflex */
	0x00,	/* j circumflex */
	0x00,	/* K cedilla */
	0x00,	/* k cedilla */
	0x00,	/* kra */
	0xc5,	/* L acute */
	0xe5,	/* l acute */
	0x00,	/* L cedilla */
	0x00,	/* l cedilla */
	0xa5,	/* L caron */
	0xb5,	/* l caron */
	0x00,	/* L middle dot */
	0x00,	/* l middle dot */	/* 0140 */
	0xa3,	/* L stroke */
	0xb3,	/* l stroke */
	0xd1,	/* N acute */
	0xf1,	/* n acute */
	0x00,	/* N cedilla */
	0x00,	/* n cedilla */
	0xd2,	/* N caron */
	0xf2,	/* n caron */
	0x00,	/* n preceded by apostrophe */
	0x00,	/* ENG */
	0x00,	/* eng */
	0x00,	/* O macron */
	0x00,	/* o macron */
	0x00,	/* O breve */
	0x00,	/* o breve */
	0xd5,	/* O double acute */	/* 0150 */
	0xf5,	/* o double acute */
	0x00,	/* ligature OE */
	0x00,	/* ligature oe */
	0xc0,	/* R acute */
	0xe0,	/* r acute */
	0x00,	/* R cedilla */
	0x00,	/* r cedilla */
	0xd8,	/* R caron */
	0xf8,	/* r caron */
	0xa6,	/* S acute */
	0xb6,	/* s acute */
	0x00,	/* S circumflex */
	0x00,	/* s circumflex */
	0xaa,	/* S cedilla */
	0xba,	/* s cedilla */
	0xa9,	/* S caron */		/* 0160 */
	0xb9,	/* s caron */
	0xde,	/* T cedilla */
	0xfe,	/* t cedilla */
	0xab,	/* T caron */
	0xbb,	/* t caron */
	0x00,	/* T stroke */
	0x00,	/* t stroke */
	0x00,	/* U tilde */
	0x00,	/* u tilde */
	0x00,	/* U macron */
	0x00,	/* u macron */
	0x00,	/* U breve */
	0x00,	/* u breve */
	0xd9,	/* U abovering */
	0xf9,	/* u abovering */
	0xdb,	/* U double acute */	/* 0170 */
	0xfb,	/* u double acute */
	0x00,	/* U ogonek */
	0x00,	/* u ogonek */
	0x00,	/* W circumflex */
	0x00,	/* w circumflex */
	0x00,	/* Y circumflex */
	0x00,	/* y circumflex */
	0x00,	/* Y diaeresis */
	0xac,	/* Z acute */
	0xbc,	/* z acute */
	0xaf,	/* Z abovering */
	0xbf,	/* z abovering */
	0xae,	/* Z caron */
	0xbe,	/* z caron */
	0x00	/* long s */
};
418ae56ac94Smiod 
/*
 * Baltic to Latin-7 translation table (starts at U+0100).
 *
 * The table is indexed by (code point - 0x100) and covers U+0100 to
 * U+017F.  A zero entry means there is no mapping for that code point;
 * wsemul_local_translate() skips such entries.
 */
const u_int8_t unicode_to_latin7[] = {
	0xc2,	/* A macron */		/* 0100 */
	0xe2,	/* a macron */
	0x00,	/* A breve */
	0x00,	/* a breve */
	0xc0,	/* A ogonek */
	0xe0,	/* a ogonek */
	0xc3,	/* C acute */
	0xe3,	/* c acute */
	0x00,	/* C circumflex */
	0x00,	/* c circumflex */
	0x00,	/* C abovering */
	0x00,	/* c abovering */
	0xc8,	/* C caron */
	0xe8,	/* c caron */
	0x00,	/* D caron */
	0x00,	/* d caron */
	0x00,	/* D stroke */		/* 0110 */
	0x00,	/* d stroke */
	0xc7,	/* E macron */
	0xe7,	/* e macron */
	0x00,	/* E breve */
	0x00,	/* e breve */
	0xcb,	/* E abovering */
	0xeb,	/* e abovering */
	0xc6,	/* E ogonek */
	0xe6,	/* e ogonek */
	0x00,	/* E caron */
	0x00,	/* e caron */
	0x00,	/* G circumflex */
	0x00,	/* g circumflex */
	0x00,	/* G breve */
	0x00,	/* g breve */
	0x00,	/* G abovering */	/* 0120 */
	0x00,	/* g abovering */
	0xcc,	/* G cedilla */
	0xec,	/* g cedilla */
	0x00,	/* H circumflex */
	0x00,	/* h circumflex */
	0x00,	/* H stroke */
	0x00,	/* h stroke */
	0x00,	/* I tilde */
	0x00,	/* i tilde */
	0xce,	/* I macron */
	0xee,	/* i macron */
	0x00,	/* I breve */
	0x00,	/* i breve */
	0xc1,	/* I ogonek */
	0xe1,	/* i ogonek */
	0x00,	/* dotted I */		/* 0130 */
	0x00,	/* non-dotted i */
	0x00,	/* ligature IJ */
	0x00,	/* ligature ij */
	0x00,	/* J circumflex */
	0x00,	/* j circumflex */
	0xcd,	/* K cedilla */
	0xed,	/* k cedilla */
	0x00,	/* kra */
	0x00,	/* L acute */
	0x00,	/* l acute */
	0xcf,	/* L cedilla */
	0xef,	/* l cedilla */
	0x00,	/* L caron */
	0x00,	/* l caron */
	0x00,	/* L middle dot */
	0x00,	/* l middle dot */	/* 0140 */
	0xd9,	/* L stroke */
	0xf9,	/* l stroke */
	0xd1,	/* N acute */
	0xf1,	/* n acute */
	0xd2,	/* N cedilla */
	0xf2,	/* n cedilla */
	0x00,	/* N caron */
	0x00,	/* n caron */
	0x00,	/* n preceded by apostrophe */
	0x00,	/* ENG */
	0x00,	/* eng */
	0xd4,	/* O macron */
	0xf4,	/* o macron */
	0x00,	/* O breve */
	0x00,	/* o breve */
	0x00,	/* O double acute */	/* 0150 */
	0x00,	/* o double acute */
	0x00,	/* ligature OE */
	0x00,	/* ligature oe */
	0x00,	/* R acute */
	0x00,	/* r acute */
	0xaa,	/* R cedilla */
	0xba,	/* r cedilla */
	0x00,	/* R caron */
	0x00,	/* r caron */
	0xda,	/* S acute */
	0xfa,	/* s acute */
	0x00,	/* S circumflex */
	0x00,	/* s circumflex */
	0x00,	/* S cedilla */
	0x00,	/* s cedilla */
	0xd0,	/* S caron */		/* 0160 */
	0xf0,	/* s caron */
	0x00,	/* T cedilla */
	0x00,	/* t cedilla */
	0x00,	/* T caron */
	0x00,	/* t caron */
	0x00,	/* T stroke */
	0x00,	/* t stroke */
	0x00,	/* U tilde */
	0x00,	/* u tilde */
	0xdb,	/* U macron */
	0xfb,	/* u macron */
	0x00,	/* U breve */
	0x00,	/* u breve */
	0x00,	/* U abovering */
	0x00,	/* u abovering */
	0x00,	/* U double acute */	/* 0170 */
	0x00,	/* u double acute */
	0xd8,	/* U ogonek */
	0xf8,	/* u ogonek */
	0x00,	/* W circumflex */
	0x00,	/* w circumflex */
	0x00,	/* Y circumflex */
	0x00,	/* y circumflex */
	0x00,	/* Y diaeresis */
	0xca,	/* Z acute */
	0xea,	/* z acute */
	0xdd,	/* Z abovering */
	0xfd,	/* z abovering */
	0xde,	/* Z caron */
	0xfe,	/* z caron */
	0x00	/* long s */
};
552ae56ac94Smiod 
553ae56ac94Smiod /*
554ae56ac94Smiod  * Keysym to local 8-bit charset sequence translation function.
555ae56ac94Smiod  * The out buffer is at least one character long.
556ae56ac94Smiod  * The keyboard layout is used as a hint to decide which latin charset to
557ae56ac94Smiod  * assume.
558ae56ac94Smiod  */
559ae56ac94Smiod int
wsemul_local_translate(u_int32_t unisym,kbd_t layout,u_char * out)560ae56ac94Smiod wsemul_local_translate(u_int32_t unisym, kbd_t layout, u_char *out)
561ae56ac94Smiod {
562ae56ac94Smiod 	switch (unisym >> 7) {
563ae56ac94Smiod 	case 0x0080 >> 7:
564ae56ac94Smiod 		switch (KB_ENCODING(layout)) {
565ae56ac94Smiod 		case KB_LT:
566ae56ac94Smiod 		case KB_LV:
567ae56ac94Smiod 			switch (unisym) {
568ae56ac94Smiod 			case KS_L7_AE:
569ae56ac94Smiod 				unisym = 0xaf;
570ae56ac94Smiod 				break;
571ae56ac94Smiod 			case KS_L7_Ostroke:
572ae56ac94Smiod 				unisym = 0xa8;
573ae56ac94Smiod 				break;
574ae56ac94Smiod 			case KS_L7_ae:
575ae56ac94Smiod 				unisym = 0xbf;
576ae56ac94Smiod 				break;
577ae56ac94Smiod 			case KS_L7_ostroke:
578ae56ac94Smiod 				unisym = 0xb8;
579ae56ac94Smiod 				break;
580ae56ac94Smiod 			}
581ae56ac94Smiod 		}
582ae56ac94Smiod 		break;
583ae56ac94Smiod 
584ae56ac94Smiod 	case 0x0100 >> 7:
585ae56ac94Smiod 		switch (KB_ENCODING(layout)) {
586ae56ac94Smiod 		case KB_LT:
587ae56ac94Smiod 		case KB_LV:
588ae56ac94Smiod 			if (unisym < 0x100 + nitems(unicode_to_latin7) &&
589ae56ac94Smiod 			    unicode_to_latin7[unisym - 0x100] != 0)
590ae56ac94Smiod 				unisym = unicode_to_latin7[unisym - 0x100];
591ae56ac94Smiod 			break;
592ae56ac94Smiod 		case KB_TR:
593ae56ac94Smiod 			switch (unisym) {
594ae56ac94Smiod 			case KS_L5_Gbreve:
595ae56ac94Smiod 				unisym = 0xd0;
596ae56ac94Smiod 				break;
597ae56ac94Smiod 			case KS_L5_gbreve:
598ae56ac94Smiod 				unisym = 0xf0;
599ae56ac94Smiod 				break;
600ae56ac94Smiod 			case KS_L5_Idotabove:
601ae56ac94Smiod 				unisym = 0xdd;
602ae56ac94Smiod 				break;
603ae56ac94Smiod 			case KS_L5_idotless:
604ae56ac94Smiod 				unisym = 0xfd;
605ae56ac94Smiod 				break;
606ae56ac94Smiod 			case KS_L5_Scedilla:
607ae56ac94Smiod 				unisym = 0xde;
608ae56ac94Smiod 				break;
609ae56ac94Smiod 			case KS_L5_scedilla:
610ae56ac94Smiod 				unisym = 0xfe;
611ae56ac94Smiod 				break;
612ae56ac94Smiod 			}
613ae56ac94Smiod 			break;
614ae56ac94Smiod 		case KB_PL:
615ae56ac94Smiod 		case KB_SI:
616ae56ac94Smiod 			if (unisym < 0x100 + nitems(unicode_to_latin2) &&
617ae56ac94Smiod 			    unicode_to_latin2[unisym - 0x100] != 0)
618ae56ac94Smiod 				unisym = unicode_to_latin2[unisym - 0x100];
619ae56ac94Smiod 			break;
620ae56ac94Smiod 		}
621ae56ac94Smiod 		break;
622ae56ac94Smiod 
623ae56ac94Smiod 	case 0x0280 >> 7:
624ae56ac94Smiod 		switch (KB_ENCODING(layout)) {
625ae56ac94Smiod 		case KB_PL:
626ae56ac94Smiod 		case KB_SI:
627ae56ac94Smiod 			switch (unisym) {
628ae56ac94Smiod 			case KS_L2_caron:
629ae56ac94Smiod 				unisym = 0xb7;
630ae56ac94Smiod 				break;
631ae56ac94Smiod 			case KS_L2_breve:
632ae56ac94Smiod 				unisym = 0xa2;
633ae56ac94Smiod 				break;
634ae56ac94Smiod 			case KS_L2_dotabove:
635ae56ac94Smiod 				unisym = 0xff;
636ae56ac94Smiod 				break;
637ae56ac94Smiod 			case KS_L2_ogonek:
638ae56ac94Smiod 				unisym = 0xb2;
639ae56ac94Smiod 				break;
640ae56ac94Smiod 			case KS_L2_dblacute:
641ae56ac94Smiod 				unisym = 0xbd;
642ae56ac94Smiod 				break;
643ae56ac94Smiod 			}
644ae56ac94Smiod 			break;
645ae56ac94Smiod 		}
646ae56ac94Smiod 		break;
647ae56ac94Smiod 
648ae56ac94Smiod 	case 0x0400 >> 7:
649ae56ac94Smiod 		if (unisym < 0x400 +
650ae56ac94Smiod 		    sizeof(cyrillic_to_koi8) / sizeof(cyrillic_to_koi8[0]) &&
651ae56ac94Smiod 		    cyrillic_to_koi8[unisym - 0x400] != 0)
652ae56ac94Smiod 			unisym = cyrillic_to_koi8[unisym - 0x400];
653ae56ac94Smiod 		break;
654ae56ac94Smiod 	case 0x0480 >> 7:
655ae56ac94Smiod 		if (unisym == KS_Cyrillic_GHEUKR)
656ae56ac94Smiod 			unisym = 0xbd;	/* ukrainian GHE */
657ae56ac94Smiod 		else if (unisym == KS_Cyrillic_gheukr)
658ae56ac94Smiod 			unisym = 0xad;	/* ukrainian ghe */
659ae56ac94Smiod 		break;
660ae56ac94Smiod 
661ae56ac94Smiod 	case 0x2000 >> 7:
662ae56ac94Smiod 		switch (KB_ENCODING(layout)) {
663ae56ac94Smiod 		case KB_LT:
664ae56ac94Smiod 		case KB_LV:
665ae56ac94Smiod 			switch (unisym) {
666ae56ac94Smiod 			case KS_L7_rightsnglquot:
667ae56ac94Smiod 				unisym = 0xff;
668ae56ac94Smiod 				break;
669ae56ac94Smiod 			case KS_L7_leftdblquot:
670ae56ac94Smiod 				unisym = 0xb4;
671ae56ac94Smiod 				break;
672ae56ac94Smiod 			case KS_L7_rightdblquot:
673ae56ac94Smiod 				unisym = 0xa1;
674ae56ac94Smiod 				break;
675ae56ac94Smiod 			case KS_L7_dbllow9quot:
676ae56ac94Smiod 				unisym = 0xa5;
677ae56ac94Smiod 				break;
678ae56ac94Smiod 			}
679ae56ac94Smiod 		}
680ae56ac94Smiod 		break;
681ae56ac94Smiod 
682ae56ac94Smiod 	}
683ae56ac94Smiod 
684ae56ac94Smiod 	out[0] = unisym & 0xff;
685ae56ac94Smiod 	return (1);
686ae56ac94Smiod }
687ae56ac94Smiod 
688ae56ac94Smiod /*
689ae56ac94Smiod  * Keysym to UTF-8 sequence translation function.
690*ca22e28bSmiod  * The out buffer is at least 4 characters long.
691ae56ac94Smiod  */
692ae56ac94Smiod int
wsemul_utf8_translate(u_int32_t unisym,kbd_t layout,u_char * out,int allow_utf8)693ae56ac94Smiod wsemul_utf8_translate(u_int32_t unisym, kbd_t layout, u_char *out,
694ae56ac94Smiod     int allow_utf8)
695ae56ac94Smiod {
696ae56ac94Smiod #ifndef HAVE_UTF8_SUPPORT
697ae56ac94Smiod 	return (wsemul_local_translate(unisym, layout, out));
698ae56ac94Smiod #else
699ae56ac94Smiod 	u_int pos, length, headpat;
700ae56ac94Smiod 
701ae56ac94Smiod 	if (!allow_utf8)
702*ca22e28bSmiod 		return wsemul_local_translate(unisym, layout, out);
703ae56ac94Smiod 
704*ca22e28bSmiod 	if (unisym < 0x80) {
705*ca22e28bSmiod 		/* Fast path for plain ASCII characters. */
706*ca22e28bSmiod 		*out = (u_char)unisym;
707*ca22e28bSmiod 		return 1;
708*ca22e28bSmiod 	}
709*ca22e28bSmiod 
710*ca22e28bSmiod 	if (unisym < 0x800) {
711ae56ac94Smiod 		headpat = 0xc0;
712ae56ac94Smiod 		length = 2;
713*ca22e28bSmiod 	} else if (unisym < 0x10000) {
714*ca22e28bSmiod 		if (unisym >= 0xd800 && unisym < 0xe000)
715*ca22e28bSmiod 			return 0;
716*ca22e28bSmiod 		headpat = 0xe0;
717*ca22e28bSmiod 		length = 3;
718ae56ac94Smiod 	} else {
719*ca22e28bSmiod 		if (unisym >= 0x110000)
720*ca22e28bSmiod 			return 0;
721*ca22e28bSmiod 		headpat = 0xf0;
722*ca22e28bSmiod 		length = 4;
723ae56ac94Smiod 	}
724ae56ac94Smiod 
725ae56ac94Smiod 	for (pos = length - 1; pos > 0; pos--) {
726ae56ac94Smiod 		out[pos] = 0x80 | (unisym & 0x3f);
727ae56ac94Smiod 		unisym >>= 6;
728ae56ac94Smiod 	}
729ae56ac94Smiod 	out[0] = headpat | unisym;
730ae56ac94Smiod 
731*ca22e28bSmiod 	return length;
732ae56ac94Smiod #endif
733ae56ac94Smiod }
734