/* $OpenBSD: wsemul_subr.c,v 1.2 2023/03/06 17:14:44 miod Exp $ */

/*
 * Copyright (c) 2007, 2013 Miodrag Vallat.
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice, this permission notice, and the disclaimer below
 * appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

/*
 * Part of the UTF-8 state machine logic borrowed from citrus_utf8.c
 * under the following licence:
 */
/*-
 * Copyright (c) 2002-2004 Tim J. Robbins
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

50ae56ac94Smiod #include <sys/param.h>
51ae56ac94Smiod #include <sys/systm.h>
52ae56ac94Smiod #include <sys/errno.h>
53ae56ac94Smiod
54ae56ac94Smiod #include <dev/wscons/wscons_features.h>
55ae56ac94Smiod #include <dev/wscons/wsconsio.h>
56ae56ac94Smiod #include <dev/wscons/wsdisplayvar.h>
57ae56ac94Smiod #include <dev/wscons/wsemulvar.h>
58ae56ac94Smiod #include <dev/wscons/wsksymdef.h>
59ae56ac94Smiod
60ae56ac94Smiod int wsemul_local_translate(u_int32_t, kbd_t, u_char *);
61ae56ac94Smiod
62ae56ac94Smiod /*
63ae56ac94Smiod * Get characters from an input stream and update the input state.
64ae56ac94Smiod * Processing stops when the stream is empty, or a complete character
65ae56ac94Smiod * sequence has been recognized, in which case it returns zero.
66ae56ac94Smiod */
67ae56ac94Smiod int
wsemul_getchar(const u_char ** inbuf,u_int * inlen,struct wsemul_inputstate * state,int allow_utf8)68ae56ac94Smiod wsemul_getchar(const u_char **inbuf, u_int *inlen,
69ae56ac94Smiod struct wsemul_inputstate *state, int allow_utf8)
70ae56ac94Smiod {
71ae56ac94Smiod u_int len = *inlen;
72ae56ac94Smiod const u_char *buf = *inbuf;
73*ca22e28bSmiod #ifdef HAVE_UTF8_SUPPORT
74*ca22e28bSmiod int rc;
75*ca22e28bSmiod u_int32_t tmpchar, lbound;
76*ca22e28bSmiod u_int mbleft;
77*ca22e28bSmiod #endif
78ae56ac94Smiod
79ae56ac94Smiod if (len == 0)
80*ca22e28bSmiod return EAGAIN;
81ae56ac94Smiod
82*ca22e28bSmiod #ifndef HAVE_UTF8_SUPPORT
83ae56ac94Smiod state->inchar = *buf++;
84ae56ac94Smiod state->mbleft = 0;
85ae56ac94Smiod len--;
86ae56ac94Smiod *inlen = len;
87ae56ac94Smiod *inbuf = buf;
88*ca22e28bSmiod return 0;
89ae56ac94Smiod #else
90ae56ac94Smiod /*
91ae56ac94Smiod * If we do not allow multibyte sequences, process as quickly
92ae56ac94Smiod * as possible.
93ae56ac94Smiod */
94ae56ac94Smiod if (!allow_utf8) {
95ae56ac94Smiod state->inchar = *buf++;
96ae56ac94Smiod state->mbleft = 0;
97ae56ac94Smiod len--;
98ae56ac94Smiod *inlen = len;
99ae56ac94Smiod *inbuf = buf;
100*ca22e28bSmiod return 0;
101ae56ac94Smiod }
102ae56ac94Smiod
103*ca22e28bSmiod rc = EAGAIN;
104ae56ac94Smiod tmpchar = state->inchar;
105*ca22e28bSmiod lbound = state->lbound;
106ae56ac94Smiod mbleft = state->mbleft;
107ae56ac94Smiod
108ae56ac94Smiod while (len != 0) {
109ae56ac94Smiod u_int32_t frag = (u_int32_t)*buf++;
110ae56ac94Smiod len--;
111ae56ac94Smiod
112ae56ac94Smiod /*
113ae56ac94Smiod * If we are in the middle of a multibyte sequence, try
114ae56ac94Smiod * to complete it.
115ae56ac94Smiod */
116ae56ac94Smiod
117ae56ac94Smiod if (mbleft != 0) {
118*ca22e28bSmiod if ((frag & 0xc0) != 0x80)
119*ca22e28bSmiod goto invalid;
120*ca22e28bSmiod
121ae56ac94Smiod tmpchar = (tmpchar << 6) | (frag & 0x3f);
122ae56ac94Smiod mbleft--;
123ae56ac94Smiod if (mbleft == 0) {
124*ca22e28bSmiod if (tmpchar < lbound)
125*ca22e28bSmiod goto invalid;
126*ca22e28bSmiod if (tmpchar >= 0xd800 && tmpchar < 0xe000)
127*ca22e28bSmiod goto invalid;
128*ca22e28bSmiod if (tmpchar >= 0x110000)
129*ca22e28bSmiod goto invalid;
130ae56ac94Smiod rc = 0;
131ae56ac94Smiod break;
132ae56ac94Smiod }
133*ca22e28bSmiod continue;
134ae56ac94Smiod }
135ae56ac94Smiod
136ae56ac94Smiod /*
137ae56ac94Smiod * Otherwise let's decide if this is the start of a new
138ae56ac94Smiod * multibyte sequence, or a 7-bit character.
139ae56ac94Smiod */
140ae56ac94Smiod
141ae56ac94Smiod if ((frag & 0x80) == 0) {
142ae56ac94Smiod tmpchar = frag;
143ae56ac94Smiod rc = 0;
144ae56ac94Smiod break;
145ae56ac94Smiod }
146ae56ac94Smiod
147*ca22e28bSmiod if ((frag & 0xe0) == 0xc0) {
148*ca22e28bSmiod frag &= 0x1f;
149ae56ac94Smiod mbleft = 1;
150*ca22e28bSmiod lbound = 0x80;
151*ca22e28bSmiod } else if ((frag & 0xf0) == 0xe0) {
152*ca22e28bSmiod frag &= 0x0f;
153*ca22e28bSmiod mbleft = 2;
154*ca22e28bSmiod lbound = 0x800;
155*ca22e28bSmiod } else if ((frag & 0xf8) == 0xf0) {
156*ca22e28bSmiod frag &= 0x07;
157*ca22e28bSmiod mbleft = 3;
158*ca22e28bSmiod lbound = 0x10000;
159*ca22e28bSmiod } else {
160*ca22e28bSmiod goto invalid;
161ae56ac94Smiod }
162ae56ac94Smiod
163ae56ac94Smiod tmpchar = frag;
164*ca22e28bSmiod state->lbound = lbound;
165*ca22e28bSmiod continue;
166*ca22e28bSmiod
167*ca22e28bSmiod invalid:
168*ca22e28bSmiod /* Abort the ill-formed sequence and continue */
169*ca22e28bSmiod mbleft = 0;
170*ca22e28bSmiod tmpchar = 0;
171*ca22e28bSmiod rc = EILSEQ;
172ae56ac94Smiod }
173ae56ac94Smiod
174ae56ac94Smiod state->inchar = tmpchar;
175ae56ac94Smiod state->mbleft = mbleft;
176ae56ac94Smiod *inlen = len;
177ae56ac94Smiod *inbuf = buf;
178*ca22e28bSmiod return rc;
179ae56ac94Smiod #endif
180ae56ac94Smiod }
181ae56ac94Smiod
/*
 * Unicode Cyrillic to KOI8 translation table (starts at U+0400),
 * from RFC 2319.
 * A zero entry means there is no KOI8 equivalent.
 */
const u_int8_t cyrillic_to_koi8[] = {
	0x00,	/* IE grave */		/* 0400 */
	0xb3,	/* IO */
	0x00,	/* DJE */
	0x00,	/* GJE */
	0xb4,	/* UKR IE */
	0x00,	/* DZE */
	0xb6,	/* BYE/UKR I */
	0xb7,	/* YI */
	0x00,	/* JE */
	0x00,	/* LJE */
	0x00,	/* NJE */
	0x00,	/* TSHE */
	0x00,	/* KJE */
	0x00,	/* I grave */
	0x00,	/* short U */
	0x00,	/* DZHE */
	0xe1,	/* A */			/* 0410 */
	0xe2,	/* BE */
	0xf7,	/* VE */
	0xe7,	/* GHE */
	0xe4,	/* DE */
	0xe5,	/* IE */
	0xf6,	/* ZHE */
	0xfa,	/* ZE */
	0xe9,	/* I */
	0xea,	/* short I */
	0xeb,	/* KA */
	0xec,	/* EL */
	0xed,	/* EM */
	0xee,	/* EN */
	0xef,	/* O */
	0xf0,	/* PE */
	0xf2,	/* ER */		/* 0420 */
	0xf3,	/* ES */
	0xf4,	/* TE */
	0xf5,	/* U */
	0xe6,	/* EF */
	0xe8,	/* HA */
	0xe3,	/* TSE */
	0xfe,	/* CHE */
	0xfb,	/* SHA */
	0xfd,	/* SHCHA */
	0xff,	/* HARD SIGN */
	0xf9,	/* YERU */
	0xf8,	/* SOFT SIGN */
	0xfc,	/* E */
	0xe0,	/* YU */
	0xf1,	/* YA */
	0xc1,	/* a */			/* 0430 */
	0xc2,	/* be */
	0xd7,	/* ve */
	0xc7,	/* ghe */
	0xc4,	/* de */
	0xc5,	/* ie */
	0xd6,	/* zhe */
	0xda,	/* ze */
	0xc9,	/* i */
	0xca,	/* short i */
	0xcb,	/* ka */
	0xcc,	/* el */
	0xcd,	/* em */
	0xce,	/* en */
	0xcf,	/* o */
	0xd0,	/* pe */
	0xd2,	/* er */		/* 0440 */
	0xd3,	/* es */
	0xd4,	/* te */
	0xd5,	/* u */
	0xc6,	/* ef */
	0xc8,	/* ha */
	0xc3,	/* tse */
	0xde,	/* che */
	0xdb,	/* sha */
	0xdd,	/* shcha */
	0xdf,	/* hard sign */
	0xd9,	/* yeru */
	0xd8,	/* soft sign */
	0xdc,	/* e */
	0xc0,	/* yu */
	0xd1,	/* ya */
	0x00,	/* ie grave */		/* 0450 */
	0xa3,	/* io */
	0x00,	/* dje */
	0x00,	/* gje */
	0xa4,	/* ukr ie */
	0x00,	/* dze */
	0xa6,	/* bye/ukr i */
	0xa7,	/* yi */
	0x00,	/* je */
	0x00,	/* lje */
	0x00,	/* nje */
	0x00,	/* tshe */
	0x00,	/* kje */
	0x00,	/* i grave */
	0x00,	/* short u */
	0x00	/* dzhe */
};

/*
 * Europe to Latin-2 translation table (starts at U+0100).
 * A zero entry means there is no Latin-2 equivalent.
 */
const u_int8_t unicode_to_latin2[] = {
	0x00,	/* A macron */		/* 0100 */
	0x00,	/* a macron */
	0xc3,	/* A breve */
	0xe3,	/* a breve */
	0xa1,	/* A ogonek */
	0xb1,	/* a ogonek */
	0xc6,	/* C acute */
	0xe6,	/* c acute */
	0x00,	/* C circumflex */
	0x00,	/* c circumflex */
	0x00,	/* C dot above */
	0x00,	/* c dot above */
	0xc8,	/* C caron */
	0xe8,	/* c caron */
	0xcf,	/* D caron */
	0xef,	/* d caron */
	0xd0,	/* D stroke */		/* 0110 */
	0xf0,	/* d stroke */
	0x00,	/* E macron */
	0x00,	/* e macron */
	0x00,	/* E breve */
	0x00,	/* e breve */
	0x00,	/* E dot above */
	0x00,	/* e dot above */
	0xca,	/* E ogonek */
	0xea,	/* e ogonek */
	0xcc,	/* E caron */
	0xec,	/* e caron */
	0x00,	/* G circumflex */
	0x00,	/* g circumflex */
	0x00,	/* G breve */
	0x00,	/* g breve */
	0x00,	/* G dot above */	/* 0120 */
	0x00,	/* g dot above */
	0x00,	/* G cedilla */
	0x00,	/* g cedilla */
	0x00,	/* H circumflex */
	0x00,	/* h circumflex */
	0x00,	/* H stroke */
	0x00,	/* h stroke */
	0x00,	/* I tilde */
	0x00,	/* i tilde */
	0x00,	/* I macron */
	0x00,	/* i macron */
	0x00,	/* I breve */
	0x00,	/* i breve */
	0x00,	/* I ogonek */
	0x00,	/* i ogonek */
	0x00,	/* dotted I */		/* 0130 */
	0x00,	/* non-dotted i */
	0x00,	/* ligature IJ */
	0x00,	/* ligature ij */
	0x00,	/* J circumflex */
	0x00,	/* j circumflex */
	0x00,	/* K cedilla */
	0x00,	/* k cedilla */
	0x00,	/* kra */
	0xc5,	/* L acute */
	0xe5,	/* l acute */
	0x00,	/* L cedilla */
	0x00,	/* l cedilla */
	0xa5,	/* L caron */
	0xb5,	/* l caron */
	0x00,	/* L middle dot */
	0x00,	/* l middle dot */	/* 0140 */
	0xa3,	/* L stroke */
	0xb3,	/* l stroke */
	0xd1,	/* N acute */
	0xf1,	/* n acute */
	0x00,	/* N cedilla */
	0x00,	/* n cedilla */
	0xd2,	/* N caron */
	0xf2,	/* n caron */
	0x00,	/* n preceded by apostrophe */
	0x00,	/* ENG */
	0x00,	/* eng */
	0x00,	/* O macron */
	0x00,	/* o macron */
	0x00,	/* O breve */
	0x00,	/* o breve */
	0xd5,	/* O double acute */	/* 0150 */
	0xf5,	/* o double acute */
	0x00,	/* ligature OE */
	0x00,	/* ligature oe */
	0xc0,	/* R acute */
	0xe0,	/* r acute */
	0x00,	/* R cedilla */
	0x00,	/* r cedilla */
	0xd8,	/* R caron */
	0xf8,	/* r caron */
	0xa6,	/* S acute */
	0xb6,	/* s acute */
	0x00,	/* S circumflex */
	0x00,	/* s circumflex */
	0xaa,	/* S cedilla */
	0xba,	/* s cedilla */
	0xa9,	/* S caron */		/* 0160 */
	0xb9,	/* s caron */
	0xde,	/* T cedilla */
	0xfe,	/* t cedilla */
	0xab,	/* T caron */
	0xbb,	/* t caron */
	0x00,	/* T stroke */
	0x00,	/* t stroke */
	0x00,	/* U tilde */
	0x00,	/* u tilde */
	0x00,	/* U macron */
	0x00,	/* u macron */
	0x00,	/* U breve */
	0x00,	/* u breve */
	0xd9,	/* U ring above */
	0xf9,	/* u ring above */
	0xdb,	/* U double acute */	/* 0170 */
	0xfb,	/* u double acute */
	0x00,	/* U ogonek */
	0x00,	/* u ogonek */
	0x00,	/* W circumflex */
	0x00,	/* w circumflex */
	0x00,	/* Y circumflex */
	0x00,	/* y circumflex */
	0x00,	/* Y diaeresis */
	0xac,	/* Z acute */
	0xbc,	/* z acute */
	0xaf,	/* Z dot above */
	0xbf,	/* z dot above */
	0xae,	/* Z caron */
	0xbe,	/* z caron */
	0x00	/* long s */
};

/*
 * Baltic to Latin-7 translation table (starts at U+0100).
 * A zero entry means there is no Latin-7 equivalent.
 */
const u_int8_t unicode_to_latin7[] = {
	0xc2,	/* A macron */		/* 0100 */
	0xe2,	/* a macron */
	0x00,	/* A breve */
	0x00,	/* a breve */
	0xc0,	/* A ogonek */
	0xe0,	/* a ogonek */
	0xc3,	/* C acute */
	0xe3,	/* c acute */
	0x00,	/* C circumflex */
	0x00,	/* c circumflex */
	0x00,	/* C dot above */
	0x00,	/* c dot above */
	0xc8,	/* C caron */
	0xe8,	/* c caron */
	0x00,	/* D caron */
	0x00,	/* d caron */
	0x00,	/* D stroke */		/* 0110 */
	0x00,	/* d stroke */
	0xc7,	/* E macron */
	0xe7,	/* e macron */
	0x00,	/* E breve */
	0x00,	/* e breve */
	0xcb,	/* E dot above */
	0xeb,	/* e dot above */
	0xc6,	/* E ogonek */
	0xe6,	/* e ogonek */
	0x00,	/* E caron */
	0x00,	/* e caron */
	0x00,	/* G circumflex */
	0x00,	/* g circumflex */
	0x00,	/* G breve */
	0x00,	/* g breve */
	0x00,	/* G dot above */	/* 0120 */
	0x00,	/* g dot above */
	0xcc,	/* G cedilla */
	0xec,	/* g cedilla */
	0x00,	/* H circumflex */
	0x00,	/* h circumflex */
	0x00,	/* H stroke */
	0x00,	/* h stroke */
	0x00,	/* I tilde */
	0x00,	/* i tilde */
	0xce,	/* I macron */
	0xee,	/* i macron */
	0x00,	/* I breve */
	0x00,	/* i breve */
	0xc1,	/* I ogonek */
	0xe1,	/* i ogonek */
	0x00,	/* dotted I */		/* 0130 */
	0x00,	/* non-dotted i */
	0x00,	/* ligature IJ */
	0x00,	/* ligature ij */
	0x00,	/* J circumflex */
	0x00,	/* j circumflex */
	0xcd,	/* K cedilla */
	0xed,	/* k cedilla */
	0x00,	/* kra */
	0x00,	/* L acute */
	0x00,	/* l acute */
	0xcf,	/* L cedilla */
	0xef,	/* l cedilla */
	0x00,	/* L caron */
	0x00,	/* l caron */
	0x00,	/* L middle dot */
	0x00,	/* l middle dot */	/* 0140 */
	0xd9,	/* L stroke */
	0xf9,	/* l stroke */
	0xd1,	/* N acute */
	0xf1,	/* n acute */
	0xd2,	/* N cedilla */
	0xf2,	/* n cedilla */
	0x00,	/* N caron */
	0x00,	/* n caron */
	0x00,	/* n preceded by apostrophe */
	0x00,	/* ENG */
	0x00,	/* eng */
	0xd4,	/* O macron */
	0xf4,	/* o macron */
	0x00,	/* O breve */
	0x00,	/* o breve */
	0x00,	/* O double acute */	/* 0150 */
	0x00,	/* o double acute */
	0x00,	/* ligature OE */
	0x00,	/* ligature oe */
	0x00,	/* R acute */
	0x00,	/* r acute */
	0xaa,	/* R cedilla */
	0xba,	/* r cedilla */
	0x00,	/* R caron */
	0x00,	/* r caron */
	0xda,	/* S acute */
	0xfa,	/* s acute */
	0x00,	/* S circumflex */
	0x00,	/* s circumflex */
	0x00,	/* S cedilla */
	0x00,	/* s cedilla */
	0xd0,	/* S caron */		/* 0160 */
	0xf0,	/* s caron */
	0x00,	/* T cedilla */
	0x00,	/* t cedilla */
	0x00,	/* T caron */
	0x00,	/* t caron */
	0x00,	/* T stroke */
	0x00,	/* t stroke */
	0x00,	/* U tilde */
	0x00,	/* u tilde */
	0xdb,	/* U macron */
	0xfb,	/* u macron */
	0x00,	/* U breve */
	0x00,	/* u breve */
	0x00,	/* U ring above */
	0x00,	/* u ring above */
	0x00,	/* U double acute */	/* 0170 */
	0x00,	/* u double acute */
	0xd8,	/* U ogonek */
	0xf8,	/* u ogonek */
	0x00,	/* W circumflex */
	0x00,	/* w circumflex */
	0x00,	/* Y circumflex */
	0x00,	/* y circumflex */
	0x00,	/* Y diaeresis */
	0xca,	/* Z acute */
	0xea,	/* z acute */
	0xdd,	/* Z dot above */
	0xfd,	/* z dot above */
	0xde,	/* Z caron */
	0xfe,	/* z caron */
	0x00	/* long s */
};

553ae56ac94Smiod /*
554ae56ac94Smiod * Keysym to local 8-bit charset sequence translation function.
555ae56ac94Smiod * The out buffer is at least one character long.
556ae56ac94Smiod * The keyboard layout is used as a hint to decide which latin charset to
557ae56ac94Smiod * assume.
558ae56ac94Smiod */
559ae56ac94Smiod int
wsemul_local_translate(u_int32_t unisym,kbd_t layout,u_char * out)560ae56ac94Smiod wsemul_local_translate(u_int32_t unisym, kbd_t layout, u_char *out)
561ae56ac94Smiod {
562ae56ac94Smiod switch (unisym >> 7) {
563ae56ac94Smiod case 0x0080 >> 7:
564ae56ac94Smiod switch (KB_ENCODING(layout)) {
565ae56ac94Smiod case KB_LT:
566ae56ac94Smiod case KB_LV:
567ae56ac94Smiod switch (unisym) {
568ae56ac94Smiod case KS_L7_AE:
569ae56ac94Smiod unisym = 0xaf;
570ae56ac94Smiod break;
571ae56ac94Smiod case KS_L7_Ostroke:
572ae56ac94Smiod unisym = 0xa8;
573ae56ac94Smiod break;
574ae56ac94Smiod case KS_L7_ae:
575ae56ac94Smiod unisym = 0xbf;
576ae56ac94Smiod break;
577ae56ac94Smiod case KS_L7_ostroke:
578ae56ac94Smiod unisym = 0xb8;
579ae56ac94Smiod break;
580ae56ac94Smiod }
581ae56ac94Smiod }
582ae56ac94Smiod break;
583ae56ac94Smiod
584ae56ac94Smiod case 0x0100 >> 7:
585ae56ac94Smiod switch (KB_ENCODING(layout)) {
586ae56ac94Smiod case KB_LT:
587ae56ac94Smiod case KB_LV:
588ae56ac94Smiod if (unisym < 0x100 + nitems(unicode_to_latin7) &&
589ae56ac94Smiod unicode_to_latin7[unisym - 0x100] != 0)
590ae56ac94Smiod unisym = unicode_to_latin7[unisym - 0x100];
591ae56ac94Smiod break;
592ae56ac94Smiod case KB_TR:
593ae56ac94Smiod switch (unisym) {
594ae56ac94Smiod case KS_L5_Gbreve:
595ae56ac94Smiod unisym = 0xd0;
596ae56ac94Smiod break;
597ae56ac94Smiod case KS_L5_gbreve:
598ae56ac94Smiod unisym = 0xf0;
599ae56ac94Smiod break;
600ae56ac94Smiod case KS_L5_Idotabove:
601ae56ac94Smiod unisym = 0xdd;
602ae56ac94Smiod break;
603ae56ac94Smiod case KS_L5_idotless:
604ae56ac94Smiod unisym = 0xfd;
605ae56ac94Smiod break;
606ae56ac94Smiod case KS_L5_Scedilla:
607ae56ac94Smiod unisym = 0xde;
608ae56ac94Smiod break;
609ae56ac94Smiod case KS_L5_scedilla:
610ae56ac94Smiod unisym = 0xfe;
611ae56ac94Smiod break;
612ae56ac94Smiod }
613ae56ac94Smiod break;
614ae56ac94Smiod case KB_PL:
615ae56ac94Smiod case KB_SI:
616ae56ac94Smiod if (unisym < 0x100 + nitems(unicode_to_latin2) &&
617ae56ac94Smiod unicode_to_latin2[unisym - 0x100] != 0)
618ae56ac94Smiod unisym = unicode_to_latin2[unisym - 0x100];
619ae56ac94Smiod break;
620ae56ac94Smiod }
621ae56ac94Smiod break;
622ae56ac94Smiod
623ae56ac94Smiod case 0x0280 >> 7:
624ae56ac94Smiod switch (KB_ENCODING(layout)) {
625ae56ac94Smiod case KB_PL:
626ae56ac94Smiod case KB_SI:
627ae56ac94Smiod switch (unisym) {
628ae56ac94Smiod case KS_L2_caron:
629ae56ac94Smiod unisym = 0xb7;
630ae56ac94Smiod break;
631ae56ac94Smiod case KS_L2_breve:
632ae56ac94Smiod unisym = 0xa2;
633ae56ac94Smiod break;
634ae56ac94Smiod case KS_L2_dotabove:
635ae56ac94Smiod unisym = 0xff;
636ae56ac94Smiod break;
637ae56ac94Smiod case KS_L2_ogonek:
638ae56ac94Smiod unisym = 0xb2;
639ae56ac94Smiod break;
640ae56ac94Smiod case KS_L2_dblacute:
641ae56ac94Smiod unisym = 0xbd;
642ae56ac94Smiod break;
643ae56ac94Smiod }
644ae56ac94Smiod break;
645ae56ac94Smiod }
646ae56ac94Smiod break;
647ae56ac94Smiod
648ae56ac94Smiod case 0x0400 >> 7:
649ae56ac94Smiod if (unisym < 0x400 +
650ae56ac94Smiod sizeof(cyrillic_to_koi8) / sizeof(cyrillic_to_koi8[0]) &&
651ae56ac94Smiod cyrillic_to_koi8[unisym - 0x400] != 0)
652ae56ac94Smiod unisym = cyrillic_to_koi8[unisym - 0x400];
653ae56ac94Smiod break;
654ae56ac94Smiod case 0x0480 >> 7:
655ae56ac94Smiod if (unisym == KS_Cyrillic_GHEUKR)
656ae56ac94Smiod unisym = 0xbd; /* ukrainian GHE */
657ae56ac94Smiod else if (unisym == KS_Cyrillic_gheukr)
658ae56ac94Smiod unisym = 0xad; /* ukrainian ghe */
659ae56ac94Smiod break;
660ae56ac94Smiod
661ae56ac94Smiod case 0x2000 >> 7:
662ae56ac94Smiod switch (KB_ENCODING(layout)) {
663ae56ac94Smiod case KB_LT:
664ae56ac94Smiod case KB_LV:
665ae56ac94Smiod switch (unisym) {
666ae56ac94Smiod case KS_L7_rightsnglquot:
667ae56ac94Smiod unisym = 0xff;
668ae56ac94Smiod break;
669ae56ac94Smiod case KS_L7_leftdblquot:
670ae56ac94Smiod unisym = 0xb4;
671ae56ac94Smiod break;
672ae56ac94Smiod case KS_L7_rightdblquot:
673ae56ac94Smiod unisym = 0xa1;
674ae56ac94Smiod break;
675ae56ac94Smiod case KS_L7_dbllow9quot:
676ae56ac94Smiod unisym = 0xa5;
677ae56ac94Smiod break;
678ae56ac94Smiod }
679ae56ac94Smiod }
680ae56ac94Smiod break;
681ae56ac94Smiod
682ae56ac94Smiod }
683ae56ac94Smiod
684ae56ac94Smiod out[0] = unisym & 0xff;
685ae56ac94Smiod return (1);
686ae56ac94Smiod }
687ae56ac94Smiod
688ae56ac94Smiod /*
689ae56ac94Smiod * Keysym to UTF-8 sequence translation function.
690*ca22e28bSmiod * The out buffer is at least 4 characters long.
691ae56ac94Smiod */
692ae56ac94Smiod int
wsemul_utf8_translate(u_int32_t unisym,kbd_t layout,u_char * out,int allow_utf8)693ae56ac94Smiod wsemul_utf8_translate(u_int32_t unisym, kbd_t layout, u_char *out,
694ae56ac94Smiod int allow_utf8)
695ae56ac94Smiod {
696ae56ac94Smiod #ifndef HAVE_UTF8_SUPPORT
697ae56ac94Smiod return (wsemul_local_translate(unisym, layout, out));
698ae56ac94Smiod #else
699ae56ac94Smiod u_int pos, length, headpat;
700ae56ac94Smiod
701ae56ac94Smiod if (!allow_utf8)
702*ca22e28bSmiod return wsemul_local_translate(unisym, layout, out);
703ae56ac94Smiod
704*ca22e28bSmiod if (unisym < 0x80) {
705*ca22e28bSmiod /* Fast path for plain ASCII characters. */
706*ca22e28bSmiod *out = (u_char)unisym;
707*ca22e28bSmiod return 1;
708*ca22e28bSmiod }
709*ca22e28bSmiod
710*ca22e28bSmiod if (unisym < 0x800) {
711ae56ac94Smiod headpat = 0xc0;
712ae56ac94Smiod length = 2;
713*ca22e28bSmiod } else if (unisym < 0x10000) {
714*ca22e28bSmiod if (unisym >= 0xd800 && unisym < 0xe000)
715*ca22e28bSmiod return 0;
716*ca22e28bSmiod headpat = 0xe0;
717*ca22e28bSmiod length = 3;
718ae56ac94Smiod } else {
719*ca22e28bSmiod if (unisym >= 0x110000)
720*ca22e28bSmiod return 0;
721*ca22e28bSmiod headpat = 0xf0;
722*ca22e28bSmiod length = 4;
723ae56ac94Smiod }
724ae56ac94Smiod
725ae56ac94Smiod for (pos = length - 1; pos > 0; pos--) {
726ae56ac94Smiod out[pos] = 0x80 | (unisym & 0x3f);
727ae56ac94Smiod unisym >>= 6;
728ae56ac94Smiod }
729ae56ac94Smiod out[0] = headpat | unisym;
730ae56ac94Smiod
731*ca22e28bSmiod return length;
732ae56ac94Smiod #endif
733ae56ac94Smiod }
734