1 /* $OpenBSD: c16rtomb.c,v 1.1 2023/08/20 15:02:51 schwarze Exp $ */
2 /*
3 * Copyright (c) 2022 Ingo Schwarze <schwarze@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
#include <errno.h>
#include <string.h>
#include <uchar.h>
#include <wchar.h>
21
/*
 * Private view of the conversion state object while a surrogate
 * pair is being assembled across two calls.
 *
 * Keep this structure compatible with
 * struct _utf8_state in the file citrus/citrus_utf8.c.
 */
struct _utf16_state {
	wchar_t	ch;	/* code point contribution of the high surrogate */
	int	want;	/* UTF16_WANT_LOW while a low surrogate is pending */
};

/* Value stored in "want" between a high surrogate and its low surrogate. */
enum { UTF16_WANT_LOW = -3 };

/* The state object is accessed through a cast, so it must be large enough. */
_Static_assert(sizeof(struct _utf16_state) <= sizeof(mbstate_t),
    "struct _utf16_state must fit into mbstate_t");

/*
 * Convert one UTF-16 code unit to its multibyte representation.
 *
 * s	Output buffer, or NULL to merely reset the conversion state.
 * c16	The UTF-16 code unit to convert.
 * ps	Conversion state; if NULL, a state object private to this
 *	function is used instead.
 *
 * Returns 1 if s is NULL, 0 if c16 is a high surrogate that was stored
 * in the state for the next call, and otherwise whatever wcrtomb(3)
 * returns for the resulting wide character.  If a high surrogate is
 * pending and c16 is not a low surrogate, errno is set to EILSEQ and
 * (size_t)-1 is returned; the state is left unchanged in that case.
 */
size_t
c16rtomb(char *s, char16_t c16, mbstate_t *ps)
{
	static mbstate_t	 mbs;
	struct _utf16_state	*us;
	wchar_t			 wc;

	if (ps == NULL)
		ps = &mbs;

	/*
	 * Handle the special case of NULL output first
	 * to avoid inspecting c16 and ps and possibly drawing
	 * bogus conclusions from whatever those may contain.
	 * Instead, just restore the initial conversion state.
	 * The return value represents the length of the NUL byte
	 * corresponding to the NUL wide character, even though
	 * there is no place to write that NUL byte to.
	 */
	if (s == NULL) {
		memset(ps, 0, sizeof(*ps));
		return 1;
	}

	us = (struct _utf16_state *)ps;

	if (us->want == UTF16_WANT_LOW) {

		/*
		 * The previous call read a high surrogate,
		 * so expect a low surrogate now.
		 */
		if ((c16 & 0xfc00) != 0xdc00) {
			errno = EILSEQ;
			return (size_t)-1;
		}

		/*
		 * Assemble the full code point for processing
		 * by wcrtomb(3).  Since we do not support
		 * state-dependent encodings, our wcrtomb(3)
		 * always expects the initial conversion state,
		 * so clearing the state here is just fine.
		 */
		wc = us->ch + (c16 & 0x3ff);
		us->ch = 0;
		us->want = 0;

	} else if ((c16 & 0xfc00) == 0xd800) {

		/*
		 * Got a high surrogate while being in the initial
		 * conversion state.  Remember its contribution to
		 * the code point and defer encoding to the next call.
		 */
		us->ch = 0x10000 + ((c16 & 0x3ff) << 10);
		us->want = UTF16_WANT_LOW;

		/* Nothing was written to *s just yet. */
		return 0;

	} else {
		wc = c16;
	}

	/*
	 * The following correctly returns an error when a low
	 * surrogate is encountered without a preceding high one.
	 */
	return wcrtomb(s, wc, ps);
}
101