xref: /openbsd-src/lib/libc/locale/c16rtomb.c (revision 46c354aa2baf687e7a81339ec07289555b065bb2)
1 /*	$OpenBSD: c16rtomb.c,v 1.1 2023/08/20 15:02:51 schwarze Exp $ */
2 /*
3  * Copyright (c) 2022 Ingo Schwarze <schwarze@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  */
17 
#include <errno.h>
#include <string.h>
#include <uchar.h>
#include <wchar.h>
21 
/*
 * Conversion state that c16rtomb() overlays on the caller's mbstate_t.
 *
 * The member layout has to remain compatible with
 * struct _utf8_state in the file citrus/citrus_utf8.c,
 * so do not reorder, resize, or rename anything here
 * without changing that structure as well.
 */
struct _utf16_state {
	/* Code point contribution saved from a high surrogate. */
	wchar_t	ch;
	/*
	 * Set to -3 by c16rtomb() while a low surrogate is pending;
	 * cleared to 0 again once that low surrogate has arrived.
	 */
	int	want;
};
30 
31 size_t
c16rtomb(char * s,char16_t c16,mbstate_t * ps)32 c16rtomb(char *s, char16_t c16, mbstate_t *ps)
33 {
34 	static mbstate_t	 mbs;
35 	struct _utf16_state	*us;
36 	wchar_t			 wc;
37 
38 	if (ps == NULL)
39 		ps = &mbs;
40 
41 	/*
42 	 * Handle the special case of NULL output first
43 	 * to avoid inspecting c16 and ps and possibly drawing
44 	 * bogus conclusions from whatever those may contain.
45 	 * Instead, just restore the initial conversion state.
46 	 * The return value represents the length of the NUL byte
47 	 * corresponding to the NUL wide character, even though
48 	 * there is no place to write that NUL byte to.
49 	 */
50 	if (s == NULL) {
51 		memset(ps, 0, sizeof(*ps));
52 		return 1;
53 	}
54 
55 	us = (struct _utf16_state *)ps;
56 
57 	if (us->want == (size_t)-3) {
58 
59 		/*
60 		 * The previous call read a high surrogate,
61 		 * so expect a low surrogate now.
62 		 */
63 		if ((c16 & 0xfc00) != 0xdc00) {
64 			errno = EILSEQ;
65 			return -1;
66 		}
67 
68 		/*
69 		 * Assemble the full code point for processing
70 		 * by wcrtomb(3).  Since we do not support
71 		 * state-dependent encodings, our wcrtomb(3)
72 		 * always expects the initial conversion state,
73 		 * so clearing the state here is just fine.
74 		 */
75 		wc = us->ch + (c16 & 0x3ff);
76 		us->ch = 0;
77 		us->want = 0;
78 
79 	} else if ((c16 & 0xfc00) == 0xd800) {
80 
81 		/*
82 		 * Got a high surrogate while being in the initial
83 		 * conversion state.  Remeber its contribution to
84 		 * the codepoint and defer encoding to the next call.
85 		 */
86 		us->ch = 0x10000 + ((c16 & 0x3ff) << 10);
87 		us->want = -3;
88 
89 		/* Nothing was written to *s just yet. */
90 		return 0;
91 
92 	} else
93 		wc = c16;
94 
95 	/*
96 	 * The following correctly returns an error when a low
97 	 * surrogate is encountered without a preceding high one.
98 	 */
99 	return wcrtomb(s, wc, ps);
100 }
101