1 /* $OpenBSD: mbrtoc16.c,v 1.1 2023/08/20 15:02:51 schwarze Exp $ */
2 /*
3 * Copyright (c) 2022 Ingo Schwarze <schwarze@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18 #include <stdint.h>
19 #include <uchar.h>
20 #include <wchar.h>
21
22 /*
23 * Keep this structure compatible with
24 * struct _utf8_state in the file citrus/citrus_utf8.c.
25 * In particular, only use values for the "want" field
26 * that do not collide with values used by the function
27 * _citrus_utf8_ctype_mbrtowc().
28 */
struct _utf16_state {
	wchar_t	ch;	/* code point whose low surrogate is still owed */
	int	want;	/* UTF16_LOW_PENDING while ch is valid, else not ours */
};

/*
 * Marker stored in the "want" field between the call that returned
 * a high surrogate and the call that returns the matching low one.
 */
#define UTF16_LOW_PENDING	(-3)

/*
 * Convert the next multibyte character to UTF-16.
 *
 * pc16	where to store one UTF-16 code unit; may be NULL to discard it
 * s	multibyte input; NULL is treated like the empty string with
 *	n = 1, and nothing is stored through pc16 in that case
 * n	maximum number of bytes of s to inspect
 * ps	conversion state; NULL selects a state object private to
 *	this function
 *
 * Returns (size_t)-3 if the low surrogate left over from the previous
 * call was delivered without looking at any input.  Otherwise returns
 * the result of mbrtowc(3) for the same s, n, and ps.  A code point
 * above UINT16_MAX is delivered as a high surrogate first, and the low
 * surrogate is kept in *ps for the following call.
 */
size_t
mbrtoc16(char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
{
	static mbstate_t	 mbs;
	struct _utf16_state	*us;
	size_t			 rv;
	wchar_t			 wc;

	/*
	 * Do not share the fallback state object of mbrtowc(3):
	 * a program may interleave mbrtowc(3) and mbrtoc16(3) calls
	 * on different strings, and the two must not disturb each other.
	 */
	if (ps == NULL)
		ps = &mbs;
	us = (struct _utf16_state *)ps;

	/*
	 * Normalize NULL input before looking at the state, such that
	 * a pending low surrogate is never written to the caller's
	 * object when no input string was given.
	 */
	if (s == NULL) {
		s = "";
		n = 1;
		pc16 = NULL;
	}

	/*
	 * The previous call delivered a high surrogate:
	 * deliver the low ten bits now, return the state to its
	 * initial value, and leave the input untouched.
	 */
	if (us->want == UTF16_LOW_PENDING) {
		if (pc16 != NULL)
			*pc16 = 0xdc00 + (us->ch & 0x3ff);
		us->ch = 0;
		us->want = 0;
		return (size_t)-3;
	}

	/*
	 * Let mbrtowc(3) do the decoding.  That covers continuing an
	 * incomplete character, the NUL character, a complete valid
	 * character, an incomplete character, and decoding errors.
	 */
	rv = mbrtowc(&wc, s, n, ps);

	/* Errors and incomplete input produce no output code unit. */
	if (rv >= (size_t)-2)
		return rv;

	if (wc > UINT16_MAX) {
		/*
		 * Outside the basic multilingual plane.
		 * 0xd7c0 is 0xd800 - (0x10000 >> 10), so adding wc >> 10
		 * yields the high surrogate directly.
		 */
		if (pc16 != NULL)
			*pc16 = 0xd7c0 + (wc >> 10);
		/* Remember the code point for the low surrogate. */
		us->ch = wc;
		us->want = UTF16_LOW_PENDING;
	} else if (pc16 != NULL) {
		/* A basic multilingual plane code point fits as is. */
		*pc16 = wc;
	}
	return rv;
}
103