xref: /netbsd-src/lib/libc/locale/mbrtoc32.3 (revision fdd9db8a91c767e1b3e0b7be194f588935269cca)
1.\"	$NetBSD: mbrtoc32.3,v 1.9 2024/08/23 12:59:49 riastradh Exp $
2.\"
3.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\"
15.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25.\" POSSIBILITY OF SUCH DAMAGE.
26.\"
27.Dd August 14, 2024
28.Dt MBRTOC32 3
29.Os
30.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
31.Sh NAME
32.Nm mbrtoc32
33.Nd Restartable multibyte to UTF-32 conversion
34.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
35.Sh LIBRARY
36.Lb libc
37.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
38.Sh SYNOPSIS
39.
40.In uchar.h
41.
42.Ft size_t
43.Fo mbrtoc32
44.Fa "char32_t * restrict pc32"
45.Fa "const char * restrict s"
46.Fa "size_t n"
47.Fa "mbstate_t * restrict ps"
48.Fc
49.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
50.Sh DESCRIPTION
51The
52.Nm
53function decodes multibyte characters in the current locale and
54converts them to Unicode scalar values (i.e., to UTF-32), keeping state
55so it can restart after incremental progress.
56.Pp
57Each call to
58.Nm :
59.Bl -enum -compact
60.It
61examines up to
62.Fa n
63bytes starting at
64.Fa s ,
65.It
66yields a Unicode scalar value (i.e., a UTF-32 code unit) if available
67by storing it at
68.Li * Ns Fa pc32 ,
69.It
70saves state at
71.Fa ps ,
72and
73.It
74returns either the number of bytes consumed, if any, or a special return
75value.
76.El
77.Pp
78Specifically:
79.Bl -bullet
80.It
81If the multibyte sequence at
82.Fa s
83is invalid after any previous input saved at
84.Fa ps ,
85or if an error occurs in decoding,
86.Nm
87returns
88.Li (size_t)-1
89and sets
90.Xr errno 2
91to indicate the error.
92.It
93If the multibyte sequence at
94.Fa s
95is still incomplete after
96.Fa n
97bytes, including any previous input saved in
98.Fa ps ,
99.Nm
100saves its state in
101.Fa ps
102after all the input so far and returns
103.Li (size_t)-2 .
104.It
105If
106.Nm
107decodes the null multibyte character, then it stores zero at
108.Li * Ns Fa pc32
109and returns zero.
110.It
111Otherwise,
112.Nm
113decodes a single multibyte character, stores its Unicode scalar value
114at
115.Li * Ns Fa pc32 ,
116and returns the number of bytes consumed to decode the first multibyte
117character.
118.El
119.Pp
120If
121.Fa pc32
122is a null pointer, nothing is stored, but the effects on
123.Fa ps
124and the return value are unchanged.
125.Pp
126If
127.Fa s
128is a null pointer, the
129.Nm
130call is equivalent to:
131.Bd -ragged -offset indent
132.Fo mbrtoc32
133.Li NULL ,
134.Li \*q\*q ,
135.Li 1 ,
136.Fa ps
137.Fc
138.Ed
139.Pp
140This always returns zero, and has the effect of resetting
141.Fa ps
142to the initial conversion state, without writing to
143.Fa pc32 ,
144even if it is nonnull.
145.Pp
146If
147.Fa ps
148is a null pointer,
149.Nm
150uses an internal
151.Vt mbstate_t
152object with static storage duration, distinct from all other
153.Vt mbstate_t
154objects
155.Po
156including those used by
157.Xr mbrtoc8 3 ,
158.Xr mbrtoc16 3 ,
159.Xr c8rtomb 3 ,
160.Xr c16rtomb 3 ,
161and
162.Xr c32rtomb 3
163.Pc ,
164which is initialized at program startup to the initial conversion
165state.
166.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
167.Sh RETURN VALUES
168The
169.Nm
170function returns:
171.Bl -tag -width Li
172.It Li 0
173.Bq null
174if
175.Nm
176decoded a null multibyte character.
177.It Ar i
178.Bq scalar value
179where
180.Li 1
181\*(Le
182.Ar i
183\*(Le
184.Fa n ,
185if
186.Nm
187consumed
188.Ar i
189bytes of input to decode the next multibyte character, yielding a
190Unicode scalar value.
191.It Li (size_t)-2
192.Bq incomplete
193if
194.Nm
195found only an incomplete multibyte sequence after all
196.Fa n
197bytes of input and any previous input, and saved its state to restart
198in the next call with
199.Fa ps .
200.It Li (size_t)-1
201.Bq error
202if an encoding error was detected or another error occurred in decoding;
203.Xr errno 2
204is set to reflect the error.
205.El
206.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
207.Sh EXAMPLES
208.Bd -literal -offset indent
209char *s = ...;
210size_t n = ...;
211mbstate_t mbs = {0};    /* initial conversion state */
212
213while (n) {
214        char32_t c32;
215        size_t len;
216
217        len = mbrtoc32(&c32, s, n, &mbs);
218        switch (len) {
219        case 0:                 /* NUL terminator */
220                assert(c32 == 0);
221                goto out;
222        default:                /* scalar value */
223                printf("U+%04"PRIx32"\en", (uint32_t)c32);
224                break;
225        case (size_t)-2:        /* incomplete */
226                printf("incomplete\en");
227                goto readmore;
228        case (size_t)-1:        /* error */
229                printf("error: %d\en", errno);
230                goto out;
231        }
232        s += len;
233        n -= len;
234}
235.Ed
236.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
237.Sh ERRORS
238.Bl -tag -width Bq
239.It Bq Er EILSEQ
240The multibyte sequence cannot be decoded in the current locale as a
241Unicode scalar value.
242.It Bq Er EIO
243An error occurred in loading the locale's character conversions.
244.El
245.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
246.Sh SEE ALSO
247.Xr c16rtomb 3 ,
248.Xr c32rtomb 3 ,
249.Xr c8rtomb 3 ,
250.Xr mbrtoc16 3 ,
251.Xr mbrtoc8 3 ,
252.Xr uchar 3
253.Rs
254.%B The Unicode Standard
255.%O Version 15.0 \(em Core Specification
256.%Q The Unicode Consortium
257.%D September 2022
258.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
259.Re
260.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
261.Sh STANDARDS
262The
263.Nm
264function conforms to
265.St -isoC-2011 .
266.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
267.Sh HISTORY
268The
269.Nm
270function first appeared in
271.Nx 11.0 .
272