xref: /netbsd-src/lib/libc/locale/c8rtomb.3 (revision dc3553e59b19a24d4de3168ff20ad49370132e47)
1.\"	$NetBSD: c8rtomb.3,v 1.9 2024/08/20 20:36:30 riastradh Exp $
2.\"
3.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
4.\" All rights reserved.
5.\"
6.\" Redistribution and use in source and binary forms, with or without
7.\" modification, are permitted provided that the following conditions
8.\" are met:
9.\" 1. Redistributions of source code must retain the above copyright
10.\"    notice, this list of conditions and the following disclaimer.
11.\" 2. Redistributions in binary form must reproduce the above copyright
12.\"    notice, this list of conditions and the following disclaimer in the
13.\"    documentation and/or other materials provided with the distribution.
14.\"
15.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
16.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
17.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
19.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25.\" POSSIBILITY OF SUCH DAMAGE.
26.\"
27.Dd August 15, 2024
28.Dt C8RTOMB 3
29.Os
30.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
31.Sh NAME
32.Nm c8rtomb
33.Nd Restartable UTF-8 to multibyte conversion
34.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
35.Sh LIBRARY
36.Lb libc
37.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
38.Sh SYNOPSIS
39.
40.In uchar.h
41.
42.Ft size_t
43.Fo c8rtomb
44.Fa "char * restrict s"
45.Fa "char8_t c8"
46.Fa "mbstate_t * restrict ps"
47.Fc
48.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
49.Sh DESCRIPTION
50The
51.Nm
52function decodes UTF-8 and converts it to multibyte characters in the
53current locale, keeping state to remember incremental progress if
54restarted.
55.Pp
56Each call to
57.Nm
58updates the conversion state
59.Fa ps
60with a UTF-8 code unit
61.Fa c8 ,
62writes up to
63.Dv MB_CUR_MAX
64bytes (possibly none) to
65.Fa s ,
66and returns either the number of bytes written to
67.Fa s
68or
69.Li (size_t)-1
70to denote error.
71.Pp
72If
73.Fa s
74is a null pointer,
75no output is produced and
76.Fa ps
77is reset to the initial conversion state, as if the call had been
78.Fo c8rtomb
79.Va buf ,
80.Li 0 ,
81.Fa ps
82.Fc
83for some internal buffer
84.Va buf .
85.Pp
86If
87.Fa c8
88is zero,
89.Nm
90discards any pending incomplete UTF-8 code unit sequence in
91.Fa ps ,
92outputs a (possibly empty) shift sequence to restore the initial state
93followed by a NUL byte, and resets
94.Fa ps
95to the initial conversion state.
96.Pp
97If
98.Fa ps
99is a null pointer,
100.Nm
101uses an internal
102.Vt mbstate_t
103object with static storage duration, distinct from all other
104.Vt mbstate_t
105objects
106.Po
107including those used by other functions such as
108.Xr mbrtoc8 3
109.Pc ,
110which is initialized at program startup to the initial conversion
111state.
112.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
113.Sh RETURN VALUES
114The
115.Nm
116function returns the number of bytes written to
117.Fa s
118on success, or sets
119.Xr errno 2
120and returns
121.Li "(size_t)-1"
122on failure.
123.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
124.Sh EXAMPLES
125Convert a UTF-8 code unit sequence to a multibyte string,
126NUL-terminate it (with any shift sequence needed to restore the initial
127state), and print it:
128.Bd -literal -offset indent
129char8_t c8[] = { 0xf0, 0x9f, 0x92, 0xa9 };
130char buf[(__arraycount(c8) + 1)*MB_LEN_MAX], *s = buf;
131size_t i, len;
132mbstate_t mbs = {0};    /* initial conversion state */
133
134for (i = 0; i < __arraycount(c8); i++) {
137        len = c8rtomb(s, c8[i], &mbs);
138        if (len == (size_t)-1)
139                err(1, "c8rtomb");
140        assert(len < sizeof(buf) - (s - buf));
141        s += len;
142}
143len = c8rtomb(s, 0, &mbs);              /* NUL-terminate */
144if (len == (size_t)-1)
145        err(1, "c8rtomb");
146assert(len <= sizeof(buf) - (s - buf));
147printf("%s\en", buf);
148.Ed
149.Pp
150To avoid a variable-length array, this code uses
151.Dv MB_LEN_MAX ,
152which is a constant upper bound on the locale-dependent
153.Dv MB_CUR_MAX .
154.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
155.Sh ERRORS
156.Bl -tag -width Bq
157.It Bq Er EILSEQ
158.Fa c8
159is invalid as the next code unit in the conversion state
160.Fa ps .
161.It Bq Er EILSEQ
162The input cannot be encoded as a multibyte sequence in the current
163locale.
164.It Bq Er EIO
165An error occurred in loading the locale's character conversions.
166.El
167.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
168.Sh SEE ALSO
169.Xr c16rtomb 3 ,
170.Xr c32rtomb 3 ,
171.Xr mbrtoc8 3 ,
172.Xr mbrtoc16 3 ,
173.Xr mbrtoc32 3 ,
174.Xr uchar 3
175.Rs
176.%B The Unicode Standard
177.%O Version 15.0 \(em Core Specification
178.%Q The Unicode Consortium
179.%D September 2022
180.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
181.Re
182.Rs
183.%A F. Yergeau
184.%T UTF-8, a transformation format of ISO 10646
185.%R RFC 3629
186.%D November 2003
187.%I Internet Engineering Task Force
188.%U https://datatracker.ietf.org/doc/html/rfc3629
189.Re
190.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
191.\" .Sh STANDARDS
192.\" The
193.\" .Nm
194.\" function conforms to
195.\" .St -isoC-2023 .
196.\" .\" XXX PR misc/58600: man pages lack C17, C23, C++98, C++03, C++11, C++17, C++20, C++23 citation syntax
197.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
198.Sh HISTORY
199The
200.Nm
201function first appeared in
202.Nx 11.0 .
203.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
204.Sh CAVEATS
205The standard requires that passing zero as
206.Fa c8
207unconditionally reset the conversion state and output a NUL byte:
208.Bd -filled -offset indent
209If
210.Fa c8
211is a null character, a null byte is stored, preceded by any shift
212sequence needed to restore the initial shift state; the resulting state
213described is the initial conversion state.
214.Ed
215.Pp
216However, some implementations such as glibc 2.36 ignore this clause
217and, if the zero was preceded by a nonempty incomplete UTF-8 code unit
218sequence, fail with
219.Er EILSEQ
220instead.
221