xref: /netbsd-src/lib/libc/locale/mbrtoc32.3 (revision fdd9db8a91c767e1b3e0b7be194f588935269cca)
.\"	$NetBSD: mbrtoc32.3,v 1.9 2024/08/23 12:59:49 riastradh Exp $
.\"
.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\"    notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\"    notice, this list of conditions and the following disclaimer in the
.\"    documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd August 14, 2024
.Dt MBRTOC32 3
.Os
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh NAME
.Nm mbrtoc32
.Nd Restartable multibyte to UTF-32 conversion
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh LIBRARY
.Lb libc
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh SYNOPSIS
.
.In uchar.h
.
.Ft size_t
.Fo mbrtoc32
.Fa "char32_t * restrict pc32"
.Fa "const char * restrict s"
.Fa "size_t n"
.Fa "mbstate_t * restrict ps"
.Fc
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh DESCRIPTION
The
.Nm
function decodes multibyte characters in the current locale and
converts them to Unicode scalar values (i.e., to UTF-32), keeping state
so it can restart after incremental progress.
.Pp
Each call to
.Nm :
.Bl -enum -compact
.It
examines up to
.Fa n
bytes starting at
.Fa s ,
.It
yields a Unicode scalar value (i.e., a UTF-32 code unit) if available
by storing it at
.Li * Ns Fa pc32 ,
.It
saves state at
.Fa ps ,
and
.It
returns either the number of bytes consumed if any or a special return
value.
.El
.Pp
Specifically:
.Bl -bullet
.It
If the multibyte sequence at
.Fa s
is invalid after any previous input saved at
.Fa ps ,
or if an error occurs in decoding,
.Nm
returns
.Li (size_t)-1
and sets
.Xr errno 2
to indicate the error.
.It
If the multibyte sequence at
.Fa s
is still incomplete after
.Fa n
bytes, including any previous input saved in
.Fa ps ,
.Nm
saves its state in
.Fa ps
after all the input so far and returns
.Li (size_t)-2 .
.It
If
.Nm
decodes the null multibyte character, then it stores zero at
.Li * Ns Fa pc32
and returns zero.
.It
Otherwise,
.Nm
decodes a single multibyte character, stores its Unicode scalar value
at
.Li * Ns Fa pc32 ,
and returns the number of bytes consumed to decode the first multibyte
character.
.El
.Pp
If
.Fa pc32
is a null pointer, nothing is stored, but the effects on
.Fa ps
and the return value are unchanged.
.Pp
If
.Fa s
is a null pointer, the
.Nm
call is equivalent to:
.Bd -ragged -offset indent
.Fo mbrtoc32
.Li NULL ,
.Li \*q\*q ,
.Li 1 ,
.Fa ps
.Fc
.Ed
.Pp
This always returns zero, and has the effect of resetting
.Fa ps
to the initial conversion state, without writing to
.Fa pc32 ,
even if it is nonnull.
.Pp
If
.Fa ps
is a null pointer,
.Nm
uses an internal
.Vt mbstate_t
object with static storage duration, distinct from all other
.Vt mbstate_t
objects
.Po
including those used by
.Xr mbrtoc8 3 ,
.Xr mbrtoc16 3 ,
.Xr c8rtomb 3 ,
.Xr c16rtomb 3 ,
and
.Xr c32rtomb 3
.Pc ,
which is initialized at program startup to the initial conversion
state.
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh RETURN VALUES
The
.Nm
function returns:
.Bl -tag -width Li
.It Li 0
.Bq null
if
.Nm
decoded a null multibyte character.
.It Ar i
.Bq scalar value
where
.Li 0
<
.Ar i
\*(Le
.Fa n ,
if
.Nm
consumed
.Ar i
bytes of input to decode the next multibyte character, yielding a
Unicode scalar value.
.It Li (size_t)-2
.Bq incomplete
if
.Nm
found only an incomplete multibyte sequence after all
.Fa n
bytes of input and any previous input, and saved its state to restart
in the next call with
.Fa ps .
.It Li (size_t)-1
.Bq error
if any encoding error was detected;
.Xr errno 2
is set to reflect the error.
.El
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh EXAMPLES
.Bd -literal -offset indent
char *s = ...;
size_t n = ...;
mbstate_t mbs = {0};    /* initial conversion state */

while (n) {
        char32_t c32;
        size_t len;

        len = mbrtoc32(&c32, s, n, &mbs);
        switch (len) {
        case 0:                 /* NUL terminator */
                assert(c32 == 0);
                goto out;
        default:                /* scalar value */
                printf("U+%04"PRIx32"\en", (uint32_t)c32);
                break;
        case (size_t)-2:        /* incomplete */
                printf("incomplete\en");
                goto readmore;
        case (size_t)-1:        /* error */
                printf("error: %d\en", errno);
                goto out;
        }
        s += len;
        n -= len;
}
.Ed
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh ERRORS
.Bl -tag -width Bq
.It Bq Er EILSEQ
The multibyte sequence cannot be decoded in the current locale as a
Unicode scalar value.
.It Bq Er EIO
An error occurred in loading the locale's character conversions.
.El
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh SEE ALSO
.Xr c16rtomb 3 ,
.Xr c32rtomb 3 ,
.Xr c8rtomb 3 ,
.Xr mbrtoc16 3 ,
.Xr mbrtoc8 3 ,
.Xr uchar 3
.Rs
.%B The Unicode Standard
.%O Version 15.0 \(em Core Specification
.%Q The Unicode Consortium
.%D September 2022
.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
.Re
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh STANDARDS
The
.Nm
function conforms to
.St -isoC-2011 .
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh HISTORY
The
.Nm
function first appeared in
.Nx 11.0 .