.\" $NetBSD: mbrtoc32.3,v 1.9 2024/08/23 12:59:49 riastradh Exp $
.\"
.\" Copyright (c) 2024 The NetBSD Foundation, Inc.
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
.\" PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
.\" POSSIBILITY OF SUCH DAMAGE.
.\"
.Dd August 14, 2024
.Dt MBRTOC32 3
.Os
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh NAME
.Nm mbrtoc32
.Nd Restartable multibyte to UTF-32 conversion
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh LIBRARY
.Lb libc
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh SYNOPSIS
.
.In uchar.h
.
.Ft size_t
.Fo mbrtoc32
.Fa "char32_t * restrict pc32"
.Fa "const char * restrict s"
.Fa "size_t n"
.Fa "mbstate_t * restrict ps"
.Fc
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh DESCRIPTION
The
.Nm
function decodes multibyte characters in the current locale and
converts them to Unicode scalar values (i.e., to UTF-32), keeping state
so it can restart after incremental progress.
.Pp
Each call to
.Nm :
.Bl -enum -compact
.It
examines up to
.Fa n
bytes starting at
.Fa s ,
.It
yields a Unicode scalar value (i.e., a UTF-32 code unit) if available
by storing it at
.Li * Ns Fa pc32 ,
.It
saves state at
.Fa ps ,
and
.It
returns either the number of bytes consumed if any or a special return
value.
.El
.Pp
Specifically:
.Bl -bullet
.It
If the multibyte sequence at
.Fa s
is invalid after any previous input saved at
.Fa ps ,
or if an error occurs in decoding,
.Nm
returns
.Li (size_t)-1
and sets
.Xr errno 2
to indicate the error.
.It
If the multibyte sequence at
.Fa s
is still incomplete after
.Fa n
bytes, including any previous input saved in
.Fa ps ,
.Nm
saves its state in
.Fa ps
after all the input so far and returns
.Li (size_t)-2 .
.It
If
.Nm
decodes the null multibyte character, then it stores zero at
.Li * Ns Fa pc32
and returns zero.
.It
Otherwise,
.Nm
decodes a single multibyte character, stores its Unicode scalar value
at
.Li * Ns Fa pc32 ,
and returns the number of bytes consumed to decode the first multibyte
character.
.El
.Pp
If
.Fa pc32
is a null pointer, nothing is stored, but the effects on
.Fa ps
and the return value are unchanged.
.Pp
If
.Fa s
is a null pointer, the
.Nm
call is equivalent to:
.Bd -ragged -offset indent
.Fo mbrtoc32
.Li NULL ,
.Li \*q\*q ,
.Li 1 ,
.Fa ps
.Fc
.Ed
.Pp
This always returns zero, and has the effect of resetting
.Fa ps
to the initial conversion state, without writing to
.Fa pc32 ,
even if it is nonnull.
.Pp
If
.Fa ps
is a null pointer,
.Nm
uses an internal
.Vt mbstate_t
object with static storage duration, distinct from all other
.Vt mbstate_t
objects
.Po
including those used by
.Xr mbrtoc8 3 ,
.Xr mbrtoc16 3 ,
.Xr c8rtomb 3 ,
.Xr c16rtomb 3 ,
and
.Xr c32rtomb 3
.Pc ,
which is initialized at program startup to the initial conversion
state.
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh RETURN VALUES
The
.Nm
function returns:
.Bl -tag -width Li
.It Li 0
.Bq null
if
.Nm
decoded a null multibyte character.
.It Ar i
.Bq scalar value
where
.Li 1
\*(Le
.Ar i
\*(Le
.Fa n ,
if
.Nm
consumed
.Ar i
bytes of input to decode the next multibyte character, yielding a
Unicode scalar value.
.It Li (size_t)-2
.Bq incomplete
if
.Nm
found only an incomplete multibyte sequence after all
.Fa n
bytes of input and any previous input, and saved its state to restart
in the next call with
.Fa ps .
.It Li (size_t)-1
.Bq error
if any encoding error was detected;
.Xr errno 2
is set to reflect the error.
.El
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh EXAMPLES
.Bd -literal -offset indent
char *s = ...;
size_t n = ...;
mbstate_t mbs = {0}; /* initial conversion state */

while (n) {
	char32_t c32;
	size_t len;

	len = mbrtoc32(&c32, s, n, &mbs);
	switch (len) {
	case 0: /* NUL terminator */
		assert(c32 == 0);
		goto out;
	default: /* scalar value */
		printf("U+%04"PRIx32"\en", (uint32_t)c32);
		break;
	case (size_t)-2: /* incomplete */
		printf("incomplete\en");
		goto readmore;
	case (size_t)-1: /* error */
		printf("error: %d\en", errno);
		goto out;
	}
	s += len;
	n -= len;
}
.Ed
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh ERRORS
.Bl -tag -width Bq
.It Bq Er EILSEQ
The multibyte sequence cannot be decoded in the current locale as a
Unicode scalar value.
.It Bq Er EIO
An error occurred in loading the locale's character conversions.
.El
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh SEE ALSO
.Xr c16rtomb 3 ,
.Xr c32rtomb 3 ,
.Xr c8rtomb 3 ,
.Xr mbrtoc16 3 ,
.Xr mbrtoc8 3 ,
.Xr uchar 3
.Rs
.%B The Unicode Standard
.%O Version 15.0 \(em Core Specification
.%Q The Unicode Consortium
.%D September 2022
.%U https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf
.Re
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh STANDARDS
The
.Nm
function conforms to
.St -isoC-2011 .
.\"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
.Sh HISTORY
The
.Nm
function first appeared in
.Nx 11.0 .