xref: /freebsd-src/contrib/llvm-project/libcxx/src/support/ibm/mbsnrtowcs.cpp (revision cb14a3fe5122c879eae1fb480ed7ce82a699ddb6)
1349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
2349cc55cSDimitry Andric //
3349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6349cc55cSDimitry Andric //
7349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
8349cc55cSDimitry Andric 
9349cc55cSDimitry Andric #include <cstddef>  // size_t
10349cc55cSDimitry Andric #include <cwchar>   // mbstate_t
11349cc55cSDimitry Andric #include <limits.h> // MB_LEN_MAX
12349cc55cSDimitry Andric #include <string.h> // wmemcpy
13349cc55cSDimitry Andric 
// Converts the multi byte sequence `*src` (at most `src_size_bytes` bytes of
// it) into wide characters, one `mbrtowc` call per character, using the
// conversion state `ps`.
//
// - `dst`: destination buffer of `max_dest_chars` elements, or NULL to only
//   count characters. When `dst` is NULL, `max_dest_chars` is ignored and
//   `*src` is left untouched.
// - Returns the number of wide characters converted, excluding the null
//   terminator, or (size_t) -1 when an invalid sequence is encountered.
// - When `dst` is not NULL, leaves `*src` pointing to the next character to
//   convert, or sets it to NULL if a null character was converted from `*src`
//   (the null wide character is stored in `dst` in that case).
// - Conversion also stops, without error, at a trailing incomplete sequence;
//   `*src` is then left at the start of that sequence.
_LIBCPP_EXPORTED_FROM_ABI size_t mbsnrtowcs(
    wchar_t* __restrict dst,
    const char** __restrict src,
    size_t src_size_bytes,
    size_t max_dest_chars,
    mbstate_t* __restrict ps) {
  const size_t terminated_sequence = static_cast<size_t>(0);
  const size_t invalid_sequence    = static_cast<size_t>(-1);
  const size_t incomplete_sequence = static_cast<size_t>(-2);

  size_t source_converted = 0;
  size_t dest_converted   = 0;

  // Why the loop stopped early. Tracked explicitly so that a call which never
  // enters the loop (no input bytes, or no room in `dst`) is not mistaken for
  // one that converted a null character.
  bool found_terminator = false;
  bool found_invalid    = false;

  // If `dst` is null then `max_dest_chars` is ignored, as the standard
  // requires; only the amount of input bounds the conversion.
  while (source_converted < src_size_bytes && (dst == nullptr || dest_converted < max_dest_chars)) {
    // Converts one multi byte character.
    // If result is greater than 0, it's the size in bytes of that character.
    // If result is zero, it indicates that the null character has been found.
    // Otherwise, it's an error and errno may be set.
    //
    // mbrtowc stores at most one wide character per call and the loop
    // condition guarantees a free slot, so the character can always be
    // written straight into its final position in `dst`.
    wchar_t* const out          = (dst != nullptr) ? dst + dest_converted : nullptr;
    const size_t source_remaining = src_size_bytes - source_converted;
    const size_t result           = mbrtowc(out, *src + source_converted, source_remaining, ps);

    // Don't do anything to change errno from here on.
    if (result == invalid_sequence) {
      found_invalid = true;
      break;
    }
    if (result == terminated_sequence) {
      found_terminator = true;
      break;
    }
    if (result == incomplete_sequence)
      break;

    source_converted += result;
    ++dest_converted;
  }

  if (dst != nullptr) {
    if (found_terminator)
      *src = nullptr;
    else
      *src += source_converted;
  }
  if (found_invalid)
    return invalid_sequence;

  return dest_converted;
}
98