xref: /freebsd-src/contrib/llvm-project/libcxx/src/support/ibm/mbsnrtowcs.cpp (revision 349cc55c9796c4596a5b9904cd3281af295f878f)
1*349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
2*349cc55cSDimitry Andric //
3*349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*349cc55cSDimitry Andric //
7*349cc55cSDimitry Andric //===----------------------------------------------------------------------===//
8*349cc55cSDimitry Andric 
9*349cc55cSDimitry Andric #include <cstddef> // size_t
10*349cc55cSDimitry Andric #include <cwchar>  // mbstate_t
11*349cc55cSDimitry Andric #include <limits.h> // MB_LEN_MAX
12*349cc55cSDimitry Andric #include <string.h> // wmemcpy
13*349cc55cSDimitry Andric 
// Converts the multibyte sequence `*src` (at most `src_size_bytes` bytes) into
// wide characters, one character at a time, using `mbrtowc`.
//
// Parameters:
//   dst             Destination buffer, or NULL to only count characters.
//   src             In/out pointer to the multibyte input.
//   src_size_bytes  Maximum number of bytes to read from `*src`.
//   max_dest_chars  Capacity of `dst` in wide characters; ignored when `dst`
//                   is NULL.
//   ps              Conversion state, forwarded unchanged to `mbrtowc`.
//
// Returns the number of wide characters converted, excluding the null
// terminator, or (size_t) -1 when an invalid sequence is encountered.
//
// When `dst` is not NULL, `*src` is left pointing to the next character to
// convert, or is set to NULL if a null character was converted from `*src`
// (in which case the null wide character has also been stored in `dst`).
// When `dst` is NULL, `*src` is not modified.
_LIBCPP_FUNC_VIS
size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                   size_t src_size_bytes, size_t max_dest_chars,
                   mbstate_t *__restrict ps) {
  const size_t terminated_sequence = static_cast<size_t>(0);
  const size_t invalid_sequence = static_cast<size_t>(-1);
  const size_t incomplete_sequence = static_cast<size_t>(-2);

  size_t source_converted = 0;
  size_t dest_converted = 0;
  size_t result = 0;

  // Tracked explicitly rather than inferred from `result`: `result` starts at
  // zero, so when the loop body never runs (no source bytes, or no room in
  // `dst`) it must not be mistaken for "a null character was converted".
  bool found_terminator = false;

  // If `dst` is null then `max_dest_chars` is ignored.
  while (source_converted < src_size_bytes &&
         (dst == nullptr || dest_converted < max_dest_chars)) {
    // Converts one multi byte character.
    // If result is greater than 0, it's the size in bytes of that character.
    // If result is zero, it indicates that the null character has been found.
    // Otherwise, it's an error and errno may be set.
    //
    // `mbrtowc` stores at most one wide character per call, and the loop
    // condition guarantees at least one free slot in `dst`, so the character
    // can always be written directly to its final position. The number of
    // bytes consumed is unrelated to the number of slots needed.
    wchar_t *out = (dst == nullptr) ? nullptr : dst + dest_converted;
    result = mbrtowc(out, *src + source_converted,
                     src_size_bytes - source_converted, ps);

    // Don't do anything to change errno from here on.
    if (result == terminated_sequence) {
      // The terminator is stored in `dst` (when non-null) but not counted.
      found_terminator = true;
      break;
    }
    if (result == invalid_sequence || result == incomplete_sequence)
      break;

    ++dest_converted;
    source_converted += result;
  }

  if (dst != nullptr) {
    if (found_terminator)
      *src = nullptr;
    else
      *src += source_converted;
  }
  if (result == invalid_sequence)
    return invalid_sequence;

  return dest_converted;
}
96