1*349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 2*349cc55cSDimitry Andric // 3*349cc55cSDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*349cc55cSDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*349cc55cSDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*349cc55cSDimitry Andric // 7*349cc55cSDimitry Andric //===----------------------------------------------------------------------===// 8*349cc55cSDimitry Andric 9*349cc55cSDimitry Andric #include <cstddef> // size_t 10*349cc55cSDimitry Andric #include <cwchar> // mbstate_t 11*349cc55cSDimitry Andric #include <limits.h> // MB_LEN_MAX 12*349cc55cSDimitry Andric #include <string.h> // wmemcpy 13*349cc55cSDimitry Andric 14*349cc55cSDimitry Andric // Returns the number of wide characters found in the multi byte sequence `src` 15*349cc55cSDimitry Andric // (of `src_size_bytes`), that fit in the buffer `dst` (of `max_dest_chars` 16*349cc55cSDimitry Andric // elements size). The count returned excludes the null terminator. 17*349cc55cSDimitry Andric // When `dst` is NULL, no characters are copied to `dst`. 18*349cc55cSDimitry Andric // Returns (size_t) -1 when an invalid sequence is encountered. 19*349cc55cSDimitry Andric // Leaves *`src` pointing to the next character to convert or NULL 20*349cc55cSDimitry Andric // if a null character was converted from *`src`. 
// Converts the multibyte sequence at `*src` (at most `src_size_bytes` bytes)
// into wide characters, one character per mbrtowc() call.
//
//   dst            - output buffer, or null to only count characters.
//   src            - in/out cursor into the multibyte input. It is updated only
//                    when `dst` is non-null: set to null if the null character
//                    was converted, otherwise advanced past the bytes that were
//                    fully converted.
//   src_size_bytes - maximum number of bytes to read from `*src`.
//   max_dest_chars - capacity of `dst` in wide characters; ignored when `dst`
//                    is null.
//   ps             - conversion state, forwarded unchanged to mbrtowc().
//
// Returns the number of wide characters converted, not counting the null
// terminator, or (size_t)-1 if mbrtowc() reports an invalid sequence. Trailing
// bytes that form an incomplete sequence are neither counted nor skipped in
// `*src`.
_LIBCPP_FUNC_VIS
size_t mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                  size_t src_size_bytes, size_t max_dest_chars,
                  mbstate_t *__restrict ps) {
  const size_t terminated_sequence = static_cast<size_t>(0);
  const size_t invalid_sequence = static_cast<size_t>(-1);
  const size_t incomplete_sequence = static_cast<size_t>(-2);

  size_t source_converted = 0;
  size_t dest_converted = 0;
  size_t result = 0;

  // The destination limit only applies when there is a destination buffer.
  while (source_converted < src_size_bytes && (dst == nullptr || dest_converted < max_dest_chars)) {
    const size_t source_remaining = src_size_bytes - source_converted;

    // mbrtowc() stores at most ONE wide character per call, no matter how many
    // bytes the multibyte character occupies. The loop condition guarantees
    // that slot `dest_converted` is free, so the character can always be
    // written straight into its final position; no scratch buffer or state
    // rollback is required.
    wchar_t *const slot = (dst == nullptr) ? nullptr : dst + dest_converted;

    // Converts one multi byte character.
    // If result is greater than 0, it's the size in bytes of that character.
    // If result is zero, the null character has been found (and stored).
    // Otherwise, it's an error and errno may be set.
    result = mbrtowc(slot, *src + source_converted, source_remaining, ps);

    // Don't do anything to change errno from here on.
    if (result == invalid_sequence || result == terminated_sequence || result == incomplete_sequence)
      break;

    ++dest_converted;
    source_converted += result;
  }

  if (dst != nullptr) {
    if (result == terminated_sequence)
      *src = nullptr;
    else
      *src += source_converted;
  }

  if (result == invalid_sequence)
    return invalid_sequence;

  return dest_converted;
}