//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include <cstddef>  // size_t
#include <cwchar>   // mbstate_t
#include <limits.h> // MB_LEN_MAX
#include <string.h> // wmemcpy

// Returns the number of wide characters found in the multi byte sequence `src`
// (of `src_size_bytes`), that fit in the buffer `dst` (of `max_dest_chars`
// elements size). The count returned excludes the null terminator.
// When `dst` is NULL, no characters are copied to `dst`.
// Returns (size_t) -1 when an invalid sequence is encountered.
// Leaves *`src` pointing to the next character to convert or NULL
// if a null character was converted from *`src`.
// Implementation notes:
//  * Characters are converted one at a time with mbrtowc(), which stores at
//    most ONE wchar_t per call no matter how many bytes it consumes. Because
//    the loop only runs while a destination slot is free, every character can
//    be written straight to `dst + dest_converted`; no staging buffer and no
//    save/restore of `*ps` is required.
//  * `ps` is forwarded to mbrtowc() unchanged (including when it is null).
//  * When `dst` is null, `max_dest_chars` is ignored and `*src` is left
//    untouched; only the count of convertible characters is returned.
//  * `*src` is set to null only when a terminating null character was really
//    converted, not merely because nothing was converted at all.
_LIBCPP_EXPORTED_FROM_ABI size_t mbsnrtowcs(
    wchar_t* __restrict dst,
    const char** __restrict src,
    size_t src_size_bytes,
    size_t max_dest_chars,
    mbstate_t* __restrict ps) {
  const size_t terminated_sequence = static_cast<size_t>(0);
  const size_t invalid_sequence    = static_cast<size_t>(-1);
  const size_t incomplete_sequence = static_cast<size_t>(-2);

  size_t source_converted = 0; // bytes consumed from *src so far
  size_t dest_converted   = 0; // wide characters produced so far (terminator excluded)
  bool found_terminator   = false;
  bool found_invalid      = false;

  // If `dst` is null then `max_dest_chars` is ignored according to the
  // standard, hence the `dst == nullptr ||` short-circuit below.
  while (source_converted < src_size_bytes && (dst == nullptr || dest_converted < max_dest_chars)) {
    // Converts one multi byte character.
    // If result is greater than 0, it's the size in bytes of that character.
    // If result is zero, it indicates that the null character has been found.
    // Otherwise, it's an error and errno may be set.
    wchar_t* const out  = (dst != nullptr) ? dst + dest_converted : nullptr;
    const size_t result = mbrtowc(out, *src + source_converted, src_size_bytes - source_converted, ps);

    // Don't do anything to change errno from here on.
    if (result == invalid_sequence) {
      found_invalid = true;
      break;
    }
    if (result == terminated_sequence) {
      // The null wide character has been stored (when `dst` is non-null) but
      // is deliberately not counted.
      found_terminator = true;
      break;
    }
    if (result == incomplete_sequence)
      break; // The remaining bytes do not form a complete character.

    ++dest_converted;
    source_converted += result;
  }

  if (dst != nullptr) {
    if (found_terminator)
      *src = nullptr;
    else
      *src += source_converted;
  }

  return found_invalid ? invalid_sequence : dest_converted;
}