xref: /llvm-project/libcxx/src/support/ibm/mbsnrtowcs.cpp (revision d0438d2d087e78571a671c98cbb42308e4dcfcec)
1eb8650a7SLouis Dionne //===----------------------------------------------------------------------===//
24247381eSMuiez Ahmed //
34247381eSMuiez Ahmed // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
44247381eSMuiez Ahmed // See https://llvm.org/LICENSE.txt for license information.
54247381eSMuiez Ahmed // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
64247381eSMuiez Ahmed //
74247381eSMuiez Ahmed //===----------------------------------------------------------------------===//
84247381eSMuiez Ahmed 
94247381eSMuiez Ahmed #include <cstddef>  // size_t
104247381eSMuiez Ahmed #include <cwchar>   // mbstate_t
114247381eSMuiez Ahmed #include <limits.h> // MB_LEN_MAX
124247381eSMuiez Ahmed #include <string.h> // wmemcpy
134247381eSMuiez Ahmed 
// Converts the multibyte sequence `*src` (at most `src_size_bytes` bytes) into
// wide characters, one `mbrtowc` call per character.
//
// - `dst`:            destination buffer of `max_dest_chars` elements, or NULL
//                     to only count the characters; `max_dest_chars` is
//                     ignored when `dst` is NULL.
// - `src`:            in/out pointer to the multibyte input. It is only
//                     updated when `dst` is not NULL: it is set to NULL if a
//                     null character was converted, otherwise it is advanced
//                     to the next character to convert.
// - `ps`:             conversion state, forwarded unchanged to `mbrtowc`.
//
// Returns the number of wide characters converted, excluding the null
// terminator, or (size_t) -1 when an invalid sequence is encountered.
_LIBCPP_EXPORTED_FROM_ABI size_t mbsnrtowcs(
    wchar_t* __restrict dst,
    const char** __restrict src,
    size_t src_size_bytes,
    size_t max_dest_chars,
    mbstate_t* __restrict ps) {
  const size_t terminated_sequence = static_cast<size_t>(0);
  const size_t invalid_sequence    = static_cast<size_t>(-1);
  const size_t incomplete_sequence = static_cast<size_t>(-2);

  size_t source_converted = 0;
  size_t dest_converted   = 0;
  size_t result           = 0;

  // Tracked separately from `result`: `result` starts out as 0, which is also
  // the "null character converted" code, so it cannot be used to tell whether
  // a terminator was really seen when the loop body never runs.
  bool reached_terminator = false;

  // The destination capacity only matters when there is a destination.
  while (source_converted < src_size_bytes && (dst == nullptr || dest_converted < max_dest_chars)) {
    // Converts one multi byte character.
    // If result is greater than 0, it's the size in bytes of that character.
    // If result is zero, it indicates that the null character has been found.
    // Otherwise, it's an error and errno may be set.
    //
    // mbrtowc stores at most one wide character per call and the loop
    // condition guarantees a free slot, so the conversion can always be done
    // directly into the slot for this character; no temporary buffer needed.
    wchar_t* const dest_slot = dst == nullptr ? nullptr : dst + dest_converted;
    result = mbrtowc(dest_slot, *src + source_converted, src_size_bytes - source_converted, ps);

    // Don't do anything to change errno from here on.
    if (result == terminated_sequence) {
      reached_terminator = true;
      break;
    }
    if (result == invalid_sequence || result == incomplete_sequence)
      break;

    ++dest_converted;
    source_converted += result;
  }

  if (dst != nullptr) {
    if (reached_terminator)
      *src = nullptr;
    else
      *src += source_converted;
  }
  if (result == invalid_sequence)
    return invalid_sequence;

  return dest_converted;
}
98