106c3fb27SDimitry Andric // -*- C++ -*- 206c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 306c3fb27SDimitry Andric // 406c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 506c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 606c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 706c3fb27SDimitry Andric // 806c3fb27SDimitry Andric //===----------------------------------------------------------------------===// 906c3fb27SDimitry Andric 1006c3fb27SDimitry Andric #ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H 1106c3fb27SDimitry Andric #define _LIBCPP___FORMAT_WRITE_ESCAPED_H 1206c3fb27SDimitry Andric 1306c3fb27SDimitry Andric #include <__algorithm/ranges_copy.h> 1406c3fb27SDimitry Andric #include <__algorithm/ranges_for_each.h> 1506c3fb27SDimitry Andric #include <__charconv/to_chars_integral.h> 1606c3fb27SDimitry Andric #include <__charconv/to_chars_result.h> 1706c3fb27SDimitry Andric #include <__chrono/statically_widen.h> 1806c3fb27SDimitry Andric #include <__format/escaped_output_table.h> 1906c3fb27SDimitry Andric #include <__format/formatter_output.h> 2006c3fb27SDimitry Andric #include <__format/parser_std_format_spec.h> 2106c3fb27SDimitry Andric #include <__format/unicode.h> 2206c3fb27SDimitry Andric #include <__iterator/back_insert_iterator.h> 2306c3fb27SDimitry Andric #include <__memory/addressof.h> 2406c3fb27SDimitry Andric #include <__system_error/errc.h> 2506c3fb27SDimitry Andric #include <__type_traits/make_unsigned.h> 2606c3fb27SDimitry Andric #include <__utility/move.h> 2706c3fb27SDimitry Andric #include <string_view> 2806c3fb27SDimitry Andric 2906c3fb27SDimitry Andric #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) 3006c3fb27SDimitry Andric # pragma GCC system_header 3106c3fb27SDimitry Andric #endif 3206c3fb27SDimitry Andric 33b3edf446SDimitry Andric _LIBCPP_PUSH_MACROS 34b3edf446SDimitry Andric #include <__undef_macros> 35b3edf446SDimitry Andric 3606c3fb27SDimitry Andric _LIBCPP_BEGIN_NAMESPACE_STD 3706c3fb27SDimitry Andric 3806c3fb27SDimitry Andric namespace __formatter { 3906c3fb27SDimitry Andric 4006c3fb27SDimitry Andric #if _LIBCPP_STD_VER >= 20 4106c3fb27SDimitry Andric 4206c3fb27SDimitry Andric /// Writes a string using format's width estimation algorithm. 4306c3fb27SDimitry Andric /// 4406c3fb27SDimitry Andric /// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the 4506c3fb27SDimitry Andric /// input is ASCII. 4606c3fb27SDimitry Andric template <class _CharT> 47cb14a3feSDimitry Andric _LIBCPP_HIDE_FROM_ABI auto 48cb14a3feSDimitry Andric __write_string(basic_string_view<_CharT> __str, 4906c3fb27SDimitry Andric output_iterator<const _CharT&> auto __out_it, 5006c3fb27SDimitry Andric __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 5106c3fb27SDimitry Andric if (!__specs.__has_precision()) 525f757f3fSDimitry Andric return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs); 5306c3fb27SDimitry Andric 5406c3fb27SDimitry Andric int __size = __formatter::__truncate(__str, __specs.__precision_); 5506c3fb27SDimitry Andric 565f757f3fSDimitry Andric return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size); 5706c3fb27SDimitry Andric } 5806c3fb27SDimitry Andric 5906c3fb27SDimitry Andric #endif // _LIBCPP_STD_VER >= 20 6006c3fb27SDimitry Andric #if _LIBCPP_STD_VER >= 23 6106c3fb27SDimitry Andric 6206c3fb27SDimitry Andric struct __nul_terminator {}; 6306c3fb27SDimitry Andric 6406c3fb27SDimitry Andric template <class _CharT> 6506c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) { 6606c3fb27SDimitry Andric return *__cstr == _CharT('\0'); 6706c3fb27SDimitry Andric } 6806c3fb27SDimitry Andric 6906c3fb27SDimitry Andric template <class _CharT> 7006c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI void 7106c3fb27SDimitry Andric __write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) { 7206c3fb27SDimitry Andric back_insert_iterator __out_it{__str}; 7306c3fb27SDimitry Andric std::ranges::copy(__prefix, __nul_terminator{}, __out_it); 7406c3fb27SDimitry Andric 7506c3fb27SDimitry Andric char __buffer[8]; 7606c3fb27SDimitry Andric to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16); 771db9f3b2SDimitry Andric _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small"); 7806c3fb27SDimitry Andric std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it); 7906c3fb27SDimitry Andric 8006c3fb27SDimitry Andric __str += _CharT('}'); 8106c3fb27SDimitry Andric } 8206c3fb27SDimitry Andric 8306c3fb27SDimitry Andric // [format.string.escaped]/2.2.1.2 8406c3fb27SDimitry Andric // ... 8506c3fb27SDimitry Andric // then the sequence \u{hex-digit-sequence} is appended to E, where 8606c3fb27SDimitry Andric // hex-digit-sequence is the shortest hexadecimal representation of C using 8706c3fb27SDimitry Andric // lower-case hexadecimal digits. 8806c3fb27SDimitry Andric template <class _CharT> 8906c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) { 9006c3fb27SDimitry Andric __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{")); 9106c3fb27SDimitry Andric } 9206c3fb27SDimitry Andric 9306c3fb27SDimitry Andric // [format.string.escaped]/2.2.3 9406c3fb27SDimitry Andric // Otherwise (X is a sequence of ill-formed code units), each code unit U is 9506c3fb27SDimitry Andric // appended to E in order as the sequence \x{hex-digit-sequence}, where 9606c3fb27SDimitry Andric // hex-digit-sequence is the shortest hexadecimal representation of U using 9706c3fb27SDimitry Andric // lower-case hexadecimal digits. 9806c3fb27SDimitry Andric template <class _CharT> 9906c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) { 10006c3fb27SDimitry Andric __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{")); 10106c3fb27SDimitry Andric } 10206c3fb27SDimitry Andric 10306c3fb27SDimitry Andric template <class _CharT> 104*0fca6ea1SDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool 105*0fca6ea1SDimitry Andric __is_escaped_sequence_written(basic_string<_CharT>& __str, bool __last_escaped, char32_t __value) { 10606c3fb27SDimitry Andric # ifdef _LIBCPP_HAS_NO_UNICODE 10706c3fb27SDimitry Andric // For ASCII assume everything above 127 is printable. 10806c3fb27SDimitry Andric if (__value > 127) 10906c3fb27SDimitry Andric return false; 11006c3fb27SDimitry Andric # endif 11106c3fb27SDimitry Andric 112*0fca6ea1SDimitry Andric // [format.string.escaped]/2.2.1.2.1 113*0fca6ea1SDimitry Andric // CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar 114*0fca6ea1SDimitry Andric // value whose Unicode property General_Category has a value in the groups 115*0fca6ea1SDimitry Andric // Separator (Z) or Other (C), as described by UAX #44 of the Unicode Standard, 11606c3fb27SDimitry Andric if (!__escaped_output_table::__needs_escape(__value)) 117*0fca6ea1SDimitry Andric // [format.string.escaped]/2.2.1.2.2 118*0fca6ea1SDimitry Andric // CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar 119*0fca6ea1SDimitry Andric // value with the Unicode property Grapheme_Extend=Yes as described by UAX 120*0fca6ea1SDimitry Andric // #44 of the Unicode Standard and C is not immediately preceded in S by a 121*0fca6ea1SDimitry Andric // character P appended to E without translation to an escape sequence, 122*0fca6ea1SDimitry Andric if (!__last_escaped || __extended_grapheme_custer_property_boundary::__get_property(__value) != 123*0fca6ea1SDimitry Andric __extended_grapheme_custer_property_boundary::__property::__Extend) 12406c3fb27SDimitry Andric return false; 12506c3fb27SDimitry Andric 12606c3fb27SDimitry Andric __formatter::__write_well_formed_escaped_code_unit(__str, __value); 12706c3fb27SDimitry Andric return true; 12806c3fb27SDimitry Andric } 12906c3fb27SDimitry Andric 13006c3fb27SDimitry Andric template <class _CharT> 13106c3fb27SDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) { 13206c3fb27SDimitry Andric return static_cast<make_unsigned_t<_CharT>>(__value); 13306c3fb27SDimitry Andric } 13406c3fb27SDimitry Andric 1355f757f3fSDimitry Andric enum class __escape_quotation_mark { __apostrophe, __double_quote }; 13606c3fb27SDimitry Andric 13706c3fb27SDimitry Andric // [format.string.escaped]/2 13806c3fb27SDimitry Andric template <class _CharT> 139*0fca6ea1SDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written( 140*0fca6ea1SDimitry Andric basic_string<_CharT>& __str, char32_t __value, bool __last_escaped, __escape_quotation_mark __mark) { 14106c3fb27SDimitry Andric // 2.2.1.1 - Mapped character in [tab:format.escape.sequences] 14206c3fb27SDimitry Andric switch (__value) { 14306c3fb27SDimitry Andric case _CharT('\t'): 14406c3fb27SDimitry Andric __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t"); 14506c3fb27SDimitry Andric return true; 14606c3fb27SDimitry Andric case _CharT('\n'): 14706c3fb27SDimitry Andric __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n"); 14806c3fb27SDimitry Andric return true; 14906c3fb27SDimitry Andric case _CharT('\r'): 15006c3fb27SDimitry Andric __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r"); 15106c3fb27SDimitry Andric return true; 15206c3fb27SDimitry Andric case _CharT('\''): 15306c3fb27SDimitry Andric if (__mark == __escape_quotation_mark::__apostrophe) 15406c3fb27SDimitry Andric __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')"); 15506c3fb27SDimitry Andric else 15606c3fb27SDimitry Andric __str += __value; 15706c3fb27SDimitry Andric return true; 15806c3fb27SDimitry Andric case _CharT('"'): 15906c3fb27SDimitry Andric if (__mark == __escape_quotation_mark::__double_quote) 16006c3fb27SDimitry Andric __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")"); 16106c3fb27SDimitry Andric else 16206c3fb27SDimitry Andric __str += __value; 16306c3fb27SDimitry Andric return true; 16406c3fb27SDimitry Andric case _CharT('\\'): 16506c3fb27SDimitry Andric __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)"); 16606c3fb27SDimitry Andric return true; 16706c3fb27SDimitry Andric 16806c3fb27SDimitry Andric // 2.2.1.2 - Space 16906c3fb27SDimitry Andric case _CharT(' '): 17006c3fb27SDimitry Andric __str += __value; 17106c3fb27SDimitry Andric return true; 17206c3fb27SDimitry Andric } 17306c3fb27SDimitry Andric 17406c3fb27SDimitry Andric // 2.2.2 17506c3fb27SDimitry Andric // Otherwise, if X is a shift sequence, the effect on E and further 17606c3fb27SDimitry Andric // decoding of S is unspecified. 17706c3fb27SDimitry Andric // For now shift sequences are ignored and treated as Unicode. Other parts 17806c3fb27SDimitry Andric // of the format library do the same. It's unknown how ostream treats them. 17906c3fb27SDimitry Andric // TODO FMT determine what to do with shift sequences. 18006c3fb27SDimitry Andric 18106c3fb27SDimitry Andric // 2.2.1.2.1 and 2.2.1.2.2 - Escape 182*0fca6ea1SDimitry Andric return __formatter::__is_escaped_sequence_written(__str, __last_escaped, __formatter::__to_char32(__value)); 18306c3fb27SDimitry Andric } 18406c3fb27SDimitry Andric 18506c3fb27SDimitry Andric template <class _CharT> 18606c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI void 18706c3fb27SDimitry Andric __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) { 18806c3fb27SDimitry Andric __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()}; 18906c3fb27SDimitry Andric 190*0fca6ea1SDimitry Andric // When the first code unit has the property Grapheme_Extend=Yes it needs to 191*0fca6ea1SDimitry Andric // be escaped. This happens when the previous code unit was also escaped. 192*0fca6ea1SDimitry Andric bool __escape = true; 19306c3fb27SDimitry Andric while (!__view.__at_end()) { 19406c3fb27SDimitry Andric auto __first = __view.__position(); 19506c3fb27SDimitry Andric typename __unicode::__consume_result __result = __view.__consume(); 19606c3fb27SDimitry Andric if (__result.__status == __unicode::__consume_result::__ok) { 197*0fca6ea1SDimitry Andric __escape = __formatter::__is_escaped_sequence_written(__str, __result.__code_point, __escape, __mark); 198*0fca6ea1SDimitry Andric if (!__escape) 19906c3fb27SDimitry Andric // 2.2.1.3 - Add the character 20006c3fb27SDimitry Andric ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str)); 20106c3fb27SDimitry Andric } else { 20206c3fb27SDimitry Andric // 2.2.3 sequence of ill-formed code units 20306c3fb27SDimitry Andric ranges::for_each(__first, __view.__position(), [&](_CharT __value) { 20406c3fb27SDimitry Andric __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value)); 20506c3fb27SDimitry Andric }); 20606c3fb27SDimitry Andric } 20706c3fb27SDimitry Andric } 20806c3fb27SDimitry Andric } 20906c3fb27SDimitry Andric 21006c3fb27SDimitry Andric template <class _CharT> 21106c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI auto 21206c3fb27SDimitry Andric __format_escaped_char(_CharT __value, 21306c3fb27SDimitry Andric output_iterator<const _CharT&> auto __out_it, 21406c3fb27SDimitry Andric __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 21506c3fb27SDimitry Andric basic_string<_CharT> __str; 21606c3fb27SDimitry Andric __str += _CharT('\''); 21706c3fb27SDimitry Andric __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe); 21806c3fb27SDimitry Andric __str += _CharT('\''); 2195f757f3fSDimitry Andric return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size()); 22006c3fb27SDimitry Andric } 22106c3fb27SDimitry Andric 22206c3fb27SDimitry Andric template <class _CharT> 22306c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI auto 22406c3fb27SDimitry Andric __format_escaped_string(basic_string_view<_CharT> __values, 22506c3fb27SDimitry Andric output_iterator<const _CharT&> auto __out_it, 22606c3fb27SDimitry Andric __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { 22706c3fb27SDimitry Andric basic_string<_CharT> __str; 22806c3fb27SDimitry Andric __str += _CharT('"'); 22906c3fb27SDimitry Andric __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote); 23006c3fb27SDimitry Andric __str += _CharT('"'); 2315f757f3fSDimitry Andric return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs); 23206c3fb27SDimitry Andric } 23306c3fb27SDimitry Andric 23406c3fb27SDimitry Andric #endif // _LIBCPP_STD_VER >= 23 23506c3fb27SDimitry Andric 23606c3fb27SDimitry Andric } // namespace __formatter 23706c3fb27SDimitry Andric 23806c3fb27SDimitry Andric _LIBCPP_END_NAMESPACE_STD 23906c3fb27SDimitry Andric 240b3edf446SDimitry Andric _LIBCPP_POP_MACROS 241b3edf446SDimitry Andric 24206c3fb27SDimitry Andric #endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H 243