xref: /freebsd-src/contrib/llvm-project/libcxx/include/__format/write_escaped.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
106c3fb27SDimitry Andric // -*- C++ -*-
206c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
306c3fb27SDimitry Andric //
406c3fb27SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
506c3fb27SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
606c3fb27SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
706c3fb27SDimitry Andric //
806c3fb27SDimitry Andric //===----------------------------------------------------------------------===//
906c3fb27SDimitry Andric 
1006c3fb27SDimitry Andric #ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H
1106c3fb27SDimitry Andric #define _LIBCPP___FORMAT_WRITE_ESCAPED_H
1206c3fb27SDimitry Andric 
1306c3fb27SDimitry Andric #include <__algorithm/ranges_copy.h>
1406c3fb27SDimitry Andric #include <__algorithm/ranges_for_each.h>
1506c3fb27SDimitry Andric #include <__charconv/to_chars_integral.h>
1606c3fb27SDimitry Andric #include <__charconv/to_chars_result.h>
1706c3fb27SDimitry Andric #include <__chrono/statically_widen.h>
1806c3fb27SDimitry Andric #include <__format/escaped_output_table.h>
1906c3fb27SDimitry Andric #include <__format/formatter_output.h>
2006c3fb27SDimitry Andric #include <__format/parser_std_format_spec.h>
2106c3fb27SDimitry Andric #include <__format/unicode.h>
2206c3fb27SDimitry Andric #include <__iterator/back_insert_iterator.h>
2306c3fb27SDimitry Andric #include <__memory/addressof.h>
2406c3fb27SDimitry Andric #include <__system_error/errc.h>
2506c3fb27SDimitry Andric #include <__type_traits/make_unsigned.h>
2606c3fb27SDimitry Andric #include <__utility/move.h>
2706c3fb27SDimitry Andric #include <string_view>
2806c3fb27SDimitry Andric 
2906c3fb27SDimitry Andric #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
3006c3fb27SDimitry Andric #  pragma GCC system_header
3106c3fb27SDimitry Andric #endif
3206c3fb27SDimitry Andric 
33b3edf446SDimitry Andric _LIBCPP_PUSH_MACROS
34b3edf446SDimitry Andric #include <__undef_macros>
35b3edf446SDimitry Andric 
3606c3fb27SDimitry Andric _LIBCPP_BEGIN_NAMESPACE_STD
3706c3fb27SDimitry Andric 
3806c3fb27SDimitry Andric namespace __formatter {
3906c3fb27SDimitry Andric 
4006c3fb27SDimitry Andric #if _LIBCPP_STD_VER >= 20
4106c3fb27SDimitry Andric 
4206c3fb27SDimitry Andric /// Writes a string using format's width estimation algorithm.
4306c3fb27SDimitry Andric ///
4406c3fb27SDimitry Andric /// \note When \c _LIBCPP_HAS_NO_UNICODE is defined the function assumes the
4506c3fb27SDimitry Andric /// input is ASCII.
4606c3fb27SDimitry Andric template <class _CharT>
47cb14a3feSDimitry Andric _LIBCPP_HIDE_FROM_ABI auto
48cb14a3feSDimitry Andric __write_string(basic_string_view<_CharT> __str,
4906c3fb27SDimitry Andric                output_iterator<const _CharT&> auto __out_it,
5006c3fb27SDimitry Andric                __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
5106c3fb27SDimitry Andric   if (!__specs.__has_precision())
525f757f3fSDimitry Andric     return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs);
5306c3fb27SDimitry Andric 
5406c3fb27SDimitry Andric   int __size = __formatter::__truncate(__str, __specs.__precision_);
5506c3fb27SDimitry Andric 
565f757f3fSDimitry Andric   return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size);
5706c3fb27SDimitry Andric }
5806c3fb27SDimitry Andric 
5906c3fb27SDimitry Andric #endif // _LIBCPP_STD_VER >= 20
6006c3fb27SDimitry Andric #if _LIBCPP_STD_VER >= 23
6106c3fb27SDimitry Andric 
6206c3fb27SDimitry Andric struct __nul_terminator {};
6306c3fb27SDimitry Andric 
6406c3fb27SDimitry Andric template <class _CharT>
6506c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) {
6606c3fb27SDimitry Andric   return *__cstr == _CharT('\0');
6706c3fb27SDimitry Andric }
6806c3fb27SDimitry Andric 
6906c3fb27SDimitry Andric template <class _CharT>
7006c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI void
7106c3fb27SDimitry Andric __write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) {
7206c3fb27SDimitry Andric   back_insert_iterator __out_it{__str};
7306c3fb27SDimitry Andric   std::ranges::copy(__prefix, __nul_terminator{}, __out_it);
7406c3fb27SDimitry Andric 
7506c3fb27SDimitry Andric   char __buffer[8];
7606c3fb27SDimitry Andric   to_chars_result __r = std::to_chars(std::begin(__buffer), std::end(__buffer), __value, 16);
771db9f3b2SDimitry Andric   _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small");
7806c3fb27SDimitry Andric   std::ranges::copy(std::begin(__buffer), __r.ptr, __out_it);
7906c3fb27SDimitry Andric 
8006c3fb27SDimitry Andric   __str += _CharT('}');
8106c3fb27SDimitry Andric }
8206c3fb27SDimitry Andric 
8306c3fb27SDimitry Andric // [format.string.escaped]/2.2.1.2
8406c3fb27SDimitry Andric // ...
8506c3fb27SDimitry Andric // then the sequence \u{hex-digit-sequence} is appended to E, where
8606c3fb27SDimitry Andric // hex-digit-sequence is the shortest hexadecimal representation of C using
8706c3fb27SDimitry Andric // lower-case hexadecimal digits.
8806c3fb27SDimitry Andric template <class _CharT>
8906c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) {
9006c3fb27SDimitry Andric   __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{"));
9106c3fb27SDimitry Andric }
9206c3fb27SDimitry Andric 
9306c3fb27SDimitry Andric // [format.string.escaped]/2.2.3
9406c3fb27SDimitry Andric // Otherwise (X is a sequence of ill-formed code units), each code unit U is
9506c3fb27SDimitry Andric // appended to E in order as the sequence \x{hex-digit-sequence}, where
9606c3fb27SDimitry Andric // hex-digit-sequence is the shortest hexadecimal representation of U using
9706c3fb27SDimitry Andric // lower-case hexadecimal digits.
9806c3fb27SDimitry Andric template <class _CharT>
9906c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) {
10006c3fb27SDimitry Andric   __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{"));
10106c3fb27SDimitry Andric }
10206c3fb27SDimitry Andric 
10306c3fb27SDimitry Andric template <class _CharT>
104*0fca6ea1SDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool
105*0fca6ea1SDimitry Andric __is_escaped_sequence_written(basic_string<_CharT>& __str, bool __last_escaped, char32_t __value) {
10606c3fb27SDimitry Andric #  ifdef _LIBCPP_HAS_NO_UNICODE
10706c3fb27SDimitry Andric   // For ASCII assume everything above 127 is printable.
10806c3fb27SDimitry Andric   if (__value > 127)
10906c3fb27SDimitry Andric     return false;
11006c3fb27SDimitry Andric #  endif
11106c3fb27SDimitry Andric 
112*0fca6ea1SDimitry Andric   // [format.string.escaped]/2.2.1.2.1
113*0fca6ea1SDimitry Andric   //   CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar
114*0fca6ea1SDimitry Andric   //   value whose Unicode property General_Category has a value in the groups
115*0fca6ea1SDimitry Andric   //   Separator (Z) or Other (C), as described by UAX #44 of the Unicode Standard,
11606c3fb27SDimitry Andric   if (!__escaped_output_table::__needs_escape(__value))
117*0fca6ea1SDimitry Andric     // [format.string.escaped]/2.2.1.2.2
118*0fca6ea1SDimitry Andric     //   CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar
119*0fca6ea1SDimitry Andric     //   value with the Unicode property Grapheme_Extend=Yes as described by UAX
120*0fca6ea1SDimitry Andric     //   #44 of the Unicode Standard and C is not immediately preceded in S by a
121*0fca6ea1SDimitry Andric     //   character P appended to E without translation to an escape sequence,
122*0fca6ea1SDimitry Andric     if (!__last_escaped || __extended_grapheme_custer_property_boundary::__get_property(__value) !=
123*0fca6ea1SDimitry Andric                                __extended_grapheme_custer_property_boundary::__property::__Extend)
12406c3fb27SDimitry Andric       return false;
12506c3fb27SDimitry Andric 
12606c3fb27SDimitry Andric   __formatter::__write_well_formed_escaped_code_unit(__str, __value);
12706c3fb27SDimitry Andric   return true;
12806c3fb27SDimitry Andric }
12906c3fb27SDimitry Andric 
13006c3fb27SDimitry Andric template <class _CharT>
13106c3fb27SDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) {
13206c3fb27SDimitry Andric   return static_cast<make_unsigned_t<_CharT>>(__value);
13306c3fb27SDimitry Andric }
13406c3fb27SDimitry Andric 
// Selects which quotation mark is written as an escape sequence:
// __apostrophe escapes ' (used for escaped characters), __double_quote
// escapes " (used for escaped strings). The other mark is copied unchanged.
enum class __escape_quotation_mark {
  __apostrophe,
  __double_quote
};
13606c3fb27SDimitry Andric 
13706c3fb27SDimitry Andric // [format.string.escaped]/2
13806c3fb27SDimitry Andric template <class _CharT>
139*0fca6ea1SDimitry Andric [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written(
140*0fca6ea1SDimitry Andric     basic_string<_CharT>& __str, char32_t __value, bool __last_escaped, __escape_quotation_mark __mark) {
14106c3fb27SDimitry Andric   // 2.2.1.1 - Mapped character in [tab:format.escape.sequences]
14206c3fb27SDimitry Andric   switch (__value) {
14306c3fb27SDimitry Andric   case _CharT('\t'):
14406c3fb27SDimitry Andric     __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t");
14506c3fb27SDimitry Andric     return true;
14606c3fb27SDimitry Andric   case _CharT('\n'):
14706c3fb27SDimitry Andric     __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n");
14806c3fb27SDimitry Andric     return true;
14906c3fb27SDimitry Andric   case _CharT('\r'):
15006c3fb27SDimitry Andric     __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r");
15106c3fb27SDimitry Andric     return true;
15206c3fb27SDimitry Andric   case _CharT('\''):
15306c3fb27SDimitry Andric     if (__mark == __escape_quotation_mark::__apostrophe)
15406c3fb27SDimitry Andric       __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')");
15506c3fb27SDimitry Andric     else
15606c3fb27SDimitry Andric       __str += __value;
15706c3fb27SDimitry Andric     return true;
15806c3fb27SDimitry Andric   case _CharT('"'):
15906c3fb27SDimitry Andric     if (__mark == __escape_quotation_mark::__double_quote)
16006c3fb27SDimitry Andric       __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")");
16106c3fb27SDimitry Andric     else
16206c3fb27SDimitry Andric       __str += __value;
16306c3fb27SDimitry Andric     return true;
16406c3fb27SDimitry Andric   case _CharT('\\'):
16506c3fb27SDimitry Andric     __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)");
16606c3fb27SDimitry Andric     return true;
16706c3fb27SDimitry Andric 
16806c3fb27SDimitry Andric   // 2.2.1.2 - Space
16906c3fb27SDimitry Andric   case _CharT(' '):
17006c3fb27SDimitry Andric     __str += __value;
17106c3fb27SDimitry Andric     return true;
17206c3fb27SDimitry Andric   }
17306c3fb27SDimitry Andric 
17406c3fb27SDimitry Andric   // 2.2.2
17506c3fb27SDimitry Andric   //   Otherwise, if X is a shift sequence, the effect on E and further
17606c3fb27SDimitry Andric   //   decoding of S is unspecified.
17706c3fb27SDimitry Andric   // For now shift sequences are ignored and treated as Unicode. Other parts
17806c3fb27SDimitry Andric   // of the format library do the same. It's unknown how ostream treats them.
17906c3fb27SDimitry Andric   // TODO FMT determine what to do with shift sequences.
18006c3fb27SDimitry Andric 
18106c3fb27SDimitry Andric   // 2.2.1.2.1 and 2.2.1.2.2 - Escape
182*0fca6ea1SDimitry Andric   return __formatter::__is_escaped_sequence_written(__str, __last_escaped, __formatter::__to_char32(__value));
18306c3fb27SDimitry Andric }
18406c3fb27SDimitry Andric 
18506c3fb27SDimitry Andric template <class _CharT>
18606c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI void
18706c3fb27SDimitry Andric __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) {
18806c3fb27SDimitry Andric   __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()};
18906c3fb27SDimitry Andric 
190*0fca6ea1SDimitry Andric   // When the first code unit has the property Grapheme_Extend=Yes it needs to
191*0fca6ea1SDimitry Andric   // be escaped. This happens when the previous code unit was also escaped.
192*0fca6ea1SDimitry Andric   bool __escape = true;
19306c3fb27SDimitry Andric   while (!__view.__at_end()) {
19406c3fb27SDimitry Andric     auto __first                                  = __view.__position();
19506c3fb27SDimitry Andric     typename __unicode::__consume_result __result = __view.__consume();
19606c3fb27SDimitry Andric     if (__result.__status == __unicode::__consume_result::__ok) {
197*0fca6ea1SDimitry Andric       __escape = __formatter::__is_escaped_sequence_written(__str, __result.__code_point, __escape, __mark);
198*0fca6ea1SDimitry Andric       if (!__escape)
19906c3fb27SDimitry Andric         // 2.2.1.3 - Add the character
20006c3fb27SDimitry Andric         ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str));
20106c3fb27SDimitry Andric     } else {
20206c3fb27SDimitry Andric       // 2.2.3 sequence of ill-formed code units
20306c3fb27SDimitry Andric       ranges::for_each(__first, __view.__position(), [&](_CharT __value) {
20406c3fb27SDimitry Andric         __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value));
20506c3fb27SDimitry Andric       });
20606c3fb27SDimitry Andric     }
20706c3fb27SDimitry Andric   }
20806c3fb27SDimitry Andric }
20906c3fb27SDimitry Andric 
21006c3fb27SDimitry Andric template <class _CharT>
21106c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI auto
21206c3fb27SDimitry Andric __format_escaped_char(_CharT __value,
21306c3fb27SDimitry Andric                       output_iterator<const _CharT&> auto __out_it,
21406c3fb27SDimitry Andric                       __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
21506c3fb27SDimitry Andric   basic_string<_CharT> __str;
21606c3fb27SDimitry Andric   __str += _CharT('\'');
21706c3fb27SDimitry Andric   __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe);
21806c3fb27SDimitry Andric   __str += _CharT('\'');
2195f757f3fSDimitry Andric   return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size());
22006c3fb27SDimitry Andric }
22106c3fb27SDimitry Andric 
22206c3fb27SDimitry Andric template <class _CharT>
22306c3fb27SDimitry Andric _LIBCPP_HIDE_FROM_ABI auto
22406c3fb27SDimitry Andric __format_escaped_string(basic_string_view<_CharT> __values,
22506c3fb27SDimitry Andric                         output_iterator<const _CharT&> auto __out_it,
22606c3fb27SDimitry Andric                         __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) {
22706c3fb27SDimitry Andric   basic_string<_CharT> __str;
22806c3fb27SDimitry Andric   __str += _CharT('"');
22906c3fb27SDimitry Andric   __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote);
23006c3fb27SDimitry Andric   __str += _CharT('"');
2315f757f3fSDimitry Andric   return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs);
23206c3fb27SDimitry Andric }
23306c3fb27SDimitry Andric 
23406c3fb27SDimitry Andric #endif // _LIBCPP_STD_VER >= 23
23506c3fb27SDimitry Andric 
23606c3fb27SDimitry Andric } // namespace __formatter
23706c3fb27SDimitry Andric 
23806c3fb27SDimitry Andric _LIBCPP_END_NAMESPACE_STD
23906c3fb27SDimitry Andric 
240b3edf446SDimitry Andric _LIBCPP_POP_MACROS
241b3edf446SDimitry Andric 
24206c3fb27SDimitry Andric #endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H
243