#!/usr/bin/env python
# ===----------------------------------------------------------------------===##
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===----------------------------------------------------------------------===##

# The code is based on
# https://github.com/microsoft/STL/blob/main/tools/unicode_properties_parse/grapheme_break_property_data_gen.py
#
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

"""Generate the C++ width estimation table header from EastAsianWidth.txt.

The script reads ``data/unicode/EastAsianWidth.txt`` next to this file, keeps
the code point ranges that have an estimated width of 2, packs them into
32-bit table entries and prints the resulting header to stdout or, when
exactly one command line argument is given, writes it to that file.
"""

from io import StringIO
from pathlib import Path
from dataclasses import dataclass, replace
from typing import Optional
import re
import sys


@dataclass
class PropertyRange:
    """An inclusive range of code points together with its property text."""

    # First code point of the range.
    lower: int = -1
    # Last code point of the range (inclusive).
    upper: int = -1
    # Property value(s); compactPropertyRanges joins several with spaces.
    prop: Optional[str] = None


@dataclass
class Entry:
    """One table entry: a lower bound and the distance to the upper bound."""

    # First code point covered by the entry.
    lower: int = -1
    # upper - lower, so the entry covers offset + 1 code points.
    offset: int = -1


LINE_REGEX = re.compile(
    r"^(?P<lower>[0-9A-F]{4,6})(?:\.\.(?P<upper>[0-9A-F]{4,6}))?\s*;\s*(?P<prop>\w+)"
)

# Blocks whose code points are included regardless of their East_Asian_Width
# value (the hard-coded ranges of [format.string.std]/12), as inclusive
# (first, last) pairs.
HARDCODED_WIDE_BLOCKS = (
    (0x4DC0, 0x4DFF),  # Yijing Hexagram Symbols
    (0x1F300, 0x1F5FF),  # Miscellaneous Symbols and Pictographs
    (0x1F900, 0x1F9FF),  # Supplemental Symbols and Pictographs
)


def filterProperty(element: PropertyRange) -> Optional[PropertyRange]:
    """Return element when it belongs in the width table, otherwise None.

    A range is kept when its property is "W" or "F", or when it lies
    completely inside one of the HARDCODED_WIDE_BLOCKS.
    """
    ### Matches property predicate?
    if element.prop in ("W", "F"):
        return element

    ### Matches hardcode ranges predicate?
    for first, last in HARDCODED_WIDE_BLOCKS:
        if first <= element.lower and element.upper <= last:
            return element

    return None


def parsePropertyLine(inputLine: str) -> Optional[PropertyRange]:
    """Parse one line of EastAsianWidth.txt.

    Returns the parsed range, or None when the line does not start with a
    code point or code point range (for example comments and blank lines).
    A single code point yields a range whose lower and upper are equal.
    """
    m = LINE_REGEX.match(inputLine)
    if m is None:
        return None

    lower = int(m.group("lower"), base=16)
    upper_str = m.group("upper")
    upper = lower if upper_str is None else int(upper_str, base=16)
    return PropertyRange(lower, upper, m.group("prop"))


def compactPropertyRanges(input: list[PropertyRange]) -> list[PropertyRange]:
    """
    Merges overlapping and consecutive ranges to one range.

    Since the input properties are filtered the exact property isn't
    interesting anymore. The properties in the output are merged to aid
    debugging.
    Merging the ranges results in fewer ranges in the output table,
    reducing binary size and improving lookup performance.

    The input is expected to be sorted on the lower bound. The elements of
    the input are not modified; the result contains copies.
    """
    result: list[PropertyRange] = []
    for x in input:
        # A range starting at the same code point as the previous one is not
        # merged here; property_ranges_to_table rejects such input.
        if result and result[-1].lower < x.lower <= result[-1].upper + 1:
            last = result[-1]
            last.upper = max(last.upper, x.upper)
            last.prop = f"{last.prop} {x.prop}"
            continue
        # Copy, so merging never changes the caller's objects.
        result.append(replace(x))
    return result


DATA_ARRAY_TEMPLATE = r"""
/// The entries of the characters with an estimated width of 2.
///
/// Contains the entries for [format.string.std]/12
///  -  Any code point with the East_Asian_Width="W" or East_Asian_Width="F"
///     Derived Extracted Property as described by UAX #44
/// - U+4DC0 - U+4DFF (Yijing Hexagram Symbols)
/// - U+1F300 - U+1F5FF (Miscellaneous Symbols and Pictographs)
/// - U+1F900 - U+1F9FF (Supplemental Symbols and Pictographs)
///
/// The data is generated from
/// - https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
/// - The "overrides" in [format.string.std]/12
///
/// The format of EastAsianWidth.txt is two fields separated by a semicolon.
/// Field 0: Unicode code point value or range of code point values
/// Field 1: East_Asian_Width property, consisting of one of the following values:
///         "A", "F", "H", "N", "Na", "W"
///  - All code points, assigned or unassigned, that are not listed
///      explicitly are given the value "N".
///  - The unassigned code points in the following blocks default to "W":
///         CJK Unified Ideographs Extension A: U+3400..U+4DBF
///         CJK Unified Ideographs:             U+4E00..U+9FFF
///         CJK Compatibility Ideographs:       U+F900..U+FAFF
///  - All undesignated code points in Planes 2 and 3, whether inside or
///      outside of allocated blocks, default to "W":
///         Plane 2:                            U+20000..U+2FFFD
///         Plane 3:                            U+30000..U+3FFFD
///
/// The table is similar to the table
///  __extended_grapheme_custer_property_boundary::__entries
/// which explains the details of these classes. The only difference is this
/// table lacks a property, thus having more bits available for the size.
///
/// The maximum code point that has an estimated width of 2 is U+3FFFD. This
/// value can be encoded in 18 bits. Thus the upper 3 bits of the code point
/// are always 0. These 3 bits are used to enlarge the offset range. This
/// optimization reduces the table in Unicode 15 from 184 to 104 entries,
/// saving 320 bytes.
///
/// The data has 2 values:
/// - bits [0, 13] The size of the range, allowing 16384 elements.
/// - bits [14, 31] The lower bound code point of the range. The upper bound of
///   the range is lower bound + size.
_LIBCPP_HIDE_FROM_ABI inline constexpr uint32_t __entries[{size}] = {{
{entries}}};

/// The upper bound entry of EastAsianWidth.txt.
///
/// Values greater than this value may have more than 18 significant bits.
/// They always have a width of 1. This property makes it possible to store
/// the table in its compact form.
inline constexpr uint32_t __table_upper_bound = 0x{upper_bound:08x};

/// Returns the estimated width of a Unicode code point.
///
/// \pre The code point is a valid Unicode code point.
[[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr int __estimated_width(const char32_t __code_point) noexcept {{
  // Since __table_upper_bound contains the unshifted range do the
  // comparison without shifting.
  if (__code_point > __table_upper_bound) [[unlikely]]
    return 1;

  // When the code-point is less than the first element in the table
  // the lookup is quite expensive. Since quite some scripts are in
  // that range, it makes sense to validate that first.
  // The std_format_spec_string_unicode benchmark gives a measurable
  // improvement.
  if (__code_point < (__entries[0] >> 14))
    return 1;

  ptrdiff_t __i = std::ranges::upper_bound(__entries, (__code_point << 14) | 0x3fffu) - __entries;
  if (__i == 0)
    return 1;

  --__i;
  uint32_t __upper_bound = (__entries[__i] >> 14) + (__entries[__i] & 0x3fffu);
  return 1 + (__code_point <= __upper_bound);
}}
"""

TABLES_HPP_TEMPLATE = """
// -*- C++ -*-
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// WARNING, this entire header is generated by
// utils/generate_width_estimation_table.py
// DO NOT MODIFY!

// UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
//
// See Terms of Use <https://www.unicode.org/copyright.html>
// for definitions of Unicode Inc.'s Data Files and Software.
//
// NOTICE TO USER: Carefully read the following legal agreement.
// BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
// DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
// YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
// TERMS AND CONDITIONS OF THIS AGREEMENT.
// IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
// THE DATA FILES OR SOFTWARE.
//
// COPYRIGHT AND PERMISSION NOTICE
//
// Copyright (c) 1991-2022 Unicode, Inc. All rights reserved.
// Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of the Unicode data files and any associated documentation
// (the "Data Files") or Unicode software and any associated documentation
// (the "Software") to deal in the Data Files or Software
// without restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, and/or sell copies of
// the Data Files or Software, and to permit persons to whom the Data Files
// or Software are furnished to do so, provided that either
// (a) this copyright and permission notice appear with all copies
// of the Data Files or Software, or
// (b) this copyright and permission notice appear in associated
// Documentation.
//
// THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
// ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT OF THIRD PARTY RIGHTS.
// IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
// NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
// DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
// DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
// TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
// PERFORMANCE OF THE DATA FILES OR SOFTWARE.
//
// Except as contained in this notice, the name of a copyright holder
// shall not be used in advertising or otherwise to promote the sale,
// use or other dealings in these Data Files or Software without prior
// written authorization of the copyright holder.

#ifndef _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H
#define _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H

#include <__algorithm/ranges_upper_bound.h>
#include <__config>
#include <__cstddef/ptrdiff_t.h>
#include <cstdint>

#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
#  pragma GCC system_header
#endif

_LIBCPP_BEGIN_NAMESPACE_STD

#if _LIBCPP_STD_VER >= 20

namespace __width_estimation_table {{
{content}
}} // namespace __width_estimation_table

#endif // _LIBCPP_STD_VER >= 20

_LIBCPP_END_NAMESPACE_STD

#endif // _LIBCPP___FORMAT_WIDTH_ESTIMATION_TABLE_H"""


def property_ranges_to_table(ranges: list[PropertyRange]) -> list[Entry]:
    """Convert property ranges to table entries.

    The ranges are processed in order of their lower bound. A range that
    covers more code points than fit in one entry is split over several
    consecutive entries. The elements of ranges are not modified.

    Fails an assertion when ranges overlap or when an upper bound does not
    fit in the bits available for a code point in an entry.
    """
    # The maximum value that can be encoded in the available bits in the
    # __entries table.
    upper_bound = 0x3FFFF
    # The maximum offset in an __entries entry. Larger offsets will be
    # split and stored in multiple entries.
    chunk = 16384
    result: list[Entry] = []
    high = -1
    for current in sorted(ranges, key=lambda x: x.lower):
        # Validate overlapping ranges
        assert current.lower > high, "ranges overlap"
        high = current.upper
        assert high <= upper_bound, "code point does not fit in an entry"

        # Emit full chunks first; the remainder always fits in one entry.
        lower = current.lower
        while current.upper - lower >= chunk:
            result.append(Entry(lower, chunk - 1))
            lower += chunk
        result.append(Entry(lower, current.upper - lower))
    return result


cpp_entrytemplate = "    0x{:08x} /* {:08x} - {:08x} [{:>5}] */"


def generate_cpp_data(ranges: list[PropertyRange], upper_bound: int) -> str:
    """Return the C++ table, upper bound constant and lookup function.

    Every entry stores the lower bound shifted left by 14 bits combined with
    the offset; the trailing comment of an entry shows the lower bound, the
    upper bound and the number of code points it covers.
    """
    result = StringIO()
    table = property_ranges_to_table(ranges)
    entries = [
        cpp_entrytemplate.format(
            x.lower << 14 | x.offset,
            x.lower,
            x.lower + x.offset,
            x.offset + 1,
        )
        for x in table
    ]
    result.write(
        DATA_ARRAY_TEMPLATE.format(
            size=len(table),
            entries=", //\n".join(entries),
            upper_bound=upper_bound,
        )
    )

    return result.getvalue()


def generate_data_tables() -> str:
    """
    Generate Unicode data for [format.string.std]/12
    """
    east_asian_width_path = (
        Path(__file__).absolute().parent / "data" / "unicode" / "EastAsianWidth.txt"
    )

    with east_asian_width_path.open(encoding="utf-8") as f:
        properties = [
            x
            for line in f
            if (x := parsePropertyLine(line)) is not None
            and filterProperty(x) is not None
        ]
    # The range U+4DC0 - U+4DFF is neutral and should not be in the table
    # The range U+1F300 - U+1F5FF is partly in the range, for example
    #   1F300..1F320;W   # So    [33] CYCLONE..SHOOTING STAR
    #   1F321..1F32C;N   # So    [12] THERMOMETER..WIND BLOWING FACE
    #   1F32D..1F335;W   # So     [9] HOT DOG..CACTUS
    # The first and last ranges are present, but the second isn't

    # Validate the hardcode ranges are present
    for first, last in HARDCODED_WIDE_BLOCKS:
        for code_point in range(first, last + 1):
            assert any(
                x.lower <= code_point <= x.upper for x in properties
            ), f"U+{code_point:04X} is missing from the filtered properties"

    data = compactPropertyRanges(sorted(properties, key=lambda x: x.lower))

    return generate_cpp_data(data, data[-1].upper)


def main(argv: list[str]) -> None:
    """Write the generated header to argv[1], or to stdout otherwise.

    The header is generated completely before the output file is opened, so
    a failure during generation does not truncate an existing file.
    """
    header = TABLES_HPP_TEMPLATE.lstrip().format(content=generate_data_tables())
    if len(argv) == 2:
        with open(argv[1], "w", encoding="utf-8") as output:
            print(header, file=output)
    else:
        print(header)


if __name__ == "__main__":
    main(sys.argv)