#!/usr/bin/env python3

# Generate Unicode case-folding table for Ada.

# Copyright (C) 2022-2023 Free Software Foundation, Inc.

# This file is part of GDB.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# This generates the ada-casefold.h header.
# Usage:
#   python ada-unicode.py

import gdbcopyright

# The start of the current range of case-conversions we are
# processing.  If RANGE_START is None, then we're outside of a range.
range_start = None
# End of the current range.
range_end = None
# The delta between RANGE_START and the upper-case variant of that
# character.
upper_delta = None
# The delta between RANGE_START and the lower-case variant of that
# character.
lower_delta = None

# All the ranges found and completed so far.
# Each entry is a tuple of the form (START, END, UPPER_DELTA, LOWER_DELTA).
all_ranges = []


def finish_range():
    """Close the range in progress, if any, and record it in ALL_RANGES.

    Afterwards the module-level range state is reset to None, meaning
    "outside of a range".  Calling this while no range is open only
    performs the (harmless) reset.
    """
    global range_start
    global range_end
    global upper_delta
    global lower_delta
    if range_start is not None:
        all_ranges.append((range_start, range_end, upper_delta, lower_delta))
    range_start = None
    range_end = None
    upper_delta = None
    lower_delta = None


def process_codepoint(val):
    """Account for the code point VAL (an int) in the range being built.

    Code points must be passed in increasing, gap-free order: a range
    is only known to be contiguous because every intermediate code
    point either extended it or terminated it.

    If VAL has no case variants, or one of its case variants is not a
    single character, the current range is finished and VAL is not
    recorded.  Otherwise VAL extends the current range when its
    upper/lower deltas match that range's, and starts a new range when
    they do not.
    """
    global range_start
    global range_end
    global upper_delta
    global lower_delta
    c = chr(val)
    low = c.lower()
    up = c.upper()
    # U+00DF ("LATIN SMALL LETTER SHARP S", aka eszett) traditionally
    # upper-cases to the two-character string "SS" (the capital form
    # is a relatively recent addition -- 2017).  Our simple scheme
    # can't handle this, so we skip it.  Also, because our approach
    # just represents runs of characters with identical folding
    # deltas, this change must terminate the current run.
    if (c == low and c == up) or len(low) != 1 or len(up) != 1:
        finish_range()
        return
    updelta = ord(up) - val
    lowdelta = ord(low) - val
    # A change in either delta means VAL cannot share an entry with
    # the preceding characters, so close that run before starting anew.
    if range_start is not None and (updelta != upper_delta or lowdelta != lower_delta):
        finish_range()
    if range_start is None:
        range_start = val
        upper_delta = updelta
        lower_delta = lowdelta
    range_end = val


# range() excludes its stop value, so add one in order to visit
# U+10FFFF, the last valid code point, as well.
for c in range(0, 0x10FFFF + 1):
    process_codepoint(c)
# Flush a range that runs all the way up to the last code point; the
# loop alone only closes a range when it meets a following code point
# that does not belong to it.
finish_range()

with open("ada-casefold.h", "w") as f:
    print(
        gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB"),
        file=f,
    )
    # One C initializer per range: {START, END, UPPER_DELTA, LOWER_DELTA}.
    for r in all_ranges:
        print(f" {{{r[0]}, {r[1]}, {r[2]}, {r[3]}}},", file=f)