#!/usr/bin/env python3

# Generate Unicode case-folding table for Ada.

# Copyright (C) 2022-2023 Free Software Foundation, Inc.

# This file is part of GDB.

# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.

# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# This generates the ada-casefold.h header.
# Usage:
#   python ada-unicode.py

# The start of the current range of case-conversions we are
# processing.  If RANGE_START is None, then we're outside of a range.
range_start = None
# End of the current range.
range_end = None
# The delta between RANGE_START and the upper-case variant of that
# character.
upper_delta = None
# The delta between RANGE_START and the lower-case variant of that
# character.
lower_delta = None

# All the ranges found and completed so far.
# Each entry is a tuple of the form (START, END, UPPER_DELTA, LOWER_DELTA).
all_ranges = []


def finish_range():
    """Close the range currently being built, if there is one.

    The open range is appended to ALL_RANGES as a
    (START, END, UPPER_DELTA, LOWER_DELTA) tuple and the range state
    is reset.  Calling this while outside of a range does nothing.
    """
    global range_start
    global range_end
    global upper_delta
    global lower_delta
    if range_start is not None:
        all_ranges.append((range_start, range_end, upper_delta, lower_delta))
        range_start = None
        range_end = None
        upper_delta = None
        lower_delta = None


def process_codepoint(val):
    """Fold the code point VAL into the range currently being built.

    VAL is an integer code point.  Code points must be supplied in
    increasing order, because a range is just a run of consecutive
    code points that share the same upper- and lower-case deltas.
    """
    global range_start
    global range_end
    global upper_delta
    global lower_delta
    c = chr(val)
    low = c.lower()
    up = c.upper()
    # U+00DF ("LATIN SMALL LETTER SHARP S", aka Eszett) traditionally
    # upper-cases to the two-character string "SS" (the capital form
    # is a relatively recent addition -- 2017).  Our simple scheme
    # can't handle this, so we skip it.  Also, because our approach
    # just represents runs of characters with identical folding
    # deltas, this change must terminate the current run.  Characters
    # with no case variants at all terminate the run as well.
    if (c == low and c == up) or len(low) != 1 or len(up) != 1:
        finish_range()
        return
    updelta = ord(up) - val
    lowdelta = ord(low) - val
    # A change in either delta means VAL cannot extend the open range.
    if range_start is not None and (updelta != upper_delta or lowdelta != lower_delta):
        finish_range()
    if range_start is None:
        range_start = val
        upper_delta = updelta
        lower_delta = lowdelta
    range_end = val


def main():
    """Scan every Unicode code point and write ada-casefold.h."""
    # Imported here rather than at module level so that the helpers
    # above can be imported and exercised without this GDB-local
    # module being on the import path.
    import gdbcopyright

    # range() excludes its stop value, so the stop must be 0x110000
    # for the scan to include the last code point, U+10FFFF.
    for c in range(0, 0x110000):
        process_codepoint(c)
    # Flush a range that is still open when the scan ends; otherwise
    # it would never reach ALL_RANGES.
    finish_range()

    with open("ada-casefold.h", "w") as f:
        print(
            gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB"),
            file=f,
        )
        for r in all_ranges:
            print(f" {{{r[0]}, {r[1]}, {r[2]}, {r[3]}}},", file=f)


if __name__ == "__main__":
    main()