xref: /netbsd-src/external/gpl3/gdb.old/dist/gdb/ada-unicode.py (revision 6881a4007f077b54e5f51159c52b9b25f57deb0d)
1*6881a400Schristos#!/usr/bin/env python3
2*6881a400Schristos
3*6881a400Schristos# Generate Unicode case-folding table for Ada.
4*6881a400Schristos
5*6881a400Schristos# Copyright (C) 2022-2023 Free Software Foundation, Inc.
6*6881a400Schristos
7*6881a400Schristos# This file is part of GDB.
8*6881a400Schristos
9*6881a400Schristos# This program is free software; you can redistribute it and/or modify
10*6881a400Schristos# it under the terms of the GNU General Public License as published by
11*6881a400Schristos# the Free Software Foundation; either version 3 of the License, or
12*6881a400Schristos# (at your option) any later version.
13*6881a400Schristos
14*6881a400Schristos# This program is distributed in the hope that it will be useful,
15*6881a400Schristos# but WITHOUT ANY WARRANTY; without even the implied warranty of
16*6881a400Schristos# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17*6881a400Schristos# GNU General Public License for more details.
18*6881a400Schristos
19*6881a400Schristos# You should have received a copy of the GNU General Public License
20*6881a400Schristos# along with this program.  If not, see <http://www.gnu.org/licenses/>.
21*6881a400Schristos
22*6881a400Schristos# This generates the ada-casefold.h header.
23*6881a400Schristos# Usage:
24*6881a400Schristos#   python ada-unicode.py
25*6881a400Schristos
26*6881a400Schristosimport gdbcopyright
27*6881a400Schristos
# State of the run of case conversions currently being accumulated.
# RANGE_START and RANGE_END are the first and last code points of the
# run; RANGE_START is None whenever no run is open.
range_start = range_end = None
# The deltas from a code point in the run to its upper-case and
# lower-case variants, respectively.
upper_delta = lower_delta = None

# Every run completed so far, in the order the runs were finished.
# Each entry is a tuple of the form (START, END, UPPER_DELTA, LOWER_DELTA).
all_ranges = []
43*6881a400Schristos
44*6881a400Schristos
def finish_range():
    """Close the run in progress, if there is one.

    The run is appended to ALL_RANGES as a tuple of the form
    (START, END, UPPER_DELTA, LOWER_DELTA) and the run state is reset
    to None.  This does nothing when no run is open.
    """
    global range_start, range_end, upper_delta, lower_delta
    if range_start is None:
        # Nothing is open, so there is nothing to record.
        return
    all_ranges.append((range_start, range_end, upper_delta, lower_delta))
    range_start = range_end = upper_delta = lower_delta = None
56*6881a400Schristos
57*6881a400Schristos
def process_codepoint(val):
    """Account for code point VAL in the run of case conversions.

    VAL is the integer value of a code point.  If it has a
    single-character upper-case and lower-case form, and at least one
    of them differs from the character itself, it either extends the
    current run (when both deltas match the run's) or starts a new
    one.  Any other code point just terminates the current run.
    """
    global range_start, range_end, upper_delta, lower_delta
    char = chr(val)
    lowered = char.lower()
    uppered = char.upper()
    # Some characters change length when case-converted; for example
    # U+00DF ("LATIN SMALL LETTER SHARP S", aka eszett) upper-cases to
    # the two-character string "SS".  A run can only describe
    # one-to-one mappings, so such characters are skipped, as are
    # characters that have no case at all.  Because a run covers
    # consecutive code points that share the same deltas, a skipped
    # character must also terminate the current run.
    has_case = char != lowered or char != uppered
    if not has_case or len(lowered) != 1 or len(uppered) != 1:
        finish_range()
        return
    deltas = (ord(uppered) - val, ord(lowered) - val)
    # A change in either delta ends the run in progress; VAL then
    # starts a new run below.
    if range_start is not None and deltas != (upper_delta, lower_delta):
        finish_range()
    if range_start is None:
        range_start = val
        upper_delta, lower_delta = deltas
    range_end = val
84*6881a400Schristos
85*6881a400Schristos
# Feed every Unicode code point, U+0000 through U+10FFFF, to
# process_codepoint in ascending order.  The stop value of range() is
# exclusive, so it has to be one past the last code point.
for c in range(0x110000):
    process_codepoint(c)
# Flush a run that is still open when the loop ends; otherwise it
# would never be added to ALL_RANGES.
finish_range()

# Write the header: the copyright text first, then one brace-enclosed
# "{START, END, UPPER_DELTA, LOWER_DELTA}," initializer per run.
with open("ada-casefold.h", "w") as f:
    print(
        gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB"),
        file=f,
    )
    for r in all_ranges:
        print(f"   {{{r[0]}, {r[1]}, {r[2]}, {r[3]}}},", file=f)
96