xref: /netbsd-src/external/gpl3/gdb.old/dist/gdb/ada-unicode.py (revision f8cf1a9151c7af1cb0bd8b09c13c66bca599c027)
1#!/usr/bin/env python3
2
3# Generate Unicode case-folding table for Ada.
4
5# Copyright (C) 2022-2023 Free Software Foundation, Inc.
6
7# This file is part of GDB.
8
9# This program is free software; you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation; either version 3 of the License, or
12# (at your option) any later version.
13
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17# GNU General Public License for more details.
18
19# You should have received a copy of the GNU General Public License
20# along with this program.  If not, see <http://www.gnu.org/licenses/>.
21
22# This generates the ada-casefold.h header.
23# Usage:
24#   python ada-unicode.py
25
26import gdbcopyright
27
# State describing the run of case-conversions currently being
# accumulated.  RANGE_START is the first code point of the run (None
# whenever no run is in progress) and RANGE_END is the last code point
# added to it so far.  UPPER_DELTA and LOWER_DELTA are the differences
# between RANGE_START and its upper-case and lower-case variants,
# respectively.
range_start = range_end = upper_delta = lower_delta = None

# Every run completed so far, in the order in which it was finished.
# Each element is a (START, END, UPPER_DELTA, LOWER_DELTA) tuple.
all_ranges = []
43
44
def finish_range():
    """Close the run currently being accumulated, if there is one.

    The run is appended to ALL_RANGES as a
    (START, END, UPPER_DELTA, LOWER_DELTA) tuple, and the four
    module-level state variables are reset to None.  Calling this
    while no run is in progress does nothing.
    """
    global range_start, range_end, upper_delta, lower_delta
    if range_start is None:
        # Not inside a run, so there is nothing to record.
        return
    completed = (range_start, range_end, upper_delta, lower_delta)
    all_ranges.append(completed)
    range_start = range_end = upper_delta = lower_delta = None
56
57
def process_codepoint(val):
    """Fold the code point VAL into the run currently being built.

    VAL is an integer code point.  If the character has no case
    distinction, or if either of its case conversions is not exactly
    one character long, the current run (if any) is finished and VAL
    is skipped.  Otherwise VAL extends the current run when its
    upper- and lower-case deltas match those of the run; if they do
    not match, the current run is finished and VAL starts a new one.
    """
    global range_start, range_end, upper_delta, lower_delta
    char = chr(val)
    lowered = char.lower()
    uppered = char.upper()
    # Some characters convert to more than one character; for
    # instance U+00DF ("LATIN SMALL LETTER SHARP S", aka eszett)
    # traditionally upper-cases to the two-character string "SS".
    # The table only stores a single delta per direction, so such
    # characters cannot be represented and are skipped.  Because a
    # run describes consecutive characters with identical deltas, a
    # skipped character must also terminate the run in progress.
    has_case = char != lowered or char != uppered
    if not has_case or len(lowered) != 1 or len(uppered) != 1:
        finish_range()
        return
    deltas = (ord(uppered) - val, ord(lowered) - val)
    # A change in either delta means VAL cannot belong to the run in
    # progress.
    if range_start is not None and deltas != (upper_delta, lower_delta):
        finish_range()
    if range_start is None:
        # Begin a fresh run at VAL.
        range_start = val
        upper_delta, lower_delta = deltas
    range_end = val
84
85
# Examine every Unicode code point, U+0000 through U+10FFFF inclusive.
# range() excludes its stop value, so the bound has to be 0x110000 for
# the final code point to be processed as well.
for c in range(0x110000):
    process_codepoint(c)
# Flush a run that is still open after the last code point; without
# this, such a run would never be added to ALL_RANGES and would be
# missing from the generated table.
finish_range()

# Write the generated header: the copyright notice followed by one
# "{START, END, UPPER_DELTA, LOWER_DELTA}," initializer per run.
with open("ada-casefold.h", "w") as f:
    print(
        gdbcopyright.copyright("ada-unicode.py", "UTF-32 case-folding for GDB"),
        file=f,
    )
    for r in all_ranges:
        print(f"   {{{r[0]}, {r[1]}, {r[2]}, {r[3]}}},", file=f)
96