xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/mkheader (revision f2da64fbbbf1b03f09f390ab01267c93dfd77c4c)
1#!perl
2#
3# This auxiliary script makes five header files
4# used for building XSUB of Unicode::Collate.
5#
6# Usage:
7#    <do 'mkheader'> in perl, or <perl mkheader> in command line
8#
9# Input file:
10#    Collate/allkeys.txt
11#
12# Output file:
13#    ucatbl.h
14#
15use 5.006;
16use strict;
17use warnings;
18use Carp;
19use File::Spec;
20
21BEGIN {
22    unless ("A" eq pack('U', 0x41)) {
23	die "Unicode::Collate cannot stringify a Unicode code point\n";
24    }
25    unless (0x41 == unpack('U', 'A')) {
26	die "Unicode::Collate cannot get a Unicode code point\n";
27    }
28}
29
30use constant TRUE  => 1;
31use constant FALSE => "";
32use constant VCE_TEMPLATE => 'Cn4';
33
34sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }
35
36our $PACKAGE = 'Unicode::Collate, mkheader';
37our $prefix  = "UCA_";
38
39our %SimpleEntries;	# $codepoint => $keys
40our @Rest;
41
42{
43    my($f, $fh);
44    foreach my $d (File::Spec->curdir()) {
45	$f = File::Spec->catfile($d, "Collate", "allkeys.txt");
46	last if open($fh, $f);
47	$f = undef;
48    }
49    croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f;
50
51    while (my $line = <$fh>) {
52	next if $line =~ /^\s*#/;
53	if ($line =~ /^\s*\@/) {
54	    push @Rest, $line;
55	    next;
56	}
57
58	next if $line !~ /^\s*[0-9A-Fa-f]/;
59
60	$line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name)
61
62	# gets element
63	my($e, $k) = split /;/, $line;
64
65	croak "Wrong Entry: <charList> must be separated by ';' ".
66	      "from <collElement>" if ! $k;
67
68	my @uv = _getHexArray($e);
69	next if !@uv;
70
71	if (@uv != 1) {
72	    push @Rest, $line;
73	    next;
74	}
75
76	my $is_L3_ignorable = TRUE;
77
78	my @key;
79	foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed
80	    my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient.
81	    my @wt = _getHexArray($arr);
82	    push @key, pack(VCE_TEMPLATE, $var, @wt);
83	    $is_L3_ignorable = FALSE
84		if $wt[0] || $wt[1] || $wt[2];
85	    # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable
86	    # is completely ignorable.
87	    # For expansion, an entry $is_L3_ignorable
88	    # if and only if "all" CEs are [.0000.0000.0000].
89	}
90	my $mapping = $is_L3_ignorable ? [] : \@key;
91	my $num = @$mapping;
92	my $str = chr($num).join('', @$mapping);
93	$SimpleEntries{$uv[0]} = stringify($str);
94    }
95}
96
97sub stringify {
98    my $str = shift;
99    return sprintf '"%s"', join '',
100	   map sprintf("\\x%02x", ord $_), split //, $str;
101
102}
103
104########## writing header files ##########
105
106my $init = '';
107{
108    my $type = "char*";
109    my $head = $prefix."rest";
110
111    $init .= "static $type $head [] = {\n";
112    for my $line (@Rest) {
113	$line =~ s/\s*\z//;
114	next if $line eq '';
115	$init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/;
116	$init .= "($type)".stringify($line).",\n";
117    }
118    $init .= "NULL\n"; # sentinel
119    $init .= "};\n\n";
120}
121
122my @tripletable = (
123    {
124	file => "ucatbl",
125	name => "simple",
126	type => "char*",
127	hash => \%SimpleEntries,
128	null => "NULL",
129	init => $init,
130    },
131);
132
133foreach my $tbl (@tripletable) {
134    my $file = "$tbl->{file}.h";
135    my $head = "${prefix}$tbl->{name}";
136    my $type = $tbl->{type};
137    my $hash = $tbl->{hash};
138    my $null = $tbl->{null};
139    my $init = $tbl->{init};
140
141    open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made";
142    binmode $fh_h; select $fh_h;
143    my %val;
144
145    print << 'EOF';
146/*
147 * This file is auto-generated by mkheader.
148 * Any changes here will be lost!
149 */
150EOF
151
152    print $init if defined $init;
153
154    foreach my $uv (keys %$hash) {
155	croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)
156	    unless $uv <= 0x10FFFF;
157	my @c = unpack 'CCCC', pack 'N', $uv;
158	$val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
159    }
160
161    foreach my $p (sort { $a <=> $b } keys %val) {
162	next if ! $val{ $p };
163	for (my $r = 0; $r < 256; $r++) {
164	    next if ! $val{ $p }{ $r };
165	    printf "static $type ${head}_%02x_%02x [256] = {\n", $p, $r;
166	    for (my $c = 0; $c < 256; $c++) {
167		print "\t", defined $val{$p}{$r}{$c}
168		    ? "($type)".$val{$p}{$r}{$c}
169		    : $null;
170		print ','  if $c != 255;
171		print "\n" if $c % 8 == 7;
172	    }
173	    print "};\n\n";
174	}
175    }
176    foreach my $p (sort { $a <=> $b } keys %val) {
177	next if ! $val{ $p };
178	printf "static $type* ${head}_%02x [256] = {\n", $p;
179	for (my $r = 0; $r < 256; $r++) {
180	    print $val{ $p }{ $r }
181		? sprintf("${head}_%02x_%02x", $p, $r)
182		: "NULL";
183	    print ','  if $r != 255;
184	    print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
185	}
186	print "};\n\n";
187    }
188    print "static $type** $head [] = {\n";
189    for (my $p = 0; $p <= 0x10; $p++) {
190	print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
191	print ','  if $p != 0x10;
192	print "\n";
193    }
194    print "};\n\n";
195    close $fh_h;
196}
197
1981;
199__END__
200