xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/mkheader (revision eac174f2741a08d8deb8aae59a7f778ef9b5d770)
1898184e3Ssthen#!perl
2898184e3Ssthen#
3898184e3Ssthen# This auxiliary script makes five header files
4898184e3Ssthen# used for building XSUB of Unicode::Collate.
5898184e3Ssthen#
6898184e3Ssthen# Usage:
79f11ffb7Safresh1#    <do './mkheader'> in perl, or <perl mkheader> in command line
8898184e3Ssthen#
9898184e3Ssthen# Input file:
10898184e3Ssthen#    Collate/allkeys.txt
11898184e3Ssthen#
12898184e3Ssthen# Output file:
13898184e3Ssthen#    ucatbl.h
14898184e3Ssthen#
15898184e3Ssthenuse 5.006;
16898184e3Ssthenuse strict;
17898184e3Ssthenuse warnings;
18898184e3Ssthenuse Carp;
19898184e3Ssthenuse File::Spec;
20898184e3Ssthen
21898184e3Ssthenuse constant TRUE  => 1;
22898184e3Ssthenuse constant FALSE => "";
23898184e3Ssthenuse constant VCE_TEMPLATE => 'Cn4';
24898184e3Ssthen
25898184e3Ssthensub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g }
26898184e3Ssthen
27898184e3Ssthenour $PACKAGE = 'Unicode::Collate, mkheader';
28898184e3Ssthenour $prefix  = "UCA_";
29898184e3Ssthen
30898184e3Ssthenour %SimpleEntries;	# $codepoint => $keys
31898184e3Ssthenour @Rest;
32898184e3Ssthen
33898184e3Ssthen{
34898184e3Ssthen    my($f, $fh);
35898184e3Ssthen    foreach my $d (File::Spec->curdir()) {
36898184e3Ssthen	$f = File::Spec->catfile($d, "Collate", "allkeys.txt");
37898184e3Ssthen	last if open($fh, $f);
38898184e3Ssthen	$f = undef;
39898184e3Ssthen    }
40898184e3Ssthen    croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f;
41898184e3Ssthen
42898184e3Ssthen    while (my $line = <$fh>) {
43898184e3Ssthen	next if $line =~ /^\s*#/;
44898184e3Ssthen	if ($line =~ /^\s*\@/) {
45898184e3Ssthen	    push @Rest, $line;
46898184e3Ssthen	    next;
47898184e3Ssthen	}
48898184e3Ssthen
49b8851fccSafresh1	next if $line !~ /^\s*[0-9A-Fa-f]/; # lines without element
50898184e3Ssthen
51898184e3Ssthen	$line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name)
52898184e3Ssthen
53898184e3Ssthen	# gets element
54898184e3Ssthen	my($e, $k) = split /;/, $line;
55898184e3Ssthen
56898184e3Ssthen	croak "Wrong Entry: <charList> must be separated by ';' ".
57898184e3Ssthen	      "from <collElement>" if ! $k;
58898184e3Ssthen
59898184e3Ssthen	my @uv = _getHexArray($e);
60898184e3Ssthen	next if !@uv;
61898184e3Ssthen
62898184e3Ssthen	if (@uv != 1) {
63898184e3Ssthen	    push @Rest, $line;
64898184e3Ssthen	    next;
65b8851fccSafresh1	    # Contractions of two or more characters will not be compiled.
66898184e3Ssthen	}
67898184e3Ssthen
68898184e3Ssthen	my $is_L3_ignorable = TRUE;
69898184e3Ssthen
70898184e3Ssthen	my @key;
71898184e3Ssthen	foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed
72898184e3Ssthen	    my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient.
73898184e3Ssthen	    my @wt = _getHexArray($arr);
74898184e3Ssthen	    push @key, pack(VCE_TEMPLATE, $var, @wt);
75898184e3Ssthen	    $is_L3_ignorable = FALSE
76898184e3Ssthen		if $wt[0] || $wt[1] || $wt[2];
77898184e3Ssthen	    # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable
78898184e3Ssthen	    # is completely ignorable.
79898184e3Ssthen	    # For expansion, an entry $is_L3_ignorable
80898184e3Ssthen	    # if and only if "all" CEs are [.0000.0000.0000].
81898184e3Ssthen	}
82898184e3Ssthen	my $mapping = $is_L3_ignorable ? [] : \@key;
83898184e3Ssthen	my $num = @$mapping;
84898184e3Ssthen	my $str = chr($num).join('', @$mapping);
85898184e3Ssthen	$SimpleEntries{$uv[0]} = stringify($str);
86898184e3Ssthen    }
87898184e3Ssthen}
88898184e3Ssthen
89898184e3Ssthensub stringify {
90898184e3Ssthen    my $str = shift;
91898184e3Ssthen    return sprintf '"%s"', join '',
92898184e3Ssthen	   map sprintf("\\x%02x", ord $_), split //, $str;
93898184e3Ssthen
94898184e3Ssthen}
95898184e3Ssthen
96898184e3Ssthen########## writing header files ##########
97898184e3Ssthen
98898184e3Ssthenmy $init = '';
99898184e3Ssthen{
100b8851fccSafresh1    my $type = "char* const";
101898184e3Ssthen    my $head = $prefix."rest";
102898184e3Ssthen
103b8851fccSafresh1    $init .= "static const $type $head [] = {\n";
104898184e3Ssthen    for my $line (@Rest) {
105898184e3Ssthen	$line =~ s/\s*\z//;
106898184e3Ssthen	next if $line eq '';
107898184e3Ssthen	$init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/;
108b8851fccSafresh1	$init .= stringify($line).",\n";
109898184e3Ssthen    }
110898184e3Ssthen    $init .= "NULL\n"; # sentinel
111898184e3Ssthen    $init .= "};\n\n";
112898184e3Ssthen}
113898184e3Ssthen
114898184e3Ssthenmy @tripletable = (
115898184e3Ssthen    {
116898184e3Ssthen	file => "ucatbl",
117898184e3Ssthen	name => "simple",
118b8851fccSafresh1	type => "char* const",
119898184e3Ssthen	hash => \%SimpleEntries,
120898184e3Ssthen	null => "NULL",
121898184e3Ssthen	init => $init,
122898184e3Ssthen    },
123898184e3Ssthen);
124898184e3Ssthen
125898184e3Ssthenforeach my $tbl (@tripletable) {
126898184e3Ssthen    my $file = "$tbl->{file}.h";
127898184e3Ssthen    my $head = "${prefix}$tbl->{name}";
128898184e3Ssthen    my $type = $tbl->{type};
129898184e3Ssthen    my $hash = $tbl->{hash};
130898184e3Ssthen    my $null = $tbl->{null};
131898184e3Ssthen    my $init = $tbl->{init};
132898184e3Ssthen
1336fb12b70Safresh1    open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made";
134*eac174f2Safresh1    binmode $fh_h;
135*eac174f2Safresh1    my $old_fh = select $fh_h;
136898184e3Ssthen    my %val;
137898184e3Ssthen
1386fb12b70Safresh1    print << 'EOF';
139898184e3Ssthen/*
140898184e3Ssthen * This file is auto-generated by mkheader.
141898184e3Ssthen * Any changes here will be lost!
142898184e3Ssthen */
143898184e3SsthenEOF
144898184e3Ssthen
145898184e3Ssthen    print $init if defined $init;
146898184e3Ssthen
147898184e3Ssthen    foreach my $uv (keys %$hash) {
148898184e3Ssthen	croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv)
149898184e3Ssthen	    unless $uv <= 0x10FFFF;
150898184e3Ssthen	my @c = unpack 'CCCC', pack 'N', $uv;
151898184e3Ssthen	$val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv};
152b8851fccSafresh1	# $c[0] must be 0.
153898184e3Ssthen    }
154898184e3Ssthen
155898184e3Ssthen    foreach my $p (sort { $a <=> $b } keys %val) {
156898184e3Ssthen	next if ! $val{ $p };
157898184e3Ssthen	for (my $r = 0; $r < 256; $r++) {
158898184e3Ssthen	    next if ! $val{ $p }{ $r };
159b8851fccSafresh1	    printf "static const $type ${head}_%02x_%02x [256] = {\n", $p, $r;
160898184e3Ssthen	    for (my $c = 0; $c < 256; $c++) {
161898184e3Ssthen		print "\t", defined $val{$p}{$r}{$c}
162b8851fccSafresh1		    ? $val{$p}{$r}{$c}
163898184e3Ssthen		    : $null;
164898184e3Ssthen		print ','  if $c != 255;
165898184e3Ssthen		print "\n" if $c % 8 == 7;
166898184e3Ssthen	    }
167898184e3Ssthen	    print "};\n\n";
168898184e3Ssthen	}
169898184e3Ssthen    }
170898184e3Ssthen    foreach my $p (sort { $a <=> $b } keys %val) {
171898184e3Ssthen	next if ! $val{ $p };
172b8851fccSafresh1	printf "static const $type* const ${head}_%02x [256] = {\n", $p;
173898184e3Ssthen	for (my $r = 0; $r < 256; $r++) {
174898184e3Ssthen	    print $val{ $p }{ $r }
175898184e3Ssthen		? sprintf("${head}_%02x_%02x", $p, $r)
176898184e3Ssthen		: "NULL";
177898184e3Ssthen	    print ','  if $r != 255;
178898184e3Ssthen	    print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0;
179898184e3Ssthen	}
180898184e3Ssthen	print "};\n\n";
181898184e3Ssthen    }
182b8851fccSafresh1    print "static const $type* const * const $head [] = {\n";
183898184e3Ssthen    for (my $p = 0; $p <= 0x10; $p++) {
184898184e3Ssthen	print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL";
185898184e3Ssthen	print ','  if $p != 0x10;
186898184e3Ssthen	print "\n";
187898184e3Ssthen    }
188898184e3Ssthen    print "};\n\n";
1896fb12b70Safresh1    close $fh_h;
190*eac174f2Safresh1    select $old_fh;
191898184e3Ssthen}
192898184e3Ssthen
193898184e3Ssthen1;
194898184e3Ssthen__END__
195