1#!perl 2# 3# This auxiliary script makes five header files 4# used for building XSUB of Unicode::Collate. 5# 6# Usage: 7# <do './mkheader'> in perl, or <perl mkheader> in command line 8# 9# Input file: 10# Collate/allkeys.txt 11# 12# Output file: 13# ucatbl.h 14# 15use 5.006; 16use strict; 17use warnings; 18use Carp; 19use File::Spec; 20 21BEGIN { 22 unless ("A" eq pack('U', 0x41)) { 23 die "Unicode::Collate cannot stringify a Unicode code point\n"; 24 } 25 unless (0x41 == unpack('U', 'A')) { 26 die "Unicode::Collate cannot get a Unicode code point\n"; 27 } 28} 29 30use constant TRUE => 1; 31use constant FALSE => ""; 32use constant VCE_TEMPLATE => 'Cn4'; 33 34sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g } 35 36our $PACKAGE = 'Unicode::Collate, mkheader'; 37our $prefix = "UCA_"; 38 39our %SimpleEntries; # $codepoint => $keys 40our @Rest; 41 42{ 43 my($f, $fh); 44 foreach my $d (File::Spec->curdir()) { 45 $f = File::Spec->catfile($d, "Collate", "allkeys.txt"); 46 last if open($fh, $f); 47 $f = undef; 48 } 49 croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f; 50 51 while (my $line = <$fh>) { 52 next if $line =~ /^\s*#/; 53 if ($line =~ /^\s*\@/) { 54 push @Rest, $line; 55 next; 56 } 57 58 next if $line !~ /^\s*[0-9A-Fa-f]/; # lines without element 59 60 $line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name) 61 62 # gets element 63 my($e, $k) = split /;/, $line; 64 65 croak "Wrong Entry: <charList> must be separated by ';' ". 66 "from <collElement>" if ! $k; 67 68 my @uv = _getHexArray($e); 69 next if !@uv; 70 71 if (@uv != 1) { 72 push @Rest, $line; 73 next; 74 # Contractions of two or more characters will not be compiled. 75 } 76 77 my $is_L3_ignorable = TRUE; 78 79 my @key; 80 foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed 81 my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient. 82 my @wt = _getHexArray($arr); 83 push @key, pack(VCE_TEMPLATE, $var, @wt); 84 $is_L3_ignorable = FALSE 85 if $wt[0] || $wt[1] || $wt[2]; 86 # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable 87 # is completely ignorable. 88 # For expansion, an entry $is_L3_ignorable 89 # if and only if "all" CEs are [.0000.0000.0000]. 90 } 91 my $mapping = $is_L3_ignorable ? [] : \@key; 92 my $num = @$mapping; 93 my $str = chr($num).join('', @$mapping); 94 $SimpleEntries{$uv[0]} = stringify($str); 95 } 96} 97 98sub stringify { 99 my $str = shift; 100 return sprintf '"%s"', join '', 101 map sprintf("\\x%02x", ord $_), split //, $str; 102 103} 104 105########## writing header files ########## 106 107my $init = ''; 108{ 109 my $type = "char* const"; 110 my $head = $prefix."rest"; 111 112 $init .= "static const $type $head [] = {\n"; 113 for my $line (@Rest) { 114 $line =~ s/\s*\z//; 115 next if $line eq ''; 116 $init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/; 117 $init .= stringify($line).",\n"; 118 } 119 $init .= "NULL\n"; # sentinel 120 $init .= "};\n\n"; 121} 122 123my @tripletable = ( 124 { 125 file => "ucatbl", 126 name => "simple", 127 type => "char* const", 128 hash => \%SimpleEntries, 129 null => "NULL", 130 init => $init, 131 }, 132); 133 134foreach my $tbl (@tripletable) { 135 my $file = "$tbl->{file}.h"; 136 my $head = "${prefix}$tbl->{name}"; 137 my $type = $tbl->{type}; 138 my $hash = $tbl->{hash}; 139 my $null = $tbl->{null}; 140 my $init = $tbl->{init}; 141 142 open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made"; 143 binmode $fh_h; select $fh_h; 144 my %val; 145 146 print << 'EOF'; 147/* 148 * This file is auto-generated by mkheader. 149 * Any changes here will be lost! 150 */ 151EOF 152 153 print $init if defined $init; 154 155 foreach my $uv (keys %$hash) { 156 croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv) 157 unless $uv <= 0x10FFFF; 158 my @c = unpack 'CCCC', pack 'N', $uv; 159 $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv}; 160 # $c[0] must be 0. 161 } 162 163 foreach my $p (sort { $a <=> $b } keys %val) { 164 next if ! $val{ $p }; 165 for (my $r = 0; $r < 256; $r++) { 166 next if ! $val{ $p }{ $r }; 167 printf "static const $type ${head}_%02x_%02x [256] = {\n", $p, $r; 168 for (my $c = 0; $c < 256; $c++) { 169 print "\t", defined $val{$p}{$r}{$c} 170 ? $val{$p}{$r}{$c} 171 : $null; 172 print ',' if $c != 255; 173 print "\n" if $c % 8 == 7; 174 } 175 print "};\n\n"; 176 } 177 } 178 foreach my $p (sort { $a <=> $b } keys %val) { 179 next if ! $val{ $p }; 180 printf "static const $type* const ${head}_%02x [256] = {\n", $p; 181 for (my $r = 0; $r < 256; $r++) { 182 print $val{ $p }{ $r } 183 ? sprintf("${head}_%02x_%02x", $p, $r) 184 : "NULL"; 185 print ',' if $r != 255; 186 print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0; 187 } 188 print "};\n\n"; 189 } 190 print "static const $type* const * const $head [] = {\n"; 191 for (my $p = 0; $p <= 0x10; $p++) { 192 print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL"; 193 print ',' if $p != 0x10; 194 print "\n"; 195 } 196 print "};\n\n"; 197 close $fh_h; 198} 199 2001; 201__END__ 202