1#!perl 2# 3# This auxiliary script makes five header files 4# used for building XSUB of Unicode::Collate. 5# 6# Usage: 7# <do 'mkheader'> in perl, or <perl mkheader> in command line 8# 9# Input file: 10# Collate/allkeys.txt 11# 12# Output file: 13# ucatbl.h 14# 15use 5.006; 16use strict; 17use warnings; 18use Carp; 19use File::Spec; 20 21BEGIN { 22 unless ("A" eq pack('U', 0x41)) { 23 die "Unicode::Collate cannot stringify a Unicode code point\n"; 24 } 25} 26 27use constant TRUE => 1; 28use constant FALSE => ""; 29use constant VCE_TEMPLATE => 'Cn4'; 30 31sub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g } 32 33our $PACKAGE = 'Unicode::Collate, mkheader'; 34our $prefix = "UCA_"; 35 36our %SimpleEntries; # $codepoint => $keys 37our @Rest; 38 39{ 40 my($f, $fh); 41 foreach my $d (File::Spec->curdir()) { 42 $f = File::Spec->catfile($d, "Collate", "allkeys.txt"); 43 last if open($fh, $f); 44 $f = undef; 45 } 46 croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f; 47 48 while (my $line = <$fh>) { 49 next if $line =~ /^\s*#/; 50 if ($line =~ /^\s*\@/) { 51 push @Rest, $line; 52 next; 53 } 54 55 next if $line !~ /^\s*[0-9A-Fa-f]/; 56 57 $line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name) 58 59 # gets element 60 my($e, $k) = split /;/, $line; 61 62 croak "Wrong Entry: <charList> must be separated by ';' ". 63 "from <collElement>" if ! $k; 64 65 my @uv = _getHexArray($e); 66 next if !@uv; 67 68 if (@uv != 1) { 69 push @Rest, $line; 70 next; 71 } 72 73 my $is_L3_ignorable = TRUE; 74 75 my @key; 76 foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed 77 my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient. 78 my @wt = _getHexArray($arr); 79 push @key, pack(VCE_TEMPLATE, $var, @wt); 80 $is_L3_ignorable = FALSE 81 if $wt[0] || $wt[1] || $wt[2]; 82 # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable 83 # is completely ignorable. 84 # For expansion, an entry $is_L3_ignorable 85 # if and only if "all" CEs are [.0000.0000.0000]. 86 } 87 my $mapping = $is_L3_ignorable ? [] : \@key; 88 my $num = @$mapping; 89 my $str = chr($num).join('', @$mapping); 90 $SimpleEntries{$uv[0]} = stringify($str); 91 } 92} 93 94sub stringify { 95 my $str = shift; 96 return sprintf '"%s"', join '', 97 map sprintf("\\x%02x", ord $_), split //, $str; 98 99} 100 101########## writing header files ########## 102 103my $init = ''; 104{ 105 my $type = "char*"; 106 my $head = $prefix."rest"; 107 108 $init .= "static $type $head [] = {\n"; 109 for my $line (@Rest) { 110 $line =~ s/\s*\z//; 111 next if $line eq ''; 112 $init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/; 113 $init .= "($type)".stringify($line).",\n"; 114 } 115 $init .= "NULL\n"; # sentinel 116 $init .= "};\n\n"; 117} 118 119my @tripletable = ( 120 { 121 file => "ucatbl", 122 name => "simple", 123 type => "char*", 124 hash => \%SimpleEntries, 125 null => "NULL", 126 init => $init, 127 }, 128); 129 130foreach my $tbl (@tripletable) { 131 my $file = "$tbl->{file}.h"; 132 my $head = "${prefix}$tbl->{name}"; 133 my $type = $tbl->{type}; 134 my $hash = $tbl->{hash}; 135 my $null = $tbl->{null}; 136 my $init = $tbl->{init}; 137 138 open FH, ">$file" or croak "$PACKAGE: $file can't be made"; 139 binmode FH; select FH; 140 my %val; 141 142 print FH << 'EOF'; 143/* 144 * This file is auto-generated by mkheader. 145 * Any changes here will be lost! 146 */ 147EOF 148 149 print $init if defined $init; 150 151 foreach my $uv (keys %$hash) { 152 croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv) 153 unless $uv <= 0x10FFFF; 154 my @c = unpack 'CCCC', pack 'N', $uv; 155 $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv}; 156 } 157 158 foreach my $p (sort { $a <=> $b } keys %val) { 159 next if ! $val{ $p }; 160 for (my $r = 0; $r < 256; $r++) { 161 next if ! $val{ $p }{ $r }; 162 printf "static $type ${head}_%02x_%02x [256] = {\n", $p, $r; 163 for (my $c = 0; $c < 256; $c++) { 164 print "\t", defined $val{$p}{$r}{$c} 165 ? "($type)".$val{$p}{$r}{$c} 166 : $null; 167 print ',' if $c != 255; 168 print "\n" if $c % 8 == 7; 169 } 170 print "};\n\n"; 171 } 172 } 173 foreach my $p (sort { $a <=> $b } keys %val) { 174 next if ! $val{ $p }; 175 printf "static $type* ${head}_%02x [256] = {\n", $p; 176 for (my $r = 0; $r < 256; $r++) { 177 print $val{ $p }{ $r } 178 ? sprintf("${head}_%02x_%02x", $p, $r) 179 : "NULL"; 180 print ',' if $r != 255; 181 print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0; 182 } 183 print "};\n\n"; 184 } 185 print "static $type** $head [] = {\n"; 186 for (my $p = 0; $p <= 0x10; $p++) { 187 print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL"; 188 print ',' if $p != 0x10; 189 print "\n"; 190 } 191 print "};\n\n"; 192 close FH; 193} 194 1951; 196__END__ 197