1898184e3Ssthen#!perl 2898184e3Ssthen# 3898184e3Ssthen# This auxiliary script makes five header files 4898184e3Ssthen# used for building XSUB of Unicode::Collate. 5898184e3Ssthen# 6898184e3Ssthen# Usage: 79f11ffb7Safresh1# <do './mkheader'> in perl, or <perl mkheader> in command line 8898184e3Ssthen# 9898184e3Ssthen# Input file: 10898184e3Ssthen# Collate/allkeys.txt 11898184e3Ssthen# 12898184e3Ssthen# Output file: 13898184e3Ssthen# ucatbl.h 14898184e3Ssthen# 15898184e3Ssthenuse 5.006; 16898184e3Ssthenuse strict; 17898184e3Ssthenuse warnings; 18898184e3Ssthenuse Carp; 19898184e3Ssthenuse File::Spec; 20898184e3Ssthen 21898184e3Ssthenuse constant TRUE => 1; 22898184e3Ssthenuse constant FALSE => ""; 23898184e3Ssthenuse constant VCE_TEMPLATE => 'Cn4'; 24898184e3Ssthen 25898184e3Ssthensub _getHexArray { map hex, $_[0] =~ /([0-9a-fA-F]+)/g } 26898184e3Ssthen 27898184e3Ssthenour $PACKAGE = 'Unicode::Collate, mkheader'; 28898184e3Ssthenour $prefix = "UCA_"; 29898184e3Ssthen 30898184e3Ssthenour %SimpleEntries; # $codepoint => $keys 31898184e3Ssthenour @Rest; 32898184e3Ssthen 33898184e3Ssthen{ 34898184e3Ssthen my($f, $fh); 35898184e3Ssthen foreach my $d (File::Spec->curdir()) { 36898184e3Ssthen $f = File::Spec->catfile($d, "Collate", "allkeys.txt"); 37898184e3Ssthen last if open($fh, $f); 38898184e3Ssthen $f = undef; 39898184e3Ssthen } 40898184e3Ssthen croak "$PACKAGE: Collate/allkeys.txt is not found" if !defined $f; 41898184e3Ssthen 42898184e3Ssthen while (my $line = <$fh>) { 43898184e3Ssthen next if $line =~ /^\s*#/; 44898184e3Ssthen if ($line =~ /^\s*\@/) { 45898184e3Ssthen push @Rest, $line; 46898184e3Ssthen next; 47898184e3Ssthen } 48898184e3Ssthen 49b8851fccSafresh1 next if $line !~ /^\s*[0-9A-Fa-f]/; # lines without element 50898184e3Ssthen 51898184e3Ssthen $line =~ s/[#%]\s*(.*)//; # removing comment (not getting the name) 52898184e3Ssthen 53898184e3Ssthen # gets element 54898184e3Ssthen my($e, $k) = split /;/, $line; 55898184e3Ssthen 56898184e3Ssthen croak "Wrong Entry: <charList> must be separated by ';' ". 57898184e3Ssthen "from <collElement>" if ! $k; 58898184e3Ssthen 59898184e3Ssthen my @uv = _getHexArray($e); 60898184e3Ssthen next if !@uv; 61898184e3Ssthen 62898184e3Ssthen if (@uv != 1) { 63898184e3Ssthen push @Rest, $line; 64898184e3Ssthen next; 65b8851fccSafresh1 # Contractions of two or more characters will not be compiled. 66898184e3Ssthen } 67898184e3Ssthen 68898184e3Ssthen my $is_L3_ignorable = TRUE; 69898184e3Ssthen 70898184e3Ssthen my @key; 71898184e3Ssthen foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed 72898184e3Ssthen my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient. 73898184e3Ssthen my @wt = _getHexArray($arr); 74898184e3Ssthen push @key, pack(VCE_TEMPLATE, $var, @wt); 75898184e3Ssthen $is_L3_ignorable = FALSE 76898184e3Ssthen if $wt[0] || $wt[1] || $wt[2]; 77898184e3Ssthen # Conformance Test for 3.1.1 and 4.0.0 shows Level 3 ignorable 78898184e3Ssthen # is completely ignorable. 79898184e3Ssthen # For expansion, an entry $is_L3_ignorable 80898184e3Ssthen # if and only if "all" CEs are [.0000.0000.0000]. 81898184e3Ssthen } 82898184e3Ssthen my $mapping = $is_L3_ignorable ? [] : \@key; 83898184e3Ssthen my $num = @$mapping; 84898184e3Ssthen my $str = chr($num).join('', @$mapping); 85898184e3Ssthen $SimpleEntries{$uv[0]} = stringify($str); 86898184e3Ssthen } 87898184e3Ssthen} 88898184e3Ssthen 89898184e3Ssthensub stringify { 90898184e3Ssthen my $str = shift; 91898184e3Ssthen return sprintf '"%s"', join '', 92898184e3Ssthen map sprintf("\\x%02x", ord $_), split //, $str; 93898184e3Ssthen 94898184e3Ssthen} 95898184e3Ssthen 96898184e3Ssthen########## writing header files ########## 97898184e3Ssthen 98898184e3Ssthenmy $init = ''; 99898184e3Ssthen{ 100b8851fccSafresh1 my $type = "char* const"; 101898184e3Ssthen my $head = $prefix."rest"; 102898184e3Ssthen 103b8851fccSafresh1 $init .= "static const $type $head [] = {\n"; 104898184e3Ssthen for my $line (@Rest) { 105898184e3Ssthen $line =~ s/\s*\z//; 106898184e3Ssthen next if $line eq ''; 107898184e3Ssthen $init .= "/*$line*/\n" if $line =~ /^[A-Za-z0-9_.:;@\ \[\]]+\z/; 108b8851fccSafresh1 $init .= stringify($line).",\n"; 109898184e3Ssthen } 110898184e3Ssthen $init .= "NULL\n"; # sentinel 111898184e3Ssthen $init .= "};\n\n"; 112898184e3Ssthen} 113898184e3Ssthen 114898184e3Ssthenmy @tripletable = ( 115898184e3Ssthen { 116898184e3Ssthen file => "ucatbl", 117898184e3Ssthen name => "simple", 118b8851fccSafresh1 type => "char* const", 119898184e3Ssthen hash => \%SimpleEntries, 120898184e3Ssthen null => "NULL", 121898184e3Ssthen init => $init, 122898184e3Ssthen }, 123898184e3Ssthen); 124898184e3Ssthen 125898184e3Ssthenforeach my $tbl (@tripletable) { 126898184e3Ssthen my $file = "$tbl->{file}.h"; 127898184e3Ssthen my $head = "${prefix}$tbl->{name}"; 128898184e3Ssthen my $type = $tbl->{type}; 129898184e3Ssthen my $hash = $tbl->{hash}; 130898184e3Ssthen my $null = $tbl->{null}; 131898184e3Ssthen my $init = $tbl->{init}; 132898184e3Ssthen 1336fb12b70Safresh1 open my $fh_h, ">$file" or croak "$PACKAGE: $file can't be made"; 134*eac174f2Safresh1 binmode $fh_h; 135*eac174f2Safresh1 my $old_fh = select $fh_h; 136898184e3Ssthen my %val; 137898184e3Ssthen 1386fb12b70Safresh1 print << 'EOF'; 139898184e3Ssthen/* 140898184e3Ssthen * This file is auto-generated by mkheader. 141898184e3Ssthen * Any changes here will be lost! 142898184e3Ssthen */ 143898184e3SsthenEOF 144898184e3Ssthen 145898184e3Ssthen print $init if defined $init; 146898184e3Ssthen 147898184e3Ssthen foreach my $uv (keys %$hash) { 148898184e3Ssthen croak sprintf("a Unicode code point 0x%04X over 0x10FFFF.", $uv) 149898184e3Ssthen unless $uv <= 0x10FFFF; 150898184e3Ssthen my @c = unpack 'CCCC', pack 'N', $uv; 151898184e3Ssthen $val{ $c[1] }{ $c[2] }{ $c[3] } = $hash->{$uv}; 152b8851fccSafresh1 # $c[0] must be 0. 153898184e3Ssthen } 154898184e3Ssthen 155898184e3Ssthen foreach my $p (sort { $a <=> $b } keys %val) { 156898184e3Ssthen next if ! $val{ $p }; 157898184e3Ssthen for (my $r = 0; $r < 256; $r++) { 158898184e3Ssthen next if ! $val{ $p }{ $r }; 159b8851fccSafresh1 printf "static const $type ${head}_%02x_%02x [256] = {\n", $p, $r; 160898184e3Ssthen for (my $c = 0; $c < 256; $c++) { 161898184e3Ssthen print "\t", defined $val{$p}{$r}{$c} 162b8851fccSafresh1 ? $val{$p}{$r}{$c} 163898184e3Ssthen : $null; 164898184e3Ssthen print ',' if $c != 255; 165898184e3Ssthen print "\n" if $c % 8 == 7; 166898184e3Ssthen } 167898184e3Ssthen print "};\n\n"; 168898184e3Ssthen } 169898184e3Ssthen } 170898184e3Ssthen foreach my $p (sort { $a <=> $b } keys %val) { 171898184e3Ssthen next if ! $val{ $p }; 172b8851fccSafresh1 printf "static const $type* const ${head}_%02x [256] = {\n", $p; 173898184e3Ssthen for (my $r = 0; $r < 256; $r++) { 174898184e3Ssthen print $val{ $p }{ $r } 175898184e3Ssthen ? sprintf("${head}_%02x_%02x", $p, $r) 176898184e3Ssthen : "NULL"; 177898184e3Ssthen print ',' if $r != 255; 178898184e3Ssthen print "\n" if $val{ $p }{ $r } || ($r+1) % 8 == 0; 179898184e3Ssthen } 180898184e3Ssthen print "};\n\n"; 181898184e3Ssthen } 182b8851fccSafresh1 print "static const $type* const * const $head [] = {\n"; 183898184e3Ssthen for (my $p = 0; $p <= 0x10; $p++) { 184898184e3Ssthen print $val{ $p } ? sprintf("${head}_%02x", $p) : "NULL"; 185898184e3Ssthen print ',' if $p != 0x10; 186898184e3Ssthen print "\n"; 187898184e3Ssthen } 188898184e3Ssthen print "};\n\n"; 1896fb12b70Safresh1 close $fh_h; 190*eac174f2Safresh1 select $old_fh; 191898184e3Ssthen} 192898184e3Ssthen 193898184e3Ssthen1; 194898184e3Ssthen__END__ 195