xref: /openbsd-src/gnu/usr.bin/perl/lib/Unicode/UCD.pm (revision e2a1b4748ac00cfe1e64a346f850b3c670166aef)
1package Unicode::UCD;
2
3use strict;
4use warnings;
5
6our $VERSION = '0.21';
7
8use Storable qw(dclone);
9
10require Exporter;
11
12our @ISA = qw(Exporter);
13
14our @EXPORT_OK = qw(charinfo
15		    charblock charscript
16		    charblocks charscripts
17		    charinrange
18		    compexcl
19		    casefold casespec);
20
21use Carp;
22
23=head1 NAME
24
25Unicode::UCD - Unicode character database
26
27=head1 SYNOPSIS
28
29    use Unicode::UCD 'charinfo';
30    my $charinfo   = charinfo($codepoint);
31
32    use Unicode::UCD 'charblock';
33    my $charblock  = charblock($codepoint);
34
35    use Unicode::UCD 'charscript';
36    my $charscript = charscript($codepoint);
37
38    use Unicode::UCD 'charblocks';
39    my $charblocks = charblocks();
40
41    use Unicode::UCD 'charscripts';
42    my %charscripts = charscripts();
43
44    use Unicode::UCD qw(charscript charinrange);
45    my $range = charscript($script);
46    print "looks like $script\n" if charinrange($range, $codepoint);
47
48    use Unicode::UCD 'compexcl';
49    my $compexcl = compexcl($codepoint);
50
51    my $unicode_version = Unicode::UCD::UnicodeVersion();
52
53=head1 DESCRIPTION
54
55The Unicode::UCD module offers a simple interface to the Unicode
56Character Database.
57
58=cut
59
60my $UNICODEFH;
61my $BLOCKSFH;
62my $SCRIPTSFH;
63my $VERSIONFH;
64my $COMPEXCLFH;
65my $CASEFOLDFH;
66my $CASESPECFH;
67
68sub openunicode {
69    my ($rfh, @path) = @_;
70    my $f;
71    unless (defined $$rfh) {
72	for my $d (@INC) {
73	    use File::Spec;
74	    $f = File::Spec->catfile($d, "unicore", @path);
75	    last if open($$rfh, $f);
76	    undef $f;
77	}
78	croak __PACKAGE__, ": failed to find ",
79              File::Spec->catfile(@path), " in @INC"
80	    unless defined $f;
81    }
82    return $f;
83}
84
85=head2 charinfo
86
87    use Unicode::UCD 'charinfo';
88
89    my $charinfo = charinfo(0x41);
90
91charinfo() returns a reference to a hash that has the following fields
92as defined by the Unicode standard:
93
94    key
95
96    code             code point with at least four hexdigits
97    name             name of the character IN UPPER CASE
98    category         general category of the character
99    combining        classes used in the Canonical Ordering Algorithm
100    bidi             bidirectional category
101    decomposition    character decomposition mapping
102    decimal          if decimal digit this is the integer numeric value
103    digit            if digit this is the numeric value
104    numeric          if numeric is the integer or rational numeric value
105    mirrored         if mirrored in bidirectional text
106    unicode10        Unicode 1.0 name if existed and different
107    comment          ISO 10646 comment field
108    upper            uppercase equivalent mapping
109    lower            lowercase equivalent mapping
110    title            titlecase equivalent mapping
111
112    block            block the character belongs to (used in \p{In...})
113    script           script the character belongs to
114
115If no match is found, a reference to an empty hash is returned.
116
117The C<block> property is the same as returned by charinfo().  It is
118not defined in the Unicode Character Database proper (Chapter 4 of the
119Unicode 3.0 Standard, aka TUS3) but instead in an auxiliary database
120(Chapter 14 of TUS3).  Similarly for the C<script> property.
121
122Note that you cannot do (de)composition and casing based solely on the
123above C<decomposition> and C<lower>, C<upper>, C<title>, properties,
124you will need also the compexcl(), casefold(), and casespec() functions.
125
126=cut
127
128sub _getcode {
129    my $arg = shift;
130
131    if ($arg =~ /^[1-9]\d*$/) {
132	return $arg;
133    } elsif ($arg =~ /^(?:[Uu]\+|0[xX])?([[:xdigit:]]+)$/) {
134	return hex($1);
135    }
136
137    return;
138}
139
140# Lingua::KO::Hangul::Util not part of the standard distribution
141# but it will be used if available.
142
143eval { require Lingua::KO::Hangul::Util };
144my $hasHangulUtil = ! $@;
145if ($hasHangulUtil) {
146    Lingua::KO::Hangul::Util->import();
147}
148
149sub hangul_decomp { # internal: called from charinfo
150    if ($hasHangulUtil) {
151	my @tmp = decomposeHangul(shift);
152	return sprintf("%04X %04X",      @tmp) if @tmp == 2;
153	return sprintf("%04X %04X %04X", @tmp) if @tmp == 3;
154    }
155    return;
156}
157
158sub hangul_charname { # internal: called from charinfo
159    return sprintf("HANGUL SYLLABLE-%04X", shift);
160}
161
162sub han_charname { # internal: called from charinfo
163    return sprintf("CJK UNIFIED IDEOGRAPH-%04X", shift);
164}
165
166my @CharinfoRanges = (
167# block name
168# [ first, last, coderef to name, coderef to decompose ],
169# CJK Ideographs Extension A
170  [ 0x3400,   0x4DB5,   \&han_charname,   undef  ],
171# CJK Ideographs
172  [ 0x4E00,   0x9FA5,   \&han_charname,   undef  ],
173# Hangul Syllables
174  [ 0xAC00,   0xD7A3,   $hasHangulUtil ? \&getHangulName : \&hangul_charname,  \&hangul_decomp ],
175# Non-Private Use High Surrogates
176  [ 0xD800,   0xDB7F,   undef,   undef  ],
177# Private Use High Surrogates
178  [ 0xDB80,   0xDBFF,   undef,   undef  ],
179# Low Surrogates
180  [ 0xDC00,   0xDFFF,   undef,   undef  ],
181# The Private Use Area
182  [ 0xE000,   0xF8FF,   undef,   undef  ],
183# CJK Ideographs Extension B
184  [ 0x20000,  0x2A6D6,  \&han_charname,   undef  ],
185# Plane 15 Private Use Area
186  [ 0xF0000,  0xFFFFD,  undef,   undef  ],
187# Plane 16 Private Use Area
188  [ 0x100000, 0x10FFFD, undef,   undef  ],
189);
190
191sub charinfo {
192    my $arg  = shift;
193    my $code = _getcode($arg);
194    croak __PACKAGE__, "::charinfo: unknown code '$arg'"
195	unless defined $code;
196    my $hexk = sprintf("%06X", $code);
197    my($rcode,$rname,$rdec);
198    foreach my $range (@CharinfoRanges){
199      if ($range->[0] <= $code && $code <= $range->[1]) {
200        $rcode = $hexk;
201	$rcode =~ s/^0+//;
202	$rcode =  sprintf("%04X", hex($rcode));
203        $rname = $range->[2] ? $range->[2]->($code) : '';
204        $rdec  = $range->[3] ? $range->[3]->($code) : '';
205        $hexk  = sprintf("%06X", $range->[0]); # replace by the first
206        last;
207      }
208    }
209    openunicode(\$UNICODEFH, "UnicodeData.txt");
210    if (defined $UNICODEFH) {
211	use Search::Dict 1.02;
212	if (look($UNICODEFH, "$hexk;", { xfrm => sub { $_[0] =~ /^([^;]+);(.+)/; sprintf "%06X;$2", hex($1) } } ) >= 0) {
213	    my $line = <$UNICODEFH>;
214	    return unless defined $line;
215	    chomp $line;
216	    my %prop;
217	    @prop{qw(
218		     code name category
219		     combining bidi decomposition
220		     decimal digit numeric
221		     mirrored unicode10 comment
222		     upper lower title
223		    )} = split(/;/, $line, -1);
224	    $hexk =~ s/^0+//;
225	    $hexk =  sprintf("%04X", hex($hexk));
226	    if ($prop{code} eq $hexk) {
227		$prop{block}  = charblock($code);
228		$prop{script} = charscript($code);
229		if(defined $rname){
230                    $prop{code} = $rcode;
231                    $prop{name} = $rname;
232                    $prop{decomposition} = $rdec;
233                }
234		return \%prop;
235	    }
236	}
237    }
238    return;
239}
240
241sub _search { # Binary search in a [[lo,hi,prop],[...],...] table.
242    my ($table, $lo, $hi, $code) = @_;
243
244    return if $lo > $hi;
245
246    my $mid = int(($lo+$hi) / 2);
247
248    if ($table->[$mid]->[0] < $code) {
249	if ($table->[$mid]->[1] >= $code) {
250	    return $table->[$mid]->[2];
251	} else {
252	    _search($table, $mid + 1, $hi, $code);
253	}
254    } elsif ($table->[$mid]->[0] > $code) {
255	_search($table, $lo, $mid - 1, $code);
256    } else {
257	return $table->[$mid]->[2];
258    }
259}
260
261sub charinrange {
262    my ($range, $arg) = @_;
263    my $code = _getcode($arg);
264    croak __PACKAGE__, "::charinrange: unknown code '$arg'"
265	unless defined $code;
266    _search($range, 0, $#$range, $code);
267}
268
269=head2 charblock
270
271    use Unicode::UCD 'charblock';
272
273    my $charblock = charblock(0x41);
274    my $charblock = charblock(1234);
275    my $charblock = charblock("0x263a");
276    my $charblock = charblock("U+263a");
277
278    my $range     = charblock('Armenian');
279
280With a B<code point argument> charblock() returns the I<block> the character
281belongs to, e.g.  C<Basic Latin>.  Note that not all the character
282positions within all blocks are defined.
283
284See also L</Blocks versus Scripts>.
285
286If supplied with an argument that can't be a code point, charblock() tries
287to do the opposite and interpret the argument as a character block. The
288return value is a I<range>: an anonymous list of lists that contain
289I<start-of-range>, I<end-of-range> code point pairs. You can test whether a
290code point is in a range using the L</charinrange> function. If the
291argument is not a known charater block, C<undef> is returned.
292
293=cut
294
295my @BLOCKS;
296my %BLOCKS;
297
298sub _charblocks {
299    unless (@BLOCKS) {
300	if (openunicode(\$BLOCKSFH, "Blocks.txt")) {
301	    local $_;
302	    while (<$BLOCKSFH>) {
303		if (/^([0-9A-F]+)\.\.([0-9A-F]+);\s+(.+)/) {
304		    my ($lo, $hi) = (hex($1), hex($2));
305		    my $subrange = [ $lo, $hi, $3 ];
306		    push @BLOCKS, $subrange;
307		    push @{$BLOCKS{$3}}, $subrange;
308		}
309	    }
310	    close($BLOCKSFH);
311	}
312    }
313}
314
315sub charblock {
316    my $arg = shift;
317
318    _charblocks() unless @BLOCKS;
319
320    my $code = _getcode($arg);
321
322    if (defined $code) {
323	_search(\@BLOCKS, 0, $#BLOCKS, $code);
324    } else {
325	if (exists $BLOCKS{$arg}) {
326	    return dclone $BLOCKS{$arg};
327	} else {
328	    return;
329	}
330    }
331}
332
333=head2 charscript
334
335    use Unicode::UCD 'charscript';
336
337    my $charscript = charscript(0x41);
338    my $charscript = charscript(1234);
339    my $charscript = charscript("U+263a");
340
341    my $range      = charscript('Thai');
342
343With a B<code point argument> charscript() returns the I<script> the
344character belongs to, e.g.  C<Latin>, C<Greek>, C<Han>.
345
346See also L</Blocks versus Scripts>.
347
348If supplied with an argument that can't be a code point, charscript() tries
349to do the opposite and interpret the argument as a character script. The
350return value is a I<range>: an anonymous list of lists that contain
351I<start-of-range>, I<end-of-range> code point pairs. You can test whether a
352code point is in a range using the L</charinrange> function. If the
353argument is not a known charater script, C<undef> is returned.
354
355=cut
356
357my @SCRIPTS;
358my %SCRIPTS;
359
360sub _charscripts {
361    unless (@SCRIPTS) {
362	if (openunicode(\$SCRIPTSFH, "Scripts.txt")) {
363	    local $_;
364	    while (<$SCRIPTSFH>) {
365		if (/^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+)/) {
366		    my ($lo, $hi) = (hex($1), $2 ? hex($2) : hex($1));
367		    my $script = lc($3);
368		    $script =~ s/\b(\w)/uc($1)/ge;
369		    my $subrange = [ $lo, $hi, $script ];
370		    push @SCRIPTS, $subrange;
371		    push @{$SCRIPTS{$script}}, $subrange;
372		}
373	    }
374	    close($SCRIPTSFH);
375	    @SCRIPTS = sort { $a->[0] <=> $b->[0] } @SCRIPTS;
376	}
377    }
378}
379
380sub charscript {
381    my $arg = shift;
382
383    _charscripts() unless @SCRIPTS;
384
385    my $code = _getcode($arg);
386
387    if (defined $code) {
388	_search(\@SCRIPTS, 0, $#SCRIPTS, $code);
389    } else {
390	if (exists $SCRIPTS{$arg}) {
391	    return dclone $SCRIPTS{$arg};
392	} else {
393	    return;
394	}
395    }
396}
397
398=head2 charblocks
399
400    use Unicode::UCD 'charblocks';
401
402    my $charblocks = charblocks();
403
404charblocks() returns a reference to a hash with the known block names
405as the keys, and the code point ranges (see L</charblock>) as the values.
406
407See also L</Blocks versus Scripts>.
408
409=cut
410
411sub charblocks {
412    _charblocks() unless %BLOCKS;
413    return dclone \%BLOCKS;
414}
415
416=head2 charscripts
417
418    use Unicode::UCD 'charscripts';
419
420    my %charscripts = charscripts();
421
422charscripts() returns a hash with the known script names as the keys,
423and the code point ranges (see L</charscript>) as the values.
424
425See also L</Blocks versus Scripts>.
426
427=cut
428
429sub charscripts {
430    _charscripts() unless %SCRIPTS;
431    return dclone \%SCRIPTS;
432}
433
434=head2 Blocks versus Scripts
435
436The difference between a block and a script is that scripts are closer
437to the linguistic notion of a set of characters required to present
438languages, while block is more of an artifact of the Unicode character
439numbering and separation into blocks of (mostly) 256 characters.
440
441For example the Latin B<script> is spread over several B<blocks>, such
442as C<Basic Latin>, C<Latin 1 Supplement>, C<Latin Extended-A>, and
443C<Latin Extended-B>.  On the other hand, the Latin script does not
444contain all the characters of the C<Basic Latin> block (also known as
445the ASCII): it includes only the letters, and not, for example, the digits
446or the punctuation.
447
448For blocks see http://www.unicode.org/Public/UNIDATA/Blocks.txt
449
450For scripts see UTR #24: http://www.unicode.org/unicode/reports/tr24/
451
452=head2 Matching Scripts and Blocks
453
454Scripts are matched with the regular-expression construct
455C<\p{...}> (e.g. C<\p{Tibetan}> matches characters of the Tibetan script),
456while C<\p{In...}> is used for blocks (e.g. C<\p{InTibetan}> matches
457any of the 256 code points in the Tibetan block).
458
459=head2 Code Point Arguments
460
461A I<code point argument> is either a decimal or a hexadecimal scalar
462designating a Unicode character, or C<U+> followed by hexadecimals
463designating a Unicode character.  In other words, if you want a code
464point to be interpreted as a hexadecimal number, you must prefix it
465with either C<0x> or C<U+>, because a string like e.g. C<123> will
466be interpreted as a decimal code point.  Also note that Unicode is
467B<not> limited to 16 bits (the number of Unicode characters is
468open-ended, in theory unlimited): you may have more than 4 hexdigits.
469
470=head2 charinrange
471
472In addition to using the C<\p{In...}> and C<\P{In...}> constructs, you
473can also test whether a code point is in the I<range> as returned by
474L</charblock> and L</charscript> or as the values of the hash returned
475by L</charblocks> and L</charscripts> by using charinrange():
476
477    use Unicode::UCD qw(charscript charinrange);
478
479    $range = charscript('Hiragana');
480    print "looks like hiragana\n" if charinrange($range, $codepoint);
481
482=cut
483
484=head2 compexcl
485
486    use Unicode::UCD 'compexcl';
487
488    my $compexcl = compexcl("09dc");
489
490The compexcl() returns the composition exclusion (that is, if the
491character should not be produced during a precomposition) of the
492character specified by a B<code point argument>.
493
494If there is a composition exclusion for the character, true is
495returned.  Otherwise, false is returned.
496
497=cut
498
499my %COMPEXCL;
500
501sub _compexcl {
502    unless (%COMPEXCL) {
503	if (openunicode(\$COMPEXCLFH, "CompositionExclusions.txt")) {
504	    local $_;
505	    while (<$COMPEXCLFH>) {
506		if (/^([0-9A-F]+)\s+\#\s+/) {
507		    my $code = hex($1);
508		    $COMPEXCL{$code} = undef;
509		}
510	    }
511	    close($COMPEXCLFH);
512	}
513    }
514}
515
516sub compexcl {
517    my $arg  = shift;
518    my $code = _getcode($arg);
519    croak __PACKAGE__, "::compexcl: unknown code '$arg'"
520	unless defined $code;
521
522    _compexcl() unless %COMPEXCL;
523
524    return exists $COMPEXCL{$code};
525}
526
527=head2 casefold
528
529    use Unicode::UCD 'casefold';
530
531    my $casefold = casefold("00DF");
532
533The casefold() returns the locale-independent case folding of the
534character specified by a B<code point argument>.
535
536If there is a case folding for that character, a reference to a hash
537with the following fields is returned:
538
539    key
540
541    code             code point with at least four hexdigits
542    status           "C", "F", "S", or "I"
543    mapping          one or more codes separated by spaces
544
545The meaning of the I<status> is as follows:
546
547   C                 common case folding, common mappings shared
548                     by both simple and full mappings
549   F                 full case folding, mappings that cause strings
550                     to grow in length. Multiple characters are separated
551                     by spaces
552   S                 simple case folding, mappings to single characters
553                     where different from F
554   I                 special case for dotted uppercase I and
555                     dotless lowercase i
556                     - If this mapping is included, the result is
557                       case-insensitive, but dotless and dotted I's
558                       are not distinguished
559                     - If this mapping is excluded, the result is not
560                       fully case-insensitive, but dotless and dotted
561                       I's are distinguished
562
563If there is no case folding for that character, C<undef> is returned.
564
565For more information about case mappings see
566http://www.unicode.org/unicode/reports/tr21/
567
568=cut
569
570my %CASEFOLD;
571
572sub _casefold {
573    unless (%CASEFOLD) {
574	if (openunicode(\$CASEFOLDFH, "CaseFolding.txt")) {
575	    local $_;
576	    while (<$CASEFOLDFH>) {
577		if (/^([0-9A-F]+); ([CFSI]); ([0-9A-F]+(?: [0-9A-F]+)*);/) {
578		    my $code = hex($1);
579		    $CASEFOLD{$code} = { code    => $1,
580					 status  => $2,
581					 mapping => $3 };
582		}
583	    }
584	    close($CASEFOLDFH);
585	}
586    }
587}
588
589sub casefold {
590    my $arg  = shift;
591    my $code = _getcode($arg);
592    croak __PACKAGE__, "::casefold: unknown code '$arg'"
593	unless defined $code;
594
595    _casefold() unless %CASEFOLD;
596
597    return $CASEFOLD{$code};
598}
599
600=head2 casespec
601
602    use Unicode::UCD 'casespec';
603
604    my $casespec = casespec("FB00");
605
606The casespec() returns the potentially locale-dependent case mapping
607of the character specified by a B<code point argument>.  The mapping
608may change the length of the string (which the basic Unicode case
609mappings as returned by charinfo() never do).
610
611If there is a case folding for that character, a reference to a hash
612with the following fields is returned:
613
614    key
615
616    code             code point with at least four hexdigits
617    lower            lowercase
618    title            titlecase
619    upper            uppercase
620    condition        condition list (may be undef)
621
622The C<condition> is optional.  Where present, it consists of one or
623more I<locales> or I<contexts>, separated by spaces (other than as
624used to separate elements, spaces are to be ignored).  A condition
625list overrides the normal behavior if all of the listed conditions are
626true.  Case distinctions in the condition list are not significant.
627Conditions preceded by "NON_" represent the negation of the condition.
628
629Note that when there are multiple case folding definitions for a
630single code point because of different locales, the value returned by
631casespec() is a hash reference which has the locales as the keys and
632hash references as described above as the values.
633
634A I<locale> is defined as a 2-letter ISO 3166 country code, possibly
635followed by a "_" and a 2-letter ISO language code (possibly followed
636by a "_" and a variant code).  You can find the lists of those codes,
637see L<Locale::Country> and L<Locale::Language>.
638
639A I<context> is one of the following choices:
640
641    FINAL            The letter is not followed by a letter of
642                     general category L (e.g. Ll, Lt, Lu, Lm, or Lo)
643    MODERN           The mapping is only used for modern text
644    AFTER_i          The last base character was "i" (U+0069)
645
646For more information about case mappings see
647http://www.unicode.org/unicode/reports/tr21/
648
649=cut
650
651my %CASESPEC;
652
653sub _casespec {
654    unless (%CASESPEC) {
655	if (openunicode(\$CASESPECFH, "SpecialCasing.txt")) {
656	    local $_;
657	    while (<$CASESPECFH>) {
658		if (/^([0-9A-F]+); ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; ([0-9A-F]+(?: [0-9A-F]+)*)?; (\w+(?: \w+)*)?/) {
659		    my ($hexcode, $lower, $title, $upper, $condition) =
660			($1, $2, $3, $4, $5);
661		    my $code = hex($hexcode);
662		    if (exists $CASESPEC{$code}) {
663			if (exists $CASESPEC{$code}->{code}) {
664			    my ($oldlower,
665				$oldtitle,
666				$oldupper,
667				$oldcondition) =
668				    @{$CASESPEC{$code}}{qw(lower
669							   title
670							   upper
671							   condition)};
672			    if (defined $oldcondition) {
673				my ($oldlocale) =
674				($oldcondition =~ /^([a-z][a-z](?:_\S+)?)/);
675				delete $CASESPEC{$code};
676				$CASESPEC{$code}->{$oldlocale} =
677				{ code      => $hexcode,
678				  lower     => $oldlower,
679				  title     => $oldtitle,
680				  upper     => $oldupper,
681				  condition => $oldcondition };
682			    }
683			}
684			my ($locale) =
685			    ($condition =~ /^([a-z][a-z](?:_\S+)?)/);
686			$CASESPEC{$code}->{$locale} =
687			{ code      => $hexcode,
688			  lower     => $lower,
689			  title     => $title,
690			  upper     => $upper,
691			  condition => $condition };
692		    } else {
693			$CASESPEC{$code} =
694			{ code      => $hexcode,
695			  lower     => $lower,
696			  title     => $title,
697			  upper     => $upper,
698			  condition => $condition };
699		    }
700		}
701	    }
702	    close($CASESPECFH);
703	}
704    }
705}
706
707sub casespec {
708    my $arg  = shift;
709    my $code = _getcode($arg);
710    croak __PACKAGE__, "::casespec: unknown code '$arg'"
711	unless defined $code;
712
713    _casespec() unless %CASESPEC;
714
715    return ref $CASESPEC{$code} ? dclone $CASESPEC{$code} : $CASESPEC{$code};
716}
717
718=head2 Unicode::UCD::UnicodeVersion
719
720Unicode::UCD::UnicodeVersion() returns the version of the Unicode
721Character Database, in other words, the version of the Unicode
722standard the database implements.  The version is a string
723of numbers delimited by dots (C<'.'>).
724
725=cut
726
727my $UNICODEVERSION;
728
729sub UnicodeVersion {
730    unless (defined $UNICODEVERSION) {
731	openunicode(\$VERSIONFH, "version");
732	chomp($UNICODEVERSION = <$VERSIONFH>);
733	close($VERSIONFH);
734	croak __PACKAGE__, "::VERSION: strange version '$UNICODEVERSION'"
735	    unless $UNICODEVERSION =~ /^\d+(?:\.\d+)+$/;
736    }
737    return $UNICODEVERSION;
738}
739
740=head2 Implementation Note
741
742The first use of charinfo() opens a read-only filehandle to the Unicode
743Character Database (the database is included in the Perl distribution).
744The filehandle is then kept open for further queries.  In other words,
745if you are wondering where one of your filehandles went, that's where.
746
747=head1 BUGS
748
749Does not yet support EBCDIC platforms.
750
751=head1 AUTHOR
752
753Jarkko Hietaniemi
754
755=cut
756
7571;
758