# Origin: openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/t/rewrite.t
# (revision 256a93a44f36679bee503f12e49566c2183f6181)

# When PERL_CORE is set in the environment, change into the 't' directory
# (if one exists) and replace @INC with the relative lib directory:
# '::lib' on MacOS, '../lib' everywhere else.
BEGIN {
    if ($ENV{PERL_CORE}) {
        chdir('t') if -d 't';
        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
    }
}

use strict;
use warnings;
# Turn on autoflush for the selected output handle and print the test plan
# (17 tests) at compile time, before anything else can produce output.
BEGIN { $| = 1; print "1..17\n"; }
# Number of result lines printed so far; ok() increments it on every call.
my $count = 0;

# ok(RESULT)           - passes when RESULT is true.
# ok(RESULT, EXPECTED) - passes when both values are undef, or when both are
#                        defined and string-equal (eq).
# Prints one "ok N" or "not ok N" line per call. The ($;$) prototype puts
# both arguments in scalar context at the call site.
sub ok ($;$) {
    my $p = my $r = shift;
    if (@_) {
        my $x = shift;
        # undef only matches undef; defined values are compared as strings.
        $p = !defined $x ? !defined $r
           : !defined $r ? 0
           :               $r eq $x;
    }
    print $p ? "ok" : "not ok", ' ', ++$count, "\n";
}

use Unicode::Collate;

# Test 1: reaching this point means the module loaded without dying.
ok(1);

# Thin wrapper: forwards all arguments to Unicode::Collate::pack_U and
# returns whatever it returns.
sub _pack_U   { Unicode::Collate::pack_U(@_) }
# Thin wrapper: forwards all arguments to Unicode::Collate::unpack_U and
# returns whatever it returns.
sub _unpack_U { Unicode::Collate::unpack_U(@_) }

#########################

# Callback passed as the 'rewrite' option to Unicode::Collate->new below.
# It takes one line of text and returns it with every occurrence of
# "[.0000.XXXX.XXXX" (XXXX = any four characters) that is followed by "."
# or "]" replaced by "[.0000.0000.0000"; the trailing "." or "]" is kept.
# Everything else in the line is returned unchanged.
my $code = sub {
    my $line = shift;
    $line =~ s/\[\.0000\..{4}\..{4}([.\]])/[.0000.0000.0000$1/g;
    return $line;
};

#####

# Collator built from keys.txt with normalization disabled and $code
# installed as the rewrite callback.
my $Collator = Unicode::Collate->new(
  table => 'keys.txt', normalization => undef, rewrite => $code,
);

# Strings that differ only by U+0300 / U+0301 are expected to compare equal,
# while a difference in letter case must still be significant.
ok($Collator->eq("camel", "came\x{300}l"));
ok($Collator->eq("camel", "ca\x{300}me\x{301}l"));
ok($Collator->lt("camel", "Camel"));
{
  # gsubst should wrap only the occurrences that match "camel": the one
  # containing U+0301 and the one containing NULs, but not "Camel"/"CAMEL".
  my $s = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l.";
  $Collator->gsubst($s, "camel", sub { "=$_[0]=" });
  ok($s, "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=.");
}

# 5 tests so far

# Same checks as for $Collator, but no 'table' option is given, so the
# module's default table is used with the same rewrite callback.
my $rewriteDUCET = Unicode::Collate->new(
  normalization => undef, rewrite => $code,
);

ok($rewriteDUCET->eq("camel", "came\x{300}l"));
ok($rewriteDUCET->eq("camel", "ca\x{300}me\x{301}l"));
ok($rewriteDUCET->lt("camel", "Camel"));
{
  # Only the occurrences matching "camel" are wrapped in "=...=".
  my $s = "Camel donkey zebra came\x{301}l CAMEL horse cam\0e\0l.";
  $rewriteDUCET->gsubst($s, "camel", sub { "=$_[0]=" });
  ok($s, "Camel donkey zebra =came\x{301}l= CAMEL horse =cam\0e\0l=.");
}

# 9 tests so far

# Level-1 collator over keys.txt whose rewrite callback returns an empty
# string for every table line containing "HIRAGANA" and passes all other
# lines through unchanged.
my $undef_hira = Unicode::Collate->new(
  table => 'keys.txt',
  normalization => undef,
  level => 1,
  rewrite => sub {
    my $line = shift;
    return '' if $line =~ /HIRAGANA/;
    return $line;
  },
);

my $hiragana = "\x{3042}\x{3044}";
my $katakana = "\x{30A2}\x{30A4}";
my $cjkkanji = "\x{4E00}";

# The rewrite blanks out the HIRAGANA lines, so hiragana are undefined in
# this collator's table and are expected to sort after CJK Unified
# Ideographs.

ok($undef_hira->lt("abc", "perl"));
ok($undef_hira->lt("", "ABC"));
ok($undef_hira->lt($katakana, $hiragana));
ok($undef_hira->lt($katakana, $cjkkanji));
ok($undef_hira->lt($cjkkanji, $hiragana));

# Contrast: $Collator's rewrite does not blank out the HIRAGANA lines. At
# level 1 katakana and hiragana are expected to compare equal, and both to
# sort before the kanji.
$Collator->change(level => 1);
ok($Collator->eq($katakana, $hiragana));
ok($Collator->lt($katakana, $cjkkanji));
ok($Collator->gt($cjkkanji, $hiragana));

# 17 tests so far