xref: /openbsd-src/gnu/usr.bin/perl/cpan/Unicode-Collate/Collate/Locale.pm (revision eac174f2741a08d8deb8aae59a7f778ef9b5d770)
# Locale-specific tailoring layered on top of Unicode::Collate.
package Unicode::Collate::Locale;

use strict;
use warnings;
use Carp;
use base qw(Unicode::Collate);

our $VERSION = '1.31';
9898184e3Ssthen
# Extension appended to a %LocaleFile value to form the data file name.
my $PL_EXT  = '.pl';

# Maps an accepted locale name to the base name of its data file under
# Unicode/Collate/Locale/.  Locales listed here are stored in a file of
# the same name.
my %LocaleFile = map { ($_, $_) } qw(
   af ar as az be bn ca cs cu cy da dsb ee eo es et fa fi fil fo gu
   ha haw he hi hr hu hy ig is ja kk kl kn ko kok lkt ln lt lv
   mk ml mr mt nb nn nso om or pa pl ro sa se si sk sl sq sr sv
   ta te th tn to tr uk ur vi vo wae wo yo zh
);
# 'default' has an empty file name: no data file is loaded for it.
   $LocaleFile{'default'} = '';
# aliases (locales that share another locale's data file)
   $LocaleFile{'bs'}      = 'hr';
   $LocaleFile{'bs_Cyrl'} = 'sr';
   $LocaleFile{'sr_Latn'} = 'hr';
# short file names
   $LocaleFile{'de__phonebook'}   = 'de_phone';
   $LocaleFile{'de_AT_phonebook'} = 'de_at_ph';
   $LocaleFile{'es__traditional'} = 'es_trad';
   $LocaleFile{'fr_CA'}           = 'fr_ca';
   $LocaleFile{'fi__phonebook'}   = 'fi_phone';
   $LocaleFile{'si__dictionary'}  = 'si_dict';
   $LocaleFile{'sv__reformed'}    = 'sv_refo';
   $LocaleFile{'ug_Cyrl'}         = 'ug_cyrl';
   $LocaleFile{'zh__big5han'}     = 'zh_big5';
   $LocaleFile{'zh__gb2312han'}   = 'zh_gb';
   $LocaleFile{'zh__pinyin'}      = 'zh_pin';
   $LocaleFile{'zh__stroke'}      = 'zh_strk';
   $LocaleFile{'zh__zhuyin'}      = 'zh_zhu';

# Maps a (lowercase) variant alias to the full variant code used in the
# keys of %LocaleFile.
my %TypeAlias = qw(
    phone     phonebook
    phonebk   phonebook
    dict      dictionary
    reform    reformed
    trad      traditional
    big5      big5han
    gb2312    gb2312han
);
47898184e3Ssthen
# Map a user-supplied locale name onto a key of %LocaleFile.
#
# The name is lowercased, the separators '-', ' ' and '.' are folded to
# '_', and a trailing variant alias listed in %TypeAlias is expanded.
# If the result is not itself a key with a true value, it is split into
# language / script / region / variant parts and progressively less
# specific candidates are tried in turn.
#
# Returns the first candidate that has a true entry in %LocaleFile, or
# the string 'default' when the name is false or nothing matches.
sub _locale {
    my $locale = shift;
    return 'default' if !$locale;

    $locale = lc $locale;
    $locale =~ tr/\-\ \./_/;
    # expand a trailing variant alias, e.g. "_phone" to "_phonebook"
    $locale =~ s/_([0-9a-z]+)\z/$TypeAlias{$1} ?
                                  "_$TypeAlias{$1}" : "_$1"/e;
    return $locale if $LocaleFile{$locale};

    my @code = split /_/, $locale;
    my $lan = shift @code;
    # A name consisting only of separators (e.g. '-') splits into an
    # empty list; stop here rather than interpolate an undefined $lan.
    return 'default' if !defined $lan;

    # A 4-letter part is taken as the script, a shorter part as the
    # region, and whatever follows as the variant.
    my $scr = @code && length($code[0]) == 4 ? ucfirst shift @code : '';
    my $reg = @code && length($code[0]) <  4 ? uc      shift @code : '';
    my $var = @code                          ?         shift @code : '';

    my @list;
    push @list, (
        "${lan}_${scr}_${reg}_$var",
        "${lan}_${scr}__$var", # empty $scr should not be ${lan}__$var.
        "${lan}_${reg}_$var",  # empty $reg may be ${lan}__$var.
        "${lan}__$var",
    ) if $var ne '';
    push @list, (
        "${lan}_${scr}_${reg}",
        "${lan}_${scr}",
        "${lan}_${reg}",
         ${lan},
    );
    for my $loc (@list) {
        return $loc if $LocaleFile{$loc};
    }
    return 'default';
}
82898184e3Ssthen
# Returns the collator's accepted_locale field, i.e. the locale name
# that new() resolved from the 'locale' argument ('default' if none
# matched).
sub getlocale {
    my $self = shift;
    return $self->{accepted_locale};
}
86898184e3Ssthen
# Returns the collator's locale_version field.
# NOTE(review): presumably supplied by the loaded Locale/*.pl data
# (it is not set anywhere in this file) -- confirm against the data files.
sub locale_version {
    my $self = shift;
    return $self->{locale_version};
}
90898184e3Ssthen
# Load the data file for an accepted locale name.
#
# Looks the name up in %LocaleFile; returns nothing when there is no
# (or an empty) file name, as for 'default'.  Otherwise evaluates
# Unicode/Collate/Locale/<file>.pl via "do" and returns its value.
# Croaks if the file fails to compile, cannot be found, or yields a
# false value.
sub _fetchpl {
    my $accepted = shift;
    my $f = $LocaleFile{$accepted};
    return if !$f;
    $f .= $PL_EXT;

    # allow to search @INC
#   use File::Spec;
#   my $path = File::Spec->catfile('Unicode', 'Collate', 'Locale', $f);
    my $path = "Unicode/Collate/Locale/$f";

    my ($h, $err);
    {
        # keep a stale or caller-owned $@ out of the picture
        local $@;
        $h   = do $path;
        $err = $@;
    }
    if (!$h) {
        # "do" sets $@ when the file was read but did not compile; report
        # that as such instead of claiming the file is missing.
        if ($err) {
            $err =~ s/\s+\z//;
            croak "Unicode/Collate/Locale/$f can't be loaded: $err";
        }
        croak "Unicode/Collate/Locale/$f can't be found";
    }
    return $h;
}
105898184e3Ssthen
# Constructor: Unicode::Collate::Locale->new(locale => $name, %tailoring).
#
# Resolves 'locale' with _locale() and records the result under
# 'accepted_locale', then merges the key/value pairs loaded by
# _fetchpl() into the arguments and hands everything to the parent
# constructor.
#
# Croaks if a 'table' argument is given, or if the caller passes any key
# that the locale data also defines -- except 'entry', where the
# caller's value is appended after the locale's.
sub new {
    my $class = shift;
    my %hash = @_;
    $hash{accepted_locale} = _locale($hash{locale});

    if (exists $hash{table}) {
        croak "your table can't be used with Unicode::Collate::Locale";
    }

    # _fetchpl() returns nothing for a locale without a data file (such
    # as 'default'); use an empty hash explicitly rather than relying on
    # each() autovivifying an undefined reference.
    my $href = _fetchpl($hash{accepted_locale}) || {};
    while (my($k,$v) = each %$href) {
        if (!exists $hash{$k}) {
            $hash{$k} = $v;
        } elsif ($k eq 'entry') {
            # locale's entries first, then the caller's
            $hash{$k} = $v.$hash{$k};
        } else {
            croak "$k is reserved by $hash{locale}, can't be overwritten";
        }
    }
    return $class->SUPER::new(%hash);
}
127898184e3Ssthen
128898184e3Ssthen1;
129898184e3Ssthen__END__
130898184e3Ssthen
131898184e3Ssthen=head1 NAME
132898184e3Ssthen
133898184e3SsthenUnicode::Collate::Locale - Linguistic tailoring for DUCET via Unicode::Collate
134898184e3Ssthen
135898184e3Ssthen=head1 SYNOPSIS
136898184e3Ssthen
137898184e3Ssthen  use Unicode::Collate::Locale;
138898184e3Ssthen
139898184e3Ssthen  #construct
140898184e3Ssthen  $Collator = Unicode::Collate::Locale->
141898184e3Ssthen      new(locale => $locale_name, %tailoring);
142898184e3Ssthen
143898184e3Ssthen  #sort
144898184e3Ssthen  @sorted = $Collator->sort(@not_sorted);
145898184e3Ssthen
146898184e3Ssthen  #compare
147898184e3Ssthen  $result = $Collator->cmp($a, $b); # returns 1, 0, or -1.
148898184e3Ssthen
B<Note:> Strings in C<@not_sorted>, C<$a> and C<$b> are interpreted
according to Perl's Unicode support. See L<perlunicode>,
L<perluniintro>, L<perlunitut>, L<perlunifaq>, L<utf8>.
Otherwise you can use C<preprocess> (cf. C<Unicode::Collate>),
or you should decode them beforehand.
154898184e3Ssthen
155898184e3Ssthen=head1 DESCRIPTION
156898184e3Ssthen
This module provides linguistic tailoring for DUCET,
taking advantage of C<Unicode::Collate>.
159898184e3Ssthen
160898184e3Ssthen=head2 Constructor
161898184e3Ssthen
162898184e3SsthenThe C<new> method returns a collator object.
163898184e3Ssthen
164898184e3SsthenA parameter list for the constructor is a hash, which can include
165898184e3Ssthena special key C<locale> and its value (case-insensitive) standing
166898184e3Ssthenfor a Unicode base language code (two or three-letter).
1679f11ffb7Safresh1For example, C<Unicode::Collate::Locale-E<gt>new(locale =E<gt> 'ES')>
1689f11ffb7Safresh1returns a collator tailored for Spanish.
169898184e3Ssthen
170898184e3SsthenC<$locale_name> may be suffixed with a Unicode script code (four-letter),
1719f11ffb7Safresh1a Unicode region (territory) code, a Unicode language variant code.
1729f11ffb7Safresh1These codes are case-insensitive, and separated with C<'_'> or C<'-'>.
173898184e3SsthenE.g. C<en_US> for English in USA,
174898184e3SsthenC<az_Cyrl> for Azerbaijani in the Cyrillic script,
175898184e3SsthenC<es_ES_traditional> for Spanish in Spain (Traditional).
176898184e3Ssthen
177898184e3SsthenIf C<$locale_name> is not available,
178898184e3Ssthenfallback is selected in the following order:
179898184e3Ssthen
180898184e3Ssthen    1. language with a variant code
181898184e3Ssthen    2. language with a script code
182898184e3Ssthen    3. language with a region code
183898184e3Ssthen    4. language
184898184e3Ssthen    5. default
185898184e3Ssthen
186898184e3SsthenTailoring tags provided by C<Unicode::Collate> are allowed as long as
187898184e3Ssthenthey are not used for C<locale> support.  Esp. the C<table> tag
188898184e3Ssthenis always untailorable, since it is reserved for DUCET.
189898184e3Ssthen
19091f110e0Safresh1However C<entry> is allowed, even if it is used for C<locale> support,
19191f110e0Safresh1to add or override mappings.
19291f110e0Safresh1
1939f11ffb7Safresh1E.g. a collator for Spanish, which ignores diacritics and case difference
194898184e3Ssthen(i.e. level 1), with reversed case ordering and no normalization.
195898184e3Ssthen
196898184e3Ssthen    Unicode::Collate::Locale->new(
197898184e3Ssthen        level => 1,
1989f11ffb7Safresh1        locale => 'es',
199898184e3Ssthen        upper_before_lower => 1,
200898184e3Ssthen        normalization => undef
201898184e3Ssthen    )
202898184e3Ssthen
203898184e3SsthenOverriding a behavior already tailored by C<locale> is disallowed
204898184e3Ssthenif such a tailoring is passed to C<new()>.
205898184e3Ssthen
206898184e3Ssthen    Unicode::Collate::Locale->new(
207898184e3Ssthen        locale => 'da',
208898184e3Ssthen        upper_before_lower => 0, # causes error as reserved by 'da'
209898184e3Ssthen    )
210898184e3Ssthen
211898184e3SsthenHowever C<change()> inherited from C<Unicode::Collate> allows
212898184e3Ssthensuch a tailoring that is reserved by C<locale>. Examples:
213898184e3Ssthen
2149f11ffb7Safresh1    new(locale => 'fr_ca')->change(backwards => undef)
215898184e3Ssthen    new(locale => 'da')->change(upper_before_lower => 0)
216898184e3Ssthen    new(locale => 'ja')->change(overrideCJK => undef)
217898184e3Ssthen
218898184e3Ssthen=head2 Methods
219898184e3Ssthen
220898184e3SsthenC<Unicode::Collate::Locale> is a subclass of C<Unicode::Collate>
221898184e3Ssthenand methods other than C<new> are inherited from C<Unicode::Collate>.
222898184e3Ssthen
223898184e3SsthenHere is a list of additional methods:
224898184e3Ssthen
225898184e3Ssthen=over 4
226898184e3Ssthen
227898184e3Ssthen=item C<$Collator-E<gt>getlocale>
228898184e3Ssthen
Returns the language code that was accepted and is actually used for collation.
If linguistic tailoring is not provided for a language code you passed
(intentionally for some languages, or due to the incomplete implementation),
this method returns a string C<'default'> meaning no special tailoring.
233898184e3Ssthen
234898184e3Ssthen=item C<$Collator-E<gt>locale_version>
235898184e3Ssthen
236898184e3Ssthen(Since Unicode::Collate::Locale 0.87)
237898184e3SsthenReturns the version number (perhaps C</\d\.\d\d/>) of the locale, as that
238898184e3Ssthenof F<Locale/*.pl>.
239898184e3Ssthen
240898184e3SsthenB<Note:> F<Locale/*.pl> that a collator uses should be identified by
241898184e3Ssthena combination of return values from C<getlocale> and C<locale_version>.
242898184e3Ssthen
243898184e3Ssthen=back
244898184e3Ssthen
245898184e3Ssthen=head2 A list of tailorable locales
246898184e3Ssthen
247898184e3Ssthen      locale name       description
248898184e3Ssthen    --------------------------------------------------------------
249898184e3Ssthen      af                Afrikaans
250898184e3Ssthen      ar                Arabic
251898184e3Ssthen      as                Assamese
252898184e3Ssthen      az                Azerbaijani (Azeri)
253898184e3Ssthen      be                Belarusian
254898184e3Ssthen      bn                Bengali
2559f11ffb7Safresh1      bs                Bosnian (tailored as Croatian)
25691f110e0Safresh1      bs_Cyrl           Bosnian in Cyrillic (tailored as Serbian)
257898184e3Ssthen      ca                Catalan
258898184e3Ssthen      cs                Czech
259b46d8ef2Safresh1      cu                Church Slavic
260898184e3Ssthen      cy                Welsh
261898184e3Ssthen      da                Danish
262898184e3Ssthen      de__phonebook     German (umlaut as 'ae', 'oe', 'ue')
2639f11ffb7Safresh1      de_AT_phonebook   Austrian German (umlaut primary greater)
2649f11ffb7Safresh1      dsb               Lower Sorbian
26591f110e0Safresh1      ee                Ewe
266898184e3Ssthen      eo                Esperanto
267898184e3Ssthen      es                Spanish
268898184e3Ssthen      es__traditional   Spanish ('ch' and 'll' as a grapheme)
269898184e3Ssthen      et                Estonian
270898184e3Ssthen      fa                Persian
271898184e3Ssthen      fi                Finnish (v and w are primary equal)
272898184e3Ssthen      fi__phonebook     Finnish (v and w as separate characters)
273898184e3Ssthen      fil               Filipino
274898184e3Ssthen      fo                Faroese
2759f11ffb7Safresh1      fr_CA             Canadian French
276898184e3Ssthen      gu                Gujarati
277898184e3Ssthen      ha                Hausa
278898184e3Ssthen      haw               Hawaiian
2799f11ffb7Safresh1      he                Hebrew
280898184e3Ssthen      hi                Hindi
281898184e3Ssthen      hr                Croatian
282898184e3Ssthen      hu                Hungarian
283898184e3Ssthen      hy                Armenian
284898184e3Ssthen      ig                Igbo
285898184e3Ssthen      is                Icelandic
286898184e3Ssthen      ja                Japanese [1]
287898184e3Ssthen      kk                Kazakh
288898184e3Ssthen      kl                Kalaallisut
289898184e3Ssthen      kn                Kannada
290898184e3Ssthen      ko                Korean [2]
291898184e3Ssthen      kok               Konkani
2929f11ffb7Safresh1      lkt               Lakota
293898184e3Ssthen      ln                Lingala
294898184e3Ssthen      lt                Lithuanian
295898184e3Ssthen      lv                Latvian
296898184e3Ssthen      mk                Macedonian
297898184e3Ssthen      ml                Malayalam
298898184e3Ssthen      mr                Marathi
299898184e3Ssthen      mt                Maltese
300898184e3Ssthen      nb                Norwegian Bokmal
301898184e3Ssthen      nn                Norwegian Nynorsk
302898184e3Ssthen      nso               Northern Sotho
303898184e3Ssthen      om                Oromo
304898184e3Ssthen      or                Oriya
305898184e3Ssthen      pa                Punjabi
306898184e3Ssthen      pl                Polish
307898184e3Ssthen      ro                Romanian
308898184e3Ssthen      sa                Sanskrit
309898184e3Ssthen      se                Northern Sami
310898184e3Ssthen      si                Sinhala
311898184e3Ssthen      si__dictionary    Sinhala (U+0DA5 = U+0DA2,0DCA,0DA4)
312898184e3Ssthen      sk                Slovak
313898184e3Ssthen      sl                Slovenian
314898184e3Ssthen      sq                Albanian
315898184e3Ssthen      sr                Serbian
316898184e3Ssthen      sr_Latn           Serbian in Latin (tailored as Croatian)
317898184e3Ssthen      sv                Swedish (v and w are primary equal)
318898184e3Ssthen      sv__reformed      Swedish (v and w as separate characters)
319898184e3Ssthen      ta                Tamil
320898184e3Ssthen      te                Telugu
321898184e3Ssthen      th                Thai
322898184e3Ssthen      tn                Tswana
323898184e3Ssthen      to                Tonga
324898184e3Ssthen      tr                Turkish
3259f11ffb7Safresh1      ug_Cyrl           Uyghur in Cyrillic
326898184e3Ssthen      uk                Ukrainian
327898184e3Ssthen      ur                Urdu
328898184e3Ssthen      vi                Vietnamese
3299f11ffb7Safresh1      vo                Volapu"k
330898184e3Ssthen      wae               Walser
331898184e3Ssthen      wo                Wolof
332898184e3Ssthen      yo                Yoruba
333898184e3Ssthen      zh                Chinese
334898184e3Ssthen      zh__big5han       Chinese (ideographs: big5 order)
335898184e3Ssthen      zh__gb2312han     Chinese (ideographs: GB-2312 order)
336898184e3Ssthen      zh__pinyin        Chinese (ideographs: pinyin order) [3]
337898184e3Ssthen      zh__stroke        Chinese (ideographs: stroke order) [3]
33891f110e0Safresh1      zh__zhuyin        Chinese (ideographs: zhuyin order) [3]
339898184e3Ssthen    --------------------------------------------------------------
340898184e3Ssthen
341898184e3SsthenLocales according to the default UCA rules include
3429f11ffb7Safresh1am (Amharic) without C<[reorder Ethi]>,
3439f11ffb7Safresh1bg (Bulgarian) without C<[reorder Cyrl]>,
3449f11ffb7Safresh1chr (Cherokee) without C<[reorder Cher]>,
345898184e3Ssthende (German),
346898184e3Ssthenen (English),
3479f11ffb7Safresh1fr (French),
348898184e3Ssthenga (Irish),
349898184e3Ssthenid (Indonesian),
350898184e3Ssthenit (Italian),
3519f11ffb7Safresh1ka (Georgian) without C<[reorder Geor]>,
3529f11ffb7Safresh1mn (Mongolian) without C<[reorder Cyrl Mong]>,
353898184e3Ssthenms (Malay),
354898184e3Ssthennl (Dutch),
355898184e3Ssthenpt (Portuguese),
3569f11ffb7Safresh1ru (Russian) without C<[reorder Cyrl]>,
357898184e3Ssthensw (Swahili),
358898184e3Ssthenzu (Zulu).
359898184e3Ssthen
360898184e3SsthenB<Note>
361898184e3Ssthen
362898184e3Ssthen[1] ja: Ideographs are sorted in JIS X 0208 order.
36391f110e0Safresh1Fullwidth and halfwidth forms are identical to their regular form.
364898184e3SsthenThe difference between hiragana and katakana is at the 4th level,
365898184e3Ssthenthe comparison also requires C<(variable =E<gt> 'Non-ignorable')>,
366898184e3Ssthenand then C<katakana_before_hiragana> has no effect.
367898184e3Ssthen
368898184e3Ssthen[2] ko: Plenty of ideographs are sorted by their reading. Such
369898184e3Ssthenan ideograph is primary (level 1) equal to, and secondary (level 2)
370898184e3Ssthengreater than, the corresponding hangul syllable.
371898184e3Ssthen
37291f110e0Safresh1[3] zh__pinyin, zh__stroke and zh__zhuyin: implemented alt='short',
37391f110e0Safresh1where a smaller number of ideographs are tailored.
37491f110e0Safresh1
3759f11ffb7Safresh1=head2 A list of variant codes and their aliases
3769f11ffb7Safresh1
3779f11ffb7Safresh1      variant code       alias
3789f11ffb7Safresh1    ------------------------------------------
3799f11ffb7Safresh1      dictionary         dict
3809f11ffb7Safresh1      phonebook          phone     phonebk
3819f11ffb7Safresh1      reformed           reform
3829f11ffb7Safresh1      traditional        trad
3839f11ffb7Safresh1    ------------------------------------------
3849f11ffb7Safresh1      big5han            big5
3859f11ffb7Safresh1      gb2312han          gb2312
3869f11ffb7Safresh1      pinyin
3879f11ffb7Safresh1      stroke
3889f11ffb7Safresh1      zhuyin
3899f11ffb7Safresh1    ------------------------------------------
3909f11ffb7Safresh1
3919f11ffb7Safresh1Note: 'pinyin' is Han in Latin, 'zhuyin' is Han in Bopomofo.
392898184e3Ssthen
393898184e3Ssthen=head1 INSTALL
394898184e3Ssthen
Installation of C<Unicode::Collate::Locale> requires F<Collate/Locale.pm>,
F<Collate/Locale/*.pl>, F<Collate/CJK/*.pm> and F<Collate/allkeys.txt>.
On building, C<Unicode::Collate::Locale> doesn't require
any of F<data/*.txt>, F<gendata/*>, and F<mklocale>.
Tests for C<Unicode::Collate::Locale> are named F<t/loc_*.t>.
400898184e3Ssthen
401898184e3Ssthen=head1 CAVEAT
402898184e3Ssthen
403898184e3Ssthen=over 4
404898184e3Ssthen
4059f11ffb7Safresh1=item Tailoring is not maximum
406898184e3Ssthen
Even if a certain letter is tailored, its equivalents are not always
tailored along with it. For example, even though W is tailored,
fullwidth W (C<U+FF37>), W with acute (C<U+1E82>), etc. are not
tailored. The result may depend on whether source strings are
normalized or not, and whether decomposed or composed.
Thus C<(normalization =E<gt> undef)> is less preferred.
413898184e3Ssthen
4149f11ffb7Safresh1=item Collation reordering is not supported
4159f11ffb7Safresh1
4169f11ffb7Safresh1The order of any groups including scripts is not changed.
4179f11ffb7Safresh1
418898184e3Ssthen=back
419898184e3Ssthen
4209f11ffb7Safresh1=head2 Reference
4219f11ffb7Safresh1
4229f11ffb7Safresh1      locale            based CLDR or other reference
4239f11ffb7Safresh1    --------------------------------------------------------------------
4249f11ffb7Safresh1      af                30 = 1.8.1
4259f11ffb7Safresh1      ar                30 = 28 ("compat" wo [reorder Arab]) = 1.9.0
4269f11ffb7Safresh1      as                30 = 28 (without [reorder Beng..]) = 23
4279f11ffb7Safresh1      az                30 = 24 ("standard" wo [reorder Latn Cyrl])
4289f11ffb7Safresh1      be                30 = 28 (without [reorder Cyrl])
4299f11ffb7Safresh1      bn                30 = 28 ("standard" wo [reorder Beng..]) = 2.0.1
4309f11ffb7Safresh1      bs                30 = 28 (type="standard": [import hr])
4319f11ffb7Safresh1      bs_Cyrl           30 = 28 (type="standard": [import sr])
4329f11ffb7Safresh1      ca                30 = 23 (alt="proposed" type="standard")
4339f11ffb7Safresh1      cs                30 = 1.8.1 (type="standard")
434b46d8ef2Safresh1      cu                34 = 30 (without [reorder Cyrl])
4359f11ffb7Safresh1      cy                30 = 1.8.1
4369f11ffb7Safresh1      da                22.1 = 1.8.1 (type="standard")
4379f11ffb7Safresh1      de__phonebook     30 = 2.0 (type="phonebook")
4389f11ffb7Safresh1      de_AT_phonebook   30 = 27 (type="phonebook")
4399f11ffb7Safresh1      dsb               30 = 26
4409f11ffb7Safresh1      ee                30 = 21
4419f11ffb7Safresh1      eo                30 = 1.8.1
4429f11ffb7Safresh1      es                30 = 1.9.0 (type="standard")
4439f11ffb7Safresh1      es__traditional   30 = 1.8.1 (type="traditional")
4449f11ffb7Safresh1      et                30 = 26
4459f11ffb7Safresh1      fa                22.1 = 1.8.1
4469f11ffb7Safresh1      fi                22.1 = 1.8.1 (type="standard" alt="proposed")
4479f11ffb7Safresh1      fi__phonebook     22.1 = 1.8.1 (type="phonebook")
4489f11ffb7Safresh1      fil               30 = 1.9.0 (type="standard") = 1.8.1
4499f11ffb7Safresh1      fo                22.1 = 1.8.1 (alt="proposed" type="standard")
4509f11ffb7Safresh1      fr_CA             30 = 1.9.0
4519f11ffb7Safresh1      gu                30 = 28 ("standard" wo [reorder Gujr..]) = 1.9.0
4529f11ffb7Safresh1      ha                30 = 1.9.0
4539f11ffb7Safresh1      haw               30 = 24
4549f11ffb7Safresh1      he                30 = 28 (without [reorder Hebr]) = 23
4559f11ffb7Safresh1      hi                30 = 28 (without [reorder Deva..]) = 1.9.0
4569f11ffb7Safresh1      hr                30 = 28 ("standard" wo [reorder Latn Cyrl]) = 1.9.0
4579f11ffb7Safresh1      hu                22.1 = 1.8.1 (alt="proposed" type="standard")
4589f11ffb7Safresh1      hy                30 = 28 (without [reorder Armn]) = 1.8.1
4599f11ffb7Safresh1      ig                30 = 1.8.1
4609f11ffb7Safresh1      is                22.1 = 1.8.1 (type="standard")
4619f11ffb7Safresh1      ja                22.1 = 1.8.1 (type="standard")
4629f11ffb7Safresh1      kk                30 = 28 (without [reorder Cyrl])
4639f11ffb7Safresh1      kl                22.1 = 1.8.1 (type="standard")
4649f11ffb7Safresh1      kn                30 = 28 ("standard" wo [reorder Knda..]) = 1.9.0
4659f11ffb7Safresh1      ko                22.1 = 1.8.1 (type="standard")
4669f11ffb7Safresh1      kok               30 = 28 (without [reorder Deva..]) = 1.8.1
4679f11ffb7Safresh1      lkt               30 = 25
4689f11ffb7Safresh1      ln                30 = 2.0 (type="standard") = 1.8.1
4699f11ffb7Safresh1      lt                22.1 = 1.9.0
4709f11ffb7Safresh1      lv                22.1 = 1.9.0 (type="standard") = 1.8.1
4719f11ffb7Safresh1      mk                30 = 28 (without [reorder Cyrl])
4729f11ffb7Safresh1      ml                22.1 = 1.9.0
4739f11ffb7Safresh1      mr                30 = 28 (without [reorder Deva..]) = 1.8.1
4749f11ffb7Safresh1      mt                22.1 = 1.9.0
4759f11ffb7Safresh1      nb                22.1 = 2.0   (type="standard")
4769f11ffb7Safresh1      nn                22.1 = 2.0   (type="standard")
4779f11ffb7Safresh1      nso           [*] 26 = 1.8.1
4789f11ffb7Safresh1      om                22.1 = 1.8.1
4799f11ffb7Safresh1      or                30 = 28 (without [reorder Orya..]) = 1.9.0
4809f11ffb7Safresh1      pa                22.1 = 1.8.1
4819f11ffb7Safresh1      pl                30 = 1.8.1
4829f11ffb7Safresh1      ro                30 = 1.9.0 (type="standard")
4839f11ffb7Safresh1      sa            [*] 1.9.1 = 1.8.1 (type="standard" alt="proposed")
4849f11ffb7Safresh1      se                22.1 = 1.8.1 (type="standard")
4859f11ffb7Safresh1      si                30 = 28 ("standard" wo [reorder Sinh..]) = 1.9.0
4869f11ffb7Safresh1      si__dictionary    30 = 28 ("dictionary" wo [reorder Sinh..]) = 1.9.0
4879f11ffb7Safresh1      sk                22.1 = 1.9.0 (type="standard")
4889f11ffb7Safresh1      sl                22.1 = 1.8.1 (type="standard" alt="proposed")
4899f11ffb7Safresh1      sq                22.1 = 1.8.1 (alt="proposed" type="standard")
4909f11ffb7Safresh1      sr                30 = 28 (without [reorder Cyrl])
4919f11ffb7Safresh1      sr_Latn           30 = 28 (type="standard": [import hr])
4929f11ffb7Safresh1      sv                22.1 = 1.9.0 (type="standard")
4939f11ffb7Safresh1      sv__reformed      22.1 = 1.8.1 (type="reformed")
4949f11ffb7Safresh1      ta                22.1 = 1.9.0
4959f11ffb7Safresh1      te                30 = 28 (without [reorder Telu..]) = 1.9.0
4969f11ffb7Safresh1      th                22.1 = 22
4979f11ffb7Safresh1      tn            [*] 26 = 1.8.1
4989f11ffb7Safresh1      to                22.1 = 22
4999f11ffb7Safresh1      tr                22.1 = 1.8.1 (type="standard")
5009f11ffb7Safresh1      uk                30 = 28 (without [reorder Cyrl])
5019f11ffb7Safresh1      ug_Cyrl           https://en.wikipedia.org/wiki/Uyghur_Cyrillic_alphabet
5029f11ffb7Safresh1      ur                22.1 = 1.9.0
5039f11ffb7Safresh1      vi                22.1 = 1.8.1
5049f11ffb7Safresh1      vo                30 = 25
5059f11ffb7Safresh1      wae               30 = 2.0
5069f11ffb7Safresh1      wo            [*] 1.9.1 = 1.8.1
5079f11ffb7Safresh1      yo                30 = 1.8.1
5089f11ffb7Safresh1      zh                22.1 = 1.8.1 (type="standard")
5099f11ffb7Safresh1      zh__big5han       22.1 = 1.8.1 (type="big5han")
5109f11ffb7Safresh1      zh__gb2312han     22.1 = 1.8.1 (type="gb2312han")
5119f11ffb7Safresh1      zh__pinyin        22.1 = 2.0   (type='pinyin' alt='short')
5129f11ffb7Safresh1      zh__stroke        22.1 = 1.9.1 (type='stroke' alt='short')
5139f11ffb7Safresh1      zh__zhuyin        22.1 = 22    (type='zhuyin' alt='short')
5149f11ffb7Safresh1    --------------------------------------------------------------------
5159f11ffb7Safresh1
5169f11ffb7Safresh1[*] http://www.unicode.org/repos/cldr/tags/latest/seed/collation/
5179f11ffb7Safresh1
518898184e3Ssthen=head1 AUTHOR
519898184e3Ssthen
520898184e3SsthenThe Unicode::Collate::Locale module for perl was written
521898184e3Ssthenby SADAHIRO Tomoyuki, <SADAHIRO@cpan.org>.
522*eac174f2Safresh1This module is Copyright(C) 2004-2020, SADAHIRO Tomoyuki. Japan.
523898184e3SsthenAll rights reserved.
524898184e3Ssthen
525898184e3SsthenThis module is free software; you can redistribute it and/or
526898184e3Ssthenmodify it under the same terms as Perl itself.
527898184e3Ssthen
528898184e3Ssthen=head1 SEE ALSO
529898184e3Ssthen
530898184e3Ssthen=over 4
531898184e3Ssthen
532898184e3Ssthen=item Unicode Collation Algorithm - UTS #10
533898184e3Ssthen
534898184e3SsthenL<http://www.unicode.org/reports/tr10/>
535898184e3Ssthen
536898184e3Ssthen=item The Default Unicode Collation Element Table (DUCET)
537898184e3Ssthen
538898184e3SsthenL<http://www.unicode.org/Public/UCA/latest/allkeys.txt>
539898184e3Ssthen
540898184e3Ssthen=item Unicode Locale Data Markup Language (LDML) - UTS #35
541898184e3Ssthen
542898184e3SsthenL<http://www.unicode.org/reports/tr35/>
543898184e3Ssthen
544898184e3Ssthen=item CLDR - Unicode Common Locale Data Repository
545898184e3Ssthen
546898184e3SsthenL<http://cldr.unicode.org/>
547898184e3Ssthen
548898184e3Ssthen=item L<Unicode::Collate>
549898184e3Ssthen
550898184e3Ssthen=item L<Unicode::Normalize>
551898184e3Ssthen
552898184e3Ssthen=back
553898184e3Ssthen
554898184e3Ssthen=cut
555