package Unicode::Collate::Locale;

use strict;
use warnings;
use Carp;
use base qw(Unicode::Collate);

our $VERSION = '1.31';

# Extension of the per-locale tailoring files loaded by _fetchpl().
my $PL_EXT  = '.pl';

# Maps an accepted locale name to the basename (without extension) of the
# tailoring file under Unicode/Collate/Locale/.  The locales in this first
# list use a file named exactly like the locale.
my %LocaleFile = map { ($_, $_) } qw(
   af ar as az be bn ca cs cu cy da dsb ee eo es et fa fi fil fo gu
   ha haw he hi hr hu hy ig is ja kk kl kn ko kok lkt ln lt lv
   mk ml mr mt nb nn nso om or pa pl ro sa se si sk sl sq sr sv
   ta te th tn to tr uk ur vi vo wae wo yo zh
);
# 'default' has no tailoring file.  Its empty (false) value means that
# _locale() never returns it as a table hit (only as the final fallback)
# and that _fetchpl() returns nothing for it.
   $LocaleFile{'default'} = '';
# aliases
   $LocaleFile{'bs'}      = 'hr';
   $LocaleFile{'bs_Cyrl'} = 'sr';
   $LocaleFile{'sr_Latn'} = 'hr';
# short file names
   $LocaleFile{'de__phonebook'}   = 'de_phone';
   $LocaleFile{'de_AT_phonebook'} = 'de_at_ph';
   $LocaleFile{'es__traditional'} = 'es_trad';
   $LocaleFile{'fr_CA'}           = 'fr_ca';
   $LocaleFile{'fi__phonebook'}   = 'fi_phone';
   $LocaleFile{'si__dictionary'}  = 'si_dict';
   $LocaleFile{'sv__reformed'}    = 'sv_refo';
   $LocaleFile{'ug_Cyrl'}         = 'ug_cyrl';
   $LocaleFile{'zh__big5han'}     = 'zh_big5';
   $LocaleFile{'zh__gb2312han'}   = 'zh_gb';
   $LocaleFile{'zh__pinyin'}      = 'zh_pin';
   $LocaleFile{'zh__stroke'}      = 'zh_strk';
   $LocaleFile{'zh__zhuyin'}      = 'zh_zhu';

# Short spellings of a trailing variant code and the full code they stand for.
my %TypeAlias = qw(
    phone     phonebook
    phonebk   phonebook
    dict      dictionary
    reform    reformed
    trad      traditional
    big5      big5han
    gb2312    gb2312han
);

# _locale($name)
#
# Resolves a caller-supplied locale name to a key of %LocaleFile.
# The name is lower-cased, '-', ' ' and '.' are turned into '_', and a
# trailing component that is a key of %TypeAlias is expanded.  If the
# result is not a key with a true value, it is split into language,
# script (a 4-character component), region (shorter than 4 characters)
# and variant, and progressively less specific candidates are tried.
# Returns the first candidate found, or 'default' when nothing matches
# or when $name is false.
sub _locale {
    my $locale = shift;
    if ($locale) {
	$locale = lc $locale;
	$locale =~ tr/\-\ \./_/;
	# expand an aliased last component, e.g. "_phone" -> "_phonebook"
	$locale =~ s/_([0-9a-z]+)\z/$TypeAlias{$1} ?
				  "_$TypeAlias{$1}" : "_$1"/e;
	$LocaleFile{$locale} and return $locale;

	my @code = split /_/, $locale;
	my $lan = shift @code;
	my $scr = @code && length $code[0] == 4 ? ucfirst shift @code : '';
	my $reg = @code && length $code[0] <  4 ? uc      shift @code : '';
	my $var = @code                         ?         shift @code : '';

	# candidates, from the most to the least specific
	my @list;
	push @list, (
	    "${lan}_${scr}_${reg}_$var",
	    "${lan}_${scr}__$var", # empty $scr should not be ${lan}__$var.
	    "${lan}_${reg}_$var",  # empty $reg may be ${lan}__$var.
	    "${lan}__$var",
	) if $var ne '';
	push @list, (
	    "${lan}_${scr}_${reg}",
	    "${lan}_${scr}",
	    "${lan}_${reg}",
	     $lan,
	);
	for my $loc (@list) {
	    $LocaleFile{$loc} and return $loc;
	}
    }
    return 'default';
}

# Returns the 'accepted_locale' field of the collator, which new() sets
# to the value returned by _locale().
sub getlocale {
    return shift->{accepted_locale};
}

# Returns the 'locale_version' field of the collator.  This module never
# sets it explicitly; presumably it comes from the hash returned by the
# locale file -- confirm against Unicode/Collate/Locale/*.pl.
sub locale_version {
    return shift->{locale_version};
}

# _fetchpl($accepted)
#
# Loads the tailoring file that %LocaleFile assigns to $accepted and
# returns the (true) value that file evaluates to; new() uses it as a
# hash reference.  Returns nothing when $accepted has no file (such as
# 'default').
#
# Croaks when the file cannot be loaded.  The three outcomes of "do FILE"
# are reported separately so that a broken locale file is not
# misreported as a missing one:
#   - compilation or run-time failure ($@ is set)
#   - file not readable through @INC (undef returned, $@ empty)
#   - file evaluated to a false value
sub _fetchpl {
    my $accepted = shift;
    my $f = $LocaleFile{$accepted};
    return if !$f;
    $f .= $PL_EXT;

    # A relative path with '/' is used on purpose: "do" then searches @INC.
#   use File::Spec;
#   my $path = File::Spec->catfile('Unicode', 'Collate', 'Locale', $f);
    my $path = "Unicode/Collate/Locale/$f";
    my $h = do $path;
    return $h if $h;

    croak "$path can't be loaded: $@" if $@;
    croak "$path can't be found" if !defined $h;
    croak "$path didn't return a true value";
}

# new(%tailoring)
#
# Constructor.  Resolves $tailoring{locale} with _locale(), stores the
# result under 'accepted_locale', merges the locale's own tailoring into
# the parameters and delegates to Unicode::Collate's constructor.
#
# Croaks if 'table' is passed, or if a parameter other than 'entry' is
# also supplied by the locale file.  A caller-supplied 'entry' is
# appended to the locale's 'entry' instead.
sub new {
    my $class = shift;
    my %hash = @_;
    $hash{accepted_locale} = _locale($hash{locale});

    if (exists $hash{table}) {
	croak "your table can't be used with Unicode::Collate::Locale";
    }

    my $href = _fetchpl($hash{accepted_locale});
    while (my($k,$v) = each %$href) {
	if (!exists $hash{$k}) {
	    $hash{$k} = $v;
	} elsif ($k eq 'entry') {
	    # locale entries first, then the caller's additions/overrides
	    $hash{$k} = $v.$hash{$k};
	} else {
	    croak "$k is reserved by $hash{locale}, can't be overwritten";
	}
    }
    return $class->SUPER::new(%hash);
}

1;
__END__

=head1 NAME

Unicode::Collate::Locale - Linguistic tailoring for DUCET via Unicode::Collate

=head1 SYNOPSIS

  use Unicode::Collate::Locale;

  #construct
  $Collator = Unicode::Collate::Locale->
      new(locale => $locale_name, %tailoring);

  #sort
  @sorted = $Collator->sort(@not_sorted);

  #compare
  $result = $Collator->cmp($a, $b); # returns 1, 0, or -1.

B<Note:> Strings in C<@not_sorted>, C<$a> and C<$b> are interpreted
according to Perl's Unicode support. See L<perlunicode>,
L<perluniintro>, L<perlunitut>, L<perlunifaq>, L<utf8>.
Otherwise you can use C<preprocess> (cf. C<Unicode::Collate>),
or you should decode them beforehand.

=head1 DESCRIPTION

This module provides linguistic tailoring for DUCET,
taking advantage of C<Unicode::Collate>.

=head2 Constructor

The C<new> method returns a collator object.

A parameter list for the constructor is a hash, which can include
a special key C<locale> and its value (case-insensitive) standing
for a Unicode base language code (two or three-letter).
For example, C<Unicode::Collate::Locale-E<gt>new(locale =E<gt> 'ES')>
returns a collator tailored for Spanish.

C<$locale_name> may be suffixed with a Unicode script code (four-letter),
a Unicode region (territory) code, and/or a Unicode language variant code.
These codes are case-insensitive, and separated with C<'_'> or C<'-'>.
E.g. C<en_US> for English in USA,
C<az_Cyrl> for Azerbaijani in the Cyrillic script,
C<es_ES_traditional> for Spanish in Spain (Traditional).

If C<$locale_name> is not available,
fallback is selected in the following order:

    1. language with a variant code
    2. language with a script code
    3. language with a region code
    4. language
    5. default

Tailoring tags provided by C<Unicode::Collate> are allowed as long as
they are not used for C<locale> support.  Esp. the C<table> tag
is always untailorable, since it is reserved for DUCET.

However C<entry> is allowed, even if it is used for C<locale> support,
to add or override mappings.

E.g. a collator for Spanish, which ignores diacritics and case difference
(i.e. level 1), with reversed case ordering and no normalization.

    Unicode::Collate::Locale->new(
        level => 1,
        locale => 'es',
        upper_before_lower => 1,
        normalization => undef
    )

Overriding a behavior already tailored by C<locale> is disallowed
if such a tailoring is passed to C<new()>.

    Unicode::Collate::Locale->new(
        locale => 'da',
        upper_before_lower => 0, # causes error as reserved by 'da'
    )

However C<change()> inherited from C<Unicode::Collate> allows
such a tailoring that is reserved by C<locale>. Examples:

    new(locale => 'fr_ca')->change(backwards => undef)
    new(locale => 'da')->change(upper_before_lower => 0)
    new(locale => 'ja')->change(overrideCJK => undef)

=head2 Methods

C<Unicode::Collate::Locale> is a subclass of C<Unicode::Collate>
and methods other than C<new> are inherited from C<Unicode::Collate>.

Here is a list of additional methods:

=over 4

=item C<$Collator-E<gt>getlocale>

Returns the language code that was accepted and is actually used for collation.
If linguistic tailoring is not provided for a language code you passed
(intentionally for some languages, or due to the incomplete implementation),
this method returns a string C<'default'> meaning no special tailoring.

=item C<$Collator-E<gt>locale_version>

(Since Unicode::Collate::Locale 0.87)
Returns the version number (perhaps C</\d\.\d\d/>) of the locale, as that
of F<Locale/*.pl>.

B<Note:> F<Locale/*.pl> that a collator uses should be identified by
a combination of return values from C<getlocale> and C<locale_version>.
242898184e3Ssthen 243898184e3Ssthen=back 244898184e3Ssthen 245898184e3Ssthen=head2 A list of tailorable locales 246898184e3Ssthen 247898184e3Ssthen locale name description 248898184e3Ssthen -------------------------------------------------------------- 249898184e3Ssthen af Afrikaans 250898184e3Ssthen ar Arabic 251898184e3Ssthen as Assamese 252898184e3Ssthen az Azerbaijani (Azeri) 253898184e3Ssthen be Belarusian 254898184e3Ssthen bn Bengali 2559f11ffb7Safresh1 bs Bosnian (tailored as Croatian) 25691f110e0Safresh1 bs_Cyrl Bosnian in Cyrillic (tailored as Serbian) 257898184e3Ssthen ca Catalan 258898184e3Ssthen cs Czech 259b46d8ef2Safresh1 cu Church Slavic 260898184e3Ssthen cy Welsh 261898184e3Ssthen da Danish 262898184e3Ssthen de__phonebook German (umlaut as 'ae', 'oe', 'ue') 2639f11ffb7Safresh1 de_AT_phonebook Austrian German (umlaut primary greater) 2649f11ffb7Safresh1 dsb Lower Sorbian 26591f110e0Safresh1 ee Ewe 266898184e3Ssthen eo Esperanto 267898184e3Ssthen es Spanish 268898184e3Ssthen es__traditional Spanish ('ch' and 'll' as a grapheme) 269898184e3Ssthen et Estonian 270898184e3Ssthen fa Persian 271898184e3Ssthen fi Finnish (v and w are primary equal) 272898184e3Ssthen fi__phonebook Finnish (v and w as separate characters) 273898184e3Ssthen fil Filipino 274898184e3Ssthen fo Faroese 2759f11ffb7Safresh1 fr_CA Canadian French 276898184e3Ssthen gu Gujarati 277898184e3Ssthen ha Hausa 278898184e3Ssthen haw Hawaiian 2799f11ffb7Safresh1 he Hebrew 280898184e3Ssthen hi Hindi 281898184e3Ssthen hr Croatian 282898184e3Ssthen hu Hungarian 283898184e3Ssthen hy Armenian 284898184e3Ssthen ig Igbo 285898184e3Ssthen is Icelandic 286898184e3Ssthen ja Japanese [1] 287898184e3Ssthen kk Kazakh 288898184e3Ssthen kl Kalaallisut 289898184e3Ssthen kn Kannada 290898184e3Ssthen ko Korean [2] 291898184e3Ssthen kok Konkani 2929f11ffb7Safresh1 lkt Lakota 293898184e3Ssthen ln Lingala 294898184e3Ssthen lt Lithuanian 295898184e3Ssthen lv Latvian 296898184e3Ssthen mk Macedonian 297898184e3Ssthen 
ml Malayalam 298898184e3Ssthen mr Marathi 299898184e3Ssthen mt Maltese 300898184e3Ssthen nb Norwegian Bokmal 301898184e3Ssthen nn Norwegian Nynorsk 302898184e3Ssthen nso Northern Sotho 303898184e3Ssthen om Oromo 304898184e3Ssthen or Oriya 305898184e3Ssthen pa Punjabi 306898184e3Ssthen pl Polish 307898184e3Ssthen ro Romanian 308898184e3Ssthen sa Sanskrit 309898184e3Ssthen se Northern Sami 310898184e3Ssthen si Sinhala 311898184e3Ssthen si__dictionary Sinhala (U+0DA5 = U+0DA2,0DCA,0DA4) 312898184e3Ssthen sk Slovak 313898184e3Ssthen sl Slovenian 314898184e3Ssthen sq Albanian 315898184e3Ssthen sr Serbian 316898184e3Ssthen sr_Latn Serbian in Latin (tailored as Croatian) 317898184e3Ssthen sv Swedish (v and w are primary equal) 318898184e3Ssthen sv__reformed Swedish (v and w as separate characters) 319898184e3Ssthen ta Tamil 320898184e3Ssthen te Telugu 321898184e3Ssthen th Thai 322898184e3Ssthen tn Tswana 323898184e3Ssthen to Tonga 324898184e3Ssthen tr Turkish 3259f11ffb7Safresh1 ug_Cyrl Uyghur in Cyrillic 326898184e3Ssthen uk Ukrainian 327898184e3Ssthen ur Urdu 328898184e3Ssthen vi Vietnamese 3299f11ffb7Safresh1 vo Volapu"k 330898184e3Ssthen wae Walser 331898184e3Ssthen wo Wolof 332898184e3Ssthen yo Yoruba 333898184e3Ssthen zh Chinese 334898184e3Ssthen zh__big5han Chinese (ideographs: big5 order) 335898184e3Ssthen zh__gb2312han Chinese (ideographs: GB-2312 order) 336898184e3Ssthen zh__pinyin Chinese (ideographs: pinyin order) [3] 337898184e3Ssthen zh__stroke Chinese (ideographs: stroke order) [3] 33891f110e0Safresh1 zh__zhuyin Chinese (ideographs: zhuyin order) [3] 339898184e3Ssthen -------------------------------------------------------------- 340898184e3Ssthen 341898184e3SsthenLocales according to the default UCA rules include 3429f11ffb7Safresh1am (Amharic) without C<[reorder Ethi]>, 3439f11ffb7Safresh1bg (Bulgarian) without C<[reorder Cyrl]>, 3449f11ffb7Safresh1chr (Cherokee) without C<[reorder Cher]>, 345898184e3Ssthende (German), 346898184e3Ssthenen (English), 
3479f11ffb7Safresh1fr (French), 348898184e3Ssthenga (Irish), 349898184e3Ssthenid (Indonesian), 350898184e3Ssthenit (Italian), 3519f11ffb7Safresh1ka (Georgian) without C<[reorder Geor]>, 3529f11ffb7Safresh1mn (Mongolian) without C<[reorder Cyrl Mong]>, 353898184e3Ssthenms (Malay), 354898184e3Ssthennl (Dutch), 355898184e3Ssthenpt (Portuguese), 3569f11ffb7Safresh1ru (Russian) without C<[reorder Cyrl]>, 357898184e3Ssthensw (Swahili), 358898184e3Ssthenzu (Zulu). 359898184e3Ssthen 360898184e3SsthenB<Note> 361898184e3Ssthen 362898184e3Ssthen[1] ja: Ideographs are sorted in JIS X 0208 order. 36391f110e0Safresh1Fullwidth and halfwidth forms are identical to their regular form. 364898184e3SsthenThe difference between hiragana and katakana is at the 4th level, 365898184e3Ssthenthe comparison also requires C<(variable =E<gt> 'Non-ignorable')>, 366898184e3Ssthenand then C<katakana_before_hiragana> has no effect. 367898184e3Ssthen 368898184e3Ssthen[2] ko: Plenty of ideographs are sorted by their reading. Such 369898184e3Ssthenan ideograph is primary (level 1) equal to, and secondary (level 2) 370898184e3Ssthengreater than, the corresponding hangul syllable. 371898184e3Ssthen 37291f110e0Safresh1[3] zh__pinyin, zh__stroke and zh__zhuyin: implemented alt='short', 37391f110e0Safresh1where a smaller number of ideographs are tailored. 

=head2 A list of variant codes and their aliases

  variant code       alias
  ------------------------------------------
  dictionary         dict
  phonebook          phone     phonebk
  reformed           reform
  traditional        trad
  ------------------------------------------
  big5han            big5
  gb2312han          gb2312
  pinyin
  stroke
  zhuyin
  ------------------------------------------

Note: 'pinyin' is Han in Latin, 'zhuyin' is Han in Bopomofo.

=head1 INSTALL

Installation of C<Unicode::Collate::Locale> requires F<Collate/Locale.pm>,
F<Collate/Locale/*.pl>, F<Collate/CJK/*.pm> and F<Collate/allkeys.txt>.
On building, C<Unicode::Collate::Locale> doesn't require
any of F<data/*.txt>, F<gendata/*>, and F<mklocale>.
Tests for C<Unicode::Collate::Locale> are named F<t/loc_*.t>.

=head1 CAVEAT

=over 4

=item Tailoring is not maximum

Even if a certain letter is tailored, its equivalents are not always
tailored along with it. For example, even though W is tailored,
fullwidth W (C<U+FF37>), W with acute (C<U+1E82>), etc. are not
tailored. The result may depend on whether source strings are
normalized or not, and whether decomposed or composed.
Thus C<(normalization =E<gt> undef)> is less preferred.
413898184e3Ssthen 4149f11ffb7Safresh1=item Collation reordering is not supported 4159f11ffb7Safresh1 4169f11ffb7Safresh1The order of any groups including scripts is not changed. 4179f11ffb7Safresh1 418898184e3Ssthen=back 419898184e3Ssthen 4209f11ffb7Safresh1=head2 Reference 4219f11ffb7Safresh1 4229f11ffb7Safresh1 locale based CLDR or other reference 4239f11ffb7Safresh1 -------------------------------------------------------------------- 4249f11ffb7Safresh1 af 30 = 1.8.1 4259f11ffb7Safresh1 ar 30 = 28 ("compat" wo [reorder Arab]) = 1.9.0 4269f11ffb7Safresh1 as 30 = 28 (without [reorder Beng..]) = 23 4279f11ffb7Safresh1 az 30 = 24 ("standard" wo [reorder Latn Cyrl]) 4289f11ffb7Safresh1 be 30 = 28 (without [reorder Cyrl]) 4299f11ffb7Safresh1 bn 30 = 28 ("standard" wo [reorder Beng..]) = 2.0.1 4309f11ffb7Safresh1 bs 30 = 28 (type="standard": [import hr]) 4319f11ffb7Safresh1 bs_Cyrl 30 = 28 (type="standard": [import sr]) 4329f11ffb7Safresh1 ca 30 = 23 (alt="proposed" type="standard") 4339f11ffb7Safresh1 cs 30 = 1.8.1 (type="standard") 434b46d8ef2Safresh1 cu 34 = 30 (without [reorder Cyrl]) 4359f11ffb7Safresh1 cy 30 = 1.8.1 4369f11ffb7Safresh1 da 22.1 = 1.8.1 (type="standard") 4379f11ffb7Safresh1 de__phonebook 30 = 2.0 (type="phonebook") 4389f11ffb7Safresh1 de_AT_phonebook 30 = 27 (type="phonebook") 4399f11ffb7Safresh1 dsb 30 = 26 4409f11ffb7Safresh1 ee 30 = 21 4419f11ffb7Safresh1 eo 30 = 1.8.1 4429f11ffb7Safresh1 es 30 = 1.9.0 (type="standard") 4439f11ffb7Safresh1 es__traditional 30 = 1.8.1 (type="traditional") 4449f11ffb7Safresh1 et 30 = 26 4459f11ffb7Safresh1 fa 22.1 = 1.8.1 4469f11ffb7Safresh1 fi 22.1 = 1.8.1 (type="standard" alt="proposed") 4479f11ffb7Safresh1 fi__phonebook 22.1 = 1.8.1 (type="phonebook") 4489f11ffb7Safresh1 fil 30 = 1.9.0 (type="standard") = 1.8.1 4499f11ffb7Safresh1 fo 22.1 = 1.8.1 (alt="proposed" type="standard") 4509f11ffb7Safresh1 fr_CA 30 = 1.9.0 4519f11ffb7Safresh1 gu 30 = 28 ("standard" wo [reorder Gujr..]) = 1.9.0 4529f11ffb7Safresh1 ha 30 
= 1.9.0 4539f11ffb7Safresh1 haw 30 = 24 4549f11ffb7Safresh1 he 30 = 28 (without [reorder Hebr]) = 23 4559f11ffb7Safresh1 hi 30 = 28 (without [reorder Deva..]) = 1.9.0 4569f11ffb7Safresh1 hr 30 = 28 ("standard" wo [reorder Latn Cyrl]) = 1.9.0 4579f11ffb7Safresh1 hu 22.1 = 1.8.1 (alt="proposed" type="standard") 4589f11ffb7Safresh1 hy 30 = 28 (without [reorder Armn]) = 1.8.1 4599f11ffb7Safresh1 ig 30 = 1.8.1 4609f11ffb7Safresh1 is 22.1 = 1.8.1 (type="standard") 4619f11ffb7Safresh1 ja 22.1 = 1.8.1 (type="standard") 4629f11ffb7Safresh1 kk 30 = 28 (without [reorder Cyrl]) 4639f11ffb7Safresh1 kl 22.1 = 1.8.1 (type="standard") 4649f11ffb7Safresh1 kn 30 = 28 ("standard" wo [reorder Knda..]) = 1.9.0 4659f11ffb7Safresh1 ko 22.1 = 1.8.1 (type="standard") 4669f11ffb7Safresh1 kok 30 = 28 (without [reorder Deva..]) = 1.8.1 4679f11ffb7Safresh1 lkt 30 = 25 4689f11ffb7Safresh1 ln 30 = 2.0 (type="standard") = 1.8.1 4699f11ffb7Safresh1 lt 22.1 = 1.9.0 4709f11ffb7Safresh1 lv 22.1 = 1.9.0 (type="standard") = 1.8.1 4719f11ffb7Safresh1 mk 30 = 28 (without [reorder Cyrl]) 4729f11ffb7Safresh1 ml 22.1 = 1.9.0 4739f11ffb7Safresh1 mr 30 = 28 (without [reorder Deva..]) = 1.8.1 4749f11ffb7Safresh1 mt 22.1 = 1.9.0 4759f11ffb7Safresh1 nb 22.1 = 2.0 (type="standard") 4769f11ffb7Safresh1 nn 22.1 = 2.0 (type="standard") 4779f11ffb7Safresh1 nso [*] 26 = 1.8.1 4789f11ffb7Safresh1 om 22.1 = 1.8.1 4799f11ffb7Safresh1 or 30 = 28 (without [reorder Orya..]) = 1.9.0 4809f11ffb7Safresh1 pa 22.1 = 1.8.1 4819f11ffb7Safresh1 pl 30 = 1.8.1 4829f11ffb7Safresh1 ro 30 = 1.9.0 (type="standard") 4839f11ffb7Safresh1 sa [*] 1.9.1 = 1.8.1 (type="standard" alt="proposed") 4849f11ffb7Safresh1 se 22.1 = 1.8.1 (type="standard") 4859f11ffb7Safresh1 si 30 = 28 ("standard" wo [reorder Sinh..]) = 1.9.0 4869f11ffb7Safresh1 si__dictionary 30 = 28 ("dictionary" wo [reorder Sinh..]) = 1.9.0 4879f11ffb7Safresh1 sk 22.1 = 1.9.0 (type="standard") 4889f11ffb7Safresh1 sl 22.1 = 1.8.1 (type="standard" alt="proposed") 4899f11ffb7Safresh1 
sq 22.1 = 1.8.1 (alt="proposed" type="standard") 4909f11ffb7Safresh1 sr 30 = 28 (without [reorder Cyrl]) 4919f11ffb7Safresh1 sr_Latn 30 = 28 (type="standard": [import hr]) 4929f11ffb7Safresh1 sv 22.1 = 1.9.0 (type="standard") 4939f11ffb7Safresh1 sv__reformed 22.1 = 1.8.1 (type="reformed") 4949f11ffb7Safresh1 ta 22.1 = 1.9.0 4959f11ffb7Safresh1 te 30 = 28 (without [reorder Telu..]) = 1.9.0 4969f11ffb7Safresh1 th 22.1 = 22 4979f11ffb7Safresh1 tn [*] 26 = 1.8.1 4989f11ffb7Safresh1 to 22.1 = 22 4999f11ffb7Safresh1 tr 22.1 = 1.8.1 (type="standard") 5009f11ffb7Safresh1 uk 30 = 28 (without [reorder Cyrl]) 5019f11ffb7Safresh1 ug_Cyrl https://en.wikipedia.org/wiki/Uyghur_Cyrillic_alphabet 5029f11ffb7Safresh1 ur 22.1 = 1.9.0 5039f11ffb7Safresh1 vi 22.1 = 1.8.1 5049f11ffb7Safresh1 vo 30 = 25 5059f11ffb7Safresh1 wae 30 = 2.0 5069f11ffb7Safresh1 wo [*] 1.9.1 = 1.8.1 5079f11ffb7Safresh1 yo 30 = 1.8.1 5089f11ffb7Safresh1 zh 22.1 = 1.8.1 (type="standard") 5099f11ffb7Safresh1 zh__big5han 22.1 = 1.8.1 (type="big5han") 5109f11ffb7Safresh1 zh__gb2312han 22.1 = 1.8.1 (type="gb2312han") 5119f11ffb7Safresh1 zh__pinyin 22.1 = 2.0 (type='pinyin' alt='short') 5129f11ffb7Safresh1 zh__stroke 22.1 = 1.9.1 (type='stroke' alt='short') 5139f11ffb7Safresh1 zh__zhuyin 22.1 = 22 (type='zhuyin' alt='short') 5149f11ffb7Safresh1 -------------------------------------------------------------------- 5159f11ffb7Safresh1 5169f11ffb7Safresh1[*] http://www.unicode.org/repos/cldr/tags/latest/seed/collation/ 5179f11ffb7Safresh1 518898184e3Ssthen=head1 AUTHOR 519898184e3Ssthen 520898184e3SsthenThe Unicode::Collate::Locale module for perl was written 521898184e3Ssthenby SADAHIRO Tomoyuki, <SADAHIRO@cpan.org>. 522*eac174f2Safresh1This module is Copyright(C) 2004-2020, SADAHIRO Tomoyuki. Japan. 523898184e3SsthenAll rights reserved. 524898184e3Ssthen 525898184e3SsthenThis module is free software; you can redistribute it and/or 526898184e3Ssthenmodify it under the same terms as Perl itself. 
527898184e3Ssthen 528898184e3Ssthen=head1 SEE ALSO 529898184e3Ssthen 530898184e3Ssthen=over 4 531898184e3Ssthen 532898184e3Ssthen=item Unicode Collation Algorithm - UTS #10 533898184e3Ssthen 534898184e3SsthenL<http://www.unicode.org/reports/tr10/> 535898184e3Ssthen 536898184e3Ssthen=item The Default Unicode Collation Element Table (DUCET) 537898184e3Ssthen 538898184e3SsthenL<http://www.unicode.org/Public/UCA/latest/allkeys.txt> 539898184e3Ssthen 540898184e3Ssthen=item Unicode Locale Data Markup Language (LDML) - UTS #35 541898184e3Ssthen 542898184e3SsthenL<http://www.unicode.org/reports/tr35/> 543898184e3Ssthen 544898184e3Ssthen=item CLDR - Unicode Common Locale Data Repository 545898184e3Ssthen 546898184e3SsthenL<http://cldr.unicode.org/> 547898184e3Ssthen 548898184e3Ssthen=item L<Unicode::Collate> 549898184e3Ssthen 550898184e3Ssthen=item L<Unicode::Normalize> 551898184e3Ssthen 552898184e3Ssthen=back 553898184e3Ssthen 554898184e3Ssthen=cut 555