1# 2# Locale::Script - ISO codes for script identification (ISO 15924) 3# 4# $Id: Script.pm,v 2.2 2002/07/10 16:33:28 neilb Exp $ 5# 6 7package Locale::Script; 8use strict; 9require 5.002; 10 11require Exporter; 12use Carp; 13use Locale::Constants; 14 15 16#----------------------------------------------------------------------- 17# Public Global Variables 18#----------------------------------------------------------------------- 19use vars qw($VERSION @ISA @EXPORT @EXPORT_OK); 20$VERSION = sprintf("%d.%02d", q$Revision: 2.21 $ =~ /(\d+)\.(\d+)/); 21@ISA = qw(Exporter); 22@EXPORT = qw(code2script script2code 23 all_script_codes all_script_names 24 script_code2code 25 LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC); 26 27#----------------------------------------------------------------------- 28# Private Global Variables 29#----------------------------------------------------------------------- 30my $CODES = []; 31my $COUNTRIES = []; 32 33 34#======================================================================= 35# 36# code2script ( CODE [, CODESET ] ) 37# 38#======================================================================= 39sub code2script 40{ 41 my $code = shift; 42 my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; 43 44 45 return undef unless defined $code; 46 47 #------------------------------------------------------------------- 48 # Make sure the code is in the right form before we use it 49 # to look up the corresponding script. 50 # We have to sprintf because the codes are given as 3-digits, 51 # with leading 0's. Eg 070 for Egyptian demotic. 52 #------------------------------------------------------------------- 53 if ($codeset == LOCALE_CODE_NUMERIC) 54 { 55 return undef if ($code =~ /\D/); 56 $code = sprintf("%.3d", $code); 57 } 58 else 59 { 60 $code = lc($code); 61 } 62 63 if (exists $CODES->[$codeset]->{$code}) 64 { 65 return $CODES->[$codeset]->{$code}; 66 } 67 else 68 { 69 #--------------------------------------------------------------- 70 # no such script code! 71 #--------------------------------------------------------------- 72 return undef; 73 } 74} 75 76 77#======================================================================= 78# 79# script2code ( SCRIPT [, CODESET ] ) 80# 81#======================================================================= 82sub script2code 83{ 84 my $script = shift; 85 my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; 86 87 88 return undef unless defined $script; 89 $script = lc($script); 90 if (exists $COUNTRIES->[$codeset]->{$script}) 91 { 92 return $COUNTRIES->[$codeset]->{$script}; 93 } 94 else 95 { 96 #--------------------------------------------------------------- 97 # no such script! 98 #--------------------------------------------------------------- 99 return undef; 100 } 101} 102 103 104#======================================================================= 105# 106# script_code2code ( CODE, IN-CODESET, OUT-CODESET ) 107# 108#======================================================================= 109sub script_code2code 110{ 111 (@_ == 3) or croak "script_code2code() takes 3 arguments!"; 112 113 my $code = shift; 114 my $inset = shift; 115 my $outset = shift; 116 my $outcode; 117 my $script; 118 119 120 return undef if $inset == $outset; 121 $script = code2script($code, $inset); 122 return undef if not defined $script; 123 $outcode = script2code($script, $outset); 124 return $outcode; 125} 126 127 128#======================================================================= 129# 130# all_script_codes() 131# 132#======================================================================= 133sub all_script_codes 134{ 135 my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; 136 137 return keys %{ $CODES->[$codeset] }; 138} 139 140 141#======================================================================= 142# 143# all_script_names() 144# 145#======================================================================= 146sub all_script_names 147{ 148 my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT; 149 150 return values %{ $CODES->[$codeset] }; 151} 152 153 154#======================================================================= 155# 156# initialisation code - stuff the DATA into the ALPHA2 hash 157# 158#======================================================================= 159{ 160 my ($alpha2, $alpha3, $numeric); 161 my $script; 162 163 local $_; 164 165 while (<DATA>) 166 { 167 next unless /\S/; 168 chop; 169 ($alpha2, $alpha3, $numeric, $script) = split(/:/, $_, 4); 170 171 $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $script; 172 $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{"\L$script"} = $alpha2; 173 174 if ($alpha3) 175 { 176 $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $script; 177 $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{"\L$script"} = $alpha3; 178 } 179 180 if ($numeric) 181 { 182 $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $script; 183 $COUNTRIES->[LOCALE_CODE_NUMERIC]->{"\L$script"} = $numeric; 184 } 185 186 } 187 188 close(DATA); 189} 190 1911; 192 193__DATA__ 194am:ama:130:Aramaic 195ar:ara:160:Arabic 196av:ave:151:Avestan 197bh:bhm:300:Brahmi (Ashoka) 198bi:bid:372:Buhid 199bn:ben:325:Bengali 200bo:bod:330:Tibetan 201bp:bpm:285:Bopomofo 202br:brl:570:Braille 203bt:btk:365:Batak 204bu:bug:367:Buginese (Makassar) 205by:bys:550:Blissymbols 206ca:cam:358:Cham 207ch:chu:221:Old Church Slavonic 208ci:cir:291:Cirth 209cm:cmn:402:Cypro-Minoan 210co:cop:205:Coptic 211cp:cpr:403:Cypriote syllabary 212cy:cyr:220:Cyrillic 213ds:dsr:250:Deserel (Mormon) 214dv:dvn:315:Devanagari (Nagari) 215ed:egd:070:Egyptian demotic 216eg:egy:050:Egyptian hieroglyphs 217eh:egh:060:Egyptian hieratic 218el:ell:200:Greek 219eo:eos:210:Etruscan and Oscan 220et:eth:430:Ethiopic 221gl:glg:225:Glagolitic 222gm:gmu:310:Gurmukhi 223gt:gth:206:Gothic 224gu:guj:320:Gujarati 225ha:han:500:Han ideographs 226he:heb:125:Hebrew 227hg:hgl:420:Hangul 228hm:hmo:450:Pahawh Hmong 229ho:hoo:371:Hanunoo 230hr:hrg:410:Hiragana 231hu:hun:176:Old Hungarian runic 232hv:hvn:175:Kok Turki runic 233hy:hye:230:Armenian 234iv:ivl:610:Indus Valley 235ja:jap:930:(alias for Han + Hiragana + Katakana) 236jl:jlg:445:Cherokee syllabary 237jw:jwi:360:Javanese 238ka:kam:241:Georgian (Mxedruli) 239kh:khn:931:(alias for Hangul + Han) 240kk:kkn:411:Katakana 241km:khm:354:Khmer 242kn:kan:345:Kannada 243kr:krn:357:Karenni (Kayah Li) 244ks:kst:305:Kharoshthi 245kx:kax:240:Georgian (Xucuri) 246la:lat:217:Latin 247lf:laf:215:Latin (Fraktur variant) 248lg:lag:216:Latin (Gaelic variant) 249lo:lao:356:Lao 250lp:lpc:335:Lepcha (Rong) 251md:mda:140:Mandaean 252me:mer:100:Meroitic 253mh:may:090:Mayan hieroglyphs 254ml:mlm:347:Malayalam 255mn:mon:145:Mongolian 256my:mya:350:Burmese 257na:naa:400:Linear A 258nb:nbb:401:Linear B 259og:ogm:212:Ogham 260or:ory:327:Oriya 261os:osm:260:Osmanya 262ph:phx:115:Phoenician 263ph:pah:150:Pahlavi 264pl:pld:282:Pollard Phonetic 265pq:pqd:295:Klingon plQaD 266pr:prm:227:Old Permic 267ps:pst:600:Phaistos Disk 268rn:rnr:211:Runic (Germanic) 269rr:rro:620:Rongo-rongo 270sa:sar:110:South Arabian 271si:sin:348:Sinhala 272sj:syj:137:Syriac (Jacobite variant) 273sl:slb:440:Unified Canadian Aboriginal Syllabics 274sn:syn:136:Syriac (Nestorian variant) 275sw:sww:281:Shavian (Shaw) 276sy:syr:135:Syriac (Estrangelo) 277ta:tam:346:Tamil 278tb:tbw:373:Tagbanwa 279te:tel:340:Telugu 280tf:tfn:120:Tifnagh 281tg:tag:370:Tagalog 282th:tha:352:Thai 283tn:tna:170:Thaana 284tw:twr:290:Tengwar 285va:vai:470:Vai 286vs:vsp:280:Visible Speech 287xa:xas:000:Cuneiform, Sumero-Akkadian 288xf:xfa:105:Cuneiform, Old Persian 289xk:xkn:412:(alias for Hiragana + Katakana) 290xu:xug:106:Cuneiform, Ugaritic 291yi:yii:460:Yi 292zx:zxx:997:Unwritten language 293zy:zyy:998:Undetermined script 294zz:zzz:999:Uncoded script 295