#
# Locale::Script - ISO codes for script identification (ISO 15924)
#
# $Id: Script.pm,v 2.2 2002/07/10 16:33:28 neilb Exp $
#

package Locale::Script;
use strict;
require 5.002;

require Exporter;
use Carp;
use Locale::Constants;


#-----------------------------------------------------------------------
#	Public Global Variables
#-----------------------------------------------------------------------
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
$VERSION   = sprintf("%d.%02d", q$Revision: 2.21 $ =~ /(\d+)\.(\d+)/);
@ISA       = qw(Exporter);
@EXPORT    = qw(code2script script2code
                all_script_codes all_script_names
                script_code2code
                LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC);

#-----------------------------------------------------------------------
#	Private Global Variables
#
#	Both tables are arrays indexed by codeset constant; each slot
#	holds a hash reference that is filled in from the DATA section
#	by the initialisation code at the bottom of this file.
#
#	$CODES->[CODESET]->{code}                  => script name
#	$SCRIPTS->[CODESET]->{lower-cased name}    => code
#-----------------------------------------------------------------------
my $CODES   = [];
my $SCRIPTS = [];


#=======================================================================
#
# code2script ( CODE [, CODESET ] )
#
# Return the script name registered for CODE in CODESET, or undef if
# CODE is undefined or unknown. CODESET defaults to LOCALE_CODE_DEFAULT.
# Alphabetic codes are matched case-insensitively; numeric codes may be
# given with or without leading zeros.
#
# An explicit undef (rather than a bare return) is returned on failure,
# so callers in list context still receive exactly one element.
#
#=======================================================================
sub code2script
{
    my $code    = shift;
    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;


    return undef unless defined $code;

    #-------------------------------------------------------------------
    # Make sure the code is in the right form before we use it
    # to look up the corresponding script.
    # We have to sprintf because the codes are given as 3-digits,
    # with leading 0's. Eg 070 for Egyptian demotic.
    #-------------------------------------------------------------------
    if ($codeset == LOCALE_CODE_NUMERIC)
    {
        # The empty string contains no non-digit, but sprintf would
        # turn it into "000" and match a real entry, so reject it too.
        return undef if $code eq '' || $code =~ /\D/;
        $code = sprintf("%.3d", $code);
    }
    else
    {
        $code = lc($code);
    }

    # Fetch the per-codeset table first: a plain read does not
    # autovivify a slot for an unknown codeset.
    my $table = $CODES->[$codeset];

    #-------------------------------------------------------------------
    # no such codeset, or no such script code!
    #-------------------------------------------------------------------
    return undef unless $table && exists $table->{$code};

    return $table->{$code};
}


#=======================================================================
#
# script2code ( SCRIPT [, CODESET ] )
#
# Return the code in CODESET for the script named SCRIPT, or undef if
# SCRIPT is undefined or unknown. The name is matched
# case-insensitively. CODESET defaults to LOCALE_CODE_DEFAULT.
#
#=======================================================================
sub script2code
{
    my $script  = shift;
    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;


    return undef unless defined $script;
    $script = lc($script);

    # Plain read: does not autovivify a slot for an unknown codeset.
    my $table = $SCRIPTS->[$codeset];

    #-------------------------------------------------------------------
    # no such codeset, or no such script!
    #-------------------------------------------------------------------
    return undef unless $table && exists $table->{$script};

    return $table->{$script};
}


#=======================================================================
#
# script_code2code ( CODE, IN-CODESET, OUT-CODESET )
#
# Convert CODE from IN-CODESET to OUT-CODESET by way of the script
# name. Croaks unless called with exactly three arguments. Returns
# undef when the two codesets are the same, when CODE is not known in
# IN-CODESET, or when the script has no code in OUT-CODESET.
#
#=======================================================================
sub script_code2code
{
    (@_ == 3) or croak "script_code2code() takes 3 arguments!";

    my $code   = shift;
    my $inset  = shift;
    my $outset = shift;


    return undef if $inset == $outset;

    my $script = code2script($code, $inset);
    return undef if not defined $script;

    return script2code($script, $outset);
}


#=======================================================================
#
# all_script_codes ( [ CODESET ] )
#
# Return every code known in CODESET (default LOCALE_CODE_DEFAULT),
# in no particular order. An unknown codeset yields an empty list.
#
#=======================================================================
sub all_script_codes
{
    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;

    # Fall back to an empty hash so that an unknown codeset neither
    # dies nor autovivifies a table slot.
    my $table = $CODES->[$codeset] || {};

    return keys %$table;
}


#=======================================================================
#
# all_script_names ( [ CODESET ] )
#
# Return every script name that has a code in CODESET (default
# LOCALE_CODE_DEFAULT), in no particular order. An unknown codeset
# yields an empty list.
#
#=======================================================================
sub all_script_names
{
    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;

    # Fall back to an empty hash so that an unknown codeset neither
    # dies nor autovivifies a table slot.
    my $table = $CODES->[$codeset] || {};

    return values %$table;
}


#=======================================================================
#
# initialisation code - load the DATA section into the lookup tables
# for all three codesets (alpha-2, alpha-3 and numeric)
#
# Each non-blank DATA row has the form  alpha2:alpha3:numeric:name
#
# NOTE(review): the table below lists the alpha-2 code "ph" twice
# (Phoenician and Pahlavi). The later row overwrites the earlier one in
# $CODES, so code2script('ph') yields "Pahlavi" while
# script2code('Phoenician') still yields "ph". Confirm the intended
# code against the ISO 15924 registry before changing the data.
#
#=======================================================================
{
    my ($alpha2, $alpha3, $numeric);
    my $script;

    local $_;

    while (<DATA>)
    {
        next unless /\S/;

        # Strip the line ending explicitly. A blind chop would eat the
        # last letter of the script name when the final row has no
        # trailing newline, and would leave a stray CR on CRLF input.
        s/[\r\n]+$//;

        ($alpha2, $alpha3, $numeric, $script) = split(/:/, $_, 4);

        # Skip malformed rows rather than storing undefined names.
        next unless defined $script;

        $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2}        = $script;
        $SCRIPTS->[LOCALE_CODE_ALPHA_2]->{lc($script)}  = $alpha2;

        # Test for "present", not for truth, so a code that happens to
        # be the string "0" would not be dropped.
        if (defined $alpha3 && length $alpha3)
        {
            $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3}        = $script;
            $SCRIPTS->[LOCALE_CODE_ALPHA_3]->{lc($script)}  = $alpha3;
        }

        if (defined $numeric && length $numeric)
        {
            $CODES->[LOCALE_CODE_NUMERIC]->{$numeric}       = $script;
            $SCRIPTS->[LOCALE_CODE_NUMERIC]->{lc($script)}  = $numeric;
        }

    }

    close(DATA);
}

1;

__DATA__
am:ama:130:Aramaic
ar:ara:160:Arabic
av:ave:151:Avestan
bh:bhm:300:Brahmi (Ashoka)
bi:bid:372:Buhid
bn:ben:325:Bengali
bo:bod:330:Tibetan
bp:bpm:285:Bopomofo
br:brl:570:Braille
bt:btk:365:Batak
bu:bug:367:Buginese (Makassar)
by:bys:550:Blissymbols
ca:cam:358:Cham
ch:chu:221:Old Church Slavonic
ci:cir:291:Cirth
cm:cmn:402:Cypro-Minoan
co:cop:205:Coptic
cp:cpr:403:Cypriote syllabary
cy:cyr:220:Cyrillic
ds:dsr:250:Deserel (Mormon)
dv:dvn:315:Devanagari (Nagari)
ed:egd:070:Egyptian demotic
eg:egy:050:Egyptian hieroglyphs
eh:egh:060:Egyptian hieratic
el:ell:200:Greek
eo:eos:210:Etruscan and Oscan
et:eth:430:Ethiopic
gl:glg:225:Glagolitic
gm:gmu:310:Gurmukhi
gt:gth:206:Gothic
gu:guj:320:Gujarati
ha:han:500:Han ideographs
he:heb:125:Hebrew
hg:hgl:420:Hangul
hm:hmo:450:Pahawh Hmong
ho:hoo:371:Hanunoo
hr:hrg:410:Hiragana
hu:hun:176:Old Hungarian runic
hv:hvn:175:Kok Turki runic
hy:hye:230:Armenian
iv:ivl:610:Indus Valley
ja:jap:930:(alias for Han + Hiragana + Katakana)
jl:jlg:445:Cherokee syllabary
jw:jwi:360:Javanese
ka:kam:241:Georgian (Mxedruli)
kh:khn:931:(alias for Hangul + Han)
kk:kkn:411:Katakana
km:khm:354:Khmer
kn:kan:345:Kannada
kr:krn:357:Karenni (Kayah Li)
ks:kst:305:Kharoshthi
kx:kax:240:Georgian (Xucuri)
la:lat:217:Latin
lf:laf:215:Latin (Fraktur variant)
lg:lag:216:Latin (Gaelic variant)
lo:lao:356:Lao
lp:lpc:335:Lepcha (Rong)
md:mda:140:Mandaean
me:mer:100:Meroitic
mh:may:090:Mayan hieroglyphs
ml:mlm:347:Malayalam
mn:mon:145:Mongolian
my:mya:350:Burmese
na:naa:400:Linear A
nb:nbb:401:Linear B
og:ogm:212:Ogham
or:ory:327:Oriya
os:osm:260:Osmanya
ph:phx:115:Phoenician
ph:pah:150:Pahlavi
pl:pld:282:Pollard Phonetic
pq:pqd:295:Klingon plQaD
pr:prm:227:Old Permic
ps:pst:600:Phaistos Disk
rn:rnr:211:Runic (Germanic)
rr:rro:620:Rongo-rongo
sa:sar:110:South Arabian
si:sin:348:Sinhala
sj:syj:137:Syriac (Jacobite variant)
sl:slb:440:Unified Canadian Aboriginal Syllabics
sn:syn:136:Syriac (Nestorian variant)
sw:sww:281:Shavian (Shaw)
sy:syr:135:Syriac (Estrangelo)
ta:tam:346:Tamil
tb:tbw:373:Tagbanwa
te:tel:340:Telugu
tf:tfn:120:Tifnagh
tg:tag:370:Tagalog
th:tha:352:Thai
tn:tna:170:Thaana
tw:twr:290:Tengwar
va:vai:470:Vai
vs:vsp:280:Visible Speech
xa:xas:000:Cuneiform, Sumero-Akkadian
xf:xfa:105:Cuneiform, Old Persian
xk:xkn:412:(alias for Hiragana + Katakana)
xu:xug:106:Cuneiform, Ugaritic
yi:yii:460:Yi
zx:zxx:997:Unwritten language
zy:zyy:998:Undetermined script
zz:zzz:999:Uncoded script