xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/lib/Locale/Script.pm (revision 0:68f95e015346)
1*0Sstevel@tonic-gate#
2*0Sstevel@tonic-gate# Locale::Script - ISO codes for script identification (ISO 15924)
3*0Sstevel@tonic-gate#
4*0Sstevel@tonic-gate# $Id: Script.pm,v 2.2 2002/07/10 16:33:28 neilb Exp $
5*0Sstevel@tonic-gate#
6*0Sstevel@tonic-gate
7*0Sstevel@tonic-gatepackage Locale::Script;
8*0Sstevel@tonic-gateuse strict;
9*0Sstevel@tonic-gaterequire 5.002;
10*0Sstevel@tonic-gate
11*0Sstevel@tonic-gaterequire Exporter;
12*0Sstevel@tonic-gateuse Carp;
13*0Sstevel@tonic-gateuse Locale::Constants;
14*0Sstevel@tonic-gate
15*0Sstevel@tonic-gate
16*0Sstevel@tonic-gate#-----------------------------------------------------------------------
17*0Sstevel@tonic-gate#	Public Global Variables
18*0Sstevel@tonic-gate#-----------------------------------------------------------------------
use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);

# Version string assembled from the RCS Revision keyword: the major number
# followed by the minor number zero-padded to two digits.
$VERSION = sprintf("%d.%02d", q$Revision: 2.21 $ =~ /(\d+)\.(\d+)/);

@ISA = qw(Exporter);

# Names exported by default: the lookup functions defined in this file,
# plus three LOCALE_CODE_* codeset constants (presumably supplied by
# Locale::Constants -- confirm against that module).
@EXPORT = qw(
    code2script
    script2code
    all_script_codes
    all_script_names
    script_code2code
    LOCALE_CODE_ALPHA_2
    LOCALE_CODE_ALPHA_3
    LOCALE_CODE_NUMERIC
);

#-----------------------------------------------------------------------
#	Private Global Variables
#-----------------------------------------------------------------------
# Both tables are array refs indexed by codeset constant; each element is
# a hash ref filled in by the initialisation block at the end of the file.
#   $CODES->[CODESET]{code}                     => script name
#   $COUNTRIES->[CODESET]{lower-cased name}     => code
# (Despite its name, $COUNTRIES holds script names, not countries.)
my $CODES     = [];
my $COUNTRIES = [];
32*0Sstevel@tonic-gate
33*0Sstevel@tonic-gate
34*0Sstevel@tonic-gate#=======================================================================
35*0Sstevel@tonic-gate#
36*0Sstevel@tonic-gate# code2script ( CODE [, CODESET ] )
37*0Sstevel@tonic-gate#
38*0Sstevel@tonic-gate#=======================================================================
sub code2script
{
    #-------------------------------------------------------------------
    # Look up the name of a script from one of its codes.
    #
    #   CODE     The code to look up.  Alphabetic codes are lower-cased
    #            before the lookup; numeric codes are zero-padded to
    #            three digits.
    #   CODESET  Optional.  One of the LOCALE_CODE_* constants; when
    #            omitted, LOCALE_CODE_DEFAULT is used.
    #
    # Returns the script name, or undef when CODE is undefined, is not
    # a well-formed numeric code (for the numeric codeset), or has no
    # entry in the selected codeset.
    #-------------------------------------------------------------------
    my $code = shift;
    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;

    return undef unless defined $code;

    if ($codeset == LOCALE_CODE_NUMERIC)
    {
        #---------------------------------------------------------------
        # Numeric codes are stored as 3-digit strings with leading
        # zeros (eg 070 for Egyptian demotic), so normalise with sprintf.
        # The empty string must be rejected explicitly: it contains no
        # non-digit, and sprintf would turn it into "000", which is a
        # real entry in the table.
        #---------------------------------------------------------------
        return undef if ($code eq '' || $code =~ /\D/);
        $code = sprintf("%.3d", $code);
    }
    else
    {
        $code = lc($code);
    }

    #-------------------------------------------------------------------
    # No entry means no such script code.
    #-------------------------------------------------------------------
    return undef unless exists $CODES->[$codeset]->{$code};

    return $CODES->[$codeset]->{$code};
}
75*0Sstevel@tonic-gate
76*0Sstevel@tonic-gate
77*0Sstevel@tonic-gate#=======================================================================
78*0Sstevel@tonic-gate#
79*0Sstevel@tonic-gate# script2code ( SCRIPT [, CODESET ] )
80*0Sstevel@tonic-gate#
81*0Sstevel@tonic-gate#=======================================================================
sub script2code
{
    #-------------------------------------------------------------------
    # Look up the code for a script name.
    #
    #   SCRIPT   The script name; it is lower-cased before the lookup.
    #   CODESET  Optional.  One of the LOCALE_CODE_* constants; when
    #            omitted, LOCALE_CODE_DEFAULT is used.
    #
    # Returns the code from the selected codeset, or undef when SCRIPT
    # is undefined or has no entry.
    #-------------------------------------------------------------------
    my ($script, $codeset) = @_;

    # Only fall back to the default when no second argument was passed
    # at all; an explicitly passed value is used as given.
    $codeset = LOCALE_CODE_DEFAULT unless @_ > 1;

    return undef unless defined $script;

    my $name_key = lc($script);

    return exists $COUNTRIES->[$codeset]->{$name_key}
         ? $COUNTRIES->[$codeset]->{$name_key}
         : undef;
}
102*0Sstevel@tonic-gate
103*0Sstevel@tonic-gate
104*0Sstevel@tonic-gate#=======================================================================
105*0Sstevel@tonic-gate#
106*0Sstevel@tonic-gate# script_code2code ( CODE, IN-CODESET, OUT-CODESET )
107*0Sstevel@tonic-gate#
108*0Sstevel@tonic-gate#=======================================================================
sub script_code2code
{
    #-------------------------------------------------------------------
    # Convert a script code from one codeset to another.
    #
    #   CODE         The code to convert.
    #   IN-CODESET   The codeset CODE belongs to.
    #   OUT-CODESET  The codeset to convert to.
    #
    # Croaks unless called with exactly three arguments.  Returns undef
    # when the two codesets are the same, when CODE is not known in
    # IN-CODESET, or when the script has no code in OUT-CODESET.
    #-------------------------------------------------------------------
    croak "script_code2code() takes 3 arguments!" unless @_ == 3;

    my ($code, $inset, $outset) = @_;

    return undef if $inset == $outset;

    # Go via the script name: code -> name -> code in the other codeset.
    my $script = code2script($code, $inset);
    return undef unless defined $script;

    return scalar script2code($script, $outset);
}
126*0Sstevel@tonic-gate
127*0Sstevel@tonic-gate
128*0Sstevel@tonic-gate#=======================================================================
129*0Sstevel@tonic-gate#
130*0Sstevel@tonic-gate# all_script_codes()
131*0Sstevel@tonic-gate#
132*0Sstevel@tonic-gate#=======================================================================
sub all_script_codes
{
    #-------------------------------------------------------------------
    # Return every code in a codeset.
    #
    #   CODESET  Optional.  One of the LOCALE_CODE_* constants; when
    #            omitted, LOCALE_CODE_DEFAULT is used.
    #
    # The result comes straight from keys() on the codeset's hash, so
    # it is in hash order rather than sorted.
    #-------------------------------------------------------------------
    my ($codeset) = @_;
    $codeset = LOCALE_CODE_DEFAULT unless @_;

    return keys %{ $CODES->[$codeset] };
}
139*0Sstevel@tonic-gate
140*0Sstevel@tonic-gate
141*0Sstevel@tonic-gate#=======================================================================
142*0Sstevel@tonic-gate#
143*0Sstevel@tonic-gate# all_script_names()
144*0Sstevel@tonic-gate#
145*0Sstevel@tonic-gate#=======================================================================
sub all_script_names
{
    #-------------------------------------------------------------------
    # Return every script name in a codeset.
    #
    #   CODESET  Optional.  One of the LOCALE_CODE_* constants; when
    #            omitted, LOCALE_CODE_DEFAULT is used.
    #
    # The result comes straight from values() on the codeset's hash, so
    # it is in hash order rather than sorted.
    #-------------------------------------------------------------------
    my ($codeset) = @_;
    $codeset = LOCALE_CODE_DEFAULT unless @_;

    return values %{ $CODES->[$codeset] };
}
152*0Sstevel@tonic-gate
153*0Sstevel@tonic-gate
154*0Sstevel@tonic-gate#=======================================================================
155*0Sstevel@tonic-gate#
# initialisation code - load the DATA table into the per-codeset lookup hashes
157*0Sstevel@tonic-gate#
158*0Sstevel@tonic-gate#=======================================================================
{
    #-------------------------------------------------------------------
    # Fill $CODES and $COUNTRIES from the records after __DATA__.
    # Each non-blank record has four colon-separated fields:
    #
    #     alpha2:alpha3:numeric:script name
    #
    # The split is limited to four fields, so a colon inside the script
    # name would be kept as part of the name.
    #
    # Records are applied in file order and a later record silently
    # replaces an earlier one with the same key.  The table currently
    # has two records with the alpha-2 code "ph", so only the second
    # is reachable by that code.
    #-------------------------------------------------------------------
    my $line;

    while (defined($line = <DATA>))
    {
        next unless $line =~ /\S/;

        # chomp rather than chop: only a trailing newline is removed,
        # so a final record without one keeps its last character.
        chomp $line;

        my ($alpha2, $alpha3, $numeric, $script) = split(/:/, $line, 4);

        # Names are stored lower-cased for the name -> code direction.
        my $name_key = lc($script);

        $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $script;
        $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{$name_key} = $alpha2;

        # The alpha-3 and numeric fields are only recorded when they
        # are true strings; "000" is true, so it is recorded.
        if ($alpha3)
        {
            $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $script;
            $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{$name_key} = $alpha3;
        }

        if ($numeric)
        {
            $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $script;
            $COUNTRIES->[LOCALE_CODE_NUMERIC]->{$name_key} = $numeric;
        }
    }

    close(DATA);
}
190*0Sstevel@tonic-gate
191*0Sstevel@tonic-gate1;
192*0Sstevel@tonic-gate
193*0Sstevel@tonic-gate__DATA__
194*0Sstevel@tonic-gateam:ama:130:Aramaic
195*0Sstevel@tonic-gatear:ara:160:Arabic
196*0Sstevel@tonic-gateav:ave:151:Avestan
197*0Sstevel@tonic-gatebh:bhm:300:Brahmi (Ashoka)
198*0Sstevel@tonic-gatebi:bid:372:Buhid
199*0Sstevel@tonic-gatebn:ben:325:Bengali
200*0Sstevel@tonic-gatebo:bod:330:Tibetan
201*0Sstevel@tonic-gatebp:bpm:285:Bopomofo
202*0Sstevel@tonic-gatebr:brl:570:Braille
203*0Sstevel@tonic-gatebt:btk:365:Batak
204*0Sstevel@tonic-gatebu:bug:367:Buginese (Makassar)
205*0Sstevel@tonic-gateby:bys:550:Blissymbols
206*0Sstevel@tonic-gateca:cam:358:Cham
207*0Sstevel@tonic-gatech:chu:221:Old Church Slavonic
208*0Sstevel@tonic-gateci:cir:291:Cirth
209*0Sstevel@tonic-gatecm:cmn:402:Cypro-Minoan
210*0Sstevel@tonic-gateco:cop:205:Coptic
211*0Sstevel@tonic-gatecp:cpr:403:Cypriote syllabary
212*0Sstevel@tonic-gatecy:cyr:220:Cyrillic
213*0Sstevel@tonic-gateds:dsr:250:Deserel (Mormon)
214*0Sstevel@tonic-gatedv:dvn:315:Devanagari (Nagari)
215*0Sstevel@tonic-gateed:egd:070:Egyptian demotic
216*0Sstevel@tonic-gateeg:egy:050:Egyptian hieroglyphs
217*0Sstevel@tonic-gateeh:egh:060:Egyptian hieratic
218*0Sstevel@tonic-gateel:ell:200:Greek
219*0Sstevel@tonic-gateeo:eos:210:Etruscan and Oscan
220*0Sstevel@tonic-gateet:eth:430:Ethiopic
221*0Sstevel@tonic-gategl:glg:225:Glagolitic
222*0Sstevel@tonic-gategm:gmu:310:Gurmukhi
223*0Sstevel@tonic-gategt:gth:206:Gothic
224*0Sstevel@tonic-gategu:guj:320:Gujarati
225*0Sstevel@tonic-gateha:han:500:Han ideographs
226*0Sstevel@tonic-gatehe:heb:125:Hebrew
227*0Sstevel@tonic-gatehg:hgl:420:Hangul
228*0Sstevel@tonic-gatehm:hmo:450:Pahawh Hmong
229*0Sstevel@tonic-gateho:hoo:371:Hanunoo
230*0Sstevel@tonic-gatehr:hrg:410:Hiragana
231*0Sstevel@tonic-gatehu:hun:176:Old Hungarian runic
232*0Sstevel@tonic-gatehv:hvn:175:Kok Turki runic
233*0Sstevel@tonic-gatehy:hye:230:Armenian
234*0Sstevel@tonic-gateiv:ivl:610:Indus Valley
235*0Sstevel@tonic-gateja:jap:930:(alias for Han + Hiragana + Katakana)
236*0Sstevel@tonic-gatejl:jlg:445:Cherokee syllabary
237*0Sstevel@tonic-gatejw:jwi:360:Javanese
238*0Sstevel@tonic-gateka:kam:241:Georgian (Mxedruli)
239*0Sstevel@tonic-gatekh:khn:931:(alias for Hangul + Han)
240*0Sstevel@tonic-gatekk:kkn:411:Katakana
241*0Sstevel@tonic-gatekm:khm:354:Khmer
242*0Sstevel@tonic-gatekn:kan:345:Kannada
243*0Sstevel@tonic-gatekr:krn:357:Karenni (Kayah Li)
244*0Sstevel@tonic-gateks:kst:305:Kharoshthi
245*0Sstevel@tonic-gatekx:kax:240:Georgian (Xucuri)
246*0Sstevel@tonic-gatela:lat:217:Latin
247*0Sstevel@tonic-gatelf:laf:215:Latin (Fraktur variant)
248*0Sstevel@tonic-gatelg:lag:216:Latin (Gaelic variant)
249*0Sstevel@tonic-gatelo:lao:356:Lao
250*0Sstevel@tonic-gatelp:lpc:335:Lepcha (Rong)
251*0Sstevel@tonic-gatemd:mda:140:Mandaean
252*0Sstevel@tonic-gateme:mer:100:Meroitic
253*0Sstevel@tonic-gatemh:may:090:Mayan hieroglyphs
254*0Sstevel@tonic-gateml:mlm:347:Malayalam
255*0Sstevel@tonic-gatemn:mon:145:Mongolian
256*0Sstevel@tonic-gatemy:mya:350:Burmese
257*0Sstevel@tonic-gatena:naa:400:Linear A
258*0Sstevel@tonic-gatenb:nbb:401:Linear B
259*0Sstevel@tonic-gateog:ogm:212:Ogham
260*0Sstevel@tonic-gateor:ory:327:Oriya
261*0Sstevel@tonic-gateos:osm:260:Osmanya
262*0Sstevel@tonic-gateph:phx:115:Phoenician
263*0Sstevel@tonic-gateph:pah:150:Pahlavi
264*0Sstevel@tonic-gatepl:pld:282:Pollard Phonetic
265*0Sstevel@tonic-gatepq:pqd:295:Klingon plQaD
266*0Sstevel@tonic-gatepr:prm:227:Old Permic
267*0Sstevel@tonic-gateps:pst:600:Phaistos Disk
268*0Sstevel@tonic-gatern:rnr:211:Runic (Germanic)
269*0Sstevel@tonic-gaterr:rro:620:Rongo-rongo
270*0Sstevel@tonic-gatesa:sar:110:South Arabian
271*0Sstevel@tonic-gatesi:sin:348:Sinhala
272*0Sstevel@tonic-gatesj:syj:137:Syriac (Jacobite variant)
273*0Sstevel@tonic-gatesl:slb:440:Unified Canadian Aboriginal Syllabics
274*0Sstevel@tonic-gatesn:syn:136:Syriac (Nestorian variant)
275*0Sstevel@tonic-gatesw:sww:281:Shavian (Shaw)
276*0Sstevel@tonic-gatesy:syr:135:Syriac (Estrangelo)
277*0Sstevel@tonic-gateta:tam:346:Tamil
278*0Sstevel@tonic-gatetb:tbw:373:Tagbanwa
279*0Sstevel@tonic-gatete:tel:340:Telugu
280*0Sstevel@tonic-gatetf:tfn:120:Tifnagh
281*0Sstevel@tonic-gatetg:tag:370:Tagalog
282*0Sstevel@tonic-gateth:tha:352:Thai
283*0Sstevel@tonic-gatetn:tna:170:Thaana
284*0Sstevel@tonic-gatetw:twr:290:Tengwar
285*0Sstevel@tonic-gateva:vai:470:Vai
286*0Sstevel@tonic-gatevs:vsp:280:Visible Speech
287*0Sstevel@tonic-gatexa:xas:000:Cuneiform, Sumero-Akkadian
288*0Sstevel@tonic-gatexf:xfa:105:Cuneiform, Old Persian
289*0Sstevel@tonic-gatexk:xkn:412:(alias for Hiragana + Katakana)
290*0Sstevel@tonic-gatexu:xug:106:Cuneiform, Ugaritic
291*0Sstevel@tonic-gateyi:yii:460:Yi
292*0Sstevel@tonic-gatezx:zxx:997:Unwritten language
293*0Sstevel@tonic-gatezy:zyy:998:Undetermined script
294*0Sstevel@tonic-gatezz:zzz:999:Uncoded script
295