xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/lib/Locale/Script.pm (revision 0:68f95e015346)
1#
2# Locale::Script - ISO codes for script identification (ISO 15924)
3#
4# $Id: Script.pm,v 2.2 2002/07/10 16:33:28 neilb Exp $
5#
6
7package Locale::Script;
8use strict;
9require 5.002;
10
11require Exporter;
12use Carp;
13use Locale::Constants;
14
15
16#-----------------------------------------------------------------------
17#	Public Global Variables
18#-----------------------------------------------------------------------
19use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);
20$VERSION   = sprintf("%d.%02d", q$Revision: 2.21 $ =~ /(\d+)\.(\d+)/);
21@ISA       = qw(Exporter);
22@EXPORT    = qw(code2script script2code
23                all_script_codes all_script_names
24		script_code2code
25		LOCALE_CODE_ALPHA_2 LOCALE_CODE_ALPHA_3 LOCALE_CODE_NUMERIC);
26
27#-----------------------------------------------------------------------
28#	Private Global Variables
29#-----------------------------------------------------------------------
30my $CODES     = [];
31my $COUNTRIES = [];
32
33
34#=======================================================================
35#
36# code2script ( CODE [, CODESET ] )
37#
38#=======================================================================
39sub code2script
40{
41    my $code = shift;
42    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;
43
44
45    return undef unless defined $code;
46
47    #-------------------------------------------------------------------
48    # Make sure the code is in the right form before we use it
49    # to look up the corresponding script.
50    # We have to sprintf because the codes are given as 3-digits,
51    # with leading 0's. Eg 070 for Egyptian demotic.
52    #-------------------------------------------------------------------
53    if ($codeset == LOCALE_CODE_NUMERIC)
54    {
55	return undef if ($code =~ /\D/);
56	$code = sprintf("%.3d", $code);
57    }
58    else
59    {
60	$code = lc($code);
61    }
62
63    if (exists $CODES->[$codeset]->{$code})
64    {
65        return $CODES->[$codeset]->{$code};
66    }
67    else
68    {
69        #---------------------------------------------------------------
70        # no such script code!
71        #---------------------------------------------------------------
72        return undef;
73    }
74}
75
76
77#=======================================================================
78#
79# script2code ( SCRIPT [, CODESET ] )
80#
81#=======================================================================
82sub script2code
83{
84    my $script = shift;
85    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;
86
87
88    return undef unless defined $script;
89    $script = lc($script);
90    if (exists $COUNTRIES->[$codeset]->{$script})
91    {
92        return $COUNTRIES->[$codeset]->{$script};
93    }
94    else
95    {
96        #---------------------------------------------------------------
97        # no such script!
98        #---------------------------------------------------------------
99        return undef;
100    }
101}
102
103
104#=======================================================================
105#
106# script_code2code ( CODE, IN-CODESET, OUT-CODESET )
107#
108#=======================================================================
109sub script_code2code
110{
111    (@_ == 3) or croak "script_code2code() takes 3 arguments!";
112
113    my $code = shift;
114    my $inset = shift;
115    my $outset = shift;
116    my $outcode;
117    my $script;
118
119
120    return undef if $inset == $outset;
121    $script = code2script($code, $inset);
122    return undef if not defined $script;
123    $outcode = script2code($script, $outset);
124    return $outcode;
125}
126
127
128#=======================================================================
129#
130# all_script_codes()
131#
132#=======================================================================
133sub all_script_codes
134{
135    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;
136
137    return keys %{ $CODES->[$codeset] };
138}
139
140
141#=======================================================================
142#
143# all_script_names()
144#
145#=======================================================================
146sub all_script_names
147{
148    my $codeset = @_ > 0 ? shift : LOCALE_CODE_DEFAULT;
149
150    return values %{ $CODES->[$codeset] };
151}
152
153
154#=======================================================================
155#
156# initialisation code - stuff the DATA into the ALPHA2 hash
157#
158#=======================================================================
159{
160    my ($alpha2, $alpha3, $numeric);
161    my $script;
162
163    local $_;
164
165    while (<DATA>)
166    {
167        next unless /\S/;
168        chop;
169        ($alpha2, $alpha3, $numeric, $script) = split(/:/, $_, 4);
170
171        $CODES->[LOCALE_CODE_ALPHA_2]->{$alpha2} = $script;
172        $COUNTRIES->[LOCALE_CODE_ALPHA_2]->{"\L$script"} = $alpha2;
173
174	if ($alpha3)
175	{
176            $CODES->[LOCALE_CODE_ALPHA_3]->{$alpha3} = $script;
177            $COUNTRIES->[LOCALE_CODE_ALPHA_3]->{"\L$script"} = $alpha3;
178	}
179
180	if ($numeric)
181	{
182            $CODES->[LOCALE_CODE_NUMERIC]->{$numeric} = $script;
183            $COUNTRIES->[LOCALE_CODE_NUMERIC]->{"\L$script"} = $numeric;
184	}
185
186    }
187
188    close(DATA);
189}
190
1911;
192
193__DATA__
194am:ama:130:Aramaic
195ar:ara:160:Arabic
196av:ave:151:Avestan
197bh:bhm:300:Brahmi (Ashoka)
198bi:bid:372:Buhid
199bn:ben:325:Bengali
200bo:bod:330:Tibetan
201bp:bpm:285:Bopomofo
202br:brl:570:Braille
203bt:btk:365:Batak
204bu:bug:367:Buginese (Makassar)
205by:bys:550:Blissymbols
206ca:cam:358:Cham
207ch:chu:221:Old Church Slavonic
208ci:cir:291:Cirth
209cm:cmn:402:Cypro-Minoan
210co:cop:205:Coptic
211cp:cpr:403:Cypriote syllabary
212cy:cyr:220:Cyrillic
213ds:dsr:250:Deserel (Mormon)
214dv:dvn:315:Devanagari (Nagari)
215ed:egd:070:Egyptian demotic
216eg:egy:050:Egyptian hieroglyphs
217eh:egh:060:Egyptian hieratic
218el:ell:200:Greek
219eo:eos:210:Etruscan and Oscan
220et:eth:430:Ethiopic
221gl:glg:225:Glagolitic
222gm:gmu:310:Gurmukhi
223gt:gth:206:Gothic
224gu:guj:320:Gujarati
225ha:han:500:Han ideographs
226he:heb:125:Hebrew
227hg:hgl:420:Hangul
228hm:hmo:450:Pahawh Hmong
229ho:hoo:371:Hanunoo
230hr:hrg:410:Hiragana
231hu:hun:176:Old Hungarian runic
232hv:hvn:175:Kok Turki runic
233hy:hye:230:Armenian
234iv:ivl:610:Indus Valley
235ja:jap:930:(alias for Han + Hiragana + Katakana)
236jl:jlg:445:Cherokee syllabary
237jw:jwi:360:Javanese
238ka:kam:241:Georgian (Mxedruli)
239kh:khn:931:(alias for Hangul + Han)
240kk:kkn:411:Katakana
241km:khm:354:Khmer
242kn:kan:345:Kannada
243kr:krn:357:Karenni (Kayah Li)
244ks:kst:305:Kharoshthi
245kx:kax:240:Georgian (Xucuri)
246la:lat:217:Latin
247lf:laf:215:Latin (Fraktur variant)
248lg:lag:216:Latin (Gaelic variant)
249lo:lao:356:Lao
250lp:lpc:335:Lepcha (Rong)
251md:mda:140:Mandaean
252me:mer:100:Meroitic
253mh:may:090:Mayan hieroglyphs
254ml:mlm:347:Malayalam
255mn:mon:145:Mongolian
256my:mya:350:Burmese
257na:naa:400:Linear A
258nb:nbb:401:Linear B
259og:ogm:212:Ogham
260or:ory:327:Oriya
261os:osm:260:Osmanya
262ph:phx:115:Phoenician
263ph:pah:150:Pahlavi
264pl:pld:282:Pollard Phonetic
265pq:pqd:295:Klingon plQaD
266pr:prm:227:Old Permic
267ps:pst:600:Phaistos Disk
268rn:rnr:211:Runic (Germanic)
269rr:rro:620:Rongo-rongo
270sa:sar:110:South Arabian
271si:sin:348:Sinhala
272sj:syj:137:Syriac (Jacobite variant)
273sl:slb:440:Unified Canadian Aboriginal Syllabics
274sn:syn:136:Syriac (Nestorian variant)
275sw:sww:281:Shavian (Shaw)
276sy:syr:135:Syriac (Estrangelo)
277ta:tam:346:Tamil
278tb:tbw:373:Tagbanwa
279te:tel:340:Telugu
280tf:tfn:120:Tifnagh
281tg:tag:370:Tagalog
282th:tha:352:Thai
283tn:tna:170:Thaana
284tw:twr:290:Tengwar
285va:vai:470:Vai
286vs:vsp:280:Visible Speech
287xa:xas:000:Cuneiform, Sumero-Akkadian
288xf:xfa:105:Cuneiform, Old Persian
289xk:xkn:412:(alias for Hiragana + Katakana)
290xu:xug:106:Cuneiform, Ugaritic
291yi:yii:460:Yi
292zx:zxx:997:Unwritten language
293zy:zyy:998:Undetermined script
294zz:zzz:999:Uncoded script
295