xref: /onnv-gate/usr/src/cmd/perl/5.8.4/distrib/lib/I18N/Collate.pm (revision 0:68f95e015346)
1*0Sstevel@tonic-gatepackage I18N::Collate;
2*0Sstevel@tonic-gate
3*0Sstevel@tonic-gateuse strict;
4*0Sstevel@tonic-gateour $VERSION = '1.00';
5*0Sstevel@tonic-gate
6*0Sstevel@tonic-gate=head1 NAME
7*0Sstevel@tonic-gate
8*0Sstevel@tonic-gateI18N::Collate - compare 8-bit scalar data according to the current locale
9*0Sstevel@tonic-gate
10*0Sstevel@tonic-gate=head1 SYNOPSIS
11*0Sstevel@tonic-gate
12*0Sstevel@tonic-gate    use I18N::Collate;
13*0Sstevel@tonic-gate    setlocale(LC_COLLATE, 'locale-of-your-choice');
14*0Sstevel@tonic-gate    $s1 = new I18N::Collate "scalar_data_1";
15*0Sstevel@tonic-gate    $s2 = new I18N::Collate "scalar_data_2";
16*0Sstevel@tonic-gate
17*0Sstevel@tonic-gate=head1 DESCRIPTION
18*0Sstevel@tonic-gate
19*0Sstevel@tonic-gate  ***
20*0Sstevel@tonic-gate
21*0Sstevel@tonic-gate  WARNING: starting from the Perl version 5.003_06
22*0Sstevel@tonic-gate  the I18N::Collate interface for comparing 8-bit scalar data
23*0Sstevel@tonic-gate  according to the current locale
24*0Sstevel@tonic-gate
25*0Sstevel@tonic-gate	HAS BEEN DEPRECATED
26*0Sstevel@tonic-gate
27*0Sstevel@tonic-gate  That is, please do not use it anymore for any new applications
28*0Sstevel@tonic-gate  and please migrate the old applications away from it because its
29*0Sstevel@tonic-gate  functionality was integrated into the Perl core language in the
30*0Sstevel@tonic-gate  release 5.003_06.
31*0Sstevel@tonic-gate
32*0Sstevel@tonic-gate  See the perllocale manual page for further information.
33*0Sstevel@tonic-gate
34*0Sstevel@tonic-gate  ***
35*0Sstevel@tonic-gate
36*0Sstevel@tonic-gateThis module provides you with objects that will collate
37*0Sstevel@tonic-gateaccording to your national character set, provided that the
38*0Sstevel@tonic-gatePOSIX setlocale() function is supported on your system.
39*0Sstevel@tonic-gate
40*0Sstevel@tonic-gateYou can compare $s1 and $s2 above with
41*0Sstevel@tonic-gate
42*0Sstevel@tonic-gate    $s1 le $s2
43*0Sstevel@tonic-gate
44*0Sstevel@tonic-gateto extract the data itself, you'll need a dereference: $$s1
45*0Sstevel@tonic-gate
46*0Sstevel@tonic-gateThis module uses POSIX::setlocale(). The basic collation conversion is
47*0Sstevel@tonic-gatedone by strxfrm() which terminates at NUL characters being a decent C
48*0Sstevel@tonic-gateroutine.  collate_xfrm() handles embedded NUL characters gracefully.
49*0Sstevel@tonic-gate
50*0Sstevel@tonic-gateThe available locales depend on your operating system; try whether
51*0Sstevel@tonic-gateC<locale -a> shows them or man pages for "locale" or "nlsinfo" or the
52*0Sstevel@tonic-gatedirect approach C<ls /usr/lib/nls/loc> or C<ls /usr/lib/nls> or
53*0Sstevel@tonic-gateC<ls /usr/lib/locale>.  Not all the locales that your vendor supports
54*0Sstevel@tonic-gateare necessarily installed: please consult your operating system's
55*0Sstevel@tonic-gatedocumentation and possibly your local system administration.  The
56*0Sstevel@tonic-gatelocale names are probably something like C<xx_XX.(ISO)?8859-N> or
57*0Sstevel@tonic-gateC<xx_XX.(ISO)?8859N>, for example C<fr_CH.ISO8859-1> is the Swiss (CH)
58*0Sstevel@tonic-gatevariant of French (fr), ISO Latin (8859) 1 (-1) which is the Western
59*0Sstevel@tonic-gateEuropean character set.
60*0Sstevel@tonic-gate
61*0Sstevel@tonic-gate=cut
62*0Sstevel@tonic-gate
63*0Sstevel@tonic-gate# I18N::Collate.pm
64*0Sstevel@tonic-gate#
65*0Sstevel@tonic-gate# Author:	Jarkko Hietaniemi <F<jhi@iki.fi>>
66*0Sstevel@tonic-gate#		Helsinki University of Technology, Finland
67*0Sstevel@tonic-gate#
68*0Sstevel@tonic-gate# Acks:		Guy Decoux <F<decoux@moulon.inra.fr>> understood
69*0Sstevel@tonic-gate#		overloading magic much deeper than I and told
70*0Sstevel@tonic-gate#		how to cut the size of this code by more than half.
71*0Sstevel@tonic-gate#		(my first version did overload all of lt gt eq le ge cmp)
72*0Sstevel@tonic-gate#
73*0Sstevel@tonic-gate# Purpose:      compare 8-bit scalar data according to the current locale
74*0Sstevel@tonic-gate#
75*0Sstevel@tonic-gate# Requirements:	Perl5 POSIX::setlocale() and POSIX::strxfrm()
76*0Sstevel@tonic-gate#
77*0Sstevel@tonic-gate# Exports:	setlocale 1)
78*0Sstevel@tonic-gate#		collate_xfrm 2)
79*0Sstevel@tonic-gate#
80*0Sstevel@tonic-gate# Overloads:	cmp # 3)
81*0Sstevel@tonic-gate#
82*0Sstevel@tonic-gate# Usage:	use I18N::Collate;
83*0Sstevel@tonic-gate#	        setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4)
84*0Sstevel@tonic-gate#		$s1 = new I18N::Collate "scalar_data_1";
85*0Sstevel@tonic-gate#		$s2 = new I18N::Collate "scalar_data_2";
86*0Sstevel@tonic-gate#
87*0Sstevel@tonic-gate#		now you can compare $s1 and $s2: $s1 le $s2
88*0Sstevel@tonic-gate#		to extract the data itself, you need to deref: $$s1
89*0Sstevel@tonic-gate#
90*0Sstevel@tonic-gate# Notes:
91*0Sstevel@tonic-gate#		1) this uses POSIX::setlocale
92*0Sstevel@tonic-gate#		2) the basic collation conversion is done by strxfrm() which
93*0Sstevel@tonic-gate#		   terminates at NUL characters being a decent C routine.
94*0Sstevel@tonic-gate#		   collate_xfrm handles embedded NUL characters gracefully.
95*0Sstevel@tonic-gate#		3) due to cmp and overload magic, lt le eq ge gt work also
96*0Sstevel@tonic-gate#		4) the available locales depend on your operating system;
97*0Sstevel@tonic-gate#		   try whether "locale -a" shows them or man pages for
98*0Sstevel@tonic-gate#		   "locale" or "nlsinfo" work or the more direct
99*0Sstevel@tonic-gate#		   approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls".
100*0Sstevel@tonic-gate#		   Not all the locales that your vendor supports
101*0Sstevel@tonic-gate#		   are necessarily installed: please consult your
102*0Sstevel@tonic-gate#		   operating system's documentation.
103*0Sstevel@tonic-gate#		   The locale names are probably something like
104*0Sstevel@tonic-gate#		   'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N',
105*0Sstevel@tonic-gate#		   for example 'fr_CH.ISO8859-1' is the Swiss (CH)
106*0Sstevel@tonic-gate#		   variant of French (fr), ISO Latin (8859) 1 (-1)
107*0Sstevel@tonic-gate#		   which is the Western European character set.
108*0Sstevel@tonic-gate#
109*0Sstevel@tonic-gate# Updated:	19961005
110*0Sstevel@tonic-gate#
111*0Sstevel@tonic-gate# ---
112*0Sstevel@tonic-gate
113*0Sstevel@tonic-gateuse POSIX qw(strxfrm LC_COLLATE);
114*0Sstevel@tonic-gateuse warnings::register;
115*0Sstevel@tonic-gate
116*0Sstevel@tonic-gaterequire Exporter;
117*0Sstevel@tonic-gate
118*0Sstevel@tonic-gateour @ISA = qw(Exporter);
119*0Sstevel@tonic-gateour @EXPORT = qw(collate_xfrm setlocale LC_COLLATE);
120*0Sstevel@tonic-gateour @EXPORT_OK = qw();
121*0Sstevel@tonic-gate
122*0Sstevel@tonic-gateuse overload qw(
123*0Sstevel@tonic-gatefallback	1
124*0Sstevel@tonic-gatecmp		collate_cmp
125*0Sstevel@tonic-gate);
126*0Sstevel@tonic-gate
127*0Sstevel@tonic-gateour($LOCALE, $C);
128*0Sstevel@tonic-gate
129*0Sstevel@tonic-gateour $please_use_I18N_Collate_even_if_deprecated = 0;
130*0Sstevel@tonic-gatesub new {
131*0Sstevel@tonic-gate  my $new = $_[1];
132*0Sstevel@tonic-gate
133*0Sstevel@tonic-gate  if (warnings::enabled() && $] >= 5.003_06) {
134*0Sstevel@tonic-gate    unless ($please_use_I18N_Collate_even_if_deprecated) {
135*0Sstevel@tonic-gate      warnings::warn <<___EOD___;
136*0Sstevel@tonic-gate***
137*0Sstevel@tonic-gate
138*0Sstevel@tonic-gate  WARNING: starting from the Perl version 5.003_06
139*0Sstevel@tonic-gate  the I18N::Collate interface for comparing 8-bit scalar data
140*0Sstevel@tonic-gate  according to the current locale
141*0Sstevel@tonic-gate
142*0Sstevel@tonic-gate	HAS BEEN DEPRECATED
143*0Sstevel@tonic-gate
144*0Sstevel@tonic-gate  That is, please do not use it anymore for any new applications
145*0Sstevel@tonic-gate  and please migrate the old applications away from it because its
146*0Sstevel@tonic-gate  functionality was integrated into the Perl core language in the
147*0Sstevel@tonic-gate  release 5.003_06.
148*0Sstevel@tonic-gate
149*0Sstevel@tonic-gate  See the perllocale manual page for further information.
150*0Sstevel@tonic-gate
151*0Sstevel@tonic-gate***
152*0Sstevel@tonic-gate___EOD___
153*0Sstevel@tonic-gate      $please_use_I18N_Collate_even_if_deprecated++;
154*0Sstevel@tonic-gate    }
155*0Sstevel@tonic-gate  }
156*0Sstevel@tonic-gate
157*0Sstevel@tonic-gate  bless \$new;
158*0Sstevel@tonic-gate}
159*0Sstevel@tonic-gate
160*0Sstevel@tonic-gatesub setlocale {
161*0Sstevel@tonic-gate my ($category, $locale) = @_[0,1];
162*0Sstevel@tonic-gate
163*0Sstevel@tonic-gate POSIX::setlocale($category, $locale) if (defined $category);
164*0Sstevel@tonic-gate # the current $LOCALE
165*0Sstevel@tonic-gate $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || '';
166*0Sstevel@tonic-gate}
167*0Sstevel@tonic-gate
168*0Sstevel@tonic-gatesub C {
169*0Sstevel@tonic-gate  my $s = ${$_[0]};
170*0Sstevel@tonic-gate
171*0Sstevel@tonic-gate  $C->{$LOCALE}->{$s} = collate_xfrm($s)
172*0Sstevel@tonic-gate    unless (defined $C->{$LOCALE}->{$s}); # cache when met
173*0Sstevel@tonic-gate
174*0Sstevel@tonic-gate  $C->{$LOCALE}->{$s};
175*0Sstevel@tonic-gate}
176*0Sstevel@tonic-gate
177*0Sstevel@tonic-gatesub collate_xfrm {
178*0Sstevel@tonic-gate  my $s = $_[0];
179*0Sstevel@tonic-gate  my $x = '';
180*0Sstevel@tonic-gate
181*0Sstevel@tonic-gate  for (split(/(\000+)/, $s)) {
182*0Sstevel@tonic-gate    $x .= (/^\000/) ? $_ : strxfrm("$_\000");
183*0Sstevel@tonic-gate  }
184*0Sstevel@tonic-gate
185*0Sstevel@tonic-gate  $x;
186*0Sstevel@tonic-gate}
187*0Sstevel@tonic-gate
188*0Sstevel@tonic-gatesub collate_cmp {
189*0Sstevel@tonic-gate  &C($_[0]) cmp &C($_[1]);
190*0Sstevel@tonic-gate}
191*0Sstevel@tonic-gate
192*0Sstevel@tonic-gate# init $LOCALE
193*0Sstevel@tonic-gate
194*0Sstevel@tonic-gate&I18N::Collate::setlocale();
195*0Sstevel@tonic-gate
196*0Sstevel@tonic-gate1; # keep require happy
197