1*0Sstevel@tonic-gatepackage I18N::Collate; 2*0Sstevel@tonic-gate 3*0Sstevel@tonic-gateuse strict; 4*0Sstevel@tonic-gateour $VERSION = '1.00'; 5*0Sstevel@tonic-gate 6*0Sstevel@tonic-gate=head1 NAME 7*0Sstevel@tonic-gate 8*0Sstevel@tonic-gateI18N::Collate - compare 8-bit scalar data according to the current locale 9*0Sstevel@tonic-gate 10*0Sstevel@tonic-gate=head1 SYNOPSIS 11*0Sstevel@tonic-gate 12*0Sstevel@tonic-gate use I18N::Collate; 13*0Sstevel@tonic-gate setlocale(LC_COLLATE, 'locale-of-your-choice'); 14*0Sstevel@tonic-gate $s1 = new I18N::Collate "scalar_data_1"; 15*0Sstevel@tonic-gate $s2 = new I18N::Collate "scalar_data_2"; 16*0Sstevel@tonic-gate 17*0Sstevel@tonic-gate=head1 DESCRIPTION 18*0Sstevel@tonic-gate 19*0Sstevel@tonic-gate *** 20*0Sstevel@tonic-gate 21*0Sstevel@tonic-gate WARNING: starting from the Perl version 5.003_06 22*0Sstevel@tonic-gate the I18N::Collate interface for comparing 8-bit scalar data 23*0Sstevel@tonic-gate according to the current locale 24*0Sstevel@tonic-gate 25*0Sstevel@tonic-gate HAS BEEN DEPRECATED 26*0Sstevel@tonic-gate 27*0Sstevel@tonic-gate That is, please do not use it anymore for any new applications 28*0Sstevel@tonic-gate and please migrate the old applications away from it because its 29*0Sstevel@tonic-gate functionality was integrated into the Perl core language in the 30*0Sstevel@tonic-gate release 5.003_06. 31*0Sstevel@tonic-gate 32*0Sstevel@tonic-gate See the perllocale manual page for further information. 33*0Sstevel@tonic-gate 34*0Sstevel@tonic-gate *** 35*0Sstevel@tonic-gate 36*0Sstevel@tonic-gateThis module provides you with objects that will collate 37*0Sstevel@tonic-gateaccording to your national character set, provided that the 38*0Sstevel@tonic-gatePOSIX setlocale() function is supported on your system. 39*0Sstevel@tonic-gate 40*0Sstevel@tonic-gateYou can compare $s1 and $s2 above with 41*0Sstevel@tonic-gate 42*0Sstevel@tonic-gate $s1 le $s2 43*0Sstevel@tonic-gate 44*0Sstevel@tonic-gateto extract the data itself, you'll need a dereference: $$s1 45*0Sstevel@tonic-gate 46*0Sstevel@tonic-gateThis module uses POSIX::setlocale(). The basic collation conversion is 47*0Sstevel@tonic-gatedone by strxfrm() which terminates at NUL characters being a decent C 48*0Sstevel@tonic-gateroutine. collate_xfrm() handles embedded NUL characters gracefully. 49*0Sstevel@tonic-gate 50*0Sstevel@tonic-gateThe available locales depend on your operating system; try whether 51*0Sstevel@tonic-gateC<locale -a> shows them or man pages for "locale" or "nlsinfo" or the 52*0Sstevel@tonic-gatedirect approach C<ls /usr/lib/nls/loc> or C<ls /usr/lib/nls> or 53*0Sstevel@tonic-gateC<ls /usr/lib/locale>. Not all the locales that your vendor supports 54*0Sstevel@tonic-gateare necessarily installed: please consult your operating system's 55*0Sstevel@tonic-gatedocumentation and possibly your local system administration. The 56*0Sstevel@tonic-gatelocale names are probably something like C<xx_XX.(ISO)?8859-N> or 57*0Sstevel@tonic-gateC<xx_XX.(ISO)?8859N>, for example C<fr_CH.ISO8859-1> is the Swiss (CH) 58*0Sstevel@tonic-gatevariant of French (fr), ISO Latin (8859) 1 (-1) which is the Western 59*0Sstevel@tonic-gateEuropean character set. 60*0Sstevel@tonic-gate 61*0Sstevel@tonic-gate=cut 62*0Sstevel@tonic-gate 63*0Sstevel@tonic-gate# I18N::Collate.pm 64*0Sstevel@tonic-gate# 65*0Sstevel@tonic-gate# Author: Jarkko Hietaniemi <F<jhi@iki.fi>> 66*0Sstevel@tonic-gate# Helsinki University of Technology, Finland 67*0Sstevel@tonic-gate# 68*0Sstevel@tonic-gate# Acks: Guy Decoux <F<decoux@moulon.inra.fr>> understood 69*0Sstevel@tonic-gate# overloading magic much deeper than I and told 70*0Sstevel@tonic-gate# how to cut the size of this code by more than half. 71*0Sstevel@tonic-gate# (my first version did overload all of lt gt eq le ge cmp) 72*0Sstevel@tonic-gate# 73*0Sstevel@tonic-gate# Purpose: compare 8-bit scalar data according to the current locale 74*0Sstevel@tonic-gate# 75*0Sstevel@tonic-gate# Requirements: Perl5 POSIX::setlocale() and POSIX::strxfrm() 76*0Sstevel@tonic-gate# 77*0Sstevel@tonic-gate# Exports: setlocale 1) 78*0Sstevel@tonic-gate# collate_xfrm 2) 79*0Sstevel@tonic-gate# 80*0Sstevel@tonic-gate# Overloads: cmp # 3) 81*0Sstevel@tonic-gate# 82*0Sstevel@tonic-gate# Usage: use I18N::Collate; 83*0Sstevel@tonic-gate# setlocale(LC_COLLATE, 'locale-of-your-choice'); # 4) 84*0Sstevel@tonic-gate# $s1 = new I18N::Collate "scalar_data_1"; 85*0Sstevel@tonic-gate# $s2 = new I18N::Collate "scalar_data_2"; 86*0Sstevel@tonic-gate# 87*0Sstevel@tonic-gate# now you can compare $s1 and $s2: $s1 le $s2 88*0Sstevel@tonic-gate# to extract the data itself, you need to deref: $$s1 89*0Sstevel@tonic-gate# 90*0Sstevel@tonic-gate# Notes: 91*0Sstevel@tonic-gate# 1) this uses POSIX::setlocale 92*0Sstevel@tonic-gate# 2) the basic collation conversion is done by strxfrm() which 93*0Sstevel@tonic-gate# terminates at NUL characters being a decent C routine. 94*0Sstevel@tonic-gate# collate_xfrm handles embedded NUL characters gracefully. 95*0Sstevel@tonic-gate# 3) due to cmp and overload magic, lt le eq ge gt work also 96*0Sstevel@tonic-gate# 4) the available locales depend on your operating system; 97*0Sstevel@tonic-gate# try whether "locale -a" shows them or man pages for 98*0Sstevel@tonic-gate# "locale" or "nlsinfo" work or the more direct 99*0Sstevel@tonic-gate# approach "ls /usr/lib/nls/loc" or "ls /usr/lib/nls". 100*0Sstevel@tonic-gate# Not all the locales that your vendor supports 101*0Sstevel@tonic-gate# are necessarily installed: please consult your 102*0Sstevel@tonic-gate# operating system's documentation. 103*0Sstevel@tonic-gate# The locale names are probably something like 104*0Sstevel@tonic-gate# 'xx_XX.(ISO)?8859-N' or 'xx_XX.(ISO)?8859N', 105*0Sstevel@tonic-gate# for example 'fr_CH.ISO8859-1' is the Swiss (CH) 106*0Sstevel@tonic-gate# variant of French (fr), ISO Latin (8859) 1 (-1) 107*0Sstevel@tonic-gate# which is the Western European character set. 108*0Sstevel@tonic-gate# 109*0Sstevel@tonic-gate# Updated: 19961005 110*0Sstevel@tonic-gate# 111*0Sstevel@tonic-gate# --- 112*0Sstevel@tonic-gate 113*0Sstevel@tonic-gateuse POSIX qw(strxfrm LC_COLLATE); 114*0Sstevel@tonic-gateuse warnings::register; 115*0Sstevel@tonic-gate 116*0Sstevel@tonic-gaterequire Exporter; 117*0Sstevel@tonic-gate 118*0Sstevel@tonic-gateour @ISA = qw(Exporter); 119*0Sstevel@tonic-gateour @EXPORT = qw(collate_xfrm setlocale LC_COLLATE); 120*0Sstevel@tonic-gateour @EXPORT_OK = qw(); 121*0Sstevel@tonic-gate 122*0Sstevel@tonic-gateuse overload qw( 123*0Sstevel@tonic-gatefallback 1 124*0Sstevel@tonic-gatecmp collate_cmp 125*0Sstevel@tonic-gate); 126*0Sstevel@tonic-gate 127*0Sstevel@tonic-gateour($LOCALE, $C); 128*0Sstevel@tonic-gate 129*0Sstevel@tonic-gateour $please_use_I18N_Collate_even_if_deprecated = 0; 130*0Sstevel@tonic-gatesub new { 131*0Sstevel@tonic-gate my $new = $_[1]; 132*0Sstevel@tonic-gate 133*0Sstevel@tonic-gate if (warnings::enabled() && $] >= 5.003_06) { 134*0Sstevel@tonic-gate unless ($please_use_I18N_Collate_even_if_deprecated) { 135*0Sstevel@tonic-gate warnings::warn <<___EOD___; 136*0Sstevel@tonic-gate*** 137*0Sstevel@tonic-gate 138*0Sstevel@tonic-gate WARNING: starting from the Perl version 5.003_06 139*0Sstevel@tonic-gate the I18N::Collate interface for comparing 8-bit scalar data 140*0Sstevel@tonic-gate according to the current locale 141*0Sstevel@tonic-gate 142*0Sstevel@tonic-gate HAS BEEN DEPRECATED 143*0Sstevel@tonic-gate 144*0Sstevel@tonic-gate That is, please do not use it anymore for any new applications 145*0Sstevel@tonic-gate and please migrate the old applications away from it because its 146*0Sstevel@tonic-gate functionality was integrated into the Perl core language in the 147*0Sstevel@tonic-gate release 5.003_06. 148*0Sstevel@tonic-gate 149*0Sstevel@tonic-gate See the perllocale manual page for further information. 150*0Sstevel@tonic-gate 151*0Sstevel@tonic-gate*** 152*0Sstevel@tonic-gate___EOD___ 153*0Sstevel@tonic-gate $please_use_I18N_Collate_even_if_deprecated++; 154*0Sstevel@tonic-gate } 155*0Sstevel@tonic-gate } 156*0Sstevel@tonic-gate 157*0Sstevel@tonic-gate bless \$new; 158*0Sstevel@tonic-gate} 159*0Sstevel@tonic-gate 160*0Sstevel@tonic-gatesub setlocale { 161*0Sstevel@tonic-gate my ($category, $locale) = @_[0,1]; 162*0Sstevel@tonic-gate 163*0Sstevel@tonic-gate POSIX::setlocale($category, $locale) if (defined $category); 164*0Sstevel@tonic-gate # the current $LOCALE 165*0Sstevel@tonic-gate $LOCALE = $locale || $ENV{'LC_COLLATE'} || $ENV{'LC_ALL'} || ''; 166*0Sstevel@tonic-gate} 167*0Sstevel@tonic-gate 168*0Sstevel@tonic-gatesub C { 169*0Sstevel@tonic-gate my $s = ${$_[0]}; 170*0Sstevel@tonic-gate 171*0Sstevel@tonic-gate $C->{$LOCALE}->{$s} = collate_xfrm($s) 172*0Sstevel@tonic-gate unless (defined $C->{$LOCALE}->{$s}); # cache when met 173*0Sstevel@tonic-gate 174*0Sstevel@tonic-gate $C->{$LOCALE}->{$s}; 175*0Sstevel@tonic-gate} 176*0Sstevel@tonic-gate 177*0Sstevel@tonic-gatesub collate_xfrm { 178*0Sstevel@tonic-gate my $s = $_[0]; 179*0Sstevel@tonic-gate my $x = ''; 180*0Sstevel@tonic-gate 181*0Sstevel@tonic-gate for (split(/(\000+)/, $s)) { 182*0Sstevel@tonic-gate $x .= (/^\000/) ? $_ : strxfrm("$_\000"); 183*0Sstevel@tonic-gate } 184*0Sstevel@tonic-gate 185*0Sstevel@tonic-gate $x; 186*0Sstevel@tonic-gate} 187*0Sstevel@tonic-gate 188*0Sstevel@tonic-gatesub collate_cmp { 189*0Sstevel@tonic-gate &C($_[0]) cmp &C($_[1]); 190*0Sstevel@tonic-gate} 191*0Sstevel@tonic-gate 192*0Sstevel@tonic-gate# init $LOCALE 193*0Sstevel@tonic-gate 194*0Sstevel@tonic-gate&I18N::Collate::setlocale(); 195*0Sstevel@tonic-gate 196*0Sstevel@tonic-gate1; # keep require happy 197