Lines Matching +full:fine +full:- +full:tuning

6 perlunicook - cookbookish examples of handling Unicode in Perl
26 use utf8; # so literals and identifiers can be in UTF-8
28 use open qw(:std :encoding(UTF-8)); # undeclared streams in UTF-8
38 =head2 ℞ 1: Generic Unicode-savvy filter
51 =head2 ℞ 2: Fine-tuning Unicode warnings
56 no warnings "nonchar"; # the 66 forbidden non-characters
57 no warnings "surrogate"; # UTF-16/CESU-8 nonsense
62 Without the all-critical C<use utf8> declaration, putting UTF‑8 in your
72 my @鯉 = qw( koi8-f koi8-u koi8-r );
101 In an interpolated literal, whether a double-quoted string or a
111 # even non-BMP ranges in regex work fine
112 /[\x{1D434}-\x{1D467}]/
127 by that name for use in interpolated literals (double-quoted
141 Anything else is a Perl-specific convenience abbreviation. Specify one or
142 more scripts by names if you want short names that are script-specific.
164 Notice the C<%vx> vector-print functionality in C<printf>.
174 characters, or even to give unnamed private-use characters useful names.
187 C<CJK UNIFIED IDEOGRAPH-6771> and C<CJK UNIFIED IDEOGRAPH-4EAC>,
192 # cpan -i Unicode::Unihan
195 my $unhan = Unicode::Unihan->new;
197 printf "CJK $str in %-12s is ", $lang;
198 say $unhan->$lang($str);
212 # cpan -i Lingua::JA::Romanize::Japanese
214 my $k2r = Lingua::JA::Romanize::Japanese->new;
216 say "Japanese for $str is ", $k2r->chars($str);
231 my $bytes = encode("MIME-Header-ISO_2022_JP", $chars, 1);
239 $ perl -CA ...
244 @ARGV = map { decode('UTF-8', $_, 1) } @ARGV;
248 # cpan -i Encode::Locale
257 Use a command-line option, an environment variable, or else
260 $ perl -CS ...
264 use open qw(:std :encoding(UTF-8));
266 binmode(STDIN, ":encoding(UTF-8)");
272 # cpan -i Encode::Locale
277 binmode STDIN, ":encoding(console_in)" if -t STDIN;
278 binmode STDOUT, ":encoding(console_out)" if -t STDOUT;
279 binmode STDERR, ":encoding(console_out)" if -t STDERR;
283 Files opened without an encoding argument will be in UTF-8:
285 $ perl -CD ...
289 use open qw(:encoding(UTF-8));
293 $ perl -CSDA ...
297 use open qw(:std :encoding(UTF-8));
299 @ARGV = map { decode('UTF-8', $_, 1) } @ARGV;
304 to deal with encoded text, not by calling low-level
308 open(my $in_file, "< :encoding(UTF-16)", "wintext");
311 binmode($in_file, ":encoding(UTF-16)");
324 the incantation C<":raw :encoding(UTF-16LE) :crlf"> includes implicit
338 =head2 ℞ 21: Unicode case-insensitive comparisons
347 # sort case-insensitively
356 A Unicode linebreak matches the two-character CRLF
370 my $cat = charinfo(0x3A3)->{category}; # "Lu"
372 =head2 ℞ 24: Disabling Unicode-awareness in builtin charclasses
385 Or use specific un-Unicode properties, like C<\p{ahex}>
407 Define at compile-time your own custom character
410 # using private-use characters
428 same text to be searched. Note that this is about much more than just pre-
438 =head2 ℞ 28: Convert non-ASCII Unicode numerics
441 ASCII digits only, but Perl’s implicit string-to-number
459 Programmer-visible “characters” are codepoints matched by C</./s>,
460 but user-visible “characters” are graphemes matched by C</\X/>.
473 # cpan -i Unicode::GCString
475 my $gcs = Unicode::GCString->new($str);
476 my $first_five = $gcs->substr(0, 5);
487 # OR: cpan -i Unicode::GCString
489 $str = reverse Unicode::GCString->new($str);
500 # OR: cpan -i Unicode::GCString
502 my $gcs = Unicode::GCString->new($str);
503 my $count = $gcs->length;
505 =head2 ℞ 34: Unicode column-width for printing
519 my $gcs = Unicode::GCString->new($str);
520 my $cols = $gcs->columns;
521 my $pad = " " x (10 - $cols);
539 my $col = Unicode::Collate->new();
540 my @list = $col->sort(@old_list);
543 for a convenient command-line interface to this module.
545 =head2 ℞ 36: Case- I<and> accent-insensitive Unicode sort
551 my $col = Unicode::Collate->new(level => 1);
552 my @list = $col->sort(@old_list);
558 # either use v5.12, OR: cpan -i Unicode::Collate::Locale
560 my $col = Unicode::Collate::Locale->new(locale => "de__phonebook");
561 my @list = $col->sort(@old_list);
563 The I<ucsort> program mentioned above accepts a C<--locale> parameter.
570 $b->{AGE} <=> $a->{AGE}
572 $a->{NAME} cmp $b->{NAME}
577 my $coll = Unicode::Collate->new();
579 $rec->{NAME_key} = $coll->getSortKey( $rec->{NAME} );
582 $b->{AGE} <=> $a->{AGE}
584 $a->{NAME_key} cmp $b->{NAME_key}
587 =head2 ℞ 39: Case- I<and> accent-insensitive comparisons
593 my $es = Unicode::Collate->new(
599 $es->eq("García", "GARCIA" );
600 $es->eq("Márquez", "MARQUEZ");
602 =head2 ℞ 40: Case- I<and> accent-insensitive locale comparisons
606 my $de = Unicode::Collate::Locale->new(
611 $de->eq("tschüß", "TSCHUESS"); # notice ü => UE, ß => SS
617 # cpan -i Unicode::LineBreak
622 my $fmt = Unicode::LineBreak->new;
623 print $fmt->break($para), "\n";
638 my $enc_key = encode("UTF-8", $uni_key, 1);
639 my $enc_value = encode("UTF-8", $uni_value, 1);
645 my $enc_key = encode("UTF-8", $uni_key, 1);
647 my $uni_value = decode("UTF-8", $enc_value, 1);
659 $dbobj->Filter_Value("utf8"); # this is the magic bit
673 Here’s a full program showing how to make use of locale-sensitive
701 # umenu - demo sorting and printing of Unicode food
708 use open qw(:std :encoding(UTF-8)); # undeclared streams in UTF-8
731 "シュークリーム" => 1.85, # cream-filled pastry like eclair
747 my $coll = Unicode::Collate::Locale->new(locale => "ja");
749 for my $item ($coll->sort(keys %price)) {
755 return $str . ($padchar x ($width - colwidth($str)));
759 return Unicode::GCString->new($str)->columns;
791 I<uniquote> instead of I<cat -v> or I<hexdump>,
799 It also supplies these programs, all of which are general filters that do Unicode-y things:
845 Christiansen <et al.>, 2012-02-13 by O’Reilly Media. The code itself is
853 v1.0.0 – first public release, 2012-02-27