xref: /openbsd-src/gnu/usr.bin/perl/cpan/Encode/t/gsm0338.t (revision 5486feefcc8cb79b19e014ab332cc5dfd05b3b33)
1BEGIN {
2    if ($ENV{'PERL_CORE'}){
3        chdir 't';
4        unshift @INC, '../lib';
5    }
6    require Config; Config->import();
7    if ($Config{'extensions'} !~ /\bEncode\b/) {
8      print "1..0 # Skip: Encode was not built\n";
9      exit 0;
10    }
11    $| = 1;
12}
13
14use strict;
15use utf8;
16use Test::More tests => 777;
17use Encode;
18use Encode::GSM0338;
19use PerlIO::encoding;
20
21# perl < 5.8.8 didn't enable STOP_AT_PARTIAL by default
22$PerlIO::encoding::fallback |= Encode::STOP_AT_PARTIAL;
23
24my $chk = Encode::LEAVE_SRC();
25
26# escapes
27# see https://www.3gpp.org/dynareport/23038.htm
28# see https://www.etsi.org/deliver/etsi_ts/123000_123099/123038/15.00.00_60/ts_123038v150000p.pdf (page 22)
29my %esc_seq = (
30	       "\x{20ac}" => "\x1b\x65",
31	       "\x0c"     => "\x1b\x0A",
32	       "["        => "\x1b\x3C",
33	       "\\"       => "\x1b\x2F",
34	       "]"        => "\x1b\x3E",
35	       "^"        => "\x1b\x14",
36	       "{"        => "\x1b\x28",
37	       "|"        => "\x1b\x40",
38	       "}"        => "\x1b\x29",
39	       "~"        => "\x1b\x3D",
40);
41
42my %unesc_seq = reverse %esc_seq;
43
44
45sub eu{
46    $_[0] =~ /[\x00-\x1f]/ ?
47	sprintf("\\x{%04X}", ord($_[0])) : encode_utf8($_[0]);
48
49}
50
51for my $c ( map { chr } 0 .. 127 ) {
52    next if $c eq "\x1B"; # escape character, start of multibyte sequence
53    my $u = $Encode::GSM0338::GSM2UNI{$c};
54
55    # default character set
56    is decode( "gsm0338", $c, $chk ), $u,
57      sprintf( "decode \\x%02X", ord($c) );
58    eval { decode( "gsm0338", $c . "\xff", $chk | Encode::FB_CROAK ) };
59    ok( $@, $@ );
60    is encode( "gsm0338", $u, $chk ), $c, sprintf( "encode %s", eu($u) );
61    eval { encode( "gsm0338", $u . "\x{3000}", $chk | Encode::FB_CROAK ) };
62    ok( $@, $@ );
63
64        is decode( "gsm0338", "\x00" . $c ), '@' . decode( "gsm0338", $c ),
65          sprintf( '@: decode \x00+\x%02X', ord($c) );
66
67    # escape seq.
68    my $ecs = "\x1b" . $c;
69    if ( $unesc_seq{$ecs} ) {
70        is decode( "gsm0338", $ecs, $chk ), $unesc_seq{$ecs},
71          sprintf( "ESC: decode ESC+\\x%02X", ord($c) );
72        is encode( "gsm0338", $unesc_seq{$ecs}, $chk ), $ecs,
73          sprintf( "ESC: encode %s ", eu( $unesc_seq{$ecs} ) );
74    }
75    else {
76        is decode( "gsm0338", $ecs, $chk ),
77          "\x{FFFD}",
78          sprintf( "decode ESC+\\x%02X", ord($c) );
79    }
80}
81
82# https://rt.cpan.org/Ticket/Display.html?id=75670
83is decode("gsm0338", "\x09") => chr(0xC7), 'RT75670: decode';
84is encode("gsm0338", chr(0xC7)) => "\x09", 'RT75670: encode';
85
86# https://rt.cpan.org/Public/Bug/Display.html?id=124571
87is decode("gsm0338", encode('gsm0338', '..@@..')), '..@@..';
88is decode("gsm0338", encode('gsm0338', '..@€..')), '..@€..';
89
90# special GSM sequence, € is at 1024 byte buffer boundary
91my $gsm = "\x41" . "\x1B\x65" x 1024;
92open my $fh, '<:encoding(gsm0338)', \$gsm or die;
93my $uni = <$fh>;
94close $fh;
95is $uni, "A" . "€" x 1024, 'PerlIO encoding(gsm0338) read works';
96