xref: /openbsd-src/gnu/usr.bin/perl/ext/XS-APItest/t/utf8_to_bytes.t (revision 5759b3d249badf144a6240f7eec4dcf9df003e6b)
1*5759b3d2Safresh1#!perl -w
2*5759b3d2Safresh1
3*5759b3d2Safresh1# This is a base file to be used by various .t's in its directory
4*5759b3d2Safresh1# It tests various malformed UTF-8 sequences and some code points that are
5*5759b3d2Safresh1# "problematic", and verifies that the correct warnings/flags etc are
6*5759b3d2Safresh1# generated when using them.  For the code points, it also takes the UTF-8 and
7*5759b3d2Safresh1# perturbs it to be malformed in various ways, and tests that this gets
8*5759b3d2Safresh1# appropriately detected.
9*5759b3d2Safresh1
10*5759b3d2Safresh1use strict;
11*5759b3d2Safresh1use Test::More;
12*5759b3d2Safresh1
13*5759b3d2Safresh1BEGIN {
14*5759b3d2Safresh1    require './t/utf8_setup.pl';
15*5759b3d2Safresh1    use_ok('XS::APItest');
16*5759b3d2Safresh1};
17*5759b3d2Safresh1
18*5759b3d2Safresh1$|=1;
19*5759b3d2Safresh1
20*5759b3d2Safresh1use Data::Dumper;
21*5759b3d2Safresh1
22*5759b3d2Safresh1my @well_formed = (
23*5759b3d2Safresh1            "\xE1",
24*5759b3d2Safresh1            "The quick brown fox jumped over the lazy dog",
25*5759b3d2Safresh1            "Ces systèmes de codage sont souvent incompatibles entre eux.  Ainsi, deux systèmes peuvent utiliser le même nombre pour deux caractères différents ou utiliser différents nombres pour le même caractère.",
26*5759b3d2Safresh1            "Kelimelerin m\xC3\xAAme caract\xC3\xA8re ve yaz\xC3\xB1abc",
27*5759b3d2Safresh1);
28*5759b3d2Safresh1
29*5759b3d2Safresh1my @malformed = (
30*5759b3d2Safresh1            "Kelimelerin m\xC3\xAAme caract\xC3\xA8re ve yaz\xC4\xB1abc",
31*5759b3d2Safresh1            "Kelimelerin m\xC3\xAAme caract\xC3\xA8re ve yaz\xC4\xB1\xC3\xA8abc",
32*5759b3d2Safresh1            "Kelimelerin m\xC3\xAAme caract\xC3re ve yazi\xC3\xA8abc",
33*5759b3d2Safresh1            "Kelimelerin m\xC3\xAAme caract\xA8 ve yazi\xC3\xA8abc",
34*5759b3d2Safresh1            "Kelimelerin m\xC3\xAAme caract\xC3\xA8\xC3re ve yazi\xC3\xA8abc",
35*5759b3d2Safresh1);
36*5759b3d2Safresh1
37*5759b3d2Safresh1for my $test (@well_formed) {
38*5759b3d2Safresh1    my $utf8 = $test;
39*5759b3d2Safresh1    utf8::upgrade($utf8);
40*5759b3d2Safresh1    my $utf8_length;
41*5759b3d2Safresh1    my $byte_length = length $test;
42*5759b3d2Safresh1
43*5759b3d2Safresh1    {
44*5759b3d2Safresh1        use bytes;
45*5759b3d2Safresh1        $utf8_length = length $utf8;
46*5759b3d2Safresh1    }
47*5759b3d2Safresh1
48*5759b3d2Safresh1    my $ret_ref = test_utf8_to_bytes($utf8, $utf8_length);
49*5759b3d2Safresh1
50*5759b3d2Safresh1    is ($ret_ref->[0], $test, "Successfully downgraded "
51*5759b3d2Safresh1                            . display_bytes($utf8));
52*5759b3d2Safresh1    is ($ret_ref->[1], $byte_length, "... And returned correct length("
53*5759b3d2Safresh1                                     . $byte_length . ")");
54*5759b3d2Safresh1}
55*5759b3d2Safresh1
56*5759b3d2Safresh1for my $test (@malformed) {
57*5759b3d2Safresh1    my $utf8 = $test;
58*5759b3d2Safresh1    my $utf8_length = length $test;
59*5759b3d2Safresh1
60*5759b3d2Safresh1    my $ret_ref = test_utf8_to_bytes($utf8, $utf8_length);
61*5759b3d2Safresh1
62*5759b3d2Safresh1    ok (! defined $ret_ref->[0], "Returned undef for malformed "
63*5759b3d2Safresh1                                . display_bytes($utf8));
64*5759b3d2Safresh1    is ($ret_ref->[1], -1, "... And returned length -1");
65*5759b3d2Safresh1    is ($ret_ref->[2], $utf8, "... And left the input unchanged");
66*5759b3d2Safresh1}
67*5759b3d2Safresh1
68*5759b3d2Safresh1done_testing();
69