1*5759b3d2Safresh1#!perl -w 2*5759b3d2Safresh1 3*5759b3d2Safresh1# This is a base file to be used by various .t's in its directory 4*5759b3d2Safresh1# It tests various malformed UTF-8 sequences and some code points that are 5*5759b3d2Safresh1# "problematic", and verifies that the correct warnings/flags etc are 6*5759b3d2Safresh1# generated when using them. For the code points, it also takes the UTF-8 and 7*5759b3d2Safresh1# perturbs it to be malformed in various ways, and tests that this gets 8*5759b3d2Safresh1# appropriately detected. 9*5759b3d2Safresh1 10*5759b3d2Safresh1use strict; 11*5759b3d2Safresh1use Test::More; 12*5759b3d2Safresh1 13*5759b3d2Safresh1BEGIN { 14*5759b3d2Safresh1 require './t/utf8_setup.pl'; 15*5759b3d2Safresh1 use_ok('XS::APItest'); 16*5759b3d2Safresh1}; 17*5759b3d2Safresh1 18*5759b3d2Safresh1$|=1; 19*5759b3d2Safresh1 20*5759b3d2Safresh1use Data::Dumper; 21*5759b3d2Safresh1 22*5759b3d2Safresh1my @well_formed = ( 23*5759b3d2Safresh1 "\xE1", 24*5759b3d2Safresh1 "The quick brown fox jumped over the lazy dog", 25*5759b3d2Safresh1 "Ces systèmes de codage sont souvent incompatibles entre eux. Ainsi, deux systèmes peuvent utiliser le même nombre pour deux caractères différents ou utiliser différents nombres pour le même caractère.", 26*5759b3d2Safresh1 "Kelimelerin m\xC3\xAAme caract\xC3\xA8re ve yaz\xC3\xB1abc", 27*5759b3d2Safresh1); 28*5759b3d2Safresh1 29*5759b3d2Safresh1my @malformed = ( 30*5759b3d2Safresh1 "Kelimelerin m\xC3\xAAme caract\xC3\xA8re ve yaz\xC4\xB1abc", 31*5759b3d2Safresh1 "Kelimelerin m\xC3\xAAme caract\xC3\xA8re ve yaz\xC4\xB1\xC3\xA8abc", 32*5759b3d2Safresh1 "Kelimelerin m\xC3\xAAme caract\xC3re ve yazi\xC3\xA8abc", 33*5759b3d2Safresh1 "Kelimelerin m\xC3\xAAme caract\xA8 ve yazi\xC3\xA8abc", 34*5759b3d2Safresh1 "Kelimelerin m\xC3\xAAme caract\xC3\xA8\xC3re ve yazi\xC3\xA8abc", 35*5759b3d2Safresh1); 36*5759b3d2Safresh1 37*5759b3d2Safresh1for my $test (@well_formed) { 38*5759b3d2Safresh1 my $utf8 = $test; 39*5759b3d2Safresh1 utf8::upgrade($utf8); 40*5759b3d2Safresh1 my $utf8_length; 41*5759b3d2Safresh1 my $byte_length = length $test; 42*5759b3d2Safresh1 43*5759b3d2Safresh1 { 44*5759b3d2Safresh1 use bytes; 45*5759b3d2Safresh1 $utf8_length = length $utf8; 46*5759b3d2Safresh1 } 47*5759b3d2Safresh1 48*5759b3d2Safresh1 my $ret_ref = test_utf8_to_bytes($utf8, $utf8_length); 49*5759b3d2Safresh1 50*5759b3d2Safresh1 is ($ret_ref->[0], $test, "Successfully downgraded " 51*5759b3d2Safresh1 . display_bytes($utf8)); 52*5759b3d2Safresh1 is ($ret_ref->[1], $byte_length, "... And returned correct length(" 53*5759b3d2Safresh1 . $byte_length . ")"); 54*5759b3d2Safresh1} 55*5759b3d2Safresh1 56*5759b3d2Safresh1for my $test (@malformed) { 57*5759b3d2Safresh1 my $utf8 = $test; 58*5759b3d2Safresh1 my $utf8_length = length $test; 59*5759b3d2Safresh1 60*5759b3d2Safresh1 my $ret_ref = test_utf8_to_bytes($utf8, $utf8_length); 61*5759b3d2Safresh1 62*5759b3d2Safresh1 ok (! defined $ret_ref->[0], "Returned undef for malformed " 63*5759b3d2Safresh1 . display_bytes($utf8)); 64*5759b3d2Safresh1 is ($ret_ref->[1], -1, "... And returned length -1"); 65*5759b3d2Safresh1 is ($ret_ref->[2], $utf8, "... And left the input unchanged"); 66*5759b3d2Safresh1} 67*5759b3d2Safresh1 68*5759b3d2Safresh1done_testing(); 69