1# 2# $Id: UTF7.pm,v 2.10 2017/06/10 17:23:50 dankogai Exp $ 3# 4package Encode::Unicode::UTF7; 5use strict; 6use warnings; 7use parent qw(Encode::Encoding); 8__PACKAGE__->Define('UTF-7'); 9our $VERSION = do { my @r = ( q$Revision: 2.10 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r }; 10use MIME::Base64; 11use Encode qw(find_encoding); 12 13# 14# Algorithms taken from Unicode::String by Gisle Aas 15# 16 17our $OPTIONAL_DIRECT_CHARS = 1; 18my $specials = quotemeta "\'(),-./:?"; 19$OPTIONAL_DIRECT_CHARS 20 and $specials .= quotemeta "!\"#$%&*;<=>@[]^_`{|}"; 21 22# \s will not work because it matches U+3000 DEOGRAPHIC SPACE 23# We use qr/[\n\r\t\ ] instead 24my $re_asis = qr/(?:[\n\r\t\ A-Za-z0-9$specials])/; 25my $re_encoded = qr/(?:[^\n\r\t\ A-Za-z0-9$specials])/; 26my $e_utf16 = find_encoding("UTF-16BE"); 27 28sub needs_lines { 1 } 29 30sub encode($$;$) { 31 my ( $obj, $str, $chk ) = @_; 32 return undef unless defined $str; 33 my $len = length($str); 34 pos($str) = 0; 35 my $bytes = substr($str, 0, 0); # to propagate taintedness 36 while ( pos($str) < $len ) { 37 if ( $str =~ /\G($re_asis+)/ogc ) { 38 my $octets = $1; 39 utf8::downgrade($octets); 40 $bytes .= $octets; 41 } 42 elsif ( $str =~ /\G($re_encoded+)/ogsc ) { 43 if ( $1 eq "+" ) { 44 $bytes .= "+-"; 45 } 46 else { 47 my $s = $1; 48 my $base64 = encode_base64( $e_utf16->encode($s), '' ); 49 $base64 =~ s/=+$//; 50 $bytes .= "+$base64-"; 51 } 52 } 53 else { 54 die "This should not happen! (pos=" . pos($str) . ")"; 55 } 56 } 57 $_[1] = '' if $chk; 58 return $bytes; 59} 60 61sub decode($$;$) { 62 use re 'taint'; 63 my ( $obj, $bytes, $chk ) = @_; 64 return undef unless defined $bytes; 65 my $len = length($bytes); 66 my $str = substr($bytes, 0, 0); # to propagate taintedness; 67 pos($bytes) = 0; 68 no warnings 'uninitialized'; 69 while ( pos($bytes) < $len ) { 70 if ( $bytes =~ /\G([^+]+)/ogc ) { 71 $str .= $1; 72 } 73 elsif ( $bytes =~ /\G\+-/ogc ) { 74 $str .= "+"; 75 } 76 elsif ( $bytes =~ /\G\+([A-Za-z0-9+\/]+)-?/ogsc ) { 77 my $base64 = $1; 78 my $pad = length($base64) % 4; 79 $base64 .= "=" x ( 4 - $pad ) if $pad; 80 $str .= $e_utf16->decode( decode_base64($base64) ); 81 } 82 elsif ( $bytes =~ /\G\+/ogc ) { 83 $^W and warn "Bad UTF7 data escape"; 84 $str .= "+"; 85 } 86 else { 87 die "This should not happen " . pos($bytes); 88 } 89 } 90 $_[1] = '' if $chk; 91 return $str; 92} 931; 94__END__ 95 96=head1 NAME 97 98Encode::Unicode::UTF7 -- UTF-7 encoding 99 100=head1 SYNOPSIS 101 102 use Encode qw/encode decode/; 103 $utf7 = encode("UTF-7", $utf8); 104 $utf8 = decode("UTF-7", $ucs2); 105 106=head1 ABSTRACT 107 108This module implements UTF-7 encoding documented in RFC 2152. UTF-7, 109as its name suggests, is a 7-bit re-encoded version of UTF-16BE. It 110is designed to be MTA-safe and expected to be a standard way to 111exchange Unicoded mails via mails. But with the advent of UTF-8 and 1128-bit compliant MTAs, UTF-7 is hardly ever used. 113 114UTF-7 was not supported by Encode until version 1.95 because of that. 115But Unicode::String, a module by Gisle Aas which adds Unicode supports 116to non-utf8-savvy perl did support UTF-7, the UTF-7 support was added 117so Encode can supersede Unicode::String 100%. 118 119=head1 In Practice 120 121When you want to encode Unicode for mails and web pages, however, do 122not use UTF-7 unless you are sure your recipients and readers can 123handle it. Very few MUAs and WWW Browsers support these days (only 124Mozilla seems to support one). For general cases, use UTF-8 for 125message body and MIME-Header for header instead. 126 127=head1 SEE ALSO 128 129L<Encode>, L<Encode::Unicode>, L<Unicode::String> 130 131RFC 2781 L<http://www.ietf.org/rfc/rfc2152.txt> 132 133=cut 134