1package Encode::MIME::Header; 2use strict; 3# use warnings; 4our $VERSION = do { my @r = (q$Revision: 1.9 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r }; 5use Encode qw(find_encoding encode_utf8 decode_utf8); 6use MIME::Base64; 7use Carp; 8 9my %seed = 10 ( 11 decode_b => '1', # decodes 'B' encoding ? 12 decode_q => '1', # decodes 'Q' encoding ? 13 encode => 'B', # encode with 'B' or 'Q' ? 14 bpl => 75, # bytes per line 15 ); 16 17$Encode::Encoding{'MIME-Header'} = 18 bless { 19 %seed, 20 Name => 'MIME-Header', 21 } => __PACKAGE__; 22 23$Encode::Encoding{'MIME-B'} = 24 bless { 25 %seed, 26 decode_q => 0, 27 Name => 'MIME-B', 28 } => __PACKAGE__; 29 30$Encode::Encoding{'MIME-Q'} = 31 bless { 32 %seed, 33 decode_q => 1, 34 encode => 'Q', 35 Name => 'MIME-Q', 36 } => __PACKAGE__; 37 38use base qw(Encode::Encoding); 39 40sub needs_lines { 1 } 41sub perlio_ok{ 0 }; 42 43sub decode($$;$){ 44 use utf8; 45 my ($obj, $str, $chk) = @_; 46 # zap spaces between encoded words 47 $str =~ s/\?=\s+=\?/\?==\?/gos; 48 # multi-line header to single line 49 $str =~ s/(:?\r|\n|\r\n)[ \t]//gos; 50 $str =~ 51 s{ 52 =\? # begin encoded word 53 ([0-9A-Za-z\-_]+) # charset (encoding) 54 \?([QqBb])\? # delimiter 55 (.*?) # Base64-encodede contents 56 \?= # end encoded word 57 }{ 58 if (uc($2) eq 'B'){ 59 $obj->{decode_b} or croak qq(MIME "B" unsupported); 60 decode_b($1, $3); 61 }elsif(uc($2) eq 'Q'){ 62 $obj->{decode_q} or croak qq(MIME "Q" unsupported); 63 decode_q($1, $3); 64 }else{ 65 croak qq(MIME "$2" encoding is nonexistent!); 66 } 67 }egox; 68 $_[1] = '' if $chk; 69 return $str; 70} 71 72sub decode_b{ 73 my $enc = shift; 74 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc"); 75 my $db64 = decode_base64(shift); 76 return $d->name eq 'utf8' ? 77 Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ); 78} 79 80sub decode_q{ 81 my ($enc, $q) = @_; 82 my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc"); 83 $q =~ s/_/ /go; 84 $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego; 85 return $d->name eq 'utf8' ? 86 Encode::decode_utf8($q) : $d->decode($q, Encode::FB_PERLQQ); 87} 88 89my $especials = 90 join('|' => 91 map {quotemeta(chr($_))} 92 unpack("C*", qq{()<>@,;:\"\'/[]?.=})); 93 94my $re_encoded_word = 95 qr{ 96 (?: 97 =\? # begin encoded word 98 (?:[0-9A-Za-z\-_]+) # charset (encoding) 99 \?(?:[QqBb])\? # delimiter 100 (?:.*?) # Base64-encodede contents 101 \?= # end encoded word 102 ) 103 }xo; 104 105my $re_especials = qr{$re_encoded_word|$especials}xo; 106 107sub encode($$;$){ 108 my ($obj, $str, $chk) = @_; 109 my @line = (); 110 for my $line (split /\r|\n|\r\n/o, $str){ 111 my (@word, @subline); 112 for my $word (split /($re_especials)/o, $line){ 113 if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){ 114 push @word, $obj->_encode($word); 115 }else{ 116 push @word, $word; 117 } 118 } 119 my $subline = ''; 120 for my $word (@word){ 121 use bytes (); 122 if (bytes::length($subline) + bytes::length($word) > $obj->{bpl}){ 123 push @subline, $subline; 124 $subline = ''; 125 } 126 $subline .= $word; 127 } 128 $subline and push @subline, $subline; 129 push @line, join("\n " => @subline); 130 } 131 $_[1] = '' if $chk; 132 return join("\n", @line); 133} 134 135use constant HEAD => '=?UTF-8?'; 136use constant TAIL => '?='; 137use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, }; 138 139sub _encode{ 140 my ($o, $str) = @_; 141 my $enc = $o->{encode}; 142 my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL)); 143 # to coerce a floating-point arithmetics, the following contains 144 # .0 in numbers -- dankogai 145 $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0; 146 my @result = (); 147 my $chunk = ''; 148 while(length(my $chr = substr($str, 0, 1, ''))){ 149 use bytes (); 150 if (bytes::length($chunk) + bytes::length($chr) > $llen){ 151 push @result, SINGLE->{$enc}($chunk); 152 $chunk = ''; 153 } 154 $chunk .= $chr; 155 } 156 $chunk and push @result, SINGLE->{$enc}($chunk); 157 return @result; 158} 159 160sub _encode_b{ 161 HEAD . 'B?' . encode_base64(encode_utf8(shift), '') . TAIL; 162} 163 164sub _encode_q{ 165 my $chunk = shift; 166 $chunk =~ s{ 167 ([^0-9A-Za-z]) 168 }{ 169 join("" => map {sprintf "=%02X", $_} unpack("C*", $1)) 170 }egox; 171 return decode_utf8(HEAD . 'Q?' . $chunk . TAIL); 172} 173 1741; 175__END__ 176 177=head1 NAME 178 179Encode::MIME::Header -- MIME 'B' and 'Q' header encoding 180 181=head1 SYNOPSIS 182 183 use Encode qw/encode decode/; 184 $utf8 = decode('MIME-Header', $header); 185 $header = encode('MIME-Header', $utf8); 186 187=head1 ABSTRACT 188 189This module implements RFC 2047 Mime Header Encoding. There are 3 190variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>. The 191difference is described below 192 193 decode() encode() 194 ---------------------------------------------- 195 MIME-Header Both B and Q =?UTF-8?B?....?= 196 MIME-B B only; Q croaks =?UTF-8?B?....?= 197 MIME-Q Q only; B croaks =?UTF-8?Q?....?= 198 199=head1 DESCRIPTION 200 201When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD> 202is extracted and decoded for I<X> encoding (B for Base64, Q for 203Quoted-Printable). Then the decoded chunk is fed to 204decode(I<encoding>). So long as I<encoding> is supported by Encode, 205any source encoding is fine. 206 207When you encode, it just encodes UTF-8 string with I<X> encoding then 208quoted with =?UTF-8?I<X>?....?= . The parts that RFC 2047 forbids to 209encode are left as is and long lines are folded within 76 bytes per 210line. 211 212=head1 BUGS 213 214It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP? 215and =?ISO-8859-1?= but that makes the implementation too complicated. 216These days major mail agents all support =?UTF-8? so I think it is 217just good enough. 218 219=head1 SEE ALSO 220 221L<Encode> 222 223RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other 224locations. 225 226=cut 227