1package Encode::MIME::Header;
2use strict;
3# use warnings;
4our $VERSION = do { my @r = (q$Revision: 1.9 $ =~ /\d+/g); sprintf "%d."."%02d" x $#r, @r };
5use Encode qw(find_encoding encode_utf8 decode_utf8);
6use MIME::Base64;
7use Carp;
8
9my %seed =
10    (
11     decode_b     => '1', # decodes 'B' encoding ?
12     decode_q     => '1', # decodes 'Q' encoding ?
13     encode       => 'B', # encode with 'B' or 'Q' ?
14     bpl          => 75,  # bytes per line
15     );
16
17$Encode::Encoding{'MIME-Header'} =
18    bless {
19	%seed,
20	Name => 'MIME-Header',
21    } => __PACKAGE__;
22
23$Encode::Encoding{'MIME-B'} =
24    bless {
25	%seed,
26	decode_q  => 0,
27	Name      => 'MIME-B',
28    } => __PACKAGE__;
29
30$Encode::Encoding{'MIME-Q'} =
31    bless {
32	%seed,
33	decode_q    => 1,
34	encode      => 'Q',
35	Name        => 'MIME-Q',
36    } => __PACKAGE__;
37
38use base qw(Encode::Encoding);
39
40sub needs_lines { 1 }
41sub perlio_ok{ 0 };
42
43sub decode($$;$){
44    use utf8;
45    my ($obj, $str, $chk) = @_;
46    # zap spaces between encoded words
47    $str =~ s/\?=\s+=\?/\?==\?/gos;
48    # multi-line header to single line
49    $str =~ s/(:?\r|\n|\r\n)[ \t]//gos;
50    $str =~
51	s{
52	    =\?                  # begin encoded word
53		([0-9A-Za-z\-_]+) # charset (encoding)
54		\?([QqBb])\?     # delimiter
55		(.*?)            # Base64-encodede contents
56		\?=              # end encoded word
57	    }{
58		if    (uc($2) eq 'B'){
59		    $obj->{decode_b} or croak qq(MIME "B" unsupported);
60		    decode_b($1, $3);
61		}elsif(uc($2) eq 'Q'){
62		    $obj->{decode_q} or croak qq(MIME "Q" unsupported);
63		    decode_q($1, $3);
64		}else{
65		    croak qq(MIME "$2" encoding is nonexistent!);
66		}
67	    }egox;
68    $_[1] = '' if $chk;
69    return $str;
70}
71
72sub decode_b{
73    my $enc = shift;
74    my $d = find_encoding($enc)        or croak qq(Unknown encoding "$enc");
75    my $db64 = decode_base64(shift);
76    return $d->name eq 'utf8' ?
77	Encode::decode_utf8($db64) : $d->decode($db64, Encode::FB_PERLQQ);
78}
79
80sub decode_q{
81    my ($enc, $q) = @_;
82    my $d = find_encoding($enc) or croak qq(Unknown encoding "$enc");
83    $q =~ s/_/ /go;
84    $q =~ s/=([0-9A-Fa-f]{2})/pack("C", hex($1))/ego;
85    return $d->name eq 'utf8' ?
86	Encode::decode_utf8($q) : $d->decode($q, Encode::FB_PERLQQ);
87}
88
89my $especials =
90    join('|' =>
91	 map {quotemeta(chr($_))}
92	 unpack("C*", qq{()<>@,;:\"\'/[]?.=}));
93
94my $re_encoded_word =
95    qr{
96       (?:
97	=\?               # begin encoded word
98	(?:[0-9A-Za-z\-_]+) # charset (encoding)
99	\?(?:[QqBb])\?      # delimiter
100	(?:.*?)             # Base64-encodede contents
101	\?=                 # end encoded word
102       )
103      }xo;
104
105my $re_especials = qr{$re_encoded_word|$especials}xo;
106
107sub encode($$;$){
108    my ($obj, $str, $chk) = @_;
109    my @line = ();
110    for my $line (split /\r|\n|\r\n/o, $str){
111	my (@word, @subline);
112        for my $word (split /($re_especials)/o, $line){
113	    if ($word =~ /[^\x00-\x7f]/o or $word =~ /^$re_encoded_word$/o){
114		push @word, $obj->_encode($word);
115	    }else{
116		push @word, $word;
117	    }
118	}
119	my $subline = '';
120	for my $word (@word){
121	    use bytes ();
122	    if (bytes::length($subline) + bytes::length($word) > $obj->{bpl}){
123		push @subline, $subline;
124		$subline = '';
125	    }
126	    $subline .= $word;
127	}
128	$subline and push @subline, $subline;
129	push @line, join("\n " => @subline);
130    }
131    $_[1] = '' if $chk;
132    return join("\n", @line);
133}
134
135use constant HEAD  => '=?UTF-8?';
136use constant TAIL    => '?=';
137use constant SINGLE => { B => \&_encode_b, Q => \&_encode_q, };
138
139sub _encode{
140    my ($o, $str) = @_;
141    my $enc = $o->{encode};
142    my $llen = ($o->{bpl} - length(HEAD) - 2 - length(TAIL));
143    # to coerce a floating-point arithmetics, the following contains
144    # .0 in numbers -- dankogai
145    $llen *= $enc eq 'B' ? 3.0/4.0 : 1.0/3.0;
146    my @result = ();
147    my $chunk = '';
148    while(length(my $chr = substr($str, 0, 1, ''))){
149	use bytes ();
150	if (bytes::length($chunk) + bytes::length($chr) > $llen){
151	    push @result, SINGLE->{$enc}($chunk);
152	    $chunk = '';
153	}
154	$chunk .= $chr;
155    }
156    $chunk and push @result, SINGLE->{$enc}($chunk);
157    return @result;
158}
159
160sub _encode_b{
161    HEAD . 'B?' . encode_base64(encode_utf8(shift), '') . TAIL;
162}
163
164sub _encode_q{
165    my $chunk = shift;
166    $chunk =~ s{
167		([^0-9A-Za-z])
168	       }{
169		   join("" => map {sprintf "=%02X", $_} unpack("C*", $1))
170	       }egox;
171    return decode_utf8(HEAD . 'Q?' . $chunk . TAIL);
172}
173
1741;
175__END__
176
177=head1 NAME
178
179Encode::MIME::Header -- MIME 'B' and 'Q' header encoding
180
181=head1 SYNOPSIS
182
183    use Encode qw/encode decode/;
184    $utf8   = decode('MIME-Header', $header);
185    $header = encode('MIME-Header', $utf8);
186
187=head1 ABSTRACT
188
189This module implements RFC 2047 Mime Header Encoding.  There are 3
190variant encoding names; C<MIME-Header>, C<MIME-B> and C<MIME-Q>.  The
191difference is described below
192
193              decode()          encode()
194  ----------------------------------------------
195  MIME-Header Both B and Q      =?UTF-8?B?....?=
196  MIME-B      B only; Q croaks  =?UTF-8?B?....?=
197  MIME-Q      Q only; B croaks  =?UTF-8?Q?....?=
198
199=head1 DESCRIPTION
200
201When you decode(=?I<encoding>?I<X>?I<ENCODED WORD>?=), I<ENCODED WORD>
202is extracted and decoded for I<X> encoding (B for Base64, Q for
203Quoted-Printable). Then the decoded chunk is fed to
204decode(I<encoding>).  So long as I<encoding> is supported by Encode,
205any source encoding is fine.
206
207When you encode, it just encodes UTF-8 string with I<X> encoding then
208quoted with =?UTF-8?I<X>?....?= .  The parts that RFC 2047 forbids to
209encode are left as is and long lines are folded within 76 bytes per
210line.
211
212=head1 BUGS
213
214It would be nice to support encoding to non-UTF8, such as =?ISO-2022-JP?
215and =?ISO-8859-1?= but that makes the implementation too complicated.
216These days major mail agents all support =?UTF-8? so I think it is
217just good enough.
218
219=head1 SEE ALSO
220
221L<Encode>
222
223RFC 2047, L<http://www.faqs.org/rfcs/rfc2047.html> and many other
224locations.
225
226=cut
227