xref: /openbsd-src/gnu/usr.bin/perl/cpan/Encode/lib/Encode/Encoder.pm (revision e5157e49389faebcb42b7237d55fbf096d9c2523)
1#
2# $Id: Encoder.pm,v 2.3 2013/09/14 07:51:59 dankogai Exp $
3#
4package Encode::Encoder;
5use strict;
6use warnings;
7our $VERSION = do { my @r = ( q$Revision: 2.3 $ =~ /\d+/g ); sprintf "%d." . "%02d" x $#r, @r };
8
9require Exporter;
10our @ISA       = qw(Exporter);
11our @EXPORT_OK = qw ( encoder );
12
13our $AUTOLOAD;
14use constant DEBUG => !!$ENV{PERL_ENCODE_DEBUG};
15use Encode qw(encode decode find_encoding from_to);
16use Carp;
17
18sub new {
19    my ( $class, $data, $encname ) = @_;
20    unless ($encname) {
21        $encname = Encode::is_utf8($data) ? 'utf8' : '';
22    }
23    else {
24        my $obj = find_encoding($encname)
25          or croak __PACKAGE__, ": unknown encoding: $encname";
26        $encname = $obj->name;
27    }
28    my $self = {
29        data     => $data,
30        encoding => $encname,
31    };
32    bless $self => $class;
33}
34
35sub encoder { __PACKAGE__->new(@_) }
36
37sub data {
38    my ( $self, $data ) = @_;
39    if ( defined $data ) {
40        $self->{data} = $data;
41        return $data;
42    }
43    else {
44        return $self->{data};
45    }
46}
47
48sub encoding {
49    my ( $self, $encname ) = @_;
50    if ($encname) {
51        my $obj = find_encoding($encname)
52          or confess __PACKAGE__, ": unknown encoding: $encname";
53        $self->{encoding} = $obj->name;
54        return $self;
55    }
56    else {
57        return $self->{encoding};
58    }
59}
60
61sub bytes {
62    my ( $self, $encname ) = @_;
63    $encname ||= $self->{encoding};
64    my $obj = find_encoding($encname)
65      or confess __PACKAGE__, ": unknown encoding: $encname";
66    $self->{data} = $obj->decode( $self->{data}, 1 );
67    $self->{encoding} = '';
68    return $self;
69}
70
71sub DESTROY {    # defined so it won't autoload.
72    DEBUG and warn shift;
73}
74
75sub AUTOLOAD {
76    my $self = shift;
77    my $type = ref($self)
78      or confess "$self is not an object";
79    my $myname = $AUTOLOAD;
80    $myname =~ s/.*://;    # strip fully-qualified portion
81    my $obj = find_encoding($myname)
82      or confess __PACKAGE__, ": unknown encoding: $myname";
83    DEBUG and warn $self->{encoding}, " => ", $obj->name;
84    if ( $self->{encoding} ) {
85        from_to( $self->{data}, $self->{encoding}, $obj->name, 1 );
86    }
87    else {
88        $self->{data} = $obj->encode( $self->{data}, 1 );
89    }
90    $self->{encoding} = $obj->name;
91    return $self;
92}
93
94use overload
95  q("") => sub { $_[0]->{data} },
96  q(0+) => sub { use bytes(); bytes::length( $_[0]->{data} ) },
97  fallback => 1,
98  ;
99
1001;
101__END__
102
103=head1 NAME
104
105Encode::Encoder -- Object Oriented Encoder
106
107=head1 SYNOPSIS
108
109  use Encode::Encoder;
110  # Encode::encode("ISO-8859-1", $data);
111  Encode::Encoder->new($data)->iso_8859_1; # OOP way
112  # shortcut
113  use Encode::Encoder qw(encoder);
114  encoder($data)->iso_8859_1;
115  # you can stack them!
116  encoder($data)->iso_8859_1->base64;  # provided base64() is defined
117  # you can use it as a decoder as well
118  encoder($base64)->bytes('base64')->latin1;
119  # stringified
120  print encoder($data)->utf8->latin1;  # prints the string in latin1
121  # numified
122  encoder("\x{abcd}\x{ef}g")->utf8 == 6; # true. bytes::length($data)
123
124=head1 ABSTRACT
125
126B<Encode::Encoder> allows you to use Encode in an object-oriented
127style.  This is not only more intuitive than a functional approach,
128but also handier when you want to stack encodings.  Suppose you want
129your UTF-8 string converted to Latin1 then Base64: you can simply say
130
131  my $base64 = encoder($utf8)->latin1->base64;
132
133instead of
134
135  my $latin1 = encode("latin1", $utf8);
136  my $base64 = encode_base64($utf8);
137
138or the lazier and more convoluted
139
140  my $base64 = encode_base64(encode("latin1", $utf8));
141
142=head1 Description
143
144Here is how to use this module.
145
146=over 4
147
148=item *
149
150There are at least two instance variables stored in a hash reference,
151{data} and {encoding}.
152
153=item *
154
155When there is no method, it takes the method name as the name of the
156encoding and encodes the instance I<data> with I<encoding>.  If successful,
157the instance I<encoding> is set accordingly.
158
159=item *
160
161You can retrieve the result via -E<gt>data but usually you don't have to
162because the stringify operator ("") is overridden to do exactly that.
163
164=back
165
166=head2 Predefined Methods
167
168This module predefines the methods below:
169
170=over 4
171
172=item $e = Encode::Encoder-E<gt>new([$data, $encoding]);
173
174returns an encoder object.  Its data is initialized with $data if
175present, and its encoding is set to $encoding if present.
176
177When $encoding is omitted, it defaults to utf8 if $data is already in
178utf8 or "" (empty string) otherwise.
179
180=item encoder()
181
182is an alias of Encode::Encoder-E<gt>new().  This one is exported on demand.
183
184=item $e-E<gt>data([$data])
185
186When $data is present, sets the instance data to $data and returns the
187object itself.  Otherwise, the current instance data is returned.
188
189=item $e-E<gt>encoding([$encoding])
190
191When $encoding is present, sets the instance encoding to $encoding and
192returns the object itself.  Otherwise, the current instance encoding is
193returned.
194
195=item $e-E<gt>bytes([$encoding])
196
197decodes instance data from $encoding, or the instance encoding if
198omitted.  If the conversion is successful, the instance encoding
199will be set to "".
200
201The name I<bytes> was deliberately picked to avoid namespace tainting
202-- this module may be used as a base class so method names that appear
203in Encode::Encoding are avoided.
204
205=back
206
207=head2 Example: base64 transcoder
208
209This module is designed to work with L<Encode::Encoding>.
210To make the Base64 transcoder example above really work, you could
211write a module like this:
212
213  package Encode::Base64;
214  use parent 'Encode::Encoding';
215  __PACKAGE__->Define('base64');
216  use MIME::Base64;
217  sub encode{
218      my ($obj, $data) = @_;
219      return encode_base64($data);
220  }
221  sub decode{
222      my ($obj, $data) = @_;
223      return decode_base64($data);
224  }
225  1;
226  __END__
227
228And your caller module would be something like this:
229
230  use Encode::Encoder;
231  use Encode::Base64;
232
233  # now you can really do the following
234
235  encoder($data)->iso_8859_1->base64;
236  encoder($base64)->bytes('base64')->latin1;
237
238=head2 Operator Overloading
239
240This module overloads two operators, stringify ("") and numify (0+).
241
242Stringify dumps the data inside the object.
243
244Numify returns the number of bytes in the instance data.
245
246They come in handy when you want to print or find the size of data.
247
248=head1 SEE ALSO
249
250L<Encode>,
251L<Encode::Encoding>
252
253=cut
254