xref: /openbsd-src/gnu/usr.bin/perl/cpan/Test-Harness/lib/TAP/Parser/Grammar.pm (revision d13be5d47e4149db2549a9828e244d59dbc43f15)
1package TAP::Parser::Grammar;
2
3use strict;
4use vars qw($VERSION @ISA);
5
6use TAP::Object                  ();
7use TAP::Parser::ResultFactory   ();
8use TAP::Parser::YAMLish::Reader ();
9
10@ISA = qw(TAP::Object);
11
12=head1 NAME
13
14TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
15
16=head1 VERSION
17
18Version 3.17
19
20=cut
21
22$VERSION = '3.17';
23
24=head1 SYNOPSIS
25
26  use TAP::Parser::Grammar;
27  my $grammar = $self->make_grammar({
28    stream  => $tap_parser_stream,
29    parser  => $tap_parser,
30    version => 12,
31  });
32
33  my $result = $grammar->tokenize;
34
35=head1 DESCRIPTION
36
37C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
38L<TAP::Parser::Result> subclasses to represent the tokens.
39
40Do not attempt to use this class directly.  It won't make sense.  It's mainly
41here to ensure that we will be able to have pluggable grammars when TAP is
42expanded at some future date (plus, this stuff was really cluttering the
43parser).
44
45=head1 METHODS
46
47=head2 Class Methods
48
49=head3 C<new>
50
51  my $grammar = TAP::Parser::Grammar->new({
52      stream  => $stream,
53      parser  => $parser,
54      version => $version,
55  });
56
57Returns L<TAP::Parser> grammar object that will parse the specified stream.
58Both C<stream> and C<parser> are required arguments.  If C<version> is not set
59it defaults to C<12> (see L</set_version> for more details).
60
61=cut
62
63# new() implementation supplied by TAP::Object
64sub _initialize {
65    my ( $self, $args ) = @_;
66    $self->{stream} = $args->{stream};    # TODO: accessor
67    $self->{parser} = $args->{parser};    # TODO: accessor
68    $self->set_version( $args->{version} || 12 );
69    return $self;
70}
71
72my %language_for;
73
74{
75
76    # XXX the 'not' and 'ok' might be on separate lines in VMS ...
77    my $ok  = qr/(?:not )?ok\b/;
78    my $num = qr/\d+/;
79
80    my %v12 = (
81        version => {
82            syntax  => qr/^TAP\s+version\s+(\d+)\s*\z/i,
83            handler => sub {
84                my ( $self, $line ) = @_;
85                my $version = $1;
86                return $self->_make_version_token( $line, $version, );
87            },
88        },
89        plan => {
90            syntax  => qr/^1\.\.(\d+)\s*(.*)\z/,
91            handler => sub {
92                my ( $self, $line ) = @_;
93                my ( $tests_planned, $tail ) = ( $1, $2 );
94                my $explanation = undef;
95                my $skip        = '';
96
97                if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
98                    my @todo = split /\s+/, _trim($1);
99                    return $self->_make_plan_token(
100                        $line, $tests_planned, 'TODO',
101                        '',    \@todo
102                    );
103                }
104                elsif ( 0 == $tests_planned ) {
105                    $skip = 'SKIP';
106
107                    # If we can't match # SKIP the directive should be undef.
108                    ($explanation) = $tail =~ /^#\s*SKIP\S*\s+(.*)/i;
109                }
110                elsif ( $tail !~ /^\s*$/ ) {
111                    return $self->_make_unknown_token($line);
112                }
113
114                $explanation = '' unless defined $explanation;
115
116                return $self->_make_plan_token(
117                    $line, $tests_planned, $skip,
118                    $explanation, []
119                );
120
121            },
122        },
123
124        # An optimization to handle the most common test lines without
125        # directives.
126        simple_test => {
127            syntax  => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
128            handler => sub {
129                my ( $self, $line ) = @_;
130                my ( $ok, $num, $desc ) = ( $1, $2, $3 );
131
132                return $self->_make_test_token(
133                    $line, $ok, $num,
134                    $desc
135                );
136            },
137        },
138        test => {
139            syntax  => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
140            handler => sub {
141                my ( $self, $line ) = @_;
142                my ( $ok, $num, $desc ) = ( $1, $2, $3 );
143                my ( $dir, $explanation ) = ( '', '' );
144                if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
145                       \# \s* (SKIP|TODO) \b \s* (.*) $/ix
146                  )
147                {
148                    ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
149                }
150                return $self->_make_test_token(
151                    $line, $ok, $num, $desc,
152                    $dir,  $explanation
153                );
154            },
155        },
156        comment => {
157            syntax  => qr/^#(.*)/,
158            handler => sub {
159                my ( $self, $line ) = @_;
160                my $comment = $1;
161                return $self->_make_comment_token( $line, $comment );
162            },
163        },
164        bailout => {
165            syntax  => qr/^Bail out!\s*(.*)/,
166            handler => sub {
167                my ( $self, $line ) = @_;
168                my $explanation = $1;
169                return $self->_make_bailout_token(
170                    $line,
171                    $explanation
172                );
173            },
174        },
175    );
176
177    my %v13 = (
178        %v12,
179        plan => {
180            syntax  => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
181            handler => sub {
182                my ( $self, $line ) = @_;
183                my ( $tests_planned, $explanation ) = ( $1, $2 );
184                my $skip
185                  = ( 0 == $tests_planned || defined $explanation )
186                  ? 'SKIP'
187                  : '';
188                $explanation = '' unless defined $explanation;
189                return $self->_make_plan_token(
190                    $line, $tests_planned, $skip,
191                    $explanation, []
192                );
193            },
194        },
195        yaml => {
196            syntax  => qr/^ (\s+) (---.*) $/x,
197            handler => sub {
198                my ( $self, $line ) = @_;
199                my ( $pad, $marker ) = ( $1, $2 );
200                return $self->_make_yaml_token( $pad, $marker );
201            },
202        },
203        pragma => {
204            syntax =>
205              qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
206            handler => sub {
207                my ( $self, $line ) = @_;
208                my $pragmas = $1;
209                return $self->_make_pragma_token( $line, $pragmas );
210            },
211        },
212    );
213
214    %language_for = (
215        '12' => {
216            tokens => \%v12,
217        },
218        '13' => {
219            tokens => \%v13,
220            setup  => sub {
221                shift->{stream}->handle_unicode;
222            },
223        },
224    );
225}
226
227##############################################################################
228
229=head2 Instance Methods
230
231=head3 C<set_version>
232
233  $grammar->set_version(13);
234
235Tell the grammar which TAP syntax version to support. The lowest
236supported version is 12. Although 'TAP version' isn't valid version 12
237syntax it is accepted so that higher version numbers may be parsed.
238
239=cut
240
241sub set_version {
242    my $self    = shift;
243    my $version = shift;
244
245    if ( my $language = $language_for{$version} ) {
246        $self->{version} = $version;
247        $self->{tokens}  = $language->{tokens};
248
249        if ( my $setup = $language->{setup} ) {
250            $self->$setup();
251        }
252
253        $self->_order_tokens;
254    }
255    else {
256        require Carp;
257        Carp::croak("Unsupported syntax version: $version");
258    }
259}
260
261# Optimization to put the most frequent tokens first.
262sub _order_tokens {
263    my $self = shift;
264
265    my %copy = %{ $self->{tokens} };
266    my @ordered_tokens = grep {defined}
267      map { delete $copy{$_} } qw( simple_test test comment plan );
268    push @ordered_tokens, values %copy;
269
270    $self->{ordered_tokens} = \@ordered_tokens;
271}
272
273##############################################################################
274
275=head3 C<tokenize>
276
277  my $token = $grammar->tokenize;
278
279This method will return a L<TAP::Parser::Result> object representing the
280current line of TAP.
281
282=cut
283
284sub tokenize {
285    my $self = shift;
286
287    my $line = $self->{stream}->next;
288    unless ( defined $line ) {
289        delete $self->{parser};    # break circular ref
290        return;
291    }
292
293    my $token;
294
295    foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
296        if ( $line =~ $token_data->{syntax} ) {
297            my $handler = $token_data->{handler};
298            $token = $self->$handler($line);
299            last;
300        }
301    }
302
303    $token = $self->_make_unknown_token($line) unless $token;
304
305    return $self->{parser}->make_result($token);
306}
307
308##############################################################################
309
310=head3 C<token_types>
311
312  my @types = $grammar->token_types;
313
314Returns the different types of tokens which this grammar can parse.
315
316=cut
317
318sub token_types {
319    my $self = shift;
320    return keys %{ $self->{tokens} };
321}
322
323##############################################################################
324
325=head3 C<syntax_for>
326
327  my $syntax = $grammar->syntax_for($token_type);
328
329Returns a pre-compiled regular expression which will match a chunk of TAP
330corresponding to the token type.  For example (not that you should really pay
331attention to this, C<< $grammar->syntax_for('comment') >> will return
332C<< qr/^#(.*)/ >>.
333
334=cut
335
336sub syntax_for {
337    my ( $self, $type ) = @_;
338    return $self->{tokens}->{$type}->{syntax};
339}
340
341##############################################################################
342
343=head3 C<handler_for>
344
345  my $handler = $grammar->handler_for($token_type);
346
347Returns a code reference which, when passed an appropriate line of TAP,
348returns the lexed token corresponding to that line.  As a result, the basic
349TAP parsing loop looks similar to the following:
350
351 my @tokens;
352 my $grammar = TAP::Grammar->new;
353 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
354     foreach my $type ( $grammar->token_types ) {
355         my $syntax  = $grammar->syntax_for($type);
356         if ( $line =~ $syntax ) {
357             my $handler = $grammar->handler_for($type);
358             push @tokens => $grammar->$handler($line);
359             next LINE;
360         }
361     }
362     push @tokens => $grammar->_make_unknown_token($line);
363 }
364
365=cut
366
367sub handler_for {
368    my ( $self, $type ) = @_;
369    return $self->{tokens}->{$type}->{handler};
370}
371
372sub _make_version_token {
373    my ( $self, $line, $version ) = @_;
374    return {
375        type    => 'version',
376        raw     => $line,
377        version => $version,
378    };
379}
380
381sub _make_plan_token {
382    my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
383
384    if (   $directive eq 'SKIP'
385        && 0 != $tests_planned
386        && $self->{version} < 13 )
387    {
388        warn
389          "Specified SKIP directive in plan but more than 0 tests ($line)\n";
390    }
391
392    return {
393        type          => 'plan',
394        raw           => $line,
395        tests_planned => $tests_planned,
396        directive     => $directive,
397        explanation   => _trim($explanation),
398        todo_list     => $todo,
399    };
400}
401
402sub _make_test_token {
403    my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
404    return {
405        ok          => $ok,
406        test_num    => $num,
407        description => _trim($desc),
408        directive   => ( defined $dir ? uc $dir : '' ),
409        explanation => _trim($explanation),
410        raw         => $line,
411        type        => 'test',
412    };
413}
414
415sub _make_unknown_token {
416    my ( $self, $line ) = @_;
417    return {
418        raw  => $line,
419        type => 'unknown',
420    };
421}
422
423sub _make_comment_token {
424    my ( $self, $line, $comment ) = @_;
425    return {
426        type    => 'comment',
427        raw     => $line,
428        comment => _trim($comment)
429    };
430}
431
432sub _make_bailout_token {
433    my ( $self, $line, $explanation ) = @_;
434    return {
435        type    => 'bailout',
436        raw     => $line,
437        bailout => _trim($explanation)
438    };
439}
440
441sub _make_yaml_token {
442    my ( $self, $pad, $marker ) = @_;
443
444    my $yaml = TAP::Parser::YAMLish::Reader->new;
445
446    my $stream = $self->{stream};
447
448    # Construct a reader that reads from our input stripping leading
449    # spaces from each line.
450    my $leader = length($pad);
451    my $strip  = qr{ ^ (\s{$leader}) (.*) $ }x;
452    my @extra  = ($marker);
453    my $reader = sub {
454        return shift @extra if @extra;
455        my $line = $stream->next;
456        return $2 if $line =~ $strip;
457        return;
458    };
459
460    my $data = $yaml->read($reader);
461
462    # Reconstitute input. This is convoluted. Maybe we should just
463    # record it on the way in...
464    chomp( my $raw = $yaml->get_raw );
465    $raw =~ s/^/$pad/mg;
466
467    return {
468        type => 'yaml',
469        raw  => $raw,
470        data => $data
471    };
472}
473
474sub _make_pragma_token {
475    my ( $self, $line, $pragmas ) = @_;
476    return {
477        type    => 'pragma',
478        raw     => $line,
479        pragmas => [ split /\s*,\s*/, _trim($pragmas) ],
480    };
481}
482
483sub _trim {
484    my $data = shift;
485
486    return '' unless defined $data;
487
488    $data =~ s/^\s+//;
489    $data =~ s/\s+$//;
490    return $data;
491}
492
4931;
494
495=head1 TAP GRAMMAR
496
497B<NOTE:>  This grammar is slightly out of date.  There's still some discussion
498about it and a new one will be provided when we have things better defined.
499
500The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
501stream-based protocol.  In fact, it's quite legal to have an infinite stream.
502For the same reason that we don't apply regexes to streams, we're not using a
503formal grammar here.  Instead, we parse the TAP in lines.
504
505For purposes for forward compatability, any result which does not match the
506following grammar is currently referred to as
507L<TAP::Parser::Result::Unknown>.  It is I<not> a parse error.
508
509A formal grammar would look similar to the following:
510
511 (*
512     For the time being, I'm cheating on the EBNF by allowing
513     certain terms to be defined by POSIX character classes by
514     using the following syntax:
515
516       digit ::= [:digit:]
517
518     As far as I am aware, that's not valid EBNF.  Sue me.  I
519     didn't know how to write "char" otherwise (Unicode issues).
520     Suggestions welcome.
521 *)
522
523 tap            ::= version? { comment | unknown } leading_plan lines
524                    |
525                    lines trailing_plan {comment}
526
527 version        ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
528
529 leading_plan   ::= plan skip_directive? "\n"
530
531 trailing_plan  ::= plan "\n"
532
533 plan           ::= '1..' nonNegativeInteger
534
535 lines          ::= line {line}
536
537 line           ::= (comment | test | unknown | bailout ) "\n"
538
539 test           ::= status positiveInteger? description? directive?
540
541 status         ::= 'not '? 'ok '
542
543 description    ::= (character - (digit | '#')) {character - '#'}
544
545 directive      ::= todo_directive | skip_directive
546
547 todo_directive ::= hash_mark 'TODO' ' ' {character}
548
549 skip_directive ::= hash_mark 'SKIP' ' ' {character}
550
551 comment        ::= hash_mark {character}
552
553 hash_mark      ::= '#' {' '}
554
555 bailout        ::= 'Bail out!' {character}
556
557 unknown        ::= { (character - "\n") }
558
559 (* POSIX character classes and other terminals *)
560
561 digit              ::= [:digit:]
562 character          ::= ([:print:] - "\n")
563 positiveInteger    ::= ( digit - '0' ) {digit}
564 nonNegativeInteger ::= digit {digit}
565
566=head1 SUBCLASSING
567
568Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
569
570If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
571do is read through the code.  There's no easy way of summarizing it here.
572
573=head1 SEE ALSO
574
575L<TAP::Object>,
576L<TAP::Parser>,
577L<TAP::Parser::Iterator>,
578L<TAP::Parser::Result>,
579
580=cut
581