1package TAP::Parser::Grammar; 2 3use strict; 4use vars qw($VERSION @ISA); 5 6use TAP::Object (); 7use TAP::Parser::ResultFactory (); 8use TAP::Parser::YAMLish::Reader (); 9 10@ISA = qw(TAP::Object); 11 12=head1 NAME 13 14TAP::Parser::Grammar - A grammar for the Test Anything Protocol. 15 16=head1 VERSION 17 18Version 3.17 19 20=cut 21 22$VERSION = '3.17'; 23 24=head1 SYNOPSIS 25 26 use TAP::Parser::Grammar; 27 my $grammar = $self->make_grammar({ 28 stream => $tap_parser_stream, 29 parser => $tap_parser, 30 version => 12, 31 }); 32 33 my $result = $grammar->tokenize; 34 35=head1 DESCRIPTION 36 37C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs 38L<TAP::Parser::Result> subclasses to represent the tokens. 39 40Do not attempt to use this class directly. It won't make sense. It's mainly 41here to ensure that we will be able to have pluggable grammars when TAP is 42expanded at some future date (plus, this stuff was really cluttering the 43parser). 44 45=head1 METHODS 46 47=head2 Class Methods 48 49=head3 C<new> 50 51 my $grammar = TAP::Parser::Grammar->new({ 52 stream => $stream, 53 parser => $parser, 54 version => $version, 55 }); 56 57Returns L<TAP::Parser> grammar object that will parse the specified stream. 58Both C<stream> and C<parser> are required arguments. If C<version> is not set 59it defaults to C<12> (see L</set_version> for more details). 60 61=cut 62 63# new() implementation supplied by TAP::Object 64sub _initialize { 65 my ( $self, $args ) = @_; 66 $self->{stream} = $args->{stream}; # TODO: accessor 67 $self->{parser} = $args->{parser}; # TODO: accessor 68 $self->set_version( $args->{version} || 12 ); 69 return $self; 70} 71 72my %language_for; 73 74{ 75 76 # XXX the 'not' and 'ok' might be on separate lines in VMS ... 77 my $ok = qr/(?:not )?ok\b/; 78 my $num = qr/\d+/; 79 80 my %v12 = ( 81 version => { 82 syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i, 83 handler => sub { 84 my ( $self, $line ) = @_; 85 my $version = $1; 86 return $self->_make_version_token( $line, $version, ); 87 }, 88 }, 89 plan => { 90 syntax => qr/^1\.\.(\d+)\s*(.*)\z/, 91 handler => sub { 92 my ( $self, $line ) = @_; 93 my ( $tests_planned, $tail ) = ( $1, $2 ); 94 my $explanation = undef; 95 my $skip = ''; 96 97 if ( $tail =~ /^todo((?:\s+\d+)+)/ ) { 98 my @todo = split /\s+/, _trim($1); 99 return $self->_make_plan_token( 100 $line, $tests_planned, 'TODO', 101 '', \@todo 102 ); 103 } 104 elsif ( 0 == $tests_planned ) { 105 $skip = 'SKIP'; 106 107 # If we can't match # SKIP the directive should be undef. 108 ($explanation) = $tail =~ /^#\s*SKIP\S*\s+(.*)/i; 109 } 110 elsif ( $tail !~ /^\s*$/ ) { 111 return $self->_make_unknown_token($line); 112 } 113 114 $explanation = '' unless defined $explanation; 115 116 return $self->_make_plan_token( 117 $line, $tests_planned, $skip, 118 $explanation, [] 119 ); 120 121 }, 122 }, 123 124 # An optimization to handle the most common test lines without 125 # directives. 126 simple_test => { 127 syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x, 128 handler => sub { 129 my ( $self, $line ) = @_; 130 my ( $ok, $num, $desc ) = ( $1, $2, $3 ); 131 132 return $self->_make_test_token( 133 $line, $ok, $num, 134 $desc 135 ); 136 }, 137 }, 138 test => { 139 syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x, 140 handler => sub { 141 my ( $self, $line ) = @_; 142 my ( $ok, $num, $desc ) = ( $1, $2, $3 ); 143 my ( $dir, $explanation ) = ( '', '' ); 144 if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* ) 145 \# \s* (SKIP|TODO) \b \s* (.*) $/ix 146 ) 147 { 148 ( $desc, $dir, $explanation ) = ( $1, $2, $3 ); 149 } 150 return $self->_make_test_token( 151 $line, $ok, $num, $desc, 152 $dir, $explanation 153 ); 154 }, 155 }, 156 comment => { 157 syntax => qr/^#(.*)/, 158 handler => sub { 159 my ( $self, $line ) = @_; 160 my $comment = $1; 161 return $self->_make_comment_token( $line, $comment ); 162 }, 163 }, 164 bailout => { 165 syntax => qr/^Bail out!\s*(.*)/, 166 handler => sub { 167 my ( $self, $line ) = @_; 168 my $explanation = $1; 169 return $self->_make_bailout_token( 170 $line, 171 $explanation 172 ); 173 }, 174 }, 175 ); 176 177 my %v13 = ( 178 %v12, 179 plan => { 180 syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i, 181 handler => sub { 182 my ( $self, $line ) = @_; 183 my ( $tests_planned, $explanation ) = ( $1, $2 ); 184 my $skip 185 = ( 0 == $tests_planned || defined $explanation ) 186 ? 'SKIP' 187 : ''; 188 $explanation = '' unless defined $explanation; 189 return $self->_make_plan_token( 190 $line, $tests_planned, $skip, 191 $explanation, [] 192 ); 193 }, 194 }, 195 yaml => { 196 syntax => qr/^ (\s+) (---.*) $/x, 197 handler => sub { 198 my ( $self, $line ) = @_; 199 my ( $pad, $marker ) = ( $1, $2 ); 200 return $self->_make_yaml_token( $pad, $marker ); 201 }, 202 }, 203 pragma => { 204 syntax => 205 qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x, 206 handler => sub { 207 my ( $self, $line ) = @_; 208 my $pragmas = $1; 209 return $self->_make_pragma_token( $line, $pragmas ); 210 }, 211 }, 212 ); 213 214 %language_for = ( 215 '12' => { 216 tokens => \%v12, 217 }, 218 '13' => { 219 tokens => \%v13, 220 setup => sub { 221 shift->{stream}->handle_unicode; 222 }, 223 }, 224 ); 225} 226 227############################################################################## 228 229=head2 Instance Methods 230 231=head3 C<set_version> 232 233 $grammar->set_version(13); 234 235Tell the grammar which TAP syntax version to support. The lowest 236supported version is 12. Although 'TAP version' isn't valid version 12 237syntax it is accepted so that higher version numbers may be parsed. 238 239=cut 240 241sub set_version { 242 my $self = shift; 243 my $version = shift; 244 245 if ( my $language = $language_for{$version} ) { 246 $self->{version} = $version; 247 $self->{tokens} = $language->{tokens}; 248 249 if ( my $setup = $language->{setup} ) { 250 $self->$setup(); 251 } 252 253 $self->_order_tokens; 254 } 255 else { 256 require Carp; 257 Carp::croak("Unsupported syntax version: $version"); 258 } 259} 260 261# Optimization to put the most frequent tokens first. 262sub _order_tokens { 263 my $self = shift; 264 265 my %copy = %{ $self->{tokens} }; 266 my @ordered_tokens = grep {defined} 267 map { delete $copy{$_} } qw( simple_test test comment plan ); 268 push @ordered_tokens, values %copy; 269 270 $self->{ordered_tokens} = \@ordered_tokens; 271} 272 273############################################################################## 274 275=head3 C<tokenize> 276 277 my $token = $grammar->tokenize; 278 279This method will return a L<TAP::Parser::Result> object representing the 280current line of TAP. 281 282=cut 283 284sub tokenize { 285 my $self = shift; 286 287 my $line = $self->{stream}->next; 288 unless ( defined $line ) { 289 delete $self->{parser}; # break circular ref 290 return; 291 } 292 293 my $token; 294 295 foreach my $token_data ( @{ $self->{ordered_tokens} } ) { 296 if ( $line =~ $token_data->{syntax} ) { 297 my $handler = $token_data->{handler}; 298 $token = $self->$handler($line); 299 last; 300 } 301 } 302 303 $token = $self->_make_unknown_token($line) unless $token; 304 305 return $self->{parser}->make_result($token); 306} 307 308############################################################################## 309 310=head3 C<token_types> 311 312 my @types = $grammar->token_types; 313 314Returns the different types of tokens which this grammar can parse. 315 316=cut 317 318sub token_types { 319 my $self = shift; 320 return keys %{ $self->{tokens} }; 321} 322 323############################################################################## 324 325=head3 C<syntax_for> 326 327 my $syntax = $grammar->syntax_for($token_type); 328 329Returns a pre-compiled regular expression which will match a chunk of TAP 330corresponding to the token type. For example (not that you should really pay 331attention to this, C<< $grammar->syntax_for('comment') >> will return 332C<< qr/^#(.*)/ >>. 333 334=cut 335 336sub syntax_for { 337 my ( $self, $type ) = @_; 338 return $self->{tokens}->{$type}->{syntax}; 339} 340 341############################################################################## 342 343=head3 C<handler_for> 344 345 my $handler = $grammar->handler_for($token_type); 346 347Returns a code reference which, when passed an appropriate line of TAP, 348returns the lexed token corresponding to that line. As a result, the basic 349TAP parsing loop looks similar to the following: 350 351 my @tokens; 352 my $grammar = TAP::Grammar->new; 353 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) { 354 foreach my $type ( $grammar->token_types ) { 355 my $syntax = $grammar->syntax_for($type); 356 if ( $line =~ $syntax ) { 357 my $handler = $grammar->handler_for($type); 358 push @tokens => $grammar->$handler($line); 359 next LINE; 360 } 361 } 362 push @tokens => $grammar->_make_unknown_token($line); 363 } 364 365=cut 366 367sub handler_for { 368 my ( $self, $type ) = @_; 369 return $self->{tokens}->{$type}->{handler}; 370} 371 372sub _make_version_token { 373 my ( $self, $line, $version ) = @_; 374 return { 375 type => 'version', 376 raw => $line, 377 version => $version, 378 }; 379} 380 381sub _make_plan_token { 382 my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_; 383 384 if ( $directive eq 'SKIP' 385 && 0 != $tests_planned 386 && $self->{version} < 13 ) 387 { 388 warn 389 "Specified SKIP directive in plan but more than 0 tests ($line)\n"; 390 } 391 392 return { 393 type => 'plan', 394 raw => $line, 395 tests_planned => $tests_planned, 396 directive => $directive, 397 explanation => _trim($explanation), 398 todo_list => $todo, 399 }; 400} 401 402sub _make_test_token { 403 my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_; 404 return { 405 ok => $ok, 406 test_num => $num, 407 description => _trim($desc), 408 directive => ( defined $dir ? uc $dir : '' ), 409 explanation => _trim($explanation), 410 raw => $line, 411 type => 'test', 412 }; 413} 414 415sub _make_unknown_token { 416 my ( $self, $line ) = @_; 417 return { 418 raw => $line, 419 type => 'unknown', 420 }; 421} 422 423sub _make_comment_token { 424 my ( $self, $line, $comment ) = @_; 425 return { 426 type => 'comment', 427 raw => $line, 428 comment => _trim($comment) 429 }; 430} 431 432sub _make_bailout_token { 433 my ( $self, $line, $explanation ) = @_; 434 return { 435 type => 'bailout', 436 raw => $line, 437 bailout => _trim($explanation) 438 }; 439} 440 441sub _make_yaml_token { 442 my ( $self, $pad, $marker ) = @_; 443 444 my $yaml = TAP::Parser::YAMLish::Reader->new; 445 446 my $stream = $self->{stream}; 447 448 # Construct a reader that reads from our input stripping leading 449 # spaces from each line. 450 my $leader = length($pad); 451 my $strip = qr{ ^ (\s{$leader}) (.*) $ }x; 452 my @extra = ($marker); 453 my $reader = sub { 454 return shift @extra if @extra; 455 my $line = $stream->next; 456 return $2 if $line =~ $strip; 457 return; 458 }; 459 460 my $data = $yaml->read($reader); 461 462 # Reconstitute input. This is convoluted. Maybe we should just 463 # record it on the way in... 464 chomp( my $raw = $yaml->get_raw ); 465 $raw =~ s/^/$pad/mg; 466 467 return { 468 type => 'yaml', 469 raw => $raw, 470 data => $data 471 }; 472} 473 474sub _make_pragma_token { 475 my ( $self, $line, $pragmas ) = @_; 476 return { 477 type => 'pragma', 478 raw => $line, 479 pragmas => [ split /\s*,\s*/, _trim($pragmas) ], 480 }; 481} 482 483sub _trim { 484 my $data = shift; 485 486 return '' unless defined $data; 487 488 $data =~ s/^\s+//; 489 $data =~ s/\s+$//; 490 return $data; 491} 492 4931; 494 495=head1 TAP GRAMMAR 496 497B<NOTE:> This grammar is slightly out of date. There's still some discussion 498about it and a new one will be provided when we have things better defined. 499 500The L<TAP::Parser> does not use a formal grammar because TAP is essentially a 501stream-based protocol. In fact, it's quite legal to have an infinite stream. 502For the same reason that we don't apply regexes to streams, we're not using a 503formal grammar here. Instead, we parse the TAP in lines. 504 505For purposes for forward compatability, any result which does not match the 506following grammar is currently referred to as 507L<TAP::Parser::Result::Unknown>. It is I<not> a parse error. 508 509A formal grammar would look similar to the following: 510 511 (* 512 For the time being, I'm cheating on the EBNF by allowing 513 certain terms to be defined by POSIX character classes by 514 using the following syntax: 515 516 digit ::= [:digit:] 517 518 As far as I am aware, that's not valid EBNF. Sue me. I 519 didn't know how to write "char" otherwise (Unicode issues). 520 Suggestions welcome. 521 *) 522 523 tap ::= version? { comment | unknown } leading_plan lines 524 | 525 lines trailing_plan {comment} 526 527 version ::= 'TAP version ' positiveInteger {positiveInteger} "\n" 528 529 leading_plan ::= plan skip_directive? "\n" 530 531 trailing_plan ::= plan "\n" 532 533 plan ::= '1..' nonNegativeInteger 534 535 lines ::= line {line} 536 537 line ::= (comment | test | unknown | bailout ) "\n" 538 539 test ::= status positiveInteger? description? directive? 540 541 status ::= 'not '? 'ok ' 542 543 description ::= (character - (digit | '#')) {character - '#'} 544 545 directive ::= todo_directive | skip_directive 546 547 todo_directive ::= hash_mark 'TODO' ' ' {character} 548 549 skip_directive ::= hash_mark 'SKIP' ' ' {character} 550 551 comment ::= hash_mark {character} 552 553 hash_mark ::= '#' {' '} 554 555 bailout ::= 'Bail out!' {character} 556 557 unknown ::= { (character - "\n") } 558 559 (* POSIX character classes and other terminals *) 560 561 digit ::= [:digit:] 562 character ::= ([:print:] - "\n") 563 positiveInteger ::= ( digit - '0' ) {digit} 564 nonNegativeInteger ::= digit {digit} 565 566=head1 SUBCLASSING 567 568Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview. 569 570If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to 571do is read through the code. There's no easy way of summarizing it here. 572 573=head1 SEE ALSO 574 575L<TAP::Object>, 576L<TAP::Parser>, 577L<TAP::Parser::Iterator>, 578L<TAP::Parser::Result>, 579 580=cut 581