xref: /netbsd-src/crypto/dist/ipsec-tools/misc/cvs2cl.pl (revision 2b68c3a06a13906b3b45b45bfb14d4d52a38da63)
1#!/bin/sh
2exec perl -w -x "$0" ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*-
3#!perl -w
4
5
6##############################################################
7###                                                        ###
8### cvs2cl.pl: produce ChangeLog(s) from `cvs log` output. ###
9###                                                        ###
10##############################################################
11
12## $Revision: 1.1 $
13## $Date: 2009/01/20 14:36:08 $
14## $Author: tteras $
15##
16
17use strict;
18
19use File::Basename qw( fileparse );
20use Getopt::Long   qw( GetOptions );
21use Text::Wrap     qw( );
22use Time::Local    qw( timegm );
23use User::pwent    qw( getpwnam );
24
25# The Plan:
26#
27# Read in the logs for multiple files, spit out a nice ChangeLog that
28# mirrors the information entered during `cvs commit'.
29#
30# The problem presents some challenges. In an ideal world, we could
31# detect files with the same author, log message, and checkin time --
32# each <filelist, author, time, logmessage> would be a changelog entry.
33# We'd sort them; and spit them out.  Unfortunately, CVS is *not atomic*
34# so checkins can span a range of times.  Also, the directory structure
35# could be hierarchical.
36#
37# Another question is whether we really want to have the ChangeLog
38# exactly reflect commits. An author could issue two related commits,
39# with different log entries, reflecting a single logical change to the
40# source. GNU style ChangeLogs group these under a single author/date.
41# We try to do the same.
42#
43# So, we parse the output of `cvs log', storing log messages in a
44# multilevel hash that stores the mapping:
45#   directory => author => time => message => filelist
46# As we go, we notice "nearby" commit times and store them together
47# (i.e., under the same timestamp), so they appear in the same log
48# entry.
49#
50# When we've read all the logs, we twist this mapping into
51# a time => author => message => filelist mapping for each directory.
52#
53# If we're not using the `--distributed' flag, the directory is always
54# considered to be `./', even as descend into subdirectories.
55
56# Call Tree
57
58# name                         number of lines (10.xii.03)
59# parse_options                         192
60# derive_changelog                       13
61# +-maybe_grab_accumulation_date         38
62# +-read_changelog                      277
63#   +-maybe_read_user_map_file           94
64#     +-run_ext                           9
65#   +-read_file_path                     29
66#   +-read_symbolic_name                 43
67#   +-read_revision                      49
68#   +-read_date_author_and_state         25
69#     +-parse_date_author_and_state      20
70#   +-read_branches                      36
71# +-output_changelog                    424
72#   +-pretty_file_list                  290
73#     +-common_path_prefix               35
74#   +-preprocess_msg_text                30
75#     +-min                               1
76#   +-mywrap                             16
77#   +-last_line_len                       5
78#   +-wrap_log_entry                    177
79#
80# Utilities
81#
82# xml_escape                              6
83# slurp_file                             11
84# debug                                   5
85# version                                 2
86# usage                                 142
87
88# -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*-
89#
90# Note about a bug-slash-opportunity:
91# -----------------------------------
92#
93# There's a bug in Text::Wrap, which affects cvs2cl.  This script
94# reveals it:
95#
96#   #!/usr/bin/perl -w
97#
98#   use Text::Wrap;
99#
100#   my $test_text =
101#   "This script demonstrates a bug in Text::Wrap.  The very long line
102#   following this paragraph will be relocated relative to the surrounding
103#   text:
104#
105#   ====================================================================
106#
107#   See?  When the bug happens, we'll get the line of equal signs below
108#   this paragraph, even though it should be above.";
109#
110#
111#   # Print out the test text with no wrapping:
112#   print "$test_text";
113#   print "\n";
114#   print "\n";
115#
116#   # Now print it out wrapped, and see the bug:
117#   print wrap ("\t", "        ", "$test_text");
118#   print "\n";
119#   print "\n";
120#
121# If the line of equal signs were one shorter, then the bug doesn't
122# happen.  Interesting.
123#
124# Anyway, rather than fix this in Text::Wrap, we might as well write a
125# new wrap() which has the following much-needed features:
126#
127# * initial indentation, like current Text::Wrap()
128# * subsequent line indentation, like current Text::Wrap()
129# * user chooses among: force-break long words, leave them alone, or die()?
130# * preserve existing indentation: chopped chunks from an indented line
131#   are indented by same (like this line, not counting the asterisk!)
132# * optional list of things to preserve on line starts, default ">"
133#
134# Note that the last two are essentially the same concept, so unify in
135# implementation and give a good interface to controlling them.
136#
137# And how about:
138#
139# Optionally, when encounter a line pre-indented by same as previous
140# line, then strip the newline and refill, but indent by the same.
141# Yeah...
142
143# Globals --------------------------------------------------------------------
144
145# In case we have to print it out:
146my $VERSION = '$Revision: 1.1 $';
147$VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/;
148
149## Vars set by options:
150
151# Print debugging messages?
152my $Debug = 0;
153
154# Just show version and exit?
155my $Print_Version = 0;
156
157# Just print usage message and exit?
158my $Print_Usage = 0;
159
160# What file should we generate (defaults to "ChangeLog")?
161my $Log_File_Name = "ChangeLog";
162
163# Grab most recent entry date from existing ChangeLog file, just add
164# to that ChangeLog.
165my $Cumulative = 0;
166
167# `cvs log -d`, this will repeat the last entry in the old log.  This is OK,
168# as it guarantees at least one entry in the update changelog, which means
169# that there will always be a date to extract for the next update.  The repeat
170# entry can be removed in postprocessing, if necessary.
171
172# MJP 2003-08-02
173# I don't think this actually does anything useful
174my $Update = 0;
175
176# Expand usernames to email addresses based on a map file?
177my $User_Map_File = '';
178my $User_Passwd_File;
179my $Mail_Domain;
180
181# Output log in chronological order? [default is reverse chronological order]
182my $Chronological_Order = 0;
183
184# Grab user details via gecos
185my $Gecos = 0;
186
187# User domain for gecos email addresses
188my $Domain;
189
190# Output to a file or to stdout?
191my $Output_To_Stdout = 0;
192
193# Eliminate empty log messages?
194my $Prune_Empty_Msgs = 0;
195
196# Tags of which not to output
197my %ignore_tags;
198
199# Show only revisions with Tags
200my %show_tags;
201
202# Don't call Text::Wrap on the body of the message
203my $No_Wrap = 0;
204
205# Indentation of log messages
206my $Indent = "\t";
207
208# Don't do any pretty print processing
209my $Summary = 0;
210
211# Separates header from log message.  Code assumes it is either " " or
212# "\n\n", so if there's ever an option to set it to something else,
213# make sure to go through all conditionals that use this var.
214my $After_Header = " ";
215
216# XML Encoding
217my $XML_Encoding = '';
218
219# Format more for programs than for humans.
220my $XML_Output = 0;
221my $No_XML_Namespace = 0;
222my $No_XML_ISO_Date = 0;
223
224# Do some special tweaks for log data that was written in FSF
225# ChangeLog style.
226my $FSF_Style = 0;
227
228# Set iff output should be like an FSF-style ChangeLog.
229my $FSF_Output = 0;
230
231# Show times in UTC instead of local time
232my $UTC_Times = 0;
233
234# Show times in output?
235my $Show_Times = 1;
236
237# Show day of week in output?
238my $Show_Day_Of_Week = 0;
239
240# Show revision numbers in output?
241my $Show_Revisions = 0;
242
243# Show dead files in output?
244my $Show_Dead = 0;
245
246# Hide dead trunk files which were created as a result of additions on a
247# branch?
248my $Hide_Branch_Additions = 1;
249
250# Show tags (symbolic names) in output?
251my $Show_Tags = 0;
252
253# Show tags separately in output?
254my $Show_Tag_Dates = 0;
255
256# Show branches by symbolic name in output?
257my $Show_Branches = 0;
258
259# Show only revisions on these branches or their ancestors.
260my @Follow_Branches;
261# Show only revisions on these branches or their ancestors; ignore descendent
262# branches.
263my @Follow_Only;
264
265# Don't bother with files matching this regexp.
266my @Ignore_Files;
267
268# How exactly we match entries.  We definitely want "o",
269# and user might add "i" by using --case-insensitive option.
270my $Case_Insensitive = 0;
271
272# Maybe only show log messages matching a certain regular expression.
273my $Regexp_Gate = '';
274
275# Show tags only matching certain regular expression.
276my $Regexp_Tag = '';
277
278# Pass this global option string along to cvs, to the left of `log':
279my $Global_Opts = '';
280
281# Pass this option string along to the cvs log subcommand:
282my $Command_Opts = '';
283
284# Read log output from stdin instead of invoking cvs log?
285my $Input_From_Stdin = 0;
286
287# Don't show filenames in output.
288my $Hide_Filenames = 0;
289
290# Don't shorten directory names from filenames.
291my $Common_Dir = 1;
292
293# Max checkin duration. CVS checkin is not atomic, so we may have checkin
294# times that span a range of time. We assume that checkins will last no
295# longer than $Max_Checkin_Duration seconds, and that similarly, no
296# checkins will happen from the same users with the same message less
297# than $Max_Checkin_Duration seconds apart.
298my $Max_Checkin_Duration = 180;
299
300# What to put at the front of [each] ChangeLog.
301my $ChangeLog_Header = '';
302
303# Whether to enable 'delta' mode, and for what start/end tags.
304my $Delta_Mode = 0;
305my $Delta_From = '';
306my $Delta_To = '';
307
308my $TestCode;
309
310# Whether to parse filenames from the RCS filename, and if so what
311# prefix to strip.
312my $RCS_Root;
313
314# Whether to output information on the # of lines added and removed
315# by each file modification.
316my $Show_Lines_Modified = 0;
317
318## end vars set by options.
319
320# latest observed times for the start/end tags in delta mode
321my $Delta_StartTime = 0;
322my $Delta_EndTime = 0;
323
324my $No_Ancestors = 0;
325
326my $No_Extra_Indent = 0;
327
328my $GroupByDate = 0;
329my $GroupByAuthor = 0;
330
331# ----------------------------------------------------------------------------
332
333package CVS::Utils::ChangeLog::EntrySet;
334
335sub new {
336  my $class = shift;
337  my %self;
338  bless \%self, $class;
339}
340
341# -------------------------------------
342
343sub output_changelog {
344  my $output_type = $XML_Output ? 'XML' : 'Text';
345  my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}";
346  my $output = $output_class->new(follow_branches => \@Follow_Branches,
347                                  follow_only     => \@Follow_Only,
348                                  ignore_tags     => \%ignore_tags,
349                                  show_tags       => \%show_tags,
350                                 );
351  $output->output_changelog(@_);
352}
353
354# -------------------------------------
355
356sub add_fileentry {
357  my ($self, $file_full_path, $time, $revision, $state, $lines,
358      $branch_names, $branch_roots, $branch_numbers,
359      $symbolic_names, $author, $msg_txt) = @_;
360
361      my $qunk =
362        CVS::Utils::ChangeLog::FileEntry->new($file_full_path, $time, $revision,
363                                              $state, $lines,
364                                              $branch_names, $branch_roots,
365                                              $branch_numbers,
366                                              $symbolic_names);
367
368      # We might be including revision numbers and/or tags and/or
369      # branch names in the output.  Most of the code from here to
370      # loop-end deals with organizing these in qunk.
371
372      unless ( $Hide_Branch_Additions
373               and
374               $msg_txt =~ /file .+ was initially added on branch \S+./ ) {
375        # Add this file to the list
376        # (We use many spoonfuls of autovivication magic. Hashes and arrays
377        # will spring into existence if they aren't there already.)
378
379        &main::debug ("(pushing log msg for ". $qunk->dir_key . $qunk->filename . ")\n");
380
381        # Store with the files in this commit.  Later we'll loop through
382        # again, making sure that revisions with the same log message
383        # and nearby commit times are grouped together as one commit.
384        $self->{$qunk->dir_key}{$author}{$time}{$msg_txt} =
385          CVS::Utils::ChangeLog::Message->new($msg_txt)
386              unless exists $self->{$qunk->dir_key}{$author}{$time}{$msg_txt};
387        $self->{$qunk->dir_key}{$author}{$time}{$msg_txt}->add_fileentry($qunk);
388      }
389
390}
391
392# ----------------------------------------------------------------------------
393
394package CVS::Utils::ChangeLog::EntrySet::Output::Text;
395
396use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
397
398use File::Basename qw( fileparse );
399
400sub new {
401  my $class = shift;
402  my $self = $class->SUPER::new(@_);
403}
404
405# -------------------------------------
406
407sub wday {
408  my $self = shift; my $class = ref $self;
409  my ($wday) = @_;
410
411  return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : '';
412}
413
414# -------------------------------------
415
416sub header_line {
417  my $self = shift;
418  my ($time, $author, $lastdate, $lastauthor) = @_;
419
420  my $header_line = '';
421
422  my (undef,$min,$hour,$mday,$mon,$year,$wday)
423    = $UTC_Times ? gmtime($time) : localtime($time);
424
425  my $date = $self->fdatetime($time);
426
427  if ($Show_Times) {
428    $header_line = sprintf "%s  %s\n\n", $date, $author;
429  } else {
430    if ( $GroupByDate and ($date eq $lastdate) and
431         ((!$GroupByAuthor) or ($author eq $lastauthor)) ) {
432      $header_line = '';
433    } else {
434      if ( $GroupByDate and ! $GroupByAuthor ) {
435        $header_line = "$date\n\n";
436      } else {
437        $header_line = "$date  $author\n\n";
438      }
439    }
440  }
441}
442
443# -------------------------------------
444
445sub preprocess_msg_text {
446  my $self = shift;
447  my ($text) = @_;
448
449  $text = $self->SUPER::preprocess_msg_text($text);
450
451  unless ( $No_Wrap ) {
452    # Strip off lone newlines, but only for lines that don't begin with
453    # whitespace or a mail-quoting character, since we want to preserve
454    # that kind of formatting.  Also don't strip newlines that follow a
455    # period; we handle those specially next.  And don't strip
456    # newlines that precede an open paren.
457    1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g;
458
459    # If a newline follows a period, make sure that when we bring up the
460    # bottom sentence, it begins with two spaces.
461    1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2  $3/g;
462  }
463
464  return $text;
465}
466
467# -------------------------------------
468
469# Here we take a bunch of qunks and convert them into printed
470# summary that will include all the information the user asked for.
471sub pretty_file_list {
472  my $self = shift;
473
474  return ''
475    if $Hide_Filenames;
476
477  my $qunksref = shift;
478
479  my @filenames;
480  my $beauty = '';          # The accumulating header string for this entry.
481  my %non_unanimous_tags;   # Tags found in a proper subset of qunks
482  my %unanimous_tags;       # Tags found in all qunks
483  my %all_branches;         # Branches found in any qunk
484  my $fbegun = 0;           # Did we begin printing filenames yet?
485
486  my ($common_dir, $qunkrefs) =
487    $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref);
488
489  my @qunkrefs = @$qunkrefs;
490
491  # Not XML output, so complexly compactify for chordate consumption.  At this
492  # point we have enough global information about all the qunks to organize
493  # them non-redundantly for output.
494
495  if ($common_dir) {
496    # Note that $common_dir still has its trailing slash
497    $beauty .= "$common_dir: ";
498  }
499
500  if ($Show_Branches)
501  {
502    # For trailing revision numbers.
503    my @brevisions;
504
505    foreach my $branch (keys (%all_branches))
506    {
507      foreach my $qunkref (@qunkrefs)
508      {
509        if ((defined ($qunkref->branch))
510            and ($qunkref->branch eq $branch))
511        {
512          if ($fbegun) {
513            # kff todo: comma-delimited in XML too?  Sure.
514            $beauty .= ", ";
515          }
516          else {
517            $fbegun = 1;
518          }
519          my $fname = substr ($qunkref->filename, length ($common_dir));
520          $beauty .= $fname;
521          $qunkref->{'printed'} = 1;  # Just setting a mark bit, basically
522
523          if ( $Show_Tags and defined $qunkref->tags ) {
524            my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
525
526            if (@tags) {
527              $beauty .= " (tags: ";
528              $beauty .= join (', ', @tags);
529              $beauty .= ")";
530            }
531          }
532
533          if ($Show_Revisions) {
534            # Collect the revision numbers' last components, but don't
535            # print them -- they'll get printed with the branch name
536            # later.
537            $qunkref->revision =~ /.+\.([\d]+)$/;
538            push (@brevisions, $1);
539
540            # todo: we're still collecting branch roots, but we're not
541            # showing them anywhere.  If we do show them, it would be
542            # nifty to just call them revision "0" on a the branch.
543            # Yeah, that's the ticket.
544          }
545        }
546      }
547      $beauty .= " ($branch";
548      if (@brevisions) {
549        if ((scalar (@brevisions)) > 1) {
550          $beauty .= ".[";
551          $beauty .= (join (',', @brevisions));
552          $beauty .= "]";
553        }
554        else {
555          # Square brackets are spurious here, since there's no range to
556          # encapsulate
557          $beauty .= ".$brevisions[0]";
558        }
559      }
560      $beauty .= ")";
561    }
562  }
563
564  # Okay; any qunks that were done according to branch are taken care
565  # of, and marked as printed.  Now print everyone else.
566
567  my %fileinfo_printed;
568  foreach my $qunkref (@qunkrefs)
569  {
570    next if (defined ($qunkref->{'printed'}));   # skip if already printed
571
572    my $b = substr ($qunkref->filename, length ($common_dir));
573    # todo: Shlomo's change was this:
574    # $beauty .= substr ($qunkref->filename,
575    #              (($common_dir eq "./") ? '' : length ($common_dir)));
576    $qunkref->{'printed'} = 1;  # Set a mark bit.
577
578    if ($Show_Revisions || $Show_Tags || $Show_Dead)
579    {
580      my $started_addendum = 0;
581
582      if ($Show_Revisions) {
583        $started_addendum = 1;
584        $b .= " (";
585        $b .= $qunkref->revision;
586      }
587      if ($Show_Dead && $qunkref->state =~ /dead/)
588      {
589        # Deliberately not using $started_addendum. Keeping it simple.
590        $b .= "[DEAD]";
591      }
592      if ($Show_Tags && (defined $qunkref->tags)) {
593        my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags});
594        if ((scalar (@tags)) > 0) {
595          if ($started_addendum) {
596            $b .= ", ";
597          }
598          else {
599            $b .= " (tags: ";
600          }
601          $b .= join (', ', @tags);
602          $started_addendum = 1;
603        }
604      }
605      if ($started_addendum) {
606        $b .= ")";
607      }
608    }
609
610    unless ( exists $fileinfo_printed{$b} ) {
611      if ($fbegun) {
612        $beauty .= ", ";
613      } else {
614        $fbegun = 1;
615      }
616      $beauty .= $b, $fileinfo_printed{$b} = 1;
617    }
618  }
619
620  # Unanimous tags always come last.
621  if ($Show_Tags && %unanimous_tags)
622  {
623    $beauty .= " (utags: ";
624    $beauty .= join (', ', sort keys (%unanimous_tags));
625    $beauty .= ")";
626  }
627
628  # todo: still have to take care of branch_roots?
629
630  $beauty = "$beauty:";
631
632  return $beauty;
633}
634
635# -------------------------------------
636
637sub output_tagdate {
638  my $self = shift;
639  my ($fh, $time, $tag) = @_;
640
641  my $fdatetime = $self->fdatetime($time);
642  print $fh "$fdatetime  tag $tag\n\n";
643  return;
644}
645
646# -------------------------------------
647
648sub format_body {
649  my $self = shift;
650  my ($msg, $files, $qunklist) = @_;
651
652  my $body;
653
654  if ( $No_Wrap and ! $Summary ) {
655    $msg = $self->preprocess_msg_text($msg);
656    $files = $self->mywrap("\t", "\t  ", "* $files");
657    $msg =~ s/\n(.+)/\n$Indent$1/g;
658    unless ($After_Header eq " ") {
659      $msg =~ s/^(.+)/$Indent$1/g;
660    }
661    if ( $Hide_Filenames ) {
662      $body = $After_Header . $msg;
663    } else {
664      $body = $files . $After_Header . $msg;
665    }
666  } elsif ( $Summary ) {
667    my ($filelist, $qunk);
668    my (@DeletedQunks, @AddedQunks, @ChangedQunks);
669
670    $msg = $self->preprocess_msg_text($msg);
671    #
672    #     Sort the files (qunks) according to the operation that was
673    # performed.  Files which were added have no line change
674    # indicator, whereas deleted files have state dead.
675    #
676    foreach $qunk ( @$qunklist ) {
677      if ( "dead" eq $qunk->state) {
678        push @DeletedQunks, $qunk;
679      } elsif ( ! defined $qunk->lines ) {
680        push @AddedQunks, $qunk;
681      } else {
682        push @ChangedQunks, $qunk;
683      }
684    }
685    #
686    #     The qunks list was  originally in tree search order.  Let's
687    # get that back.  The lists, if they exist, will be reversed upon
688    # processing.
689    #
690
691    #
692    #     Now write the three sections onto $filelist
693    #
694    if ( @DeletedQunks ) {
695      $filelist .= "\tDeleted:\n";
696      foreach $qunk ( @DeletedQunks ) {
697        $filelist .= "\t\t" . $qunk->filename;
698        $filelist .= " (" . $qunk->revision . ")";
699        $filelist .= "\n";
700      }
701      undef @DeletedQunks;
702    }
703
704    if ( @AddedQunks ) {
705      $filelist .= "\tAdded:\n";
706      foreach $qunk (@AddedQunks) {
707        $filelist .= "\t\t" . $qunk->filename;
708        $filelist .= " (" . $qunk->revision . ")";
709        $filelist .= "\n";
710      }
711      undef @AddedQunks ;
712    }
713
714    if ( @ChangedQunks ) {
715      $filelist .= "\tChanged:\n";
716      foreach $qunk (@ChangedQunks) {
717        $filelist .= "\t\t" . $qunk->filename;
718        $filelist .= " (" . $qunk->revision . ")";
719        $filelist .= ", \"" . $qunk->state . "\"";
720        $filelist .= ", lines: " . $qunk->lines;
721        $filelist .= "\n";
722      }
723      undef @ChangedQunks;
724    }
725
726    chomp $filelist;
727
728    if ( $Hide_Filenames ) {
729      $filelist = '';
730    }
731
732    $msg =~ s/\n(.*)/\n$Indent$1/g;
733    unless ( $After_Header eq " " or $FSF_Style ) {
734      $msg =~ s/^(.*)/$Indent$1/g;
735    }
736
737    unless ( $No_Wrap ) {
738      if ( $FSF_Style ) {
739        $msg = $self->wrap_log_entry($msg, '', 69, 69);
740        chomp($msg);
741        chomp($msg);
742      } else {
743        $msg = $self->mywrap('', $Indent, "$msg");
744        $msg =~ s/[ \t]+\n/\n/g;
745      }
746    }
747
748    $body = $filelist . $After_Header . $msg;
749  } else {  # do wrapping, either FSF-style or regular
750    my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent  ";
751
752    if ( $FSF_Style ) {
753      $files = $self->mywrap($Indent, $latter_wrap, "* $files");
754
755      my $files_last_line_len = 0;
756      if ( $After_Header eq " " ) {
757        $files_last_line_len = $self->last_line_len($files);
758        $files_last_line_len += 1;  # for $After_Header
759      }
760
761      $msg = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69);
762      $body = $files . $After_Header . $msg;
763    } else {  # not FSF-style
764      $msg = $self->preprocess_msg_text($msg);
765      $body = $files . $After_Header . $msg;
766      $body = $self->mywrap($Indent, $latter_wrap, "* $body");
767      $body =~ s/[ \t]+\n/\n/g;
768    }
769  }
770
771  return $body;
772}
773
774# ----------------------------------------------------------------------------
775
776package CVS::Utils::ChangeLog::EntrySet::Output::XML;
777
778use base qw( CVS::Utils::ChangeLog::EntrySet::Output );
779
780use File::Basename qw( fileparse );
781
782sub new {
783  my $class = shift;
784  my $self = $class->SUPER::new(@_);
785}
786
787# -------------------------------------
788
789sub header_line {
790  my $self = shift;
791  my ($time, $author, $lastdate) = @_;
792
793  my $header_line = '';
794
795  my $isoDate;
796
797  my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
798
799  # Ideally, this would honor $UTC_Times and use +HH:MM syntax
800  $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
801                     $y + 1900, $m + 1, $d, $H, $M, $S);
802
803  my (undef,$min,$hour,$mday,$mon,$year,$wday)
804    = $UTC_Times ? gmtime($time) : localtime($time);
805
806  my $date = $self->fdatetime($time);
807  $wday = $self->wday($wday);
808
809  $header_line =
810    sprintf ("<date>%4u-%02u-%02u</date>\n${wday}<time>%02u:%02u</time>\n",
811             $year+1900, $mon+1, $mday, $hour, $min);
812  $header_line .= "<isoDate>$isoDate</isoDate>\n"
813    unless $No_XML_ISO_Date;
814  $header_line .= sprintf("<author>%s</author>\n" , $author);
815}
816
817# -------------------------------------
818
819sub wday {
820  my $self = shift; my $class = ref $self;
821  my ($wday) = @_;
822
823  return '<weekday>' . $class->weekday_en($wday) . "</weekday>\n";
824}
825
826# -------------------------------------
827
828sub escape {
829  my $self = shift;
830
831  my $txt = shift;
832  $txt =~ s/&/&amp;/g;
833  $txt =~ s/</&lt;/g;
834  $txt =~ s/>/&gt;/g;
835  return $txt;
836}
837
838# -------------------------------------
839
840sub output_header {
841  my $self = shift;
842  my ($fh) = @_;
843
844  my $encoding    =
845    length $XML_Encoding ? qq'encoding="$XML_Encoding"' : '';
846  my $version     = 'version="1.0"';
847  my $declaration =
848    sprintf '<?xml %s?>', join ' ', grep length, $version, $encoding;
849  my $root        =
850    $No_XML_Namespace ?
851      '<changelog>'     :
852        '<changelog xmlns="http://www.red-bean.com/xmlns/cvs2cl/">';
853  print $fh "$declaration\n\n$root\n\n";
854}
855
856# -------------------------------------
857
858sub output_footer {
859  my $self = shift;
860  my ($fh) = @_;
861
862  print $fh "</changelog>\n";
863}
864
865# -------------------------------------
866
867sub preprocess_msg_text {
868  my $self = shift;
869  my ($text) = @_;
870
871  $text = $self->SUPER::preprocess_msg_text($text);
872
873  $text = $self->escape($text);
874  chomp $text;
875  $text = "<msg>${text}</msg>\n";
876
877  return $text;
878}
879
880# -------------------------------------
881
882# Here we take a bunch of qunks and convert them into a printed
883# summary that will include all the information the user asked for.
884sub pretty_file_list {
885  my $self = shift;
886  my ($qunksref) = @_;
887
888  my $beauty = '';          # The accumulating header string for this entry.
889  my %non_unanimous_tags;   # Tags found in a proper subset of qunks
890  my %unanimous_tags;       # Tags found in all qunks
891  my %all_branches;         # Branches found in any qunk
892  my $fbegun = 0;           # Did we begin printing filenames yet?
893
894  my ($common_dir, $qunkrefs) =
895    $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches),
896      $qunksref);
897
898  my @qunkrefs = @$qunkrefs;
899
900  # If outputting XML, then our task is pretty simple, because we
901  # don't have to detect common dir, common tags, branch prefixing,
902  # etc.  We just output exactly what we have, and don't worry about
903  # redundancy or readability.
904
905  foreach my $qunkref (@qunkrefs)
906  {
907    my $filename    = $qunkref->filename;
908    my $state       = $qunkref->state;
909    my $revision    = $qunkref->revision;
910    my $tags        = $qunkref->tags;
911    my $branch      = $qunkref->branch;
912    my $branchroots = $qunkref->roots;
913    my $lines       = $qunkref->lines;
914
915    $filename = $self->escape($filename);   # probably paranoia
916    $revision = $self->escape($revision);   # definitely paranoia
917
918    $beauty .= "<file>\n";
919    $beauty .= "<name>${filename}</name>\n";
920    $beauty .= "<cvsstate>${state}</cvsstate>\n";
921    $beauty .= "<revision>${revision}</revision>\n";
922
923    if ($Show_Lines_Modified
924        && $lines && $lines =~ m/\+(\d+)\s+-(\d+)/) {
925        $beauty .= "<linesadded>$1</linesadded>\n";
926        $beauty .= "<linesremoved>$2</linesremoved>\n";
927    }
928
929    if ($branch) {
930      $branch   = $self->escape($branch);     # more paranoia
931      $beauty .= "<branch>${branch}</branch>\n";
932    }
933    foreach my $tag (@$tags) {
934      $tag = $self->escape($tag);  # by now you're used to the paranoia
935      $beauty .= "<tag>${tag}</tag>\n";
936    }
937    foreach my $root (@$branchroots) {
938      $root = $self->escape($root);  # which is good, because it will continue
939      $beauty .= "<branchroot>${root}</branchroot>\n";
940    }
941    $beauty .= "</file>\n";
942  }
943
944  # Theoretically, we could go home now.  But as long as we're here,
945  # let's print out the common_dir and utags, as a convenience to
946  # the receiver (after all, earlier code calculated that stuff
947  # anyway, so we might as well take advantage of it).
948
949  if ((scalar (keys (%unanimous_tags))) > 1) {
950    foreach my $utag ((keys (%unanimous_tags))) {
951      $utag = $self->escape($utag);   # the usual paranoia
952      $beauty .= "<utag>${utag}</utag>\n";
953    }
954  }
955  if ($common_dir) {
956    $common_dir = $self->escape($common_dir);
957    $beauty .= "<commondir>${common_dir}</commondir>\n";
958  }
959
960  # That's enough for XML, time to go home:
961  return $beauty;
962}
963
964# -------------------------------------
965
966sub output_tagdate {
967  my $self = shift;
968  my ($fh, $time, $tag) = @_;
969
970  my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0];
971
972  # Ideally, this would honor $UTC_Times and use +HH:MM syntax
973  my $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ",
974                       $y + 1900, $m + 1, $d, $H, $M, $S);
975
976  print $fh "<tagdate>\n";
977  print $fh "<tagisodate>$isoDate</tagisodate>\n";
978  print $fh "<tagdatetag>$tag</tagdatetag>\n";
979  print $fh "</tagdate>\n\n";
980  return;
981}
982
983# -------------------------------------
984
985sub output_entry {
986  my $self = shift;
987  my ($fh, $entry) = @_;
988  print $fh "<entry>\n$entry</entry>\n\n";
989}
990
991# -------------------------------------
992
993sub format_body {
994  my $self = shift;
995  my ($msg, $files, $qunklist) = @_;
996
997  $msg = $self->preprocess_msg_text($msg);
998  return $files . $msg;
999}
1000
1001# ----------------------------------------------------------------------------
1002
1003package CVS::Utils::ChangeLog::EntrySet::Output;
1004
1005use Carp           qw( croak );
1006use File::Basename qw( fileparse );
1007
1008# Class Utility Functions -------------
1009
1010{ # form closure
1011
1012my @weekdays = (qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday));
1013sub weekday_en {
1014  my $class = shift;
1015  return $weekdays[$_[0]];
1016}
1017
1018}
1019
1020# -------------------------------------
1021
1022sub new {
1023  my ($proto, %args) = @_;
1024  my $class = ref $proto || $proto;
1025
1026  my $follow_branches = delete $args{follow_branches};
1027  my $follow_only     = delete $args{follow_only};
1028  my $ignore_tags     = delete $args{ignore_tags};
1029  my $show_tags       = delete $args{show_tags};
1030  die "Unrecognized arg to EntrySet::Output::new: '$_'\n"
1031    for keys %args;
1032
1033  bless +{follow_branches => $follow_branches,
1034          follow_only     => $follow_only,
1035          show_tags       => $show_tags,
1036          ignore_tags     => $ignore_tags,
1037         }, $class;
1038}
1039
1040# Abstract Subrs ----------------------
1041
1042sub wday               { croak "Whoops.  Abtract method call (wday).\n" }
1043sub pretty_file_list   { croak "Whoops.  Abtract method call (pretty_file_list).\n" }
1044sub output_tagdate     { croak "Whoops.  Abtract method call (output_tagdate).\n" }
1045sub header_line        { croak "Whoops.  Abtract method call (header_line).\n" }
1046
1047# Instance Subrs ----------------------
1048
1049sub output_header { }
1050
1051# -------------------------------------
1052
1053sub output_entry {
1054  my $self = shift;
1055  my ($fh, $entry) = @_;
1056  print $fh "$entry\n";
1057}
1058
1059# -------------------------------------
1060
1061sub output_footer { }
1062
1063# -------------------------------------
1064
1065sub escape { return $_[1] }
1066
1067# -------------------------------------
1068
1069sub _revision_is_wanted {
1070  my ($self, $qunk) = @_;
1071
1072  my ($revision, $branch_numbers) = @{$qunk}{qw( revision branch_numbers )};
1073  my $follow_branches = $self->{follow_branches};
1074  my $follow_only     = $self->{follow_only};
1075
1076  for my $ignore_tag (keys %{$self->{ignore_tags}}) {
1077    return
1078      if defined $qunk->{tags} and grep $_ eq $ignore_tag, @{$qunk->{tags}};
1079  }
1080
1081  if ( keys %{$self->{show_tags}} ) {
1082    for my $show_tag (keys %{$self->{show_tags}}) {
1083      return
1084        if ! defined $qunk->{tags} or ! grep $_ eq $show_tag, @{$qunk->{tags}};
1085    }
1086  }
1087
1088  return 1
1089    unless @$follow_branches + @$follow_only; # no follow is follow all
1090
1091  for my $x (map([$_, 1], @$follow_branches),
1092             map([$_, 0], @$follow_only    )) {
1093    my ($branch, $followsub) = @$x;
1094
1095    # Special case for following trunk revisions
1096    return 1
1097      if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/;
1098
1099    if ( my $branch_number = $branch_numbers->{$branch} ) {
1100      # Are we on one of the follow branches or an ancestor of same?
1101
1102      # If this revision is a prefix of the branch number, or possibly is less
1103      # in the minormost number, OR if this branch number is a prefix of the
1104      # revision, then yes.  Otherwise, no.
1105
1106      # So below, we determine if any of those conditions are met.
1107
1108      # Trivial case: is this revision on the branch?  (Compare this way to
1109      # avoid regexps that screw up Emacs indentation, argh.)
1110      if ( substr($revision, 0, (length($branch_number) + 1))
1111           eq
1112           ($branch_number . ".") ) {
1113        if ( $followsub ) {
1114          return 1;
1115#        } elsif ( length($revision) == length($branch_number)+2 ) {
1116        } elsif ( substr($revision, length($branch_number)+1) =~ /^\d+$/ ) {
1117          return 1;
1118        }
1119      } elsif ( length($branch_number) > length($revision)
1120                and
1121                ! $No_Ancestors ) {
1122        # Non-trivial case: check if rev is ancestral to branch
1123
1124        # r_left still has the trailing "."
1125        my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/);
1126
1127        # b_left still has trailing "."
1128        # b_mid has no trailing "."
1129        my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/);
1130        return 1
1131          if $r_left eq $b_left and $r_end <= $b_mid;
1132      }
1133    }
1134  }
1135
1136  return;
1137}
1138
1139# -------------------------------------
1140
1141sub output_changelog {
1142my $self = shift; my $class = ref $self;
1143  my ($grand_poobah) = @_;
1144  ### Process each ChangeLog
1145
1146  while (my ($dir,$authorhash) = each %$grand_poobah)
1147  {
1148    &main::debug ("DOING DIR: $dir\n");
1149
1150    # Here we twist our hash around, from being
1151    #   author => time => message => filelist
1152    # in %$authorhash to
1153    #   time => author => message => filelist
1154    # in %changelog.
1155    #
1156    # This is also where we merge entries.  The algorithm proceeds
1157    # through the timeline of the changelog with a sliding window of
1158    # $Max_Checkin_Duration seconds; within that window, entries that
1159    # have the same log message are merged.
1160    #
1161    # (To save space, we zap %$authorhash after we've copied
1162    # everything out of it.)
1163
1164    my %changelog;
1165    while (my ($author,$timehash) = each %$authorhash)
1166    {
1167      my %stamptime;
1168      foreach my $time (sort {$a <=> $b} (keys %$timehash))
1169      {
1170        my $msghash = $timehash->{$time};
1171        while (my ($msg,$qunklist) = each %$msghash)
1172        {
1173          my $stamptime = $stamptime{$msg};
1174          if ((defined $stamptime)
1175              and (($time - $stamptime) < $Max_Checkin_Duration)
1176              and (defined $changelog{$stamptime}{$author}{$msg}))
1177          {
1178            push(@{$changelog{$stamptime}{$author}{$msg}}, $qunklist->files);
1179          }
1180          else {
1181            $changelog{$time}{$author}{$msg} = $qunklist->files;
1182            $stamptime{$msg} = $time;
1183          }
1184        }
1185      }
1186    }
1187    undef (%$authorhash);
1188
1189    ### Now we can write out the ChangeLog!
1190
1191    my ($logfile_here, $logfile_bak, $tmpfile);
1192    my $lastdate = "";
1193    my $lastauthor = "";
1194
1195    if (! $Output_To_Stdout) {
1196      $logfile_here =  $dir . $Log_File_Name;
1197      if (!$^O =~ /Win32/i) {
1198        $logfile_here =~ s/^\.\/\//\//;   # fix any leading ".//" problem
1199      }
1200      else {
1201        $logfile_here =~ s/^\.\/+//;      # remove any leading "./"
1202      }
1203      $tmpfile      = "${logfile_here}.cvs2cl$$.tmp";
1204      $logfile_bak  = "${logfile_here}.bak";
1205
1206      open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\"";
1207    }
1208    else {
1209      open (LOG_OUT, ">-") or die "Unable to open stdout for writing";
1210    }
1211
1212    print LOG_OUT $ChangeLog_Header;
1213
1214    my %tag_date_printed;
1215
1216    $self->output_header(\*LOG_OUT);
1217
1218    my @key_list = ();
1219    if($Chronological_Order) {
1220        @key_list = sort {$a <=> $b} (keys %changelog);
1221    } else {
1222        @key_list = sort {$b <=> $a} (keys %changelog);
1223    }
1224
1225    if ( $Show_Tag_Dates || $XML_Output ) {
1226      foreach my $time (@key_list) {
1227        my $authorhash = $changelog{$time};
1228        while (my ($author,$mesghash) = each %$authorhash) {
1229          while (my ($msg,$qunk) = each %$mesghash) {
1230            my $qunklist = $mesghash->{$msg};
1231            for my $qunkref2 (@$qunklist) {
1232              if (!$self->_revision_is_wanted($qunkref2)) {
1233                if (defined ($qunkref2->tags)) {
1234                  for my $tag (@{$qunkref2->tags}) {
1235                    $tag_date_printed{$tag} = 1;
1236                  }
1237                }
1238              }
1239            }
1240          }
1241        }
1242      }
1243    }
1244
1245    foreach my $time (@key_list)
1246    {
1247      next if ($Delta_Mode &&
1248               (($time <= $Delta_StartTime) ||
1249                ($time > $Delta_EndTime && $Delta_EndTime)));
1250
1251      # Set up the date/author line.
1252      # kff todo: do some more XML munging here, on the header
1253      # part of the entry:
1254      my (undef,$min,$hour,$mday,$mon,$year,$wday)
1255          = $UTC_Times ? gmtime($time) : localtime($time);
1256
1257      $wday = $self->wday($wday);
1258      # XML output includes everything else, we might as well make
1259      # it always include Day Of Week too, for consistency.
1260      my $authorhash = $changelog{$time};
1261      if ( $Show_Tag_Dates || $XML_Output ) {
1262        my %tags;
1263        while (my ($author,$mesghash) = each %$authorhash) {
1264          while (my ($msg,$qunk) = each %$mesghash) {
1265            for my $qunkref2 (@$qunk) {
1266              if (defined ($qunkref2->tags)) {
1267                for my $tag (@{$qunkref2->tags}) {
1268                  $tags{$tag} = 1;
1269                }
1270              }
1271            }
1272          }
1273        }
1274        # Sort here for determinism to ease testing
1275        foreach my $tag (sort keys %tags) {
1276          if ( ! defined $tag_date_printed{$tag} ) {
1277            $tag_date_printed{$tag} = $time;
1278            if ( (! defined $Regexp_Tag) or ( $tag =~ /$Regexp_Tag/ ) ) {
1279              $self->output_tagdate(\*LOG_OUT, $time, $tag);
1280              $lastauthor = ""
1281            }
1282          }
1283        }
1284      }
1285      while (my ($author,$mesghash) = each %$authorhash)
1286      {
1287        # If XML, escape in outer loop to avoid compound quoting:
1288        $author = $self->escape($author);
1289
1290      FOOBIE:
1291        # We sort here to enable predictable ordering for the testing porpoises
1292        for my $msg (sort keys %$mesghash)
1293        {
1294          my $qunklist = $mesghash->{$msg};
1295
1296          my @qunklist =
1297            grep $self->_revision_is_wanted($_), @$qunklist;
1298
1299          next FOOBIE unless @qunklist;
1300
1301          my $files               = $self->pretty_file_list(\@qunklist);
1302          my $header_line;          # date and author
1303          my $wholething;           # $header_line + $body
1304
1305          my $date = $self->fdatetime($time);
1306          $header_line = $self->header_line($time, $author, $lastdate, $lastauthor);
1307          $lastdate = $date;
1308          $lastauthor = $author;
1309
1310          $Text::Wrap::huge = 'overflow'
1311            if $Text::Wrap::VERSION >= 2001.0130;
1312          # Reshape the body according to user preferences.
1313          my $body = $self->format_body($msg, $files, \@qunklist);
1314
1315          $body =~ s/[ \t]+\n/\n/g;
1316          $wholething = $header_line . $body;
1317
1318          # One last check: make sure it passes the regexp test, if the
1319          # user asked for that.  We have to do it here, so that the
1320          # test can match against information in the header as well
1321          # as in the text of the log message.
1322
1323          # How annoying to duplicate so much code just because I
1324          # can't figure out a way to evaluate scalars on the trailing
1325          # operator portion of a regular expression.  Grrr.
1326          if ($Case_Insensitive) {
1327            unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/oi ) ) {
1328              $self->output_entry(\*LOG_OUT, $wholething);
1329            }
1330          }
1331          else {
1332            unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/o ) ) {
1333              $self->output_entry(\*LOG_OUT, $wholething);
1334            }
1335          }
1336        }
1337      }
1338    }
1339
1340    $self->output_footer(\*LOG_OUT);
1341
1342    close (LOG_OUT);
1343
1344    if ( ! $Output_To_Stdout ) {
1345      # If accumulating, append old data to new before renaming.  But
1346      # don't append the most recent entry, since it's already in the
1347      # new log due to CVS's idiosyncratic interpretation of "log -d".
1348      if ($Cumulative && -f $logfile_here) {
1349        open NEW_LOG, ">>$tmpfile"
1350          or die "trouble appending to $tmpfile ($!)";
1351
1352        open OLD_LOG, "<$logfile_here"
1353          or die "trouble reading from $logfile_here ($!)";
1354
1355        my $started_first_entry = 0;
1356        my $passed_first_entry = 0;
1357        while (<OLD_LOG>) {
1358          if ( ! $passed_first_entry ) {
1359            if ( ( ! $started_first_entry )
1360                and /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
1361              $started_first_entry = 1;
1362            } elsif ( /^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/ ) {
1363              $passed_first_entry = 1;
1364              print NEW_LOG $_;
1365            }
1366          } else {
1367            print NEW_LOG $_;
1368          }
1369        }
1370
1371        close NEW_LOG;
1372        close OLD_LOG;
1373      }
1374
1375      if ( -f $logfile_here ) {
1376        rename $logfile_here, $logfile_bak;
1377      }
1378      rename $tmpfile, $logfile_here;
1379    }
1380  }
1381}
1382
1383# -------------------------------------
1384
1385# Don't call this wrap, because with 5.5.3, that clashes with the
1386# (unconditional :-( ) export of wrap() from Text::Wrap
1387sub mywrap {
1388  my $self = shift;
1389  my ($indent1, $indent2, @text) = @_;
1390  # If incoming text looks preformatted, don't get clever
1391  my $text = Text::Wrap::wrap($indent1, $indent2, @text);
1392  if ( grep /^\s+/m, @text ) {
1393    return $text;
1394  }
1395  my @lines = split /\n/, $text;
1396  $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e;
1397  $lines[0] =~ s/^$indent1\s+/$indent1/;
1398  s/^$indent2\s+/$indent2/
1399    for @lines[1..$#lines];
1400  my $newtext = join "\n", @lines;
1401  $newtext .= "\n"
1402    if substr($text, -1) eq "\n";
1403  return $newtext;
1404}
1405
1406# -------------------------------------
1407
1408sub preprocess_msg_text {
1409  my $self = shift;
1410  my ($text) = @_;
1411
1412  # Strip out carriage returns (as they probably result from DOSsy editors).
1413  $text =~ s/\r\n/\n/g;
1414  # If it *looks* like two newlines, make it *be* two newlines:
1415  $text =~ s/\n\s*\n/\n\n/g;
1416
1417  return $text;
1418}
1419
1420# -------------------------------------
1421
1422sub last_line_len {
1423  my $self = shift;
1424
1425  my $files_list = shift;
1426  my @lines = split (/\n/, $files_list);
1427  my $last_line = pop (@lines);
1428  return length ($last_line);
1429}
1430
1431# -------------------------------------
1432
1433# A custom wrap function, sensitive to some common constructs used in
1434# log entries.
1435sub wrap_log_entry {
1436  my $self = shift;
1437
1438  my $text = shift;                  # The text to wrap.
1439  my $left_pad_str = shift;          # String to pad with on the left.
1440
1441  # These do NOT take left_pad_str into account:
1442  my $length_remaining = shift;      # Amount left on current line.
1443  my $max_line_length  = shift;      # Amount left for a blank line.
1444
1445  my $wrapped_text = '';             # The accumulating wrapped entry.
1446  my $user_indent = '';              # Inherited user_indent from prev line.
1447
1448  my $first_time = 1;                # First iteration of the loop?
1449  my $suppress_line_start_match = 0; # Set to disable line start checks.
1450
1451  my @lines = split (/\n/, $text);
1452  while (@lines)   # Don't use `foreach' here, it won't work.
1453  {
1454    my $this_line = shift (@lines);
1455    chomp $this_line;
1456
1457    if ($this_line =~ /^(\s+)/) {
1458      $user_indent = $1;
1459    }
1460    else {
1461      $user_indent = '';
1462    }
1463
1464    # If it matches any of the line-start regexps, print a newline now...
1465    if ($suppress_line_start_match)
1466    {
1467      $suppress_line_start_match = 0;
1468    }
1469    elsif (($this_line =~ /^(\s*)\*\s+[a-zA-Z0-9]/)
1470           || ($this_line =~ /^(\s*)\* [a-zA-Z0-9_\.\/\+-]+/)
1471           || ($this_line =~ /^(\s*)\([a-zA-Z0-9_\.\/\+-]+(\)|,\s*)/)
1472           || ($this_line =~ /^(\s+)(\S+)/)
1473           || ($this_line =~ /^(\s*)- +/)
1474           || ($this_line =~ /^()\s*$/)
1475           || ($this_line =~ /^(\s*)\*\) +/)
1476           || ($this_line =~ /^(\s*)[a-zA-Z0-9](\)|\.|\:) +/))
1477    {
1478      # Make a line break immediately, unless header separator is set
1479      # and this line is the first line in the entry, in which case
1480      # we're getting the blank line for free already and shouldn't
1481      # add an extra one.
1482      unless (($After_Header ne " ") and ($first_time))
1483      {
1484        if ($this_line =~ /^()\s*$/) {
1485          $suppress_line_start_match = 1;
1486          $wrapped_text .= "\n${left_pad_str}";
1487        }
1488
1489        $wrapped_text .= "\n${left_pad_str}";
1490      }
1491
1492      $length_remaining = $max_line_length - (length ($user_indent));
1493    }
1494
1495    # Now that any user_indent has been preserved, strip off leading
1496    # whitespace, so up-folding has no ugly side-effects.
1497    $this_line =~ s/^\s*//;
1498
1499    # Accumulate the line, and adjust parameters for next line.
1500    my $this_len = length ($this_line);
1501    if ($this_len == 0)
1502    {
1503      # Blank lines should cancel any user_indent level.
1504      $user_indent = '';
1505      $length_remaining = $max_line_length;
1506    }
1507    elsif ($this_len >= $length_remaining) # Line too long, try breaking it.
1508    {
1509      # Walk backwards from the end.  At first acceptable spot, break
1510      # a new line.
1511      my $idx = $length_remaining - 1;
1512      if ($idx < 0) { $idx = 0 };
1513      while ($idx > 0)
1514      {
1515        if (substr ($this_line, $idx, 1) =~ /\s/)
1516        {
1517          my $line_now = substr ($this_line, 0, $idx);
1518          my $next_line = substr ($this_line, $idx);
1519          $this_line = $line_now;
1520
1521          # Clean whitespace off the end.
1522          chomp $this_line;
1523
1524          # The current line is ready to be printed.
1525          $this_line .= "\n${left_pad_str}";
1526
1527          # Make sure the next line is allowed full room.
1528          $length_remaining = $max_line_length - (length ($user_indent));
1529
1530          # Strip next_line, but then preserve any user_indent.
1531          $next_line =~ s/^\s*//;
1532
1533          # Sneak a peek at the user_indent of the upcoming line, so
1534          # $next_line (which will now precede it) can inherit that
1535          # indent level.  Otherwise, use whatever user_indent level
1536          # we currently have, which might be none.
1537          my $next_next_line = shift (@lines);
1538          if ((defined ($next_next_line)) && ($next_next_line =~ /^(\s+)/)) {
1539            $next_line = $1 . $next_line if (defined ($1));
1540            # $length_remaining = $max_line_length - (length ($1));
1541            $next_next_line =~ s/^\s*//;
1542          }
1543          else {
1544            $next_line = $user_indent . $next_line;
1545          }
1546          if (defined ($next_next_line)) {
1547            unshift (@lines, $next_next_line);
1548          }
1549          unshift (@lines, $next_line);
1550
1551          # Our new next line might, coincidentally, begin with one of
1552          # the line-start regexps, so we temporarily turn off
1553          # sensitivity to that until we're past the line.
1554          $suppress_line_start_match = 1;
1555
1556          last;
1557        }
1558        else
1559        {
1560          $idx--;
1561        }
1562      }
1563
1564      if ($idx == 0)
1565      {
1566        # We bottomed out because the line is longer than the
1567        # available space.  But that could be because the space is
1568        # small, or because the line is longer than even the maximum
1569        # possible space.  Handle both cases below.
1570
1571        if ($length_remaining == ($max_line_length - (length ($user_indent))))
1572        {
1573          # The line is simply too long -- there is no hope of ever
1574          # breaking it nicely, so just insert it verbatim, with
1575          # appropriate padding.
1576          $this_line = "\n${left_pad_str}${this_line}";
1577        }
1578        else
1579        {
1580          # Can't break it here, but may be able to on the next round...
1581          unshift (@lines, $this_line);
1582          $length_remaining = $max_line_length - (length ($user_indent));
1583          $this_line = "\n${left_pad_str}";
1584        }
1585      }
1586    }
1587    else  # $this_len < $length_remaining, so tack on what we can.
1588    {
1589      # Leave a note for the next iteration.
1590      $length_remaining = $length_remaining - $this_len;
1591
1592      if ($this_line =~ /\.$/)
1593      {
1594        $this_line .= "  ";
1595        $length_remaining -= 2;
1596      }
1597      else  # not a sentence end
1598      {
1599        $this_line .= " ";
1600        $length_remaining -= 1;
1601      }
1602    }
1603
1604    # Unconditionally indicate that loop has run at least once.
1605    $first_time = 0;
1606
1607    $wrapped_text .= "${user_indent}${this_line}";
1608  }
1609
1610  # One last bit of padding.
1611  $wrapped_text .= "\n";
1612
1613  return $wrapped_text;
1614}
1615
1616# -------------------------------------
1617
1618sub _pretty_file_list {
1619  my $self = shift;
1620
1621  my ($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_;
1622
1623  my @qunkrefs =
1624    grep +( ( ! $_->tags_exists
1625              or
1626              ! grep exists $ignore_tags{$_}, @{$_->tags})
1627            and
1628            ( ! keys %show_tags
1629              or
1630              ( $_->tags_exists
1631                and
1632                grep exists $show_tags{$_}, @{$_->tags} )
1633            )
1634          ),
1635    @$qunksref;
1636
1637  my $common_dir;           # Dir prefix common to all files ('' if none)
1638
1639  # First, loop over the qunks gathering all the tag/branch names.
1640  # We'll put them all in non_unanimous_tags, and take out the
1641  # unanimous ones later.
1642 QUNKREF:
1643  foreach my $qunkref (@qunkrefs)
1644  {
1645    # Keep track of whether all the files in this commit were in the
1646    # same directory, and memorize it if so.  We can make the output a
1647    # little more compact by mentioning the directory only once.
1648    if ($Common_Dir && (scalar (@qunkrefs)) > 1)
1649    {
1650      if (! (defined ($common_dir)))
1651      {
1652        my ($base, $dir);
1653        ($base, $dir, undef) = fileparse ($qunkref->filename);
1654
1655        if ((! (defined ($dir)))  # this first case is sheer paranoia
1656            or ($dir eq '')
1657            or ($dir eq "./")
1658            or ($dir eq ".\\"))
1659        {
1660          $common_dir = '';
1661        }
1662        else
1663        {
1664          $common_dir = $dir;
1665        }
1666      }
1667      elsif ($common_dir ne '')
1668      {
1669        # Already have a common dir prefix, so how much of it can we preserve?
1670        $common_dir = &main::common_path_prefix ($qunkref->filename, $common_dir);
1671      }
1672    }
1673    else  # only one file in this entry anyway, so common dir not an issue
1674    {
1675      $common_dir = '';
1676    }
1677
1678    if (defined ($qunkref->branch)) {
1679      $all_branches->{$qunkref->branch} = 1;
1680    }
1681    if (defined ($qunkref->tags)) {
1682      foreach my $tag (@{$qunkref->tags}) {
1683        $non_unanimous_tags->{$tag} = 1;
1684      }
1685    }
1686  }
1687
1688  # Any tag held by all qunks will be printed specially... but only if
1689  # there are multiple qunks in the first place!
1690  if ((scalar (@qunkrefs)) > 1) {
1691    foreach my $tag (keys (%$non_unanimous_tags)) {
1692      my $everyone_has_this_tag = 1;
1693      foreach my $qunkref (@qunkrefs) {
1694        if ((! (defined ($qunkref->tags)))
1695            or (! (grep ($_ eq $tag, @{$qunkref->tags})))) {
1696          $everyone_has_this_tag = 0;
1697        }
1698      }
1699      if ($everyone_has_this_tag) {
1700        $unanimous_tags->{$tag} = 1;
1701        delete $non_unanimous_tags->{$tag};
1702      }
1703    }
1704  }
1705
1706  return $common_dir, \@qunkrefs;
1707}
1708
1709# -------------------------------------
1710
1711sub fdatetime {
1712  my $self = shift;
1713
1714  my ($year, $mday, $mon, $wday, $hour, $min);
1715
1716  if ( @_ > 1 ) {
1717    ($year, $mday, $mon, $wday, $hour, $min) = @_;
1718  } else {
1719    my ($time) = @_;
1720    (undef, $min, $hour, $mday, $mon, $year, $wday) =
1721      $UTC_Times ? gmtime($time) : localtime($time);
1722
1723    $year += 1900;
1724    $mon  += 1;
1725    $wday  = $self->wday($wday);
1726  }
1727
1728  my $fdate = $self->fdate($year, $mon, $mday, $wday);
1729
1730  if ($Show_Times) {
1731    my $ftime = $self->ftime($hour, $min);
1732    return "$fdate $ftime";
1733  } else {
1734    return $fdate;
1735  }
1736}
1737
1738# -------------------------------------
1739
1740sub fdate {
1741  my $self = shift;
1742
1743  my ($year, $mday, $mon, $wday);
1744
1745  if ( @_ > 1 ) {
1746    ($year, $mon, $mday, $wday) = @_;
1747  } else {
1748    my ($time) = @_;
1749    (undef, undef, undef, $mday, $mon, $year, $wday) =
1750      $UTC_Times ? gmtime($time) : localtime($time);
1751
1752    $year += 1900;
1753    $mon  += 1;
1754    $wday  = $self->wday($wday);
1755  }
1756
1757  return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday;
1758}
1759
1760# -------------------------------------
1761
1762sub ftime {
1763  my $self = shift;
1764
1765  my ($hour, $min);
1766
1767  if ( @_ > 1 ) {
1768    ($hour, $min) = @_;
1769  } else {
1770    my ($time) = @_;
1771    (undef, $min, $hour) = $UTC_Times ? gmtime($time) : localtime($time);
1772  }
1773
1774  return sprintf '%02u:%02u', $hour, $min;
1775}
1776
1777# ----------------------------------------------------------------------------
1778
1779package CVS::Utils::ChangeLog::Message;
1780
1781sub new {
1782  my $class = shift;
1783  my ($msg) = @_;
1784
1785  my %self = (msg => $msg, files => []);
1786
1787  bless \%self, $class;
1788}
1789
1790sub add_fileentry {
1791  my $self = shift;
1792  my ($fileentry) = @_;
1793
1794  die "Not a fileentry: $fileentry"
1795    unless $fileentry->isa('CVS::Utils::ChangeLog::FileEntry');
1796
1797  push @{$self->{files}}, $fileentry;
1798}
1799
1800sub files { wantarray ? @{$_[0]->{files}} : $_[0]->{files} }
1801
1802# ----------------------------------------------------------------------------
1803
1804package CVS::Utils::ChangeLog::FileEntry;
1805
1806use File::Basename qw( fileparse );
1807
1808# Each revision of a file has a little data structure (a `qunk')
1809# associated with it.  That data structure holds not only the
1810# file's name, but any additional information about the file
1811# that might be needed in the output, such as the revision
1812# number, tags, branches, etc.  The reason to have these things
1813# arranged in a data structure, instead of just appending them
1814# textually to the file's name, is that we may want to do a
1815# little rearranging later as we write the output.  For example,
1816# all the files on a given tag/branch will go together, followed
1817# by the tag in parentheses (so trunk or otherwise non-tagged
1818# files would go at the end of the file list for a given log
1819# message).  This rearrangement is a lot easier to do if we
1820# don't have to reparse the text.
1821#
1822# A qunk looks like this:
1823#
1824#   {
1825#     filename    =>    "hello.c",
1826#     revision    =>    "1.4.3.2",
1827#     time        =>    a timegm() return value (moment of commit)
1828#     tags        =>    [ "tag1", "tag2", ... ],
1829#     branch      =>    "branchname" # There should be only one, right?
1830#     roots       =>    [ "branchtag1", "branchtag2", ... ]
1831#     lines       =>    "+x -y" # or undefined; x and y are integers
1832#   }
1833
1834# Single top-level ChangeLog, or one per subdirectory?
1835my $distributed;
1836sub distributed { $#_ ? ($distributed = $_[1]) : $distributed; }
1837
1838sub new {
1839  my $class = shift;
1840  my ($path, $time, $revision, $state, $lines,
1841      $branch_names, $branch_roots, $branch_numbers, $symbolic_names) = @_;
1842
1843  my %self = (time     => $time,
1844              revision => $revision,
1845              state    => $state,
1846              lines    => $lines,
1847              branch_numbers => $branch_numbers,
1848             );
1849
1850  if ( $distributed ) {
1851    @self{qw(filename dir_key)} = fileparse($path);
1852  } else {
1853    @self{qw(filename dir_key)} = ($path, './');
1854  }
1855
1856  { # Scope for $branch_prefix
1857    (my ($branch_prefix) = ($revision =~ /((?:\d+\.)+)\d+/));
1858    $branch_prefix =~ s/\.$//;
1859    if ( $branch_names->{$branch_prefix} ) {
1860      my $branch_name = $branch_names->{$branch_prefix};
1861      $self{branch}   = $branch_name;
1862      $self{branches} = [$branch_name];
1863    }
1864    while ( $branch_prefix =~ s/^(\d+(?:\.\d+\.\d+)+)\.\d+\.\d+$/$1/ ) {
1865      push @{$self{branches}}, $branch_names->{$branch_prefix}
1866        if exists $branch_names->{$branch_prefix};
1867    }
1868  }
1869
1870  # If there's anything in the @branch_roots array, then this
1871  # revision is the root of at least one branch.  We'll display
1872  # them as branch names instead of revision numbers, the
1873  # substitution for which is done directly in the array:
1874  $self{'roots'} = [ map { $branch_names->{$_} } @$branch_roots ]
1875    if @$branch_roots;
1876
1877  if ( exists $symbolic_names->{$revision} ) {
1878    $self{tags} = delete $symbolic_names->{$revision};
1879    &main::delta_check($time, $self{tags});
1880  }
1881
1882  bless \%self, $class;
1883}
1884
1885sub filename       { $_[0]->{filename}       }
1886sub dir_key        { $_[0]->{dir_key}        }
1887sub revision       { $_[0]->{revision}       }
1888sub branch         { $_[0]->{branch}         }
1889sub state          { $_[0]->{state}          }
1890sub lines          { $_[0]->{lines}          }
1891sub roots          { $_[0]->{roots}          }
1892sub branch_numbers { $_[0]->{branch_numbers} }
1893
1894sub tags        { $_[0]->{tags}     }
1895sub tags_exists {
1896  exists $_[0]->{tags};
1897}
1898
1899# This may someday be used in a more sophisticated calculation of what other
1900# files are involved in this commit.  For now, we don't use it much except for
1901# delta mode, because the common-commit-detection algorithm is hypothesized to
1902# be "good enough" as it stands.
1903sub time     { $_[0]->{time}     }
1904
1905# ----------------------------------------------------------------------------
1906
1907package CVS::Utils::ChangeLog::EntrySetBuilder;
1908
1909use File::Basename qw( fileparse );
1910use Time::Local    qw( timegm );
1911
1912use constant MAILNAME => "/etc/mailname";
1913
1914# In 'cvs log' output, one long unbroken line of equal signs separates files:
1915use constant FILE_SEPARATOR => '=' x 77;# . "\n";
1916# In 'cvs log' output, a shorter line of dashes separates log messages within
1917# a file:
1918use constant REV_SEPARATOR  => '-' x 28;# . "\n";
1919
1920use constant EMPTY_LOG_MESSAGE => '*** empty log message ***';
1921
1922# -------------------------------------
1923
1924sub new {
1925  my ($proto) = @_;
1926  my $class = ref $proto || $proto;
1927
1928  my $poobah  = CVS::Utils::ChangeLog::EntrySet->new;
1929  my $self = bless +{ grand_poobah => $poobah }, $class;
1930
1931  $self->clear_file;
1932  $self->maybe_read_user_map_file;
1933  return $self;
1934}
1935
1936# -------------------------------------
1937
1938sub clear_msg {
1939  my ($self) = @_;
1940
1941  # Make way for the next message
1942  undef $self->{rev_msg};
1943  undef $self->{rev_time};
1944  undef $self->{rev_revision};
1945  undef $self->{rev_author};
1946  undef $self->{rev_state};
1947  undef $self->{lines};
1948  $self->{rev_branch_roots} = [];       # For showing which files are branch
1949                                        # ancestors.
1950  $self->{collecting_symbolic_names} = 0;
1951}
1952
1953# -------------------------------------
1954
1955sub clear_file {
1956  my ($self) = @_;
1957  $self->clear_msg;
1958
1959  undef $self->{filename};
1960  $self->{branch_names}   = +{};        # We'll grab branch names while we're
1961                                        # at it.
1962  $self->{branch_numbers} = +{};        # Save some revisions for
1963                                        # @Follow_Branches
1964  $self->{symbolic_names} = +{};        # Where tag names get stored.
1965}
1966
1967# -------------------------------------
1968
1969sub grand_poobah { $_[0]->{grand_poobah} }
1970
1971# -------------------------------------
1972
1973sub read_changelog {
1974  my ($self, $command) = @_;
1975
1976  local (*READER);
1977  my $pid;
1978  if (! $Input_From_Stdin) {
1979    if ($^O =~ /Win32/i) {
1980      open (READER, "@$command |")
1981        or die "unable to run \"@$command\"";
1982    }
1983    else {
1984      local (*WRITER);
1985      pipe(READER, WRITER)
1986        or die "Couldn't form pipe: $!\n";
1987      $pid = fork;
1988      if (! defined $pid) {
1989        die "Couldn't fork: $!\n";
1990      }
1991      if ( ! $pid ) { # child
1992        open STDOUT, '>&=' . fileno WRITER
1993          or die "Couldn't dup stderr to ", fileno WRITER, "\n";
1994        # strangely, some perls give spurious warnings about STDIN being opened
1995        # for output only these close calls precede the STDOUT reopen above.
1996        # I think they must be reusing fd 1.
1997        close READER;
1998        close STDIN;
1999
2000        exec @$command;
2001      }
2002
2003      close WRITER;
2004    }
2005
2006    &main::debug ("(run \"@$command\")\n");
2007  }
2008  else {
2009    open READER, '-' or die "unable to open stdin for reading";
2010  }
2011
2012  binmode READER;
2013
2014 XX_Log_Source:
2015  while (<READER>) {
2016    chomp;
2017    s!\r$!!;
2018
2019    # If on a new file and don't see filename, skip until we find it, and
2020    # when we find it, grab it.
2021    if ( ! defined $self->{filename} ) {
2022      $self->read_file_path($_);
2023    } elsif ( /^symbolic names:$/ ) {
2024      $self->{collecting_symbolic_names} = 1;
2025    } elsif ( $self->{collecting_symbolic_names} ) {
2026      $self->read_symbolic_name($_);
2027    } elsif ( $_ eq FILE_SEPARATOR and ! defined $self->{rev_revision} ) {
2028      $self->clear_file;
2029    } elsif ( ! defined $self->{rev_revision} ) {
2030        # If have file name, but not revision, and see revision, then grab
2031        # it.  (We collect unconditionally, even though we may or may not
2032        # ever use it.)
2033      $self->read_revision($_);
2034    } elsif ( ! defined $self->{rev_time} ) { # and /^date: /) {
2035      $self->read_date_author_and_state($_);
2036    } elsif ( /^branches:\s+(.*);$/ ) {
2037      $self->read_branches($1);
2038    } elsif ( ! ( $_ eq FILE_SEPARATOR or $_ eq REV_SEPARATOR ) ) {
2039      # If have file name, time, and author, then we're just grabbing
2040      # log message texts:
2041      $self->{rev_msg} .= $_ . "\n";   # Normally, just accumulate the message...
2042    } else {
2043      my $noadd = 0;
2044      if ( ! $self->{rev_msg}
2045           or $self->{rev_msg} =~ /^\s*(\.\s*)?$/
2046           or index($self->{rev_msg}, EMPTY_LOG_MESSAGE) > -1 ) {
2047        # ... until a msg separator is encountered:
2048        # Ensure the message contains something:
2049        $self->clear_msg, $noadd = 1
2050          if $Prune_Empty_Msgs;
2051        $self->{rev_msg} = "[no log message]\n";
2052      }
2053
2054      $self->add_file_entry
2055        unless $noadd;
2056
2057      if ( $_ eq FILE_SEPARATOR ) {
2058        $self->clear_file;
2059      } else {
2060        $self->clear_msg;
2061      }
2062    }
2063  }
2064
2065  close READER
2066    or die "Couldn't close pipe reader: $!\n";
2067  if ( defined $pid ) {
2068    my $rv;
2069    waitpid $pid, 0;
2070    0 == $?
2071      or $!=1, die sprintf("Problem reading log input (pid/exit/signal/core: %d/%d/%d/%d)\n",
2072                           $pid, $? >> 8, $? & 127, $? & 128);
2073  }
2074  return;
2075}
2076
2077# -------------------------------------
2078
2079sub add_file_entry {
2080  $_[0]->grand_poobah->add_fileentry(@{$_[0]}{qw(filename rev_time rev_revision
2081                                                 rev_state lines branch_names
2082                                                 rev_branch_roots
2083                                                 branch_numbers
2084                                                 symbolic_names
2085                                                 rev_author rev_msg)});
2086}
2087
2088# -------------------------------------
2089
2090sub maybe_read_user_map_file {
2091  my ($self) = @_;
2092
2093  my %expansions;
2094  my $User_Map_Input;
2095
2096  if (defined $User_Passwd_File)
2097  {
2098    if ( ! defined $Domain ) {
2099      if ( -e MAILNAME ) {
2100        chomp($Domain = slurp_file(MAILNAME));
2101      } else {
2102      MAILDOMAIN_CMD:
2103        for ([qw(hostname -d)], 'dnsdomainname', 'domainname') {
2104          my ($text, $exit, $sig, $core) = run_ext($_);
2105          if ( $exit == 0 && $sig == 0 && $core == 0 ) {
2106            chomp $text;
2107            if ( length $text ) {
2108              $Domain = $text;
2109              last MAILDOMAIN_CMD;
2110            }
2111          }
2112        }
2113      }
2114    }
2115
2116    die "No mail domain found\n"
2117      unless defined $Domain;
2118
2119    open (MAPFILE, "<$User_Passwd_File")
2120        or die ("Unable to open $User_Passwd_File ($!)");
2121    while (<MAPFILE>)
2122    {
2123      # all lines are valid
2124      my ($username, $pw, $uid, $gid, $gecos, $homedir, $shell) = split ':';
2125      my $expansion = '';
2126      ($expansion) = split (',', $gecos)
2127        if defined $gecos && length $gecos;
2128
2129      my $mailname = $Domain eq '' ? $username : "$username\@$Domain";
2130      $expansions{$username} = "$expansion <$mailname>";
2131    }
2132    close (MAPFILE);
2133  }
2134
2135  if ($User_Map_File)
2136  {
2137    if ( $User_Map_File =~ m{^([-\w\@+=.,\/]+):([-\w\@+=.,\/:]+)} and
2138         !-f $User_Map_File )
2139    {
2140      my $rsh = (exists $ENV{'CVS_RSH'} ? $ENV{'CVS_RSH'} : 'ssh');
2141      $User_Map_Input = "$rsh $1 'cat $2' |";
2142      &main::debug ("(run \"${User_Map_Input}\")\n");
2143    }
2144    else
2145    {
2146      $User_Map_Input = "<$User_Map_File";
2147    }
2148
2149    open (MAPFILE, $User_Map_Input)
2150        or die ("Unable to open $User_Map_File ($!)");
2151
2152    while (<MAPFILE>)
2153    {
2154      next if /^\s*#/;  # Skip comment lines.
2155      next if not /:/;  # Skip lines without colons.
2156
2157      # It is now safe to split on ':'.
2158      my ($username, $expansion) = split ':';
2159      chomp $expansion;
2160      $expansion =~ s/^'(.*)'$/$1/;
2161      $expansion =~ s/^"(.*)"$/$1/;
2162
2163      # If it looks like the expansion has a real name already, then
2164      # we toss the username we got from CVS log.  Otherwise, keep
2165      # it to use in combination with the email address.
2166
2167      if ($expansion =~ /^\s*<{0,1}\S+@.*/) {
2168        # Also, add angle brackets if none present
2169        if (! ($expansion =~ /<\S+@\S+>/)) {
2170          $expansions{$username} = "$username <$expansion>";
2171        }
2172        else {
2173          $expansions{$username} = "$username $expansion";
2174        }
2175      }
2176      else {
2177        $expansions{$username} = $expansion;
2178      }
2179    } # fi ($User_Map_File)
2180
2181    close (MAPFILE);
2182  }
2183
2184 $self->{usermap} = \%expansions;
2185}
2186
2187# -------------------------------------
2188
2189sub read_file_path {
2190  my ($self, $line) = @_;
2191
2192  my $path;
2193
2194  if ( $line =~ /^Working file: (.*)/ ) {
2195    $path = $1;
2196  } elsif ( defined $RCS_Root
2197            and
2198            $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) {
2199    $path = $1;
2200    $path =~ s!Attic/!!;
2201  } else {
2202    return;
2203  }
2204
2205  if ( @Ignore_Files ) {
2206    my $base;
2207    ($base, undef, undef) = fileparse($path);
2208
2209    my $xpath = $Case_Insensitive ? lc($path) : $path;
2210    return
2211      if grep $path =~ /$_/, @Ignore_Files;
2212  }
2213
2214  $self->{filename} = $path;
2215  return;
2216}
2217
2218# -------------------------------------
2219
2220sub read_symbolic_name {
2221  my ($self, $line) = @_;
2222
2223  # All tag names are listed with whitespace in front in cvs log
2224  # output; so if see non-whitespace, then we're done collecting.
2225  if ( /^\S/ ) {
2226    $self->{collecting_symbolic_names} = 0;
2227    return;
2228  } else {
2229    # we're looking at a tag name, so parse & store it
2230
2231    # According to the Cederqvist manual, in node "Tags", tag names must start
2232    # with an uppercase or lowercase letter and can contain uppercase and
2233    # lowercase letters, digits, `-', and `_'.  However, it's not our place to
2234    # enforce that, so we'll allow anything CVS hands us to be a tag:
2235    my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/);
2236
2237    # A branch number either has an odd number of digit sections
2238    # (and hence an even number of dots), or has ".0." as the
2239    # second-to-last digit section.  Test for these conditions.
2240    my $real_branch_rev = '';
2241    if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/             # Even number of dots...
2242         and
2243         $tag_rev !~ /^(1\.)+1$/ ) {                  # ...but not "1.[1.]1"
2244      $real_branch_rev = $tag_rev;
2245    } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) {  # Has ".0."
2246      $real_branch_rev = $1 . $3;
2247    }
2248
2249    # If we got a branch, record its number.
2250    if ( $real_branch_rev ) {
2251      $self->{branch_names}->{$real_branch_rev} = $tag_name;
2252      $self->{branch_numbers}->{$tag_name} = $real_branch_rev;
2253    } else {
2254      # Else it's just a regular (non-branch) tag.
2255      push @{$self->{symbolic_names}->{$tag_rev}}, $tag_name;
2256    }
2257  }
2258
2259  $self->{collecting_symbolic_names} = 1;
2260  return;
2261}
2262
2263# -------------------------------------
2264
2265sub read_revision {
2266  my ($self, $line) = @_;
2267
2268  my ($revision) = ( $line =~ /^revision (\d+\.[\d.]+)/ );
2269
2270  return
2271    unless $revision;
2272
2273  $self->{rev_revision} = $revision;
2274  return;
2275}
2276
2277# -------------------------------------
2278
2279{ # Closure over %gecos_warned
2280my %gecos_warned;
2281sub read_date_author_and_state {
2282  my ($self, $line) = @_;
2283
2284  my ($time, $author, $state) = $self->parse_date_author_and_state($line);
2285
2286  if ( defined($self->{usermap}->{$author}) and $self->{usermap}->{$author} ) {
2287    $author = $self->{usermap}->{$author};
2288  } elsif ( defined $Domain or $Gecos == 1 ) {
2289    my $email = $author;
2290    $email = $author."@".$Domain
2291      if defined $Domain && $Domain ne '';
2292
2293    my $pw = getpwnam($author);
2294    my ($fullname, $office, $workphone, $homephone, $gcos);
2295    if ( defined $pw ) {
2296      $gcos = (getpwnam($author))[6];
2297      ($fullname, $office, $workphone, $homephone) =
2298        split /\s*,\s*/, $gcos;
2299    } else {
2300      warn "Couldn't find gecos info for author '$author'\n"
2301        unless $gecos_warned{$author}++;
2302      $fullname = '';
2303    }
2304    for (grep defined, $fullname, $office, $workphone, $homephone) {
2305      s/&/ucfirst(lc($pw))/ge;
2306    }
2307    $author = $fullname . "  <" . $email . ">"
2308      if defined $fullname && $fullname ne '';
2309  }
2310
2311  $self->{rev_state}  = $state;
2312  $self->{rev_time}   = $time;
2313  $self->{rev_author} = $author;
2314  return;
2315}
2316}
2317
2318# -------------------------------------
2319
2320sub read_branches {
2321  # A "branches: ..." line here indicates that one or more branches
2322  # are rooted at this revision.  If we're showing branches, then we
2323  # want to show that fact as well, so we collect all the branches
2324  # that this is the latest ancestor of and store them in
2325  # $self->[rev_branch_roots}.  Just for reference, the format of the
2326  # line we're seeing at this point is:
2327  #
2328  #    branches:  1.5.2;  1.5.4;  ...;
2329  #
2330  # Okay, here goes:
2331  my ($self, $line) = @_;
2332
2333  # Ugh.  This really bothers me.  Suppose we see a log entry
2334  # like this:
2335  #
2336  #    ----------------------------
2337  #    revision 1.1
2338  #    date: 1999/10/17 03:07:38;  author: jrandom;  state: Exp;
2339  #    branches:  1.1.2;
2340  #    Intended first line of log message begins here.
2341  #    ----------------------------
2342  #
2343  # The question is, how we can tell the difference between that
2344  # log message and a *two*-line log message whose first line is
2345  #
2346  #    "branches:  1.1.2;"
2347  #
2348  # See the problem?  The output of "cvs log" is inherently
2349  # ambiguous.
2350  #
2351  # For now, we punt: we liberally assume that people don't
2352  # write log messages like that, and just toss a "branches:"
2353  # line if we see it but are not showing branches.  I hope no
2354  # one ever loses real log data because of this.
2355  if ( $Show_Branches ) {
2356    $line =~ s/(1\.)+1;|(1\.)+1$//;  # ignore the trivial branch 1.1.1
2357    $self->{rev_branch_roots} = [split /;\s+/, $line]
2358      if length $line;
2359  }
2360}
2361
2362# -------------------------------------
2363
2364sub parse_date_author_and_state {
2365  my ($self, $line) = @_;
2366  # Parses the date/time and author out of a line like:
2367  #
2368  # date: 1999/02/19 23:29:05;  author: apharris;  state: Exp;
2369  #
2370  # or, in CVS 1.12.9:
2371  #
2372  # date: 2004-06-05 16:10:32 +0000; author: somebody; state: Exp;
2373
2374  my ($year, $mon, $mday, $hours, $min, $secs, $utcOffset, $author, $state, $rest) =
2375    $line =~
2376      m!(\d+)[-/](\d+)[-/](\d+)\s+(\d+):(\d+):(\d+)(\s+[+-]\d{4})?;\s+
2377        author:\s+([^;]+);\s+state:\s+([^;]+);(.*)!x
2378    or  die "Couldn't parse date ``$line''";
2379  die "Bad date or Y2K issues"
2380    unless $year > 1969 and $year < 2258;
2381  # Kinda arbitrary, but useful as a sanity check
2382  my $time = timegm($secs, $min, $hours, $mday, $mon-1, $year-1900);
2383  if ( defined $utcOffset ) {
2384    my ($plusminus, $hour, $minute) = ($utcOffset =~ m/([+-])(\d\d)(\d\d)/);
2385    my $offset = (($hour * 60) + $minute) * 60 * ($plusminus eq '+' ? -1 : 1);
2386    $time += $offset;
2387  }
2388  if ( $rest =~ m!\s+lines:\s+(.*)! ) {
2389    $self->{lines} = $1;
2390  }
2391
2392  return $time, $author, $state;
2393}
2394
2395# Subrs ----------------------------------------------------------------------
2396
2397package main;
2398
2399sub delta_check {
2400  my ($time, $tags) = @_;
2401
2402  # If we're in 'delta' mode, update the latest observed times for the
2403  # beginning and ending tags, and when we get around to printing output, we
2404  # will simply restrict ourselves to that timeframe...
2405  return
2406    unless $Delta_Mode;
2407
2408  $Delta_StartTime = $time
2409    if $time > $Delta_StartTime and $Delta_From and grep { $_ eq $Delta_From } @$tags;
2410
2411  $Delta_EndTime = $time
2412    if $time > $Delta_EndTime and $Delta_To and grep { $_ eq $Delta_To } @$tags;
2413}
2414
2415sub run_ext {
2416  my ($cmd) = @_;
2417  $cmd = [$cmd]
2418    unless ref $cmd;
2419  local $" = ' ';
2420  my $out = qx"@$cmd 2>&1";
2421  my $rv  = $?;
2422  my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8);
2423  return $out, $exit, $sig, $core;
2424}
2425
2426# -------------------------------------
2427
2428# If accumulating, grab the boundary date from pre-existing ChangeLog.
2429sub maybe_grab_accumulation_date {
2430  if (! $Cumulative || $Update) {
2431    return '';
2432  }
2433
2434  # else
2435
2436  open (LOG, "$Log_File_Name")
2437      or die ("trouble opening $Log_File_Name for reading ($!)");
2438
2439  my $boundary_date;
2440  while (<LOG>)
2441  {
2442    if (/^(\d\d\d\d-\d\d-\d\d\s+(\w+\s+)?\d\d:\d\d)/)
2443    {
2444      $boundary_date = "$1";
2445      last;
2446    }
2447  }
2448
2449  close (LOG);
2450
2451  # convert time from utc to local timezone if the ChangeLog has
2452  # dates/times in utc
2453  if ($UTC_Times && $boundary_date)
2454  {
2455    # convert the utc time to a time value
2456    my ($year,$mon,$mday,$hour,$min) = $boundary_date =~
2457      m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#;
2458    my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900);
2459    # print the timevalue in the local timezone
2460    my ($ignore,$wday);
2461    ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time);
2462    $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u",
2463                            $year+1900,$mon+1,$mday,$hour,$min);
2464  }
2465
2466  return $boundary_date;
2467}
2468
2469# -------------------------------------
2470
2471# Fills up a ChangeLog structure in the current directory.
2472sub derive_changelog {
2473  my ($command) = @_;
2474
2475  # See "The Plan" above for a full explanation.
2476
2477  # Might be adding to an existing ChangeLog
2478  my $accumulation_date = maybe_grab_accumulation_date;
2479  if ($accumulation_date) {
2480    # Insert -d immediately after 'cvs log'
2481    my $Log_Date_Command = "-d>${accumulation_date}";
2482
2483    my ($log_index) = grep $command->[$_] eq 'log', 0..$#$command;
2484    splice @$command, $log_index+1, 0, $Log_Date_Command;
2485    &debug ("(adding log msg starting from $accumulation_date)\n");
2486  }
2487
2488#  output_changelog(read_changelog($command));
2489  my $builder = CVS::Utils::ChangeLog::EntrySetBuilder->new;
2490  $builder->read_changelog($command);
2491  $builder->grand_poobah->output_changelog;
2492}
2493
2494# -------------------------------------
2495
2496sub min { $_[0] < $_[1] ? $_[0] : $_[1] }
2497
2498# -------------------------------------
2499
2500sub common_path_prefix {
2501  my ($path1, $path2) = @_;
2502
2503  # For compatibility (with older versions of cvs2cl.pl), we think in UN*X
2504  # terms, and mould windoze filenames to match.  Is this really appropriate?
2505  # If a file is checked in under UN*X, and cvs log run on windoze, which way
2506  # do the path separators slope?  Can we use fileparse as per the local
2507  # conventions?  If so, we should probably have a user option to specify an
2508  # OS to emulate to handle stdin-fed logs.  If we did this, we could avoid
2509  # the nasty \-/ transmogrification below.
2510
2511  my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2;
2512
2513  # Transmogrify Windows filenames to look like Unix.
2514  # (It is far more likely that someone is running cvs2cl.pl under
2515  # Windows than that they would genuinely have backslashes in their
2516  # filenames.)
2517  tr!\\!/!
2518    for $dir1, $dir2;
2519
2520  my ($accum1, $accum2, $last_common_prefix) = ('') x 3;
2521
2522  my @path1 = grep length($_), split qr!/!, $dir1;
2523  my @path2 = grep length($_), split qr!/!, $dir2;
2524
2525  my @common_path;
2526  for (0..min($#path1,$#path2)) {
2527    if ( $path1[$_] eq $path2[$_]) {
2528      push @common_path, $path1[$_];
2529    } else {
2530      last;
2531    }
2532  }
2533
2534  return join '', map "$_/", @common_path;
2535}
2536
2537# -------------------------------------
2538
2539sub parse_options {
2540  # Check this internally before setting the global variable.
2541  my $output_file;
2542
2543  # If this gets set, we encountered unknown options and will exit at
2544  # the end of this subroutine.
2545  my $exit_with_admonishment = 0;
2546
2547  # command to generate the log
2548  my @log_source_command = qw( cvs log );
2549
2550  my (@Global_Opts, @Local_Opts);
2551
2552  Getopt::Long::Configure(qw( bundling permute no_getopt_compat
2553                              pass_through no_ignore_case ));
2554  GetOptions('help|usage|h'   => \$Print_Usage,
2555             'debug'          => \$Debug,        # unadvertised option, heh
2556             'version'        => \$Print_Version,
2557
2558             'file|f=s'       => \$output_file,
2559             'accum'          => \$Cumulative,
2560             'update'         => \$Update,
2561             'fsf'            => \$FSF_Style,
2562             'rcs=s'          => \$RCS_Root,
2563             'usermap|U=s'    => \$User_Map_File,
2564             'gecos'          => \$Gecos,
2565             'domain=s'       => \$Domain,
2566             'passwd=s'       => \$User_Passwd_File,
2567             'window|W=i'     => \$Max_Checkin_Duration,
2568             'chrono'         => \$Chronological_Order,
2569             'ignore|I=s'     => \@Ignore_Files,
2570             'case-insensitive|C' => \$Case_Insensitive,
2571             'regexp|R=s'     => \$Regexp_Gate,
2572             'stdin'          => \$Input_From_Stdin,
2573             'stdout'         => \$Output_To_Stdout,
2574             'distributed|d'  => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) },
2575             'prune|P'        => \$Prune_Empty_Msgs,
2576             'no-wrap'        => \$No_Wrap,
2577             'gmt|utc'        => \$UTC_Times,
2578             'day-of-week|w'  => \$Show_Day_Of_Week,
2579             'revisions|r'    => \$Show_Revisions,
2580             'show-dead'      => \$Show_Dead,
2581             'tags|t'         => \$Show_Tags,
2582             'tag-regexp=s'   => \$Regexp_Tag,
2583             'tagdates|T'     => \$Show_Tag_Dates,
2584             'branches|b'     => \$Show_Branches,
2585             'follow|F=s'     => \@Follow_Branches,
2586             'follow-only=s'  => \@Follow_Only,
2587             'xml-encoding=s' => \$XML_Encoding,
2588             'xml'            => \$XML_Output,
2589             'noxmlns'        => \$No_XML_Namespace,
2590             'no-xml-iso-date' => \$No_XML_ISO_Date,
2591             'no-ancestors'   => \$No_Ancestors,
2592             'lines-modified' => \$Show_Lines_Modified,
2593
2594             'no-indent'    => sub {
2595               $Indent = '';
2596             },
2597
2598             'summary'      => sub {
2599               $Summary = 1;
2600               $After_Header = "\n\n"; # Summary implies --separate-header
2601             },
2602
2603             'no-times'     => sub {
2604               $Show_Times = 0;
2605             },
2606
2607             'no-hide-branch-additions' => sub {
2608               $Hide_Branch_Additions = 0;
2609             },
2610
2611             'no-common-dir'  => sub {
2612               $Common_Dir = 0;
2613             },
2614
2615             'ignore-tag=s'   => sub {
2616               $ignore_tags{$_[1]} = 1;
2617             },
2618
2619             'show-tag=s'     => sub {
2620               $show_tags{$_[1]} = 1;
2621             },
2622
2623             # Deliberately undocumented.  This is not a public interface, and
2624             # may change/disappear at any time.
2625             'test-code=s'    => \$TestCode,
2626
2627             'delta=s'        => sub {
2628               my $arg = $_[1];
2629               if ( $arg =~
2630                    /^([A-Za-z][A-Za-z0-9_\-\]\[\.]*)?:([A-Za-z][A-Za-z0-9_\-\]\[\.]*)?$/ )
2631               {
2632                 $Delta_From = $1;
2633                 $Delta_To = $2;
2634                 $Delta_Mode = 1;
2635               } else {
2636                 die "--delta FROM_TAG:TO_TAG is what you meant to say.\n";
2637               }
2638             },
2639
2640             'FSF'             => sub {
2641               $FSF_Output = 1;
2642               $Show_Times = 0;
2643               $Common_Dir = 0;
2644               $No_Extra_Indent = 1;
2645               $Indent = "\t";
2646             },
2647
2648             'header=s'        => sub {
2649               my $narg = $_[1];
2650               $ChangeLog_Header = &slurp_file ($narg);
2651               if (! defined ($ChangeLog_Header)) {
2652                 $ChangeLog_Header = '';
2653               }
2654             },
2655
2656             'global-opts|g=s' => sub {
2657               my $narg = $_[1];
2658               push @Global_Opts, $narg;
2659               splice @log_source_command, 1, 0, $narg;
2660             },
2661
2662             'log-opts|l=s' => sub {
2663               my $narg = $_[1];
2664               push @Local_Opts, $narg;
2665               push @log_source_command, $narg;
2666             },
2667
2668             'mailname=s'   => sub {
2669               my $narg = $_[1];
2670               warn "--mailname is deprecated; please use --domain instead\n";
2671               $Domain = $narg;
2672             },
2673
2674             'separate-header|S' => sub {
2675               $After_Header = "\n\n";
2676               $No_Extra_Indent = 1;
2677             },
2678
2679             'group-by-date' => sub {
2680               $GroupByDate = 1;
2681               $Show_Times = 0;
2682             },
2683
2684             'group-by-author' => sub {
2685               $GroupByDate = 1;
2686               $GroupByAuthor = 1;
2687               $Show_Times = 0;
2688             },
2689
2690             'hide-filenames' => sub {
2691               $Hide_Filenames = 1;
2692               $After_Header = '';
2693             },
2694            )
2695    or die "options parsing failed\n";
2696
2697  push @log_source_command, map "$_", @ARGV;
2698
2699  ## Check for contradictions...
2700
2701  if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) {
2702    print STDERR "cannot pass both --stdout and --distributed\n";
2703    $exit_with_admonishment = 1;
2704  }
2705
2706  if ($Output_To_Stdout && $output_file) {
2707    print STDERR "cannot pass both --stdout and --file\n";
2708    $exit_with_admonishment = 1;
2709  }
2710
2711  if ($Input_From_Stdin && @Global_Opts) {
2712    print STDERR "cannot pass both --stdin and -g\n";
2713    $exit_with_admonishment = 1;
2714  }
2715
2716  if ($Input_From_Stdin && @Local_Opts) {
2717    print STDERR "cannot pass both --stdin and -l\n";
2718    $exit_with_admonishment = 1;
2719  }
2720
2721  if ($XML_Output && $Cumulative) {
2722    print STDERR "cannot pass both --xml and --accum\n";
2723    $exit_with_admonishment = 1;
2724  }
2725
2726  if ($FSF_Output && $Cumulative) {
2727    print STDERR "cannot pass both --FSF and --accum\n";
2728    $exit_with_admonishment = 1;
2729  }
2730
2731  # Other consistency checks and option-driven logic
2732
2733  # Bleargh.  Compensate for a deficiency of custom wrapping.
2734  if ( ($After_Header ne " ") and $FSF_Style ) {
2735    $After_Header .= "\t";
2736  }
2737
2738  @Ignore_Files = map lc, @Ignore_Files
2739    if $Case_Insensitive;
2740
2741  # Or if any other error message has already been printed out, we
2742  # just leave now:
2743  if ($exit_with_admonishment) {
2744    &usage ();
2745    exit (1);
2746  }
2747  elsif ($Print_Usage) {
2748    &usage ();
2749    exit (0);
2750  }
2751  elsif ($Print_Version) {
2752    &version ();
2753    exit (0);
2754  }
2755
2756  ## Else no problems, so proceed.
2757
2758  if ($output_file) {
2759    $Log_File_Name = $output_file;
2760  }
2761
2762  return \@log_source_command;
2763}
2764
2765# -------------------------------------
2766
2767sub slurp_file {
2768  my $filename = shift || die ("no filename passed to slurp_file()");
2769  my $retstr;
2770
2771  open (SLURPEE, "<${filename}") or die ("unable to open $filename ($!)");
2772  local $/ = undef;
2773  $retstr = <SLURPEE>;
2774  close (SLURPEE);
2775  return $retstr;
2776}
2777
2778# -------------------------------------
2779
2780sub debug {
2781  if ($Debug) {
2782    my $msg = shift;
2783    print STDERR $msg;
2784  }
2785}
2786
2787# -------------------------------------
2788
2789sub version {
2790  print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n";
2791}
2792
2793# -------------------------------------
2794
2795sub usage {
2796  &version ();
2797
2798  eval "use Pod::Usage qw( pod2usage )";
2799
2800   if ( $@ ) {
2801    print <<'END';
2802
2803* Pod::Usage was not found.  The formatting may be suboptimal.  Consider
2804  upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and
2805  versions of perl prior to 5.6 are getting rather rusty, now.  Alternatively,
2806  install Pod::Usage direct from CPAN.
2807END
2808
2809    local $/ = undef;
2810    my $message = <DATA>;
2811    $message =~ s/^=(head1|item) //gm;
2812    $message =~ s/^=(over|back).*\n//gm;
2813    $message =~ s/\n{3,}/\n\n/g;
2814    print $message;
2815  } else {
2816    print "\n";
2817    pod2usage( -exitval => 'NOEXIT',
2818               -verbose => 1,
2819               -output  => \*STDOUT,
2820             );
2821  }
2822
2823  return;
2824}
2825
2826# Main -----------------------------------------------------------------------
2827
2828my $log_source_command = parse_options;
2829if ( defined $TestCode ) {
2830  eval $TestCode;
2831  die "Eval failed: '$@'\n"
2832    if $@;
2833} else {
2834  derive_changelog($log_source_command);
2835}
2836
2837__DATA__
2838
2839=head1 NAME
2840
2841cvs2cl.pl - convert cvs log messages to changelogs
2842
2843=head1 SYNOPSIS
2844
2845B<cvs2cl> [I<options>] [I<FILE1> [I<FILE2> ...]]
2846
2847=head1 DESCRIPTION
2848
2849cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by
2850running "cvs log" and parsing the output. Duplicate log messages get
2851unified in the Right Way.
2852
2853The default output of cvs2cl is designed to be compact, formally unambiguous,
2854but still easy for humans to read.  It should be largely self-explanatory; the
2855one abbreviation that might not be obvious is "utags".  That stands for
2856"universal tags" -- a universal tag is one held by all the files in a given
2857change entry.
2858
2859If you need output that's easy for a program to parse, use the B<--xml> option.
2860Note that with XML output, just about all available information is included
2861with each change entry, whether you asked for it or not, on the theory that
2862your parser can ignore anything it's not looking for.
2863
2864If filenames are given as arguments cvs2cl only shows log information for the
2865named files.
2866
2867=head1 OPTIONS
2868
2869=over 4
2870
2871=item B<-h>, B<-help>, B<--help>, B<-?>
2872
2873Show a short help and exit.
2874
2875=item B<--version>
2876
2877Show version and exit.
2878
2879=item B<-r>, B<--revisions>
2880
2881Show revision numbers in output.
2882
2883=item B<-b>, B<--branches>
2884
2885Show branch names in revisions when possible.
2886
2887=item B<-t>, B<--tags>
2888
2889Show tags (symbolic names) in output.
2890
2891=item B<-T>, B<--tagdates>
2892
2893Show tags in output on their first occurance.
2894
2895=item B<--show-dead>
2896
2897Show dead files.
2898
2899=item B<--stdin>
2900
2901Read from stdin, don't run cvs log.
2902
2903=item B<--stdout>
2904
2905Output to stdout not to ChangeLog.
2906
2907=item B<-d>, B<--distributed>
2908
2909Put ChangeLogs in subdirs.
2910
2911=item B<-f> I<FILE>, B<--file> I<FILE>
2912
2913Write to I<FILE> instead of ChangeLog.
2914
2915=item B<--fsf>
2916
2917Use this if log data is in FSF ChangeLog style.
2918
2919=item B<--FSF>
2920
2921Attempt strict FSF-standard compatible output (incompatible with B<--accum>).
2922
2923=item B<-W> I<SECS>, B<--window> I<SECS>
2924
2925Window of time within which log entries unify.
2926
2927=item -B<U> I<UFILE>, B<--usermap> I<UFILE>
2928
2929Expand usernames to email addresses from I<UFILE>.
2930
2931=item B<--passwd> I<PASSWORDFILE>
2932
2933Use system passwd file for user name expansion.  If no mail domain is provided
2934(via B<--domain>), it tries to read one from B</etc/mailname>, output of B<hostname
2935-d>, B<dnsdomainname>, or B<domain-name>.  cvs2cl exits with an error if none of
2936those options is successful. Use a domain of '' to prevent the addition of a
2937mail domain.
2938
2939=item B<--domain> I<DOMAIN>
2940
2941Domain to build email addresses from.
2942
2943=item B<--gecos>
2944
2945Get user information from GECOS data.
2946
2947=item B<-R> I<REGEXP>, B<--regexp> I<REGEXP>
2948
2949Include only entries that match I<REGEXP>.  This option may be used multiple
2950times.
2951
2952=item B<-I> I<REGEXP>, B<--ignore> I<REGEXP>
2953
2954Ignore files whose names match I<REGEXP>.  This option may be used multiple
2955times.  The regexp is a perl regular expression.  It is matched as is; you may
2956want to prefix with a ^ or suffix with a $ to anchor the match.
2957
2958=item B<-C>, B<--case-insensitive>
2959
2960Any regexp matching is done case-insensitively.
2961
2962=item B<-F> I<BRANCH>, B<--follow> I<BRANCH>
2963
2964Show only revisions on or ancestral to I<BRANCH>.
2965
2966=item B<--follow-only> I<BRANCH>
2967
2968Like --follow, but sub-branches are not followed.
2969
2970=item B<--no-ancestors>
2971
2972When using B<-F>, only track changes since the I<BRANCH> started.
2973
2974=item B<--no-hide-branch-additions>
2975
2976By default, entries generated by cvs for a file added on a branch (a dead 1.1
2977entry) are not shown.  This flag reverses that action.
2978
2979=item B<-S>, B<--separate-header>
2980
2981Blank line between each header and log message.
2982
2983=item B<--group-by-date>
2984
2985Group ChangeLog entries on the same date together, instead of having a
2986separate entry for each commit on that date.
2987
2988=item B<--group-by-author>
2989
2990Group consecutive ChangeLog entries from same author during same date,
2991instead of having separate entry for each commit.
2992
2993=item B<--summary>
2994
2995Add CVS change summary information.
2996
2997=item B<--no-wrap>
2998
2999Don't auto-wrap log message (recommend B<-S> also).
3000
3001=item B<--no-indent>
3002
3003Don't indent log message
3004
3005=item B<--gmt>, B<--utc>
3006
3007Show times in GMT/UTC instead of local time.
3008
3009=item B<--accum>
3010
3011Add to an existing ChangeLog (incompatible with B<--xml> and B<--FSF>).
3012
3013=item B<-w>, B<--day-of-week>
3014
3015Show day of week.
3016
3017=item B<--no-times>
3018
3019Don't show times in output.
3020
3021=item B<--chrono>
3022
3023Output log in chronological order (default is reverse chronological order).
3024
3025=item B<--header> I<FILE>
3026
3027Get ChangeLog header from I<FILE> ("B<->" means stdin).
3028
3029=item B<--xml>
3030
3031Output XML instead of ChangeLog format (incompatible with B<--accum>).
3032
3033=item B<--xml-encoding> I<ENCODING.>
3034
3035Insert encoding clause in XML header.
3036
3037=item B<--noxmlns>
3038
3039Don't include xmlns= attribute in root element.
3040
3041=item B<--hide-filenames>
3042
3043Don't show filenames (ignored for XML output).
3044
3045=item B<--no-common-dir>
3046
3047Don't shorten directory names from filenames.
3048
3049=item B<--rcs> I<CVSROOT>
3050
3051Handle filenames from raw RCS, for instance those produced by "cvs rlog"
3052output, stripping the prefix I<CVSROOT>.
3053
3054=item B<-P>, B<--prune>
3055
3056Don't show empty log messages.
3057
3058=item B<--lines-modified>
3059
3060Output the number of lines added and the number of lines removed for
3061each checkin (if applicable). At the moment, this only affects the
3062XML output mode.
3063
3064=item B<--ignore-tag> I<TAG>
3065
3066Ignore individual changes that are associated with a given tag.
3067May be repeated, if so, changes that are associated with any of
3068the given tags are ignored.
3069
3070=item B<--show-tag> I<TAG>
3071
3072Log only individual changes that are associated with a given
3073tag.  May be repeated, if so, changes that are associated with
3074any of the given tags are logged.
3075
3076=item B<--delta> I<FROM_TAG>B<:>I<TO_TAG>
3077
3078Attempt a delta between two tags (since I<FROM_TAG> up to and
3079including I<TO_TAG>).  The algorithm is a simple date-based one
3080(this is a hard problem) so results are imperfect.
3081
3082=item B<-g> I<OPTS>, B<--global-opts> I<OPTS>
3083
3084Pass I<OPTS> to cvs like in "cvs I<OPTS> log ...".
3085
3086=item B<-l> I<OPTS>, B<--log-opts> I<OPTS>
3087
3088Pass I<OPTS> to cvs log like in "cvs ... log I<OPTS>".
3089
3090=back
3091
3092Notes about the options and arguments:
3093
3094=over 4
3095
3096=item *
3097
3098The B<-I> and B<-F> options may appear multiple times.
3099
3100=item *
3101
3102To follow trunk revisions, use "B<-F trunk>" ("B<-F TRUNK>" also works).  This is
3103okay because no would ever, ever be crazy enough to name a branch "trunk",
3104right?  Right.
3105
3106=item *
3107
3108For the B<-U> option, the I<UFILE> should be formatted like CVSROOT/users. That is,
3109each line of I<UFILE> looks like this:
3110
3111       jrandom:jrandom@red-bean.com
3112
3113or maybe even like this
3114
3115       jrandom:'Jesse Q. Random <jrandom@red-bean.com>'
3116
3117Don't forget to quote the portion after the colon if necessary.
3118
3119=item *
3120
3121Many people want to filter by date.  To do so, invoke cvs2cl.pl like this:
3122
3123       cvs2cl.pl -l "-d'DATESPEC'"
3124
3125where DATESPEC is any date specification valid for "cvs log -d".  (Note that
3126CVS 1.10.7 and below requires there be no space between -d and its argument).
3127
3128=item *
3129
3130Dates/times are interpreted in the local time zone.
3131
3132=item *
3133
3134Remember to quote the argument to `B<-l>' so that your shell doesn't interpret
3135spaces as argument separators.
3136
3137=item *
3138
3139See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like
3140systems) for more information.
3141
3142=item *
3143
3144Note that the rules for quoting under windows shells are different.
3145
3146=item *
3147
3148To run in an automated environment such as CGI or PHP, suidperl may be needed
3149in order to execute as the correct user to enable /cvsroot read lock files to
3150be written for the 'cvs log' command.  This is likely just a case of changing
3151the /usr/bin/perl command to /usr/bin/suidperl, and explicitly declaring the
3152PATH variable.
3153
3154=back
3155
3156=head1 EXAMPLES
3157
3158Some examples (working on UNIX shells):
3159
3160      # logs after 6th March, 2003 (inclusive)
3161      cvs2cl.pl -l "-d'>2003-03-06'"
3162      # logs after 4:34PM 6th March, 2003 (inclusive)
3163      cvs2cl.pl -l "-d'>2003-03-06 16:34'"
3164      # logs between 4:46PM 6th March, 2003 (exclusive) and
3165      # 4:34PM 6th March, 2003 (inclusive)
3166      cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'"
3167
3168Some examples (on non-UNIX shells):
3169
3170      # Reported to work on windows xp/2000
3171      cvs2cl.pl -l  "-d"">2003-10-18;today<"""
3172
3173=head1 AUTHORS
3174
3175=over 4
3176
3177=item Karl Fogel
3178
3179=item Melissa O'Neill
3180
3181=item Martyn J. Pearce
3182
3183=back
3184
3185Contributions from
3186
3187=over 4
3188
3189=item Mike Ayers
3190
3191=item Tim Bradshaw
3192
3193=item Richard Broberg
3194
3195=item Nathan Bryant
3196
3197=item Oswald Buddenhagen
3198
3199=item Neil Conway
3200
3201=item Arthur de Jong
3202
3203=item Mark W. Eichin
3204
3205=item Dave Elcock
3206
3207=item Reid Ellis
3208
3209=item Simon Josefsson
3210
3211=item Robin Hugh Johnson
3212
3213=item Terry Kane
3214
3215=item Pete Kempf
3216
3217=item Akos Kiss
3218
3219=item Claus Klein
3220
3221=item Eddie Kohler
3222
3223=item Richard Laager
3224
3225=item Kevin Lilly
3226
3227=item Karl-Heinz Marbaise
3228
3229=item Mitsuaki Masuhara
3230
3231=item Henrik Nordstrom
3232
3233=item Joe Orton
3234
3235=item Peter Palfrader
3236
3237=item Thomas Parmelan
3238
3239=item Jordan Russell
3240
3241=item Jacek Sliwerski
3242
3243=item Johannes Stezenbach
3244
3245=item Joseph Walton
3246
3247=item Ernie Zapata
3248
3249=back
3250
3251=head1 BUGS
3252
3253Please report bugs to C<cvs2cl-reports {_AT_} red-bean.com>.
3254
3255=head1 PREREQUISITES
3256
3257This script requires C<Text::Wrap>, C<Time::Local>, and C<File::Basename>.  It
3258also seems to require C<Perl 5.004_04> or higher.
3259
3260=head1 OPERATING SYSTEM COMPATIBILITY
3261
3262Should work on any OS.
3263
3264=head1 SCRIPT CATEGORIES
3265
3266Version_Control/CVS
3267
3268=head1 COPYRIGHT
3269
3270(C) 2001,2002,2003,2004 Martyn J. Pearce, under the GNU GPL.
3271
3272(C) 1999 Karl Fogel, under the GNU GPL.
3273
3274cvs2cl.pl is free software; you can redistribute it and/or modify
3275it under the terms of the GNU General Public License as published by
3276the Free Software Foundation; either version 2, or (at your option)
3277any later version.
3278
3279cvs2cl.pl is distributed in the hope that it will be useful,
3280but WITHOUT ANY WARRANTY; without even the implied warranty of
3281MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
3282GNU General Public License for more details.
3283
3284You may have received a copy of the GNU General Public License
3285along with cvs2cl.pl; see the file COPYING.  If not, write to the
3286Free Software Foundation, Inc., 59 Temple Place - Suite 330,
3287Boston, MA 02111-1307, USA.
3288
3289=head1 SEE ALSO
3290
3291cvs(1)
3292
3293