xref: /openbsd-src/gnu/usr.bin/perl/Porting/check-cpan-pollution (revision b39c515898423c8d899e35282f4b395f7cad3298)
1*b39c5158Smillert#!perl
2*b39c5158Smillertuse strict;
3*b39c5158Smillertuse warnings;
4*b39c5158Smillertuse Getopt::Long qw/GetOptions/;
5*b39c5158Smillertuse Term::ANSIColor qw/color/;
6*b39c5158Smillertuse constant GITCMD => 'git';
7*b39c5158Smillert
8*b39c5158Smillertsub usage {
9*b39c5158Smillert  print <<HERE;
10*b39c5158SmillertUsage: $0 [options] [<start-commit> [<end-commit>]]
11*b39c5158Smillert
12*b39c5158SmillertScans the commit logs for commits that are potentially, illegitimately
13*b39c5158Smillerttouching modules that are primarily maintained outside of the perl core.
14*b39c5158SmillertAlso checks for commits that span multiple distributions in cpan/ or dist/.
15*b39c5158SmillertMakes sure that updated CPAN distributions also update Porting/Maintainers.pl,
16*b39c5158Smillertbut otherwise ignores changes to that file (and MANIFEST).
17*b39c5158Smillert
18*b39c5158SmillertSkip the <start-commit> to go back indefinitely. <end-commit> defaults to
19*b39c5158SmillertHEAD.
20*b39c5158Smillert
21*b39c5158Smillert -h/--help shows this help
22*b39c5158Smillert -v/--verbose shows the output of "git show --stat <commit>" for each commit
23*b39c5158Smillert -c/--color uses colored output
24*b39c5158SmillertHERE
25*b39c5158Smillert  exit(1);
26*b39c5158Smillert}
27*b39c5158Smillert
28*b39c5158Smillertour $Verbose = 0;
29*b39c5158Smillertour $Color   = 0;
30*b39c5158SmillertGetOptions(
31*b39c5158Smillert  'h|help'         => \&usage,
32*b39c5158Smillert  'v|verbose'      => \$Verbose,
33*b39c5158Smillert  'c|color|colour' => \$Color,
34*b39c5158Smillert);
35*b39c5158Smillert
36*b39c5158Smillertmy $start_commit = shift;
37*b39c5158Smillertmy $end_commit   = shift;
38*b39c5158Smillert$end_commit = 'HEAD' if not defined $end_commit;
39*b39c5158Smillertmy $commit_range_cmd = defined($start_commit) ? " $start_commit..$end_commit" : "";
40*b39c5158Smillert
41*b39c5158Smillert# format: hash\0author\0committer\0short_msg
42*b39c5158Smillertour $LogCmd = GITCMD() . q{ log --no-color -M -C --name-only '--pretty=format:%h%x00%an%x00%cn%x00%s'} . $commit_range_cmd;
43*b39c5158Smillertour @ColumnSpec = qw(hash author committer commit_msg);
44*b39c5158Smillert
45*b39c5158Smillertopen my $fh, '-|', $LogCmd
46*b39c5158Smillert  or die "Can't run '$LogCmd' to get the commit log: $!";
47*b39c5158Smillert
48*b39c5158Smillertmy ($safe_commits, $unsafe_commits) = parse_log($fh);
49*b39c5158Smillert
50*b39c5158Smillertif (@$unsafe_commits) {
51*b39c5158Smillert  my $header = "Potentially unsafe commits:";
52*b39c5158Smillert  print color("red") if $Color;
53*b39c5158Smillert  print $header, "\n";
54*b39c5158Smillert  print("=" x length($header), "\n\n") if $Verbose;
55*b39c5158Smillert  print color("reset") if $Color;
56*b39c5158Smillert  print_commit_info($_) foreach reverse @$unsafe_commits;
57*b39c5158Smillert  print "\n";
58*b39c5158Smillert}
59*b39c5158Smillert
60*b39c5158Smillertif (@$safe_commits) {
61*b39c5158Smillert  my $header = "Presumably safe commits:";
62*b39c5158Smillert  print color("green") if $Color;
63*b39c5158Smillert  print $header, "\n";
64*b39c5158Smillert  print("=" x length($header), "\n") if $Verbose;
65*b39c5158Smillert  print color("reset") if $Color;
66*b39c5158Smillert  print_commit_info($_) foreach reverse @$safe_commits;
67*b39c5158Smillert  print "\n";
68*b39c5158Smillert}
69*b39c5158Smillert
70*b39c5158Smillertexit(0);
71*b39c5158Smillert
72*b39c5158Smillert
73*b39c5158Smillert
74*b39c5158Smillert# single-line info about the commit at hand
75*b39c5158Smillertsub print_commit_info {
76*b39c5158Smillert  my $commit = shift;
77*b39c5158Smillert
78*b39c5158Smillert  my $author_info = "by $commit->{author}"
79*b39c5158Smillert                    . ($commit->{author} eq $commit->{committer}
80*b39c5158Smillert                       ? ''
81*b39c5158Smillert                       : " committed by $commit->{committer}");
82*b39c5158Smillert
83*b39c5158Smillert  if ($Verbose) {
84*b39c5158Smillert    print color("yellow") if $Color;
85*b39c5158Smillert    my $header = "$commit->{hash} $author_info: $commit->{msg}";
86*b39c5158Smillert    print "$header\n", ("-" x length($header)), "\n";
87*b39c5158Smillert    print color("reset") if $Color;
88*b39c5158Smillert
89*b39c5158Smillert    my $cmd = GITCMD() . ' show --stat ' . ($Color?'--color ':'')
90*b39c5158Smillert              . $commit->{hash};
91*b39c5158Smillert    print `$cmd`; # make sure git knows this isn't a terminal
92*b39c5158Smillert    print "\n";
93*b39c5158Smillert  }
94*b39c5158Smillert  else {
95*b39c5158Smillert    print color("yellow") if $Color;
96*b39c5158Smillert    print "  $commit->{hash} $author_info: $commit->{msg}\n";
97*b39c5158Smillert    print color("reset") if $Color;
98*b39c5158Smillert  }
99*b39c5158Smillert}
100*b39c5158Smillert
101*b39c5158Smillert
102*b39c5158Smillert# check whether the commit at hand is safe, unsafe or uninteresting
103*b39c5158Smillertsub check_commit {
104*b39c5158Smillert  my $commit = shift;
105*b39c5158Smillert  my $safe   = shift;
106*b39c5158Smillert  my $unsafe = shift;
107*b39c5158Smillert
108*b39c5158Smillert  # Note to self: Adding any more greps and such will make this
109*b39c5158Smillert  # look even more silly. Just use a single foreach, smart guy!
110*b39c5158Smillert  my $touches_maintainers_pl = 0;
111*b39c5158Smillert  my @files = grep {
112*b39c5158Smillert                $touches_maintainers_pl = 1
113*b39c5158Smillert                  if $_ eq 'Porting/Maintainers.pl';
114*b39c5158Smillert                $_ ne 'MANIFEST' and $_ ne 'Porting/Maintainers.pl'
115*b39c5158Smillert              }
116*b39c5158Smillert              @{$commit->{files}};
117*b39c5158Smillert  my @touching_cpan = grep {/^cpan\//} @files;
118*b39c5158Smillert  return if not @touching_cpan;
119*b39c5158Smillert
120*b39c5158Smillert  # check for unsafe commits to cpan/
121*b39c5158Smillert  my %touched_cpan_dirs;
122*b39c5158Smillert  $touched_cpan_dirs{$_}++ for grep {defined $_}
123*b39c5158Smillert                               map {s/^cpan\/([^\/]*).*$/$1/; $_}
124*b39c5158Smillert                               @touching_cpan;
125*b39c5158Smillert
126*b39c5158Smillert  my $touches_multiple_cpan_dists = (keys(%touched_cpan_dirs) > 1);
127*b39c5158Smillert
128*b39c5158Smillert  my $touches_others              = @files - @touching_cpan;
129*b39c5158Smillert
130*b39c5158Smillert  if (@touching_cpan) {
131*b39c5158Smillert    if ($touches_others) {
132*b39c5158Smillert      $commit->{msg} = 'Touched files under cpan/ and other locations';
133*b39c5158Smillert      push @$unsafe, $commit;
134*b39c5158Smillert    }
135*b39c5158Smillert    elsif ($touches_multiple_cpan_dists) {
136*b39c5158Smillert      $commit->{msg} = 'Touched multiple directories under cpan/';
137*b39c5158Smillert      push @$unsafe, $commit;
138*b39c5158Smillert    }
139*b39c5158Smillert    elsif (not $touches_maintainers_pl) {
140*b39c5158Smillert      $commit->{msg} = 'Touched files under cpan/, but does not update '
141*b39c5158Smillert                       . 'Porting/Maintainers.pl';
142*b39c5158Smillert      push @$unsafe, $commit;
143*b39c5158Smillert    }
144*b39c5158Smillert    elsif ($commit->{commit_msg} =~ /(?:up(?:grad|dat)|import)(?:ed?|ing)/i) {
145*b39c5158Smillert      $commit->{msg} = 'Touched files under cpan/ with '
146*b39c5158Smillert                       . '"upgrading"-like commit message';
147*b39c5158Smillert      push @$safe, $commit;
148*b39c5158Smillert    }
149*b39c5158Smillert    else {
150*b39c5158Smillert      $commit->{msg} = 'Touched files under cpan/ without '
151*b39c5158Smillert                       . '"upgrading"-like commit message';
152*b39c5158Smillert      push @$unsafe, $commit;
153*b39c5158Smillert    }
154*b39c5158Smillert  }
155*b39c5158Smillert
156*b39c5158Smillert  # check for unsafe commits to dist/
157*b39c5158Smillert  my @touching_dist = grep {/^dist\//} @files;
158*b39c5158Smillert  my %touched_dist_dirs;
159*b39c5158Smillert  $touched_dist_dirs{$_}++ for grep {defined $_}
160*b39c5158Smillert                               map {s/^dist\/([^\/]*).*$/$1/; $_}
161*b39c5158Smillert                               @touching_dist;
162*b39c5158Smillert  $touches_others = @files - @touching_dist;
163*b39c5158Smillert  my $touches_multiple_dists = (keys(%touched_dist_dirs) > 1);
164*b39c5158Smillert
165*b39c5158Smillert  if (@touching_dist) {
166*b39c5158Smillert    if ($touches_others) {
167*b39c5158Smillert      $commit->{msg} = 'Touched files under dist/ and other locations';
168*b39c5158Smillert      push @$unsafe, $commit;
169*b39c5158Smillert    }
170*b39c5158Smillert    elsif ($touches_multiple_dists) {
171*b39c5158Smillert      $commit->{msg} = 'Touched multiple directories under cpan/';
172*b39c5158Smillert      push @$unsafe, $commit;
173*b39c5158Smillert    }
174*b39c5158Smillert  }
175*b39c5158Smillert}
176*b39c5158Smillert
177*b39c5158Smillert# given file handle, parse the git log output and put the resulting commit
178*b39c5158Smillert# structure into safe/unsafe compartments
179*b39c5158Smillertsub parse_log {
180*b39c5158Smillert  my $fh = shift;
181*b39c5158Smillert  my @safe_commits;
182*b39c5158Smillert  my @unsafe_commits;
183*b39c5158Smillert  my $commit;
184*b39c5158Smillert  while (defined(my $line = <$fh>)) {
185*b39c5158Smillert    chomp $line;
186*b39c5158Smillert    if (not $commit) {
187*b39c5158Smillert      next if $line =~ /^\s*$/;
188*b39c5158Smillert      my @cols = split /\0/, $line;
189*b39c5158Smillert      @cols == @ColumnSpec && !grep {!defined($_)} @cols
190*b39c5158Smillert        or die "Malformed commit header line: '$line'";
191*b39c5158Smillert      $commit = {
192*b39c5158Smillert        files => [],
193*b39c5158Smillert        map {$ColumnSpec[$_] => $cols[$_]} (0..$#cols)
194*b39c5158Smillert      };
195*b39c5158Smillert      next;
196*b39c5158Smillert    }
197*b39c5158Smillert    elsif ($line =~ /^\s*$/) { # within commit, blank line
198*b39c5158Smillert      check_commit($commit, \@safe_commits, \@unsafe_commits);
199*b39c5158Smillert      $commit = undef;
200*b39c5158Smillert    }
201*b39c5158Smillert    else { # within commit, non-blank (file) line
202*b39c5158Smillert      push @{$commit->{files}}, $line;
203*b39c5158Smillert    }
204*b39c5158Smillert  }
205*b39c5158Smillert
206*b39c5158Smillert  return(\@safe_commits, \@unsafe_commits);
207*b39c5158Smillert}
208*b39c5158Smillert
209