xref: /illumos-gate/usr/src/tools/scripts/git-pbchk.py (revision da88d39fc3dea7bae00c88b45c396af34c3a3130)
1#!@TOOLS_PYTHON@ -Es
2#
3#  This program is free software; you can redistribute it and/or modify
4#  it under the terms of the GNU General Public License version 2
5#  as published by the Free Software Foundation.
6#
7#  This program is distributed in the hope that it will be useful,
8#  but WITHOUT ANY WARRANTY; without even the implied warranty of
9#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10#  GNU General Public License for more details.
11#
12#  You should have received a copy of the GNU General Public License
13#  along with this program; if not, write to the Free Software
14#  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
15#
16
17#
18# Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
19# Copyright 2008, 2012 Richard Lowe
20# Copyright 2019 Garrett D'Amore <garrett@damore.org>
21# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
22# Copyright 2016 Nexenta Systems, Inc.
23# Copyright (c) 2019, Joyent, Inc.
24# Copyright 2021 OmniOS Community Edition (OmniOSce) Association.
25# Copyright 2024 Bill Sommerfeld
26#
27
28from __future__ import print_function
29
30import getopt
31import io
32import os
33import re
34import subprocess
35import sys
36import tempfile
37
38if sys.version_info[0] < 3:
39    from cStringIO import StringIO
40else:
41    from io import StringIO
42
43#
44# Adjust the load path based on our location and the version of python into
45# which it is being loaded.  This assumes the normal onbld directory
46# structure, where we are in bin/ and the modules are in
47# lib/python(version)?/onbld/Scm/.  If that changes so too must this.
48#
49sys.path.insert(1, os.path.join(os.path.dirname(__file__), "..", "lib",
50                                "python%d.%d" % sys.version_info[:2]))
51
52#
53# Add the relative path to usr/src/tools to the load path, such that when run
54# from the source tree we use the modules also within the source tree.
55#
56sys.path.insert(2, os.path.join(os.path.dirname(__file__), ".."))
57
58from onbld.Scm import Ignore
59from onbld.Checks import Comments, Copyright, CStyle, HdrChk, WsCheck
60from onbld.Checks import JStyle, Keywords, ManLint, Mapfile, SpellCheck
61from onbld.Checks import ShellLint, PkgFmt
62
class GitError(Exception):
    """Error raised by git() when a git invocation cannot be set up or
    executed, or when git exits with a non-zero status."""
65
def git(command):
    """Run a git command and return its standard output as a list of lines.

    'command' is either a list of arguments or a single string, which is
    split on whitespace; "git" itself is prepended to it.  Standard output
    is spooled through a temporary file and returned as a list of text
    lines (line terminators retained), decoded as UTF-8 with undecodable
    bytes replaced.

    Raises GitError if the temporary file cannot be created, if git cannot
    be executed, or if git exits non-zero, in which case the message is
    whatever git wrote to its standard error."""

    if not isinstance(command, list):
        command = command.split()

    command = ["git"] + command

    try:
        tmpfile = tempfile.TemporaryFile(prefix="git-nits", mode="w+b")
    except EnvironmentError as e:
        raise GitError("Could not create temporary file: %s\n" % e)

    # The context manager closes the temporary file on every way out of
    # this function, including the error paths.
    with tmpfile:
        try:
            p = subprocess.Popen(command,
                                 stdout=tmpfile,
                                 stderr=subprocess.PIPE)
        except OSError as e:
            raise GitError("could not execute %s: %s\n" % (command, e))

        # communicate() drains the stderr pipe while waiting for git to
        # exit; a bare wait() could block forever if git filled the pipe.
        errout = p.communicate()[1]
        if p.returncode != 0:
            # The pipe yields bytes under python 3; decode them so the
            # message is readable when the exception is printed.
            if not isinstance(errout, str):
                errout = errout.decode('utf-8', 'replace')
            raise GitError(errout)

        tmpfile.seek(0)
        return [l.decode('utf-8', 'replace') for l in tmpfile]
96
def git_root():
    """Return the absolute path of the top-level directory of the current
    git workspace, as reported by 'git rev-parse --show-toplevel'"""

    toplevel = git('rev-parse --show-toplevel')[0]
    return os.path.abspath(toplevel.strip())
104
def git_branch():
    """Return the current git branch.

    The current branch is taken from the line that 'git branch' marks with
    a leading '*'.  None is returned if that line reads '(no branch)', or
    if no line is marked at all."""

    for elt in git('branch'):
        if elt.startswith('*'):
            # Lines returned by git() keep their trailing newline, so it
            # has to be stripped before the suffix can ever match.
            if elt.rstrip().endswith('(no branch)'):
                return None
            return elt.split()[1]

    return None
115
def git_parent_branch(branch):
    """Return the parent of the given git branch.

    If 'branch' tracks a remote branch, the tracked remote branch is
    returned; otherwise the default is origin/master.  A false 'branch'
    (None or empty) yields None without consulting git.  The process exits
    with status 1 if git lists no local branches at all."""

    if not branch:
        return None

    refs = git(["for-each-ref",
                "--format=%(refname:short) %(upstream:short)",
                "refs/heads/"])
    if not refs:
        sys.stderr.write("Failed finding git parent branch\n")
        sys.exit(1)

    for ref in refs:
        # Git 1.7 will leave a ' ' trailing any non-tracking branch; such
        # lines carry no upstream and are skipped.
        if ' ' not in ref or ref.endswith(' \n'):
            continue
        local, remote = ref.split()
        if local == branch:
            return remote

    return 'origin/master'
139
def git_comments(parent):
    """Return the checkin comments of the changesets in 'parent..' as a
    list of whitespace-stripped lines, with the ':SEP:' marker lines that
    terminate each comment removed.

    Exits with status 1 if git reports no output at all."""

    log = git('log --pretty=tformat:%%B:SEP: %s..' % parent)
    if not log:
        sys.stderr.write("No outgoing changesets found - missing -p option?\n")
        sys.exit(1)

    comments = []
    for line in log:
        if line != ':SEP:\n':
            comments.append(line.strip())
    return comments
150
def git_file_list(parent, paths=None):
    """Return the set of files which have ever changed on this branch.

    'parent' names the revision the branch is compared against
    ('parent..').  'paths', if given, is a list of paths (or a single path
    string) to which the listing is limited; None or an empty value means
    no limit.  The result is a sorted list of unique file names as printed
    by git.  Exits with status 1 if git produces no output.

    NB: This includes files which no longer exist, or no longer actually
    differ."""

    if not paths:
        paths = []
    elif isinstance(paths, str):
        paths = [paths]

    # Hand git an argument list rather than a string to be re-split, so a
    # path containing whitespace stays a single argument.
    p = git(["log", "--name-only", "--pretty=format:", "%s.." % parent] +
            list(paths))

    if not p:
        sys.stderr.write("Failed building file-list from git\n")
        sys.exit(1)

    names = set(fname.strip() for fname in p)
    # Blank separator lines between commits strip down to the empty string
    names.discard('')

    return sorted(names)
171
def not_check(root, cmd):
    """Build the exclusion predicate for the check named by 'cmd'.

    The candidate ignore files are <root>/.git/info/<cmd>.NOT and
    <root>/exception_lists/<cmd>; those which exist are handed to
    Ignore.ignore(), whose result is returned."""

    candidates = (os.path.join(root, ".git/info", "%s.NOT" % cmd),
                  os.path.join(root, "exception_lists", cmd))
    ignorefiles = [path for path in candidates if os.path.exists(path)]
    return Ignore.ignore(root, ignorefiles)
180
def gen_files(root, parent, paths, exclude, filter=None):
    """Return a function producing file names, relative to the current
    directory, of any file changed on this branch (limited to 'paths' if
    requested), and excluding files for which exclude returns a true value.

    'filter' is a predicate applied to each candidate path; it defaults to
    os.path.isfile.  The returned function takes an optional 'select'
    predicate and is a generator: it yields each path that passes
    'filter', has a non-empty diff between 'parent' and HEAD, passes
    'select', and is not rejected by 'exclude' (which is given the name as
    printed by git rather than the path relative to the current
    directory)."""

    if filter is None:
        filter = os.path.isfile

    def ret(select=None):
        if not select:
            select = lambda x: True

        for name in git_file_list(parent, paths):
            path = os.path.relpath(os.path.join(root, name), '.')
            try:
                # An argument list keeps a path containing whitespace in
                # one piece; a formatted string would be split apart and
                # the file silently skipped by the handler below.
                res = git(["diff", str(parent), "HEAD", path])
            except GitError:
                # This ignores all the errors that can be thrown. Usually, this
                # means that git returned non-zero because the file doesn't
                # exist, but it could also fail if git can't create a new file
                # or it can't be executed.  Such errors are 1) unlikely, and 2)
                # will be caught by other invocations of git().
                continue
            # 'filter' is evaluated before 'select' so that a selector
            # which opens the file is only ever given an acceptable path.
            if (filter(path) and res and
                select(path) and not exclude(name)):
                yield path
    return ret
209
def gen_links(root, parent, paths, exclude):
    """Return a function producing symbolic link names, relative to the
    current directory, of any file changed on this branch (limited to
    'paths' if requested), and excluding files for which exclude returns a
    true value.

    This is gen_files() with os.path.islink as the path filter."""

    return gen_files(root, parent, paths, exclude, filter=os.path.islink)
216
def comchk(root, parent, flist, output):
    """Check the checkin comments of the changesets in 'parent..'.

    Writes a "Comments:" heading to 'output' and returns the result of
    Comments.comchk().  'root' and 'flist' are unused; they are present so
    that every check shares one signature."""

    output.write("Comments:\n")

    comments = git_comments(parent)

    # A Gerrit Change-Id as the final line, preceded by an empty line, is
    # dropped (together with that empty line) before the comments are
    # checked.
    gerrit_trailer = (len(comments) > 2 and
                      re.match('^Change-Id: I[0-9a-f]+', comments[-1]) and
                      comments[-2] == '')
    if gerrit_trailer:
        print('Note: Gerrit Change Id present in comments')
        comments = comments[:-2]

    return Comments.comchk(comments, check_db=True, output=output)
229
230
def mapfilechk(root, parent, flist, output):
    """Run Mapfile.mapfilechk() over every changed file which looks like a
    mapfile.

    Writes a "Mapfile comments:" heading to 'output' and returns the
    bitwise OR of the individual results (0 if no file was checked)."""

    ret = 0

    # We are interested in examining any file that has the following
    # in its final path segment:
    #    - Contains the word 'mapfile'
    #    - Begins with 'map.'
    #    - Ends with '.map'
    # We don't want to match unless these things occur in final path segment
    # because directory names with these strings don't indicate a mapfile.
    # We also ignore files with suffixes that tell us that the files
    # are not mapfiles.
    #
    # The 'map.' alternative accepts either a preceding '/' or the start
    # of the path, since a file in the current directory is named without
    # any directory component.
    MapfileRE = re.compile(
        r'.*((mapfile[^/]*)|((^|/)map\.+[^/]*)|(\.map))$',
        re.IGNORECASE)
    NotMapSuffixRE = re.compile(r'.*\.[ch]$', re.IGNORECASE)

    output.write("Mapfile comments:\n")

    for f in flist(lambda x: MapfileRE.match(x) and not
                   NotMapSuffixRE.match(x)):
        with io.open(f, encoding='utf-8', errors='replace') as fh:
            ret |= Mapfile.mapfilechk(fh, output=output)
    return ret
254
def copyright(root, parent, flist, output):
    """Run Copyright.copyright() over every file produced by flist().

    Writes a "Copyrights:" heading to 'output' and returns the bitwise OR
    of the individual results (0 if there were no files)."""

    output.write("Copyrights:\n")

    status = 0
    for name in flist():
        # Undecodable bytes are replaced rather than treated as an error
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status = status | Copyright.copyright(src, output=output)
    return status
262
def hdrchk(root, parent, flist, output):
    """Run HdrChk.hdrchk() in lenient mode over every changed file whose
    name ends in '.h'.

    Writes a "Header format:" heading to 'output' and returns the bitwise
    OR of the individual results (0 if there were no such files)."""

    def is_header(x):
        return x.endswith('.h')

    output.write("Header format:\n")

    status = 0
    for name in flist(is_header):
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status = status | HdrChk.hdrchk(src, lenient=True, output=output)
    return status
270
def cstyle(root, parent, flist, output):
    """Run CStyle.cstyle() over every changed file whose name ends in '.c'
    or '.h', with the picky, POSIX-types and continuation checks enabled.

    Writes a "C style:" heading to 'output' and returns the bitwise OR of
    the individual results (0 if there were no such files)."""

    def is_c_source(x):
        return x.endswith(('.c', '.h'))

    output.write("C style:\n")

    status = 0
    for name in flist(is_c_source):
        # The file is handed over opened in binary mode
        with io.open(name, mode='rb') as src:
            status = status | CStyle.cstyle(src, output=output, picky=True,
                                            check_posix_types=True,
                                            check_continuation=True)
    return status
280
def jstyle(root, parent, flist, output):
    """Run JStyle.jstyle() in picky mode over every changed file whose name
    ends in '.java'.

    Writes a "Java style:" heading to 'output' and returns the bitwise OR
    of the individual results (0 if there were no such files)."""

    def is_java(x):
        return x.endswith('.java')

    output.write("Java style:\n")

    status = 0
    for name in flist(is_java):
        with io.open(name, mode='rb') as src:
            status = status | JStyle.jstyle(src, output=output, picky=True)
    return status
288
def manlint(root, parent, flist, output):
    """Run ManLint.manlint() (picky) and then SpellCheck.spellcheck() over
    every changed file whose name ends in a dot, a digit and any number of
    letters (matched case-insensitively), e.g. 'ls.1' or 'foo.3C'.

    Writes a "Man page format/spelling:" heading to 'output' and returns
    the bitwise OR of all the individual results (0 if there were no such
    files)."""

    output.write("Man page format/spelling:\n")

    ManfileRE = re.compile(r'.*\.[0-9][a-z]*$', re.IGNORECASE)

    status = 0
    for name in flist(lambda x: ManfileRE.match(x)):
        with io.open(name, mode='rb') as src:
            # Both checks are given the same open file object, lint first
            status = status | ManLint.manlint(src, output=output, picky=True)
            status = status | SpellCheck.spellcheck(src, output=output)
    return status
298
def shelllint(root, parent, flist, output):
    """Run ShellLint.lint() over every changed file which looks like a
    shell script.

    A file qualifies if its extension is '.sh' or '.ksh', or if it has no
    extension and its first line starts with '#' and mentions 'sh' or
    'ksh' as a whole word.  Writes a "Shell lint:" heading to 'output' and
    returns the bitwise OR of the individual results (0 if there were no
    such files)."""

    output.write("Shell lint:\n")

    def isshell(x):
        ext = os.path.splitext(x)[1]
        if ext in ('.sh', '.ksh'):
            return True
        if ext != '':
            return False
        # No extension: fall back to inspecting the first line, ignoring
        # any bytes which cannot be decoded.
        with io.open(x, mode='r', errors='ignore') as fh:
            return bool(re.match(r'^#.*\bk?sh\b', fh.readline()))

    status = 0
    for name in flist(isshell):
        with io.open(name, mode='rb') as src:
            status = status | ShellLint.lint(src, output=output)

    return status
318
def pkgfmt(root, parent, flist, output):
    """Run PkgFmt.check() over every changed file whose name ends in
    '.p5m'.

    Writes a "Package manifests:" heading to 'output' and returns the
    bitwise OR of the individual results (0 if there were no such
    files)."""

    def is_manifest(x):
        return x.endswith('.p5m')

    output.write("Package manifests:\n")

    status = 0
    for name in flist(is_manifest):
        with io.open(name, mode='rb') as src:
            status = status | PkgFmt.check(src, output=output)

    return status
328
def keywords(root, parent, flist, output):
    """Run Keywords.keywords() over every file produced by flist().

    Writes a "SCCS Keywords:" heading to 'output' and returns the bitwise
    OR of the individual results (0 if there were no files)."""

    output.write("SCCS Keywords:\n")

    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status = status | Keywords.keywords(src, output=output)
    return status
336
def wscheck(root, parent, flist, output):
    """Run WsCheck.wscheck() over every file produced by flist().

    Writes a "white space nits:" heading to 'output' and returns the
    bitwise OR of the individual results (0 if there were no files)."""

    output.write("white space nits:\n")

    status = 0
    for name in flist():
        with io.open(name, encoding='utf-8', errors='replace') as src:
            status = status | WsCheck.wscheck(src, output=output)
    return status
344
def symlinks(root, parent, flist, output):
    """Report every name produced by flist() as a failure.

    Writes a "Symbolic links:" heading to 'output' followed by one
    indented line per name, and returns 1 if any name was listed, else
    0."""

    output.write("Symbolic links:\n")

    found = 0
    for link in flist():
        output.write("  "+link+"\n")
        found = 1
    return found
352
def iswinreserved(name):
    """Return True if 'name', compared case-insensitively, is one of the
    reserved names listed below, either on its own or followed by a '.'
    and anything else; otherwise return False."""

    reserved = (
        'con', 'prn', 'aux', 'nul',
        'com1', 'com2', 'com3', 'com4', 'com5',
        'com6', 'com7', 'com8', 'com9', 'com0',
        'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5',
        'lpt6', 'lpt7', 'lpt8', 'lpt9', 'lpt0')

    # Everything before the first '.' (the whole name if there is none)
    # is what must equal a reserved name.
    stem = name.lower().split('.', 1)[0]
    return stem in reserved
365
def haswinspecial(name):
    """Return True if 'name' contains any of the characters
    < > : " \\ | ? * and False otherwise."""

    specials = '<>:"\\|?*'
    return any(c in specials for c in name)
372
def winnames(root, parent, flist, output):
    """Report changed files whose names would be illegal on Windows.

    A path is reported once: for containing a special character (see
    haswinspecial) or, failing that, for having any '/'-separated
    component which is a reserved name (see iswinreserved).  Writes an
    "Illegal filenames (Windows):" heading to 'output' and returns 1 if
    anything was reported, else 0."""

    output.write("Illegal filenames (Windows):\n")

    status = 0
    for path in flist():
        if haswinspecial(path):
            problem = ": invalid character in name\n"
        elif any(iswinreserved(part) for part in path.split('/')):
            problem = ": reserved file name\n"
        else:
            continue

        output.write("  "+path+problem)
        status |= 1

    return status
390
def run_checks(root, parent, cmds, scmds, paths='', opts={}):
    """Run the checks given in 'cmds' and then those in 'scmds', all
    expected to have well-known signatures, and report results for any
    which fail.

    Checks in 'cmds' are given a file generator from gen_files(); checks
    in 'scmds' are given a symbolic link generator from gen_links().  Each
    check writes to its own buffer, which is printed only if the check
    returns non-zero.  'opts' is not used.

    Return failure if any of them did (the bitwise OR of all results).

    NB: the function name of the commands passed in is used to name the NOT
    file which excepts files from them."""

    def run_group(group, links):
        status = 0
        for check in group:
            buf = StringIO()

            exclude = not_check(root, check.__name__)
            if links:
                flist = gen_links(root, parent, paths, exclude)
            else:
                flist = gen_files(root, parent, paths, exclude)

            result = check(root, parent, flist, output=buf)
            status |= result

            if result != 0:
                print(buf.getvalue())
        return status

    return run_group(cmds, False) | run_group(scmds, True)
425
def nits(root, parent, paths):
    """Run the nits checks (everything except the checkin comment check)
    over the files changed relative to 'parent', limited to 'paths'.

    Returns the aggregate status from run_checks(): non-zero if any check
    failed."""

    cmds = [copyright,
            cstyle,
            hdrchk,
            jstyle,
            keywords,
            manlint,
            mapfilechk,
            shelllint,
            pkgfmt,
            winnames,
            wscheck]
    scmds = [symlinks]
    # Hand the result back rather than dropping it, so that a caller can
    # tell whether any check failed.
    return run_checks(root, parent, cmds, scmds, paths)
440
def pbchk(root, parent, paths):
    """Run the full set of pre-putback checks, including the checkin
    comment check, over every file changed relative to 'parent'.

    'paths' is accepted for signature parity with nits() but is not
    forwarded: the whole workspace is always examined.

    Returns the aggregate status from run_checks(): non-zero if any check
    failed."""

    cmds = [comchk,
            copyright,
            cstyle,
            hdrchk,
            jstyle,
            keywords,
            manlint,
            mapfilechk,
            shelllint,
            pkgfmt,
            winnames,
            wscheck]
    scmds = [symlinks]
    # Hand the result back rather than dropping it, so that a caller can
    # tell whether any check failed.
    return run_checks(root, parent, cmds, scmds)
456
def main(cmd, args):
    """Parse the command line and run the requested checks.

    'cmd' is the name the program was invoked as and 'args' its arguments.
    Options: -p branch (or -b, an alias) names the parent branch, which
    otherwise is derived from the current branch; -c check selects what to
    run: 'pbchk', 'nits', or the name of a single check.  Without -c the
    program runs 'pbchk' when invoked as git-pbchk and 'nits' otherwise.
    Any remaining arguments are paths to which the checks are limited
    (not permitted with pbchk).

    Exits with status 1 on a usage error or an unknown check name."""

    parent_branch = None
    checkname = None

    try:
        opts, args = getopt.getopt(args, 'b:c:p:')
    except getopt.GetoptError as e:
        sys.stderr.write(str(e) + '\n')
        sys.stderr.write("Usage: %s [-c check] [-p branch] [path...]\n" % cmd)
        sys.exit(1)

    for opt, arg in opts:
        # We accept "-b" as an alias of "-p" for backwards compatibility.
        if opt in ('-p', '-b'):
            parent_branch = arg
        elif opt == '-c':
            checkname = arg

    if not parent_branch:
        parent_branch = git_parent_branch(git_branch())

    if checkname is None:
        checkname = 'pbchk' if cmd == 'git-pbchk' else 'nits'

    if checkname == 'pbchk':
        if args:
            sys.stderr.write("only complete workspaces may be pbchk'd\n")
            sys.exit(1)
        pbchk(git_root(), parent_branch, None)
    elif checkname == 'nits':
        nits(git_root(), parent_branch, args)
    else:
        # The name is looked up among the known checks rather than passed
        # to eval(), which would execute any expression given to -c.
        file_checks = [comchk, copyright, cstyle, hdrchk, jstyle, keywords,
                       manlint, mapfilechk, shelllint, pkgfmt, winnames,
                       wscheck]
        link_checks = [symlinks]

        cmds = [c for c in file_checks if c.__name__ == checkname]
        scmds = [c for c in link_checks if c.__name__ == checkname]
        if not cmds and not scmds:
            sys.stderr.write("unknown check: %s\n" % checkname)
            sys.exit(1)

        # The paths go in the 'paths' position; run_checks() takes the
        # list of symbolic link checks as its fourth argument.
        run_checks(git_root(), parent_branch, cmds, scmds, args)
493
if __name__ == '__main__':
    # The name we were invoked as selects the default set of checks
    progname = os.path.basename(sys.argv[0])
    try:
        main(progname, sys.argv[1:])
    except GitError as e:
        # Any failure to run git is reported and ends the run with status 1
        sys.stderr.write("failed to run git:\n %s\n" % str(e))
        sys.exit(1)
500