#!/usr/bin/env python

from __future__ import print_function

import argparse
import email.mime.multipart
import email.mime.text
import logging
import os.path
import pickle
import re
import smtplib
import subprocess
import sys
from datetime import datetime, timedelta
from phabricator import Phabricator

# Setting up a virtualenv to run this script can be done by running the
# following commands:
# $ virtualenv venv
# $ . ./venv/bin/activate
# $ pip install Phabricator

GIT_REPO_METADATA = (("llvm-monorepo", "https://github.com/llvm/llvm-project"),)

# The below PhabXXX classes represent objects as modelled by Phabricator.
# The classes can be serialized to disk, to try and make sure that we don't
# needlessly have to re-fetch lots of data from Phabricator, as that would
# make this script unusably slow.


class PhabObject:
    OBJECT_KIND = None

    def __init__(self, id):
        self.id = id


class PhabObjectCache:
    def __init__(self, PhabObjectClass):
        self.PhabObjectClass = PhabObjectClass
        self.most_recent_info = None
        self.oldest_info = None
        self.id2PhabObjects = {}

    def get_name(self):
        return self.PhabObjectClass.OBJECT_KIND + "sCache"

    def get(self, id):
        if id not in self.id2PhabObjects:
            self.id2PhabObjects[id] = self.PhabObjectClass(id)
        return self.id2PhabObjects[id]

    def get_ids_in_cache(self):
        return list(self.id2PhabObjects.keys())

    def get_objects(self):
        return list(self.id2PhabObjects.values())

    DEFAULT_DIRECTORY = "PhabObjectCache"

    def _get_pickle_name(self, directory):
        file_name = "Phab" + self.PhabObjectClass.OBJECT_KIND + "s.pickle"
        return os.path.join(directory, file_name)

    def populate_cache_from_disk(self, directory=DEFAULT_DIRECTORY):
        """
        FIXME: consider if serializing to JSON would bring interoperability
        advantages over serializing to pickle.
        """
        try:
            f = open(self._get_pickle_name(directory), "rb")
        except IOError as err:
            print("Could not find cache. Error message: {0}. Continuing...".format(err))
        else:
            with f:
                try:
                    d = pickle.load(f)
                    self.__dict__.update(d)
                except EOFError as err:
                    print(
                        "Cache seems to be corrupt. "
                        + "Not using cache. Error message: {0}".format(err)
                    )

    def write_cache_to_disk(self, directory=DEFAULT_DIRECTORY):
        if not os.path.exists(directory):
            os.makedirs(directory)
        with open(self._get_pickle_name(directory), "wb") as f:
            pickle.dump(self.__dict__, f)
        print(
            "wrote cache to disk, most_recent_info = {0}".format(
                datetime.fromtimestamp(self.most_recent_info)
                if self.most_recent_info is not None
                else None
            )
        )

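# Illustrative round trip (hypothetical ID, not from the source):
#   cache = PhabObjectCache(PhabReview)
#   cache.get(12345)                  # creates and caches PhabReview(12345)
#   cache.write_cache_to_disk()       # -> PhabObjectCache/PhabReviews.pickle
#   cache.populate_cache_from_disk()  # restores id2PhabObjects on a later run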

class PhabReview(PhabObject):
    OBJECT_KIND = "Review"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, title, dateCreated, dateModified, author):
        self.title = title
        self.dateCreated = dateCreated
        self.dateModified = dateModified
        self.author = author

    def setPhabDiffs(self, phabDiffs):
        self.phabDiffs = phabDiffs


class PhabUser(PhabObject):
    OBJECT_KIND = "User"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, phid, realName):
        self.phid = phid
        self.realName = realName


class PhabHunk:
    def __init__(self, rest_api_hunk):
        self.oldOffset = int(rest_api_hunk["oldOffset"])
        self.oldLength = int(rest_api_hunk["oldLength"])
        # self.actual_lines_changed_offset will contain the offsets of the
        # lines that were changed in this hunk.
        self.actual_lines_changed_offset = []
        offset = self.oldOffset
        inHunk = False
        hunkStart = -1
        contextLines = 3
        for line in rest_api_hunk["corpus"].split("\n"):
            if line.startswith("+"):
                # line is a new line that got introduced in this patch.
                # Do not record it as a changed line.
                if inHunk is False:
                    inHunk = True
                    hunkStart = max(self.oldOffset, offset - contextLines)
                continue
            if line.startswith("-"):
                # line was changed or removed from the older version of the
                # code. Record it as a changed line.
                if inHunk is False:
                    inHunk = True
                    hunkStart = max(self.oldOffset, offset - contextLines)
                offset += 1
                continue
            # line is a context line.
            if inHunk is True:
                inHunk = False
                hunkEnd = offset + contextLines
                self.actual_lines_changed_offset.append((hunkStart, hunkEnd))
            offset += 1
        if inHunk is True:
            hunkEnd = offset + contextLines
            self.actual_lines_changed_offset.append((hunkStart, hunkEnd))

        # The above algorithm could result in adjacent or overlapping ranges
        # being recorded into self.actual_lines_changed_offset.
        # Merge the adjacent and overlapping ranges in there:
        t = []
        lastRange = None
        for start, end in self.actual_lines_changed_offset + [
            (sys.maxsize, sys.maxsize)
        ]:
            if lastRange is None:
                lastRange = (start, end)
            else:
                if lastRange[1] >= start:
                    lastRange = (lastRange[0], end)
                else:
                    t.append(lastRange)
                    lastRange = (start, end)
        self.actual_lines_changed_offset = t
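        # Worked example (hypothetical values, for illustration): with
        # contextLines == 3, recorded ranges [(10, 18), (16, 25), (40, 44)]
        # merge to [(10, 25), (40, 44)], since 18 >= 16 means the first two
        # ranges overlap once context is included.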


class PhabChange:
    def __init__(self, rest_api_change):
        self.oldPath = rest_api_change["oldPath"]
        self.hunks = [PhabHunk(h) for h in rest_api_change["hunks"]]


class PhabDiff(PhabObject):
    OBJECT_KIND = "Diff"

    def __init__(self, id):
        PhabObject.__init__(self, id)

    def update(self, rest_api_results):
        self.revisionID = rest_api_results["revisionID"]
        self.dateModified = int(rest_api_results["dateModified"])
        self.dateCreated = int(rest_api_results["dateCreated"])
        self.changes = [PhabChange(c) for c in rest_api_results["changes"]]


class ReviewsCache(PhabObjectCache):
    def __init__(self):
        PhabObjectCache.__init__(self, PhabReview)


class UsersCache(PhabObjectCache):
    def __init__(self):
        PhabObjectCache.__init__(self, PhabUser)


reviews_cache = ReviewsCache()
users_cache = UsersCache()


def init_phab_connection():
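    # Note (assumption, not stated in the source): the phabricator package
    # reads its connection settings (host URI, API token) from ~/.arcrc by
    # default, so that file needs to be configured before running this script.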
    phab = Phabricator()
    phab.update_interfaces()
    return phab


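# Fetch pages of results, most recent first, and keep paging until either the
# cursor is exhausted or the cache already covers both ends of the requested
# time window (max_nr_days_to_cache back from the newest record).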
def update_cached_info(
    phab,
    cache,
    phab_query,
    order,
    record_results,
    max_nr_entries_per_fetch,
    max_nr_days_to_cache,
):
    q = phab
    LIMIT = max_nr_entries_per_fetch
    for query_step in phab_query:
        q = getattr(q, query_step)
    results = q(order=order, limit=LIMIT)
    most_recent_info, oldest_info = record_results(cache, results, phab)
    oldest_info_to_fetch = datetime.fromtimestamp(most_recent_info) - timedelta(
        days=max_nr_days_to_cache
    )
    most_recent_info_overall = most_recent_info
    cache.write_cache_to_disk()
    after = results["cursor"]["after"]
    print("after: {0!r}".format(after))
    print("most_recent_info: {0}".format(datetime.fromtimestamp(most_recent_info)))
    while (
        after is not None and datetime.fromtimestamp(oldest_info) > oldest_info_to_fetch
    ):
        need_more_older_data = (
            cache.oldest_info is None
            or datetime.fromtimestamp(cache.oldest_info) > oldest_info_to_fetch
        )
        print(
            (
                "need_more_older_data={0} cache.oldest_info={1} "
                + "oldest_info_to_fetch={2}"
            ).format(
                need_more_older_data,
                datetime.fromtimestamp(cache.oldest_info)
                if cache.oldest_info is not None
                else None,
                oldest_info_to_fetch,
            )
        )
        need_more_newer_data = (
            cache.most_recent_info is None or cache.most_recent_info < most_recent_info
        )
        print(
            (
                "need_more_newer_data={0} cache.most_recent_info={1} "
                + "most_recent_info={2}"
            ).format(need_more_newer_data, cache.most_recent_info, most_recent_info)
        )
        if not need_more_older_data and not need_more_newer_data:
            break
        results = q(order=order, after=after, limit=LIMIT)
        most_recent_info, oldest_info = record_results(cache, results, phab)
        after = results["cursor"]["after"]
        print("after: {0!r}".format(after))
        print("most_recent_info: {0}".format(datetime.fromtimestamp(most_recent_info)))
        cache.write_cache_to_disk()
    cache.most_recent_info = most_recent_info_overall
    if after is None:
        # We did fetch all records. Mark the cache to contain all info since
        # the start of time.
        oldest_info = 0
    cache.oldest_info = oldest_info
    cache.write_cache_to_disk()


def record_reviews(cache, reviews, phab):
    most_recent_info = None
    oldest_info = None
    for reviewInfo in reviews["data"]:
        if reviewInfo["type"] != "DREV":
            continue
        id = reviewInfo["id"]
        # phid = reviewInfo["phid"]
        dateModified = int(reviewInfo["fields"]["dateModified"])
        dateCreated = int(reviewInfo["fields"]["dateCreated"])
        title = reviewInfo["fields"]["title"]
        author = reviewInfo["fields"]["authorPHID"]
        phabReview = cache.get(id)
        if (
            "dateModified" not in phabReview.__dict__
            or dateModified > phabReview.dateModified
        ):
            diff_results = phab.differential.querydiffs(revisionIDs=[id])
            diff_ids = sorted(diff_results.keys())
            phabDiffs = []
            for diff_id in diff_ids:
                diffInfo = diff_results[diff_id]
                d = PhabDiff(diff_id)
                d.update(diffInfo)
                phabDiffs.append(d)
            phabReview.update(title, dateCreated, dateModified, author)
            phabReview.setPhabDiffs(phabDiffs)
            print(
                "Updated D{0} modified on {1} ({2} diffs)".format(
                    id, datetime.fromtimestamp(dateModified), len(phabDiffs)
                )
            )

        if most_recent_info is None:
            most_recent_info = dateModified
        elif most_recent_info < dateModified:
            most_recent_info = dateModified

        if oldest_info is None:
            oldest_info = dateModified
        elif oldest_info > dateModified:
            oldest_info = dateModified
    return most_recent_info, oldest_info


def record_users(cache, users, phab):
    most_recent_info = None
    oldest_info = None
    for info in users["data"]:
        if info["type"] != "USER":
            continue
        id = info["id"]
        phid = info["phid"]
        dateModified = int(info["fields"]["dateModified"])
        # dateCreated = int(info["fields"]["dateCreated"])
        realName = info["fields"]["realName"]
        phabUser = cache.get(id)
        phabUser.update(phid, realName)
        if most_recent_info is None:
            most_recent_info = dateModified
        elif most_recent_info < dateModified:
            most_recent_info = dateModified
        if oldest_info is None:
            oldest_info = dateModified
        elif oldest_info > dateModified:
            oldest_info = dateModified
    return most_recent_info, oldest_info


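# Each entry is (cache, phab_query, order, record_results,
# max_nr_entries_per_fetch, max_nr_days_to_cache), matching the parameters of
# update_cached_info() above.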
PHABCACHESINFO = (
    (
        reviews_cache,
        ("differential", "revision", "search"),
        "updated",
        record_reviews,
        5,
        7,
    ),
    (users_cache, ("user", "search"), "newest", record_users, 100, 1000),
)


def load_cache():
    for cache, phab_query, order, record_results, _, _ in PHABCACHESINFO:
        cache.populate_cache_from_disk()
        print(
            "Loaded {0} nr entries: {1}".format(
                cache.get_name(), len(cache.get_ids_in_cache())
            )
        )
        print(
            "Loaded {0} has most recent info: {1}".format(
                cache.get_name(),
                datetime.fromtimestamp(cache.most_recent_info)
                if cache.most_recent_info is not None
                else None,
            )
        )


def update_cache(phab):
    load_cache()
    for (
        cache,
        phab_query,
        order,
        record_results,
        max_nr_entries_per_fetch,
        max_nr_days_to_cache,
    ) in PHABCACHESINFO:
        update_cached_info(
            phab,
            cache,
            phab_query,
            order,
            record_results,
            max_nr_entries_per_fetch,
            max_nr_days_to_cache,
        )
        ids_in_cache = cache.get_ids_in_cache()
        print("{0} objects in {1}".format(len(ids_in_cache), cache.get_name()))
        cache.write_cache_to_disk()


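# Note that the cut-off below is computed relative to the newest cached
# review, not to datetime.now(), so a stale cache still yields a non-empty
# (if stale) result window.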
def get_most_recent_reviews(days):
    newest_reviews = sorted(reviews_cache.get_objects(), key=lambda r: -r.dateModified)
    if len(newest_reviews) == 0:
        return newest_reviews
    most_recent_review_time = datetime.fromtimestamp(newest_reviews[0].dateModified)
    cut_off_date = most_recent_review_time - timedelta(days=days)
    result = []
    for review in newest_reviews:
        if datetime.fromtimestamp(review.dateModified) < cut_off_date:
            return result
        result.append(review)
    return result


# All of the above code is about fetching data from Phabricator and caching it
# on local disk. The below code contains the actual "business logic" for this
# script.

_userphid2realname = None


def get_real_name_from_author(user_phid):
    global _userphid2realname
    if _userphid2realname is None:
        _userphid2realname = {}
        for user in users_cache.get_objects():
            _userphid2realname[user.phid] = user.realName
    return _userphid2realname.get(user_phid, "unknown")


def print_most_recent_reviews(phab, days, filter_reviewers):
    msgs = []

    def add_msg(msg):
        msgs.append(msg)
        print(msg.encode("utf-8"))

    newest_reviews = get_most_recent_reviews(days)
    add_msg(
        "These are the reviews that look interesting to review. "
        + "The report below has 2 sections. The first "
        + "section is organized per review; the second section is organized "
        + "per potential reviewer.\n"
    )
    oldest_review = newest_reviews[-1] if len(newest_reviews) > 0 else None
    oldest_datetime = (
        datetime.fromtimestamp(oldest_review.dateModified) if oldest_review else None
    )
    add_msg(
        (
            "The report below is based on analyzing the reviews that got "
            + "touched in the past {0} days (since {1}). "
            + "The script found {2} such reviews.\n"
        ).format(days, oldest_datetime, len(newest_reviews))
    )
    reviewer2reviews_and_scores = {}
    for i, review in enumerate(newest_reviews):
        matched_reviewers = find_reviewers_for_review(review)
        matched_reviewers = filter_reviewers(matched_reviewers)
        if len(matched_reviewers) == 0:
            continue
        add_msg(
            (
                "{0:>3}. https://reviews.llvm.org/D{1} by {2}\n     {3}\n"
                + "     Last updated on {4}"
            ).format(
                i,
                review.id,
                get_real_name_from_author(review.author),
                review.title,
                datetime.fromtimestamp(review.dateModified),
            )
        )
        for reviewer, scores in matched_reviewers:
            add_msg(
                "    potential reviewer {0}, score {1}".format(
                    reviewer,
                    "(" + "/".join(["{0:.1f}%".format(s) for s in scores]) + ")",
                )
            )
            if reviewer not in reviewer2reviews_and_scores:
                reviewer2reviews_and_scores[reviewer] = []
            reviewer2reviews_and_scores[reviewer].append((review, scores))

    # Print out a summary per reviewer.
    for reviewer in sorted(reviewer2reviews_and_scores.keys()):
        reviews_and_scores = reviewer2reviews_and_scores[reviewer]
        reviews_and_scores.sort(key=lambda rs: rs[1], reverse=True)
        add_msg(
            "\n\nSUMMARY FOR {0} (found {1} reviews):".format(
                reviewer, len(reviews_and_scores)
            )
        )
        for review, scores in reviews_and_scores:
            add_msg(
                "[{0}] https://reviews.llvm.org/D{1} '{2}' by {3}".format(
                    "/".join(["{0:.1f}%".format(s) for s in scores]),
                    review.id,
                    review.title,
                    get_real_name_from_author(review.author),
                )
            )
    return "\n".join(msgs)


def get_git_cmd_output(cmd):
    output = None
    try:
        logging.debug(cmd)
        output = subprocess.check_output(cmd, shell=True, stderr=subprocess.STDOUT)
    except subprocess.CalledProcessError as e:
        logging.debug(str(e))
    if output is None:
        return None
    return output.decode("utf-8", errors="ignore")


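# Matches the "author-mail <...>" lines that `git blame --line-porcelain`
# emits once per blamed line, e.g. "author-mail <jane@example.com>"
# (hypothetical address, for illustration).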
reAuthorMail = re.compile("^author-mail <([^>]*)>.*$")


def parse_blame_output_line_porcelain(blame_output_lines):
    email2nr_occurences = {}
    if blame_output_lines is None:
        return email2nr_occurences
    for line in blame_output_lines:
        m = reAuthorMail.match(line)
        if m:
            author_email_address = m.group(1)
            if author_email_address not in email2nr_occurences:
                email2nr_occurences[author_email_address] = 1
            else:
                email2nr_occurences[author_email_address] += 1
    return email2nr_occurences

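# For example (illustrative): two "author-mail <jane@example.com>" lines in
# the blame output produce {"jane@example.com": 2}.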


class BlameOutputCache:
    def __init__(self):
        self.cache = {}

    def _populate_cache_for(self, cache_key):
        assert cache_key not in self.cache
        git_repo, base_revision, path = cache_key
        cmd = (
            "git -C {0} blame --encoding=utf-8 --date iso -f -e -w "
            + "--line-porcelain {1} -- {2}"
        ).format(git_repo, base_revision, path)
        blame_output = get_git_cmd_output(cmd)
        self.cache[cache_key] = (
            blame_output.split("\n") if blame_output is not None else None
        )
        # FIXME: the blame cache could probably be made more effective still if
        # instead of storing the requested base_revision in the cache, the last
        # revision before the base revision this file/path got changed in gets
        # stored. That way multiple project revisions for which this specific
        # file/path hasn't changed would get cache hits (instead of misses in
        # the current implementation).

    def get_blame_output_for(
        self, git_repo, base_revision, path, start_line=-1, end_line=-1
    ):
        cache_key = (git_repo, base_revision, path)
        if cache_key not in self.cache:
            self._populate_cache_for(cache_key)
        assert cache_key in self.cache
        all_blame_lines = self.cache[cache_key]
        if all_blame_lines is None:
            return None
        if start_line == -1 and end_line == -1:
            return all_blame_lines
        assert start_line >= 0
        assert end_line >= 0
        assert end_line <= len(all_blame_lines)
        assert start_line <= len(all_blame_lines)
        assert start_line <= end_line
        return all_blame_lines[start_line:end_line]

    def get_parsed_git_blame_for(
        self, git_repo, base_revision, path, start_line=-1, end_line=-1
    ):
        return parse_blame_output_line_porcelain(
            self.get_blame_output_for(
                git_repo, base_revision, path, start_line, end_line
            )
        )


blameOutputCache = BlameOutputCache()


def find_reviewers_for_diff_heuristic(diff):
    # Heuristic 1: assume good reviewers are the ones that touched the same
    # lines before as this patch is touching.
    # Heuristic 2: assume good reviewers are the ones that touched the same
    # files before as this patch is touching.
    reviewers2nr_lines_touched = {}
    reviewers2nr_files_touched = {}
    # Assume last revision before diff was modified is the revision the diff
    # applies to.
    assert len(GIT_REPO_METADATA) == 1
    git_repo = os.path.join("git_repos", GIT_REPO_METADATA[0][0])
    cmd = 'git -C {0} rev-list -n 1 --before="{1}" main'.format(
        git_repo,
        datetime.fromtimestamp(diff.dateModified).strftime("%Y-%m-%d %H:%M:%S"),
    )
    base_revision = get_git_cmd_output(cmd).strip()
    logging.debug("Base revision={0}".format(base_revision))
    for change in diff.changes:
        path = change.oldPath
        # Compute heuristic 1: look at context of patch lines.
        for hunk in change.hunks:
            for start_line, end_line in hunk.actual_lines_changed_offset:
                # Collect git blame results for authors in those ranges.
                for (
                    reviewer,
                    nr_occurences,
                ) in blameOutputCache.get_parsed_git_blame_for(
                    git_repo, base_revision, path, start_line, end_line
                ).items():
                    if reviewer not in reviewers2nr_lines_touched:
                        reviewers2nr_lines_touched[reviewer] = 0
                    reviewers2nr_lines_touched[reviewer] += nr_occurences
        # Compute heuristic 2: don't look at context, just at files touched.
        # Collect git blame results for authors in those ranges.
        for reviewer, nr_occurences in blameOutputCache.get_parsed_git_blame_for(
            git_repo, base_revision, path
        ).items():
            if reviewer not in reviewers2nr_files_touched:
                reviewers2nr_files_touched[reviewer] = 0
            reviewers2nr_files_touched[reviewer] += 1

    # Compute "match scores"
    total_nr_lines = sum(reviewers2nr_lines_touched.values())
    total_nr_files = len(diff.changes)
    reviewers_matchscores = [
        (
            reviewer,
            (
                reviewers2nr_lines_touched.get(reviewer, 0) * 100.0 / total_nr_lines
                if total_nr_lines != 0
                else 0,
                reviewers2nr_files_touched[reviewer] * 100.0 / total_nr_files
                if total_nr_files != 0
                else 0,
            ),
        )
        for reviewer in reviewers2nr_files_touched
    ]
    reviewers_matchscores.sort(key=lambda i: i[1], reverse=True)
    return reviewers_matchscores

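# The resulting list is shaped like (illustrative values):
#   [("jane@example.com", (62.5, 40.0)), ("joe@example.com", (12.5, 20.0))]
# i.e. (reviewer email, (% of blamed lines in the changed ranges, % of
# touched files the reviewer authored changes in)).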


def find_reviewers_for_review(review):
    # Process the newest diff first.
    diffs = sorted(review.phabDiffs, key=lambda d: d.dateModified, reverse=True)
    if len(diffs) == 0:
        return []
    diff = diffs[0]
    matched_reviewers = find_reviewers_for_diff_heuristic(diff)
    # Show progress, as this is a slow operation:
    sys.stdout.write(".")
    sys.stdout.flush()
    logging.debug("matched_reviewers: {0}".format(matched_reviewers))
    return matched_reviewers


def update_git_repos():
    git_repos_directory = "git_repos"
    for name, url in GIT_REPO_METADATA:
        dirname = os.path.join(git_repos_directory, name)
        if not os.path.exists(dirname):
            cmd = "git clone {0} {1}".format(url, dirname)
            output = get_git_cmd_output(cmd)
        cmd = "git -C {0} pull --rebase".format(dirname)
        output = get_git_cmd_output(cmd)


def send_emails(email_addresses, sender, msg):
    s = smtplib.SMTP()
    s.connect()
    for email_address in email_addresses:
        email_msg = email.mime.multipart.MIMEMultipart()
        email_msg["From"] = sender
        email_msg["To"] = email_address
        email_msg["Subject"] = "LLVM patches you may be able to review."
        email_msg.attach(email.mime.text.MIMEText(msg.encode("utf-8"), "plain"))
        # python 3.x: s.send_message(email_msg)
        s.sendmail(email_msg["From"], email_msg["To"], email_msg.as_string())
    s.quit()


def filter_reviewers_to_report_for(people_to_look_for):
    # The below is just an example filter, to only report potential reviews
    # to do for the people that will receive the report email.
    return lambda potential_reviewers: [
        r for r in potential_reviewers if r[0] in people_to_look_for
    ]

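# For example (illustrative): with people_to_look_for == ["jane@example.com"],
# [("jane@example.com", (50.0, 10.0)), ("joe@example.com", (5.0, 1.0))]
# filters down to [("jane@example.com", (50.0, 10.0))].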

def main():
    parser = argparse.ArgumentParser(
        description="Match open reviews to potential reviewers."
    )
    parser.add_argument(
        "--no-update-cache",
        dest="update_cache",
        action="store_false",
        default=True,
        help="Do not update cached Phabricator objects",
    )
    parser.add_argument(
        "--email-report",
        dest="email_report",
        nargs="*",
        default=[],
        help="The email addresses to send the report to.",
    )
    parser.add_argument(
        "--sender",
        dest="sender",
        default="",
        help="The email address to use in 'From' on messages emailed out.",
    )
    parser.add_argument(
        "--email-addresses",
        dest="email_addresses",
        nargs="*",
        help="The email addresses (as known by LLVM git) of "
        + "the people to look for reviews for.",
    )
    parser.add_argument("--verbose", "-v", action="count", default=0)

    args = parser.parse_args()

    if args.verbose >= 1:
        logging.basicConfig(level=logging.DEBUG)

    people_to_look_for = [
        e.decode("utf-8") if isinstance(e, bytes) else e
        for e in (args.email_addresses or [])
    ]
    logging.debug(
        "Will look for reviews that the following contributors could "
        + "review: {}".format(people_to_look_for)
    )
    logging.debug("Will email a report to: {}".format(args.email_report))

    phab = init_phab_connection()

    if args.update_cache:
        update_cache(phab)

    load_cache()
    update_git_repos()
    msg = print_most_recent_reviews(
        phab,
        days=1,
        filter_reviewers=filter_reviewers_to_report_for(people_to_look_for),
    )

    if args.email_report:
        send_emails(args.email_report, args.sender, msg)


if __name__ == "__main__":
    main()