xref: /llvm-project/flang/examples/FlangOmpReport/yaml_summarizer.py (revision d4a0154902fb9b0611ed857134b26a64a1d5ad1e)
1"""YAML Summariser
2
3The flang plugin ``flang-omp-report`` takes one Fortran
4file in and returns a YAML report file of the input file.
5This becomes an issue when you want to analyse an entire project
6into one final report.
7The purpose of this Python script is to generate a final YAML
8summary from all of the files generated by ``flang-omp-report``.
9
10Currently, it requires ``ruamel.yaml``,
11which can be installed with:
12
13    ``pip3 install ruamel.yaml``
14
By default it scans the directory it is run in
for any YAML files and outputs a summary to
stdout. It can be run as:
18
19    ``python3 yaml_summarizer.py``
20
21Parameters:
22
23    -d   --directory   Specify which directory to scan. Multiple directories can be searched by
24                       providing a semicolon separated list of directories.
25
26    -l   --log         Combine all yaml files into one log (instead of generating a summary)
27
    -o   --output      Specify a file to write the result to. If an existing directory is
                       given, the result is saved as 'summary.yaml' inside it.
29
30    -r   --recursive   Recursively search directory for all yaml files
31
32Examples:
33
34    ``python3 yaml_summarizer.py -d ~/llvm-project/build/ -r``
35
36    ``python3 yaml_summarizer.py -d "~/llvm-project/build/;~/llvm-project/flang/test/Examples"``
37
38    ``python3 yaml_summarizer.py -l -o ~/examples/report.yaml``
39
40Pseudo-examples:
41
42    Summary:
43
44    $ python3 yaml_summarizer.py file_1.yaml file_2.yaml
    <Unique OMP constructs with their grouped clauses from file_1.yaml and file_2.yaml>
46
47    Constructs are in the form:
48    - construct: someOMPconstruct
49    count: 8
50    clauses:
51    - clause: clauseOne
52        count: 4
53    - clause: ClauseTwo
54        count: 2
55
56    Log:
57
58    $ python3 yaml_summarizer.py -l file_1.yaml file_2.yaml
59    file_1.yaml
60    <OMP clauses and constructs from file_1.yaml>
61    file_2.yaml
62    <OMP clauses and constructs from file_2.yaml>
63
64    Constructs are in the form:
65    - construct: someOMPConstruct
66    line: 12
67    clauses:
68    - clause: clauseOne
69        details: 'someDetailForClause'
70"""
71
72import sys
73import glob
74import argparse
75from pathlib import Path
76from os.path import isdir
77
78from ruamel.yaml import YAML
79
80
def find_yaml_files(search_directory: Path, search_pattern: str):
    """
    Find all '.yaml' files and return an iglob iterator to them.

    Keyword arguments:
    search_directory -- Directory to search in. If this is 'None', the current
                        working directory is searched.
    search_pattern -- Search pattern for 'iglob' to use for finding '.yaml' files.
                      If this is set to 'None', then it will default to just searching
                      for all '.yaml' files directly inside the directory.

    Returns a lazy iterator of path strings. The paths are prefixed with
    'search_directory' when one is given, otherwise they are relative to the
    current working directory.
    """
    # @TODO: Currently *all* yaml files are read - regardless of whether they have
    # been generated with  'flang-omp-report' or not. This might result in the script
    # reading files that it should ignore.
    pattern = search_pattern if search_pattern else "*.yaml"

    if search_directory:
        pattern = str(search_directory.joinpath(pattern))
    # Without a directory the bare pattern is used on purpose: glob resolves it
    # against the current working directory. Prefixing it with "/" would scan
    # the filesystem root instead.

    # 'recursive=True' only affects patterns containing '**'.
    return glob.iglob(pattern, recursive=True)
99
100
def process_log(data, result: dict):
    """
    Process the data input as a 'log' to the result dictionary. This essentially
    just stitches together all of the input '.yaml' files into one result.

    Keyword arguments:
    data -- Data from yaml.load() for a yaml file. So the type can be 'Any'.
            'None' (what yaml.load() gives for an empty document) is accepted
            and adds nothing.
    result -- Dictionary to add the processed data to. It maps each source file
              name (the 'file' field of an entry) to the list of constructs
              recorded for that file, in the order they were seen.
    """
    if data is None:
        return

    for datum in data:
        # Group by source file; the first entry for a file creates its list.
        result.setdefault(datum["file"], []).append(
            {
                "construct": datum["construct"],
                "line": datum["line"],
                "clauses": datum["clauses"],
            }
        )
120
121
def add_clause(datum, construct):
    """
    Add clauses to the construct if they're missing
    Otherwise increment their count by one.

    Every clause occurrence in 'datum' is counted, so a clause that appears
    several times in one datum ends up as a single entry whose count grows by
    the number of occurrences.

    Keyword arguments:
    datum -- Data construct containing clauses to check.
    construct -- Construct to add or increment clause count. Its 'clauses'
                 list is modified in place.
    """
    clauses = construct["clauses"]

    # Index the entries by clause name. The index is kept up to date while
    # clauses are added, so a repeated clause is counted rather than appended
    # again. If the list already holds several entries for one name, only the
    # first of them is updated.
    known = {}
    for entry in clauses:
        known.setdefault(entry["clause"], entry)

    for item in datum["clauses"]:
        name = item["clause"]
        entry = known.get(name)
        if entry is None:
            entry = {"clause": name, "count": 0}
            clauses.append(entry)
            known[name] = entry
        entry["count"] += 1
141
142
def process_summary(data, result: list):
    """
    Process the data input as a 'summary' to the 'result' list.

    Keyword arguments:
    data -- Data from yaml.load() for a yaml file. So the type can be 'Any'.
            'None' (what yaml.load() gives for an empty document) is accepted
            and adds nothing.
    result -- List to add the processed data to. Each element is a dictionary
              with the keys 'construct', 'count' and 'clauses'; there is one
              element per distinct construct name.
    """
    if data is None:
        return

    for datum in data:
        construct = next(
            (item for item in result if item["construct"] == datum["construct"]),
            None,
        )
        # Add the construct to the summary if it hasn't been seen before
        if construct is None:
            construct = {"construct": datum["construct"], "count": 0, "clauses": []}
            result.append(construct)

        construct["count"] += 1
        # New and already known constructs share one code path for clause
        # counting, so both are counted in exactly the same way.
        add_clause(datum, construct)
168
169
def clean_summary(result):
    """Tidy a summary-format result in place once all yaml files are processed."""
    # An empty "clauses" list carries no information, so drop the key
    # altogether to keep the emitted report compact.
    for entry in result:
        has_no_clauses = entry["clauses"] == []
        if has_no_clauses:
            del entry["clauses"]
176
177
def clean_log(result):
    """Tidy a log-format result in place once all yaml files are processed."""
    # Walk every logged construct of every file and drop "clauses" keys whose
    # list is empty.
    every_entry = (entry for entries in result.values() for entry in entries)
    for entry in every_entry:
        if entry["clauses"] == []:
            del entry["clauses"]
184
185
def output_result(yaml: YAML, result, output_file: Path):
    """
    Outputs result to either 'stdout' or to a output file.

    Keyword arguments:
    yaml -- YAML instance whose 'dump' method is used to serialise the result.
    result -- Format result to output.
    output_file -- File to output result to. If this is 'None' then result will be
                   outputted to 'stdout'.
    """
    if not output_file:
        yaml.dump(result, sys.stdout)
        return

    with open(output_file, "w", encoding="utf-8") as file:
        # Only a ready-made string can be written verbatim. The list and
        # dictionary results must always be serialised, whatever the file
        # suffix is, because file.write() only accepts 'str'.
        if isinstance(result, str) and output_file.suffix != ".yaml":
            file.write(result)
        else:
            yaml.dump(result, file)
203
204
def process_yaml(
    search_directories: list, search_pattern: str, result_format: str, output_file: Path
):
    """
    Reads each yaml file, calls the appropriate format function for
    the file and then outputs the result to either 'stdout' or to an output file.

    Keyword arguments:
    search_directories -- List of directory paths to search for '.yaml' files in.
    search_pattern -- String pattern formatted for use with glob.iglob to find all
                      '.yaml' files.
    result_format -- String representing output format. 'log' combines the input
                     files into one log; any other value (the script passes
                     'summary') produces a summary.
    output_file -- Path to output file (If value is None, then default to outputting to 'stdout').
    """
    if result_format == "log":
        result = {}
        action = process_log
        clean_report = clean_log
    else:
        result = []
        action = process_summary
        clean_report = clean_summary

    yaml = YAML()

    # The report of an earlier run may sit in one of the searched directories.
    # It is not a 'flang-omp-report' file and must not be read back as input.
    skip_path = output_file.resolve() if output_file else None

    for search_directory in search_directories:
        for file in find_yaml_files(search_directory, search_pattern):
            if skip_path is not None and Path(file).resolve() == skip_path:
                continue

            with open(file, "r", encoding="utf-8") as yaml_file:
                data = yaml.load(yaml_file)

            # An empty yaml file loads as 'None' and contributes nothing.
            if data is None:
                continue
            action(data, result)

    clean_report(result)

    output_result(yaml, result, output_file)
240
241
def _parse_bool(value: str) -> bool:
    """Convert an explicit command line value such as 'true' or '0' to a bool."""
    normalised = value.strip().lower()
    if normalised in ("1", "true", "t", "yes", "y", "on"):
        return True
    if normalised in ("", "0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value)
    )


def create_arg_parser():
    """
    Create and return a argparse.ArgumentParser modified for script.

    The parsed namespace provides:
    dir -- String given to '-d/--directory', or 'None'.
    output -- String given to '-o/--output', or 'None'.
    recursive -- 'True' if '-r/--recursive' was given (an explicit boolean value
                 may follow the flag), otherwise 'False'.
    log -- 'True' if '-l/--log' was given, otherwise 'False'.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-d", "--directory", help="Specify a directory to scan", dest="dir", type=str
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Writes to a file instead of stdout",
        dest="output",
        type=str,
    )
    parser.add_argument(
        "-r",
        "--recursive",
        help="Recursive search for .yaml files",
        dest="recursive",
        # 'type=bool' would turn every non-empty string, including "False",
        # into True. An explicit value is therefore parsed properly, while a
        # bare '-r' still yields 'const'.
        type=_parse_bool,
        nargs="?",
        const=True,
        default=False,
    )

    exclusive_parser = parser.add_mutually_exclusive_group()
    exclusive_parser.add_argument(
        "-l",
        "--log",
        help="Modifies report format: Combines the log '.yaml' files into one file.",
        action="store_true",
        dest="log",
    )
    return parser
276
277
def parse_arguments():
    """
    Parses arguments given to script and returns a tuple of processed arguments.

    The returned tuple is (search_directory, search_pattern, result_format,
    output_file):
    search_directory -- List of 'Path' objects to search. Defaults to the current
                        working directory.
    search_pattern -- '**/*.yaml' for a recursive search, otherwise '*.yaml'.
    result_format -- 'log' or 'summary'.
    output_file -- 'Path' of the file to write, or 'None' for 'stdout'.

    The script exits with a usage error if the output location is neither an
    existing directory nor a file inside an existing directory.
    """
    parser = create_arg_parser()
    args = parser.parse_args()

    search_directory = []
    if args.dir:
        # Empty segments (for example from a trailing ';') are dropped, since
        # an empty path would silently mean the current directory. A leading
        # '~' is expanded here because it reaches the script unexpanded when
        # the list is quoted on the command line.
        search_directory = [
            Path(path).expanduser() for path in args.dir.split(";") if path
        ]
    if not search_directory:
        search_directory = [Path.cwd()]

    search_pattern = "**/*.yaml" if args.recursive else "*.yaml"
    result_format = "log" if args.log else "summary"

    output_file = None
    if args.output:
        output_path = Path(args.output).expanduser()
        if output_path.is_dir():
            output_file = output_path.joinpath("summary.yaml")
        elif output_path.resolve().parent.is_dir():
            output_file = output_path
        else:
            # parser.error() prints the message and exits.
            parser.error(
                "cannot write to '{}': the parent directory does not exist".format(
                    args.output
                )
            )

    return (search_directory, search_pattern, result_format, output_file)
307
308
def main():
    """Script entry point: parse the command line, then build and emit the report."""
    # parse_arguments() yields the values in the positional order that
    # process_yaml() takes them.
    arguments = parse_arguments()
    process_yaml(*arguments)

    return 0
316
317
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    raise SystemExit(main())
320