xref: /llvm-project/flang/examples/FlangOmpReport/yaml_summarizer.py (revision 9e37b1e5a0c15f36c5642406d5aa02a657a0b19c)
1"""YAML Summariser
2
3The flang plugin ``flang-omp-report`` takes one Fortran
4file in and returns a YAML report file of the input file.
5This becomes an issue when you want to analyse an entire project
6into one final report.
7The purpose of this Python script is to generate a final YAML
8summary from all of the files generated by ``flang-omp-report``.
9
10Currently, it requires ``ruamel.yaml``,
11which can be installed with:
12
13    ``pip3 install ruamel.yaml``
14
By default it scans the directory it is run in
for any YAML files and outputs a summary to
stdout. It can be run as:
18
19    ``python3 yaml_summarizer.py``
20
21Parameters:
22
    -d   --directory   Specify which directory to scan. Multiple directories can be searched by
                       providing a semicolon separated list of directories.
25
26    -l   --log         Combine all yaml files into one log (instead of generating a summary)
27
    -o   --output      Specify a file to save the report to, or a directory in which to
                       save it as 'summary.yaml'
29
30    -r   --recursive   Recursively search directory for all yaml files
31
32Examples:
33
34    ``python3 yaml_summarizer.py -d ~/llvm-project/build/ -r``
35
36    ``python3 yaml_summarizer.py -d "~/llvm-project/build/;~/llvm-project/flang/test/Examples"``
37
38    ``python3 yaml_summarizer.py -l -o ~/examples/report.yaml``
39
40Pseudo-examples:
41
42    Summary:
43
44    $ python3 yaml_summarizer.py file_1.yaml file_2.yaml
    <Unique OMP constructs with their grouped clauses from file_1.yaml and file_2.yaml>
46
47    Constructs are in the form:
    - construct: someOMPconstruct
      count: 8
      clauses:
      - clause: clauseOne
        count: 4
      - clause: ClauseTwo
        count: 2
55
56    Log:
57
58    $ python3 yaml_summarizer.py -l file_1.yaml file_2.yaml
59    file_1.yaml
60    <OMP clauses and constructs from file_1.yaml>
61    file_2.yaml
62    <OMP clauses and constructs from file_2.yaml>
63
64    Constructs are in the form:
    - construct: someOMPConstruct
      line: 12
      clauses:
      - clause: clauseOne
        details: 'someDetailForClause'
70"""
71
72import sys
73import glob
74import argparse
75from pathlib import Path
76from os.path import isdir
77
78from ruamel.yaml import YAML
79
def find_yaml_files(search_directory: Path, search_pattern: str):
    """
    Find all '.yaml' files and return an iglob iterator over them.

    Keyword arguments:
    search_directory -- Directory the search pattern is applied to. If this is
                        'None', the pattern is resolved relative to the current
                        working directory.
    search_pattern -- Search pattern for 'iglob' to use for finding '.yaml' files.
                      If this is set to 'None', then it will default to just searching
                      for all '.yaml' files directly inside the directory.

    Returns:
    A lazy iterator of matching path strings ('**' in the pattern matches
    nested directories because the search is done with 'recursive=True').
    """
    # @TODO: Currently *all* yaml files are read - regardless of whether they have
    # been generated with  'flang-omp-report' or not. This might result in the script
    # reading files that it should ignore.
    if search_pattern is None:
        search_pattern = "*.yaml"

    if search_directory:
        return glob.iglob(str(search_directory.joinpath(search_pattern)), recursive=True)

    # A relative pattern is resolved by glob against the current working
    # directory. Prefixing it with "/" would scan the filesystem root instead.
    return glob.iglob(search_pattern, recursive=True)
96
def process_log(data, result: dict):
    """
    Process the data input as a 'log' into the result dictionary. This essentially
    just stitches together all of the input '.yaml' files into one result, grouped
    by the source file each construct was reported for.

    Keyword arguments:
    data -- Data from yaml.load() for a yaml file. So the type can be 'Any'.
            Each entry is read through its 'file', 'construct', 'line' and
            'clauses' keys. 'None' (an empty yaml document) is ignored.
    result -- Dictionary to add the processed data to. It maps each 'file' value
              to the list of constructs logged for it and is modified in place.
    """
    # An empty yaml document loads as 'None'; there is nothing to record.
    if not data:
        return

    for datum in data:
        # setdefault creates the per-file list the first time a file is seen.
        result.setdefault(datum['file'], []).append({
            "construct": datum['construct'],
            "line": datum['line'],
            "clauses": datum['clauses'],
        })
112
def add_clause(datum, construct):
    """
    Add clauses to the construct if they're missing,
    otherwise increment their count by one.

    Every occurrence of a clause in 'datum' counts once, so a clause that is
    repeated on the same directive ends up as a single entry with a higher
    count rather than as several entries.

    Keyword arguments:
    datum -- Data construct containing clauses to check.
    construct -- Construct to add or increment clause count. Its 'clauses'
                 list is modified in place.
    """
    clauses = construct["clauses"]

    # Index the summary entries by clause name. The index is kept up to date
    # while new entries are appended, so a clause seen twice in 'datum' is
    # found the second time instead of being appended again.
    by_name = {}
    for entry in clauses:
        by_name.setdefault(entry["clause"], entry)

    for item in datum["clauses"]:
        name = item["clause"]
        entry = by_name.get(name)
        if entry is None:
            entry = {"clause": name, "count": 0}
            clauses.append(entry)
            by_name[name] = entry
        entry["count"] += 1

def process_summary(data, result: list):
    """
    Process the data input as a 'summary' into the 'result' list.

    Keyword arguments:
    data -- Data from yaml.load() for a yaml file. So the type can be 'Any'.
            Each entry is read through its 'construct' and 'clauses' keys.
            'None' (an empty yaml document) is ignored.
    result -- List to add the processed data to. It holds one entry per unique
              construct, with a 'count' and the counted 'clauses', and is
              modified in place.
    """
    # An empty yaml document loads as 'None'; there is nothing to summarise.
    if not data:
        return

    for datum in data:
        construct = next((item for item in result
                          if item["construct"] == datum["construct"]), None)
        # Register the construct with zeroed counters the first time it is
        # seen, so new and known constructs share the counting code below.
        if construct is None:
            construct = {"construct": datum['construct'],
                         "count": 0,
                         "clauses": []}
            result.append(construct)

        construct["count"] += 1
        add_clause(datum, construct)
159
def clean_summary(result):
    """ Tidies a summary-format result in place once all yaml files are processed."""
    # A construct that never had a clause keeps its entry; only its empty
    # "clauses" list is dropped so the dumped report stays compact.
    for entry in result:
        if entry["clauses"] == []:
            del entry["clauses"]
166
def clean_log(result):
    """ Tidies a log-format result in place once all yaml files are processed."""
    # Walk every logged construct of every file and drop its "clauses" list
    # when that list is empty.
    logged = (entry for file_entries in result.values()
              for entry in file_entries)
    for entry in logged:
        if entry["clauses"] == []:
            del entry["clauses"]
173
def output_result(yaml: "YAML", result, output_file: Path):
    """
    Outputs result to either 'stdout' or to an output file.

    Keyword arguments:
    yaml -- YAML instance whose 'dump(data, stream)' is used to serialise the result.
    result -- Formatted result to output.
    output_file -- File to output result to. If this is 'None' then result will be
                   outputted to 'stdout'. The file is created or overwritten.
    """
    if not output_file:
        yaml.dump(result, sys.stdout)
        return

    # 'result' is a list or a dictionary, never a string, so it has to go
    # through the yaml dumper whatever extension the output file has.
    with open(output_file, 'w', encoding='utf-8') as file:
        yaml.dump(result, file)
191
def process_yaml(search_directories: list, search_pattern: str,
                 result_format: str, output_file: Path):
    """
    Loads every yaml file that is found, feeds it to the processing function of
    the requested format and finally writes the cleaned-up result to either
    'stdout' or an output file.

    Keyword arguments:
    search_directories -- List of directory paths to search for '.yaml' files in.
    search_pattern -- String pattern formatted for use with glob.iglob to find all
                      '.yaml' files.
    result_format -- String representing output format. 'log' selects the log
                     format; any other value selects the summary format.
    output_file -- Path to output file (If value is None, then default to outputting to 'stdout').
    """
    # The log format accumulates into a dictionary, the summary format into a list.
    as_log = result_format == "log"
    result = {} if as_log else []
    action = process_log if as_log else process_summary
    clean_report = clean_log if as_log else clean_summary

    # NOTE(review): presumably ruamel's default loader does not construct
    # arbitrary objects from the scanned files - confirm.
    yaml = YAML()

    for directory in search_directories:
        for yaml_path in find_yaml_files(directory, search_pattern):
            with open(yaml_path, "r", encoding='utf-8') as handle:
                action(yaml.load(handle), result)

    clean_report(result)
    output_result(yaml, result, output_file)
226
def _parse_bool(value: str) -> bool:
    """ Convert an optional command-line word such as 'true' or 'no' to a bool. """
    word = value.strip().lower()
    if word in ("1", "true", "t", "yes", "y", "on"):
        return True
    if word in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got {!r}".format(value))

def create_arg_parser():
    """
    Create and return an argparse.ArgumentParser set up for this script.

    The parser provides:
    -d / --directory -- stored as 'dir', a string.
    -o / --output    -- stored as 'output', a string.
    -r / --recursive -- stored as 'recursive'. A bare '-r' means True; an
                        optional value such as 'true' or 'false' is also accepted.
    -l / --log       -- stored as 'log', True when the flag is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("-d", "--directory", help="Specify a directory to scan",
                        dest="dir", type=str)
    # Adjacent literals keep the help text free of the indentation that a
    # backslash continuation inside the string would embed in it.
    parser.add_argument("-o", "--output", help="Writes to a file instead of "
                        "stdout", dest="output", type=str)
    # 'type=bool' would turn every non-empty word - including "False" - into
    # True, so the optional value is parsed explicitly instead.
    parser.add_argument("-r", "--recursive", help="Recursive search for .yaml files",
                        dest="recursive", type=_parse_bool, nargs='?',
                        const=True, default=False)

    exclusive_parser = parser.add_mutually_exclusive_group()
    exclusive_parser.add_argument("-l", "--log", help="Modifies report format: "
                                  "Combines the log '.yaml' files into one file.",
                                  action='store_true', dest='log')
    return parser
242
def parse_arguments():
    """
    Parses arguments given to script and returns a tuple of processed arguments.

    Returns:
    A tuple '(search_directory, search_pattern, result_format, output_file)':
    search_directory -- List of directory paths to scan. Defaults to the current
                        working directory when no directory is given.
    search_pattern -- "**/*.yaml" for a recursive search, "*.yaml" otherwise.
    result_format -- "log" when the log flag is set, "summary" otherwise.
    output_file -- Path of the file to write, or 'None' to write to 'stdout'.

    Exits with an argparse usage error when the output path can not be written
    because its parent directory does not exist.
    """
    parser = create_arg_parser()
    args = parser.parse_args()

    search_directory = []
    if args.dir:
        # A quoted list reaches the script with '~' unexpanded, so expand it
        # here. Empty entries (for example from a trailing ';') are skipped.
        search_directory = [Path(path).expanduser()
                            for path in args.dir.split(";") if path.strip()]
    if not search_directory:
        search_directory = [Path.cwd()]

    search_pattern = "**/*.yaml" if args.recursive else "*.yaml"
    result_format = "log" if args.log else "summary"

    output_file = None
    if args.output:
        if isdir(args.output):
            # Only a directory was given: use a default file name inside it.
            output_file = Path(args.output).joinpath("summary.yaml")
        elif isdir(Path(args.output).resolve().parent):
            output_file = Path(args.output)
        else:
            # Report the problem instead of failing later on an unset variable.
            parser.error("cannot write to '{}': the parent directory does "
                         "not exist".format(args.output))

    return (search_directory, search_pattern, result_format, output_file)
272
def main():
    """ Entry point: parse the command line, then build and output the report. """
    # parse_arguments() returns the values in the order process_yaml() takes them.
    process_yaml(*parse_arguments())

    return 0
280
if __name__ == "__main__":
    # Only run when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)
283