xref: /llvm-project/llvm/utils/mlgo-utils/tests/corpus/extract_ir_test.py (revision 02fff933d0eff71db8ff44f4acf1641bb1ad4d38)
1# REQUIRES: system-linux
2
3## Test the functionality of extract_ir_lib
4
5import sys
6
7from mlgo.corpus import extract_ir_lib
8
9## Test that we can convert a compilation database with a single compilation
10## command in it.
11
12# RUN: %python %s test_one_conversion | FileCheck %s --check-prefix CHECK-ONE-CONVERSION
13
14
def test_one_conversion():
    """Print the paths derived from one "command"-style compile command.

    A single compilation database entry is converted and the result of each
    path accessor is printed on its own line so that the FileCheck
    directives following each print call can verify it.
    """
    compile_command = {
        "directory": "/output/directory",
        "command": "-cc1 -c /some/path/lib/foo/bar.cc -o lib/bar.o",
        "file": "/some/path/lib/foo/bar.cc",
    }
    corpus_dir = "/corpus/destination/path"
    extractor = extract_ir_lib.convert_compile_command_to_objectfile(
        compile_command, corpus_dir
    )

    print(extractor.input_obj())
    # CHECK-ONE-CONVERSION: /output/directory/lib/bar.o
    print(extractor.relative_output_path())
    # CHECK-ONE-CONVERSION: lib/bar.o
    print(extractor.cmd_file())
    # CHECK-ONE-CONVERSION: /corpus/destination/path/lib/bar.o.cmd
    print(extractor.bc_file())
    # CHECK-ONE-CONVERSION: /corpus/destination/path/lib/bar.o.bc
    print(extractor.thinlto_index_file())
    # CHECK-ONE-CONVERSION: /corpus/destination/path/lib/bar.o.thinlto.bc
34
35
36## Test that we can convert an arguments style compilation database
37
38# RUN: %python %s test_one_conversion_arguments_style | FileCheck %s --check-prefix CHECK-ARGUMENTS-STYLE
39
40
def test_one_conversion_arguments_style():
    """Print the paths derived from one "arguments"-style compile command.

    Same scenario as the "command"-style test, except that the compiler
    invocation is supplied as a list under the "arguments" key.
    """
    compiler_args = [
        "-cc1",
        "-c",
        "/some/path/lib/foo/bar.cc",
        "-o",
        "lib/bar.o",
    ]
    compile_command = {
        "directory": "/output/directory",
        "arguments": compiler_args,
        "file": "/some/path/lib/foo/bar.cc",
    }
    extractor = extract_ir_lib.convert_compile_command_to_objectfile(
        compile_command, "/corpus/destination/path"
    )

    print(extractor.input_obj())
    # CHECK-ARGUMENTS-STYLE: /output/directory/lib/bar.o
    print(extractor.relative_output_path())
    # CHECK-ARGUMENTS-STYLE: lib/bar.o
    print(extractor.cmd_file())
    # CHECK-ARGUMENTS-STYLE: /corpus/destination/path/lib/bar.o.cmd
    print(extractor.bc_file())
    # CHECK-ARGUMENTS-STYLE: /corpus/destination/path/lib/bar.o.bc
    print(extractor.thinlto_index_file())
    # CHECK-ARGUMENTS-STYLE: /corpus/destination/path/lib/bar.o.thinlto.bc
66
67
68## Test that converting multiple files works as well
69
70# RUN: %python %s test_multiple_conversion | FileCheck %s --check-prefix CHECK-MULTIPLE-CONVERSION
71
72
def test_multiple_conversion():
    """Print the paths derived from a two-entry compilation database.

    Both entries are loaded with load_from_compile_commands and each path
    accessor of each resulting object is printed on its own line for
    FileCheck to verify.

    Only the value returned by the library is printed. Printing the expected
    string next to it would let the substring match performed by FileCheck
    succeed regardless of what the library returned.
    """
    res = extract_ir_lib.load_from_compile_commands(
        [
            {
                "directory": "/output/directory",
                "command": "-cc1 -c /some/path/lib/foo/bar.cc -o lib/bar.o",
                "file": "/some/path/lib/foo/bar.cc",
            },
            {
                "directory": "/output/directory",
                "command": "-cc1 -c /some/path/lib/foo/baz.cc -o lib/other/baz.o",
                "file": "/some/path/lib/foo/baz.cc",
            },
        ],
        "/corpus/destination/path",
    )
    # Materialize the result so that it can be indexed below.
    res = list(res)
    print(res[0].input_obj())
    # CHECK-MULTIPLE-CONVERSION: /output/directory/lib/bar.o
    print(res[0].relative_output_path())
    # CHECK-MULTIPLE-CONVERSION: lib/bar.o
    print(res[0].cmd_file())
    # CHECK-MULTIPLE-CONVERSION: /corpus/destination/path/lib/bar.o.cmd
    print(res[0].bc_file())
    # CHECK-MULTIPLE-CONVERSION: /corpus/destination/path/lib/bar.o.bc
    print(res[0].thinlto_index_file())
    # CHECK-MULTIPLE-CONVERSION: /corpus/destination/path/lib/bar.o.thinlto.bc

    print(res[1].input_obj())
    # CHECK-MULTIPLE-CONVERSION: /output/directory/lib/other/baz.o
    print(res[1].relative_output_path())
    # CHECK-MULTIPLE-CONVERSION: lib/other/baz.o
    print(res[1].cmd_file())
    # CHECK-MULTIPLE-CONVERSION: /corpus/destination/path/lib/other/baz.o.cmd
    print(res[1].bc_file())
    # CHECK-MULTIPLE-CONVERSION: /corpus/destination/path/lib/other/baz.o.bc
    print(res[1].thinlto_index_file())
    # CHECK-MULTIPLE-CONVERSION: /corpus/destination/path/lib/other/baz.o.thinlto.bc
111
112
113## Test that we generate the correct objcopy commands for extracting commands
114
115# RUN: %python %s test_command_extraction | FileCheck %s --check-prefix CHECK-COMMAND-EXTRACT
116
117
def test_command_extraction():
    """Print the objcopy invocations built for the two dumped sections.

    An extractor is created with an explicit object base directory, then
    each element of the ".llvmcmd" and ".llvmbc" extraction commands is
    printed on its own line for FileCheck to verify.
    """
    objcopy_path = "/bin/llvm_objcopy_path"
    extractor = extract_ir_lib.TrainingIRExtractor(
        obj_relative_path="lib/obj_file.o",
        output_base_dir="/where/corpus/goes",
        obj_base_dir="/foo/bar",
    )

    cmd_section_command = extractor._get_extraction_cmd_command(
        objcopy_path, ".llvmcmd"
    )
    for argument in cmd_section_command:
        print(argument)
    # CHECK-COMMAND-EXTRACT: /bin/llvm_objcopy_path
    # CHECK-COMMAND-EXTRACT: --dump-section=.llvmcmd=/where/corpus/goes/lib/obj_file.o.cmd
    # CHECK-COMMAND-EXTRACT: /foo/bar/lib/obj_file.o
    # CHECK-COMMAND-EXTRACT: /dev/null

    bc_section_command = extractor._get_extraction_bc_command(
        objcopy_path, ".llvmbc"
    )
    for argument in bc_section_command:
        print(argument)
    # CHECK-COMMAND-EXTRACT: /bin/llvm_objcopy_path
    # CHECK-COMMAND-EXTRACT: --dump-section=.llvmbc=/where/corpus/goes/lib/obj_file.o.bc
    # CHECK-COMMAND-EXTRACT: /foo/bar/lib/obj_file.o
    # CHECK-COMMAND-EXTRACT: /dev/null
143
144
## Test that we generate the correct extraction commands without specifying
## an object base directory.
147
148# RUN: %python %s test_command_extraction_no_basedir | FileCheck %s --check-prefix CHECK-COMMAND-EXTRACT-NOBASEDIR
149
150
def test_command_extraction_no_basedir():
    """Print the objcopy invocations built when no object base dir is given.

    The extractor is created from only a relative object path and an output
    directory, then each element of the ".llvmcmd" and ".llvmbc" extraction
    commands is printed on its own line for FileCheck to verify.
    """
    objcopy_path = "/bin/llvm_objcopy_path"
    extractor = extract_ir_lib.TrainingIRExtractor(
        "lib/obj_file.o", "/where/corpus/goes"
    )

    cmd_section_command = extractor._get_extraction_cmd_command(
        objcopy_path, ".llvmcmd"
    )
    for argument in cmd_section_command:
        print(argument)
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: /bin/llvm_objcopy_path
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: --dump-section=.llvmcmd=/where/corpus/goes/lib/obj_file.o.cmd
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: lib/obj_file.o
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: /dev/null

    bc_section_command = extractor._get_extraction_bc_command(
        objcopy_path, ".llvmbc"
    )
    for argument in bc_section_command:
        print(argument)
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: /bin/llvm_objcopy_path
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: --dump-section=.llvmbc=/where/corpus/goes/lib/obj_file.o.bc
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: lib/obj_file.o
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: /dev/null
172
173
174## Test that we can extract a corpus from lld parameters
175
176# RUN: %python %s test_lld_params | FileCheck %s --check-prefix CHECK-LLD-PARAMS
177
178
def test_lld_params():
    """Print the paths derived from a list of lld command line parameters.

    The parameter list mixes an output option, two object files, an archive
    and an unrelated flag. Paths of the first returned object and the input
    path of the second one are printed, one per line, for FileCheck to
    verify.
    """
    lld_opts = [
        "-o",
        "output/dir/exe",
        "lib/obj1.o",
        "somelib.a",
        "-W,blah",
        "lib/dir/obj2.o",
    ]
    obj = extract_ir_lib.load_from_lld_params(lld_opts, "/some/path", "/tmp/out")
    print(obj[0].input_obj())
    # CHECK-LLD-PARAMS: /some/path/lib/obj1.o
    print(obj[0].relative_output_path())
    # CHECK-LLD-PARAMS: lib/obj1.o
    print(obj[0].cmd_file())
    # CHECK-LLD-PARAMS: /tmp/out/lib/obj1.o.cmd
    print(obj[0].thinlto_index_file())
    # CHECK-LLD-PARAMS: /tmp/out/lib/obj1.o.thinlto.bc
    # The prefix below must match the one passed to FileCheck exactly,
    # otherwise the directive is silently ignored.
    print(obj[1].input_obj())
    # CHECK-LLD-PARAMS: /some/path/lib/dir/obj2.o
199
200
201## Test that we can load a corpus from a directory containing object files
202
203# RUN: rm -rf %t.dir && mkdir %t.dir
204# RUN: mkdir %t.dir/subdir
205# RUN: touch %t.dir/subdir/test1.o
206# RUN: touch %t.dir/subdir/test2.o
207# RUN: %python %s test_load_from_directory %t.dir | FileCheck %s --check-prefix CHECK-LOAD-DIR
208
209
def test_load_from_directory(tempdir):
    """Print what load_from_directory discovers under `tempdir`.

    Args:
        tempdir: Directory to scan, received from the command line. The lit
            commands for this test create subdir/test1.o and subdir/test2.o
            inside it.

    The discovered objects are visited in order of their relative path so
    that the output is deterministic. Only values coming from the library
    are printed, so a wrong result cannot be masked by an expected string
    appearing on the same output line.
    """
    objs = extract_ir_lib.load_from_directory(tempdir, "/output")
    for obj in sorted(objs, key=lambda x: x._obj_relative_path):
        print(obj._obj_relative_path)
        # Explicitly check for equality here as we can not check within
        # FileCheck the exact value as lit substitutions do not work in
        # FileCheck lines.
        print(obj._obj_base_dir == tempdir)
        print(obj._output_base_dir)
    # The directives live after the loop so that both iterations are covered.
    # CHECK-LOAD-DIR: subdir/test1.o
    # CHECK-LOAD-DIR: True
    # CHECK-LOAD-DIR: /output
    # CHECK-LOAD-DIR: subdir/test2.o
    # CHECK-LOAD-DIR: True
    # CHECK-LOAD-DIR: /output
222
223
224## Test that we can load a corpus in the lld thinLTO case
225
# RUN: rm -rf %t.dir && mkdir %t.dir
227# RUN: touch %t.dir/1.3.import.bc
228# RUN: touch %t.dir/2.3.import.bc
229# RUN: touch %t.dir/3.3.import.bc
230# RUN: touch %t.dir/1.thinlto.bc
231# RUN: touch %t.dir/2.thinlto.bc
232# RUN: touch %t.dir/3.thinlto.bc
233# RUN: %python %s test_lld_thinlto_discovery %t.dir | FileCheck %s --check-prefix CHECK-LLD-THINLTO-DISCOVERY
234
235
def test_lld_thinlto_discovery(tempdir):
    """Print what load_for_lld_thinlto discovers directly under `tempdir`.

    Args:
        tempdir: Directory to scan, received from the command line.

    For each discovered object, visited in order of its relative path, the
    relative path, whether its base directory equals `tempdir`, and its
    output base directory are printed on separate lines.
    """
    discovered = extract_ir_lib.load_for_lld_thinlto(tempdir, "/output")
    in_path_order = sorted(discovered, key=lambda item: item._obj_relative_path)
    for extractor in in_path_order:
        print(extractor._obj_relative_path)
        # The base directory is compared here rather than in FileCheck
        # because the lit substitution for the temp dir is not available
        # inside FileCheck directives.
        print(extractor._obj_base_dir == tempdir)
        print(extractor._output_base_dir)  # outdir
    # CHECK-LLD-THINLTO-DISCOVERY: 1
    # CHECK-LLD-THINLTO-DISCOVERY: True
    # CHECK-LLD-THINLTO-DISCOVERY: /output
    # CHECK-LLD-THINLTO-DISCOVERY: 2
    # CHECK-LLD-THINLTO-DISCOVERY: True
    # CHECK-LLD-THINLTO-DISCOVERY: /output
    # CHECK-LLD-THINLTO-DISCOVERY: 3
    # CHECK-LLD-THINLTO-DISCOVERY: True
    # CHECK-LLD-THINLTO-DISCOVERY: /output
253
254
255## Test that we can load a corpus in the nested lld thinLTO case
256
257# RUN: mkdir %t.dir/nest
258# RUN: mv %t.dir/*.bc %t.dir/nest
259# RUN: %python %s test_lld_thinlto_discovery_nested %t.dir | FileCheck %s --check-prefix CHECK-LLD-THINLTO-DISCOVERY-NESTED
260
261
def test_lld_thinlto_discovery_nested(outer):
    """Print what load_for_lld_thinlto discovers in a nested directory.

    Args:
        outer: Directory to scan, received from the command line. The lit
            commands for this test move the bitcode files into its "nest"
            subdirectory beforehand.

    For each discovered object, visited in order of its relative path, the
    relative path, whether its base directory equals `outer`, and its output
    base directory are printed on separate lines.
    """
    discovered = extract_ir_lib.load_for_lld_thinlto(outer, "/output")
    in_path_order = sorted(discovered, key=lambda item: item._obj_relative_path)
    for extractor in in_path_order:
        print(extractor._obj_relative_path)
        print(extractor._obj_base_dir == outer)
        print(extractor._output_base_dir)
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: nest/1
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: True
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: /output
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: nest/2
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: True
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: /output
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: nest/3
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: True
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: /output
277
278
279## Test the lld extraction works as expected
280
281# RUN: rm -rf  %t.dir.out && mkdir %t.dir.out
282# RUN: %python %s test_lld_thinlto_extraction %t.dir %t.dir.out | FileCheck %s --check-prefix CHECK-LLD-THINLTO-EXTRACTION-PY
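# FIXME: This command lacks the lit RUN directive, so the
# CHECK-LLD-THINLTO-EXTRACTION lines below are never verified.
# ls %t.dir.out/nest | FileCheck %s --check-prefix CHECK-LLD-THINLTO-EXTRACTION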
284
285# CHECK-LLD-THINLTO-EXTRACTION: 1
286# CHECK-LLD-THINLTO-EXTRACTION: 2
287# CHECK-LLD-THINLTO-EXTRACTION: 3
288# CHECK-LLD-THINLTO-EXTRACTION: 1.bc
289# CHECK-LLD-THINLTO-EXTRACTION: 2.bc
290# CHECK-LLD-THINLTO-EXTRACTION: 3.bc
291# CHECK-LLD-THINLTO-EXTRACTION: 1.thinlto.bc
292# CHECK-LLD-THINLTO-EXTRACTION: 2.thinlto.bc
293# CHECK-LLD-THINLTO-EXTRACTION: 3.thinlto.bc
294
295
def test_lld_thinlto_extraction(outer, outdir):
    """Run the "local" thinLTO extraction and print each returned value.

    Args:
        outer: Directory to scan for inputs, received from the command line.
        outdir: Directory handed to the library as the output location,
            received from the command line.

    The discovered objects are processed in order of their relative path and
    the value returned by each extract call is printed on its own line.
    """
    discovered = extract_ir_lib.load_for_lld_thinlto(outer, outdir)
    in_path_order = sorted(discovered, key=lambda item: item._obj_relative_path)
    for extractor in in_path_order:
        extracted_module = extractor.extract(thinlto_build="local")
        print(extracted_module)
    # CHECK-LLD-THINLTO-EXTRACTION-PY: 1
    # CHECK-LLD-THINLTO-EXTRACTION-PY: 2
    # CHECK-LLD-THINLTO-EXTRACTION-PY: 3
304
305
306## Test that we can load a bazel query JSON as expected.
307
308# RUN: %python %s test_load_bazel_aquery | FileCheck %s --check-prefix CHECK-TEST-LOAD-BAZEL-AQUERY
309
310
def test_load_bazel_aquery():
    """Print the paths derived from a bazel aquery result.

    The query holds one action whose mnemonic is "not-link" and one
    "CppLink" action that lists two object files. Paths of the first two
    returned objects are printed, one per line, for FileCheck to verify.
    """
    link_action = {
        "mnemonic": "CppLink",
        "arguments": ["clang", "-o", "output_binary", "test1.o", "test2.o"],
    }
    other_action = {"mnemonic": "not-link", "arguments": []}
    aquery = {"actions": [other_action, link_action]}

    extractors = extract_ir_lib.load_bazel_aquery(
        aquery, "/some/path", "/tmp/out"
    )

    print(extractors[0].input_obj())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: /some/path/test1.o
    print(extractors[0].relative_output_path())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: test1.o
    print(extractors[0].cmd_file())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test1.o.cmd
    print(extractors[0].bc_file())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test1.o.bc
    print(extractors[1].input_obj())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: /some/path/test2.o
    print(extractors[1].relative_output_path())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: test2.o
    print(extractors[1].cmd_file())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test2.o.cmd
    print(extractors[1].bc_file())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test2.o.bc
341
342
343## Test that filtering works correctly
344
345# RUN: %python %s test_filtering | FileCheck %s --check-prefix CHECK-TEST-FILTERING
346
347
def test_filtering():
    """Print the should_include_module verdict for several filters.

    The same NUL-separated command line is tested against no filter, a
    match-everything pattern, a pattern matching one of its arguments and a
    pattern matching none of them. Each verdict is printed on its own line.
    """
    # The command line is a sequence of arguments separated by NUL bytes.
    cmdline = "\0".join(
        ["-cc1", "x/y/foobar.cpp", "-Oz", "-Ifoo", "-o", "bin/out.o"]
    )
    for module_filter in (None, ".*", "^-Oz$", "^-O3$"):
        print(extract_ir_lib.should_include_module(cmdline, module_filter))
    # CHECK-TEST-FILTERING: True
    # CHECK-TEST-FILTERING: True
    # CHECK-TEST-FILTERING: True
    # CHECK-TEST-FILTERING: False
358
359
360## Test that we extract the thinLTO index correctly
361
362# RUN: %python %s test_thinlto_index_extractor | FileCheck %s --check-prefix CHECK-THINLTO-INDEX-EXTRACTOR
363
364
def test_thinlto_index_extractor():
    """Print the thinLTO index path obtained from a command line.

    The NUL-separated command line carries a "-fthinlto-index=" argument
    with a relative path. The value returned by get_thinlto_index for the
    base directory "/the/base/dir" is printed for FileCheck to verify.
    """
    # The command line is a sequence of arguments separated by NUL bytes.
    arguments = [
        "-cc1",
        "x/y/foobar.cpp",
        "-Oz",
        "-Ifoo",
        "-o",
        "bin/out.o",
        "-fthinlto-index=foo/bar.thinlto.bc",
    ]
    cmdline = "\0".join(arguments)
    index_path = extract_ir_lib.get_thinlto_index(cmdline, "/the/base/dir")
    print(index_path)
    # CHECK-THINLTO-INDEX-EXTRACTOR: /the/base/dir/foo/bar.thinlto.bc
372
373
if __name__ == "__main__":
    # The first command line argument names the test function to run; any
    # remaining arguments are forwarded to it as positional parameters.
    selected_test = globals()[sys.argv[1]]
    selected_test(*sys.argv[2:])
376