# REQUIRES: system-linux

## Test the functionality of extract_ir_lib

import sys

from mlgo.corpus import extract_ir_lib

## Test that we can convert a compilation database with a single compilation
## command in it.

# RUN: %python %s test_one_conversion | FileCheck %s --check-prefix CHECK-ONE-CONVERSION


def test_one_conversion():
    """Convert one "command"-style compile entry and print its derived paths."""
    obj = extract_ir_lib.convert_compile_command_to_objectfile(
        {
            "directory": "/output/directory",
            "command": "-cc1 -c /some/path/lib/foo/bar.cc -o lib/bar.o",
            "file": "/some/path/lib/foo/bar.cc",
        },
        "/corpus/destination/path",
    )
    print(obj.input_obj())
    # CHECK-ONE-CONVERSION: /output/directory/lib/bar.o
    print(obj.relative_output_path())
    # CHECK-ONE-CONVERSION: lib/bar.o
    print(obj.cmd_file())
    # CHECK-ONE-CONVERSION: /corpus/destination/path/lib/bar.o.cmd
    print(obj.bc_file())
    # CHECK-ONE-CONVERSION: /corpus/destination/path/lib/bar.o.bc
    print(obj.thinlto_index_file())
    # CHECK-ONE-CONVERSION: /corpus/destination/path/lib/bar.o.thinlto.bc


## Test that we can convert an arguments style compilation database

# RUN: %python %s test_one_conversion_arguments_style | FileCheck %s --check-prefix CHECK-ARGUMENTS-STYLE


def test_one_conversion_arguments_style():
    """Convert one "arguments"-style compile entry and print its derived paths."""
    obj = extract_ir_lib.convert_compile_command_to_objectfile(
        {
            "directory": "/output/directory",
            "arguments": [
                "-cc1",
                "-c",
                "/some/path/lib/foo/bar.cc",
                "-o",
                "lib/bar.o",
            ],
            "file": "/some/path/lib/foo/bar.cc",
        },
        "/corpus/destination/path",
    )
    print(obj.input_obj())
    # CHECK-ARGUMENTS-STYLE: /output/directory/lib/bar.o
    print(obj.relative_output_path())
    # CHECK-ARGUMENTS-STYLE: lib/bar.o
    print(obj.cmd_file())
    # CHECK-ARGUMENTS-STYLE: /corpus/destination/path/lib/bar.o.cmd
    print(obj.bc_file())
    # CHECK-ARGUMENTS-STYLE: /corpus/destination/path/lib/bar.o.bc
    print(obj.thinlto_index_file())
    # CHECK-ARGUMENTS-STYLE: /corpus/destination/path/lib/bar.o.thinlto.bc


## Test that converting multiple files works as well

# RUN: %python %s test_multiple_conversion | FileCheck %s --check-prefix CHECK-MULTIPLE-CONVERSION


def test_multiple_conversion():
    """Load two compile entries and print the derived paths of each result."""
    res = extract_ir_lib.load_from_compile_commands(
        [
            {
                "directory": "/output/directory",
                "command": "-cc1 -c /some/path/lib/foo/bar.cc -o lib/bar.o",
                "file": "/some/path/lib/foo/bar.cc",
            },
            {
                "directory": "/output/directory",
                "command": "-cc1 -c /some/path/lib/foo/baz.cc -o lib/other/baz.o",
                "file": "/some/path/lib/foo/baz.cc",
            },
        ],
        "/corpus/destination/path",
    )
    res = list(res)
    print(res[0].input_obj())
    # CHECK-MULTIPLE-CONVERSION: /output/directory/lib/bar.o
    print(res[0].relative_output_path())
    # CHECK-MULTIPLE-CONVERSION: lib/bar.o
    print(res[0].cmd_file())
    # CHECK-MULTIPLE-CONVERSION: /corpus/destination/path/lib/bar.o.cmd
    print(res[0].bc_file())
    # CHECK-MULTIPLE-CONVERSION: /corpus/destination/path/lib/bar.o.bc
    print(res[0].thinlto_index_file())
    # CHECK-MULTIPLE-CONVERSION: /corpus/destination/path/lib/bar.o.thinlto.bc

    # Print only the computed values: echoing the expected literal next to
    # them would let FileCheck match the literal and hide a wrong result.
    print(res[1].input_obj())
    # CHECK-MULTIPLE-CONVERSION: /output/directory/lib/other/baz.o
    print(res[1].relative_output_path())
    # CHECK-MULTIPLE-CONVERSION: lib/other/baz.o
    print(res[1].cmd_file())
    # CHECK-MULTIPLE-CONVERSION: /corpus/destination/path/lib/other/baz.o.cmd
    print(res[1].bc_file())
    # CHECK-MULTIPLE-CONVERSION: /corpus/destination/path/lib/other/baz.o.bc
    print(res[1].thinlto_index_file())
    # CHECK-MULTIPLE-CONVERSION: /corpus/destination/path/lib/other/baz.o.thinlto.bc


## Test that we generate the correct objcopy commands for extracting commands

# RUN: %python %s test_command_extraction | FileCheck %s --check-prefix CHECK-COMMAND-EXTRACT


def test_command_extraction():
    """Print the objcopy argument lists built for the cmd and bc sections."""
    obj = extract_ir_lib.TrainingIRExtractor(
        obj_relative_path="lib/obj_file.o",
        output_base_dir="/where/corpus/goes",
        obj_base_dir="/foo/bar",
    )
    extraction_cmd1 = obj._get_extraction_cmd_command(
        "/bin/llvm_objcopy_path", ".llvmcmd"
    )
    for part in extraction_cmd1:
        print(part)
    # CHECK-COMMAND-EXTRACT: /bin/llvm_objcopy_path
    # CHECK-COMMAND-EXTRACT: --dump-section=.llvmcmd=/where/corpus/goes/lib/obj_file.o.cmd
    # CHECK-COMMAND-EXTRACT: /foo/bar/lib/obj_file.o
    # CHECK-COMMAND-EXTRACT: /dev/null

    extraction_cmd2 = obj._get_extraction_bc_command(
        "/bin/llvm_objcopy_path", ".llvmbc"
    )
    for part in extraction_cmd2:
        print(part)
    # CHECK-COMMAND-EXTRACT: /bin/llvm_objcopy_path
    # CHECK-COMMAND-EXTRACT: --dump-section=.llvmbc=/where/corpus/goes/lib/obj_file.o.bc
    # CHECK-COMMAND-EXTRACT: /foo/bar/lib/obj_file.o
    # CHECK-COMMAND-EXTRACT: /dev/null


## Test that we generate the correct extraction commands without specifying
## an output base directory.

# RUN: %python %s test_command_extraction_no_basedir | FileCheck %s --check-prefix CHECK-COMMAND-EXTRACT-NOBASEDIR


def test_command_extraction_no_basedir():
    """Print the objcopy argument lists when no object base dir is passed."""
    obj = extract_ir_lib.TrainingIRExtractor("lib/obj_file.o", "/where/corpus/goes")
    extraction_cmd1 = obj._get_extraction_cmd_command(
        "/bin/llvm_objcopy_path", ".llvmcmd"
    )
    for part in extraction_cmd1:
        print(part)
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: /bin/llvm_objcopy_path
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: --dump-section=.llvmcmd=/where/corpus/goes/lib/obj_file.o.cmd
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: lib/obj_file.o
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: /dev/null

    extraction_cmd2 = obj._get_extraction_bc_command(
        "/bin/llvm_objcopy_path", ".llvmbc"
    )
    for part in extraction_cmd2:
        print(part)
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: /bin/llvm_objcopy_path
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: --dump-section=.llvmbc=/where/corpus/goes/lib/obj_file.o.bc
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: lib/obj_file.o
    # CHECK-COMMAND-EXTRACT-NOBASEDIR: /dev/null


## Test that we can extract a corpus from lld parameters

# RUN: %python %s test_lld_params | FileCheck %s --check-prefix CHECK-LLD-PARAMS


def test_lld_params():
    """Load objects from an lld parameter list and print their derived paths."""
    lld_opts = [
        "-o",
        "output/dir/exe",
        "lib/obj1.o",
        "somelib.a",
        "-W,blah",
        "lib/dir/obj2.o",
    ]
    obj = extract_ir_lib.load_from_lld_params(lld_opts, "/some/path", "/tmp/out")
    print(obj[0].input_obj())
    # CHECK-LLD-PARAMS: /some/path/lib/obj1.o
    print(obj[0].relative_output_path())
    # CHECK-LLD-PARAMS: lib/obj1.o
    print(obj[0].cmd_file())
    # CHECK-LLD-PARAMS: /tmp/out/lib/obj1.o.cmd
    print(obj[0].thinlto_index_file())
    # CHECK-LLD-PARAMS: /tmp/out/lib/obj1.o.thinlto.bc
    print(obj[1].input_obj())
    # CHECK-LLD-PARAMS: /some/path/lib/dir/obj2.o


## Test that we can load a corpus from a directory containing object files

# RUN: rm -rf %t.dir && mkdir %t.dir
# RUN: mkdir %t.dir/subdir
# RUN: touch %t.dir/subdir/test1.o
# RUN: touch %t.dir/subdir/test2.o
# RUN: %python %s test_load_from_directory %t.dir | FileCheck %s --check-prefix CHECK-LOAD-DIR


def test_load_from_directory(tempdir):
    """Load objects found under `tempdir` and print each one's attributes.

    Args:
        tempdir: Directory (given on the command line) holding the object
            files created by the lit commands above.
    """
    objs = extract_ir_lib.load_from_directory(tempdir, "/output")
    for obj in sorted(objs, key=lambda x: x._obj_relative_path):
        print(obj._obj_relative_path)
        # Explicitly check for equality here as we can not check within
        # FileCheck the exact value as lit substitutions do not work in
        # FileCheck lines.
        print(obj._obj_base_dir == tempdir)
        print(obj._output_base_dir)
    # CHECK-LOAD-DIR: subdir/test1.o
    # CHECK-LOAD-DIR: True
    # CHECK-LOAD-DIR: /output
    # CHECK-LOAD-DIR: subdir/test2.o
    # CHECK-LOAD-DIR: True
    # CHECK-LOAD-DIR: /output


## Test that we can load a corpus in the lld thinLTO case

# RUN: rm -rf %t.dir && mkdir %t.dir
# RUN: touch %t.dir/1.3.import.bc
# RUN: touch %t.dir/2.3.import.bc
# RUN: touch %t.dir/3.3.import.bc
# RUN: touch %t.dir/1.thinlto.bc
# RUN: touch %t.dir/2.thinlto.bc
# RUN: touch %t.dir/3.thinlto.bc
# RUN: %python %s test_lld_thinlto_discovery %t.dir | FileCheck %s --check-prefix CHECK-LLD-THINLTO-DISCOVERY


def test_lld_thinlto_discovery(tempdir):
    """Discover lld thinLTO artifacts in `tempdir` and print each module.

    Args:
        tempdir: Directory (given on the command line) holding the files
            created by the lit commands above.
    """
    obj = extract_ir_lib.load_for_lld_thinlto(tempdir, "/output")
    for o in sorted(obj, key=lambda x: x._obj_relative_path):
        print(o._obj_relative_path)
        # Explicitly check for equality as we can not check within FileCheck
        # using the lit substitution for the temp dir
        print(o._obj_base_dir == tempdir)
        print(o._output_base_dir)  # outdir
    # CHECK-LLD-THINLTO-DISCOVERY: 1
    # CHECK-LLD-THINLTO-DISCOVERY: True
    # CHECK-LLD-THINLTO-DISCOVERY: /output
    # CHECK-LLD-THINLTO-DISCOVERY: 2
    # CHECK-LLD-THINLTO-DISCOVERY: True
    # CHECK-LLD-THINLTO-DISCOVERY: /output
    # CHECK-LLD-THINLTO-DISCOVERY: 3
    # CHECK-LLD-THINLTO-DISCOVERY: True
    # CHECK-LLD-THINLTO-DISCOVERY: /output


## Test that we can load a corpus in the nested lld thinLTO case

# RUN: mkdir %t.dir/nest
# RUN: mv %t.dir/*.bc %t.dir/nest
# RUN: %python %s test_lld_thinlto_discovery_nested %t.dir | FileCheck %s --check-prefix CHECK-LLD-THINLTO-DISCOVERY-NESTED


def test_lld_thinlto_discovery_nested(outer):
    """Discover lld thinLTO artifacts nested one level below `outer`.

    Args:
        outer: Directory (given on the command line) whose `nest`
            subdirectory holds the files moved by the lit commands above.
    """
    obj = extract_ir_lib.load_for_lld_thinlto(outer, "/output")
    for o in sorted(obj, key=lambda x: x._obj_relative_path):
        print(o._obj_relative_path)
        print(o._obj_base_dir == outer)
        print(o._output_base_dir)
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: nest/1
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: True
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: /output
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: nest/2
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: True
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: /output
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: nest/3
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: True
    # CHECK-LLD-THINLTO-DISCOVERY-NESTED: /output


## Test the lld extraction works as expected

# RUN: rm -rf %t.dir.out && mkdir %t.dir.out
# RUN: %python %s test_lld_thinlto_extraction %t.dir %t.dir.out | FileCheck %s --check-prefix CHECK-LLD-THINLTO-EXTRACTION-PY
# NOTE(review): the listing command below carries no lit run directive, so it
# is never executed and the listing checks that follow it are inert. Confirm
# the expected directory listing before enabling it.
# ls %t.dir.out/nest | FileCheck %s --check-prefix CHECK-LLD-THINLTO-EXTRACTION

# CHECK-LLD-THINLTO-EXTRACTION: 1
# CHECK-LLD-THINLTO-EXTRACTION: 2
# CHECK-LLD-THINLTO-EXTRACTION: 3
# CHECK-LLD-THINLTO-EXTRACTION: 1.bc
# CHECK-LLD-THINLTO-EXTRACTION: 2.bc
# CHECK-LLD-THINLTO-EXTRACTION: 3.bc
# CHECK-LLD-THINLTO-EXTRACTION: 1.thinlto.bc
# CHECK-LLD-THINLTO-EXTRACTION: 2.thinlto.bc
# CHECK-LLD-THINLTO-EXTRACTION: 3.thinlto.bc


def test_lld_thinlto_extraction(outer, outdir):
    """Extract every discovered thinLTO module and print what extract returns.

    Args:
        outer: Directory (given on the command line) searched for thinLTO
            artifacts.
        outdir: Output directory passed to the loader as the corpus
            destination.
    """
    obj = extract_ir_lib.load_for_lld_thinlto(outer, outdir)
    for o in sorted(obj, key=lambda x: x._obj_relative_path):
        mod_path = o.extract(thinlto_build="local")
        print(mod_path)
    # CHECK-LLD-THINLTO-EXTRACTION-PY: 1
    # CHECK-LLD-THINLTO-EXTRACTION-PY: 2
    # CHECK-LLD-THINLTO-EXTRACTION-PY: 3


## Test that we can load a bazel query JSON as expected.

# RUN: %python %s test_load_bazel_aquery | FileCheck %s --check-prefix CHECK-TEST-LOAD-BAZEL-AQUERY


def test_load_bazel_aquery():
    """Load objects from a bazel aquery dict and print their derived paths."""
    obj = extract_ir_lib.load_bazel_aquery(
        {
            "actions": [
                {"mnemonic": "not-link", "arguments": []},
                {
                    "mnemonic": "CppLink",
                    "arguments": ["clang", "-o", "output_binary", "test1.o", "test2.o"],
                },
            ]
        },
        "/some/path",
        "/tmp/out",
    )
    print(obj[0].input_obj())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: /some/path/test1.o
    print(obj[0].relative_output_path())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: test1.o
    print(obj[0].cmd_file())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test1.o.cmd
    print(obj[0].bc_file())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test1.o.bc
    print(obj[1].input_obj())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: /some/path/test2.o
    print(obj[1].relative_output_path())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: test2.o
    print(obj[1].cmd_file())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test2.o.cmd
    print(obj[1].bc_file())
    # CHECK-TEST-LOAD-BAZEL-AQUERY: /tmp/out/test2.o.bc


## Test that filtering works correctly

# RUN: %python %s test_filtering | FileCheck %s --check-prefix CHECK-TEST-FILTERING


def test_filtering():
    """Print the filter decision for one command line under several filters."""
    cmdline = "-cc1\0x/y/foobar.cpp\0-Oz\0-Ifoo\0-o\0bin/out.o"
    print(extract_ir_lib.should_include_module(cmdline, None))
    # CHECK-TEST-FILTERING: True
    print(extract_ir_lib.should_include_module(cmdline, ".*"))
    # CHECK-TEST-FILTERING: True
    print(extract_ir_lib.should_include_module(cmdline, "^-Oz$"))
    # CHECK-TEST-FILTERING: True
    print(extract_ir_lib.should_include_module(cmdline, "^-O3$"))
    # CHECK-TEST-FILTERING: False


## Test that we extract the thinLTO index correctly

# RUN: %python %s test_thinlto_index_extractor | FileCheck %s --check-prefix CHECK-THINLTO-INDEX-EXTRACTOR


def test_thinlto_index_extractor():
    """Print the thinLTO index path resolved from a command line."""
    cmdline = (
        "-cc1\0x/y/foobar.cpp\0-Oz\0-Ifoo\0-o\0bin/"
        "out.o\0-fthinlto-index=foo/bar.thinlto.bc"
    )
    print(extract_ir_lib.get_thinlto_index(cmdline, "/the/base/dir"))
    # CHECK-THINLTO-INDEX-EXTRACTOR: /the/base/dir/foo/bar.thinlto.bc


if __name__ == "__main__":
    # Dispatch to the test function named by the first command-line argument,
    # forwarding any remaining arguments (e.g. temp directories) to it.
    globals()[sys.argv[1]](*sys.argv[2:])