# ===- perf-helper.py - Clang Python Bindings -----------------*- python -*--===#
#
# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#
# ===------------------------------------------------------------------------===#

from __future__ import absolute_import, division, print_function

import sys
import os
import subprocess
import argparse
import time
import bisect
import shlex
import tempfile
import re
import shutil
import random

# Minimal environment handed to the compiler when probing for its cc1 command.
test_env = {"PATH": os.environ["PATH"]}


def findFilesWithExtension(path, extension):
    """Return the paths of all files under *path* whose name ends in ".<extension>".

    The directory tree is walked recursively with os.walk; the result is a
    list of paths built by joining each directory with the matching filename.
    """
    suffix = f".{extension}"
    filenames = []
    for root, _dirs, files in os.walk(path):
        for filename in files:
            if filename.endswith(suffix):
                filenames.append(os.path.join(root, filename))
    return filenames


def clean(args):
    """Remove every file with a given extension from one or more directories.

    args is ``[<path>..., <extension>]``; the last element is the extension
    and all preceding elements are directories to search. Returns 1 (after
    printing usage) when fewer than two arguments are given, otherwise 0.
    """
    if len(args) < 2:
        print(
            "Usage: %s clean <paths> <extension>\n" % __file__
            + "\tRemoves all files with extension from <path>."
        )
        return 1
    for path in args[0:-1]:
        for filename in findFilesWithExtension(path, args[-1]):
            os.remove(filename)
    return 0


def merge(args):
    """Merge all ``.profraw`` files found under the given paths.

    args is ``[<llvm-profdata>, <output>, <path>...]``. The tool named by the
    first argument is invoked as ``<tool> merge -o <output> <files...>``.
    Returns 1 (after printing usage) when fewer than three arguments are
    given, otherwise 0. Raises subprocess.CalledProcessError if the tool
    exits with a non-zero status.
    """
    if len(args) < 3:
        print(
            "Usage: %s merge <llvm-profdata> <output> <paths>\n" % __file__
            + "\tMerges all profraw files from path into output."
        )
        return 1
    cmd = [args[0], "merge", "-o", args[1]]
    for path in args[2:]:
        cmd.extend(findFilesWithExtension(path, "profraw"))
    subprocess.check_call(cmd)
    return 0


def merge_fdata(args):
    """Merge all ``.fdata`` files found under a single path.

    args is exactly ``[<merge-fdata>, <output>, <path>]``. The tool named by
    the first argument is invoked as ``<tool> -o <output> <files...>``.
    Returns 1 (after printing usage) on a wrong argument count, otherwise 0.
    Raises subprocess.CalledProcessError if the tool exits with a non-zero
    status.
    """
    if len(args) != 3:
        print(
            "Usage: %s merge-fdata <merge-fdata> <output> <path>\n" % __file__
            + "\tMerges all fdata files from path into output."
        )
        return 1
    cmd = [args[0], "-o", args[1]]
    cmd.extend(findFilesWithExtension(args[2], "fdata"))
    subprocess.check_call(cmd)
    return 0


def perf(args):
    """Run a command under ``perf record`` and report how long it took.

    Accepts an optional ``--lbr`` flag followed by the command to run. The
    samples are written to ``<pid>.perf.data`` in the current directory,
    where <pid> is this script's process id. Always returns 0; raises
    subprocess.CalledProcessError if perf exits with a non-zero status.
    """
    parser = argparse.ArgumentParser(
        prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
    )
    parser.add_argument(
        "--lbr", action="store_true", help="Use perf with branch stacks"
    )
    parser.add_argument("cmd", nargs=argparse.REMAINDER, help="")

    opts = parser.parse_args(args)
    # The first element of the remainder is dropped.
    # NOTE(review): presumably this is the "--" separator used by callers --
    # confirm against the invoking lit configuration.
    cmd = opts.cmd[1:]

    perf_args = [
        "perf",
        "record",
        "--event=cycles:u",
        "--freq=max",
        "--output=%d.perf.data" % os.getpid(),
    ]
    if opts.lbr:
        perf_args += ["--branch-filter=any,u"]
    perf_args.extend(cmd)

    start_time = time.time()
    subprocess.check_call(perf_args)

    elapsed = time.time() - start_time
    print("... data collection took %.4fs" % elapsed)
    return 0


def perf2bolt(args):
    """Convert every ``.perf.data`` file under a path with llvm-bolt.

    args is ``[<bolt>, <path>, <binary>]`` plus an optional ``--lbr`` flag.
    For each file found, llvm-bolt is run in aggregate-only mode and its
    output is written next to the input as ``<file>.fdata``. Without
    ``--lbr`` the ``-nl`` option is added. Always returns 0; raises
    subprocess.CalledProcessError if llvm-bolt exits with a non-zero status.
    """
    parser = argparse.ArgumentParser(
        prog="perf-helper perf2bolt",
        description="perf2bolt conversion wrapper for perf.data files",
    )
    parser.add_argument("bolt", help="Path to llvm-bolt")
    parser.add_argument("path", help="Path containing perf.data files")
    parser.add_argument("binary", help="Input binary")
    parser.add_argument("--lbr", action="store_true", help="Use LBR perf2bolt mode")
    opts = parser.parse_args(args)

    p2b_args = [
        opts.bolt,
        opts.binary,
        "--aggregate-only",
        "--profile-format=yaml",
    ]
    if not opts.lbr:
        p2b_args += ["-nl"]
    p2b_args += ["-p"]
    for filename in findFilesWithExtension(opts.path, "perf.data"):
        subprocess.check_call(p2b_args + [filename, "-o", filename + ".fdata"])
    return 0


def dtrace(args):
    """Run a command under dtrace and log the functions it enters.

    Leading ``--`` options are parsed here; everything from the first
    non-option argument onwards is the command to trace. dtrace's standard
    output is written to ``<pid>.dtrace`` in the current directory, where
    <pid> is this script's process id. When not running as root the dtrace
    invocation is prefixed with ``sudo``. Always returns 0; raises
    subprocess.CalledProcessError if dtrace exits with a non-zero status.
    """
    parser = argparse.ArgumentParser(
        prog="perf-helper dtrace",
        description="dtrace wrapper for order file generation",
    )
    parser.add_argument(
        "--buffer-size",
        metavar="size",
        type=int,
        required=False,
        default=1,
        help="dtrace buffer size in MB (default 1)",
    )
    parser.add_argument(
        "--use-oneshot",
        required=False,
        action="store_true",
        help="Use dtrace's oneshot probes",
    )
    parser.add_argument(
        "--use-ustack",
        required=False,
        action="store_true",
        help="Use dtrace's ustack to print function names",
    )
    parser.add_argument(
        "--cc1",
        required=False,
        action="store_true",
        help="Execute cc1 directly (don't profile the driver)",
    )
    parser.add_argument("cmd", nargs="*", help="")

    # Use python's arg parser to handle all leading option arguments, but pass
    # everything else through to dtrace
    first_cmd = next(arg for arg in args if not arg.startswith("--"))
    last_arg_idx = args.index(first_cmd)

    opts = parser.parse_args(args[:last_arg_idx])
    cmd = args[last_arg_idx:]

    if opts.cc1:
        cmd = get_cc1_command_for_args(cmd, test_env)

    if opts.use_oneshot:
        target = "oneshot$target:::entry"
    else:
        target = "pid$target:::entry"
    predicate = '%s/probemod=="%s"/' % (target, os.path.basename(cmd[0]))
    log_timestamp = 'printf("dtrace-TS: %d\\n", timestamp)'
    if opts.use_ustack:
        action = "ustack(1);"
    else:
        action = 'printf("dtrace-Symbol: %s\\n", probefunc);'
    dtrace_script = "%s { %s; %s }" % (predicate, log_timestamp, action)

    dtrace_args = []
    if not os.geteuid() == 0:
        print(
            "Script must be run as root, or you must add the following to your sudoers:"
            + "%%admin ALL=(ALL) NOPASSWD: /usr/sbin/dtrace"
        )
        dtrace_args.append("sudo")

    dtrace_args.extend(
        (
            "dtrace",
            "-xevaltime=exec",
            "-xbufsize=%dm" % (opts.buffer_size),
            "-q",
            "-n",
            dtrace_script,
            "-c",
            " ".join(cmd),
        )
    )

    if sys.platform == "darwin":
        dtrace_args.append("-xmangled")

    start_time = time.time()

    with open("%d.dtrace" % os.getpid(), "w") as f:
        f.write("### Command: %s" % dtrace_args)
        # NOTE(review): stderr is redirected to a pipe that is never read, so
        # dtrace diagnostics are discarded -- confirm this is intentional.
        subprocess.check_call(dtrace_args, stdout=f, stderr=subprocess.PIPE)

    elapsed = time.time() - start_time
    print("... data collection took %.4fs" % elapsed)

    return 0


def get_cc1_command_for_args(cmd, env):
    """Return the cc1 command line the compiler driver would run for *cmd*.

    The driver is executed with ``-###`` appended (using *env* as its
    environment) and its combined stdout/stderr is filtered for known
    non-command lines. Exactly one line must remain; it is split with
    shlex.split and returned as a list. Otherwise an error is printed and the
    process exits with status 1.
    """
    # Find the cc1 command used by the compiler. To do this we execute the
    # compiler with '-###' to figure out what it wants to do.
    cmd = cmd + ["-###"]
    cc_output = subprocess.check_output(
        cmd, stderr=subprocess.STDOUT, env=env, universal_newlines=True
    ).strip()

    garbage_prefixes = (
        "Configured with:",
        "Target:",
        "Thread model:",
        "InstalledDir:",
        "LLVM Profile Note",
        " (in-process)",
    )
    cc_commands = []
    for ln in cc_output.split("\n"):
        # Filter out known garbage.
        if (
            ln == "Using built-in specs."
            or ln.startswith(garbage_prefixes)
            or " version " in ln
        ):
            continue
        cc_commands.append(ln)

    if len(cc_commands) != 1:
        print("Fatal error: unable to determine cc1 command: %r" % cc_output)
        sys.exit(1)

    cc1_cmd = shlex.split(cc_commands[0])
    if not cc1_cmd:
        print("Fatal error: unable to determine cc1 command: %r" % cc_output)
        sys.exit(1)

    return cc1_cmd


def cc1(args):
    """Resolve the cc1 command for a compiler invocation and run it directly.

    Everything from the first non-option argument onwards is treated as the
    compiler command. Always returns 0; raises subprocess.CalledProcessError
    if either the driver probe or the cc1 command exits with a non-zero
    status.
    """
    parser = argparse.ArgumentParser(
        prog="perf-helper cc1", description="cc1 wrapper for order file generation"
    )
    parser.add_argument("cmd", nargs="*", help="")

    # Use python's arg parser to handle all leading option arguments, but pass
    # everything else through to the compiler.
    first_cmd = next(arg for arg in args if not arg.startswith("--"))
    last_arg_idx = args.index(first_cmd)

    # Parsed only for validation; this sub-command defines no options.
    parser.parse_args(args[:last_arg_idx])
    cmd = args[last_arg_idx:]

    # clear the profile file env, so that we don't generate profdata
    # when capturing the cc1 command. Work on a copy so the module-level
    # test_env is not modified as a side effect.
    cc1_env = dict(test_env)
    cc1_env["LLVM_PROFILE_FILE"] = os.devnull
    cc1_cmd = get_cc1_command_for_args(cmd, cc1_env)

    subprocess.check_call(cc1_cmd)
    return 0


def parse_dtrace_symbol_file(path, all_symbols, all_symbols_set, missing_symbols, opts):
    """Yield ``(timestamp, symbol)`` pairs parsed from a dtrace log file.

    path: log file containing "dtrace-TS: " and "dtrace-Symbol: " lines.
    all_symbols: sorted list of known symbol names (may be empty); used for
        prefix lookups via bisect.
    all_symbols_set: the same symbols as a set, for membership tests.
    missing_symbols: set updated in place with symbols that were not found
        verbatim but were resolved by prefix (only when
        opts.show_missing_symbols is set).
    opts: object providing ``show_missing_symbols``.

    The timestamp of each pair is the most recent "dtrace-TS" value seen, or
    None if no timestamp line preceded the symbol.
    """

    def fix_mangling(symbol):
        if sys.platform == "darwin":
            if symbol[0] != "_" and symbol != "start":
                symbol = "_" + symbol
        return symbol

    def get_symbols_with_prefix(symbol):
        start_index = bisect.bisect_left(all_symbols, symbol)
        for s in all_symbols[start_index:]:
            if not s.startswith(symbol):
                break
            yield s

    # Extract the list of symbols from the given file, which is assumed to be
    # the output of a dtrace run logging either probefunc or ustack(1) and
    # nothing else. The dtrace -xdemangle option needs to be used.
    #
    # This is particular to OS X at the moment, because of the '_' handling.
    with open(path) as f:
        current_timestamp = None
        for ln in f:
            # Drop leading and trailing whitespace.
            ln = ln.strip()
            if not ln.startswith("dtrace-"):
                continue

            # If this is a timestamp specifier, extract it.
            if ln.startswith("dtrace-TS: "):
                _, data = ln.split(": ", 1)
                if not data.isdigit():
                    print(
                        "warning: unrecognized timestamp line %r, ignoring" % ln,
                        file=sys.stderr,
                    )
                    continue
                current_timestamp = int(data)
                continue
            elif ln.startswith("dtrace-Symbol: "):

                _, ln = ln.split(": ", 1)
                if not ln:
                    continue

                # If there is a '`' in the line, assume it is a ustack(1) entry in
                # the form of <modulename>`<modulefunc>, where <modulefunc> is never
                # truncated (but does need the mangling patched).
                if "`" in ln:
                    yield (current_timestamp, fix_mangling(ln.split("`", 1)[1]))
                    continue

                # Otherwise, assume this is a probefunc printout. DTrace on OS X
                # seems to have a bug where it prints the mangled version of symbols
                # which aren't C++ mangled. We just add a '_' to anything but start
                # which doesn't already have a '_'.
                symbol = fix_mangling(ln)

                # If we don't know all the symbols, or the symbol is one of them,
                # just return it.
                if not all_symbols_set or symbol in all_symbols_set:
                    yield (current_timestamp, symbol)
                    continue

                # Otherwise, we have a symbol name which isn't present in the
                # binary. We assume it is truncated, and try to extend it.

                # Get all the symbols with this prefix.
                possible_symbols = list(get_symbols_with_prefix(symbol))
                if not possible_symbols:
                    continue

                # If we found too many possible symbols, ignore this as a prefix.
                if len(possible_symbols) > 100:
                    print(
                        "warning: ignoring symbol %r " % symbol
                        + "(no match and too many possible suffixes)",
                        file=sys.stderr,
                    )
                    continue

                # Report that we resolved a missing symbol.
                if opts.show_missing_symbols and symbol not in missing_symbols:
                    print(
                        "warning: resolved missing symbol %r" % symbol, file=sys.stderr
                    )
                    missing_symbols.add(symbol)

                # Otherwise, treat all the possible matches as having occurred. This
                # is an over-approximation, but it should be ok in practice.
                for s in possible_symbols:
                    yield (current_timestamp, s)


def uniq(list):
    """Yield the items of *list* in order, skipping any item already seen."""
    # The parameter name shadows the builtin but is kept so that existing
    # keyword callers continue to work.
    seen = set()
    for item in list:
        if item not in seen:
            yield item
            seen.add(item)


def form_by_call_order(symbol_lists):
    """Return an iterator of unique symbols in first-occurrence order."""
    # Simple strategy, just return symbols in order of occurrence, even across
    # multiple runs.
    return uniq(s for symbols in symbol_lists for s in symbols)


def form_by_call_order_fair(symbol_lists):
    """Return an iterator of unique symbols, pulling in successors eagerly.

    Whenever a symbol is emitted, every symbol recorded as directly following
    it in any of the (de-duplicated) lists is emitted right after it.
    """
    # More complicated strategy that tries to respect the call order across all
    # of the test cases, instead of giving a huge preference to the first test
    # case.

    # First, uniq all the lists.
    uniq_lists = [list(uniq(symbols)) for symbols in symbol_lists]

    # Compute the successors for each list.
    succs = {}
    for symbols in uniq_lists:
        for a, b in zip(symbols[:-1], symbols[1:]):
            succs[a] = items = succs.get(a, [])
            if b not in items:
                items.append(b)

    # Emit all the symbols, but make sure to always emit all successors from any
    # call list whenever we see a symbol.
    #
    # There isn't much science here, but this sometimes works better than the
    # more naive strategy. Then again, sometimes it doesn't so more research is
    # probably needed.
    return uniq(
        s
        for symbols in symbol_lists
        for node in symbols
        for s in ([node] + succs.get(node, []))
    )


def form_by_frequency(symbol_lists):
    """Return a list of symbols ordered from most to least frequently seen.

    Symbols with equal counts keep the order in which they were first seen,
    because the sort is stable.
    """
    # Form the order file by just putting the most commonly occurring symbols
    # first. This assumes the data files didn't use the oneshot dtrace method.

    counts = {}
    for symbols in symbol_lists:
        for a in symbols:
            counts[a] = counts.get(a, 0) + 1

    by_count = list(counts.items())
    by_count.sort(key=lambda __n: -__n[1])
    return [s for s, n in by_count]


def form_by_random(symbol_lists):
    """Return a list of the unique symbols in random order."""
    # Randomize the symbols. uniq() yields lazily, so materialize a list
    # first: random.shuffle works in place on a mutable sequence.
    merged_symbols = list(uniq(s for symbols in symbol_lists for s in symbols))
    random.shuffle(merged_symbols)
    return merged_symbols


def form_by_alphabetical(symbol_lists):
    """Return a sorted list of the unique symbols."""
    # Alphabetize the symbols.
    merged_symbols = list(set(s for symbols in symbol_lists for s in symbols))
    merged_symbols.sort()
    return merged_symbols


# Map each ordering method name (the part after "form_by_") to its function.
methods = dict(
    (name[len("form_by_") :], value)
    for name, value in locals().items()
    if name.startswith("form_by_")
)


def genOrderFile(args):
    """Generate an order file from dtrace logs found in the input directories.

    Every ``.dtrace`` file under the given directories is parsed, each file's
    symbols are sorted by timestamp, and the selected ``--method`` combines
    them into one ordering which is written, one symbol per line, to
    ``--output``. When ``--binary`` is given, ``nm -P`` supplies the list of
    known symbols. Always returns 0.
    """
    parser = argparse.ArgumentParser("%prog  [options] <dtrace data file directories>]")
    parser.add_argument("input", nargs="+", help="")
    parser.add_argument(
        "--binary",
        metavar="PATH",
        type=str,
        dest="binary_path",
        help="Path to the binary being ordered (for getting all symbols)",
        default=None,
    )
    parser.add_argument(
        "--output",
        dest="output_path",
        help="path to output order file to write",
        default=None,
        required=True,
        metavar="PATH",
    )
    parser.add_argument(
        "--show-missing-symbols",
        dest="show_missing_symbols",
        help="show symbols which are 'fixed up' to a valid name (requires --binary)",
        action="store_true",
        default=None,
    )
    parser.add_argument(
        "--output-unordered-symbols",
        dest="output_unordered_symbols_path",
        help="write a list of the unordered symbols to PATH (requires --binary)",
        default=None,
        metavar="PATH",
    )
    parser.add_argument(
        "--method",
        dest="method",
        help="order file generation method to use",
        choices=list(methods.keys()),
        default="call_order",
    )
    opts = parser.parse_args(args)

    # If the user gave us a binary, get all the symbols in the binary by
    # snarfing 'nm' output.
    if opts.binary_path is not None:
        output = subprocess.check_output(
            ["nm", "-P", opts.binary_path], universal_newlines=True
        )
        lines = output.split("\n")
        all_symbols = [ln.split(" ", 1)[0] for ln in lines if ln.strip()]
        print("found %d symbols in binary" % len(all_symbols))
        all_symbols.sort()
    else:
        all_symbols = []
    all_symbols_set = set(all_symbols)

    # Compute the list of input files.
    input_files = []
    for dirname in opts.input:
        input_files.extend(findFilesWithExtension(dirname, "dtrace"))

    # Load all of the input files.
    print("loading from %d data files" % len(input_files))
    missing_symbols = set()
    timestamped_symbol_lists = [
        list(
            parse_dtrace_symbol_file(
                path, all_symbols, all_symbols_set, missing_symbols, opts
            )
        )
        for path in input_files
    ]

    # Reorder each symbol list.
    symbol_lists = []
    for timestamped_symbols_list in timestamped_symbol_lists:
        timestamped_symbols_list.sort()
        symbol_lists.append([symbol for _, symbol in timestamped_symbols_list])

    # Execute the desired order file generation method.
    method = methods.get(opts.method)
    result = list(method(symbol_lists))

    # Report to the user on what percentage of symbols are present in the order
    # file.
    num_ordered_symbols = len(result)
    if all_symbols:
        print(
            "note: order file contains %d/%d symbols (%.2f%%)"
            % (
                num_ordered_symbols,
                len(all_symbols),
                100.0 * num_ordered_symbols / len(all_symbols),
            ),
            file=sys.stderr,
        )

    if opts.output_unordered_symbols_path:
        ordered_symbols_set = set(result)
        with open(opts.output_unordered_symbols_path, "w") as f:
            f.write("\n".join(s for s in all_symbols if s not in ordered_symbols_set))

    # Write the order file.
    with open(opts.output_path, "w") as f:
        f.write("\n".join(result))
        f.write("\n")

    return 0


def _run_and_echo(cmd):
    """Run *cmd*, print its argv and combined output, and raise on failure."""
    process = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
    )

    print(process.args)
    # With text=True the captured output is one str; write it in one call
    # instead of iterating over it character by character.
    sys.stdout.write(process.stdout)
    process.check_returncode()


def bolt_optimize(args):
    """Collect or convert a BOLT profile and optimize ``--input`` in place.

    Returns 0 immediately if readelf shows a ``.bolt.org.text`` section in
    the input. Otherwise, depending on ``--method``:

    * INSTRUMENT: build an instrumented binary, then run lit on the
      ``bolt-fdata`` directory under ``--perf-training-binary-dir``.
    * PERF / LBR: convert the perf.data files under
      ``--perf-training-binary-dir`` with perf2bolt (``--lbr`` is forwarded
      for LBR).

    The resulting fdata files are merged into ``--fdata``, the input is
    copied to ``<input>-prebolt``, and llvm-bolt rewrites ``--input`` from
    that copy. Returns 0; raises subprocess.CalledProcessError if any tool
    exits with a non-zero status.
    """
    parser = argparse.ArgumentParser("%prog  [options] ")
    parser.add_argument("--method", choices=["INSTRUMENT", "PERF", "LBR"])
    parser.add_argument("--input")
    parser.add_argument("--instrumented-output")
    parser.add_argument("--fdata")
    parser.add_argument("--perf-training-binary-dir")
    parser.add_argument("--readelf")
    parser.add_argument("--bolt")
    parser.add_argument("--lit")
    parser.add_argument("--merge-fdata")

    opts = parser.parse_args(args)

    output = subprocess.check_output(
        [opts.readelf, "-WS", opts.input], universal_newlines=True
    )

    # This binary has already been bolt-optimized, so skip further processing.
    if re.search("\\.bolt\\.org\\.text", output, re.MULTILINE):
        return 0

    if opts.method == "INSTRUMENT":
        _run_and_echo(
            [
                opts.bolt,
                opts.input,
                "-o",
                opts.instrumented_output,
                "-instrument",
                "--instrumentation-file-append-pid",
                f"--instrumentation-file={opts.fdata}",
            ]
        )

        _run_and_echo(
            [
                sys.executable,
                opts.lit,
                os.path.join(opts.perf_training_binary_dir, "bolt-fdata"),
            ]
        )

    if opts.method in ["PERF", "LBR"]:
        p2b_args = [opts.bolt, opts.perf_training_binary_dir, opts.input]
        # Without --lbr, perf2bolt always adds -nl, even for LBR profiles.
        if opts.method == "LBR":
            p2b_args.append("--lbr")
        perf2bolt(p2b_args)

    merge_fdata([opts.merge_fdata, opts.fdata, opts.perf_training_binary_dir])

    shutil.copy(opts.input, f"{opts.input}-prebolt")

    bolt_cmd = [
        opts.bolt,
        f"{opts.input}-prebolt",
        "-o",
        opts.input,
        "-data",
        opts.fdata,
        "-reorder-blocks=ext-tsp",
        "-reorder-functions=cdsort",
        "-split-functions",
        "-split-all-cold",
        "-split-eh",
        "-dyno-stats",
        "-use-gnu-stack",
        "-update-debug-sections",
    ]
    # Only add -nl for PERF; never pass an empty-string argument to the tool.
    if opts.method == "PERF":
        bolt_cmd.append("-nl")
    _run_and_echo(bolt_cmd)

    return 0


# Sub-command name -> handler taking the remaining argv and returning a status.
commands = {
    "bolt-optimize": bolt_optimize,
    "clean": clean,
    "merge": merge,
    "dtrace": dtrace,
    "cc1": cc1,
    "gen-order-file": genOrderFile,
    "merge-fdata": merge_fdata,
    "perf": perf,
    "perf2bolt": perf2bolt,
}


def main():
    """Dispatch to the sub-command named by the first command-line argument."""
    if len(sys.argv) < 2 or sys.argv[1] not in commands:
        print(
            "Usage: %s <command> [args...]\n" % __file__
            + "\tCommands: %s" % ", ".join(sorted(commands)),
            file=sys.stderr,
        )
        sys.exit(1)
    f = commands[sys.argv[1]]
    sys.exit(f(sys.argv[2:]))


if __name__ == "__main__":
    main()