# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
"""Library functions for making a corpus from arbitrary bitcode."""

import pathlib
import os
import shutil
import json

from typing import List, Optional

BITCODE_EXTENSION = ".bc"


def load_bitcode_from_directory(bitcode_base_dir: str) -> List[str]:
    """Finds bitcode files to extract from a given directory.

    The directory is searched recursively for regular files whose name ends
    with BITCODE_EXTENSION.

    Args:
      bitcode_base_dir: The base directory where the bitcode to be copied
        is from.

    Returns a sorted array of paths representing the relative path to each
    bitcode file from the base directory, with the bitcode extension removed.
    """
    relative_paths = []
    for candidate in pathlib.Path(bitcode_base_dir).glob("**/*" + BITCODE_EXTENSION):
        # The glob pattern also matches directories (and dangling symlinks)
        # whose name ends with the extension; those cannot be copied as
        # bitcode later on, so only regular files are kept.
        if not candidate.is_file():
            continue
        path_without_extension = str(candidate)[: -len(BITCODE_EXTENSION)]
        relative_paths.append(
            os.path.relpath(path_without_extension, start=bitcode_base_dir)
        )

    # Glob order depends on the file system; sorting keeps the result (and any
    # manifest written from it) reproducible between runs.
    return sorted(relative_paths)


def copy_bitcode(
    relative_paths: List[str], bitcode_base_dir: str, output_dir: str
) -> None:
    """Copies bitcode files from the base directory to the output directory.

    Args:
      relative_paths: An array of relative paths to bitcode files (without the
        bitcode extension) that are copied over to the output directory,
        preserving relative location.
      bitcode_base_dir: The base directory where the bitcode is located.
      output_dir: The output directory to place the bitcode in.
    """
    for relative_path in relative_paths:
        file_name = relative_path + BITCODE_EXTENSION
        source_path = os.path.join(bitcode_base_dir, file_name)
        destination_path = os.path.join(output_dir, file_name)
        # Intermediate directories are created on demand so the layout under
        # the base directory is mirrored in the output directory.
        os.makedirs(os.path.dirname(destination_path), exist_ok=True)
        shutil.copy(source_path, destination_path)


def write_corpus_manifest(
    relative_output_paths: List[str],
    output_dir: str,
    default_args: Optional[List[str]] = None,
) -> None:
    """Creates a corpus manifest describing the bitcode that has been found.

    The manifest is written as JSON to "corpus_description.json" inside the
    output directory.

    Args:
      relative_output_paths: A list of paths to each bitcode file relative to
        the output directory. Entries that are None are left out of the
        manifest.
      output_dir: The output directory where the corpus is being created.
      default_args: An array of compiler flags that should be used to compile
        the bitcode when using further downstream tooling. Defaults to an
        empty list.
    """
    if default_args is None:
        default_args = []
    corpus_description = {
        "global_command_override": default_args,
        "has_thinlto": False,
        "modules": [path for path in relative_output_paths if path is not None],
    }

    with open(
        os.path.join(output_dir, "corpus_description.json"), "w", encoding="utf-8"
    ) as description_file:
        json.dump(corpus_description, description_file, indent=2)