//===- llvm-omp-kernel-replay.cpp - Replay OpenMP offload kernel ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a command line utility to replay the execution of recorded OpenMP
// offload kernels.
//
//===----------------------------------------------------------------------===//

#include "omptarget.h"

#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Frontend/Offloading/Utility.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <optional>
#include <string>
#include <utility>

using namespace llvm;

cl::OptionCategory ReplayOptions("llvm-omp-kernel-replay Options");

// InputFilename - The filename to read the json description of the kernel.
29 static cl::opt<std::string> InputFilename(cl::Positional, 30 cl::desc("<input kernel json file>"), 31 cl::Required); 32 33 static cl::opt<bool> VerifyOpt( 34 "verify", 35 cl::desc( 36 "Verify device memory post execution against the original output."), 37 cl::init(false), cl::cat(ReplayOptions)); 38 39 static cl::opt<bool> SaveOutputOpt( 40 "save-output", 41 cl::desc("Save the device memory output of the replayed kernel execution."), 42 cl::init(false), cl::cat(ReplayOptions)); 43 44 static cl::opt<unsigned> NumTeamsOpt("num-teams", 45 cl::desc("Set the number of teams."), 46 cl::init(0), cl::cat(ReplayOptions)); 47 48 static cl::opt<unsigned> NumThreadsOpt("num-threads", 49 cl::desc("Set the number of threads."), 50 cl::init(0), cl::cat(ReplayOptions)); 51 52 static cl::opt<int32_t> DeviceIdOpt("device-id", cl::desc("Set the device id."), 53 cl::init(-1), cl::cat(ReplayOptions)); 54 55 int main(int argc, char **argv) { 56 cl::HideUnrelatedOptions(ReplayOptions); 57 cl::ParseCommandLineOptions(argc, argv, "llvm-omp-kernel-replay\n"); 58 59 ErrorOr<std::unique_ptr<MemoryBuffer>> KernelInfoMB = 60 MemoryBuffer::getFile(InputFilename, /*isText=*/true, 61 /*RequiresNullTerminator=*/true); 62 if (!KernelInfoMB) 63 report_fatal_error("Error reading the kernel info json file"); 64 Expected<json::Value> JsonKernelInfo = 65 json::parse(KernelInfoMB.get()->getBuffer()); 66 if (auto Err = JsonKernelInfo.takeError()) 67 report_fatal_error("Cannot parse the kernel info json file"); 68 69 auto NumTeamsJson = 70 JsonKernelInfo->getAsObject()->getInteger("NumTeamsClause"); 71 unsigned NumTeams = (NumTeamsOpt > 0 ? NumTeamsOpt : NumTeamsJson.value()); 72 auto NumThreadsJson = 73 JsonKernelInfo->getAsObject()->getInteger("ThreadLimitClause"); 74 unsigned NumThreads = 75 (NumThreadsOpt > 0 ? NumThreadsOpt : NumThreadsJson.value()); 76 // TODO: Print a warning if number of teams/threads is explicitly set in the 77 // kernel info but overriden through command line options. 
78 auto LoopTripCount = 79 JsonKernelInfo->getAsObject()->getInteger("LoopTripCount"); 80 auto KernelFunc = JsonKernelInfo->getAsObject()->getString("Name"); 81 82 SmallVector<void *> TgtArgs; 83 SmallVector<ptrdiff_t> TgtArgOffsets; 84 auto NumArgs = JsonKernelInfo->getAsObject()->getInteger("NumArgs"); 85 auto *TgtArgsArray = JsonKernelInfo->getAsObject()->getArray("ArgPtrs"); 86 for (auto It : *TgtArgsArray) 87 TgtArgs.push_back(reinterpret_cast<void *>(It.getAsInteger().value())); 88 auto *TgtArgOffsetsArray = 89 JsonKernelInfo->getAsObject()->getArray("ArgOffsets"); 90 for (auto It : *TgtArgOffsetsArray) 91 TgtArgOffsets.push_back(static_cast<ptrdiff_t>(It.getAsInteger().value())); 92 93 void *BAllocStart = reinterpret_cast<void *>( 94 JsonKernelInfo->getAsObject()->getInteger("BumpAllocVAStart").value()); 95 96 llvm::offloading::EntryTy KernelEntry = {~0U, 0, 0, 0, nullptr, 97 nullptr, 0, 0, nullptr}; 98 std::string KernelEntryName = KernelFunc.value().str(); 99 KernelEntry.SymbolName = const_cast<char *>(KernelEntryName.c_str()); 100 // Anything non-zero works to uniquely identify the kernel. 
101 KernelEntry.Address = (void *)0x1; 102 103 ErrorOr<std::unique_ptr<MemoryBuffer>> ImageMB = 104 MemoryBuffer::getFile(KernelEntryName + ".image", /*isText=*/false, 105 /*RequiresNullTerminator=*/false); 106 if (!ImageMB) 107 report_fatal_error("Error reading the kernel image."); 108 109 __tgt_device_image DeviceImage; 110 DeviceImage.ImageStart = const_cast<char *>(ImageMB.get()->getBufferStart()); 111 DeviceImage.ImageEnd = const_cast<char *>(ImageMB.get()->getBufferEnd()); 112 DeviceImage.EntriesBegin = &KernelEntry; 113 DeviceImage.EntriesEnd = &KernelEntry + 1; 114 115 __tgt_bin_desc Desc; 116 Desc.NumDeviceImages = 1; 117 Desc.HostEntriesBegin = &KernelEntry; 118 Desc.HostEntriesEnd = &KernelEntry + 1; 119 Desc.DeviceImages = &DeviceImage; 120 121 auto DeviceMemorySizeJson = 122 JsonKernelInfo->getAsObject()->getInteger("DeviceMemorySize"); 123 // Set device memory size to the ceiling of GB granularity. 124 uint64_t DeviceMemorySize = std::ceil(DeviceMemorySizeJson.value()); 125 126 auto DeviceIdJson = JsonKernelInfo->getAsObject()->getInteger("DeviceId"); 127 // TODO: Print warning if the user overrides the device id in the json file. 128 int32_t DeviceId = (DeviceIdOpt > -1 ? DeviceIdOpt : DeviceIdJson.value()); 129 130 // TODO: do we need requires? 
131 //__tgt_register_requires(/*Flags=*/1); 132 133 __tgt_register_lib(&Desc); 134 135 uint64_t ReqPtrArgOffset = 0; 136 int Rc = __tgt_activate_record_replay(DeviceId, DeviceMemorySize, BAllocStart, 137 false, VerifyOpt, ReqPtrArgOffset); 138 139 if (Rc != OMP_TGT_SUCCESS) { 140 report_fatal_error("Cannot activate record replay\n"); 141 } 142 143 ErrorOr<std::unique_ptr<MemoryBuffer>> DeviceMemoryMB = 144 MemoryBuffer::getFile(KernelEntryName + ".memory", /*isText=*/false, 145 /*RequiresNullTerminator=*/false); 146 147 if (!DeviceMemoryMB) 148 report_fatal_error("Error reading the kernel input device memory."); 149 150 // On AMD for currently unknown reasons we cannot copy memory mapped data to 151 // device. This is a work-around. 152 uint8_t *recored_data = new uint8_t[DeviceMemoryMB.get()->getBufferSize()]; 153 std::memcpy(recored_data, 154 const_cast<char *>(DeviceMemoryMB.get()->getBuffer().data()), 155 DeviceMemoryMB.get()->getBufferSize()); 156 157 // If necessary, adjust pointer arguments. 158 if (ReqPtrArgOffset) { 159 for (auto *&Arg : TgtArgs) { 160 auto ArgInt = uintptr_t(Arg); 161 // Try to find pointer arguments. 
162 if (ArgInt < uintptr_t(BAllocStart) || 163 ArgInt >= uintptr_t(BAllocStart) + DeviceMemorySize) 164 continue; 165 Arg = reinterpret_cast<void *>(ArgInt - ReqPtrArgOffset); 166 } 167 } 168 169 __tgt_target_kernel_replay( 170 /*Loc=*/nullptr, DeviceId, KernelEntry.Address, (char *)recored_data, 171 DeviceMemoryMB.get()->getBufferSize(), TgtArgs.data(), 172 TgtArgOffsets.data(), NumArgs.value(), NumTeams, NumThreads, 173 LoopTripCount.value()); 174 175 if (VerifyOpt) { 176 ErrorOr<std::unique_ptr<MemoryBuffer>> OriginalOutputMB = 177 MemoryBuffer::getFile(KernelEntryName + ".original.output", 178 /*isText=*/false, 179 /*RequiresNullTerminator=*/false); 180 if (!OriginalOutputMB) 181 report_fatal_error("Error reading the kernel original output file, make " 182 "sure LIBOMPTARGET_SAVE_OUTPUT is set when recording"); 183 ErrorOr<std::unique_ptr<MemoryBuffer>> ReplayOutputMB = 184 MemoryBuffer::getFile(KernelEntryName + ".replay.output", 185 /*isText=*/false, 186 /*RequiresNullTerminator=*/false); 187 if (!ReplayOutputMB) 188 report_fatal_error("Error reading the kernel replay output file"); 189 190 StringRef OriginalOutput = OriginalOutputMB.get()->getBuffer(); 191 StringRef ReplayOutput = ReplayOutputMB.get()->getBuffer(); 192 if (OriginalOutput == ReplayOutput) 193 outs() << "[llvm-omp-kernel-replay] Replay device memory verified!\n"; 194 else 195 outs() << "[llvm-omp-kernel-replay] Replay device memory failed to " 196 "verify!\n"; 197 } 198 199 delete[] recored_data; 200 201 return 0; 202 } 203