1 //===- WebAssemblyTargetMachine.cpp - Define TargetMachine for WebAssembly -==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines the WebAssembly-specific subclass of TargetMachine. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "WebAssemblyTargetMachine.h" 15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 16 #include "WebAssembly.h" 17 #include "WebAssemblyMachineFunctionInfo.h" 18 #include "WebAssemblyTargetObjectFile.h" 19 #include "WebAssemblyTargetTransformInfo.h" 20 #include "llvm/CodeGen/MIRParser/MIParser.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/Passes.h" 23 #include "llvm/CodeGen/RegAllocRegistry.h" 24 #include "llvm/CodeGen/TargetPassConfig.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/Support/TargetRegistry.h" 27 #include "llvm/Target/TargetOptions.h" 28 #include "llvm/Transforms/Scalar.h" 29 #include "llvm/Transforms/Scalar/LowerAtomic.h" 30 #include "llvm/Transforms/Utils.h" 31 using namespace llvm; 32 33 #define DEBUG_TYPE "wasm" 34 35 // Emscripten's asm.js-style exception handling 36 static cl::opt<bool> EnableEmException( 37 "enable-emscripten-cxx-exceptions", 38 cl::desc("WebAssembly Emscripten-style exception handling"), 39 cl::init(false)); 40 41 // Emscripten's asm.js-style setjmp/longjmp handling 42 static cl::opt<bool> EnableEmSjLj( 43 "enable-emscripten-sjlj", 44 cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"), 45 cl::init(false)); 46 47 extern "C" void LLVMInitializeWebAssemblyTarget() { 48 // Register the target. 49 RegisterTargetMachine<WebAssemblyTargetMachine> X( 50 getTheWebAssemblyTarget32()); 51 RegisterTargetMachine<WebAssemblyTargetMachine> Y( 52 getTheWebAssemblyTarget64()); 53 54 // Register backend passes 55 auto &PR = *PassRegistry::getPassRegistry(); 56 initializeWebAssemblyAddMissingPrototypesPass(PR); 57 initializeWebAssemblyLowerEmscriptenEHSjLjPass(PR); 58 initializeLowerGlobalDtorsPass(PR); 59 initializeFixFunctionBitcastsPass(PR); 60 initializeOptimizeReturnedPass(PR); 61 initializeWebAssemblyArgumentMovePass(PR); 62 initializeWebAssemblySetP2AlignOperandsPass(PR); 63 initializeWebAssemblyReplacePhysRegsPass(PR); 64 initializeWebAssemblyPrepareForLiveIntervalsPass(PR); 65 initializeWebAssemblyOptimizeLiveIntervalsPass(PR); 66 initializeWebAssemblyMemIntrinsicResultsPass(PR); 67 initializeWebAssemblyRegStackifyPass(PR); 68 initializeWebAssemblyRegColoringPass(PR); 69 initializeWebAssemblyExplicitLocalsPass(PR); 70 initializeWebAssemblyFixIrreducibleControlFlowPass(PR); 71 initializeWebAssemblyLateEHPreparePass(PR); 72 initializeWebAssemblyExceptionInfoPass(PR); 73 initializeWebAssemblyCFGSortPass(PR); 74 initializeWebAssemblyCFGStackifyPass(PR); 75 initializeWebAssemblyLowerBrUnlessPass(PR); 76 initializeWebAssemblyRegNumberingPass(PR); 77 initializeWebAssemblyPeepholePass(PR); 78 initializeWebAssemblyCallIndirectFixupPass(PR); 79 } 80 81 //===----------------------------------------------------------------------===// 82 // WebAssembly Lowering public interface. 83 //===----------------------------------------------------------------------===// 84 85 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { 86 if (!RM.hasValue()) { 87 // Default to static relocation model. This should always be more optimial 88 // than PIC since the static linker can determine all global addresses and 89 // assume direct function calls. 90 return Reloc::Static; 91 } 92 return *RM; 93 } 94 95 /// Create an WebAssembly architecture model. 96 /// 97 WebAssemblyTargetMachine::WebAssemblyTargetMachine( 98 const Target &T, const Triple &TT, StringRef CPU, StringRef FS, 99 const TargetOptions &Options, Optional<Reloc::Model> RM, 100 Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT) 101 : LLVMTargetMachine(T, 102 TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128" 103 : "e-m:e-p:32:32-i64:64-n32:64-S128", 104 TT, CPU, FS, Options, getEffectiveRelocModel(RM), 105 getEffectiveCodeModel(CM, CodeModel::Large), OL), 106 TLOF(new WebAssemblyTargetObjectFile()) { 107 // WebAssembly type-checks instructions, but a noreturn function with a return 108 // type that doesn't match the context will cause a check failure. So we lower 109 // LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's 110 // 'unreachable' instructions which is meant for that case. 111 this->Options.TrapUnreachable = true; 112 113 // WebAssembly treats each function as an independent unit. Force 114 // -ffunction-sections, effectively, so that we can emit them independently. 115 this->Options.FunctionSections = true; 116 this->Options.DataSections = true; 117 this->Options.UniqueSectionNames = true; 118 119 initAsmInfo(); 120 121 // Note that we don't use setRequiresStructuredCFG(true). It disables 122 // optimizations than we're ok with, and want, such as critical edge 123 // splitting and tail merging. 124 } 125 126 WebAssemblyTargetMachine::~WebAssemblyTargetMachine() = default; // anchor. 127 128 const WebAssemblySubtarget * 129 WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU, 130 std::string FS) const { 131 auto &I = SubtargetMap[CPU + FS]; 132 if (!I) { 133 I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this); 134 } 135 return I.get(); 136 } 137 138 const WebAssemblySubtarget * 139 WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const { 140 Attribute CPUAttr = F.getFnAttribute("target-cpu"); 141 Attribute FSAttr = F.getFnAttribute("target-features"); 142 143 std::string CPU = !CPUAttr.hasAttribute(Attribute::None) 144 ? CPUAttr.getValueAsString().str() 145 : TargetCPU; 146 std::string FS = !FSAttr.hasAttribute(Attribute::None) 147 ? FSAttr.getValueAsString().str() 148 : TargetFS; 149 150 // This needs to be done before we create a new subtarget since any 151 // creation will depend on the TM and the code generation flags on the 152 // function that reside in TargetOptions. 153 resetTargetOptions(F); 154 155 return getSubtargetImpl(CPU, FS); 156 } 157 158 namespace { 159 160 class CoalesceFeaturesAndStripAtomics final : public ModulePass { 161 // Take the union of all features used in the module and use it for each 162 // function individually, since having multiple feature sets in one module 163 // currently does not make sense for WebAssembly. If atomics are not enabled, 164 // also strip atomic operations and thread local storage. 165 static char ID; 166 WebAssemblyTargetMachine *WasmTM; 167 168 public: 169 CoalesceFeaturesAndStripAtomics(WebAssemblyTargetMachine *WasmTM) 170 : ModulePass(ID), WasmTM(WasmTM) {} 171 172 bool runOnModule(Module &M) override { 173 FeatureBitset Features = coalesceFeatures(M); 174 175 std::string FeatureStr = getFeatureString(Features); 176 for (auto &F : M) 177 replaceFeatures(F, FeatureStr); 178 179 bool Stripped = false; 180 if (!Features[WebAssembly::FeatureAtomics]) { 181 Stripped |= stripAtomics(M); 182 Stripped |= stripThreadLocals(M); 183 } 184 185 recordFeatures(M, Features, Stripped); 186 187 // Conservatively assume we have made some change 188 return true; 189 } 190 191 private: 192 FeatureBitset coalesceFeatures(const Module &M) { 193 FeatureBitset Features = 194 WasmTM 195 ->getSubtargetImpl(WasmTM->getTargetCPU(), 196 WasmTM->getTargetFeatureString()) 197 ->getFeatureBits(); 198 for (auto &F : M) 199 Features |= WasmTM->getSubtargetImpl(F)->getFeatureBits(); 200 return Features; 201 } 202 203 std::string getFeatureString(const FeatureBitset &Features) { 204 std::string Ret; 205 for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) { 206 if (Features[KV.Value]) 207 Ret += (StringRef("+") + KV.Key + ",").str(); 208 } 209 return Ret; 210 } 211 212 void replaceFeatures(Function &F, const std::string &Features) { 213 F.removeFnAttr("target-features"); 214 F.removeFnAttr("target-cpu"); 215 F.addFnAttr("target-features", Features); 216 } 217 218 bool stripAtomics(Module &M) { 219 // Detect whether any atomics will be lowered, since there is no way to tell 220 // whether the LowerAtomic pass lowers e.g. stores. 221 bool Stripped = false; 222 for (auto &F : M) { 223 for (auto &B : F) { 224 for (auto &I : B) { 225 if (I.isAtomic()) { 226 Stripped = true; 227 goto done; 228 } 229 } 230 } 231 } 232 233 done: 234 if (!Stripped) 235 return false; 236 237 LowerAtomicPass Lowerer; 238 FunctionAnalysisManager FAM; 239 for (auto &F : M) 240 Lowerer.run(F, FAM); 241 242 return true; 243 } 244 245 bool stripThreadLocals(Module &M) { 246 bool Stripped = false; 247 for (auto &GV : M.globals()) { 248 if (GV.getThreadLocalMode() != 249 GlobalValue::ThreadLocalMode::NotThreadLocal) { 250 Stripped = true; 251 GV.setThreadLocalMode(GlobalValue::ThreadLocalMode::NotThreadLocal); 252 } 253 } 254 return Stripped; 255 } 256 257 void recordFeatures(Module &M, const FeatureBitset &Features, bool Stripped) { 258 for (const SubtargetFeatureKV &KV : WebAssemblyFeatureKV) { 259 std::string MDKey = (StringRef("wasm-feature-") + KV.Key).str(); 260 if (KV.Value == WebAssembly::FeatureAtomics && Stripped) { 261 // "atomics" is special: code compiled without atomics may have had its 262 // atomics lowered to nonatomic operations. In that case, atomics is 263 // disallowed to prevent unsafe linking with atomics-enabled objects. 264 assert(!Features[WebAssembly::FeatureAtomics]); 265 M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey, 266 wasm::WASM_FEATURE_PREFIX_DISALLOWED); 267 } else if (Features[KV.Value]) { 268 // Otherwise features are marked Used or not mentioned 269 M.addModuleFlag(Module::ModFlagBehavior::Error, MDKey, 270 wasm::WASM_FEATURE_PREFIX_USED); 271 } 272 } 273 } 274 }; 275 char CoalesceFeaturesAndStripAtomics::ID = 0; 276 277 /// WebAssembly Code Generator Pass Configuration Options. 278 class WebAssemblyPassConfig final : public TargetPassConfig { 279 public: 280 WebAssemblyPassConfig(WebAssemblyTargetMachine &TM, PassManagerBase &PM) 281 : TargetPassConfig(TM, PM) {} 282 283 WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const { 284 return getTM<WebAssemblyTargetMachine>(); 285 } 286 287 FunctionPass *createTargetRegisterAllocator(bool) override; 288 289 void addIRPasses() override; 290 bool addInstSelector() override; 291 void addPostRegAlloc() override; 292 bool addGCPasses() override { return false; } 293 void addPreEmitPass() override; 294 295 // No reg alloc 296 bool addRegAssignmentFast() override { return false; } 297 298 // No reg alloc 299 bool addRegAssignmentOptimized() override { return false; } 300 }; 301 } // end anonymous namespace 302 303 TargetTransformInfo 304 WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) { 305 return TargetTransformInfo(WebAssemblyTTIImpl(this, F)); 306 } 307 308 TargetPassConfig * 309 WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) { 310 return new WebAssemblyPassConfig(*this, PM); 311 } 312 313 FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) { 314 return nullptr; // No reg alloc 315 } 316 317 //===----------------------------------------------------------------------===// 318 // The following functions are called from lib/CodeGen/Passes.cpp to modify 319 // the CodeGen pass sequence. 320 //===----------------------------------------------------------------------===// 321 322 void WebAssemblyPassConfig::addIRPasses() { 323 // Runs LowerAtomicPass if necessary 324 addPass(new CoalesceFeaturesAndStripAtomics(&getWebAssemblyTargetMachine())); 325 326 // This is a no-op if atomics are not used in the module 327 addPass(createAtomicExpandPass()); 328 329 // Add signatures to prototype-less function declarations 330 addPass(createWebAssemblyAddMissingPrototypes()); 331 332 // Lower .llvm.global_dtors into .llvm_global_ctors with __cxa_atexit calls. 333 addPass(createWebAssemblyLowerGlobalDtors()); 334 335 // Fix function bitcasts, as WebAssembly requires caller and callee signatures 336 // to match. 337 addPass(createWebAssemblyFixFunctionBitcasts()); 338 339 // Optimize "returned" function attributes. 340 if (getOptLevel() != CodeGenOpt::None) 341 addPass(createWebAssemblyOptimizeReturned()); 342 343 // If exception handling is not enabled and setjmp/longjmp handling is 344 // enabled, we lower invokes into calls and delete unreachable landingpad 345 // blocks. Lowering invokes when there is no EH support is done in 346 // TargetPassConfig::addPassesToHandleExceptions, but this runs after this 347 // function and SjLj handling expects all invokes to be lowered before. 348 if (!EnableEmException && 349 TM->Options.ExceptionModel == ExceptionHandling::None) { 350 addPass(createLowerInvokePass()); 351 // The lower invoke pass may create unreachable code. Remove it in order not 352 // to process dead blocks in setjmp/longjmp handling. 353 addPass(createUnreachableBlockEliminationPass()); 354 } 355 356 // Handle exceptions and setjmp/longjmp if enabled. 357 if (EnableEmException || EnableEmSjLj) 358 addPass(createWebAssemblyLowerEmscriptenEHSjLj(EnableEmException, 359 EnableEmSjLj)); 360 361 TargetPassConfig::addIRPasses(); 362 } 363 364 bool WebAssemblyPassConfig::addInstSelector() { 365 (void)TargetPassConfig::addInstSelector(); 366 addPass( 367 createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel())); 368 // Run the argument-move pass immediately after the ScheduleDAG scheduler 369 // so that we can fix up the ARGUMENT instructions before anything else 370 // sees them in the wrong place. 371 addPass(createWebAssemblyArgumentMove()); 372 // Set the p2align operands. This information is present during ISel, however 373 // it's inconvenient to collect. Collect it now, and update the immediate 374 // operands. 375 addPass(createWebAssemblySetP2AlignOperands()); 376 return false; 377 } 378 379 void WebAssemblyPassConfig::addPostRegAlloc() { 380 // TODO: The following CodeGen passes don't currently support code containing 381 // virtual registers. Consider removing their restrictions and re-enabling 382 // them. 383 384 // These functions all require the NoVRegs property. 385 disablePass(&MachineCopyPropagationID); 386 disablePass(&PostRAMachineSinkingID); 387 disablePass(&PostRASchedulerID); 388 disablePass(&FuncletLayoutID); 389 disablePass(&StackMapLivenessID); 390 disablePass(&LiveDebugValuesID); 391 disablePass(&PatchableFunctionID); 392 disablePass(&ShrinkWrapID); 393 394 // This pass hurts code size for wasm because it can generate irreducible 395 // control flow. 396 disablePass(&MachineBlockPlacementID); 397 398 TargetPassConfig::addPostRegAlloc(); 399 } 400 401 void WebAssemblyPassConfig::addPreEmitPass() { 402 TargetPassConfig::addPreEmitPass(); 403 404 // Rewrite pseudo call_indirect instructions as real instructions. 405 // This needs to run before register stackification, because we change the 406 // order of the arguments. 407 addPass(createWebAssemblyCallIndirectFixup()); 408 409 // Eliminate multiple-entry loops. 410 addPass(createWebAssemblyFixIrreducibleControlFlow()); 411 412 // Do various transformations for exception handling. 413 // Every CFG-changing optimizations should come before this. 414 addPass(createWebAssemblyLateEHPrepare()); 415 416 // Now that we have a prologue and epilogue and all frame indices are 417 // rewritten, eliminate SP and FP. This allows them to be stackified, 418 // colored, and numbered with the rest of the registers. 419 addPass(createWebAssemblyReplacePhysRegs()); 420 421 // Preparations and optimizations related to register stackification. 422 if (getOptLevel() != CodeGenOpt::None) { 423 // LiveIntervals isn't commonly run this late. Re-establish preconditions. 424 addPass(createWebAssemblyPrepareForLiveIntervals()); 425 426 // Depend on LiveIntervals and perform some optimizations on it. 427 addPass(createWebAssemblyOptimizeLiveIntervals()); 428 429 // Prepare memory intrinsic calls for register stackifying. 430 addPass(createWebAssemblyMemIntrinsicResults()); 431 432 // Mark registers as representing wasm's value stack. This is a key 433 // code-compression technique in WebAssembly. We run this pass (and 434 // MemIntrinsicResults above) very late, so that it sees as much code as 435 // possible, including code emitted by PEI and expanded by late tail 436 // duplication. 437 addPass(createWebAssemblyRegStackify()); 438 439 // Run the register coloring pass to reduce the total number of registers. 440 // This runs after stackification so that it doesn't consider registers 441 // that become stackified. 442 addPass(createWebAssemblyRegColoring()); 443 } 444 445 // Insert explicit local.get and local.set operators. 446 addPass(createWebAssemblyExplicitLocals()); 447 448 // Sort the blocks of the CFG into topological order, a prerequisite for 449 // BLOCK and LOOP markers. 450 addPass(createWebAssemblyCFGSort()); 451 452 // Insert BLOCK and LOOP markers. 453 addPass(createWebAssemblyCFGStackify()); 454 455 // Lower br_unless into br_if. 456 addPass(createWebAssemblyLowerBrUnless()); 457 458 // Perform the very last peephole optimizations on the code. 459 if (getOptLevel() != CodeGenOpt::None) 460 addPass(createWebAssemblyPeephole()); 461 462 // Create a mapping from LLVM CodeGen virtual registers to wasm registers. 463 addPass(createWebAssemblyRegNumbering()); 464 } 465 466 yaml::MachineFunctionInfo * 467 WebAssemblyTargetMachine::createDefaultFuncInfoYAML() const { 468 return new yaml::WebAssemblyFunctionInfo(); 469 } 470 471 yaml::MachineFunctionInfo *WebAssemblyTargetMachine::convertFuncInfoToYAML( 472 const MachineFunction &MF) const { 473 const auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>(); 474 return new yaml::WebAssemblyFunctionInfo(*MFI); 475 } 476 477 bool WebAssemblyTargetMachine::parseMachineFunctionInfo( 478 const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS, 479 SMDiagnostic &Error, SMRange &SourceRange) const { 480 const auto &YamlMFI = 481 reinterpret_cast<const yaml::WebAssemblyFunctionInfo &>(MFI); 482 MachineFunction &MF = PFS.MF; 483 MF.getInfo<WebAssemblyFunctionInfo>()->initializeBaseYamlFields(YamlMFI); 484 return false; 485 } 486