1 //===- WebAssemblyTargetMachine.cpp - Define TargetMachine for WebAssembly -==// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file defines the WebAssembly-specific subclass of TargetMachine. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "WebAssemblyTargetMachine.h" 15 #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 16 #include "WebAssembly.h" 17 #include "WebAssemblyMachineFunctionInfo.h" 18 #include "WebAssemblyTargetObjectFile.h" 19 #include "WebAssemblyTargetTransformInfo.h" 20 #include "llvm/CodeGen/MIRParser/MIParser.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/Passes.h" 23 #include "llvm/CodeGen/RegAllocRegistry.h" 24 #include "llvm/CodeGen/TargetPassConfig.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/Support/TargetRegistry.h" 27 #include "llvm/Target/TargetOptions.h" 28 #include "llvm/Transforms/Scalar.h" 29 #include "llvm/Transforms/Utils.h" 30 using namespace llvm; 31 32 #define DEBUG_TYPE "wasm" 33 34 // Emscripten's asm.js-style exception handling 35 static cl::opt<bool> EnableEmException( 36 "enable-emscripten-cxx-exceptions", 37 cl::desc("WebAssembly Emscripten-style exception handling"), 38 cl::init(false)); 39 40 // Emscripten's asm.js-style setjmp/longjmp handling 41 static cl::opt<bool> EnableEmSjLj( 42 "enable-emscripten-sjlj", 43 cl::desc("WebAssembly Emscripten-style setjmp/longjmp handling"), 44 cl::init(false)); 45 46 extern "C" void LLVMInitializeWebAssemblyTarget() { 47 // Register the target. 48 RegisterTargetMachine<WebAssemblyTargetMachine> X( 49 getTheWebAssemblyTarget32()); 50 RegisterTargetMachine<WebAssemblyTargetMachine> Y( 51 getTheWebAssemblyTarget64()); 52 53 // Register backend passes 54 auto &PR = *PassRegistry::getPassRegistry(); 55 initializeWebAssemblyAddMissingPrototypesPass(PR); 56 initializeWebAssemblyLowerEmscriptenEHSjLjPass(PR); 57 initializeLowerGlobalDtorsPass(PR); 58 initializeFixFunctionBitcastsPass(PR); 59 initializeOptimizeReturnedPass(PR); 60 initializeWebAssemblyArgumentMovePass(PR); 61 initializeWebAssemblySetP2AlignOperandsPass(PR); 62 initializeWebAssemblyReplacePhysRegsPass(PR); 63 initializeWebAssemblyPrepareForLiveIntervalsPass(PR); 64 initializeWebAssemblyOptimizeLiveIntervalsPass(PR); 65 initializeWebAssemblyMemIntrinsicResultsPass(PR); 66 initializeWebAssemblyRegStackifyPass(PR); 67 initializeWebAssemblyRegColoringPass(PR); 68 initializeWebAssemblyExplicitLocalsPass(PR); 69 initializeWebAssemblyFixIrreducibleControlFlowPass(PR); 70 initializeWebAssemblyLateEHPreparePass(PR); 71 initializeWebAssemblyExceptionInfoPass(PR); 72 initializeWebAssemblyCFGSortPass(PR); 73 initializeWebAssemblyCFGStackifyPass(PR); 74 initializeWebAssemblyLowerBrUnlessPass(PR); 75 initializeWebAssemblyRegNumberingPass(PR); 76 initializeWebAssemblyPeepholePass(PR); 77 initializeWebAssemblyCallIndirectFixupPass(PR); 78 } 79 80 //===----------------------------------------------------------------------===// 81 // WebAssembly Lowering public interface. 82 //===----------------------------------------------------------------------===// 83 84 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { 85 if (!RM.hasValue()) { 86 // Default to static relocation model. This should always be more optimial 87 // than PIC since the static linker can determine all global addresses and 88 // assume direct function calls. 89 return Reloc::Static; 90 } 91 return *RM; 92 } 93 94 /// Create an WebAssembly architecture model. 95 /// 96 WebAssemblyTargetMachine::WebAssemblyTargetMachine( 97 const Target &T, const Triple &TT, StringRef CPU, StringRef FS, 98 const TargetOptions &Options, Optional<Reloc::Model> RM, 99 Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT) 100 : LLVMTargetMachine(T, 101 TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128" 102 : "e-m:e-p:32:32-i64:64-n32:64-S128", 103 TT, CPU, FS, Options, getEffectiveRelocModel(RM), 104 getEffectiveCodeModel(CM, CodeModel::Large), OL), 105 TLOF(new WebAssemblyTargetObjectFile()) { 106 // WebAssembly type-checks instructions, but a noreturn function with a return 107 // type that doesn't match the context will cause a check failure. So we lower 108 // LLVM 'unreachable' to ISD::TRAP and then lower that to WebAssembly's 109 // 'unreachable' instructions which is meant for that case. 110 this->Options.TrapUnreachable = true; 111 112 // WebAssembly treats each function as an independent unit. Force 113 // -ffunction-sections, effectively, so that we can emit them independently. 114 this->Options.FunctionSections = true; 115 this->Options.DataSections = true; 116 this->Options.UniqueSectionNames = true; 117 118 initAsmInfo(); 119 120 // Create a subtarget using the unmodified target machine features to 121 // initialize the used feature set with explicitly enabled features. 122 getSubtargetImpl(getTargetCPU(), getTargetFeatureString()); 123 124 // Note that we don't use setRequiresStructuredCFG(true). It disables 125 // optimizations than we're ok with, and want, such as critical edge 126 // splitting and tail merging. 127 } 128 129 WebAssemblyTargetMachine::~WebAssemblyTargetMachine() = default; // anchor. 130 131 const WebAssemblySubtarget * 132 WebAssemblyTargetMachine::getSubtargetImpl(std::string CPU, 133 std::string FS) const { 134 auto &I = SubtargetMap[CPU + FS]; 135 if (!I) { 136 I = llvm::make_unique<WebAssemblySubtarget>(TargetTriple, CPU, FS, *this); 137 UsedFeatures |= I->getFeatureBits(); 138 } 139 return I.get(); 140 } 141 142 const WebAssemblySubtarget * 143 WebAssemblyTargetMachine::getSubtargetImpl(const Function &F) const { 144 Attribute CPUAttr = F.getFnAttribute("target-cpu"); 145 Attribute FSAttr = F.getFnAttribute("target-features"); 146 147 std::string CPU = !CPUAttr.hasAttribute(Attribute::None) 148 ? CPUAttr.getValueAsString().str() 149 : TargetCPU; 150 std::string FS = !FSAttr.hasAttribute(Attribute::None) 151 ? FSAttr.getValueAsString().str() 152 : TargetFS; 153 154 // This needs to be done before we create a new subtarget since any 155 // creation will depend on the TM and the code generation flags on the 156 // function that reside in TargetOptions. 157 resetTargetOptions(F); 158 159 return getSubtargetImpl(CPU, FS); 160 } 161 162 namespace { 163 class StripThreadLocal final : public ModulePass { 164 // The default thread model for wasm is single, where thread-local variables 165 // are identical to regular globals and should be treated the same. So this 166 // pass just converts all GlobalVariables to NotThreadLocal 167 static char ID; 168 169 public: 170 StripThreadLocal() : ModulePass(ID) {} 171 bool runOnModule(Module &M) override { 172 for (auto &GV : M.globals()) 173 GV.setThreadLocalMode(GlobalValue::ThreadLocalMode::NotThreadLocal); 174 return true; 175 } 176 }; 177 char StripThreadLocal::ID = 0; 178 179 /// WebAssembly Code Generator Pass Configuration Options. 180 class WebAssemblyPassConfig final : public TargetPassConfig { 181 public: 182 WebAssemblyPassConfig(WebAssemblyTargetMachine &TM, PassManagerBase &PM) 183 : TargetPassConfig(TM, PM) {} 184 185 WebAssemblyTargetMachine &getWebAssemblyTargetMachine() const { 186 return getTM<WebAssemblyTargetMachine>(); 187 } 188 189 FunctionPass *createTargetRegisterAllocator(bool) override; 190 191 void addIRPasses() override; 192 bool addInstSelector() override; 193 void addPostRegAlloc() override; 194 bool addGCPasses() override { return false; } 195 void addPreEmitPass() override; 196 197 // No reg alloc 198 bool addRegAssignmentFast() override { return false; } 199 200 // No reg alloc 201 bool addRegAssignmentOptimized() override { return false; } 202 }; 203 } // end anonymous namespace 204 205 TargetTransformInfo 206 WebAssemblyTargetMachine::getTargetTransformInfo(const Function &F) { 207 return TargetTransformInfo(WebAssemblyTTIImpl(this, F)); 208 } 209 210 TargetPassConfig * 211 WebAssemblyTargetMachine::createPassConfig(PassManagerBase &PM) { 212 return new WebAssemblyPassConfig(*this, PM); 213 } 214 215 FunctionPass *WebAssemblyPassConfig::createTargetRegisterAllocator(bool) { 216 return nullptr; // No reg alloc 217 } 218 219 //===----------------------------------------------------------------------===// 220 // The following functions are called from lib/CodeGen/Passes.cpp to modify 221 // the CodeGen pass sequence. 222 //===----------------------------------------------------------------------===// 223 224 void WebAssemblyPassConfig::addIRPasses() { 225 if (static_cast<WebAssemblyTargetMachine *>(TM) 226 ->getUsedFeatures()[WebAssembly::FeatureAtomics]) { 227 // Expand some atomic operations. WebAssemblyTargetLowering has hooks which 228 // control specifically what gets lowered. 229 addPass(createAtomicExpandPass()); 230 } else { 231 // If atomics are not enabled, they get lowered to non-atomics. 232 addPass(createLowerAtomicPass()); 233 addPass(new StripThreadLocal()); 234 } 235 236 // Add signatures to prototype-less function declarations 237 addPass(createWebAssemblyAddMissingPrototypes()); 238 239 // Lower .llvm.global_dtors into .llvm_global_ctors with __cxa_atexit calls. 240 addPass(createWebAssemblyLowerGlobalDtors()); 241 242 // Fix function bitcasts, as WebAssembly requires caller and callee signatures 243 // to match. 244 addPass(createWebAssemblyFixFunctionBitcasts()); 245 246 // Optimize "returned" function attributes. 247 if (getOptLevel() != CodeGenOpt::None) 248 addPass(createWebAssemblyOptimizeReturned()); 249 250 // If exception handling is not enabled and setjmp/longjmp handling is 251 // enabled, we lower invokes into calls and delete unreachable landingpad 252 // blocks. Lowering invokes when there is no EH support is done in 253 // TargetPassConfig::addPassesToHandleExceptions, but this runs after this 254 // function and SjLj handling expects all invokes to be lowered before. 255 if (!EnableEmException && 256 TM->Options.ExceptionModel == ExceptionHandling::None) { 257 addPass(createLowerInvokePass()); 258 // The lower invoke pass may create unreachable code. Remove it in order not 259 // to process dead blocks in setjmp/longjmp handling. 260 addPass(createUnreachableBlockEliminationPass()); 261 } 262 263 // Handle exceptions and setjmp/longjmp if enabled. 264 if (EnableEmException || EnableEmSjLj) 265 addPass(createWebAssemblyLowerEmscriptenEHSjLj(EnableEmException, 266 EnableEmSjLj)); 267 268 TargetPassConfig::addIRPasses(); 269 } 270 271 bool WebAssemblyPassConfig::addInstSelector() { 272 (void)TargetPassConfig::addInstSelector(); 273 addPass( 274 createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel())); 275 // Run the argument-move pass immediately after the ScheduleDAG scheduler 276 // so that we can fix up the ARGUMENT instructions before anything else 277 // sees them in the wrong place. 278 addPass(createWebAssemblyArgumentMove()); 279 // Set the p2align operands. This information is present during ISel, however 280 // it's inconvenient to collect. Collect it now, and update the immediate 281 // operands. 282 addPass(createWebAssemblySetP2AlignOperands()); 283 return false; 284 } 285 286 void WebAssemblyPassConfig::addPostRegAlloc() { 287 // TODO: The following CodeGen passes don't currently support code containing 288 // virtual registers. Consider removing their restrictions and re-enabling 289 // them. 290 291 // These functions all require the NoVRegs property. 292 disablePass(&MachineCopyPropagationID); 293 disablePass(&PostRAMachineSinkingID); 294 disablePass(&PostRASchedulerID); 295 disablePass(&FuncletLayoutID); 296 disablePass(&StackMapLivenessID); 297 disablePass(&LiveDebugValuesID); 298 disablePass(&PatchableFunctionID); 299 disablePass(&ShrinkWrapID); 300 301 // This pass hurts code size for wasm because it can generate irreducible 302 // control flow. 303 disablePass(&MachineBlockPlacementID); 304 305 TargetPassConfig::addPostRegAlloc(); 306 } 307 308 void WebAssemblyPassConfig::addPreEmitPass() { 309 TargetPassConfig::addPreEmitPass(); 310 311 // Rewrite pseudo call_indirect instructions as real instructions. 312 // This needs to run before register stackification, because we change the 313 // order of the arguments. 314 addPass(createWebAssemblyCallIndirectFixup()); 315 316 // Eliminate multiple-entry loops. 317 addPass(createWebAssemblyFixIrreducibleControlFlow()); 318 319 // Do various transformations for exception handling. 320 // Every CFG-changing optimizations should come before this. 321 addPass(createWebAssemblyLateEHPrepare()); 322 323 // Now that we have a prologue and epilogue and all frame indices are 324 // rewritten, eliminate SP and FP. This allows them to be stackified, 325 // colored, and numbered with the rest of the registers. 326 addPass(createWebAssemblyReplacePhysRegs()); 327 328 // Preparations and optimizations related to register stackification. 329 if (getOptLevel() != CodeGenOpt::None) { 330 // LiveIntervals isn't commonly run this late. Re-establish preconditions. 331 addPass(createWebAssemblyPrepareForLiveIntervals()); 332 333 // Depend on LiveIntervals and perform some optimizations on it. 334 addPass(createWebAssemblyOptimizeLiveIntervals()); 335 336 // Prepare memory intrinsic calls for register stackifying. 337 addPass(createWebAssemblyMemIntrinsicResults()); 338 339 // Mark registers as representing wasm's value stack. This is a key 340 // code-compression technique in WebAssembly. We run this pass (and 341 // MemIntrinsicResults above) very late, so that it sees as much code as 342 // possible, including code emitted by PEI and expanded by late tail 343 // duplication. 344 addPass(createWebAssemblyRegStackify()); 345 346 // Run the register coloring pass to reduce the total number of registers. 347 // This runs after stackification so that it doesn't consider registers 348 // that become stackified. 349 addPass(createWebAssemblyRegColoring()); 350 } 351 352 // Insert explicit local.get and local.set operators. 353 addPass(createWebAssemblyExplicitLocals()); 354 355 // Sort the blocks of the CFG into topological order, a prerequisite for 356 // BLOCK and LOOP markers. 357 addPass(createWebAssemblyCFGSort()); 358 359 // Insert BLOCK and LOOP markers. 360 addPass(createWebAssemblyCFGStackify()); 361 362 // Lower br_unless into br_if. 363 addPass(createWebAssemblyLowerBrUnless()); 364 365 // Perform the very last peephole optimizations on the code. 366 if (getOptLevel() != CodeGenOpt::None) 367 addPass(createWebAssemblyPeephole()); 368 369 // Create a mapping from LLVM CodeGen virtual registers to wasm registers. 370 addPass(createWebAssemblyRegNumbering()); 371 } 372 373 yaml::MachineFunctionInfo * 374 WebAssemblyTargetMachine::createDefaultFuncInfoYAML() const { 375 return new yaml::WebAssemblyFunctionInfo(); 376 } 377 378 yaml::MachineFunctionInfo *WebAssemblyTargetMachine::convertFuncInfoToYAML( 379 const MachineFunction &MF) const { 380 const auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>(); 381 return new yaml::WebAssemblyFunctionInfo(*MFI); 382 } 383 384 bool WebAssemblyTargetMachine::parseMachineFunctionInfo( 385 const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS, 386 SMDiagnostic &Error, SMRange &SourceRange) const { 387 const auto &YamlMFI = 388 reinterpret_cast<const yaml::WebAssemblyFunctionInfo &>(MFI); 389 MachineFunction &MF = PFS.MF; 390 MF.getInfo<WebAssemblyFunctionInfo>()->initializeBaseYamlFields(YamlMFI); 391 return false; 392 } 393