//=== WebAssemblyLowerEmscriptenEHSjLj.cpp - Lower exceptions for Emscripten =//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file lowers exception-related instructions and setjmp/longjmp function
/// calls to use Emscripten's library functions. The pass uses JavaScript's try
/// and catch mechanism in case of Emscripten EH/SjLj and Wasm EH intrinsics in
/// case of Wasm SjLj.
///
/// * Emscripten exception handling
/// This pass lowers invokes and landingpads into library functions in JS glue
/// code. Invokes are lowered into function wrappers called invoke wrappers that
/// exist on the JS side and wrap the original function call with JS try-catch.
/// If an exception occurred, cxa_throw() function in JS side sets some
/// variables (see below) so we can check whether an exception occurred from
/// wasm code and handle it appropriately.
///
/// * Emscripten setjmp-longjmp handling
/// This pass lowers setjmp to a reasonably-performant approach for emscripten.
/// The idea is that each block with a setjmp is broken up into two parts: the
/// part containing setjmp and the part right after the setjmp. The latter part
/// is either reached from the setjmp, or later from a longjmp. To handle the
/// longjmp, all calls that might longjmp are also called using invoke wrappers
/// and thus JS try-catch. JS longjmp() function also sets some variables so
/// we can check whether a longjmp occurred from wasm code. Each block with a
/// function call that might longjmp is also split up after the longjmp call.
/// After the longjmp call, we check whether a longjmp occurred, and if it did,
/// which setjmp it corresponds to, and jump to the right post-setjmp BB.
/// We assume setjmp-longjmp handling always runs after EH handling, which means
/// we don't expect any exception-related instructions when SjLj runs.
/// FIXME Currently this scheme does not support indirect call of setjmp,
/// because of the limitation of the scheme itself. fastcomp does not support it
/// either.
///
/// In detail, this pass does the following things:
///
/// 1) Assumes the existence of global variables: __THREW__, __threwValue
///    __THREW__ and __threwValue are defined in compiler-rt in Emscripten.
///    These variables are used for both exceptions and setjmp/longjmps.
///    __THREW__ indicates whether an exception or a longjmp occurred or not.
0 460b57cec5SDimitry Andric /// means nothing occurred, 1 means an exception occurred, and other numbers 47fe6060f1SDimitry Andric /// mean a longjmp occurred. In the case of longjmp, __THREW__ variable 480b57cec5SDimitry Andric /// indicates the corresponding setjmp buffer the longjmp corresponds to. 49fe6060f1SDimitry Andric /// __threwValue is 0 for exceptions, and the argument to longjmp in case of 50fe6060f1SDimitry Andric /// longjmp. 510b57cec5SDimitry Andric /// 52349cc55cSDimitry Andric /// * Emscripten exception handling 530b57cec5SDimitry Andric /// 540b57cec5SDimitry Andric /// 2) We assume the existence of setThrew and setTempRet0/getTempRet0 functions 55fe6060f1SDimitry Andric /// at link time. setThrew exists in Emscripten's compiler-rt: 560b57cec5SDimitry Andric /// 57fe6060f1SDimitry Andric /// void setThrew(uintptr_t threw, int value) { 580b57cec5SDimitry Andric /// if (__THREW__ == 0) { 590b57cec5SDimitry Andric /// __THREW__ = threw; 600b57cec5SDimitry Andric /// __threwValue = value; 610b57cec5SDimitry Andric /// } 620b57cec5SDimitry Andric /// } 630b57cec5SDimitry Andric // 640b57cec5SDimitry Andric /// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code. 650b57cec5SDimitry Andric /// In exception handling, getTempRet0 indicates the type of an exception 660b57cec5SDimitry Andric /// caught, and in setjmp/longjmp, it means the second argument to longjmp 670b57cec5SDimitry Andric /// function. 
680b57cec5SDimitry Andric /// 690b57cec5SDimitry Andric /// 3) Lower 700b57cec5SDimitry Andric /// invoke @func(arg1, arg2) to label %invoke.cont unwind label %lpad 710b57cec5SDimitry Andric /// into 720b57cec5SDimitry Andric /// __THREW__ = 0; 730b57cec5SDimitry Andric /// call @__invoke_SIG(func, arg1, arg2) 740b57cec5SDimitry Andric /// %__THREW__.val = __THREW__; 750b57cec5SDimitry Andric /// __THREW__ = 0; 760b57cec5SDimitry Andric /// if (%__THREW__.val == 1) 770b57cec5SDimitry Andric /// goto %lpad 780b57cec5SDimitry Andric /// else 790b57cec5SDimitry Andric /// goto %invoke.cont 800b57cec5SDimitry Andric /// SIG is a mangled string generated based on the LLVM IR-level function 810b57cec5SDimitry Andric /// signature. After LLVM IR types are lowered to the target wasm types, 820b57cec5SDimitry Andric /// the names for these wrappers will change based on wasm types as well, 830b57cec5SDimitry Andric /// as in invoke_vi (function takes an int and returns void). The bodies of 840b57cec5SDimitry Andric /// these wrappers will be generated in JS glue code, and inside those 850b57cec5SDimitry Andric /// wrappers we use JS try-catch to generate actual exception effects. It 860b57cec5SDimitry Andric /// also calls the original callee function. An example wrapper in JS code 870b57cec5SDimitry Andric /// would look like this: 880b57cec5SDimitry Andric /// function invoke_vi(index,a1) { 890b57cec5SDimitry Andric /// try { 900b57cec5SDimitry Andric /// Module["dynCall_vi"](index,a1); // This calls original callee 910b57cec5SDimitry Andric /// } catch(e) { 920b57cec5SDimitry Andric /// if (typeof e !== 'number' && e !== 'longjmp') throw e; 93fe6060f1SDimitry Andric /// _setThrew(1, 0); // setThrew is called here 940b57cec5SDimitry Andric /// } 950b57cec5SDimitry Andric /// } 960b57cec5SDimitry Andric /// If an exception is thrown, __THREW__ will be set to true in a wrapper, 970b57cec5SDimitry Andric /// so we can jump to the right BB based on this value. 
980b57cec5SDimitry Andric /// 990b57cec5SDimitry Andric /// 4) Lower 1000b57cec5SDimitry Andric /// %val = landingpad catch c1 catch c2 catch c3 ... 1010b57cec5SDimitry Andric /// ... use %val ... 1020b57cec5SDimitry Andric /// into 1030b57cec5SDimitry Andric /// %fmc = call @__cxa_find_matching_catch_N(c1, c2, c3, ...) 1040b57cec5SDimitry Andric /// %val = {%fmc, getTempRet0()} 1050b57cec5SDimitry Andric /// ... use %val ... 1060b57cec5SDimitry Andric /// Here N is a number calculated based on the number of clauses. 1070b57cec5SDimitry Andric /// setTempRet0 is called from __cxa_find_matching_catch() in JS glue code. 1080b57cec5SDimitry Andric /// 1090b57cec5SDimitry Andric /// 5) Lower 1100b57cec5SDimitry Andric /// resume {%a, %b} 1110b57cec5SDimitry Andric /// into 1120b57cec5SDimitry Andric /// call @__resumeException(%a) 1130b57cec5SDimitry Andric /// where __resumeException() is a function in JS glue code. 1140b57cec5SDimitry Andric /// 1150b57cec5SDimitry Andric /// 6) Lower 1160b57cec5SDimitry Andric /// call @llvm.eh.typeid.for(type) (intrinsic) 1170b57cec5SDimitry Andric /// into 1180b57cec5SDimitry Andric /// call @llvm_eh_typeid_for(type) 1190b57cec5SDimitry Andric /// llvm_eh_typeid_for function will be generated in JS glue code. 
///
/// * Emscripten setjmp / longjmp handling
///
/// If there are calls to longjmp()
///
/// 1) Lower
///      longjmp(env, val)
///    into
///      emscripten_longjmp(env, val)
///
/// If there are calls to setjmp()
///
/// 2) In the function entry that calls setjmp, initialize
///    functionInvocationId as follows:
///
///    functionInvocationId = alloca(4)
///
///    Note: the alloca size is not important as this pointer is
///    merely used for pointer comparisons.
///
/// 3) Lower
///      setjmp(env)
///    into
///      __wasm_setjmp(env, label, functionInvocationId)
///
///    __wasm_setjmp records the necessary info (the label and
///    functionInvocationId) to the "env".
///    A BB with setjmp is split into two after setjmp call in order to
///    make the post-setjmp BB the possible destination of longjmp BB.
1490b57cec5SDimitry Andric /// 1500b57cec5SDimitry Andric /// 4) Lower every call that might longjmp into 1510b57cec5SDimitry Andric /// __THREW__ = 0; 1520b57cec5SDimitry Andric /// call @__invoke_SIG(func, arg1, arg2) 1530b57cec5SDimitry Andric /// %__THREW__.val = __THREW__; 1540b57cec5SDimitry Andric /// __THREW__ = 0; 155fe6060f1SDimitry Andric /// %__threwValue.val = __threwValue; 156fe6060f1SDimitry Andric /// if (%__THREW__.val != 0 & %__threwValue.val != 0) { 157*0fca6ea1SDimitry Andric /// %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId); 1580b57cec5SDimitry Andric /// if (%label == 0) 159fe6060f1SDimitry Andric /// emscripten_longjmp(%__THREW__.val, %__threwValue.val); 160fe6060f1SDimitry Andric /// setTempRet0(%__threwValue.val); 1610b57cec5SDimitry Andric /// } else { 1620b57cec5SDimitry Andric /// %label = -1; 1630b57cec5SDimitry Andric /// } 1640b57cec5SDimitry Andric /// longjmp_result = getTempRet0(); 165349cc55cSDimitry Andric /// switch %label { 1660b57cec5SDimitry Andric /// label 1: goto post-setjmp BB 1 1670b57cec5SDimitry Andric /// label 2: goto post-setjmp BB 2 1680b57cec5SDimitry Andric /// ... 1690b57cec5SDimitry Andric /// default: goto splitted next BB 1700b57cec5SDimitry Andric /// } 171*0fca6ea1SDimitry Andric /// 172*0fca6ea1SDimitry Andric /// __wasm_setjmp_test examines the jmp buf to see if it was for a matching 173*0fca6ea1SDimitry Andric /// setjmp call. After calling an invoke wrapper, if a longjmp occurred, 174*0fca6ea1SDimitry Andric /// __THREW__ will be the address of matching jmp_buf buffer and 175*0fca6ea1SDimitry Andric /// __threwValue be the second argument to longjmp. 176*0fca6ea1SDimitry Andric /// __wasm_setjmp_test returns a setjmp label, a unique ID to each setjmp 177*0fca6ea1SDimitry Andric /// callsite. 
Label 0 means this longjmp buffer does not correspond to one 178*0fca6ea1SDimitry Andric /// of the setjmp callsites in this function, so in this case we just chain 179*0fca6ea1SDimitry Andric /// the longjmp to the caller. Label -1 means no longjmp occurred. 180*0fca6ea1SDimitry Andric /// Otherwise we jump to the right post-setjmp BB based on the label. 1810b57cec5SDimitry Andric /// 182349cc55cSDimitry Andric /// * Wasm setjmp / longjmp handling 183349cc55cSDimitry Andric /// This mode still uses some Emscripten library functions but not JavaScript's 184349cc55cSDimitry Andric /// try-catch mechanism. It instead uses Wasm exception handling intrinsics, 185349cc55cSDimitry Andric /// which will be lowered to exception handling instructions. 186349cc55cSDimitry Andric /// 187349cc55cSDimitry Andric /// If there are calls to longjmp() 188349cc55cSDimitry Andric /// 189349cc55cSDimitry Andric /// 1) Lower 190349cc55cSDimitry Andric /// longjmp(env, val) 191349cc55cSDimitry Andric /// into 192349cc55cSDimitry Andric /// __wasm_longjmp(env, val) 193349cc55cSDimitry Andric /// 194349cc55cSDimitry Andric /// If there are calls to setjmp() 195349cc55cSDimitry Andric /// 196349cc55cSDimitry Andric /// 2) and 3): The same as 2) and 3) in Emscripten SjLj. 197*0fca6ea1SDimitry Andric /// (functionInvocationId initialization + setjmp callsite transformation) 198349cc55cSDimitry Andric /// 199349cc55cSDimitry Andric /// 4) Create a catchpad with a wasm.catch() intrinsic, which returns the value 200*0fca6ea1SDimitry Andric /// thrown by __wasm_longjmp function. In the runtime library, we have an 201*0fca6ea1SDimitry Andric /// equivalent of the following struct: 202349cc55cSDimitry Andric /// 203349cc55cSDimitry Andric /// struct __WasmLongjmpArgs { 204349cc55cSDimitry Andric /// void *env; 205349cc55cSDimitry Andric /// int val; 206349cc55cSDimitry Andric /// }; 207349cc55cSDimitry Andric /// 208*0fca6ea1SDimitry Andric /// The thrown value here is a pointer to the struct. 
/// We use this struct to
/// transfer two values by throwing a single value. Wasm throw and catch
/// instructions are capable of throwing and catching multiple values, but
/// that also requires multivalue support that is currently not very reliable.
/// TODO Switch to throwing and catching two values without using the struct
///
/// All longjmpable function calls will be converted to an invoke that will
/// unwind to this catchpad in case a longjmp occurs. Within the catchpad, we
/// test the thrown values using __wasm_setjmp_test function as we do for
/// Emscripten SjLj. The main difference is, in Emscripten SjLj, we need to
/// transform every longjmpable callsite into a sequence of code including
/// __wasm_setjmp_test() call; in Wasm SjLj we do the testing in only one
/// place, in this catchpad.
///
/// After calling __wasm_setjmp_test(), if the longjmp does not
/// correspond to one of the setjmps within the current function, we rethrow
/// the longjmp by calling __wasm_longjmp(). If it corresponds to one of the
/// setjmps in the function, we jump to the beginning of the function, which
/// contains a switch to each post-setjmp BB. Again, in Emscripten SjLj, this
/// switch is added for every longjmpable callsite; in Wasm SjLj we do this
/// only once at the top of the function.
(after functionInvocationId 229*0fca6ea1SDimitry Andric /// initialization) 230349cc55cSDimitry Andric /// 231349cc55cSDimitry Andric /// The below is the pseudocode for what we have described 232349cc55cSDimitry Andric /// 233349cc55cSDimitry Andric /// entry: 234*0fca6ea1SDimitry Andric /// Initialize functionInvocationId 235349cc55cSDimitry Andric /// 236349cc55cSDimitry Andric /// setjmp.dispatch: 237349cc55cSDimitry Andric /// switch %label { 238349cc55cSDimitry Andric /// label 1: goto post-setjmp BB 1 239349cc55cSDimitry Andric /// label 2: goto post-setjmp BB 2 240349cc55cSDimitry Andric /// ... 241349cc55cSDimitry Andric /// default: goto splitted next BB 242349cc55cSDimitry Andric /// } 243349cc55cSDimitry Andric /// ... 244349cc55cSDimitry Andric /// 245349cc55cSDimitry Andric /// bb: 246349cc55cSDimitry Andric /// invoke void @foo() ;; foo is a longjmpable function 247349cc55cSDimitry Andric /// to label %next unwind label %catch.dispatch.longjmp 248349cc55cSDimitry Andric /// ... 
249349cc55cSDimitry Andric /// 250349cc55cSDimitry Andric /// catch.dispatch.longjmp: 251349cc55cSDimitry Andric /// %0 = catchswitch within none [label %catch.longjmp] unwind to caller 252349cc55cSDimitry Andric /// 253349cc55cSDimitry Andric /// catch.longjmp: 254349cc55cSDimitry Andric /// %longjmp.args = wasm.catch() ;; struct __WasmLongjmpArgs 255349cc55cSDimitry Andric /// %env = load 'env' field from __WasmLongjmpArgs 256349cc55cSDimitry Andric /// %val = load 'val' field from __WasmLongjmpArgs 257*0fca6ea1SDimitry Andric /// %label = __wasm_setjmp_test(%env, functionInvocationId); 258349cc55cSDimitry Andric /// if (%label == 0) 259349cc55cSDimitry Andric /// __wasm_longjmp(%env, %val) 260349cc55cSDimitry Andric /// catchret to %setjmp.dispatch 261349cc55cSDimitry Andric /// 2620b57cec5SDimitry Andric ///===----------------------------------------------------------------------===// 2630b57cec5SDimitry Andric 26406c3fb27SDimitry Andric #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" 2650b57cec5SDimitry Andric #include "WebAssembly.h" 266e8d8bef9SDimitry Andric #include "WebAssemblyTargetMachine.h" 2675ffd83dbSDimitry Andric #include "llvm/ADT/StringExtras.h" 268e8d8bef9SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 269349cc55cSDimitry Andric #include "llvm/CodeGen/WasmEHFuncInfo.h" 2705ffd83dbSDimitry Andric #include "llvm/IR/DebugInfoMetadata.h" 2710b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 2720b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 273349cc55cSDimitry Andric #include "llvm/IR/IntrinsicsWebAssembly.h" 274*0fca6ea1SDimitry Andric #include "llvm/IR/Module.h" 275480093f4SDimitry Andric #include "llvm/Support/CommandLine.h" 2760b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h" 27704eeddc0SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 2780b57cec5SDimitry Andric #include "llvm/Transforms/Utils/SSAUpdater.h" 279349cc55cSDimitry Andric #include "llvm/Transforms/Utils/SSAUpdaterBulk.h" 
2805f757f3fSDimitry Andric #include <set> 2810b57cec5SDimitry Andric 2820b57cec5SDimitry Andric using namespace llvm; 2830b57cec5SDimitry Andric 2840b57cec5SDimitry Andric #define DEBUG_TYPE "wasm-lower-em-ehsjlj" 2850b57cec5SDimitry Andric 2860b57cec5SDimitry Andric static cl::list<std::string> 2875ffd83dbSDimitry Andric EHAllowlist("emscripten-cxx-exceptions-allowed", 2880b57cec5SDimitry Andric cl::desc("The list of function names in which Emscripten-style " 2890b57cec5SDimitry Andric "exception handling is enabled (see emscripten " 2905ffd83dbSDimitry Andric "EMSCRIPTEN_CATCHING_ALLOWED options)"), 2910b57cec5SDimitry Andric cl::CommaSeparated); 2920b57cec5SDimitry Andric 2930b57cec5SDimitry Andric namespace { 2940b57cec5SDimitry Andric class WebAssemblyLowerEmscriptenEHSjLj final : public ModulePass { 295349cc55cSDimitry Andric bool EnableEmEH; // Enable Emscripten exception handling 296349cc55cSDimitry Andric bool EnableEmSjLj; // Enable Emscripten setjmp/longjmp handling 297349cc55cSDimitry Andric bool EnableWasmSjLj; // Enable Wasm setjmp/longjmp handling 298fe6060f1SDimitry Andric bool DoSjLj; // Whether we actually perform setjmp/longjmp handling 2990b57cec5SDimitry Andric 300349cc55cSDimitry Andric GlobalVariable *ThrewGV = nullptr; // __THREW__ (Emscripten) 301349cc55cSDimitry Andric GlobalVariable *ThrewValueGV = nullptr; // __threwValue (Emscripten) 302349cc55cSDimitry Andric Function *GetTempRet0F = nullptr; // getTempRet0() (Emscripten) 303349cc55cSDimitry Andric Function *SetTempRet0F = nullptr; // setTempRet0() (Emscripten) 304349cc55cSDimitry Andric Function *ResumeF = nullptr; // __resumeException() (Emscripten) 305349cc55cSDimitry Andric Function *EHTypeIDF = nullptr; // llvm.eh.typeid.for() (intrinsic) 306349cc55cSDimitry Andric Function *EmLongjmpF = nullptr; // emscripten_longjmp() (Emscripten) 307*0fca6ea1SDimitry Andric Function *WasmSetjmpF = nullptr; // __wasm_setjmp() (Emscripten) 308*0fca6ea1SDimitry Andric Function *WasmSetjmpTestF = 
nullptr; // __wasm_setjmp_test() (Emscripten) 309349cc55cSDimitry Andric Function *WasmLongjmpF = nullptr; // __wasm_longjmp() (Emscripten) 310349cc55cSDimitry Andric Function *CatchF = nullptr; // wasm.catch() (intrinsic) 311349cc55cSDimitry Andric 312349cc55cSDimitry Andric // type of 'struct __WasmLongjmpArgs' defined in emscripten 313349cc55cSDimitry Andric Type *LongjmpArgsTy = nullptr; 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric // __cxa_find_matching_catch_N functions. 3160b57cec5SDimitry Andric // Indexed by the number of clauses in an original landingpad instruction. 3170b57cec5SDimitry Andric DenseMap<int, Function *> FindMatchingCatches; 3180b57cec5SDimitry Andric // Map of <function signature string, invoke_ wrappers> 3190b57cec5SDimitry Andric StringMap<Function *> InvokeWrappers; 3205ffd83dbSDimitry Andric // Set of allowed function names for exception handling 3215ffd83dbSDimitry Andric std::set<std::string> EHAllowlistSet; 322fe6060f1SDimitry Andric // Functions that contains calls to setjmp 323fe6060f1SDimitry Andric SmallPtrSet<Function *, 8> SetjmpUsers; 3240b57cec5SDimitry Andric 3250b57cec5SDimitry Andric StringRef getPassName() const override { 3260b57cec5SDimitry Andric return "WebAssembly Lower Emscripten Exceptions"; 3270b57cec5SDimitry Andric } 3280b57cec5SDimitry Andric 329349cc55cSDimitry Andric using InstVector = SmallVectorImpl<Instruction *>; 3300b57cec5SDimitry Andric bool runEHOnFunction(Function &F); 3310b57cec5SDimitry Andric bool runSjLjOnFunction(Function &F); 332349cc55cSDimitry Andric void handleLongjmpableCallsForEmscriptenSjLj( 333*0fca6ea1SDimitry Andric Function &F, Instruction *FunctionInvocationId, 334349cc55cSDimitry Andric SmallVectorImpl<PHINode *> &SetjmpRetPHIs); 335349cc55cSDimitry Andric void 336*0fca6ea1SDimitry Andric handleLongjmpableCallsForWasmSjLj(Function &F, 337*0fca6ea1SDimitry Andric Instruction *FunctionInvocationId, 338349cc55cSDimitry Andric SmallVectorImpl<PHINode *> &SetjmpRetPHIs); 
3390b57cec5SDimitry Andric Function *getFindMatchingCatch(Module &M, unsigned NumClauses); 3400b57cec5SDimitry Andric 3415ffd83dbSDimitry Andric Value *wrapInvoke(CallBase *CI); 3425ffd83dbSDimitry Andric void wrapTestSetjmp(BasicBlock *BB, DebugLoc DL, Value *Threw, 343*0fca6ea1SDimitry Andric Value *FunctionInvocationId, Value *&Label, 344349cc55cSDimitry Andric Value *&LongjmpResult, BasicBlock *&CallEmLongjmpBB, 345349cc55cSDimitry Andric PHINode *&CallEmLongjmpBBThrewPHI, 346349cc55cSDimitry Andric PHINode *&CallEmLongjmpBBThrewValuePHI, 347349cc55cSDimitry Andric BasicBlock *&EndBB); 3485ffd83dbSDimitry Andric Function *getInvokeWrapper(CallBase *CI); 3490b57cec5SDimitry Andric 3505ffd83dbSDimitry Andric bool areAllExceptionsAllowed() const { return EHAllowlistSet.empty(); } 351fe6060f1SDimitry Andric bool supportsException(const Function *F) const { 352349cc55cSDimitry Andric return EnableEmEH && (areAllExceptionsAllowed() || 353fe6060f1SDimitry Andric EHAllowlistSet.count(std::string(F->getName()))); 354fe6060f1SDimitry Andric } 355349cc55cSDimitry Andric void replaceLongjmpWith(Function *LongjmpF, Function *NewF); 3560b57cec5SDimitry Andric 3570b57cec5SDimitry Andric void rebuildSSA(Function &F); 3580b57cec5SDimitry Andric 3590b57cec5SDimitry Andric public: 3600b57cec5SDimitry Andric static char ID; 3610b57cec5SDimitry Andric 362349cc55cSDimitry Andric WebAssemblyLowerEmscriptenEHSjLj() 3630eae32dcSDimitry Andric : ModulePass(ID), EnableEmEH(WebAssembly::WasmEnableEmEH), 3640eae32dcSDimitry Andric EnableEmSjLj(WebAssembly::WasmEnableEmSjLj), 3650eae32dcSDimitry Andric EnableWasmSjLj(WebAssembly::WasmEnableSjLj) { 366349cc55cSDimitry Andric assert(!(EnableEmSjLj && EnableWasmSjLj) && 367349cc55cSDimitry Andric "Two SjLj modes cannot be turned on at the same time"); 368349cc55cSDimitry Andric assert(!(EnableEmEH && EnableWasmSjLj) && 369349cc55cSDimitry Andric "Wasm SjLj should be only used with Wasm EH"); 3705ffd83dbSDimitry Andric 
EHAllowlistSet.insert(EHAllowlist.begin(), EHAllowlist.end()); 3710b57cec5SDimitry Andric } 3720b57cec5SDimitry Andric bool runOnModule(Module &M) override; 3730b57cec5SDimitry Andric 3740b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 3750b57cec5SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 3760b57cec5SDimitry Andric } 3770b57cec5SDimitry Andric }; 3780b57cec5SDimitry Andric } // End anonymous namespace 3790b57cec5SDimitry Andric 3800b57cec5SDimitry Andric char WebAssemblyLowerEmscriptenEHSjLj::ID = 0; 3810b57cec5SDimitry Andric INITIALIZE_PASS(WebAssemblyLowerEmscriptenEHSjLj, DEBUG_TYPE, 3820b57cec5SDimitry Andric "WebAssembly Lower Emscripten Exceptions / Setjmp / Longjmp", 3830b57cec5SDimitry Andric false, false) 3840b57cec5SDimitry Andric 385349cc55cSDimitry Andric ModulePass *llvm::createWebAssemblyLowerEmscriptenEHSjLj() { 386349cc55cSDimitry Andric return new WebAssemblyLowerEmscriptenEHSjLj(); 3870b57cec5SDimitry Andric } 3880b57cec5SDimitry Andric 3890b57cec5SDimitry Andric static bool canThrow(const Value *V) { 3900b57cec5SDimitry Andric if (const auto *F = dyn_cast<const Function>(V)) { 3910b57cec5SDimitry Andric // Intrinsics cannot throw 3920b57cec5SDimitry Andric if (F->isIntrinsic()) 3930b57cec5SDimitry Andric return false; 3940b57cec5SDimitry Andric StringRef Name = F->getName(); 3950b57cec5SDimitry Andric // leave setjmp and longjmp (mostly) alone, we process them properly later 396fe6060f1SDimitry Andric if (Name == "setjmp" || Name == "longjmp" || Name == "emscripten_longjmp") 3970b57cec5SDimitry Andric return false; 3980b57cec5SDimitry Andric return !F->doesNotThrow(); 3990b57cec5SDimitry Andric } 4000b57cec5SDimitry Andric // not a function, so an indirect call - can throw, we can't tell 4010b57cec5SDimitry Andric return true; 4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric 40481ad6265SDimitry Andric // Get a thread-local global variable with the given name. 
If it doesn't exist 40581ad6265SDimitry Andric // declare it, which will generate an import and assume that it will exist at 40681ad6265SDimitry Andric // link time. 407fe6060f1SDimitry Andric static GlobalVariable *getGlobalVariable(Module &M, Type *Ty, 408e8d8bef9SDimitry Andric WebAssemblyTargetMachine &TM, 4090b57cec5SDimitry Andric const char *Name) { 410fe6060f1SDimitry Andric auto *GV = dyn_cast<GlobalVariable>(M.getOrInsertGlobal(Name, Ty)); 4110b57cec5SDimitry Andric if (!GV) 4120b57cec5SDimitry Andric report_fatal_error(Twine("unable to create global: ") + Name); 4130b57cec5SDimitry Andric 41481ad6265SDimitry Andric // Variables created by this function are thread local. If the target does not 41581ad6265SDimitry Andric // support TLS, we depend on CoalesceFeaturesAndStripAtomics to downgrade it 41681ad6265SDimitry Andric // to non-thread-local ones, in which case we don't allow this object to be 41781ad6265SDimitry Andric // linked with other objects using shared memory. 41881ad6265SDimitry Andric GV->setThreadLocalMode(GlobalValue::GeneralDynamicTLSModel); 4190b57cec5SDimitry Andric return GV; 4200b57cec5SDimitry Andric } 4210b57cec5SDimitry Andric 4220b57cec5SDimitry Andric // Simple function name mangler. 4230b57cec5SDimitry Andric // This function simply takes LLVM's string representation of parameter types 4240b57cec5SDimitry Andric // and concatenate them with '_'. There are non-alphanumeric characters but llc 4250b57cec5SDimitry Andric // is ok with it, and we need to postprocess these names after the lowering 4260b57cec5SDimitry Andric // phase anyway. 
4270b57cec5SDimitry Andric static std::string getSignature(FunctionType *FTy) { 4280b57cec5SDimitry Andric std::string Sig; 4290b57cec5SDimitry Andric raw_string_ostream OS(Sig); 4300b57cec5SDimitry Andric OS << *FTy->getReturnType(); 4310b57cec5SDimitry Andric for (Type *ParamTy : FTy->params()) 4320b57cec5SDimitry Andric OS << "_" << *ParamTy; 4330b57cec5SDimitry Andric if (FTy->isVarArg()) 4340b57cec5SDimitry Andric OS << "_..."; 4350b57cec5SDimitry Andric Sig = OS.str(); 436e8d8bef9SDimitry Andric erase_if(Sig, isSpace); 4370b57cec5SDimitry Andric // When s2wasm parses .s file, a comma means the end of an argument. So a 4380b57cec5SDimitry Andric // mangled function name can contain any character but a comma. 4390b57cec5SDimitry Andric std::replace(Sig.begin(), Sig.end(), ',', '.'); 4400b57cec5SDimitry Andric return Sig; 4410b57cec5SDimitry Andric } 4420b57cec5SDimitry Andric 4435ffd83dbSDimitry Andric static Function *getEmscriptenFunction(FunctionType *Ty, const Twine &Name, 4445ffd83dbSDimitry Andric Module *M) { 4455ffd83dbSDimitry Andric Function* F = Function::Create(Ty, GlobalValue::ExternalLinkage, Name, M); 4465ffd83dbSDimitry Andric // Tell the linker that this function is expected to be imported from the 4475ffd83dbSDimitry Andric // 'env' module. 
4485ffd83dbSDimitry Andric if (!F->hasFnAttribute("wasm-import-module")) { 44904eeddc0SDimitry Andric llvm::AttrBuilder B(M->getContext()); 4505ffd83dbSDimitry Andric B.addAttribute("wasm-import-module", "env"); 451349cc55cSDimitry Andric F->addFnAttrs(B); 4525ffd83dbSDimitry Andric } 4535ffd83dbSDimitry Andric if (!F->hasFnAttribute("wasm-import-name")) { 45404eeddc0SDimitry Andric llvm::AttrBuilder B(M->getContext()); 4555ffd83dbSDimitry Andric B.addAttribute("wasm-import-name", F->getName()); 456349cc55cSDimitry Andric F->addFnAttrs(B); 4575ffd83dbSDimitry Andric } 4585ffd83dbSDimitry Andric return F; 4595ffd83dbSDimitry Andric } 4605ffd83dbSDimitry Andric 461fe6060f1SDimitry Andric // Returns an integer type for the target architecture's address space. 462fe6060f1SDimitry Andric // i32 for wasm32 and i64 for wasm64. 463fe6060f1SDimitry Andric static Type *getAddrIntType(Module *M) { 464fe6060f1SDimitry Andric IRBuilder<> IRB(M->getContext()); 465fe6060f1SDimitry Andric return IRB.getIntNTy(M->getDataLayout().getPointerSizeInBits()); 466fe6060f1SDimitry Andric } 467fe6060f1SDimitry Andric 468fe6060f1SDimitry Andric // Returns an integer pointer type for the target architecture's address space. 4695f757f3fSDimitry Andric // i32* for wasm32 and i64* for wasm64. With opaque pointers this is just a ptr 4705f757f3fSDimitry Andric // in address space zero. 471fe6060f1SDimitry Andric static Type *getAddrPtrType(Module *M) { 4725f757f3fSDimitry Andric return PointerType::getUnqual(M->getContext()); 473fe6060f1SDimitry Andric } 474fe6060f1SDimitry Andric 475fe6060f1SDimitry Andric // Returns an integer whose type is the integer type for the target's address 476fe6060f1SDimitry Andric // space. Returns (i32 C) for wasm32 and (i64 C) for wasm64, when C is the 477fe6060f1SDimitry Andric // integer. 
478fe6060f1SDimitry Andric static Value *getAddrSizeInt(Module *M, uint64_t C) { 479fe6060f1SDimitry Andric IRBuilder<> IRB(M->getContext()); 480fe6060f1SDimitry Andric return IRB.getIntN(M->getDataLayout().getPointerSizeInBits(), C); 481fe6060f1SDimitry Andric } 482fe6060f1SDimitry Andric 4830b57cec5SDimitry Andric // Returns __cxa_find_matching_catch_N function, where N = NumClauses + 2. 4840b57cec5SDimitry Andric // This is because a landingpad instruction contains two more arguments, a 4850b57cec5SDimitry Andric // personality function and a cleanup bit, and __cxa_find_matching_catch_N 4860b57cec5SDimitry Andric // functions are named after the number of arguments in the original landingpad 4870b57cec5SDimitry Andric // instruction. 4880b57cec5SDimitry Andric Function * 4890b57cec5SDimitry Andric WebAssemblyLowerEmscriptenEHSjLj::getFindMatchingCatch(Module &M, 4900b57cec5SDimitry Andric unsigned NumClauses) { 4910b57cec5SDimitry Andric if (FindMatchingCatches.count(NumClauses)) 4920b57cec5SDimitry Andric return FindMatchingCatches[NumClauses]; 4935f757f3fSDimitry Andric PointerType *Int8PtrTy = PointerType::getUnqual(M.getContext()); 4940b57cec5SDimitry Andric SmallVector<Type *, 16> Args(NumClauses, Int8PtrTy); 4950b57cec5SDimitry Andric FunctionType *FTy = FunctionType::get(Int8PtrTy, Args, false); 4965ffd83dbSDimitry Andric Function *F = getEmscriptenFunction( 4975ffd83dbSDimitry Andric FTy, "__cxa_find_matching_catch_" + Twine(NumClauses + 2), &M); 4980b57cec5SDimitry Andric FindMatchingCatches[NumClauses] = F; 4990b57cec5SDimitry Andric return F; 5000b57cec5SDimitry Andric } 5010b57cec5SDimitry Andric 5020b57cec5SDimitry Andric // Generate invoke wrapper seqence with preamble and postamble 5030b57cec5SDimitry Andric // Preamble: 5040b57cec5SDimitry Andric // __THREW__ = 0; 5050b57cec5SDimitry Andric // Postamble: 5060b57cec5SDimitry Andric // %__THREW__.val = __THREW__; __THREW__ = 0; 5070b57cec5SDimitry Andric // Returns %__THREW__.val, which indicates 
whether an exception is thrown (or 5080b57cec5SDimitry Andric // whether longjmp occurred), for future use. 5095ffd83dbSDimitry Andric Value *WebAssemblyLowerEmscriptenEHSjLj::wrapInvoke(CallBase *CI) { 510fe6060f1SDimitry Andric Module *M = CI->getModule(); 511fe6060f1SDimitry Andric LLVMContext &C = M->getContext(); 5120b57cec5SDimitry Andric 5130b57cec5SDimitry Andric IRBuilder<> IRB(C); 5140b57cec5SDimitry Andric IRB.SetInsertPoint(CI); 5150b57cec5SDimitry Andric 5160b57cec5SDimitry Andric // Pre-invoke 5170b57cec5SDimitry Andric // __THREW__ = 0; 518fe6060f1SDimitry Andric IRB.CreateStore(getAddrSizeInt(M, 0), ThrewGV); 5190b57cec5SDimitry Andric 5200b57cec5SDimitry Andric // Invoke function wrapper in JavaScript 5210b57cec5SDimitry Andric SmallVector<Value *, 16> Args; 5220b57cec5SDimitry Andric // Put the pointer to the callee as first argument, so it can be called 5230b57cec5SDimitry Andric // within the invoke wrapper later 5245ffd83dbSDimitry Andric Args.push_back(CI->getCalledOperand()); 5250b57cec5SDimitry Andric Args.append(CI->arg_begin(), CI->arg_end()); 5260b57cec5SDimitry Andric CallInst *NewCall = IRB.CreateCall(getInvokeWrapper(CI), Args); 5270b57cec5SDimitry Andric NewCall->takeName(CI); 5288bcb0991SDimitry Andric NewCall->setCallingConv(CallingConv::WASM_EmscriptenInvoke); 5290b57cec5SDimitry Andric NewCall->setDebugLoc(CI->getDebugLoc()); 5300b57cec5SDimitry Andric 5310b57cec5SDimitry Andric // Because we added the pointer to the callee as first argument, all 5320b57cec5SDimitry Andric // argument attribute indices have to be incremented by one. 5330b57cec5SDimitry Andric SmallVector<AttributeSet, 8> ArgAttributes; 5340b57cec5SDimitry Andric const AttributeList &InvokeAL = CI->getAttributes(); 5350b57cec5SDimitry Andric 5360b57cec5SDimitry Andric // No attributes for the callee pointer. 
5370b57cec5SDimitry Andric ArgAttributes.push_back(AttributeSet()); 5380b57cec5SDimitry Andric // Copy the argument attributes from the original 539349cc55cSDimitry Andric for (unsigned I = 0, E = CI->arg_size(); I < E; ++I) 540349cc55cSDimitry Andric ArgAttributes.push_back(InvokeAL.getParamAttrs(I)); 5410b57cec5SDimitry Andric 54204eeddc0SDimitry Andric AttrBuilder FnAttrs(CI->getContext(), InvokeAL.getFnAttrs()); 543bdd1243dSDimitry Andric if (auto Args = FnAttrs.getAllocSizeArgs()) { 5448bcb0991SDimitry Andric // The allocsize attribute (if any) referes to parameters by index and needs 5458bcb0991SDimitry Andric // to be adjusted. 546bdd1243dSDimitry Andric auto [SizeArg, NEltArg] = *Args; 5478bcb0991SDimitry Andric SizeArg += 1; 54881ad6265SDimitry Andric if (NEltArg) 549bdd1243dSDimitry Andric NEltArg = *NEltArg + 1; 5508bcb0991SDimitry Andric FnAttrs.addAllocSizeAttr(SizeArg, NEltArg); 5518bcb0991SDimitry Andric } 5521fd87a68SDimitry Andric // In case the callee has 'noreturn' attribute, We need to remove it, because 5531fd87a68SDimitry Andric // we expect invoke wrappers to return. 5541fd87a68SDimitry Andric FnAttrs.removeAttribute(Attribute::NoReturn); 5558bcb0991SDimitry Andric 5560b57cec5SDimitry Andric // Reconstruct the AttributesList based on the vector we constructed. 
557349cc55cSDimitry Andric AttributeList NewCallAL = AttributeList::get( 558349cc55cSDimitry Andric C, AttributeSet::get(C, FnAttrs), InvokeAL.getRetAttrs(), ArgAttributes); 5590b57cec5SDimitry Andric NewCall->setAttributes(NewCallAL); 5600b57cec5SDimitry Andric 5610b57cec5SDimitry Andric CI->replaceAllUsesWith(NewCall); 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric // Post-invoke 5640b57cec5SDimitry Andric // %__THREW__.val = __THREW__; __THREW__ = 0; 5650b57cec5SDimitry Andric Value *Threw = 566fe6060f1SDimitry Andric IRB.CreateLoad(getAddrIntType(M), ThrewGV, ThrewGV->getName() + ".val"); 567fe6060f1SDimitry Andric IRB.CreateStore(getAddrSizeInt(M, 0), ThrewGV); 5680b57cec5SDimitry Andric return Threw; 5690b57cec5SDimitry Andric } 5700b57cec5SDimitry Andric 5710b57cec5SDimitry Andric // Get matching invoke wrapper based on callee signature 5725ffd83dbSDimitry Andric Function *WebAssemblyLowerEmscriptenEHSjLj::getInvokeWrapper(CallBase *CI) { 5730b57cec5SDimitry Andric Module *M = CI->getModule(); 5740b57cec5SDimitry Andric SmallVector<Type *, 16> ArgTys; 5755ffd83dbSDimitry Andric FunctionType *CalleeFTy = CI->getFunctionType(); 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric std::string Sig = getSignature(CalleeFTy); 57806c3fb27SDimitry Andric if (InvokeWrappers.contains(Sig)) 5790b57cec5SDimitry Andric return InvokeWrappers[Sig]; 5800b57cec5SDimitry Andric 5810b57cec5SDimitry Andric // Put the pointer to the callee as first argument 5820b57cec5SDimitry Andric ArgTys.push_back(PointerType::getUnqual(CalleeFTy)); 5830b57cec5SDimitry Andric // Add argument types 5840b57cec5SDimitry Andric ArgTys.append(CalleeFTy->param_begin(), CalleeFTy->param_end()); 5850b57cec5SDimitry Andric 5860b57cec5SDimitry Andric FunctionType *FTy = FunctionType::get(CalleeFTy->getReturnType(), ArgTys, 5870b57cec5SDimitry Andric CalleeFTy->isVarArg()); 5885ffd83dbSDimitry Andric Function *F = getEmscriptenFunction(FTy, "__invoke_" + Sig, M); 5890b57cec5SDimitry Andric 
InvokeWrappers[Sig] = F; 5900b57cec5SDimitry Andric return F; 5910b57cec5SDimitry Andric } 5920b57cec5SDimitry Andric 593349cc55cSDimitry Andric static bool canLongjmp(const Value *Callee) { 5940b57cec5SDimitry Andric if (auto *CalleeF = dyn_cast<Function>(Callee)) 5950b57cec5SDimitry Andric if (CalleeF->isIntrinsic()) 5960b57cec5SDimitry Andric return false; 5970b57cec5SDimitry Andric 5980b57cec5SDimitry Andric // Attempting to transform inline assembly will result in something like: 5990b57cec5SDimitry Andric // call void @__invoke_void(void ()* asm ...) 6000b57cec5SDimitry Andric // which is invalid because inline assembly blocks do not have addresses 6010b57cec5SDimitry Andric // and can't be passed by pointer. The result is a crash with illegal IR. 6020b57cec5SDimitry Andric if (isa<InlineAsm>(Callee)) 6030b57cec5SDimitry Andric return false; 6048bcb0991SDimitry Andric StringRef CalleeName = Callee->getName(); 6050b57cec5SDimitry Andric 60604eeddc0SDimitry Andric // TODO Include more functions or consider checking with mangled prefixes 60704eeddc0SDimitry Andric 6080b57cec5SDimitry Andric // The reason we include malloc/free here is to exclude the malloc/free 6090b57cec5SDimitry Andric // calls generated in setjmp prep / cleanup routines. 
6108bcb0991SDimitry Andric if (CalleeName == "setjmp" || CalleeName == "malloc" || CalleeName == "free") 6110b57cec5SDimitry Andric return false; 6120b57cec5SDimitry Andric 613fe6060f1SDimitry Andric // There are functions in Emscripten's JS glue code or compiler-rt 6148bcb0991SDimitry Andric if (CalleeName == "__resumeException" || CalleeName == "llvm_eh_typeid_for" || 615*0fca6ea1SDimitry Andric CalleeName == "__wasm_setjmp" || CalleeName == "__wasm_setjmp_test" || 6168bcb0991SDimitry Andric CalleeName == "getTempRet0" || CalleeName == "setTempRet0") 6170b57cec5SDimitry Andric return false; 6180b57cec5SDimitry Andric 6190b57cec5SDimitry Andric // __cxa_find_matching_catch_N functions cannot longjmp 6205f757f3fSDimitry Andric if (Callee->getName().starts_with("__cxa_find_matching_catch_")) 6210b57cec5SDimitry Andric return false; 6220b57cec5SDimitry Andric 6230b57cec5SDimitry Andric // Exception-catching related functions 62404eeddc0SDimitry Andric // 6251fd87a68SDimitry Andric // We intentionally treat __cxa_end_catch longjmpable in Wasm SjLj even though 6261fd87a68SDimitry Andric // it surely cannot longjmp, in order to maintain the unwind relationship from 6271fd87a68SDimitry Andric // all existing catchpads (and calls within them) to catch.dispatch.longjmp. 62804eeddc0SDimitry Andric // 62904eeddc0SDimitry Andric // In Wasm EH + Wasm SjLj, we 63004eeddc0SDimitry Andric // 1. Make all catchswitch and cleanuppad that unwind to caller unwind to 63104eeddc0SDimitry Andric // catch.dispatch.longjmp instead 63204eeddc0SDimitry Andric // 2. 
Convert all longjmpable calls to invokes that unwind to 63304eeddc0SDimitry Andric // catch.dispatch.longjmp 63404eeddc0SDimitry Andric // But catchswitch BBs are removed in isel, so if an EH catchswitch (generated 63504eeddc0SDimitry Andric // from an exception)'s catchpad does not contain any calls that are converted 63604eeddc0SDimitry Andric // into invokes unwinding to catch.dispatch.longjmp, this unwind relationship 63704eeddc0SDimitry Andric // (EH catchswitch BB -> catch.dispatch.longjmp BB) is lost and 63804eeddc0SDimitry Andric // catch.dispatch.longjmp BB can be placed before the EH catchswitch BB in 63904eeddc0SDimitry Andric // CFGSort. 64004eeddc0SDimitry Andric // int ret = setjmp(buf); 64104eeddc0SDimitry Andric // try { 64204eeddc0SDimitry Andric // foo(); // longjmps 64304eeddc0SDimitry Andric // } catch (...) { 64404eeddc0SDimitry Andric // } 64504eeddc0SDimitry Andric // Then in this code, if 'foo' longjmps, it first unwinds to 'catch (...)' 64604eeddc0SDimitry Andric // catchswitch, and is not caught by that catchswitch because it is a longjmp, 64704eeddc0SDimitry Andric // then it should next unwind to catch.dispatch.longjmp BB. But if this 'catch 64804eeddc0SDimitry Andric // (...)' catchswitch -> catch.dispatch.longjmp unwind relationship is lost, 64904eeddc0SDimitry Andric // it will not unwind to catch.dispatch.longjmp, producing an incorrect 65004eeddc0SDimitry Andric // result. 65104eeddc0SDimitry Andric // 65204eeddc0SDimitry Andric // Every catchpad generated by Wasm C++ contains __cxa_end_catch, so we 65304eeddc0SDimitry Andric // intentionally treat it as longjmpable to work around this problem. This is 65404eeddc0SDimitry Andric // a hacky fix but an easy one. 65504eeddc0SDimitry Andric // 65604eeddc0SDimitry Andric // The comment block in findWasmUnwindDestinations() in 65704eeddc0SDimitry Andric // SelectionDAGBuilder.cpp is addressing a similar problem. 
6581fd87a68SDimitry Andric if (CalleeName == "__cxa_end_catch") 6591fd87a68SDimitry Andric return WebAssembly::WasmEnableSjLj; 66004eeddc0SDimitry Andric if (CalleeName == "__cxa_begin_catch" || 6618bcb0991SDimitry Andric CalleeName == "__cxa_allocate_exception" || CalleeName == "__cxa_throw" || 6628bcb0991SDimitry Andric CalleeName == "__clang_call_terminate") 6630b57cec5SDimitry Andric return false; 6640b57cec5SDimitry Andric 66504eeddc0SDimitry Andric // std::terminate, which is generated when another exception occurs while 66604eeddc0SDimitry Andric // handling an exception, cannot longjmp. 66704eeddc0SDimitry Andric if (CalleeName == "_ZSt9terminatev") 66804eeddc0SDimitry Andric return false; 66904eeddc0SDimitry Andric 6700b57cec5SDimitry Andric // Otherwise we don't know 6710b57cec5SDimitry Andric return true; 6720b57cec5SDimitry Andric } 6730b57cec5SDimitry Andric 674349cc55cSDimitry Andric static bool isEmAsmCall(const Value *Callee) { 6758bcb0991SDimitry Andric StringRef CalleeName = Callee->getName(); 6768bcb0991SDimitry Andric // This is an exhaustive list from Emscripten's <emscripten/em_asm.h>. 6778bcb0991SDimitry Andric return CalleeName == "emscripten_asm_const_int" || 6788bcb0991SDimitry Andric CalleeName == "emscripten_asm_const_double" || 6798bcb0991SDimitry Andric CalleeName == "emscripten_asm_const_int_sync_on_main_thread" || 6808bcb0991SDimitry Andric CalleeName == "emscripten_asm_const_double_sync_on_main_thread" || 6818bcb0991SDimitry Andric CalleeName == "emscripten_asm_const_async_on_main_thread"; 6828bcb0991SDimitry Andric } 6838bcb0991SDimitry Andric 684*0fca6ea1SDimitry Andric // Generate __wasm_setjmp_test function call seqence with preamble and 685*0fca6ea1SDimitry Andric // postamble. 
The code this generates is equivalent to the following 686*0fca6ea1SDimitry Andric // JavaScript code: 687fe6060f1SDimitry Andric // %__threwValue.val = __threwValue; 688fe6060f1SDimitry Andric // if (%__THREW__.val != 0 & %__threwValue.val != 0) { 689*0fca6ea1SDimitry Andric // %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId); 6900b57cec5SDimitry Andric // if (%label == 0) 691fe6060f1SDimitry Andric // emscripten_longjmp(%__THREW__.val, %__threwValue.val); 692fe6060f1SDimitry Andric // setTempRet0(%__threwValue.val); 6930b57cec5SDimitry Andric // } else { 6940b57cec5SDimitry Andric // %label = -1; 6950b57cec5SDimitry Andric // } 6960b57cec5SDimitry Andric // %longjmp_result = getTempRet0(); 6970b57cec5SDimitry Andric // 6980b57cec5SDimitry Andric // As output parameters. returns %label, %longjmp_result, and the BB the last 6990b57cec5SDimitry Andric // instruction (%longjmp_result = ...) is in. 7000b57cec5SDimitry Andric void WebAssemblyLowerEmscriptenEHSjLj::wrapTestSetjmp( 701*0fca6ea1SDimitry Andric BasicBlock *BB, DebugLoc DL, Value *Threw, Value *FunctionInvocationId, 702*0fca6ea1SDimitry Andric Value *&Label, Value *&LongjmpResult, BasicBlock *&CallEmLongjmpBB, 703*0fca6ea1SDimitry Andric PHINode *&CallEmLongjmpBBThrewPHI, PHINode *&CallEmLongjmpBBThrewValuePHI, 704*0fca6ea1SDimitry Andric BasicBlock *&EndBB) { 7050b57cec5SDimitry Andric Function *F = BB->getParent(); 706fe6060f1SDimitry Andric Module *M = F->getParent(); 707fe6060f1SDimitry Andric LLVMContext &C = M->getContext(); 7080b57cec5SDimitry Andric IRBuilder<> IRB(C); 7095ffd83dbSDimitry Andric IRB.SetCurrentDebugLocation(DL); 7100b57cec5SDimitry Andric 711fe6060f1SDimitry Andric // if (%__THREW__.val != 0 & %__threwValue.val != 0) 7120b57cec5SDimitry Andric IRB.SetInsertPoint(BB); 7130b57cec5SDimitry Andric BasicBlock *ThenBB1 = BasicBlock::Create(C, "if.then1", F); 7140b57cec5SDimitry Andric BasicBlock *ElseBB1 = BasicBlock::Create(C, "if.else1", F); 7150b57cec5SDimitry Andric 
BasicBlock *EndBB1 = BasicBlock::Create(C, "if.end", F); 716fe6060f1SDimitry Andric Value *ThrewCmp = IRB.CreateICmpNE(Threw, getAddrSizeInt(M, 0)); 7170b57cec5SDimitry Andric Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV, 7180b57cec5SDimitry Andric ThrewValueGV->getName() + ".val"); 7190b57cec5SDimitry Andric Value *ThrewValueCmp = IRB.CreateICmpNE(ThrewValue, IRB.getInt32(0)); 7200b57cec5SDimitry Andric Value *Cmp1 = IRB.CreateAnd(ThrewCmp, ThrewValueCmp, "cmp1"); 7210b57cec5SDimitry Andric IRB.CreateCondBr(Cmp1, ThenBB1, ElseBB1); 7220b57cec5SDimitry Andric 723349cc55cSDimitry Andric // Generate call.em.longjmp BB once and share it within the function 724349cc55cSDimitry Andric if (!CallEmLongjmpBB) { 725349cc55cSDimitry Andric // emscripten_longjmp(%__THREW__.val, %__threwValue.val); 726349cc55cSDimitry Andric CallEmLongjmpBB = BasicBlock::Create(C, "call.em.longjmp", F); 727349cc55cSDimitry Andric IRB.SetInsertPoint(CallEmLongjmpBB); 728349cc55cSDimitry Andric CallEmLongjmpBBThrewPHI = IRB.CreatePHI(getAddrIntType(M), 4, "threw.phi"); 729349cc55cSDimitry Andric CallEmLongjmpBBThrewValuePHI = 730349cc55cSDimitry Andric IRB.CreatePHI(IRB.getInt32Ty(), 4, "threwvalue.phi"); 731349cc55cSDimitry Andric CallEmLongjmpBBThrewPHI->addIncoming(Threw, ThenBB1); 732349cc55cSDimitry Andric CallEmLongjmpBBThrewValuePHI->addIncoming(ThrewValue, ThenBB1); 733349cc55cSDimitry Andric IRB.CreateCall(EmLongjmpF, 734349cc55cSDimitry Andric {CallEmLongjmpBBThrewPHI, CallEmLongjmpBBThrewValuePHI}); 735349cc55cSDimitry Andric IRB.CreateUnreachable(); 736349cc55cSDimitry Andric } else { 737349cc55cSDimitry Andric CallEmLongjmpBBThrewPHI->addIncoming(Threw, ThenBB1); 738349cc55cSDimitry Andric CallEmLongjmpBBThrewValuePHI->addIncoming(ThrewValue, ThenBB1); 739349cc55cSDimitry Andric } 740349cc55cSDimitry Andric 741*0fca6ea1SDimitry Andric // %label = __wasm_setjmp_test(%__THREW__.val, functionInvocationId); 7420b57cec5SDimitry Andric // if (%label == 0) 
7430b57cec5SDimitry Andric IRB.SetInsertPoint(ThenBB1); 7440b57cec5SDimitry Andric BasicBlock *EndBB2 = BasicBlock::Create(C, "if.end2", F); 745fe6060f1SDimitry Andric Value *ThrewPtr = 746fe6060f1SDimitry Andric IRB.CreateIntToPtr(Threw, getAddrPtrType(M), Threw->getName() + ".p"); 747*0fca6ea1SDimitry Andric Value *ThenLabel = IRB.CreateCall(WasmSetjmpTestF, 748*0fca6ea1SDimitry Andric {ThrewPtr, FunctionInvocationId}, "label"); 7490b57cec5SDimitry Andric Value *Cmp2 = IRB.CreateICmpEQ(ThenLabel, IRB.getInt32(0)); 750349cc55cSDimitry Andric IRB.CreateCondBr(Cmp2, CallEmLongjmpBB, EndBB2); 7510b57cec5SDimitry Andric 752fe6060f1SDimitry Andric // setTempRet0(%__threwValue.val); 7530b57cec5SDimitry Andric IRB.SetInsertPoint(EndBB2); 754349cc55cSDimitry Andric IRB.CreateCall(SetTempRet0F, ThrewValue); 7550b57cec5SDimitry Andric IRB.CreateBr(EndBB1); 7560b57cec5SDimitry Andric 7570b57cec5SDimitry Andric IRB.SetInsertPoint(ElseBB1); 7580b57cec5SDimitry Andric IRB.CreateBr(EndBB1); 7590b57cec5SDimitry Andric 7600b57cec5SDimitry Andric // longjmp_result = getTempRet0(); 7610b57cec5SDimitry Andric IRB.SetInsertPoint(EndBB1); 7620b57cec5SDimitry Andric PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label"); 7630b57cec5SDimitry Andric LabelPHI->addIncoming(ThenLabel, EndBB2); 7640b57cec5SDimitry Andric 7650b57cec5SDimitry Andric LabelPHI->addIncoming(IRB.getInt32(-1), ElseBB1); 7660b57cec5SDimitry Andric 7670b57cec5SDimitry Andric // Output parameter assignment 7680b57cec5SDimitry Andric Label = LabelPHI; 7690b57cec5SDimitry Andric EndBB = EndBB1; 770bdd1243dSDimitry Andric LongjmpResult = IRB.CreateCall(GetTempRet0F, std::nullopt, "longjmp_result"); 7710b57cec5SDimitry Andric } 7720b57cec5SDimitry Andric 7730b57cec5SDimitry Andric void WebAssemblyLowerEmscriptenEHSjLj::rebuildSSA(Function &F) { 7740b57cec5SDimitry Andric DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree(); 7750b57cec5SDimitry Andric DT.recalculate(F); // CFG has been changed 
776349cc55cSDimitry Andric 777349cc55cSDimitry Andric SSAUpdaterBulk SSA; 7780b57cec5SDimitry Andric for (BasicBlock &BB : F) { 7790b57cec5SDimitry Andric for (Instruction &I : BB) { 780349cc55cSDimitry Andric unsigned VarID = SSA.AddVariable(I.getName(), I.getType()); 781349cc55cSDimitry Andric // If a value is defined by an invoke instruction, it is only available in 782349cc55cSDimitry Andric // its normal destination and not in its unwind destination. 783349cc55cSDimitry Andric if (auto *II = dyn_cast<InvokeInst>(&I)) 784349cc55cSDimitry Andric SSA.AddAvailableValue(VarID, II->getNormalDest(), II); 785349cc55cSDimitry Andric else 786349cc55cSDimitry Andric SSA.AddAvailableValue(VarID, &BB, &I); 787349cc55cSDimitry Andric for (auto &U : I.uses()) { 7880b57cec5SDimitry Andric auto *User = cast<Instruction>(U.getUser()); 7890b57cec5SDimitry Andric if (auto *UserPN = dyn_cast<PHINode>(User)) 7900b57cec5SDimitry Andric if (UserPN->getIncomingBlock(U) == &BB) 7910b57cec5SDimitry Andric continue; 7920b57cec5SDimitry Andric if (DT.dominates(&I, User)) 7930b57cec5SDimitry Andric continue; 794349cc55cSDimitry Andric SSA.AddUse(VarID, &U); 7950b57cec5SDimitry Andric } 7960b57cec5SDimitry Andric } 7970b57cec5SDimitry Andric } 798349cc55cSDimitry Andric SSA.RewriteAllUses(&DT); 7990b57cec5SDimitry Andric } 8000b57cec5SDimitry Andric 801349cc55cSDimitry Andric // Replace uses of longjmp with a new longjmp function in Emscripten library. 802349cc55cSDimitry Andric // In Emscripten SjLj, the new function is 803349cc55cSDimitry Andric // void emscripten_longjmp(uintptr_t, i32) 804349cc55cSDimitry Andric // In Wasm SjLj, the new function is 805349cc55cSDimitry Andric // void __wasm_longjmp(i8*, i32) 806349cc55cSDimitry Andric // Because the original libc longjmp function takes (jmp_buf*, i32), we need a 807349cc55cSDimitry Andric // ptrtoint/bitcast instruction here to make the type match. 
jmp_buf* will 808349cc55cSDimitry Andric // eventually be lowered to i32/i64 in the wasm backend. 809349cc55cSDimitry Andric void WebAssemblyLowerEmscriptenEHSjLj::replaceLongjmpWith(Function *LongjmpF, 810349cc55cSDimitry Andric Function *NewF) { 811349cc55cSDimitry Andric assert(NewF == EmLongjmpF || NewF == WasmLongjmpF); 812fe6060f1SDimitry Andric Module *M = LongjmpF->getParent(); 813e8d8bef9SDimitry Andric SmallVector<CallInst *, 8> ToErase; 814e8d8bef9SDimitry Andric LLVMContext &C = LongjmpF->getParent()->getContext(); 815e8d8bef9SDimitry Andric IRBuilder<> IRB(C); 816e8d8bef9SDimitry Andric 817349cc55cSDimitry Andric // For calls to longjmp, replace it with emscripten_longjmp/__wasm_longjmp and 818349cc55cSDimitry Andric // cast its first argument (jmp_buf*) appropriately 819e8d8bef9SDimitry Andric for (User *U : LongjmpF->users()) { 820e8d8bef9SDimitry Andric auto *CI = dyn_cast<CallInst>(U); 821e8d8bef9SDimitry Andric if (CI && CI->getCalledFunction() == LongjmpF) { 822e8d8bef9SDimitry Andric IRB.SetInsertPoint(CI); 823349cc55cSDimitry Andric Value *Env = nullptr; 824349cc55cSDimitry Andric if (NewF == EmLongjmpF) 825349cc55cSDimitry Andric Env = 826349cc55cSDimitry Andric IRB.CreatePtrToInt(CI->getArgOperand(0), getAddrIntType(M), "env"); 827349cc55cSDimitry Andric else // WasmLongjmpF 8285f757f3fSDimitry Andric Env = IRB.CreateBitCast(CI->getArgOperand(0), IRB.getPtrTy(), "env"); 829349cc55cSDimitry Andric IRB.CreateCall(NewF, {Env, CI->getArgOperand(1)}); 830e8d8bef9SDimitry Andric ToErase.push_back(CI); 831e8d8bef9SDimitry Andric } 832e8d8bef9SDimitry Andric } 833e8d8bef9SDimitry Andric for (auto *I : ToErase) 834e8d8bef9SDimitry Andric I->eraseFromParent(); 835e8d8bef9SDimitry Andric 836e8d8bef9SDimitry Andric // If we have any remaining uses of longjmp's function pointer, replace it 837349cc55cSDimitry Andric // with (void(*)(jmp_buf*, int))emscripten_longjmp / __wasm_longjmp. 
838e8d8bef9SDimitry Andric if (!LongjmpF->uses().empty()) { 839349cc55cSDimitry Andric Value *NewLongjmp = 840349cc55cSDimitry Andric IRB.CreateBitCast(NewF, LongjmpF->getType(), "longjmp.cast"); 841349cc55cSDimitry Andric LongjmpF->replaceAllUsesWith(NewLongjmp); 842e8d8bef9SDimitry Andric } 843e8d8bef9SDimitry Andric } 844e8d8bef9SDimitry Andric 845349cc55cSDimitry Andric static bool containsLongjmpableCalls(const Function *F) { 846349cc55cSDimitry Andric for (const auto &BB : *F) 847349cc55cSDimitry Andric for (const auto &I : BB) 848349cc55cSDimitry Andric if (const auto *CB = dyn_cast<CallBase>(&I)) 849349cc55cSDimitry Andric if (canLongjmp(CB->getCalledOperand())) 850349cc55cSDimitry Andric return true; 851349cc55cSDimitry Andric return false; 852349cc55cSDimitry Andric } 853349cc55cSDimitry Andric 85404eeddc0SDimitry Andric // When a function contains a setjmp call but not other calls that can longjmp, 85504eeddc0SDimitry Andric // we don't do setjmp transformation for that setjmp. But we need to convert the 85604eeddc0SDimitry Andric // setjmp calls into "i32 0" so they don't cause link time errors. setjmp always 85704eeddc0SDimitry Andric // returns 0 when called directly. 85804eeddc0SDimitry Andric static void nullifySetjmp(Function *F) { 85904eeddc0SDimitry Andric Module &M = *F->getParent(); 86004eeddc0SDimitry Andric IRBuilder<> IRB(M.getContext()); 86104eeddc0SDimitry Andric Function *SetjmpF = M.getFunction("setjmp"); 86204eeddc0SDimitry Andric SmallVector<Instruction *, 1> ToErase; 86304eeddc0SDimitry Andric 8641fd87a68SDimitry Andric for (User *U : make_early_inc_range(SetjmpF->users())) { 8651fd87a68SDimitry Andric auto *CB = cast<CallBase>(U); 8661fd87a68SDimitry Andric BasicBlock *BB = CB->getParent(); 86704eeddc0SDimitry Andric if (BB->getParent() != F) // in other function 86804eeddc0SDimitry Andric continue; 8691fd87a68SDimitry Andric CallInst *CI = nullptr; 8701fd87a68SDimitry Andric // setjmp cannot throw. 
So if it is an invoke, lower it to a call 8711fd87a68SDimitry Andric if (auto *II = dyn_cast<InvokeInst>(CB)) 8721fd87a68SDimitry Andric CI = llvm::changeToCall(II); 8731fd87a68SDimitry Andric else 8741fd87a68SDimitry Andric CI = cast<CallInst>(CB); 87504eeddc0SDimitry Andric ToErase.push_back(CI); 87604eeddc0SDimitry Andric CI->replaceAllUsesWith(IRB.getInt32(0)); 87704eeddc0SDimitry Andric } 87804eeddc0SDimitry Andric for (auto *I : ToErase) 87904eeddc0SDimitry Andric I->eraseFromParent(); 88004eeddc0SDimitry Andric } 88104eeddc0SDimitry Andric 8820b57cec5SDimitry Andric bool WebAssemblyLowerEmscriptenEHSjLj::runOnModule(Module &M) { 8830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "********** Lower Emscripten EH & SjLj **********\n"); 8840b57cec5SDimitry Andric 8850b57cec5SDimitry Andric LLVMContext &C = M.getContext(); 8860b57cec5SDimitry Andric IRBuilder<> IRB(C); 8870b57cec5SDimitry Andric 8880b57cec5SDimitry Andric Function *SetjmpF = M.getFunction("setjmp"); 8890b57cec5SDimitry Andric Function *LongjmpF = M.getFunction("longjmp"); 890349cc55cSDimitry Andric 891349cc55cSDimitry Andric // In some platforms _setjmp and _longjmp are used instead. Change these to 892349cc55cSDimitry Andric // use setjmp/longjmp instead, because we later detect these functions by 893349cc55cSDimitry Andric // their names. 
894349cc55cSDimitry Andric Function *SetjmpF2 = M.getFunction("_setjmp"); 895349cc55cSDimitry Andric Function *LongjmpF2 = M.getFunction("_longjmp"); 896349cc55cSDimitry Andric if (SetjmpF2) { 897349cc55cSDimitry Andric if (SetjmpF) { 898349cc55cSDimitry Andric if (SetjmpF->getFunctionType() != SetjmpF2->getFunctionType()) 899349cc55cSDimitry Andric report_fatal_error("setjmp and _setjmp have different function types"); 900349cc55cSDimitry Andric } else { 901349cc55cSDimitry Andric SetjmpF = Function::Create(SetjmpF2->getFunctionType(), 902349cc55cSDimitry Andric GlobalValue::ExternalLinkage, "setjmp", M); 903349cc55cSDimitry Andric } 904349cc55cSDimitry Andric SetjmpF2->replaceAllUsesWith(SetjmpF); 905349cc55cSDimitry Andric } 906349cc55cSDimitry Andric if (LongjmpF2) { 907349cc55cSDimitry Andric if (LongjmpF) { 908349cc55cSDimitry Andric if (LongjmpF->getFunctionType() != LongjmpF2->getFunctionType()) 909349cc55cSDimitry Andric report_fatal_error( 910349cc55cSDimitry Andric "longjmp and _longjmp have different function types"); 911349cc55cSDimitry Andric } else { 912349cc55cSDimitry Andric LongjmpF = Function::Create(LongjmpF2->getFunctionType(), 913349cc55cSDimitry Andric GlobalValue::ExternalLinkage, "setjmp", M); 914349cc55cSDimitry Andric } 915349cc55cSDimitry Andric LongjmpF2->replaceAllUsesWith(LongjmpF); 916349cc55cSDimitry Andric } 917e8d8bef9SDimitry Andric 918e8d8bef9SDimitry Andric auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 919e8d8bef9SDimitry Andric assert(TPC && "Expected a TargetPassConfig"); 920e8d8bef9SDimitry Andric auto &TM = TPC->getTM<WebAssemblyTargetMachine>(); 921e8d8bef9SDimitry Andric 9220b57cec5SDimitry Andric // Declare (or get) global variables __THREW__, __threwValue, and 9230b57cec5SDimitry Andric // getTempRet0/setTempRet0 function which are used in common for both 9240b57cec5SDimitry Andric // exception handling and setjmp/longjmp handling 925fe6060f1SDimitry Andric ThrewGV = getGlobalVariable(M, getAddrIntType(&M), 
TM, "__THREW__"); 926fe6060f1SDimitry Andric ThrewValueGV = getGlobalVariable(M, IRB.getInt32Ty(), TM, "__threwValue"); 927349cc55cSDimitry Andric GetTempRet0F = getEmscriptenFunction( 9285ffd83dbSDimitry Andric FunctionType::get(IRB.getInt32Ty(), false), "getTempRet0", &M); 929349cc55cSDimitry Andric SetTempRet0F = getEmscriptenFunction( 9300b57cec5SDimitry Andric FunctionType::get(IRB.getVoidTy(), IRB.getInt32Ty(), false), 9315ffd83dbSDimitry Andric "setTempRet0", &M); 932349cc55cSDimitry Andric GetTempRet0F->setDoesNotThrow(); 933349cc55cSDimitry Andric SetTempRet0F->setDoesNotThrow(); 9340b57cec5SDimitry Andric 9350b57cec5SDimitry Andric bool Changed = false; 9360b57cec5SDimitry Andric 937fe6060f1SDimitry Andric // Function registration for exception handling 938349cc55cSDimitry Andric if (EnableEmEH) { 9390b57cec5SDimitry Andric // Register __resumeException function 9400b57cec5SDimitry Andric FunctionType *ResumeFTy = 9415f757f3fSDimitry Andric FunctionType::get(IRB.getVoidTy(), IRB.getPtrTy(), false); 9425ffd83dbSDimitry Andric ResumeF = getEmscriptenFunction(ResumeFTy, "__resumeException", &M); 943349cc55cSDimitry Andric ResumeF->addFnAttr(Attribute::NoReturn); 9440b57cec5SDimitry Andric 9450b57cec5SDimitry Andric // Register llvm_eh_typeid_for function 9460b57cec5SDimitry Andric FunctionType *EHTypeIDTy = 9475f757f3fSDimitry Andric FunctionType::get(IRB.getInt32Ty(), IRB.getPtrTy(), false); 9485ffd83dbSDimitry Andric EHTypeIDF = getEmscriptenFunction(EHTypeIDTy, "llvm_eh_typeid_for", &M); 9490b57cec5SDimitry Andric } 9500b57cec5SDimitry Andric 95104eeddc0SDimitry Andric // Functions that contains calls to setjmp but don't have other longjmpable 95204eeddc0SDimitry Andric // calls within them. 
95304eeddc0SDimitry Andric SmallPtrSet<Function *, 4> SetjmpUsersToNullify; 95404eeddc0SDimitry Andric 955349cc55cSDimitry Andric if ((EnableEmSjLj || EnableWasmSjLj) && SetjmpF) { 956349cc55cSDimitry Andric // Precompute setjmp users 957349cc55cSDimitry Andric for (User *U : SetjmpF->users()) { 958349cc55cSDimitry Andric if (auto *CB = dyn_cast<CallBase>(U)) { 959349cc55cSDimitry Andric auto *UserF = CB->getFunction(); 960349cc55cSDimitry Andric // If a function that calls setjmp does not contain any other calls that 961349cc55cSDimitry Andric // can longjmp, we don't need to do any transformation on that function, 962349cc55cSDimitry Andric // so can ignore it 963349cc55cSDimitry Andric if (containsLongjmpableCalls(UserF)) 964349cc55cSDimitry Andric SetjmpUsers.insert(UserF); 96504eeddc0SDimitry Andric else 96604eeddc0SDimitry Andric SetjmpUsersToNullify.insert(UserF); 967349cc55cSDimitry Andric } else { 968349cc55cSDimitry Andric std::string S; 969349cc55cSDimitry Andric raw_string_ostream SS(S); 970349cc55cSDimitry Andric SS << *U; 971349cc55cSDimitry Andric report_fatal_error(Twine("Indirect use of setjmp is not supported: ") + 972349cc55cSDimitry Andric SS.str()); 973349cc55cSDimitry Andric } 974349cc55cSDimitry Andric } 975349cc55cSDimitry Andric } 976349cc55cSDimitry Andric 977349cc55cSDimitry Andric bool SetjmpUsed = SetjmpF && !SetjmpUsers.empty(); 978349cc55cSDimitry Andric bool LongjmpUsed = LongjmpF && !LongjmpF->use_empty(); 979349cc55cSDimitry Andric DoSjLj = (EnableEmSjLj | EnableWasmSjLj) && (SetjmpUsed || LongjmpUsed); 980349cc55cSDimitry Andric 981fe6060f1SDimitry Andric // Function registration and data pre-gathering for setjmp/longjmp handling 9820b57cec5SDimitry Andric if (DoSjLj) { 983349cc55cSDimitry Andric assert(EnableEmSjLj || EnableWasmSjLj); 984349cc55cSDimitry Andric if (EnableEmSjLj) { 985e8d8bef9SDimitry Andric // Register emscripten_longjmp function 986e8d8bef9SDimitry Andric FunctionType *FTy = FunctionType::get( 
987fe6060f1SDimitry Andric IRB.getVoidTy(), {getAddrIntType(&M), IRB.getInt32Ty()}, false); 988e8d8bef9SDimitry Andric EmLongjmpF = getEmscriptenFunction(FTy, "emscripten_longjmp", &M); 989349cc55cSDimitry Andric EmLongjmpF->addFnAttr(Attribute::NoReturn); 990349cc55cSDimitry Andric } else { // EnableWasmSjLj 9915f757f3fSDimitry Andric Type *Int8PtrTy = IRB.getPtrTy(); 992349cc55cSDimitry Andric // Register __wasm_longjmp function, which calls __builtin_wasm_longjmp. 993349cc55cSDimitry Andric FunctionType *FTy = FunctionType::get( 9945f757f3fSDimitry Andric IRB.getVoidTy(), {Int8PtrTy, IRB.getInt32Ty()}, false); 995349cc55cSDimitry Andric WasmLongjmpF = getEmscriptenFunction(FTy, "__wasm_longjmp", &M); 996349cc55cSDimitry Andric WasmLongjmpF->addFnAttr(Attribute::NoReturn); 997349cc55cSDimitry Andric } 998e8d8bef9SDimitry Andric 9990b57cec5SDimitry Andric if (SetjmpF) { 10005f757f3fSDimitry Andric Type *Int8PtrTy = IRB.getPtrTy(); 10015f757f3fSDimitry Andric Type *Int32PtrTy = IRB.getPtrTy(); 10025f757f3fSDimitry Andric Type *Int32Ty = IRB.getInt32Ty(); 1003*0fca6ea1SDimitry Andric 1004*0fca6ea1SDimitry Andric // Register __wasm_setjmp function 10050b57cec5SDimitry Andric FunctionType *SetjmpFTy = SetjmpF->getFunctionType(); 10065f757f3fSDimitry Andric FunctionType *FTy = FunctionType::get( 1007*0fca6ea1SDimitry Andric IRB.getVoidTy(), {SetjmpFTy->getParamType(0), Int32Ty, Int32PtrTy}, 1008*0fca6ea1SDimitry Andric false); 1009*0fca6ea1SDimitry Andric WasmSetjmpF = getEmscriptenFunction(FTy, "__wasm_setjmp", &M); 10100b57cec5SDimitry Andric 1011*0fca6ea1SDimitry Andric // Register __wasm_setjmp_test function 1012*0fca6ea1SDimitry Andric FTy = FunctionType::get(Int32Ty, {Int32PtrTy, Int32PtrTy}, false); 1013*0fca6ea1SDimitry Andric WasmSetjmpTestF = getEmscriptenFunction(FTy, "__wasm_setjmp_test", &M); 10140b57cec5SDimitry Andric 1015349cc55cSDimitry Andric // wasm.catch() will be lowered down to wasm 'catch' instruction in 1016349cc55cSDimitry Andric // instruction 
selection. 1017349cc55cSDimitry Andric CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch); 1018349cc55cSDimitry Andric // Type for struct __WasmLongjmpArgs 10195f757f3fSDimitry Andric LongjmpArgsTy = StructType::get(Int8PtrTy, // env 10205f757f3fSDimitry Andric Int32Ty // val 1021349cc55cSDimitry Andric ); 1022fe6060f1SDimitry Andric } 1023fe6060f1SDimitry Andric } 1024fe6060f1SDimitry Andric 1025fe6060f1SDimitry Andric // Exception handling transformation 1026349cc55cSDimitry Andric if (EnableEmEH) { 1027fe6060f1SDimitry Andric for (Function &F : M) { 1028fe6060f1SDimitry Andric if (F.isDeclaration()) 1029fe6060f1SDimitry Andric continue; 1030fe6060f1SDimitry Andric Changed |= runEHOnFunction(F); 1031fe6060f1SDimitry Andric } 1032fe6060f1SDimitry Andric } 1033fe6060f1SDimitry Andric 1034fe6060f1SDimitry Andric // Setjmp/longjmp handling transformation 1035fe6060f1SDimitry Andric if (DoSjLj) { 1036fe6060f1SDimitry Andric Changed = true; // We have setjmp or longjmp somewhere 1037fe6060f1SDimitry Andric if (LongjmpF) 1038349cc55cSDimitry Andric replaceLongjmpWith(LongjmpF, EnableEmSjLj ? 
EmLongjmpF : WasmLongjmpF); 1039fe6060f1SDimitry Andric // Only traverse functions that uses setjmp in order not to insert 1040fe6060f1SDimitry Andric // unnecessary prep / cleanup code in every function 1041fe6060f1SDimitry Andric if (SetjmpF) 10420b57cec5SDimitry Andric for (Function *F : SetjmpUsers) 10430b57cec5SDimitry Andric runSjLjOnFunction(*F); 10440b57cec5SDimitry Andric } 10450b57cec5SDimitry Andric 104604eeddc0SDimitry Andric // Replace unnecessary setjmp calls with 0 104704eeddc0SDimitry Andric if ((EnableEmSjLj || EnableWasmSjLj) && !SetjmpUsersToNullify.empty()) { 104804eeddc0SDimitry Andric Changed = true; 104904eeddc0SDimitry Andric assert(SetjmpF); 105004eeddc0SDimitry Andric for (Function *F : SetjmpUsersToNullify) 105104eeddc0SDimitry Andric nullifySetjmp(F); 105204eeddc0SDimitry Andric } 105304eeddc0SDimitry Andric 10540b57cec5SDimitry Andric // Delete unused global variables and functions 105581ad6265SDimitry Andric for (auto *V : {ThrewGV, ThrewValueGV}) 105681ad6265SDimitry Andric if (V && V->use_empty()) 105781ad6265SDimitry Andric V->eraseFromParent(); 105881ad6265SDimitry Andric for (auto *V : {GetTempRet0F, SetTempRet0F, ResumeF, EHTypeIDF, EmLongjmpF, 1059*0fca6ea1SDimitry Andric WasmSetjmpF, WasmSetjmpTestF, WasmLongjmpF, CatchF}) 106081ad6265SDimitry Andric if (V && V->use_empty()) 106181ad6265SDimitry Andric V->eraseFromParent(); 10620b57cec5SDimitry Andric 106381ad6265SDimitry Andric return Changed; 10640b57cec5SDimitry Andric } 10650b57cec5SDimitry Andric 10660b57cec5SDimitry Andric bool WebAssemblyLowerEmscriptenEHSjLj::runEHOnFunction(Function &F) { 10670b57cec5SDimitry Andric Module &M = *F.getParent(); 10680b57cec5SDimitry Andric LLVMContext &C = F.getContext(); 10690b57cec5SDimitry Andric IRBuilder<> IRB(C); 10700b57cec5SDimitry Andric bool Changed = false; 10710b57cec5SDimitry Andric SmallVector<Instruction *, 64> ToErase; 10720b57cec5SDimitry Andric SmallPtrSet<LandingPadInst *, 32> LandingPads; 10730b57cec5SDimitry Andric 
1074349cc55cSDimitry Andric // rethrow.longjmp BB that will be shared within the function. 1075349cc55cSDimitry Andric BasicBlock *RethrowLongjmpBB = nullptr; 1076349cc55cSDimitry Andric // PHI node for the loaded value of __THREW__ global variable in 1077349cc55cSDimitry Andric // rethrow.longjmp BB 1078349cc55cSDimitry Andric PHINode *RethrowLongjmpBBThrewPHI = nullptr; 1079349cc55cSDimitry Andric 10800b57cec5SDimitry Andric for (BasicBlock &BB : F) { 10810b57cec5SDimitry Andric auto *II = dyn_cast<InvokeInst>(BB.getTerminator()); 10820b57cec5SDimitry Andric if (!II) 10830b57cec5SDimitry Andric continue; 108413138422SDimitry Andric Changed = true; 10850b57cec5SDimitry Andric LandingPads.insert(II->getLandingPadInst()); 10860b57cec5SDimitry Andric IRB.SetInsertPoint(II); 10870b57cec5SDimitry Andric 1088fe6060f1SDimitry Andric const Value *Callee = II->getCalledOperand(); 1089fe6060f1SDimitry Andric bool NeedInvoke = supportsException(&F) && canThrow(Callee); 10900b57cec5SDimitry Andric if (NeedInvoke) { 10910b57cec5SDimitry Andric // Wrap invoke with invoke wrapper and generate preamble/postamble 10920b57cec5SDimitry Andric Value *Threw = wrapInvoke(II); 10930b57cec5SDimitry Andric ToErase.push_back(II); 10940b57cec5SDimitry Andric 1095fe6060f1SDimitry Andric // If setjmp/longjmp handling is enabled, the thrown value can be not an 1096fe6060f1SDimitry Andric // exception but a longjmp. If the current function contains calls to 1097fe6060f1SDimitry Andric // setjmp, it will be appropriately handled in runSjLjOnFunction. But even 1098fe6060f1SDimitry Andric // if the function does not contain setjmp calls, we shouldn't silently 1099fe6060f1SDimitry Andric // ignore longjmps; we should rethrow them so they can be correctly 1100349cc55cSDimitry Andric // handled in somewhere up the call chain where setjmp is. 
__THREW__'s 1101349cc55cSDimitry Andric // value is 0 when nothing happened, 1 when an exception is thrown, and 1102349cc55cSDimitry Andric // other values when longjmp is thrown. 1103fe6060f1SDimitry Andric // 1104fe6060f1SDimitry Andric // if (%__THREW__.val == 0 || %__THREW__.val == 1) 1105fe6060f1SDimitry Andric // goto %tail 1106fe6060f1SDimitry Andric // else 1107fe6060f1SDimitry Andric // goto %longjmp.rethrow 1108fe6060f1SDimitry Andric // 1109349cc55cSDimitry Andric // rethrow.longjmp: ;; This is longjmp. Rethrow it 1110fe6060f1SDimitry Andric // %__threwValue.val = __threwValue 1111fe6060f1SDimitry Andric // emscripten_longjmp(%__THREW__.val, %__threwValue.val); 1112fe6060f1SDimitry Andric // 1113fe6060f1SDimitry Andric // tail: ;; Nothing happened or an exception is thrown 1114fe6060f1SDimitry Andric // ... Continue exception handling ... 1115349cc55cSDimitry Andric if (DoSjLj && EnableEmSjLj && !SetjmpUsers.count(&F) && 1116349cc55cSDimitry Andric canLongjmp(Callee)) { 1117349cc55cSDimitry Andric // Create longjmp.rethrow BB once and share it within the function 1118349cc55cSDimitry Andric if (!RethrowLongjmpBB) { 1119349cc55cSDimitry Andric RethrowLongjmpBB = BasicBlock::Create(C, "rethrow.longjmp", &F); 1120349cc55cSDimitry Andric IRB.SetInsertPoint(RethrowLongjmpBB); 1121349cc55cSDimitry Andric RethrowLongjmpBBThrewPHI = 1122349cc55cSDimitry Andric IRB.CreatePHI(getAddrIntType(&M), 4, "threw.phi"); 1123349cc55cSDimitry Andric RethrowLongjmpBBThrewPHI->addIncoming(Threw, &BB); 1124349cc55cSDimitry Andric Value *ThrewValue = IRB.CreateLoad(IRB.getInt32Ty(), ThrewValueGV, 1125349cc55cSDimitry Andric ThrewValueGV->getName() + ".val"); 1126349cc55cSDimitry Andric IRB.CreateCall(EmLongjmpF, {RethrowLongjmpBBThrewPHI, ThrewValue}); 1127349cc55cSDimitry Andric IRB.CreateUnreachable(); 1128349cc55cSDimitry Andric } else { 1129349cc55cSDimitry Andric RethrowLongjmpBBThrewPHI->addIncoming(Threw, &BB); 1130349cc55cSDimitry Andric } 1131349cc55cSDimitry Andric 
1132349cc55cSDimitry Andric IRB.SetInsertPoint(II); // Restore the insert point back 1133fe6060f1SDimitry Andric BasicBlock *Tail = BasicBlock::Create(C, "tail", &F); 1134fe6060f1SDimitry Andric Value *CmpEqOne = 1135fe6060f1SDimitry Andric IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 1), "cmp.eq.one"); 1136fe6060f1SDimitry Andric Value *CmpEqZero = 1137fe6060f1SDimitry Andric IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 0), "cmp.eq.zero"); 1138fe6060f1SDimitry Andric Value *Or = IRB.CreateOr(CmpEqZero, CmpEqOne, "or"); 1139349cc55cSDimitry Andric IRB.CreateCondBr(Or, Tail, RethrowLongjmpBB); 1140fe6060f1SDimitry Andric IRB.SetInsertPoint(Tail); 1141349cc55cSDimitry Andric BB.replaceSuccessorsPhiUsesWith(&BB, Tail); 1142fe6060f1SDimitry Andric } 1143fe6060f1SDimitry Andric 11440b57cec5SDimitry Andric // Insert a branch based on __THREW__ variable 1145fe6060f1SDimitry Andric Value *Cmp = IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 1), "cmp"); 11460b57cec5SDimitry Andric IRB.CreateCondBr(Cmp, II->getUnwindDest(), II->getNormalDest()); 11470b57cec5SDimitry Andric 11480b57cec5SDimitry Andric } else { 11490b57cec5SDimitry Andric // This can't throw, and we don't need this invoke, just replace it with a 11500b57cec5SDimitry Andric // call+branch 115104eeddc0SDimitry Andric changeToCall(II); 11520b57cec5SDimitry Andric } 11530b57cec5SDimitry Andric } 11540b57cec5SDimitry Andric 11550b57cec5SDimitry Andric // Process resume instructions 11560b57cec5SDimitry Andric for (BasicBlock &BB : F) { 11570b57cec5SDimitry Andric // Scan the body of the basic block for resumes 11580b57cec5SDimitry Andric for (Instruction &I : BB) { 11590b57cec5SDimitry Andric auto *RI = dyn_cast<ResumeInst>(&I); 11600b57cec5SDimitry Andric if (!RI) 11610b57cec5SDimitry Andric continue; 116213138422SDimitry Andric Changed = true; 11630b57cec5SDimitry Andric 11640b57cec5SDimitry Andric // Split the input into legal values 11650b57cec5SDimitry Andric Value *Input = RI->getValue(); 11660b57cec5SDimitry Andric 
IRB.SetInsertPoint(RI); 11670b57cec5SDimitry Andric Value *Low = IRB.CreateExtractValue(Input, 0, "low"); 11680b57cec5SDimitry Andric // Create a call to __resumeException function 11690b57cec5SDimitry Andric IRB.CreateCall(ResumeF, {Low}); 11700b57cec5SDimitry Andric // Add a terminator to the block 11710b57cec5SDimitry Andric IRB.CreateUnreachable(); 11720b57cec5SDimitry Andric ToErase.push_back(RI); 11730b57cec5SDimitry Andric } 11740b57cec5SDimitry Andric } 11750b57cec5SDimitry Andric 11760b57cec5SDimitry Andric // Process llvm.eh.typeid.for intrinsics 11770b57cec5SDimitry Andric for (BasicBlock &BB : F) { 11780b57cec5SDimitry Andric for (Instruction &I : BB) { 11790b57cec5SDimitry Andric auto *CI = dyn_cast<CallInst>(&I); 11800b57cec5SDimitry Andric if (!CI) 11810b57cec5SDimitry Andric continue; 11820b57cec5SDimitry Andric const Function *Callee = CI->getCalledFunction(); 11830b57cec5SDimitry Andric if (!Callee) 11840b57cec5SDimitry Andric continue; 11850b57cec5SDimitry Andric if (Callee->getIntrinsicID() != Intrinsic::eh_typeid_for) 11860b57cec5SDimitry Andric continue; 118713138422SDimitry Andric Changed = true; 11880b57cec5SDimitry Andric 11890b57cec5SDimitry Andric IRB.SetInsertPoint(CI); 11900b57cec5SDimitry Andric CallInst *NewCI = 11910b57cec5SDimitry Andric IRB.CreateCall(EHTypeIDF, CI->getArgOperand(0), "typeid"); 11920b57cec5SDimitry Andric CI->replaceAllUsesWith(NewCI); 11930b57cec5SDimitry Andric ToErase.push_back(CI); 11940b57cec5SDimitry Andric } 11950b57cec5SDimitry Andric } 11960b57cec5SDimitry Andric 11970b57cec5SDimitry Andric // Look for orphan landingpads, can occur in blocks with no predecessors 11980b57cec5SDimitry Andric for (BasicBlock &BB : F) { 11990b57cec5SDimitry Andric Instruction *I = BB.getFirstNonPHI(); 12000b57cec5SDimitry Andric if (auto *LPI = dyn_cast<LandingPadInst>(I)) 12010b57cec5SDimitry Andric LandingPads.insert(LPI); 12020b57cec5SDimitry Andric } 120313138422SDimitry Andric Changed |= !LandingPads.empty(); 
12040b57cec5SDimitry Andric 12050b57cec5SDimitry Andric // Handle all the landingpad for this function together, as multiple invokes 12060b57cec5SDimitry Andric // may share a single lp 12070b57cec5SDimitry Andric for (LandingPadInst *LPI : LandingPads) { 12080b57cec5SDimitry Andric IRB.SetInsertPoint(LPI); 12090b57cec5SDimitry Andric SmallVector<Value *, 16> FMCArgs; 12100b57cec5SDimitry Andric for (unsigned I = 0, E = LPI->getNumClauses(); I < E; ++I) { 12110b57cec5SDimitry Andric Constant *Clause = LPI->getClause(I); 121223408297SDimitry Andric // TODO Handle filters (= exception specifications). 121306c3fb27SDimitry Andric // https://github.com/llvm/llvm-project/issues/49740 121423408297SDimitry Andric if (LPI->isCatch(I)) 12150b57cec5SDimitry Andric FMCArgs.push_back(Clause); 12160b57cec5SDimitry Andric } 12170b57cec5SDimitry Andric 12180b57cec5SDimitry Andric // Create a call to __cxa_find_matching_catch_N function 12190b57cec5SDimitry Andric Function *FMCF = getFindMatchingCatch(M, FMCArgs.size()); 12200b57cec5SDimitry Andric CallInst *FMCI = IRB.CreateCall(FMCF, FMCArgs, "fmc"); 1221bdd1243dSDimitry Andric Value *Poison = PoisonValue::get(LPI->getType()); 1222bdd1243dSDimitry Andric Value *Pair0 = IRB.CreateInsertValue(Poison, FMCI, 0, "pair0"); 1223bdd1243dSDimitry Andric Value *TempRet0 = IRB.CreateCall(GetTempRet0F, std::nullopt, "tempret0"); 12240b57cec5SDimitry Andric Value *Pair1 = IRB.CreateInsertValue(Pair0, TempRet0, 1, "pair1"); 12250b57cec5SDimitry Andric 12260b57cec5SDimitry Andric LPI->replaceAllUsesWith(Pair1); 12270b57cec5SDimitry Andric ToErase.push_back(LPI); 12280b57cec5SDimitry Andric } 12290b57cec5SDimitry Andric 12300b57cec5SDimitry Andric // Erase everything we no longer need in this function 12310b57cec5SDimitry Andric for (Instruction *I : ToErase) 12320b57cec5SDimitry Andric I->eraseFromParent(); 12330b57cec5SDimitry Andric 12340b57cec5SDimitry Andric return Changed; 12350b57cec5SDimitry Andric } 12360b57cec5SDimitry Andric 
12375ffd83dbSDimitry Andric // This tries to get debug info from the instruction before which a new 12385ffd83dbSDimitry Andric // instruction will be inserted, and if there's no debug info in that 12395ffd83dbSDimitry Andric // instruction, tries to get the info instead from the previous instruction (if 12405ffd83dbSDimitry Andric // any). If none of these has debug info and a DISubprogram is provided, it 12415ffd83dbSDimitry Andric // creates a dummy debug info with the first line of the function, because IR 12425ffd83dbSDimitry Andric // verifier requires all inlinable callsites should have debug info when both a 12435ffd83dbSDimitry Andric // caller and callee have DISubprogram. If none of these conditions are met, 12445ffd83dbSDimitry Andric // returns empty info. 12455ffd83dbSDimitry Andric static DebugLoc getOrCreateDebugLoc(const Instruction *InsertBefore, 12465ffd83dbSDimitry Andric DISubprogram *SP) { 12475ffd83dbSDimitry Andric assert(InsertBefore); 12485ffd83dbSDimitry Andric if (InsertBefore->getDebugLoc()) 12495ffd83dbSDimitry Andric return InsertBefore->getDebugLoc(); 12505ffd83dbSDimitry Andric const Instruction *Prev = InsertBefore->getPrevNode(); 12515ffd83dbSDimitry Andric if (Prev && Prev->getDebugLoc()) 12525ffd83dbSDimitry Andric return Prev->getDebugLoc(); 12535ffd83dbSDimitry Andric if (SP) 12545ffd83dbSDimitry Andric return DILocation::get(SP->getContext(), SP->getLine(), 1, SP); 12555ffd83dbSDimitry Andric return DebugLoc(); 12565ffd83dbSDimitry Andric } 12575ffd83dbSDimitry Andric 12580b57cec5SDimitry Andric bool WebAssemblyLowerEmscriptenEHSjLj::runSjLjOnFunction(Function &F) { 1259349cc55cSDimitry Andric assert(EnableEmSjLj || EnableWasmSjLj); 12600b57cec5SDimitry Andric Module &M = *F.getParent(); 12610b57cec5SDimitry Andric LLVMContext &C = F.getContext(); 12620b57cec5SDimitry Andric IRBuilder<> IRB(C); 12630b57cec5SDimitry Andric SmallVector<Instruction *, 64> ToErase; 12640b57cec5SDimitry Andric 12650b57cec5SDimitry Andric // Setjmp 
preparation 12660b57cec5SDimitry Andric 1267349cc55cSDimitry Andric BasicBlock *Entry = &F.getEntryBlock(); 1268349cc55cSDimitry Andric DebugLoc FirstDL = getOrCreateDebugLoc(&*Entry->begin(), F.getSubprogram()); 1269349cc55cSDimitry Andric SplitBlock(Entry, &*Entry->getFirstInsertionPt()); 1270349cc55cSDimitry Andric 1271*0fca6ea1SDimitry Andric IRB.SetInsertPoint(Entry->getTerminator()->getIterator()); 1272*0fca6ea1SDimitry Andric // This alloca'ed pointer is used by the runtime to identify function 1273*0fca6ea1SDimitry Andric // invocations. It's just for pointer comparisons. It will never be 1274*0fca6ea1SDimitry Andric // dereferenced. 1275*0fca6ea1SDimitry Andric Instruction *FunctionInvocationId = 1276*0fca6ea1SDimitry Andric IRB.CreateAlloca(IRB.getInt32Ty(), nullptr, "functionInvocationId"); 1277*0fca6ea1SDimitry Andric FunctionInvocationId->setDebugLoc(FirstDL); 12780b57cec5SDimitry Andric 12790b57cec5SDimitry Andric // Setjmp transformation 1280349cc55cSDimitry Andric SmallVector<PHINode *, 4> SetjmpRetPHIs; 12810b57cec5SDimitry Andric Function *SetjmpF = M.getFunction("setjmp"); 128204eeddc0SDimitry Andric for (auto *U : make_early_inc_range(SetjmpF->users())) { 12831fd87a68SDimitry Andric auto *CB = cast<CallBase>(U); 128404eeddc0SDimitry Andric BasicBlock *BB = CB->getParent(); 12850b57cec5SDimitry Andric if (BB->getParent() != &F) // in other function 12860b57cec5SDimitry Andric continue; 128781ad6265SDimitry Andric if (CB->getOperandBundle(LLVMContext::OB_funclet)) { 128881ad6265SDimitry Andric std::string S; 128981ad6265SDimitry Andric raw_string_ostream SS(S); 129081ad6265SDimitry Andric SS << "In function " + F.getName() + 129181ad6265SDimitry Andric ": setjmp within a catch clause is not supported in Wasm EH:\n"; 129281ad6265SDimitry Andric SS << *CB; 129381ad6265SDimitry Andric report_fatal_error(StringRef(SS.str())); 129481ad6265SDimitry Andric } 12950b57cec5SDimitry Andric 129604eeddc0SDimitry Andric CallInst *CI = nullptr; 
129704eeddc0SDimitry Andric // setjmp cannot throw. So if it is an invoke, lower it to a call 129804eeddc0SDimitry Andric if (auto *II = dyn_cast<InvokeInst>(CB)) 129904eeddc0SDimitry Andric CI = llvm::changeToCall(II); 130004eeddc0SDimitry Andric else 130104eeddc0SDimitry Andric CI = cast<CallInst>(CB); 130204eeddc0SDimitry Andric 13030b57cec5SDimitry Andric // The tail is everything right after the call, and will be reached once 13040b57cec5SDimitry Andric // when setjmp is called, and later when longjmp returns to the setjmp 13050b57cec5SDimitry Andric BasicBlock *Tail = SplitBlock(BB, CI->getNextNode()); 13060b57cec5SDimitry Andric // Add a phi to the tail, which will be the output of setjmp, which 13070b57cec5SDimitry Andric // indicates if this is the first call or a longjmp back. The phi directly 13080b57cec5SDimitry Andric // uses the right value based on where we arrive from 13095f757f3fSDimitry Andric IRB.SetInsertPoint(Tail, Tail->getFirstNonPHIIt()); 13100b57cec5SDimitry Andric PHINode *SetjmpRet = IRB.CreatePHI(IRB.getInt32Ty(), 2, "setjmp.ret"); 13110b57cec5SDimitry Andric 13120b57cec5SDimitry Andric // setjmp initial call returns 0 13130b57cec5SDimitry Andric SetjmpRet->addIncoming(IRB.getInt32(0), BB); 13140b57cec5SDimitry Andric // The proper output is now this, not the setjmp call itself 13150b57cec5SDimitry Andric CI->replaceAllUsesWith(SetjmpRet); 13160b57cec5SDimitry Andric // longjmp returns to the setjmp will add themselves to this phi 13170b57cec5SDimitry Andric SetjmpRetPHIs.push_back(SetjmpRet); 13180b57cec5SDimitry Andric 13190b57cec5SDimitry Andric // Fix call target 13200b57cec5SDimitry Andric // Our index in the function is our place in the array + 1 to avoid index 13210b57cec5SDimitry Andric // 0, because index 0 means the longjmp is not ours to handle. 
13220b57cec5SDimitry Andric IRB.SetInsertPoint(CI); 13230b57cec5SDimitry Andric Value *Args[] = {CI->getArgOperand(0), IRB.getInt32(SetjmpRetPHIs.size()), 1324*0fca6ea1SDimitry Andric FunctionInvocationId}; 1325*0fca6ea1SDimitry Andric IRB.CreateCall(WasmSetjmpF, Args); 13260b57cec5SDimitry Andric ToErase.push_back(CI); 13270b57cec5SDimitry Andric } 13280b57cec5SDimitry Andric 1329349cc55cSDimitry Andric // Handle longjmpable calls. 1330349cc55cSDimitry Andric if (EnableEmSjLj) 1331*0fca6ea1SDimitry Andric handleLongjmpableCallsForEmscriptenSjLj(F, FunctionInvocationId, 1332349cc55cSDimitry Andric SetjmpRetPHIs); 1333*0fca6ea1SDimitry Andric else // EnableWasmSjLj 1334*0fca6ea1SDimitry Andric handleLongjmpableCallsForWasmSjLj(F, FunctionInvocationId, SetjmpRetPHIs); 1335349cc55cSDimitry Andric 1336349cc55cSDimitry Andric // Erase everything we no longer need in this function 1337349cc55cSDimitry Andric for (Instruction *I : ToErase) 1338349cc55cSDimitry Andric I->eraseFromParent(); 1339349cc55cSDimitry Andric 1340349cc55cSDimitry Andric // Finally, our modifications to the cfg can break dominance of SSA variables. 1341349cc55cSDimitry Andric // For example, in this code, 1342349cc55cSDimitry Andric // if (x()) { .. setjmp() .. } 1343349cc55cSDimitry Andric // if (y()) { .. longjmp() .. } 1344349cc55cSDimitry Andric // We must split the longjmp block, and it can jump into the block splitted 1345349cc55cSDimitry Andric // from setjmp one. But that means that when we split the setjmp block, it's 1346349cc55cSDimitry Andric // first part no longer dominates its second part - there is a theoretically 1347349cc55cSDimitry Andric // possible control flow path where x() is false, then y() is true and we 1348349cc55cSDimitry Andric // reach the second part of the setjmp block, without ever reaching the first 1349349cc55cSDimitry Andric // part. So, we rebuild SSA form here. 
1350349cc55cSDimitry Andric rebuildSSA(F); 1351349cc55cSDimitry Andric return true; 1352349cc55cSDimitry Andric } 1353349cc55cSDimitry Andric 1354349cc55cSDimitry Andric // Update each call that can longjmp so it can return to the corresponding 1355349cc55cSDimitry Andric // setjmp. Refer to 4) of "Emscripten setjmp/longjmp handling" section in the 1356349cc55cSDimitry Andric // comments at top of the file for details. 1357349cc55cSDimitry Andric void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForEmscriptenSjLj( 1358*0fca6ea1SDimitry Andric Function &F, Instruction *FunctionInvocationId, 1359349cc55cSDimitry Andric SmallVectorImpl<PHINode *> &SetjmpRetPHIs) { 1360349cc55cSDimitry Andric Module &M = *F.getParent(); 1361349cc55cSDimitry Andric LLVMContext &C = F.getContext(); 1362349cc55cSDimitry Andric IRBuilder<> IRB(C); 1363349cc55cSDimitry Andric SmallVector<Instruction *, 64> ToErase; 1364349cc55cSDimitry Andric 1365349cc55cSDimitry Andric // call.em.longjmp BB that will be shared within the function. 1366349cc55cSDimitry Andric BasicBlock *CallEmLongjmpBB = nullptr; 1367349cc55cSDimitry Andric // PHI node for the loaded value of __THREW__ global variable in 1368349cc55cSDimitry Andric // call.em.longjmp BB 1369349cc55cSDimitry Andric PHINode *CallEmLongjmpBBThrewPHI = nullptr; 1370349cc55cSDimitry Andric // PHI node for the loaded value of __threwValue global variable in 1371349cc55cSDimitry Andric // call.em.longjmp BB 1372349cc55cSDimitry Andric PHINode *CallEmLongjmpBBThrewValuePHI = nullptr; 1373349cc55cSDimitry Andric // rethrow.exn BB that will be shared within the function. 1374349cc55cSDimitry Andric BasicBlock *RethrowExnBB = nullptr; 13750b57cec5SDimitry Andric 13760b57cec5SDimitry Andric // Because we are creating new BBs while processing and don't want to make 13770b57cec5SDimitry Andric // all these newly created BBs candidates again for longjmp processing, we 13780b57cec5SDimitry Andric // first make the vector of candidate BBs. 
13790b57cec5SDimitry Andric std::vector<BasicBlock *> BBs; 13800b57cec5SDimitry Andric for (BasicBlock &BB : F) 13810b57cec5SDimitry Andric BBs.push_back(&BB); 13820b57cec5SDimitry Andric 13830b57cec5SDimitry Andric // BBs.size() will change within the loop, so we query it every time 13840b57cec5SDimitry Andric for (unsigned I = 0; I < BBs.size(); I++) { 13850b57cec5SDimitry Andric BasicBlock *BB = BBs[I]; 13860b57cec5SDimitry Andric for (Instruction &I : *BB) { 138781ad6265SDimitry Andric if (isa<InvokeInst>(&I)) { 138881ad6265SDimitry Andric std::string S; 138981ad6265SDimitry Andric raw_string_ostream SS(S); 139081ad6265SDimitry Andric SS << "In function " << F.getName() 139181ad6265SDimitry Andric << ": When using Wasm EH with Emscripten SjLj, there is a " 139281ad6265SDimitry Andric "restriction that `setjmp` function call and exception cannot be " 139381ad6265SDimitry Andric "used within the same function:\n"; 139481ad6265SDimitry Andric SS << I; 139581ad6265SDimitry Andric report_fatal_error(StringRef(SS.str())); 139681ad6265SDimitry Andric } 13970b57cec5SDimitry Andric auto *CI = dyn_cast<CallInst>(&I); 13980b57cec5SDimitry Andric if (!CI) 13990b57cec5SDimitry Andric continue; 14000b57cec5SDimitry Andric 14015ffd83dbSDimitry Andric const Value *Callee = CI->getCalledOperand(); 1402349cc55cSDimitry Andric if (!canLongjmp(Callee)) 14030b57cec5SDimitry Andric continue; 1404349cc55cSDimitry Andric if (isEmAsmCall(Callee)) 14058bcb0991SDimitry Andric report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " + 14068bcb0991SDimitry Andric F.getName() + 14078bcb0991SDimitry Andric ". 
Please consider using EM_JS, or move the " 14088bcb0991SDimitry Andric "EM_ASM into another function.", 14098bcb0991SDimitry Andric false); 14100b57cec5SDimitry Andric 14110b57cec5SDimitry Andric Value *Threw = nullptr; 14120b57cec5SDimitry Andric BasicBlock *Tail; 14135f757f3fSDimitry Andric if (Callee->getName().starts_with("__invoke_")) { 14140b57cec5SDimitry Andric // If invoke wrapper has already been generated for this call in 14150b57cec5SDimitry Andric // previous EH phase, search for the load instruction 14160b57cec5SDimitry Andric // %__THREW__.val = __THREW__; 14170b57cec5SDimitry Andric // in postamble after the invoke wrapper call 14180b57cec5SDimitry Andric LoadInst *ThrewLI = nullptr; 14190b57cec5SDimitry Andric StoreInst *ThrewResetSI = nullptr; 14200b57cec5SDimitry Andric for (auto I = std::next(BasicBlock::iterator(CI)), IE = BB->end(); 14210b57cec5SDimitry Andric I != IE; ++I) { 14220b57cec5SDimitry Andric if (auto *LI = dyn_cast<LoadInst>(I)) 14230b57cec5SDimitry Andric if (auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand())) 14240b57cec5SDimitry Andric if (GV == ThrewGV) { 14250b57cec5SDimitry Andric Threw = ThrewLI = LI; 14260b57cec5SDimitry Andric break; 14270b57cec5SDimitry Andric } 14280b57cec5SDimitry Andric } 14290b57cec5SDimitry Andric // Search for the store instruction after the load above 14300b57cec5SDimitry Andric // __THREW__ = 0; 14310b57cec5SDimitry Andric for (auto I = std::next(BasicBlock::iterator(ThrewLI)), IE = BB->end(); 14320b57cec5SDimitry Andric I != IE; ++I) { 1433fe6060f1SDimitry Andric if (auto *SI = dyn_cast<StoreInst>(I)) { 1434fe6060f1SDimitry Andric if (auto *GV = dyn_cast<GlobalVariable>(SI->getPointerOperand())) { 1435fe6060f1SDimitry Andric if (GV == ThrewGV && 1436fe6060f1SDimitry Andric SI->getValueOperand() == getAddrSizeInt(&M, 0)) { 14370b57cec5SDimitry Andric ThrewResetSI = SI; 14380b57cec5SDimitry Andric break; 14390b57cec5SDimitry Andric } 14400b57cec5SDimitry Andric } 1441fe6060f1SDimitry 
Andric } 1442fe6060f1SDimitry Andric } 14430b57cec5SDimitry Andric assert(Threw && ThrewLI && "Cannot find __THREW__ load after invoke"); 14440b57cec5SDimitry Andric assert(ThrewResetSI && "Cannot find __THREW__ store after invoke"); 14450b57cec5SDimitry Andric Tail = SplitBlock(BB, ThrewResetSI->getNextNode()); 14460b57cec5SDimitry Andric 14470b57cec5SDimitry Andric } else { 14480b57cec5SDimitry Andric // Wrap call with invoke wrapper and generate preamble/postamble 14490b57cec5SDimitry Andric Threw = wrapInvoke(CI); 14500b57cec5SDimitry Andric ToErase.push_back(CI); 14510b57cec5SDimitry Andric Tail = SplitBlock(BB, CI->getNextNode()); 1452fe6060f1SDimitry Andric 1453fe6060f1SDimitry Andric // If exception handling is enabled, the thrown value can be not a 1454fe6060f1SDimitry Andric // longjmp but an exception, in which case we shouldn't silently ignore 1455fe6060f1SDimitry Andric // exceptions; we should rethrow them. 1456fe6060f1SDimitry Andric // __THREW__'s value is 0 when nothing happened, 1 when an exception is 1457fe6060f1SDimitry Andric // thrown, other values when longjmp is thrown. 1458fe6060f1SDimitry Andric // 1459fe6060f1SDimitry Andric // if (%__THREW__.val == 1) 1460fe6060f1SDimitry Andric // goto %eh.rethrow 1461fe6060f1SDimitry Andric // else 1462fe6060f1SDimitry Andric // goto %normal 1463fe6060f1SDimitry Andric // 1464fe6060f1SDimitry Andric // eh.rethrow: ;; Rethrow exception 1465fe6060f1SDimitry Andric // %exn = call @__cxa_find_matching_catch_2() ;; Retrieve thrown ptr 1466fe6060f1SDimitry Andric // __resumeException(%exn) 1467fe6060f1SDimitry Andric // 1468fe6060f1SDimitry Andric // normal: 1469fe6060f1SDimitry Andric // <-- Insertion point. Will insert sjlj handling code from here 1470fe6060f1SDimitry Andric // goto %tail 1471fe6060f1SDimitry Andric // 1472fe6060f1SDimitry Andric // tail: 1473fe6060f1SDimitry Andric // ... 
1474fe6060f1SDimitry Andric if (supportsException(&F) && canThrow(Callee)) { 1475fe6060f1SDimitry Andric // We will add a new conditional branch. So remove the branch created 1476fe6060f1SDimitry Andric // when we split the BB 1477fe6060f1SDimitry Andric ToErase.push_back(BB->getTerminator()); 1478349cc55cSDimitry Andric 1479349cc55cSDimitry Andric // Generate rethrow.exn BB once and share it within the function 1480349cc55cSDimitry Andric if (!RethrowExnBB) { 1481349cc55cSDimitry Andric RethrowExnBB = BasicBlock::Create(C, "rethrow.exn", &F); 1482349cc55cSDimitry Andric IRB.SetInsertPoint(RethrowExnBB); 1483349cc55cSDimitry Andric CallInst *Exn = 1484349cc55cSDimitry Andric IRB.CreateCall(getFindMatchingCatch(M, 0), {}, "exn"); 1485fe6060f1SDimitry Andric IRB.CreateCall(ResumeF, {Exn}); 1486fe6060f1SDimitry Andric IRB.CreateUnreachable(); 1487349cc55cSDimitry Andric } 1488349cc55cSDimitry Andric 1489349cc55cSDimitry Andric IRB.SetInsertPoint(CI); 1490349cc55cSDimitry Andric BasicBlock *NormalBB = BasicBlock::Create(C, "normal", &F); 1491349cc55cSDimitry Andric Value *CmpEqOne = 1492349cc55cSDimitry Andric IRB.CreateICmpEQ(Threw, getAddrSizeInt(&M, 1), "cmp.eq.one"); 1493349cc55cSDimitry Andric IRB.CreateCondBr(CmpEqOne, RethrowExnBB, NormalBB); 1494349cc55cSDimitry Andric 1495fe6060f1SDimitry Andric IRB.SetInsertPoint(NormalBB); 1496fe6060f1SDimitry Andric IRB.CreateBr(Tail); 1497*0fca6ea1SDimitry Andric BB = NormalBB; // New insertion point to insert __wasm_setjmp_test() 1498fe6060f1SDimitry Andric } 14990b57cec5SDimitry Andric } 15000b57cec5SDimitry Andric 15010b57cec5SDimitry Andric // We need to replace the terminator in Tail - SplitBlock makes BB go 15020b57cec5SDimitry Andric // straight to Tail, we need to check if a longjmp occurred, and go to the 15030b57cec5SDimitry Andric // right setjmp-tail if so 15040b57cec5SDimitry Andric ToErase.push_back(BB->getTerminator()); 15050b57cec5SDimitry Andric 1506*0fca6ea1SDimitry Andric // Generate a function call to 
__wasm_setjmp_test function and 1507*0fca6ea1SDimitry Andric // preamble/postamble code to figure out (1) whether longjmp 1508*0fca6ea1SDimitry Andric // occurred (2) if longjmp occurred, which setjmp it corresponds to 15090b57cec5SDimitry Andric Value *Label = nullptr; 15100b57cec5SDimitry Andric Value *LongjmpResult = nullptr; 15110b57cec5SDimitry Andric BasicBlock *EndBB = nullptr; 1512*0fca6ea1SDimitry Andric wrapTestSetjmp(BB, CI->getDebugLoc(), Threw, FunctionInvocationId, Label, 1513*0fca6ea1SDimitry Andric LongjmpResult, CallEmLongjmpBB, CallEmLongjmpBBThrewPHI, 1514*0fca6ea1SDimitry Andric CallEmLongjmpBBThrewValuePHI, EndBB); 15150b57cec5SDimitry Andric assert(Label && LongjmpResult && EndBB); 15160b57cec5SDimitry Andric 15170b57cec5SDimitry Andric // Create switch instruction 15180b57cec5SDimitry Andric IRB.SetInsertPoint(EndBB); 1519bdd1243dSDimitry Andric IRB.SetCurrentDebugLocation(EndBB->back().getDebugLoc()); 15200b57cec5SDimitry Andric SwitchInst *SI = IRB.CreateSwitch(Label, Tail, SetjmpRetPHIs.size()); 15210b57cec5SDimitry Andric // -1 means no longjmp happened, continue normally (will hit the default 15220b57cec5SDimitry Andric // switch case). 0 means a longjmp that is not ours to handle, needs a 15230b57cec5SDimitry Andric // rethrow. Otherwise the index is the same as the index in P+1 (to avoid 15240b57cec5SDimitry Andric // 0). 15250b57cec5SDimitry Andric for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) { 15260b57cec5SDimitry Andric SI->addCase(IRB.getInt32(I + 1), SetjmpRetPHIs[I]->getParent()); 15270b57cec5SDimitry Andric SetjmpRetPHIs[I]->addIncoming(LongjmpResult, EndBB); 15280b57cec5SDimitry Andric } 15290b57cec5SDimitry Andric 15300b57cec5SDimitry Andric // We are splitting the block here, and must continue to find other calls 15310b57cec5SDimitry Andric // in the block - which is now split. 
so continue to traverse in the Tail 15320b57cec5SDimitry Andric BBs.push_back(Tail); 15330b57cec5SDimitry Andric } 15340b57cec5SDimitry Andric } 15350b57cec5SDimitry Andric 15360b57cec5SDimitry Andric for (Instruction *I : ToErase) 15370b57cec5SDimitry Andric I->eraseFromParent(); 15380b57cec5SDimitry Andric } 15390b57cec5SDimitry Andric 154004eeddc0SDimitry Andric static BasicBlock *getCleanupRetUnwindDest(const CleanupPadInst *CPI) { 154104eeddc0SDimitry Andric for (const User *U : CPI->users()) 154204eeddc0SDimitry Andric if (const auto *CRI = dyn_cast<CleanupReturnInst>(U)) 154304eeddc0SDimitry Andric return CRI->getUnwindDest(); 154404eeddc0SDimitry Andric return nullptr; 154504eeddc0SDimitry Andric } 154604eeddc0SDimitry Andric 1547349cc55cSDimitry Andric // Create a catchpad in which we catch a longjmp's env and val arguments, test 1548349cc55cSDimitry Andric // if the longjmp corresponds to one of setjmps in the current function, and if 1549349cc55cSDimitry Andric // so, jump to the setjmp dispatch BB from which we go to one of post-setjmp 1550349cc55cSDimitry Andric // BBs. Refer to 4) of "Wasm setjmp/longjmp handling" section in the comments at 1551349cc55cSDimitry Andric // top of the file for details. 1552349cc55cSDimitry Andric void WebAssemblyLowerEmscriptenEHSjLj::handleLongjmpableCallsForWasmSjLj( 1553*0fca6ea1SDimitry Andric Function &F, Instruction *FunctionInvocationId, 1554349cc55cSDimitry Andric SmallVectorImpl<PHINode *> &SetjmpRetPHIs) { 1555349cc55cSDimitry Andric Module &M = *F.getParent(); 1556349cc55cSDimitry Andric LLVMContext &C = F.getContext(); 1557349cc55cSDimitry Andric IRBuilder<> IRB(C); 1558349cc55cSDimitry Andric 1559349cc55cSDimitry Andric // A function with catchswitch/catchpad instruction should have a personality 1560349cc55cSDimitry Andric // function attached to it. Search for the wasm personality function, and if 1561349cc55cSDimitry Andric // it exists, use it, and if it doesn't, create a dummy personality function. 
1562349cc55cSDimitry Andric // (SjLj is not going to call it anyway.) 1563349cc55cSDimitry Andric if (!F.hasPersonalityFn()) { 1564349cc55cSDimitry Andric StringRef PersName = getEHPersonalityName(EHPersonality::Wasm_CXX); 1565349cc55cSDimitry Andric FunctionType *PersType = 1566349cc55cSDimitry Andric FunctionType::get(IRB.getInt32Ty(), /* isVarArg */ true); 1567349cc55cSDimitry Andric Value *PersF = M.getOrInsertFunction(PersName, PersType).getCallee(); 1568349cc55cSDimitry Andric F.setPersonalityFn( 15695f757f3fSDimitry Andric cast<Constant>(IRB.CreateBitCast(PersF, IRB.getPtrTy()))); 1570349cc55cSDimitry Andric } 1571349cc55cSDimitry Andric 1572349cc55cSDimitry Andric // Use the entry BB's debugloc as a fallback 1573349cc55cSDimitry Andric BasicBlock *Entry = &F.getEntryBlock(); 1574349cc55cSDimitry Andric DebugLoc FirstDL = getOrCreateDebugLoc(&*Entry->begin(), F.getSubprogram()); 1575349cc55cSDimitry Andric IRB.SetCurrentDebugLocation(FirstDL); 1576349cc55cSDimitry Andric 1577349cc55cSDimitry Andric // Add setjmp.dispatch BB right after the entry block. Because we have 1578*0fca6ea1SDimitry Andric // initialized functionInvocationId in the entry block and split the 1579349cc55cSDimitry Andric // rest into another BB, here 'OrigEntry' is the function's original entry 1580349cc55cSDimitry Andric // block before the transformation. 
1581349cc55cSDimitry Andric // 15820b57cec5SDimitry Andric // entry: 1583*0fca6ea1SDimitry Andric // functionInvocationId initialization 1584349cc55cSDimitry Andric // setjmp.dispatch: 1585349cc55cSDimitry Andric // switch will be inserted here later 1586349cc55cSDimitry Andric // entry.split: (OrigEntry) 1587349cc55cSDimitry Andric // the original function starts here 1588349cc55cSDimitry Andric BasicBlock *OrigEntry = Entry->getNextNode(); 1589349cc55cSDimitry Andric BasicBlock *SetjmpDispatchBB = 1590349cc55cSDimitry Andric BasicBlock::Create(C, "setjmp.dispatch", &F, OrigEntry); 1591349cc55cSDimitry Andric cast<BranchInst>(Entry->getTerminator())->setSuccessor(0, SetjmpDispatchBB); 1592349cc55cSDimitry Andric 159304eeddc0SDimitry Andric // Create catch.dispatch.longjmp BB and a catchswitch instruction 159404eeddc0SDimitry Andric BasicBlock *CatchDispatchLongjmpBB = 1595349cc55cSDimitry Andric BasicBlock::Create(C, "catch.dispatch.longjmp", &F); 159604eeddc0SDimitry Andric IRB.SetInsertPoint(CatchDispatchLongjmpBB); 159704eeddc0SDimitry Andric CatchSwitchInst *CatchSwitchLongjmp = 1598349cc55cSDimitry Andric IRB.CreateCatchSwitch(ConstantTokenNone::get(C), nullptr, 1); 1599349cc55cSDimitry Andric 1600349cc55cSDimitry Andric // Create catch.longjmp BB and a catchpad instruction 1601349cc55cSDimitry Andric BasicBlock *CatchLongjmpBB = BasicBlock::Create(C, "catch.longjmp", &F); 160204eeddc0SDimitry Andric CatchSwitchLongjmp->addHandler(CatchLongjmpBB); 1603349cc55cSDimitry Andric IRB.SetInsertPoint(CatchLongjmpBB); 160404eeddc0SDimitry Andric CatchPadInst *CatchPad = IRB.CreateCatchPad(CatchSwitchLongjmp, {}); 1605349cc55cSDimitry Andric 1606349cc55cSDimitry Andric // Wasm throw and catch instructions can throw and catch multiple values, but 1607349cc55cSDimitry Andric // that requires multivalue support in the toolchain, which is currently not 1608349cc55cSDimitry Andric // very reliable. 
We instead throw and catch a pointer to a struct value of 1609349cc55cSDimitry Andric // type 'struct __WasmLongjmpArgs', which is defined in Emscripten. 161006c3fb27SDimitry Andric Instruction *LongjmpArgs = 1611349cc55cSDimitry Andric IRB.CreateCall(CatchF, {IRB.getInt32(WebAssembly::C_LONGJMP)}, "thrown"); 1612349cc55cSDimitry Andric Value *EnvField = 1613349cc55cSDimitry Andric IRB.CreateConstGEP2_32(LongjmpArgsTy, LongjmpArgs, 0, 0, "env_gep"); 1614349cc55cSDimitry Andric Value *ValField = 1615349cc55cSDimitry Andric IRB.CreateConstGEP2_32(LongjmpArgsTy, LongjmpArgs, 0, 1, "val_gep"); 1616349cc55cSDimitry Andric // void *env = __wasm_longjmp_args.env; 16175f757f3fSDimitry Andric Instruction *Env = IRB.CreateLoad(IRB.getPtrTy(), EnvField, "env"); 1618349cc55cSDimitry Andric // int val = __wasm_longjmp_args.val; 1619349cc55cSDimitry Andric Instruction *Val = IRB.CreateLoad(IRB.getInt32Ty(), ValField, "val"); 1620349cc55cSDimitry Andric 1621*0fca6ea1SDimitry Andric // %label = __wasm_setjmp_test(%env, functionInvocatinoId); 1622349cc55cSDimitry Andric // if (%label == 0) 1623349cc55cSDimitry Andric // __wasm_longjmp(%env, %val) 1624349cc55cSDimitry Andric // catchret to %setjmp.dispatch 1625349cc55cSDimitry Andric BasicBlock *ThenBB = BasicBlock::Create(C, "if.then", &F); 1626349cc55cSDimitry Andric BasicBlock *EndBB = BasicBlock::Create(C, "if.end", &F); 1627349cc55cSDimitry Andric Value *EnvP = IRB.CreateBitCast(Env, getAddrPtrType(&M), "env.p"); 1628*0fca6ea1SDimitry Andric Value *Label = IRB.CreateCall(WasmSetjmpTestF, {EnvP, FunctionInvocationId}, 1629349cc55cSDimitry Andric OperandBundleDef("funclet", CatchPad), "label"); 1630349cc55cSDimitry Andric Value *Cmp = IRB.CreateICmpEQ(Label, IRB.getInt32(0)); 1631349cc55cSDimitry Andric IRB.CreateCondBr(Cmp, ThenBB, EndBB); 1632349cc55cSDimitry Andric 1633349cc55cSDimitry Andric IRB.SetInsertPoint(ThenBB); 1634349cc55cSDimitry Andric CallInst *WasmLongjmpCI = IRB.CreateCall( 1635349cc55cSDimitry Andric 
WasmLongjmpF, {Env, Val}, OperandBundleDef("funclet", CatchPad)); 1636349cc55cSDimitry Andric IRB.CreateUnreachable(); 1637349cc55cSDimitry Andric 1638349cc55cSDimitry Andric IRB.SetInsertPoint(EndBB); 1639349cc55cSDimitry Andric // Jump to setjmp.dispatch block 1640349cc55cSDimitry Andric IRB.CreateCatchRet(CatchPad, SetjmpDispatchBB); 1641349cc55cSDimitry Andric 1642349cc55cSDimitry Andric // Go back to setjmp.dispatch BB 1643349cc55cSDimitry Andric // setjmp.dispatch: 1644349cc55cSDimitry Andric // switch %label { 1645349cc55cSDimitry Andric // label 1: goto post-setjmp BB 1 1646349cc55cSDimitry Andric // label 2: goto post-setjmp BB 2 16470b57cec5SDimitry Andric // ... 1648349cc55cSDimitry Andric // default: goto splitted next BB 1649349cc55cSDimitry Andric // } 1650349cc55cSDimitry Andric IRB.SetInsertPoint(SetjmpDispatchBB); 1651349cc55cSDimitry Andric PHINode *LabelPHI = IRB.CreatePHI(IRB.getInt32Ty(), 2, "label.phi"); 1652349cc55cSDimitry Andric LabelPHI->addIncoming(Label, EndBB); 1653349cc55cSDimitry Andric LabelPHI->addIncoming(IRB.getInt32(-1), Entry); 1654349cc55cSDimitry Andric SwitchInst *SI = IRB.CreateSwitch(LabelPHI, OrigEntry, SetjmpRetPHIs.size()); 1655349cc55cSDimitry Andric // -1 means no longjmp happened, continue normally (will hit the default 1656349cc55cSDimitry Andric // switch case). 0 means a longjmp that is not ours to handle, needs a 1657349cc55cSDimitry Andric // rethrow. Otherwise the index is the same as the index in P+1 (to avoid 1658349cc55cSDimitry Andric // 0). 
1659349cc55cSDimitry Andric for (unsigned I = 0; I < SetjmpRetPHIs.size(); I++) { 1660349cc55cSDimitry Andric SI->addCase(IRB.getInt32(I + 1), SetjmpRetPHIs[I]->getParent()); 1661349cc55cSDimitry Andric SetjmpRetPHIs[I]->addIncoming(Val, SetjmpDispatchBB); 16620b57cec5SDimitry Andric } 16630b57cec5SDimitry Andric 1664349cc55cSDimitry Andric // Convert all longjmpable call instructions to invokes that unwind to the 1665349cc55cSDimitry Andric // newly created catch.dispatch.longjmp BB. 166604eeddc0SDimitry Andric SmallVector<CallInst *, 64> LongjmpableCalls; 1667349cc55cSDimitry Andric for (auto *BB = &*F.begin(); BB; BB = BB->getNextNode()) { 166804eeddc0SDimitry Andric for (auto &I : *BB) { 1669349cc55cSDimitry Andric auto *CI = dyn_cast<CallInst>(&I); 1670349cc55cSDimitry Andric if (!CI) 1671349cc55cSDimitry Andric continue; 1672349cc55cSDimitry Andric const Value *Callee = CI->getCalledOperand(); 1673349cc55cSDimitry Andric if (!canLongjmp(Callee)) 1674349cc55cSDimitry Andric continue; 1675349cc55cSDimitry Andric if (isEmAsmCall(Callee)) 1676349cc55cSDimitry Andric report_fatal_error("Cannot use EM_ASM* alongside setjmp/longjmp in " + 1677349cc55cSDimitry Andric F.getName() + 1678349cc55cSDimitry Andric ". Please consider using EM_JS, or move the " 1679349cc55cSDimitry Andric "EM_ASM into another function.", 1680349cc55cSDimitry Andric false); 1681349cc55cSDimitry Andric // This is __wasm_longjmp() call we inserted in this function, which 1682349cc55cSDimitry Andric // rethrows the longjmp when the longjmp does not correspond to one of 1683349cc55cSDimitry Andric // setjmps in this function. We should not convert this call to an invoke. 
1684349cc55cSDimitry Andric if (CI == WasmLongjmpCI) 1685349cc55cSDimitry Andric continue; 168604eeddc0SDimitry Andric LongjmpableCalls.push_back(CI); 168704eeddc0SDimitry Andric } 168804eeddc0SDimitry Andric } 1689349cc55cSDimitry Andric 169004eeddc0SDimitry Andric for (auto *CI : LongjmpableCalls) { 1691349cc55cSDimitry Andric // Even if the callee function has attribute 'nounwind', which is true for 1692349cc55cSDimitry Andric // all C functions, it can longjmp, which means it can throw a Wasm 1693349cc55cSDimitry Andric // exception now. 1694349cc55cSDimitry Andric CI->removeFnAttr(Attribute::NoUnwind); 169504eeddc0SDimitry Andric if (Function *CalleeF = CI->getCalledFunction()) 1696349cc55cSDimitry Andric CalleeF->removeFnAttr(Attribute::NoUnwind); 169704eeddc0SDimitry Andric 169804eeddc0SDimitry Andric // Change it to an invoke and make it unwind to the catch.dispatch.longjmp 169904eeddc0SDimitry Andric // BB. If the call is enclosed in another catchpad/cleanuppad scope, unwind 170004eeddc0SDimitry Andric // to its parent pad's unwind destination instead to preserve the scope 170104eeddc0SDimitry Andric // structure. It will eventually unwind to the catch.dispatch.longjmp. 170204eeddc0SDimitry Andric SmallVector<OperandBundleDef, 1> Bundles; 170304eeddc0SDimitry Andric BasicBlock *UnwindDest = nullptr; 170404eeddc0SDimitry Andric if (auto Bundle = CI->getOperandBundle(LLVMContext::OB_funclet)) { 170504eeddc0SDimitry Andric Instruction *FromPad = cast<Instruction>(Bundle->Inputs[0]); 17061fd87a68SDimitry Andric while (!UnwindDest) { 170704eeddc0SDimitry Andric if (auto *CPI = dyn_cast<CatchPadInst>(FromPad)) { 170804eeddc0SDimitry Andric UnwindDest = CPI->getCatchSwitch()->getUnwindDest(); 17091fd87a68SDimitry Andric break; 171081ad6265SDimitry Andric } 171181ad6265SDimitry Andric if (auto *CPI = dyn_cast<CleanupPadInst>(FromPad)) { 171204eeddc0SDimitry Andric // getCleanupRetUnwindDest() can return nullptr when 171304eeddc0SDimitry Andric // 1. 
This cleanuppad's matching cleanupret uwninds to caller 171404eeddc0SDimitry Andric // 2. There is no matching cleanupret because it ends with 171504eeddc0SDimitry Andric // unreachable. 171604eeddc0SDimitry Andric // In case of 2, we need to traverse the parent pad chain. 171704eeddc0SDimitry Andric UnwindDest = getCleanupRetUnwindDest(CPI); 17181fd87a68SDimitry Andric Value *ParentPad = CPI->getParentPad(); 17191fd87a68SDimitry Andric if (isa<ConstantTokenNone>(ParentPad)) 17201fd87a68SDimitry Andric break; 17211fd87a68SDimitry Andric FromPad = cast<Instruction>(ParentPad); 172204eeddc0SDimitry Andric } 172304eeddc0SDimitry Andric } 172404eeddc0SDimitry Andric } 172504eeddc0SDimitry Andric if (!UnwindDest) 172604eeddc0SDimitry Andric UnwindDest = CatchDispatchLongjmpBB; 172704eeddc0SDimitry Andric changeToInvokeAndSplitBasicBlock(CI, UnwindDest); 1728349cc55cSDimitry Andric } 1729349cc55cSDimitry Andric 173004eeddc0SDimitry Andric SmallVector<Instruction *, 16> ToErase; 173104eeddc0SDimitry Andric for (auto &BB : F) { 173204eeddc0SDimitry Andric if (auto *CSI = dyn_cast<CatchSwitchInst>(BB.getFirstNonPHI())) { 173304eeddc0SDimitry Andric if (CSI != CatchSwitchLongjmp && CSI->unwindsToCaller()) { 173404eeddc0SDimitry Andric IRB.SetInsertPoint(CSI); 173504eeddc0SDimitry Andric ToErase.push_back(CSI); 173604eeddc0SDimitry Andric auto *NewCSI = IRB.CreateCatchSwitch(CSI->getParentPad(), 173704eeddc0SDimitry Andric CatchDispatchLongjmpBB, 1); 173804eeddc0SDimitry Andric NewCSI->addHandler(*CSI->handler_begin()); 173904eeddc0SDimitry Andric NewCSI->takeName(CSI); 174004eeddc0SDimitry Andric CSI->replaceAllUsesWith(NewCSI); 174104eeddc0SDimitry Andric } 174204eeddc0SDimitry Andric } 174304eeddc0SDimitry Andric 174404eeddc0SDimitry Andric if (auto *CRI = dyn_cast<CleanupReturnInst>(BB.getTerminator())) { 174504eeddc0SDimitry Andric if (CRI->unwindsToCaller()) { 174604eeddc0SDimitry Andric IRB.SetInsertPoint(CRI); 174704eeddc0SDimitry Andric ToErase.push_back(CRI); 
174804eeddc0SDimitry Andric IRB.CreateCleanupRet(CRI->getCleanupPad(), CatchDispatchLongjmpBB); 174904eeddc0SDimitry Andric } 1750349cc55cSDimitry Andric } 1751349cc55cSDimitry Andric } 1752349cc55cSDimitry Andric 1753349cc55cSDimitry Andric for (Instruction *I : ToErase) 1754349cc55cSDimitry Andric I->eraseFromParent(); 17550b57cec5SDimitry Andric } 1756