Lines Matching full:ve
1 //===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
9 // This file implements the interfaces that VE uses to lower LLVM code into a
39 #define DEBUG_TYPE "ve-lower"
85 addRegisterClass(MVT::i32, &VE::I32RegClass);
86 addRegisterClass(MVT::i64, &VE::I64RegClass);
87 addRegisterClass(MVT::f32, &VE::F32RegClass);
88 addRegisterClass(MVT::f64, &VE::I64RegClass);
89 addRegisterClass(MVT::f128, &VE::F128RegClass);
93 addRegisterClass(VecVT, &VE::V64RegClass);
94 addRegisterClass(MVT::v256i1, &VE::VMRegClass);
95 addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
103 // VE doesn't have i1 sign extending load.
111 // VE doesn't have floating point extload/truncstore, so expand them.
119 // VE doesn't have fp128 load/store, so expand them in custom lower.
153 // VE doesn't have BRCOND
163 // VE has no REM or DIVREM operations.
169 // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
174 // VE has no MULHU/S or U/SMUL_LOHI operations.
181 // VE has no CTTZ, ROTL, ROTR operations.
186 // VE has 64 bits instruction which works as i64 BSWAP operation. This
191 // VE has only 64 bits instructions which work as i64 BITREVERSE/CTLZ/CTPOP
199 // VE has only 64 bits instructions which work as i64 AND/OR/XOR operations.
212 // VE doesn't have instructions for fp<->uint, so expand them by llvm
229 // VE doesn't have following floating point operations.
235 // VE doesn't have fdiv of f128.
246 // VE doesn't have following floating point math functions.
257 // VE has single and double FMINNUM and FMAXNUM
284 // VE doesn't have following instructions.
415 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
492 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
559 .Case("sp", VE::SX11) // Stack pointer
560 .Case("fp", VE::SX9) // Frame pointer
561 .Case("sl", VE::SX8) // Stack limit
562 .Case("lr", VE::SX10) // Link register
563 .Case("tp", VE::SX14) // Thread pointer
564 .Case("outer", VE::SX12) // Outer register
565 .Case("info", VE::SX17) // Info area register
566 .Case("got", VE::SX15) // Global offset table register
567 .Case("plt", VE::SX16) // Procedure linkage table register
587 // VE target does not yet support tail call optimization.
605 // VE requires to use both register and stack for varargs or no-prototyped
637 // VE needs to get address of callee function in a register
682 RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
713 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
731 SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
838 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
860 // VE uses 64 bit addressing, so we need multiple instructions to generate
890 // It's fast anytime on VE
911 setStackPointerRegisterToSaveRestore(VE::SX11);
921 // VE stores all argument by 8 bytes alignment
1015 // Handle PIC mode first. VE needs a got load for every variable!
1057 // The mappings for emitLeading/TrailingFence for VE is designed by following
1107 // VE uses Release consistency, so need a fence instruction if it is a
1118 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1124 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1133 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1292 Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));
1310 // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
1313 // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1344 SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1345 SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1347 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1393 VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
1414 VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
1456 SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1457 SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1468 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1507 DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
1522 DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
1576 DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1597 // VE f128 values must be stored with 16 bytes alignment. We don't
1963 /// JumpTable for VE.
1965 /// VE cannot generate relocatable symbol in jump table. VE cannot
2028 const TargetRegisterClass *RC = &VE::I64RegClass;
2038 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2042 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2045 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2046 .addReg(VE::SX15)
2054 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2058 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2061 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2078 const TargetRegisterClass *RC = &VE::I64RegClass;
2090 BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
2099 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2103 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2106 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2107 .addReg(VE::SX15)
2119 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2123 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2126 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
2127 .addReg(VE::SX15)
2130 BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
2142 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2146 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2149 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2169 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2241 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2245 MIB.addReg(VE::SX17);
2250 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2261 BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
2269 BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
2276 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
2288 BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
2289 MIB.addReg(VE::SX10);
2294 BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
2298 BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
2318 Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2320 Register FP = VE::SX9;
2321 Register SP = VE::SX11;
2337 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2344 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2352 BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2357 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2364 BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2470 BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
2487 BuildMI(DispatchBB, DL, TII->get(VE::NOP))
2493 BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
2497 const TargetRegisterClass *RC = &VE::I64RegClass;
2499 addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
2502 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
2510 BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
2514 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
2530 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2534 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2537 BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
2538 .addReg(VE::SX15)
2546 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2550 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2553 BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
2569 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2572 BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
2576 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2595 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2598 BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
2605 BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
2608 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2682 case VE::EH_SjLj_LongJmp:
2684 case VE::EH_SjLj_SetJmp:
2686 case VE::EH_SjLj_Setup_Dispatch:
2705 Val <<= 32; // Immediate value of float place at higher bits on VE.
2755 // safe since VE uses f64 result of f128 comparison.
2807 // VE's condition move can handle MImm in True clause, so nothing to do.
2809 // VE's condition move can handle MImm in True clause, so swap True and
2863 // VE's comparison can handle MImm in RHS, so nothing to do.
2865 // VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
2871 // VE's condition move can handle MImm in True clause, so nothing to do.
2873 // VE's condition move can handle MImm in True clause, so swap True and
3021 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3044 // VE Inline Assembly Support
3070 RC = &VE::I64RegClass;
3073 RC = &VE::V64RegClass;
3083 // VE Target Optimization Support
3097 // VE doesn't have vector and not instruction.
3101 // VE allows different immediate values for X and Y where ~X & Y.
3102 // Only simm7 works for X, and only mimm works for Y on VE. However, this
3110 // correctly with Aurora VE.
3141 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
3149 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3199 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
3210 SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),