xref: /llvm-project/llvm/lib/Target/Lanai/LanaiISelLowering.cpp (revision 754ed95b6672b9a678a994cc652862a91cdc4406)
1 //===-- LanaiISelLowering.cpp - Lanai DAG Lowering Implementation ---------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the LanaiTargetLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "LanaiISelLowering.h"
14 #include "LanaiCondCode.h"
15 #include "LanaiMachineFunctionInfo.h"
16 #include "LanaiSubtarget.h"
17 #include "LanaiTargetObjectFile.h"
18 #include "MCTargetDesc/LanaiBaseInfo.h"
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringRef.h"
23 #include "llvm/ADT/StringSwitch.h"
24 #include "llvm/CodeGen/CallingConvLower.h"
25 #include "llvm/CodeGen/MachineFrameInfo.h"
26 #include "llvm/CodeGen/MachineFunction.h"
27 #include "llvm/CodeGen/MachineMemOperand.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/SelectionDAG.h"
30 #include "llvm/CodeGen/SelectionDAGNodes.h"
31 #include "llvm/CodeGen/TargetCallingConv.h"
32 #include "llvm/CodeGen/ValueTypes.h"
33 #include "llvm/CodeGenTypes/MachineValueType.h"
34 #include "llvm/IR/CallingConv.h"
35 #include "llvm/IR/DerivedTypes.h"
36 #include "llvm/IR/Function.h"
37 #include "llvm/IR/GlobalValue.h"
38 #include "llvm/Support/Casting.h"
39 #include "llvm/Support/CodeGen.h"
40 #include "llvm/Support/CommandLine.h"
41 #include "llvm/Support/Debug.h"
42 #include "llvm/Support/ErrorHandling.h"
43 #include "llvm/Support/KnownBits.h"
44 #include "llvm/Support/MathExtras.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include "llvm/Target/TargetMachine.h"
47 #include <cassert>
48 #include <cmath>
49 #include <cstdint>
50 #include <cstdlib>
51 #include <utility>
52 
53 #define DEBUG_TYPE "lanai-lower"
54 
55 using namespace llvm;
56 
57 // Limit on number of instructions the lowered multiplication may have before a
58 // call to the library function should be generated instead. The threshold is
59 // currently set to 14 as this was the smallest threshold that resulted in all
60 // constant multiplications being lowered. A threshold of 5 covered all cases
61 // except for one multiplication which required 14. mulsi3 requires 16
62 // instructions (including the prologue and epilogue but excluding instructions
63 // at call site). Until we can inline mulsi3, generating at most 14 instructions
64 // will be faster than invoking mulsi3.
65 static cl::opt<int> LanaiLowerConstantMulThreshold(
66     "lanai-constant-mul-threshold", cl::Hidden,
67     cl::desc("Maximum number of instruction to generate when lowering constant "
68              "multiplication instead of calling library function [default=14]"),
69     cl::init(14));
70 
71 LanaiTargetLowering::LanaiTargetLowering(const TargetMachine &TM,
72                                          const LanaiSubtarget &STI)
73     : TargetLowering(TM) {
74   // Set up the register classes.
75   addRegisterClass(MVT::i32, &Lanai::GPRRegClass);
76 
77   // Compute derived properties from the register classes
78   TRI = STI.getRegisterInfo();
79   computeRegisterProperties(TRI);
80 
81   setStackPointerRegisterToSaveRestore(Lanai::SP);
82 
83   setOperationAction(ISD::BR_CC, MVT::i32, Custom);
84   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
85   setOperationAction(ISD::BRCOND, MVT::Other, Expand);
86   setOperationAction(ISD::SETCC, MVT::i32, Custom);
87   setOperationAction(ISD::SELECT, MVT::i32, Expand);
88   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
89 
90   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
91   setOperationAction(ISD::BlockAddress, MVT::i32, Custom);
92   setOperationAction(ISD::JumpTable, MVT::i32, Custom);
93   setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
94 
95   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
96   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
97   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
98 
99   setOperationAction(ISD::VASTART, MVT::Other, Custom);
100   setOperationAction(ISD::VAARG, MVT::Other, Expand);
101   setOperationAction(ISD::VACOPY, MVT::Other, Expand);
102   setOperationAction(ISD::VAEND, MVT::Other, Expand);
103 
104   setOperationAction(ISD::SDIV, MVT::i32, Expand);
105   setOperationAction(ISD::UDIV, MVT::i32, Expand);
106   setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
107   setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
108   setOperationAction(ISD::SREM, MVT::i32, Expand);
109   setOperationAction(ISD::UREM, MVT::i32, Expand);
110 
111   setOperationAction(ISD::MUL, MVT::i32, Custom);
112   setOperationAction(ISD::MULHU, MVT::i32, Expand);
113   setOperationAction(ISD::MULHS, MVT::i32, Expand);
114   setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
115   setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
116 
117   setOperationAction(ISD::ROTR, MVT::i32, Expand);
118   setOperationAction(ISD::ROTL, MVT::i32, Expand);
119   setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
120   setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
121   setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
122 
123   setOperationAction(ISD::BSWAP, MVT::i32, Expand);
124   setOperationAction(ISD::CTPOP, MVT::i32, Legal);
125   setOperationAction(ISD::CTLZ, MVT::i32, Legal);
126   setOperationAction(ISD::CTTZ, MVT::i32, Legal);
127 
128   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
129   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
130   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
131 
132   // Extended load operations for i1 types must be promoted
133   for (MVT VT : MVT::integer_valuetypes()) {
134     setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
135     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
136     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
137   }
138 
139   setTargetDAGCombine({ISD::ADD, ISD::SUB, ISD::AND, ISD::OR, ISD::XOR});
140 
141   // Function alignments
142   setMinFunctionAlignment(Align(4));
143   setPrefFunctionAlignment(Align(4));
144 
145   setJumpIsExpensive(true);
146 
147   // TODO: Setting the minimum jump table entries needed before a
148   // switch is transformed to a jump table to 100 to avoid creating jump tables
149   // as this was causing bad performance compared to a large group of if
150   // statements. Re-evaluate this on new benchmarks.
151   setMinimumJumpTableEntries(100);
152 
153   // Use fast calling convention for library functions.
154   for (int I = 0; I < RTLIB::UNKNOWN_LIBCALL; ++I) {
155     setLibcallCallingConv(static_cast<RTLIB::Libcall>(I), CallingConv::Fast);
156   }
157 
158   MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
159   MaxStoresPerMemsetOptSize = 8;
160   MaxStoresPerMemcpy = 16; // For @llvm.memcpy -> sequence of stores
161   MaxStoresPerMemcpyOptSize = 8;
162   MaxStoresPerMemmove = 16; // For @llvm.memmove -> sequence of stores
163   MaxStoresPerMemmoveOptSize = 8;
164 
165   // Booleans always contain 0 or 1.
166   setBooleanContents(ZeroOrOneBooleanContent);
167 
168   setMaxAtomicSizeInBitsSupported(0);
169 }
170 
171 SDValue LanaiTargetLowering::LowerOperation(SDValue Op,
172                                             SelectionDAG &DAG) const {
173   switch (Op.getOpcode()) {
174   case ISD::MUL:
175     return LowerMUL(Op, DAG);
176   case ISD::BR_CC:
177     return LowerBR_CC(Op, DAG);
178   case ISD::ConstantPool:
179     return LowerConstantPool(Op, DAG);
180   case ISD::GlobalAddress:
181     return LowerGlobalAddress(Op, DAG);
182   case ISD::BlockAddress:
183     return LowerBlockAddress(Op, DAG);
184   case ISD::JumpTable:
185     return LowerJumpTable(Op, DAG);
186   case ISD::SELECT_CC:
187     return LowerSELECT_CC(Op, DAG);
188   case ISD::SETCC:
189     return LowerSETCC(Op, DAG);
190   case ISD::SHL_PARTS:
191     return LowerSHL_PARTS(Op, DAG);
192   case ISD::SRL_PARTS:
193     return LowerSRL_PARTS(Op, DAG);
194   case ISD::VASTART:
195     return LowerVASTART(Op, DAG);
196   case ISD::DYNAMIC_STACKALLOC:
197     return LowerDYNAMIC_STACKALLOC(Op, DAG);
198   case ISD::RETURNADDR:
199     return LowerRETURNADDR(Op, DAG);
200   case ISD::FRAMEADDR:
201     return LowerFRAMEADDR(Op, DAG);
202   default:
203     llvm_unreachable("unimplemented operand");
204   }
205 }
206 
207 //===----------------------------------------------------------------------===//
208 //                       Lanai Inline Assembly Support
209 //===----------------------------------------------------------------------===//
210 
211 Register LanaiTargetLowering::getRegisterByName(
212   const char *RegName, LLT /*VT*/,
213   const MachineFunction & /*MF*/) const {
214   // Only unallocatable registers should be matched here.
215   Register Reg = StringSwitch<unsigned>(RegName)
216                      .Case("pc", Lanai::PC)
217                      .Case("sp", Lanai::SP)
218                      .Case("fp", Lanai::FP)
219                      .Case("rr1", Lanai::RR1)
220                      .Case("r10", Lanai::R10)
221                      .Case("rr2", Lanai::RR2)
222                      .Case("r11", Lanai::R11)
223                      .Case("rca", Lanai::RCA)
224                      .Default(0);
225 
226   if (Reg)
227     return Reg;
228   report_fatal_error("Invalid register name global variable");
229 }
230 
231 std::pair<unsigned, const TargetRegisterClass *>
232 LanaiTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
233                                                   StringRef Constraint,
234                                                   MVT VT) const {
235   if (Constraint.size() == 1)
236     // GCC Constraint Letters
237     switch (Constraint[0]) {
238     case 'r': // GENERAL_REGS
239       return std::make_pair(0U, &Lanai::GPRRegClass);
240     default:
241       break;
242     }
243 
244   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
245 }
246 
247 // Examine constraint type and operand type and determine a weight value.
248 // This object must already have been set up with the operand type
249 // and the current alternative constraint selected.
250 TargetLowering::ConstraintWeight
251 LanaiTargetLowering::getSingleConstraintMatchWeight(
252     AsmOperandInfo &Info, const char *Constraint) const {
253   ConstraintWeight Weight = CW_Invalid;
254   Value *CallOperandVal = Info.CallOperandVal;
255   // If we don't have a value, we can't do a match,
256   // but allow it at the lowest weight.
257   if (CallOperandVal == nullptr)
258     return CW_Default;
259   // Look at the constraint type.
260   switch (*Constraint) {
261   case 'I': // signed 16 bit immediate
262   case 'J': // integer zero
263   case 'K': // unsigned 16 bit immediate
264   case 'L': // immediate in the range 0 to 31
265   case 'M': // signed 32 bit immediate where lower 16 bits are 0
266   case 'N': // signed 26 bit immediate
267   case 'O': // integer zero
268     if (isa<ConstantInt>(CallOperandVal))
269       Weight = CW_Constant;
270     break;
271   default:
272     Weight = TargetLowering::getSingleConstraintMatchWeight(Info, Constraint);
273     break;
274   }
275   return Weight;
276 }
277 
278 // LowerAsmOperandForConstraint - Lower the specified operand into the Ops
279 // vector.  If it is invalid, don't add anything to Ops.
280 void LanaiTargetLowering::LowerAsmOperandForConstraint(
281     SDValue Op, StringRef Constraint, std::vector<SDValue> &Ops,
282     SelectionDAG &DAG) const {
283   SDValue Result;
284 
285   // Only support length 1 constraints for now.
286   if (Constraint.size() > 1)
287     return;
288 
289   char ConstraintLetter = Constraint[0];
290   switch (ConstraintLetter) {
291   case 'I': // Signed 16 bit constant
292     // If this fails, the parent routine will give an error
293     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
294       if (isInt<16>(C->getSExtValue())) {
295         Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(C),
296                                        Op.getValueType());
297         break;
298       }
299     }
300     return;
301   case 'J': // integer zero
302   case 'O':
303     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
304       if (C->getZExtValue() == 0) {
305         Result = DAG.getTargetConstant(0, SDLoc(C), Op.getValueType());
306         break;
307       }
308     }
309     return;
310   case 'K': // unsigned 16 bit immediate
311     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
312       if (isUInt<16>(C->getZExtValue())) {
313         Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(C),
314                                        Op.getValueType());
315         break;
316       }
317     }
318     return;
319   case 'L': // immediate in the range 0 to 31
320     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
321       if (C->getZExtValue() <= 31) {
322         Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(C),
323                                        Op.getValueType());
324         break;
325       }
326     }
327     return;
328   case 'M': // signed 32 bit immediate where lower 16 bits are 0
329     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
330       int64_t Val = C->getSExtValue();
331       if ((isInt<32>(Val)) && ((Val & 0xffff) == 0)) {
332         Result = DAG.getTargetConstant(Val, SDLoc(C), Op.getValueType());
333         break;
334       }
335     }
336     return;
337   case 'N': // signed 26 bit immediate
338     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
339       int64_t Val = C->getSExtValue();
340       if ((Val >= -33554432) && (Val <= 33554431)) {
341         Result = DAG.getTargetConstant(Val, SDLoc(C), Op.getValueType());
342         break;
343       }
344     }
345     return;
346   default:
347     break; // This will fall through to the generic implementation
348   }
349 
350   if (Result.getNode()) {
351     Ops.push_back(Result);
352     return;
353   }
354 
355   TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
356 }
357 
358 //===----------------------------------------------------------------------===//
359 //                      Calling Convention Implementation
360 //===----------------------------------------------------------------------===//
361 
362 #include "LanaiGenCallingConv.inc"
363 
364 static unsigned NumFixedArgs;
365 static bool CC_Lanai32_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
366                               CCValAssign::LocInfo LocInfo,
367                               ISD::ArgFlagsTy ArgFlags, CCState &State) {
368   // Handle fixed arguments with default CC.
369   // Note: Both the default and fast CC handle VarArg the same and hence the
370   // calling convention of the function is not considered here.
371   if (ValNo < NumFixedArgs) {
372     return CC_Lanai32(ValNo, ValVT, LocVT, LocInfo, ArgFlags, State);
373   }
374 
375   // Promote i8/i16 args to i32
376   if (LocVT == MVT::i8 || LocVT == MVT::i16) {
377     LocVT = MVT::i32;
378     if (ArgFlags.isSExt())
379       LocInfo = CCValAssign::SExt;
380     else if (ArgFlags.isZExt())
381       LocInfo = CCValAssign::ZExt;
382     else
383       LocInfo = CCValAssign::AExt;
384   }
385 
386   // VarArgs get passed on stack
387   unsigned Offset = State.AllocateStack(4, Align(4));
388   State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
389   return false;
390 }
391 
392 SDValue LanaiTargetLowering::LowerFormalArguments(
393     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
394     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
395     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
396   switch (CallConv) {
397   case CallingConv::C:
398   case CallingConv::Fast:
399     return LowerCCCArguments(Chain, CallConv, IsVarArg, Ins, DL, DAG, InVals);
400   default:
401     report_fatal_error("Unsupported calling convention");
402   }
403 }
404 
405 SDValue LanaiTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
406                                        SmallVectorImpl<SDValue> &InVals) const {
407   SelectionDAG &DAG = CLI.DAG;
408   SDLoc &DL = CLI.DL;
409   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
410   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
411   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
412   SDValue Chain = CLI.Chain;
413   SDValue Callee = CLI.Callee;
414   bool &IsTailCall = CLI.IsTailCall;
415   CallingConv::ID CallConv = CLI.CallConv;
416   bool IsVarArg = CLI.IsVarArg;
417 
418   // Lanai target does not yet support tail call optimization.
419   IsTailCall = false;
420 
421   switch (CallConv) {
422   case CallingConv::Fast:
423   case CallingConv::C:
424     return LowerCCCCallTo(Chain, Callee, CallConv, IsVarArg, IsTailCall, Outs,
425                           OutVals, Ins, DL, DAG, InVals);
426   default:
427     report_fatal_error("Unsupported calling convention");
428   }
429 }
430 
431 // LowerCCCArguments - transform physical registers into virtual registers and
432 // generate load operations for arguments places on the stack.
433 SDValue LanaiTargetLowering::LowerCCCArguments(
434     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
435     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
436     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
437   MachineFunction &MF = DAG.getMachineFunction();
438   MachineFrameInfo &MFI = MF.getFrameInfo();
439   MachineRegisterInfo &RegInfo = MF.getRegInfo();
440   LanaiMachineFunctionInfo *LanaiMFI = MF.getInfo<LanaiMachineFunctionInfo>();
441 
442   // Assign locations to all of the incoming arguments.
443   SmallVector<CCValAssign, 16> ArgLocs;
444   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
445                  *DAG.getContext());
446   if (CallConv == CallingConv::Fast) {
447     CCInfo.AnalyzeFormalArguments(Ins, CC_Lanai32_Fast);
448   } else {
449     CCInfo.AnalyzeFormalArguments(Ins, CC_Lanai32);
450   }
451 
452   for (const CCValAssign &VA : ArgLocs) {
453     if (VA.isRegLoc()) {
454       // Arguments passed in registers
455       EVT RegVT = VA.getLocVT();
456       switch (RegVT.getSimpleVT().SimpleTy) {
457       case MVT::i32: {
458         Register VReg = RegInfo.createVirtualRegister(&Lanai::GPRRegClass);
459         RegInfo.addLiveIn(VA.getLocReg(), VReg);
460         SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);
461 
462         // If this is an 8/16-bit value, it is really passed promoted to 32
463         // bits. Insert an assert[sz]ext to capture this, then truncate to the
464         // right size.
465         if (VA.getLocInfo() == CCValAssign::SExt)
466           ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
467                                  DAG.getValueType(VA.getValVT()));
468         else if (VA.getLocInfo() == CCValAssign::ZExt)
469           ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
470                                  DAG.getValueType(VA.getValVT()));
471 
472         if (VA.getLocInfo() != CCValAssign::Full)
473           ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);
474 
475         InVals.push_back(ArgValue);
476         break;
477       }
478       default:
479         LLVM_DEBUG(dbgs() << "LowerFormalArguments Unhandled argument type: "
480                           << RegVT << "\n");
481         llvm_unreachable("unhandled argument type");
482       }
483     } else {
484       // Only arguments passed on the stack should make it here.
485       assert(VA.isMemLoc());
486       // Load the argument to a virtual register
487       unsigned ObjSize = VA.getLocVT().getSizeInBits() / 8;
488       // Check that the argument fits in stack slot
489       if (ObjSize > 4) {
490         errs() << "LowerFormalArguments Unhandled argument type: "
491                << VA.getLocVT() << "\n";
492       }
493       // Create the frame index object for this incoming parameter...
494       int FI = MFI.CreateFixedObject(ObjSize, VA.getLocMemOffset(), true);
495 
496       // Create the SelectionDAG nodes corresponding to a load
497       // from this parameter
498       SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
499       InVals.push_back(DAG.getLoad(
500           VA.getLocVT(), DL, Chain, FIN,
501           MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
502     }
503   }
504 
505   // The Lanai ABI for returning structs by value requires that we copy
506   // the sret argument into rv for the return. Save the argument into
507   // a virtual register so that we can access it from the return points.
508   if (MF.getFunction().hasStructRetAttr()) {
509     Register Reg = LanaiMFI->getSRetReturnReg();
510     if (!Reg) {
511       Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(MVT::i32));
512       LanaiMFI->setSRetReturnReg(Reg);
513     }
514     SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), DL, Reg, InVals[0]);
515     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Copy, Chain);
516   }
517 
518   if (IsVarArg) {
519     // Record the frame index of the first variable argument
520     // which is a value necessary to VASTART.
521     int FI = MFI.CreateFixedObject(4, CCInfo.getStackSize(), true);
522     LanaiMFI->setVarArgsFrameIndex(FI);
523   }
524 
525   return Chain;
526 }
527 
528 bool LanaiTargetLowering::CanLowerReturn(
529     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
530     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
531     const Type *RetTy) const {
532   SmallVector<CCValAssign, 16> RVLocs;
533   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
534 
535   return CCInfo.CheckReturn(Outs, RetCC_Lanai32);
536 }
537 
538 SDValue
539 LanaiTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
540                                  bool IsVarArg,
541                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
542                                  const SmallVectorImpl<SDValue> &OutVals,
543                                  const SDLoc &DL, SelectionDAG &DAG) const {
544   // CCValAssign - represent the assignment of the return value to a location
545   SmallVector<CCValAssign, 16> RVLocs;
546 
547   // CCState - Info about the registers and stack slot.
548   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
549                  *DAG.getContext());
550 
551   // Analize return values.
552   CCInfo.AnalyzeReturn(Outs, RetCC_Lanai32);
553 
554   SDValue Glue;
555   SmallVector<SDValue, 4> RetOps(1, Chain);
556 
557   // Copy the result values into the output registers.
558   for (unsigned i = 0; i != RVLocs.size(); ++i) {
559     CCValAssign &VA = RVLocs[i];
560     assert(VA.isRegLoc() && "Can only return in registers!");
561 
562     Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);
563 
564     // Guarantee that all emitted copies are stuck together with flags.
565     Glue = Chain.getValue(1);
566     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
567   }
568 
569   // The Lanai ABI for returning structs by value requires that we copy
570   // the sret argument into rv for the return. We saved the argument into
571   // a virtual register in the entry block, so now we copy the value out
572   // and into rv.
573   if (DAG.getMachineFunction().getFunction().hasStructRetAttr()) {
574     MachineFunction &MF = DAG.getMachineFunction();
575     LanaiMachineFunctionInfo *LanaiMFI = MF.getInfo<LanaiMachineFunctionInfo>();
576     Register Reg = LanaiMFI->getSRetReturnReg();
577     assert(Reg &&
578            "SRetReturnReg should have been set in LowerFormalArguments().");
579     SDValue Val =
580         DAG.getCopyFromReg(Chain, DL, Reg, getPointerTy(DAG.getDataLayout()));
581 
582     Chain = DAG.getCopyToReg(Chain, DL, Lanai::RV, Val, Glue);
583     Glue = Chain.getValue(1);
584     RetOps.push_back(
585         DAG.getRegister(Lanai::RV, getPointerTy(DAG.getDataLayout())));
586   }
587 
588   RetOps[0] = Chain; // Update chain
589 
590   unsigned Opc = LanaiISD::RET_GLUE;
591   if (Glue.getNode())
592     RetOps.push_back(Glue);
593 
594   // Return Void
595   return DAG.getNode(Opc, DL, MVT::Other,
596                      ArrayRef<SDValue>(&RetOps[0], RetOps.size()));
597 }
598 
599 // LowerCCCCallTo - functions arguments are copied from virtual regs to
600 // (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
601 SDValue LanaiTargetLowering::LowerCCCCallTo(
602     SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool IsVarArg,
603     bool /*IsTailCall*/, const SmallVectorImpl<ISD::OutputArg> &Outs,
604     const SmallVectorImpl<SDValue> &OutVals,
605     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
606     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
607   // Analyze operands of the call, assigning locations to each operand.
608   SmallVector<CCValAssign, 16> ArgLocs;
609   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
610                  *DAG.getContext());
611   GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
612   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
613 
614   NumFixedArgs = 0;
615   if (IsVarArg && G) {
616     const Function *CalleeFn = dyn_cast<Function>(G->getGlobal());
617     if (CalleeFn)
618       NumFixedArgs = CalleeFn->getFunctionType()->getNumParams();
619   }
620   if (NumFixedArgs)
621     CCInfo.AnalyzeCallOperands(Outs, CC_Lanai32_VarArg);
622   else {
623     if (CallConv == CallingConv::Fast)
624       CCInfo.AnalyzeCallOperands(Outs, CC_Lanai32_Fast);
625     else
626       CCInfo.AnalyzeCallOperands(Outs, CC_Lanai32);
627   }
628 
629   // Get a count of how many bytes are to be pushed on the stack.
630   unsigned NumBytes = CCInfo.getStackSize();
631 
632   // Create local copies for byval args.
633   SmallVector<SDValue, 8> ByValArgs;
634   for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
635     ISD::ArgFlagsTy Flags = Outs[I].Flags;
636     if (!Flags.isByVal())
637       continue;
638 
639     SDValue Arg = OutVals[I];
640     unsigned Size = Flags.getByValSize();
641     Align Alignment = Flags.getNonZeroByValAlign();
642 
643     int FI = MFI.CreateStackObject(Size, Alignment, false);
644     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
645     SDValue SizeNode = DAG.getConstant(Size, DL, MVT::i32);
646 
647     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
648                           /*IsVolatile=*/false,
649                           /*AlwaysInline=*/false,
650                           /*CI=*/nullptr, std::nullopt, MachinePointerInfo(),
651                           MachinePointerInfo());
652     ByValArgs.push_back(FIPtr);
653   }
654 
655   Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
656 
657   SmallVector<std::pair<unsigned, SDValue>, 4> RegsToPass;
658   SmallVector<SDValue, 12> MemOpChains;
659   SDValue StackPtr;
660 
661   // Walk the register/memloc assignments, inserting copies/loads.
662   for (unsigned I = 0, J = 0, E = ArgLocs.size(); I != E; ++I) {
663     CCValAssign &VA = ArgLocs[I];
664     SDValue Arg = OutVals[I];
665     ISD::ArgFlagsTy Flags = Outs[I].Flags;
666 
667     // Promote the value if needed.
668     switch (VA.getLocInfo()) {
669     case CCValAssign::Full:
670       break;
671     case CCValAssign::SExt:
672       Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
673       break;
674     case CCValAssign::ZExt:
675       Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
676       break;
677     case CCValAssign::AExt:
678       Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
679       break;
680     default:
681       llvm_unreachable("Unknown loc info!");
682     }
683 
684     // Use local copy if it is a byval arg.
685     if (Flags.isByVal())
686       Arg = ByValArgs[J++];
687 
688     // Arguments that can be passed on register must be kept at RegsToPass
689     // vector
690     if (VA.isRegLoc()) {
691       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
692     } else {
693       assert(VA.isMemLoc());
694 
695       if (StackPtr.getNode() == nullptr)
696         StackPtr = DAG.getCopyFromReg(Chain, DL, Lanai::SP,
697                                       getPointerTy(DAG.getDataLayout()));
698 
699       SDValue PtrOff =
700           DAG.getNode(ISD::ADD, DL, getPointerTy(DAG.getDataLayout()), StackPtr,
701                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
702 
703       MemOpChains.push_back(
704           DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
705     }
706   }
707 
708   // Transform all store nodes into one single node because all store nodes are
709   // independent of each other.
710   if (!MemOpChains.empty())
711     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
712                         ArrayRef<SDValue>(&MemOpChains[0], MemOpChains.size()));
713 
714   SDValue InGlue;
715 
716   // Build a sequence of copy-to-reg nodes chained together with token chain and
717   // flag operands which copy the outgoing args into registers.  The InGlue in
718   // necessary since all emitted instructions must be stuck together.
719   for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I) {
720     Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[I].first,
721                              RegsToPass[I].second, InGlue);
722     InGlue = Chain.getValue(1);
723   }
724 
725   // If the callee is a GlobalAddress node (quite common, every direct call is)
726   // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
727   // Likewise ExternalSymbol -> TargetExternalSymbol.
728   uint8_t OpFlag = LanaiII::MO_NO_FLAG;
729   if (G) {
730     Callee = DAG.getTargetGlobalAddress(
731         G->getGlobal(), DL, getPointerTy(DAG.getDataLayout()), 0, OpFlag);
732   } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
733     Callee = DAG.getTargetExternalSymbol(
734         E->getSymbol(), getPointerTy(DAG.getDataLayout()), OpFlag);
735   }
736 
737   // Returns a chain & a flag for retval copy to use.
738   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
739   SmallVector<SDValue, 8> Ops;
740   Ops.push_back(Chain);
741   Ops.push_back(Callee);
742 
743   // Add a register mask operand representing the call-preserved registers.
744   // TODO: Should return-twice functions be handled?
745   const uint32_t *Mask =
746       TRI->getCallPreservedMask(DAG.getMachineFunction(), CallConv);
747   assert(Mask && "Missing call preserved mask for calling convention");
748   Ops.push_back(DAG.getRegisterMask(Mask));
749 
750   // Add argument registers to the end of the list so that they are
751   // known live into the call.
752   for (unsigned I = 0, E = RegsToPass.size(); I != E; ++I)
753     Ops.push_back(DAG.getRegister(RegsToPass[I].first,
754                                   RegsToPass[I].second.getValueType()));
755 
756   if (InGlue.getNode())
757     Ops.push_back(InGlue);
758 
759   Chain = DAG.getNode(LanaiISD::CALL, DL, NodeTys,
760                       ArrayRef<SDValue>(&Ops[0], Ops.size()));
761   InGlue = Chain.getValue(1);
762 
763   // Create the CALLSEQ_END node.
764   Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, DL);
765   InGlue = Chain.getValue(1);
766 
767   // Handle result values, copying them out of physregs into vregs that we
768   // return.
769   return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, DL, DAG,
770                          InVals);
771 }
772 
773 // LowerCallResult - Lower the result values of a call into the
774 // appropriate copies out of appropriate physical registers.
775 SDValue LanaiTargetLowering::LowerCallResult(
776     SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
777     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
778     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
779   // Assign locations to each value returned by this call.
780   SmallVector<CCValAssign, 16> RVLocs;
781   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
782                  *DAG.getContext());
783 
784   CCInfo.AnalyzeCallResult(Ins, RetCC_Lanai32);
785 
786   // Copy all of the result registers out of their specified physreg.
787   for (unsigned I = 0; I != RVLocs.size(); ++I) {
788     Chain = DAG.getCopyFromReg(Chain, DL, RVLocs[I].getLocReg(),
789                                RVLocs[I].getValVT(), InGlue)
790                 .getValue(1);
791     InGlue = Chain.getValue(2);
792     InVals.push_back(Chain.getValue(0));
793   }
794 
795   return Chain;
796 }
797 
798 //===----------------------------------------------------------------------===//
799 //                      Custom Lowerings
800 //===----------------------------------------------------------------------===//
801 
802 static LPCC::CondCode IntCondCCodeToICC(SDValue CC, const SDLoc &DL,
803                                         SDValue &RHS, SelectionDAG &DAG) {
804   ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
805 
806   // For integer, only the SETEQ, SETNE, SETLT, SETLE, SETGT, SETGE, SETULT,
807   // SETULE, SETUGT, and SETUGE opcodes are used (see CodeGen/ISDOpcodes.h)
808   // and Lanai only supports integer comparisons, so only provide definitions
809   // for them.
810   switch (SetCCOpcode) {
811   case ISD::SETEQ:
812     return LPCC::ICC_EQ;
813   case ISD::SETGT:
814     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS))
815       if (RHSC->getZExtValue() == 0xFFFFFFFF) {
816         // X > -1 -> X >= 0 -> is_plus(X)
817         RHS = DAG.getConstant(0, DL, RHS.getValueType());
818         return LPCC::ICC_PL;
819       }
820     return LPCC::ICC_GT;
821   case ISD::SETUGT:
822     return LPCC::ICC_UGT;
823   case ISD::SETLT:
824     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS))
825       if (RHSC->getZExtValue() == 0)
826         // X < 0 -> is_minus(X)
827         return LPCC::ICC_MI;
828     return LPCC::ICC_LT;
829   case ISD::SETULT:
830     return LPCC::ICC_ULT;
831   case ISD::SETLE:
832     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS))
833       if (RHSC->getZExtValue() == 0xFFFFFFFF) {
834         // X <= -1 -> X < 0 -> is_minus(X)
835         RHS = DAG.getConstant(0, DL, RHS.getValueType());
836         return LPCC::ICC_MI;
837       }
838     return LPCC::ICC_LE;
839   case ISD::SETULE:
840     return LPCC::ICC_ULE;
841   case ISD::SETGE:
842     if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS))
843       if (RHSC->getZExtValue() == 0)
844         // X >= 0 -> is_plus(X)
845         return LPCC::ICC_PL;
846     return LPCC::ICC_GE;
847   case ISD::SETUGE:
848     return LPCC::ICC_UGE;
849   case ISD::SETNE:
850     return LPCC::ICC_NE;
851   case ISD::SETONE:
852   case ISD::SETUNE:
853   case ISD::SETOGE:
854   case ISD::SETOLE:
855   case ISD::SETOLT:
856   case ISD::SETOGT:
857   case ISD::SETOEQ:
858   case ISD::SETUEQ:
859   case ISD::SETO:
860   case ISD::SETUO:
861     llvm_unreachable("Unsupported comparison.");
862   default:
863     llvm_unreachable("Unknown integer condition code!");
864   }
865 }
866 
867 SDValue LanaiTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
868   SDValue Chain = Op.getOperand(0);
869   SDValue Cond = Op.getOperand(1);
870   SDValue LHS = Op.getOperand(2);
871   SDValue RHS = Op.getOperand(3);
872   SDValue Dest = Op.getOperand(4);
873   SDLoc DL(Op);
874 
875   LPCC::CondCode CC = IntCondCCodeToICC(Cond, DL, RHS, DAG);
876   SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i32);
877   SDValue Glue = DAG.getNode(LanaiISD::SET_FLAG, DL, MVT::Glue, LHS, RHS);
878 
879   return DAG.getNode(LanaiISD::BR_CC, DL, Op.getValueType(), Chain, Dest,
880                      TargetCC, Glue);
881 }
882 
883 SDValue LanaiTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
884   EVT VT = Op->getValueType(0);
885   if (VT != MVT::i32)
886     return SDValue();
887 
888   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op->getOperand(1));
889   if (!C)
890     return SDValue();
891 
892   int64_t MulAmt = C->getSExtValue();
893   int32_t HighestOne = -1;
894   uint32_t NonzeroEntries = 0;
895   int SignedDigit[32] = {0};
896 
897   // Convert to non-adjacent form (NAF) signed-digit representation.
898   // NAF is a signed-digit form where no adjacent digits are non-zero. It is the
899   // minimal Hamming weight representation of a number (on average 1/3 of the
900   // digits will be non-zero vs 1/2 for regular binary representation). And as
901   // the non-zero digits will be the only digits contributing to the instruction
902   // count, this is desirable. The next loop converts it to NAF (following the
903   // approach in 'Guide to Elliptic Curve Cryptography' [ISBN: 038795273X]) by
904   // choosing the non-zero coefficients such that the resulting quotient is
905   // divisible by 2 which will cause the next coefficient to be zero.
906   int64_t E = std::abs(MulAmt);
907   int S = (MulAmt < 0 ? -1 : 1);
908   int I = 0;
909   while (E > 0) {
910     int ZI = 0;
911     if (E % 2 == 1) {
912       ZI = 2 - (E % 4);
913       if (ZI != 0)
914         ++NonzeroEntries;
915     }
916     SignedDigit[I] = S * ZI;
917     if (SignedDigit[I] == 1)
918       HighestOne = I;
919     E = (E - ZI) / 2;
920     ++I;
921   }
922 
923   // Compute number of instructions required. Due to differences in lowering
924   // between the different processors this count is not exact.
925   // Start by assuming a shift and a add/sub for every non-zero entry (hence
926   // every non-zero entry requires 1 shift and 1 add/sub except for the first
927   // entry).
928   int32_t InstrRequired = 2 * NonzeroEntries - 1;
929   // Correct possible over-adding due to shift by 0 (which is not emitted).
930   if (std::abs(MulAmt) % 2 == 1)
931     --InstrRequired;
932   // Return if the form generated would exceed the instruction threshold.
933   if (InstrRequired > LanaiLowerConstantMulThreshold)
934     return SDValue();
935 
936   SDValue Res;
937   SDLoc DL(Op);
938   SDValue V = Op->getOperand(0);
939 
940   // Initialize the running sum. Set the running sum to the maximal shifted
941   // positive value (i.e., largest i such that zi == 1 and MulAmt has V<<i as a
942   // term NAF).
943   if (HighestOne == -1)
944     Res = DAG.getConstant(0, DL, MVT::i32);
945   else {
946     Res = DAG.getNode(ISD::SHL, DL, VT, V,
947                       DAG.getConstant(HighestOne, DL, MVT::i32));
948     SignedDigit[HighestOne] = 0;
949   }
950 
951   // Assemble multiplication from shift, add, sub using NAF form and running
952   // sum.
953   for (unsigned int I = 0; I < std::size(SignedDigit); ++I) {
954     if (SignedDigit[I] == 0)
955       continue;
956 
957     // Shifted multiplicand (v<<i).
958     SDValue Op =
959         DAG.getNode(ISD::SHL, DL, VT, V, DAG.getConstant(I, DL, MVT::i32));
960     if (SignedDigit[I] == 1)
961       Res = DAG.getNode(ISD::ADD, DL, VT, Res, Op);
962     else if (SignedDigit[I] == -1)
963       Res = DAG.getNode(ISD::SUB, DL, VT, Res, Op);
964   }
965   return Res;
966 }
967 
968 SDValue LanaiTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
969   SDValue LHS = Op.getOperand(0);
970   SDValue RHS = Op.getOperand(1);
971   SDValue Cond = Op.getOperand(2);
972   SDLoc DL(Op);
973 
974   LPCC::CondCode CC = IntCondCCodeToICC(Cond, DL, RHS, DAG);
975   SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i32);
976   SDValue Glue = DAG.getNode(LanaiISD::SET_FLAG, DL, MVT::Glue, LHS, RHS);
977 
978   return DAG.getNode(LanaiISD::SETCC, DL, Op.getValueType(), TargetCC, Glue);
979 }
980 
981 SDValue LanaiTargetLowering::LowerSELECT_CC(SDValue Op,
982                                             SelectionDAG &DAG) const {
983   SDValue LHS = Op.getOperand(0);
984   SDValue RHS = Op.getOperand(1);
985   SDValue TrueV = Op.getOperand(2);
986   SDValue FalseV = Op.getOperand(3);
987   SDValue Cond = Op.getOperand(4);
988   SDLoc DL(Op);
989 
990   LPCC::CondCode CC = IntCondCCodeToICC(Cond, DL, RHS, DAG);
991   SDValue TargetCC = DAG.getConstant(CC, DL, MVT::i32);
992   SDValue Glue = DAG.getNode(LanaiISD::SET_FLAG, DL, MVT::Glue, LHS, RHS);
993 
994   return DAG.getNode(LanaiISD::SELECT_CC, DL, Op.getValueType(), TrueV, FalseV,
995                      TargetCC, Glue);
996 }
997 
998 SDValue LanaiTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
999   MachineFunction &MF = DAG.getMachineFunction();
1000   LanaiMachineFunctionInfo *FuncInfo = MF.getInfo<LanaiMachineFunctionInfo>();
1001 
1002   SDLoc DL(Op);
1003   SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
1004                                  getPointerTy(DAG.getDataLayout()));
1005 
1006   // vastart just stores the address of the VarArgsFrameIndex slot into the
1007   // memory location argument.
1008   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1009   return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
1010                       MachinePointerInfo(SV));
1011 }
1012 
1013 SDValue LanaiTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
1014                                                      SelectionDAG &DAG) const {
1015   SDValue Chain = Op.getOperand(0);
1016   SDValue Size = Op.getOperand(1);
1017   SDLoc DL(Op);
1018 
1019   Register SPReg = getStackPointerRegisterToSaveRestore();
1020 
1021   // Get a reference to the stack pointer.
1022   SDValue StackPointer = DAG.getCopyFromReg(Chain, DL, SPReg, MVT::i32);
1023 
1024   // Subtract the dynamic size from the actual stack size to
1025   // obtain the new stack size.
1026   SDValue Sub = DAG.getNode(ISD::SUB, DL, MVT::i32, StackPointer, Size);
1027 
1028   // For Lanai, the outgoing memory arguments area should be on top of the
1029   // alloca area on the stack i.e., the outgoing memory arguments should be
1030   // at a lower address than the alloca area. Move the alloca area down the
1031   // stack by adding back the space reserved for outgoing arguments to SP
1032   // here.
1033   //
1034   // We do not know what the size of the outgoing args is at this point.
1035   // So, we add a pseudo instruction ADJDYNALLOC that will adjust the
1036   // stack pointer. We replace this instruction with on that has the correct,
1037   // known offset in emitPrologue().
1038   SDValue ArgAdjust = DAG.getNode(LanaiISD::ADJDYNALLOC, DL, MVT::i32, Sub);
1039 
1040   // The Sub result contains the new stack start address, so it
1041   // must be placed in the stack pointer register.
1042   SDValue CopyChain = DAG.getCopyToReg(Chain, DL, SPReg, Sub);
1043 
1044   SDValue Ops[2] = {ArgAdjust, CopyChain};
1045   return DAG.getMergeValues(Ops, DL);
1046 }
1047 
1048 SDValue LanaiTargetLowering::LowerRETURNADDR(SDValue Op,
1049                                              SelectionDAG &DAG) const {
1050   MachineFunction &MF = DAG.getMachineFunction();
1051   MachineFrameInfo &MFI = MF.getFrameInfo();
1052   MFI.setReturnAddressIsTaken(true);
1053 
1054   EVT VT = Op.getValueType();
1055   SDLoc DL(Op);
1056   unsigned Depth = Op.getConstantOperandVal(0);
1057   if (Depth) {
1058     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
1059     const unsigned Offset = -4;
1060     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1061                               DAG.getIntPtrConstant(Offset, DL));
1062     return DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1063   }
1064 
1065   // Return the link register, which contains the return address.
1066   // Mark it an implicit live-in.
1067   Register Reg = MF.addLiveIn(TRI->getRARegister(), getRegClassFor(MVT::i32));
1068   return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, VT);
1069 }
1070 
1071 SDValue LanaiTargetLowering::LowerFRAMEADDR(SDValue Op,
1072                                             SelectionDAG &DAG) const {
1073   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
1074   MFI.setFrameAddressIsTaken(true);
1075 
1076   EVT VT = Op.getValueType();
1077   SDLoc DL(Op);
1078   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, Lanai::FP, VT);
1079   unsigned Depth = Op.getConstantOperandVal(0);
1080   while (Depth--) {
1081     const unsigned Offset = -8;
1082     SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
1083                               DAG.getIntPtrConstant(Offset, DL));
1084     FrameAddr =
1085         DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
1086   }
1087   return FrameAddr;
1088 }
1089 
1090 const char *LanaiTargetLowering::getTargetNodeName(unsigned Opcode) const {
1091   switch (Opcode) {
1092   case LanaiISD::ADJDYNALLOC:
1093     return "LanaiISD::ADJDYNALLOC";
1094   case LanaiISD::RET_GLUE:
1095     return "LanaiISD::RET_GLUE";
1096   case LanaiISD::CALL:
1097     return "LanaiISD::CALL";
1098   case LanaiISD::SELECT_CC:
1099     return "LanaiISD::SELECT_CC";
1100   case LanaiISD::SETCC:
1101     return "LanaiISD::SETCC";
1102   case LanaiISD::SUBBF:
1103     return "LanaiISD::SUBBF";
1104   case LanaiISD::SET_FLAG:
1105     return "LanaiISD::SET_FLAG";
1106   case LanaiISD::BR_CC:
1107     return "LanaiISD::BR_CC";
1108   case LanaiISD::Wrapper:
1109     return "LanaiISD::Wrapper";
1110   case LanaiISD::HI:
1111     return "LanaiISD::HI";
1112   case LanaiISD::LO:
1113     return "LanaiISD::LO";
1114   case LanaiISD::SMALL:
1115     return "LanaiISD::SMALL";
1116   default:
1117     return nullptr;
1118   }
1119 }
1120 
1121 SDValue LanaiTargetLowering::LowerConstantPool(SDValue Op,
1122                                                SelectionDAG &DAG) const {
1123   SDLoc DL(Op);
1124   ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
1125   const Constant *C = N->getConstVal();
1126   const LanaiTargetObjectFile *TLOF =
1127       static_cast<const LanaiTargetObjectFile *>(
1128           getTargetMachine().getObjFileLowering());
1129 
1130   // If the code model is small or constant will be placed in the small section,
1131   // then assume address will fit in 21-bits.
1132   if (getTargetMachine().getCodeModel() == CodeModel::Small ||
1133       TLOF->isConstantInSmallSection(DAG.getDataLayout(), C)) {
1134     SDValue Small = DAG.getTargetConstantPool(
1135         C, MVT::i32, N->getAlign(), N->getOffset(), LanaiII::MO_NO_FLAG);
1136     return DAG.getNode(ISD::OR, DL, MVT::i32,
1137                        DAG.getRegister(Lanai::R0, MVT::i32),
1138                        DAG.getNode(LanaiISD::SMALL, DL, MVT::i32, Small));
1139   } else {
1140     uint8_t OpFlagHi = LanaiII::MO_ABS_HI;
1141     uint8_t OpFlagLo = LanaiII::MO_ABS_LO;
1142 
1143     SDValue Hi = DAG.getTargetConstantPool(C, MVT::i32, N->getAlign(),
1144                                            N->getOffset(), OpFlagHi);
1145     SDValue Lo = DAG.getTargetConstantPool(C, MVT::i32, N->getAlign(),
1146                                            N->getOffset(), OpFlagLo);
1147     Hi = DAG.getNode(LanaiISD::HI, DL, MVT::i32, Hi);
1148     Lo = DAG.getNode(LanaiISD::LO, DL, MVT::i32, Lo);
1149     SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Hi, Lo);
1150     return Result;
1151   }
1152 }
1153 
1154 SDValue LanaiTargetLowering::LowerGlobalAddress(SDValue Op,
1155                                                 SelectionDAG &DAG) const {
1156   SDLoc DL(Op);
1157   const GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
1158   int64_t Offset = cast<GlobalAddressSDNode>(Op)->getOffset();
1159 
1160   const LanaiTargetObjectFile *TLOF =
1161       static_cast<const LanaiTargetObjectFile *>(
1162           getTargetMachine().getObjFileLowering());
1163 
1164   // If the code model is small or global variable will be placed in the small
1165   // section, then assume address will fit in 21-bits.
1166   const GlobalObject *GO = GV->getAliaseeObject();
1167   if (TLOF->isGlobalInSmallSection(GO, getTargetMachine())) {
1168     SDValue Small = DAG.getTargetGlobalAddress(
1169         GV, DL, getPointerTy(DAG.getDataLayout()), Offset, LanaiII::MO_NO_FLAG);
1170     return DAG.getNode(ISD::OR, DL, MVT::i32,
1171                        DAG.getRegister(Lanai::R0, MVT::i32),
1172                        DAG.getNode(LanaiISD::SMALL, DL, MVT::i32, Small));
1173   } else {
1174     uint8_t OpFlagHi = LanaiII::MO_ABS_HI;
1175     uint8_t OpFlagLo = LanaiII::MO_ABS_LO;
1176 
1177     // Create the TargetGlobalAddress node, folding in the constant offset.
1178     SDValue Hi = DAG.getTargetGlobalAddress(
1179         GV, DL, getPointerTy(DAG.getDataLayout()), Offset, OpFlagHi);
1180     SDValue Lo = DAG.getTargetGlobalAddress(
1181         GV, DL, getPointerTy(DAG.getDataLayout()), Offset, OpFlagLo);
1182     Hi = DAG.getNode(LanaiISD::HI, DL, MVT::i32, Hi);
1183     Lo = DAG.getNode(LanaiISD::LO, DL, MVT::i32, Lo);
1184     return DAG.getNode(ISD::OR, DL, MVT::i32, Hi, Lo);
1185   }
1186 }
1187 
1188 SDValue LanaiTargetLowering::LowerBlockAddress(SDValue Op,
1189                                                SelectionDAG &DAG) const {
1190   SDLoc DL(Op);
1191   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
1192 
1193   uint8_t OpFlagHi = LanaiII::MO_ABS_HI;
1194   uint8_t OpFlagLo = LanaiII::MO_ABS_LO;
1195 
1196   SDValue Hi = DAG.getBlockAddress(BA, MVT::i32, true, OpFlagHi);
1197   SDValue Lo = DAG.getBlockAddress(BA, MVT::i32, true, OpFlagLo);
1198   Hi = DAG.getNode(LanaiISD::HI, DL, MVT::i32, Hi);
1199   Lo = DAG.getNode(LanaiISD::LO, DL, MVT::i32, Lo);
1200   SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Hi, Lo);
1201   return Result;
1202 }
1203 
1204 SDValue LanaiTargetLowering::LowerJumpTable(SDValue Op,
1205                                             SelectionDAG &DAG) const {
1206   SDLoc DL(Op);
1207   JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
1208 
1209   // If the code model is small assume address will fit in 21-bits.
1210   if (getTargetMachine().getCodeModel() == CodeModel::Small) {
1211     SDValue Small = DAG.getTargetJumpTable(
1212         JT->getIndex(), getPointerTy(DAG.getDataLayout()), LanaiII::MO_NO_FLAG);
1213     return DAG.getNode(ISD::OR, DL, MVT::i32,
1214                        DAG.getRegister(Lanai::R0, MVT::i32),
1215                        DAG.getNode(LanaiISD::SMALL, DL, MVT::i32, Small));
1216   } else {
1217     uint8_t OpFlagHi = LanaiII::MO_ABS_HI;
1218     uint8_t OpFlagLo = LanaiII::MO_ABS_LO;
1219 
1220     SDValue Hi = DAG.getTargetJumpTable(
1221         JT->getIndex(), getPointerTy(DAG.getDataLayout()), OpFlagHi);
1222     SDValue Lo = DAG.getTargetJumpTable(
1223         JT->getIndex(), getPointerTy(DAG.getDataLayout()), OpFlagLo);
1224     Hi = DAG.getNode(LanaiISD::HI, DL, MVT::i32, Hi);
1225     Lo = DAG.getNode(LanaiISD::LO, DL, MVT::i32, Lo);
1226     SDValue Result = DAG.getNode(ISD::OR, DL, MVT::i32, Hi, Lo);
1227     return Result;
1228   }
1229 }
1230 
1231 SDValue LanaiTargetLowering::LowerSHL_PARTS(SDValue Op,
1232                                             SelectionDAG &DAG) const {
1233   EVT VT = Op.getValueType();
1234   unsigned VTBits = VT.getSizeInBits();
1235   SDLoc dl(Op);
1236   assert(Op.getNumOperands() == 3 && "Unexpected SHL!");
1237   SDValue ShOpLo = Op.getOperand(0);
1238   SDValue ShOpHi = Op.getOperand(1);
1239   SDValue ShAmt = Op.getOperand(2);
1240 
1241   // Performs the following for (ShOpLo + (ShOpHi << 32)) << ShAmt:
1242   //   LoBitsForHi = (ShAmt == 0) ? 0 : (ShOpLo >> (32-ShAmt))
1243   //   HiBitsForHi = ShOpHi << ShAmt
1244   //   Hi = (ShAmt >= 32) ? (ShOpLo << (ShAmt-32)) : (LoBitsForHi | HiBitsForHi)
1245   //   Lo = (ShAmt >= 32) ? 0 : (ShOpLo << ShAmt)
1246   //   return (Hi << 32) | Lo;
1247 
1248   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
1249                                  DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
1250   SDValue LoBitsForHi = DAG.getNode(ISD::SRL, dl, VT, ShOpLo, RevShAmt);
1251 
1252   // If ShAmt == 0, we just calculated "(SRL ShOpLo, 32)" which is "undef". We
1253   // wanted 0, so CSEL it directly.
1254   SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
1255   SDValue SetCC = DAG.getSetCC(dl, MVT::i32, ShAmt, Zero, ISD::SETEQ);
1256   LoBitsForHi = DAG.getSelect(dl, MVT::i32, SetCC, Zero, LoBitsForHi);
1257 
1258   SDValue ExtraShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32, ShAmt,
1259                                    DAG.getConstant(VTBits, dl, MVT::i32));
1260   SDValue HiBitsForHi = DAG.getNode(ISD::SHL, dl, VT, ShOpHi, ShAmt);
1261   SDValue HiForNormalShift =
1262       DAG.getNode(ISD::OR, dl, VT, LoBitsForHi, HiBitsForHi);
1263 
1264   SDValue HiForBigShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ExtraShAmt);
1265 
1266   SetCC = DAG.getSetCC(dl, MVT::i32, ExtraShAmt, Zero, ISD::SETGE);
1267   SDValue Hi =
1268       DAG.getSelect(dl, MVT::i32, SetCC, HiForBigShift, HiForNormalShift);
1269 
1270   // Lanai shifts of larger than register sizes are wrapped rather than
1271   // clamped, so we can't just emit "lo << b" if b is too big.
1272   SDValue LoForNormalShift = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
1273   SDValue Lo = DAG.getSelect(
1274       dl, MVT::i32, SetCC, DAG.getConstant(0, dl, MVT::i32), LoForNormalShift);
1275 
1276   SDValue Ops[2] = {Lo, Hi};
1277   return DAG.getMergeValues(Ops, dl);
1278 }
1279 
1280 SDValue LanaiTargetLowering::LowerSRL_PARTS(SDValue Op,
1281                                             SelectionDAG &DAG) const {
1282   MVT VT = Op.getSimpleValueType();
1283   unsigned VTBits = VT.getSizeInBits();
1284   SDLoc dl(Op);
1285   SDValue ShOpLo = Op.getOperand(0);
1286   SDValue ShOpHi = Op.getOperand(1);
1287   SDValue ShAmt = Op.getOperand(2);
1288 
1289   // Performs the following for a >> b:
1290   //   unsigned r_high = a_high >> b;
1291   //   r_high = (32 - b <= 0) ? 0 : r_high;
1292   //
1293   //   unsigned r_low = a_low >> b;
1294   //   r_low = (32 - b <= 0) ? r_high : r_low;
1295   //   r_low = (b == 0) ? r_low : r_low | (a_high << (32 - b));
1296   //   return (unsigned long long)r_high << 32 | r_low;
1297   // Note: This takes advantage of Lanai's shift behavior to avoid needing to
1298   // mask the shift amount.
1299 
1300   SDValue Zero = DAG.getConstant(0, dl, MVT::i32);
1301   SDValue NegatedPlus32 = DAG.getNode(
1302       ISD::SUB, dl, MVT::i32, DAG.getConstant(VTBits, dl, MVT::i32), ShAmt);
1303   SDValue SetCC = DAG.getSetCC(dl, MVT::i32, NegatedPlus32, Zero, ISD::SETLE);
1304 
1305   SDValue Hi = DAG.getNode(ISD::SRL, dl, MVT::i32, ShOpHi, ShAmt);
1306   Hi = DAG.getSelect(dl, MVT::i32, SetCC, Zero, Hi);
1307 
1308   SDValue Lo = DAG.getNode(ISD::SRL, dl, MVT::i32, ShOpLo, ShAmt);
1309   Lo = DAG.getSelect(dl, MVT::i32, SetCC, Hi, Lo);
1310   SDValue CarryBits =
1311       DAG.getNode(ISD::SHL, dl, MVT::i32, ShOpHi, NegatedPlus32);
1312   SDValue ShiftIsZero = DAG.getSetCC(dl, MVT::i32, ShAmt, Zero, ISD::SETEQ);
1313   Lo = DAG.getSelect(dl, MVT::i32, ShiftIsZero, Lo,
1314                      DAG.getNode(ISD::OR, dl, MVT::i32, Lo, CarryBits));
1315 
1316   SDValue Ops[2] = {Lo, Hi};
1317   return DAG.getMergeValues(Ops, dl);
1318 }
1319 
1320 // Helper function that checks if N is a null or all ones constant.
1321 static inline bool isZeroOrAllOnes(SDValue N, bool AllOnes) {
1322   return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
1323 }
1324 
1325 // Return true if N is conditionally 0 or all ones.
1326 // Detects these expressions where cc is an i1 value:
1327 //
1328 //   (select cc 0, y)   [AllOnes=0]
1329 //   (select cc y, 0)   [AllOnes=0]
1330 //   (zext cc)          [AllOnes=0]
1331 //   (sext cc)          [AllOnes=0/1]
1332 //   (select cc -1, y)  [AllOnes=1]
1333 //   (select cc y, -1)  [AllOnes=1]
1334 //
1335 // * AllOnes determines whether to check for an all zero (AllOnes false) or an
1336 //   all ones operand (AllOnes true).
1337 // * Invert is set when N is the all zero/ones constant when CC is false.
1338 // * OtherOp is set to the alternative value of N.
1339 //
1340 // For example, for (select cc X, Y) and AllOnes = 0 if:
1341 // * X = 0, Invert = False and OtherOp = Y
1342 // * Y = 0, Invert = True and OtherOp = X
1343 static bool isConditionalZeroOrAllOnes(SDNode *N, bool AllOnes, SDValue &CC,
1344                                        bool &Invert, SDValue &OtherOp,
1345                                        SelectionDAG &DAG) {
1346   switch (N->getOpcode()) {
1347   default:
1348     return false;
1349   case ISD::SELECT: {
1350     CC = N->getOperand(0);
1351     SDValue N1 = N->getOperand(1);
1352     SDValue N2 = N->getOperand(2);
1353     if (isZeroOrAllOnes(N1, AllOnes)) {
1354       Invert = false;
1355       OtherOp = N2;
1356       return true;
1357     }
1358     if (isZeroOrAllOnes(N2, AllOnes)) {
1359       Invert = true;
1360       OtherOp = N1;
1361       return true;
1362     }
1363     return false;
1364   }
1365   case ISD::ZERO_EXTEND: {
1366     // (zext cc) can never be the all ones value.
1367     if (AllOnes)
1368       return false;
1369     CC = N->getOperand(0);
1370     if (CC.getValueType() != MVT::i1)
1371       return false;
1372     SDLoc dl(N);
1373     EVT VT = N->getValueType(0);
1374     OtherOp = DAG.getConstant(1, dl, VT);
1375     Invert = true;
1376     return true;
1377   }
1378   case ISD::SIGN_EXTEND: {
1379     CC = N->getOperand(0);
1380     if (CC.getValueType() != MVT::i1)
1381       return false;
1382     SDLoc dl(N);
1383     EVT VT = N->getValueType(0);
1384     Invert = !AllOnes;
1385     if (AllOnes)
1386       // When looking for an AllOnes constant, N is an sext, and the 'other'
1387       // value is 0.
1388       OtherOp = DAG.getConstant(0, dl, VT);
1389     else
1390       OtherOp = DAG.getAllOnesConstant(dl, VT);
1391     return true;
1392   }
1393   }
1394 }
1395 
1396 // Combine a constant select operand into its use:
1397 //
1398 //   (add (select cc, 0, c), x)  -> (select cc, x, (add, x, c))
1399 //   (sub x, (select cc, 0, c))  -> (select cc, x, (sub, x, c))
1400 //   (and (select cc, -1, c), x) -> (select cc, x, (and, x, c))  [AllOnes=1]
1401 //   (or  (select cc, 0, c), x)  -> (select cc, x, (or, x, c))
1402 //   (xor (select cc, 0, c), x)  -> (select cc, x, (xor, x, c))
1403 //
1404 // The transform is rejected if the select doesn't have a constant operand that
1405 // is null, or all ones when AllOnes is set.
1406 //
1407 // Also recognize sext/zext from i1:
1408 //
1409 //   (add (zext cc), x) -> (select cc (add x, 1), x)
1410 //   (add (sext cc), x) -> (select cc (add x, -1), x)
1411 //
1412 // These transformations eventually create predicated instructions.
1413 static SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
1414                                    TargetLowering::DAGCombinerInfo &DCI,
1415                                    bool AllOnes) {
1416   SelectionDAG &DAG = DCI.DAG;
1417   EVT VT = N->getValueType(0);
1418   SDValue NonConstantVal;
1419   SDValue CCOp;
1420   bool SwapSelectOps;
1421   if (!isConditionalZeroOrAllOnes(Slct.getNode(), AllOnes, CCOp, SwapSelectOps,
1422                                   NonConstantVal, DAG))
1423     return SDValue();
1424 
1425   // Slct is now know to be the desired identity constant when CC is true.
1426   SDValue TrueVal = OtherOp;
1427   SDValue FalseVal =
1428       DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
1429   // Unless SwapSelectOps says CC should be false.
1430   if (SwapSelectOps)
1431     std::swap(TrueVal, FalseVal);
1432 
1433   return DAG.getNode(ISD::SELECT, SDLoc(N), VT, CCOp, TrueVal, FalseVal);
1434 }
1435 
1436 // Attempt combineSelectAndUse on each operand of a commutative operator N.
1437 static SDValue
1438 combineSelectAndUseCommutative(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
1439                                bool AllOnes) {
1440   SDValue N0 = N->getOperand(0);
1441   SDValue N1 = N->getOperand(1);
1442   if (N0.getNode()->hasOneUse())
1443     if (SDValue Result = combineSelectAndUse(N, N0, N1, DCI, AllOnes))
1444       return Result;
1445   if (N1.getNode()->hasOneUse())
1446     if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, AllOnes))
1447       return Result;
1448   return SDValue();
1449 }
1450 
1451 // PerformSUBCombine - Target-specific dag combine xforms for ISD::SUB.
1452 static SDValue PerformSUBCombine(SDNode *N,
1453                                  TargetLowering::DAGCombinerInfo &DCI) {
1454   SDValue N0 = N->getOperand(0);
1455   SDValue N1 = N->getOperand(1);
1456 
1457   // fold (sub x, (select cc, 0, c)) -> (select cc, x, (sub, x, c))
1458   if (N1.getNode()->hasOneUse())
1459     if (SDValue Result = combineSelectAndUse(N, N1, N0, DCI, /*AllOnes=*/false))
1460       return Result;
1461 
1462   return SDValue();
1463 }
1464 
1465 SDValue LanaiTargetLowering::PerformDAGCombine(SDNode *N,
1466                                                DAGCombinerInfo &DCI) const {
1467   switch (N->getOpcode()) {
1468   default:
1469     break;
1470   case ISD::ADD:
1471   case ISD::OR:
1472   case ISD::XOR:
1473     return combineSelectAndUseCommutative(N, DCI, /*AllOnes=*/false);
1474   case ISD::AND:
1475     return combineSelectAndUseCommutative(N, DCI, /*AllOnes=*/true);
1476   case ISD::SUB:
1477     return PerformSUBCombine(N, DCI);
1478   }
1479 
1480   return SDValue();
1481 }
1482 
1483 void LanaiTargetLowering::computeKnownBitsForTargetNode(
1484     const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1485     const SelectionDAG &DAG, unsigned Depth) const {
1486   unsigned BitWidth = Known.getBitWidth();
1487   switch (Op.getOpcode()) {
1488   default:
1489     break;
1490   case LanaiISD::SETCC:
1491     Known = KnownBits(BitWidth);
1492     Known.Zero.setBits(1, BitWidth);
1493     break;
1494   case LanaiISD::SELECT_CC:
1495     KnownBits Known2;
1496     Known = DAG.computeKnownBits(Op->getOperand(0), Depth + 1);
1497     Known2 = DAG.computeKnownBits(Op->getOperand(1), Depth + 1);
1498     Known = Known.intersectWith(Known2);
1499     break;
1500   }
1501 }
1502