xref: /llvm-project/llvm/lib/Target/Hexagon/HexagonISelDAGToDAG.cpp (revision f139bde8d85e4f7666f2fd739b61894fa58f2f18)
1 //===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the Hexagon target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "HexagonISelDAGToDAG.h"
14 #include "Hexagon.h"
15 #include "HexagonISelLowering.h"
16 #include "HexagonMachineFunctionInfo.h"
17 #include "HexagonTargetMachine.h"
18 #include "llvm/CodeGen/FunctionLoweringInfo.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/SelectionDAGISel.h"
21 #include "llvm/IR/Intrinsics.h"
22 #include "llvm/IR/IntrinsicsHexagon.h"
23 #include "llvm/Support/CommandLine.h"
24 #include "llvm/Support/Debug.h"
25 using namespace llvm;
26 
27 #define DEBUG_TYPE "hexagon-isel"
28 #define PASS_NAME "Hexagon DAG->DAG Pattern Instruction Selection"
29 
30 static
31 cl::opt<bool>
32 EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
33   cl::desc("Rebalance address calculation trees to improve "
34           "instruction selection"));
35 
36 // Rebalance only if this allows e.g. combining a GA with an offset or
37 // factoring out a shift.
38 static
39 cl::opt<bool>
40 RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
41   cl::desc("Rebalance address tree only if this allows optimizations"));
42 
43 static
44 cl::opt<bool>
45 RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
46   cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
47 
48 static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden,
49   cl::init(true), cl::desc("Enable checking of SDNode's single-use status"));
50 
51 //===----------------------------------------------------------------------===//
52 // Instruction Selector Implementation
53 //===----------------------------------------------------------------------===//
54 
55 #define GET_DAGISEL_BODY HexagonDAGToDAGISel
56 #include "HexagonGenDAGISel.inc"
57 
58 namespace llvm {
59 /// createHexagonISelDag - This pass converts a legalized DAG into a
60 /// Hexagon-specific DAG, ready for instruction scheduling.
61 FunctionPass *createHexagonISelDag(HexagonTargetMachine &TM,
62                                    CodeGenOptLevel OptLevel) {
63   return new HexagonDAGToDAGISelLegacy(TM, OptLevel);
64 }
65 }
66 
67 HexagonDAGToDAGISelLegacy::HexagonDAGToDAGISelLegacy(HexagonTargetMachine &tm,
68                                                      CodeGenOptLevel OptLevel)
69     : SelectionDAGISelLegacy(
70           ID, std::make_unique<HexagonDAGToDAGISel>(tm, OptLevel)) {}
71 
72 char HexagonDAGToDAGISelLegacy::ID = 0;
73 
74 INITIALIZE_PASS(HexagonDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
75 
76 void HexagonDAGToDAGISel::SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl) {
77   SDValue Chain = LD->getChain();
78   SDValue Base = LD->getBasePtr();
79   SDValue Offset = LD->getOffset();
80   int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
81   EVT LoadedVT = LD->getMemoryVT();
82   unsigned Opcode = 0;
83 
84   // Check for zero extended loads. Treat any-extend loads as zero extended
85   // loads.
86   ISD::LoadExtType ExtType = LD->getExtensionType();
87   bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
88   bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc);
89 
90   assert(LoadedVT.isSimple());
91   switch (LoadedVT.getSimpleVT().SimpleTy) {
92   case MVT::i8:
93     if (IsZeroExt)
94       Opcode = IsValidInc ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrub_io;
95     else
96       Opcode = IsValidInc ? Hexagon::L2_loadrb_pi : Hexagon::L2_loadrb_io;
97     break;
98   case MVT::i16:
99     if (IsZeroExt)
100       Opcode = IsValidInc ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadruh_io;
101     else
102       Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;
103     break;
104   case MVT::i32:
105   case MVT::f32:
106   case MVT::v2i16:
107   case MVT::v4i8:
108     Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;
109     break;
110   case MVT::i64:
111   case MVT::f64:
112   case MVT::v2i32:
113   case MVT::v4i16:
114   case MVT::v8i8:
115     Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;
116     break;
117   case MVT::v64i8:
118   case MVT::v32i16:
119   case MVT::v16i32:
120   case MVT::v8i64:
121   case MVT::v128i8:
122   case MVT::v64i16:
123   case MVT::v32i32:
124   case MVT::v16i64:
125     if (isAlignedMemNode(LD)) {
126       if (LD->isNonTemporal())
127         Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
128       else
129         Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
130     } else {
131       Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
132     }
133     break;
134   default:
135     llvm_unreachable("Unexpected memory type in indexed load");
136   }
137 
138   SDValue IncV = CurDAG->getSignedTargetConstant(Inc, dl, MVT::i32);
139   MachineMemOperand *MemOp = LD->getMemOperand();
140 
141   auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
142         -> MachineSDNode* {
143     if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD) {
144       SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
145       return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64,
146                                     Zero, SDValue(N, 0));
147     }
148     if (ExtType == ISD::SEXTLOAD)
149       return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
150                                     SDValue(N, 0));
151     return N;
152   };
153 
154   //                  Loaded value   Next address   Chain
155   SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) };
156   SDValue To[3];
157 
158   EVT ValueVT = LD->getValueType(0);
159   if (ValueVT == MVT::i64 && ExtType != ISD::NON_EXTLOAD) {
160     // A load extending to i64 will actually produce i32, which will then
161     // need to be extended to i64.
162     assert(LoadedVT.getSizeInBits() <= 32);
163     ValueVT = MVT::i32;
164   }
165 
166   if (IsValidInc) {
167     MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
168                                               MVT::i32, MVT::Other, Base,
169                                               IncV, Chain);
170     CurDAG->setNodeMemRefs(L, {MemOp});
171     To[1] = SDValue(L, 1); // Next address.
172     To[2] = SDValue(L, 2); // Chain.
173     // Handle special case for extension to i64.
174     if (LD->getValueType(0) == MVT::i64)
175       L = getExt64(L, dl);
176     To[0] = SDValue(L, 0); // Loaded (extended) value.
177   } else {
178     SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
179     MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
180                                               Base, Zero, Chain);
181     CurDAG->setNodeMemRefs(L, {MemOp});
182     To[2] = SDValue(L, 1); // Chain.
183     MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
184                                               Base, IncV);
185     To[1] = SDValue(A, 0); // Next address.
186     // Handle special case for extension to i64.
187     if (LD->getValueType(0) == MVT::i64)
188       L = getExt64(L, dl);
189     To[0] = SDValue(L, 0); // Loaded (extended) value.
190   }
191   ReplaceUses(From, To, 3);
192   CurDAG->RemoveDeadNode(LD);
193 }
194 
195 MachineSDNode *HexagonDAGToDAGISel::LoadInstrForLoadIntrinsic(SDNode *IntN) {
196   if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
197     return nullptr;
198 
199   SDLoc dl(IntN);
200   unsigned IntNo = IntN->getConstantOperandVal(1);
201 
202   static std::map<unsigned,unsigned> LoadPciMap = {
203     { Intrinsic::hexagon_circ_ldb,  Hexagon::L2_loadrb_pci  },
204     { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
205     { Intrinsic::hexagon_circ_ldh,  Hexagon::L2_loadrh_pci  },
206     { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
207     { Intrinsic::hexagon_circ_ldw,  Hexagon::L2_loadri_pci  },
208     { Intrinsic::hexagon_circ_ldd,  Hexagon::L2_loadrd_pci  },
209   };
210   auto FLC = LoadPciMap.find(IntNo);
211   if (FLC != LoadPciMap.end()) {
212     EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
213     EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
214     // Operands: { Base, Increment, Modifier, Chain }
215     auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
216     SDValue I =
217         CurDAG->getSignedTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
218     MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
219           { IntN->getOperand(2), I, IntN->getOperand(4),
220             IntN->getOperand(0) });
221     return Res;
222   }
223 
224   return nullptr;
225 }
226 
227 SDNode *HexagonDAGToDAGISel::StoreInstrForLoadIntrinsic(MachineSDNode *LoadN,
228       SDNode *IntN) {
229   // The "LoadN" is just a machine load instruction. The intrinsic also
230   // involves storing it. Generate an appropriate store to the location
231   // given in the intrinsic's operand(3).
232   uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
233   unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
234                       HexagonII::MemAccesSizeMask;
235   unsigned Size = 1U << (SizeBits-1);
236 
237   SDLoc dl(IntN);
238   MachinePointerInfo PI;
239   SDValue TS;
240   SDValue Loc = IntN->getOperand(3);
241 
242   if (Size >= 4)
243     TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI,
244                           Align(Size));
245   else
246     TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc,
247                                PI, MVT::getIntegerVT(Size * 8), Align(Size));
248 
249   SDNode *StoreN;
250   {
251     HandleSDNode Handle(TS);
252     SelectStore(TS.getNode());
253     StoreN = Handle.getValue().getNode();
254   }
255 
256   // Load's results are { Loaded value, Updated pointer, Chain }
257   ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
258   ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
259   return StoreN;
260 }
261 
262 bool HexagonDAGToDAGISel::tryLoadOfLoadIntrinsic(LoadSDNode *N) {
263   // The intrinsics for load circ/brev perform two operations:
264   // 1. Load a value V from the specified location, using the addressing
265   //    mode corresponding to the intrinsic.
266   // 2. Store V into a specified location. This location is typically a
267   //    local, temporary object.
268   // In many cases, the program using these intrinsics will immediately
269   // load V again from the local object. In those cases, when certain
270   // conditions are met, the last load can be removed.
271   // This function identifies and optimizes this pattern. If the pattern
272   // cannot be optimized, it returns nullptr, which will cause the load
273   // to be selected separately from the intrinsic (which will be handled
274   // in SelectIntrinsicWChain).
275 
276   SDValue Ch = N->getOperand(0);
277   SDValue Loc = N->getOperand(1);
278 
279   // Assume that the load and the intrinsic are connected directly with a
280   // chain:
281   //   t1: i32,ch = int.load ..., ..., ..., Loc, ...    // <-- C
282   //   t2: i32,ch = load t1:1, Loc, ...
283   SDNode *C = Ch.getNode();
284 
285   if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
286     return false;
287 
288   // The second load can only be eliminated if its extension type matches
289   // that of the load instruction corresponding to the intrinsic. The user
290   // can provide an address of an unsigned variable to store the result of
291   // a sign-extending intrinsic into (or the other way around).
292   ISD::LoadExtType IntExt;
293   switch (C->getConstantOperandVal(1)) {
294   case Intrinsic::hexagon_circ_ldub:
295   case Intrinsic::hexagon_circ_lduh:
296     IntExt = ISD::ZEXTLOAD;
297     break;
298   case Intrinsic::hexagon_circ_ldw:
299   case Intrinsic::hexagon_circ_ldd:
300     IntExt = ISD::NON_EXTLOAD;
301     break;
302   default:
303     IntExt = ISD::SEXTLOAD;
304     break;
305   }
306   if (N->getExtensionType() != IntExt)
307     return false;
308 
309   // Make sure the target location for the loaded value in the load intrinsic
310   // is the location from which LD (or N) is loading.
311   if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode())
312     return false;
313 
314   if (MachineSDNode *L = LoadInstrForLoadIntrinsic(C)) {
315     SDNode *S = StoreInstrForLoadIntrinsic(L, C);
316     SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
317     SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
318     ReplaceUses(F, T, std::size(T));
319     // This transformation will leave the intrinsic dead. If it remains in
320     // the DAG, the selection code will see it again, but without the load,
321     // and it will generate a store that is normally required for it.
322     CurDAG->RemoveDeadNode(C);
323     return true;
324   }
325   return false;
326 }
327 
328 // Convert the bit-reverse load intrinsic to appropriate target instruction.
329 bool HexagonDAGToDAGISel::SelectBrevLdIntrinsic(SDNode *IntN) {
330   if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
331     return false;
332 
333   const SDLoc &dl(IntN);
334   unsigned IntNo = IntN->getConstantOperandVal(1);
335 
336   static const std::map<unsigned, unsigned> LoadBrevMap = {
337     { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
338     { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr },
339     { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr },
340     { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr },
341     { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr },
342     { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr }
343   };
344   auto FLI = LoadBrevMap.find(IntNo);
345   if (FLI != LoadBrevMap.end()) {
346     EVT ValTy =
347         (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32;
348     EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
349     // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr,
350     // modifier}.
351     // Operands of target instruction: { Base, Modifier, Chain }.
352     MachineSDNode *Res = CurDAG->getMachineNode(
353         FLI->second, dl, RTys,
354         {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
355 
356     MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
357     CurDAG->setNodeMemRefs(Res, {MemOp});
358 
359     ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
360     ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
361     ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
362     CurDAG->RemoveDeadNode(IntN);
363     return true;
364   }
365   return false;
366 }
367 
368 /// Generate a machine instruction node for the new circular buffer intrinsics.
369 /// The new versions use a CSx register instead of the K field.
370 bool HexagonDAGToDAGISel::SelectNewCircIntrinsic(SDNode *IntN) {
371   if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
372     return false;
373 
374   SDLoc DL(IntN);
375   unsigned IntNo = IntN->getConstantOperandVal(1);
376   SmallVector<SDValue, 7> Ops;
377 
378   static std::map<unsigned,unsigned> LoadNPcMap = {
379     { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci },
380     { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci },
381     { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci },
382     { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci },
383     { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci },
384     { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci },
385     { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr },
386     { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr },
387     { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr },
388     { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr },
389     { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr },
390     { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr }
391   };
392   auto FLI = LoadNPcMap.find (IntNo);
393   if (FLI != LoadNPcMap.end()) {
394     EVT ValTy = MVT::i32;
395     if (IntNo == Intrinsic::hexagon_L2_loadrd_pci ||
396         IntNo == Intrinsic::hexagon_L2_loadrd_pcr)
397       ValTy = MVT::i64;
398     EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
399     // Handle load.*_pci case which has 6 operands.
400     if (IntN->getNumOperands() == 6) {
401       auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
402       SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
403       // Operands: { Base, Increment, Modifier, Start, Chain }.
404       Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
405               IntN->getOperand(0) };
406     } else
407       // Handle load.*_pcr case which has 5 operands.
408       // Operands: { Base, Modifier, Start, Chain }.
409       Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
410               IntN->getOperand(0) };
411     MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops);
412     ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
413     ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
414     ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
415     CurDAG->RemoveDeadNode(IntN);
416     return true;
417   }
418 
419   static std::map<unsigned,unsigned> StoreNPcMap = {
420     { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci },
421     { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci },
422     { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci },
423     { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci },
424     { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci },
425     { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr },
426     { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr },
427     { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr },
428     { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr },
429     { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr }
430   };
431   auto FSI = StoreNPcMap.find (IntNo);
432   if (FSI != StoreNPcMap.end()) {
433     EVT RTys[] = { MVT::i32, MVT::Other };
434     // Handle store.*_pci case which has 7 operands.
435     if (IntN->getNumOperands() == 7) {
436       auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
437       SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
438       // Operands: { Base, Increment, Modifier, Value, Start, Chain }.
439       Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
440               IntN->getOperand(6), IntN->getOperand(0) };
441     } else
442       // Handle store.*_pcr case which has 6 operands.
443       // Operands: { Base, Modifier, Value, Start, Chain }.
444       Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
445               IntN->getOperand(5), IntN->getOperand(0) };
446     MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops);
447     ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
448     ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
449     CurDAG->RemoveDeadNode(IntN);
450     return true;
451   }
452 
453   return false;
454 }
455 
456 void HexagonDAGToDAGISel::SelectLoad(SDNode *N) {
457   SDLoc dl(N);
458   LoadSDNode *LD = cast<LoadSDNode>(N);
459 
460   // Handle indexed loads.
461   ISD::MemIndexedMode AM = LD->getAddressingMode();
462   if (AM != ISD::UNINDEXED) {
463     SelectIndexedLoad(LD, dl);
464     return;
465   }
466 
467   // Handle patterns using circ/brev load intrinsics.
468   if (tryLoadOfLoadIntrinsic(LD))
469     return;
470 
471   SelectCode(LD);
472 }
473 
474 void HexagonDAGToDAGISel::SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl) {
475   SDValue Chain = ST->getChain();
476   SDValue Base = ST->getBasePtr();
477   SDValue Offset = ST->getOffset();
478   SDValue Value = ST->getValue();
479   // Get the constant value.
480   int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
481   EVT StoredVT = ST->getMemoryVT();
482   EVT ValueVT = Value.getValueType();
483 
484   bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc);
485   unsigned Opcode = 0;
486 
487   assert(StoredVT.isSimple());
488   switch (StoredVT.getSimpleVT().SimpleTy) {
489   case MVT::i8:
490     Opcode = IsValidInc ? Hexagon::S2_storerb_pi : Hexagon::S2_storerb_io;
491     break;
492   case MVT::i16:
493     Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;
494     break;
495   case MVT::i32:
496   case MVT::f32:
497   case MVT::v2i16:
498   case MVT::v4i8:
499     Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;
500     break;
501   case MVT::i64:
502   case MVT::f64:
503   case MVT::v2i32:
504   case MVT::v4i16:
505   case MVT::v8i8:
506     Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;
507     break;
508   case MVT::v64i8:
509   case MVT::v32i16:
510   case MVT::v16i32:
511   case MVT::v8i64:
512   case MVT::v128i8:
513   case MVT::v64i16:
514   case MVT::v32i32:
515   case MVT::v16i64:
516     if (isAlignedMemNode(ST)) {
517       if (ST->isNonTemporal())
518         Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
519       else
520         Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
521     } else {
522       Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
523     }
524     break;
525   default:
526     llvm_unreachable("Unexpected memory type in indexed store");
527   }
528 
529   if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
530     assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
531     Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo,
532                                            dl, MVT::i32, Value);
533   }
534 
535   SDValue IncV = CurDAG->getSignedTargetConstant(Inc, dl, MVT::i32);
536   MachineMemOperand *MemOp = ST->getMemOperand();
537 
538   //                  Next address   Chain
539   SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
540   SDValue To[2];
541 
542   if (IsValidInc) {
543     // Build post increment store.
544     SDValue Ops[] = { Base, IncV, Value, Chain };
545     MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::i32, MVT::Other,
546                                               Ops);
547     CurDAG->setNodeMemRefs(S, {MemOp});
548     To[0] = SDValue(S, 0);
549     To[1] = SDValue(S, 1);
550   } else {
551     SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
552     SDValue Ops[] = { Base, Zero, Value, Chain };
553     MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
554     CurDAG->setNodeMemRefs(S, {MemOp});
555     To[1] = SDValue(S, 0);
556     MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
557                                               Base, IncV);
558     To[0] = SDValue(A, 0);
559   }
560 
561   ReplaceUses(From, To, 2);
562   CurDAG->RemoveDeadNode(ST);
563 }
564 
565 void HexagonDAGToDAGISel::SelectStore(SDNode *N) {
566   SDLoc dl(N);
567   StoreSDNode *ST = cast<StoreSDNode>(N);
568 
569   // Handle indexed stores.
570   ISD::MemIndexedMode AM = ST->getAddressingMode();
571   if (AM != ISD::UNINDEXED) {
572     SelectIndexedStore(ST, dl);
573     return;
574   }
575 
576   SelectCode(ST);
577 }
578 
579 void HexagonDAGToDAGISel::SelectSHL(SDNode *N) {
580   SDLoc dl(N);
581   SDValue Shl_0 = N->getOperand(0);
582   SDValue Shl_1 = N->getOperand(1);
583 
584   auto Default = [this,N] () -> void { SelectCode(N); };
585 
586   if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant)
587     return Default();
588 
589   // RHS is const.
590   int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue();
591 
592   if (Shl_0.getOpcode() == ISD::MUL) {
593     SDValue Mul_0 = Shl_0.getOperand(0); // Val
594     SDValue Mul_1 = Shl_0.getOperand(1); // Const
595     // RHS of mul is const.
596     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) {
597       int32_t ValConst = C->getSExtValue() << ShlConst;
598       if (isInt<9>(ValConst)) {
599         SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32);
600         SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
601                                                 MVT::i32, Mul_0, Val);
602         ReplaceNode(N, Result);
603         return;
604       }
605     }
606     return Default();
607   }
608 
609   if (Shl_0.getOpcode() == ISD::SUB) {
610     SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
611     SDValue Sub_1 = Shl_0.getOperand(1); // Val
612     if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) {
613       if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL)
614         return Default();
615       SDValue Shl2_0 = Sub_1.getOperand(0); // Val
616       SDValue Shl2_1 = Sub_1.getOperand(1); // Const
617       if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) {
618         int32_t ValConst = 1 << (ShlConst + C2->getSExtValue());
619         if (isInt<9>(-ValConst)) {
620           SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32);
621           SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
622                                                   MVT::i32, Shl2_0, Val);
623           ReplaceNode(N, Result);
624           return;
625         }
626       }
627     }
628   }
629 
630   return Default();
631 }
632 
633 //
634 // Handling intrinsics for circular load and bitreverse load.
635 //
636 void HexagonDAGToDAGISel::SelectIntrinsicWChain(SDNode *N) {
637   if (MachineSDNode *L = LoadInstrForLoadIntrinsic(N)) {
638     StoreInstrForLoadIntrinsic(L, N);
639     CurDAG->RemoveDeadNode(N);
640     return;
641   }
642 
643   // Handle bit-reverse load intrinsics.
644   if (SelectBrevLdIntrinsic(N))
645     return;
646 
647   if (SelectNewCircIntrinsic(N))
648     return;
649 
650   unsigned IntNo = N->getConstantOperandVal(1);
651   if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
652       IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
653       IntNo == Intrinsic::hexagon_V6_vgathermh ||
654       IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
655       IntNo == Intrinsic::hexagon_V6_vgathermhw ||
656       IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
657     SelectV65Gather(N);
658     return;
659   }
660   if (IntNo == Intrinsic::hexagon_V6_vgathermwq ||
661       IntNo == Intrinsic::hexagon_V6_vgathermwq_128B ||
662       IntNo == Intrinsic::hexagon_V6_vgathermhq ||
663       IntNo == Intrinsic::hexagon_V6_vgathermhq_128B ||
664       IntNo == Intrinsic::hexagon_V6_vgathermhwq ||
665       IntNo == Intrinsic::hexagon_V6_vgathermhwq_128B) {
666     SelectV65GatherPred(N);
667     return;
668   }
669 
670   SelectCode(N);
671 }
672 
673 void HexagonDAGToDAGISel::SelectIntrinsicWOChain(SDNode *N) {
674   unsigned IID = N->getConstantOperandVal(0);
675   unsigned Bits;
676   switch (IID) {
677   case Intrinsic::hexagon_S2_vsplatrb:
678     Bits = 8;
679     break;
680   case Intrinsic::hexagon_S2_vsplatrh:
681     Bits = 16;
682     break;
683   case Intrinsic::hexagon_V6_vaddcarry:
684   case Intrinsic::hexagon_V6_vaddcarry_128B:
685   case Intrinsic::hexagon_V6_vsubcarry:
686   case Intrinsic::hexagon_V6_vsubcarry_128B:
687     SelectHVXDualOutput(N);
688     return;
689   default:
690     SelectCode(N);
691     return;
692   }
693 
694   SDValue V = N->getOperand(1);
695   SDValue U;
696   // Splat intrinsics.
697   if (keepsLowBits(V, Bits, U)) {
698     SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
699                                 N->getOperand(0), U);
700     ReplaceNode(N, R.getNode());
701     SelectCode(R.getNode());
702     return;
703   }
704   SelectCode(N);
705 }
706 
707 void HexagonDAGToDAGISel::SelectExtractSubvector(SDNode *N) {
708   SDValue Inp = N->getOperand(0);
709   MVT ResTy = N->getValueType(0).getSimpleVT();
710   unsigned Idx = N->getConstantOperandVal(1);
711 
712   [[maybe_unused]] MVT InpTy = Inp.getValueType().getSimpleVT();
713   [[maybe_unused]] unsigned ResLen = ResTy.getVectorNumElements();
714   assert(InpTy.getVectorElementType() == ResTy.getVectorElementType());
715   assert(2 * ResLen == InpTy.getVectorNumElements());
716   assert(ResTy.getSizeInBits() == 32);
717   assert(Idx == 0 || Idx == ResLen);
718 
719   unsigned SubReg = Idx == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
720   SDValue Ext = CurDAG->getTargetExtractSubreg(SubReg, SDLoc(N), ResTy, Inp);
721 
722   ReplaceNode(N, Ext.getNode());
723 }
724 
725 //
726 // Map floating point constant values.
727 //
728 void HexagonDAGToDAGISel::SelectConstantFP(SDNode *N) {
729   SDLoc dl(N);
730   auto *CN = cast<ConstantFPSDNode>(N);
731   APInt A = CN->getValueAPF().bitcastToAPInt();
732   if (N->getValueType(0) == MVT::f32) {
733     SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
734     ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V));
735     return;
736   }
737   if (N->getValueType(0) == MVT::f64) {
738     SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64);
739     ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V));
740     return;
741   }
742 
743   SelectCode(N);
744 }
745 
746 //
747 // Map boolean values.
748 //
749 void HexagonDAGToDAGISel::SelectConstant(SDNode *N) {
750   if (N->getValueType(0) == MVT::i1) {
751     assert(!(N->getAsZExtVal() >> 1));
752     unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0)
753                       ? Hexagon::PS_true
754                       : Hexagon::PS_false;
755     ReplaceNode(N, CurDAG->getMachineNode(Opc, SDLoc(N), MVT::i1));
756     return;
757   }
758 
759   SelectCode(N);
760 }
761 
762 void HexagonDAGToDAGISel::SelectFrameIndex(SDNode *N) {
763   MachineFrameInfo &MFI = MF->getFrameInfo();
764   const HexagonFrameLowering *HFI = HST->getFrameLowering();
765   int FX = cast<FrameIndexSDNode>(N)->getIndex();
766   Align StkA = HFI->getStackAlign();
767   Align MaxA = MFI.getMaxAlign();
768   SDValue FI = CurDAG->getTargetFrameIndex(FX, MVT::i32);
769   SDLoc DL(N);
770   SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
771   SDNode *R = nullptr;
772 
773   // Use PS_fi when:
774   // - the object is fixed, or
775   // - there are no objects with higher-than-default alignment, or
776   // - there are no dynamically allocated objects.
777   // Otherwise, use PS_fia.
778   if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) {
779     R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
780   } else {
781     auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
782     Register AR = HMFI.getStackAlignBaseReg();
783     SDValue CH = CurDAG->getEntryNode();
784     SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
785     R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
786   }
787 
788   ReplaceNode(N, R);
789 }
790 
791 void HexagonDAGToDAGISel::SelectAddSubCarry(SDNode *N) {
792   unsigned OpcCarry = N->getOpcode() == HexagonISD::ADDC ? Hexagon::A4_addp_c
793                                                          : Hexagon::A4_subp_c;
794   SDNode *C = CurDAG->getMachineNode(OpcCarry, SDLoc(N), N->getVTList(),
795                                      { N->getOperand(0), N->getOperand(1),
796                                        N->getOperand(2) });
797   ReplaceNode(N, C);
798 }
799 
800 void HexagonDAGToDAGISel::SelectVAlign(SDNode *N) {
801   MVT ResTy = N->getValueType(0).getSimpleVT();
802   if (HST->isHVXVectorType(ResTy, true))
803     return SelectHvxVAlign(N);
804 
805   const SDLoc &dl(N);
806   unsigned VecLen = ResTy.getSizeInBits();
807   if (VecLen == 32) {
808     SDValue Ops[] = {
809       CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
810       N->getOperand(0),
811       CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
812       N->getOperand(1),
813       CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
814     };
815     SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
816                                        MVT::i64, Ops);
817 
818     // Shift right by "(Addr & 0x3) * 8" bytes.
819     SDNode *C;
820     SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32);
821     SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32);
822     if (HST->useCompound()) {
823       C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
824                                  M0, N->getOperand(2), M1);
825     } else {
826       SDNode *T = CurDAG->getMachineNode(Hexagon::S2_asl_i_r, dl, MVT::i32,
827                                          N->getOperand(2), M1);
828       C = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
829                                  SDValue(T, 0), M0);
830     }
831     SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
832                                        SDValue(R, 0), SDValue(C, 0));
833     SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
834                                                SDValue(S, 0));
835     ReplaceNode(N, E.getNode());
836   } else {
837     assert(VecLen == 64);
838     SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1,
839                                         N->getOperand(2));
840     SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy,
841                                         N->getOperand(0), N->getOperand(1),
842                                         SDValue(Pu,0));
843     ReplaceNode(N, VA);
844   }
845 }
846 
847 void HexagonDAGToDAGISel::SelectVAlignAddr(SDNode *N) {
848   const SDLoc &dl(N);
849   SDValue A = N->getOperand(1);
850   int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue();
851   assert(isPowerOf2_32(-Mask));
852 
853   SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
854   SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
855                                       N->getOperand(0), M);
856   ReplaceNode(N, AA);
857 }
858 
859 // Handle these nodes here to avoid having to write patterns for all
860 // combinations of input/output types. In all cases, the resulting
861 // instruction is the same.
862 void HexagonDAGToDAGISel::SelectTypecast(SDNode *N) {
863   SDValue Op = N->getOperand(0);
864   MVT OpTy = Op.getValueType().getSimpleVT();
865   SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
866                                   CurDAG->getVTList(OpTy), {Op});
867   ReplaceNode(T, Op.getNode());
868 }
869 
870 void HexagonDAGToDAGISel::SelectP2D(SDNode *N) {
871   MVT ResTy = N->getValueType(0).getSimpleVT();
872   SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy,
873                                      N->getOperand(0));
874   ReplaceNode(N, T);
875 }
876 
877 void HexagonDAGToDAGISel::SelectD2P(SDNode *N) {
878   const SDLoc &dl(N);
879   MVT ResTy = N->getValueType(0).getSimpleVT();
880   SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
881   SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
882                                      N->getOperand(0), Zero);
883   ReplaceNode(N, T);
884 }
885 
886 void HexagonDAGToDAGISel::SelectV2Q(SDNode *N) {
887   const SDLoc &dl(N);
888   MVT ResTy = N->getValueType(0).getSimpleVT();
889   // The argument to V2Q should be a single vector.
890   MVT OpTy = N->getOperand(0).getValueType().getSimpleVT(); (void)OpTy;
891   assert(HST->getVectorLength() * 8 == OpTy.getSizeInBits());
892 
893   SDValue C = CurDAG->getSignedTargetConstant(-1, dl, MVT::i32);
894   SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
895   SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy,
896                                      N->getOperand(0), SDValue(R,0));
897   ReplaceNode(N, T);
898 }
899 
900 void HexagonDAGToDAGISel::SelectQ2V(SDNode *N) {
901   const SDLoc &dl(N);
902   MVT ResTy = N->getValueType(0).getSimpleVT();
903   // The result of V2Q should be a single vector.
904   assert(HST->getVectorLength() * 8 == ResTy.getSizeInBits());
905 
906   SDValue C = CurDAG->getSignedTargetConstant(-1, dl, MVT::i32);
907   SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
908   SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy,
909                                      N->getOperand(0), SDValue(R,0));
910   ReplaceNode(N, T);
911 }
912 
913 void HexagonDAGToDAGISel::FDiv(SDNode *N) {
914   const SDLoc &dl(N);
915   ArrayRef<EVT> ResultType(N->value_begin(), N->value_end());
916   SmallVector<SDValue, 2> Ops;
917   Ops = {N->getOperand(0), N->getOperand(1)};
918   SDVTList VTs;
919   VTs = CurDAG->getVTList(MVT::f32, MVT::f32);
920   SDNode *ResScale = CurDAG->getMachineNode(Hexagon::F2_sfrecipa, dl, VTs, Ops);
921   SDNode *D = CurDAG->getMachineNode(Hexagon::F2_sffixupd, dl, MVT::f32, Ops);
922 
923   SDValue C = CurDAG->getTargetConstant(0x3f800000, dl, MVT::i32);
924   SDNode *constNode =
925       CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, C);
926 
927   SDNode *n = CurDAG->getMachineNode(Hexagon::F2_sffixupn, dl, MVT::f32, Ops);
928   SDNode *Err = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
929                                        SDValue(constNode, 0), SDValue(D, 0),
930                                        SDValue(ResScale, 0));
931   SDNode *NewRec = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
932                                           SDValue(ResScale, 0), SDValue(Err, 0),
933                                           SDValue(ResScale, 0));
934   SDNode *newErr = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
935                                           SDValue(constNode, 0), SDValue(D, 0),
936                                           SDValue(NewRec, 0));
937   SDNode *q = CurDAG->getMachineNode(
938       Hexagon::A2_andir, dl, MVT::f32, SDValue(n, 0),
939       CurDAG->getTargetConstant(0x80000000, dl, MVT::i32));
940   SDNode *NewQ =
941       CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32, SDValue(q, 0),
942                              SDValue(n, 0), SDValue(NewRec, 0));
943   SDNode *NNewRec = CurDAG->getMachineNode(
944       Hexagon::F2_sffma_lib, dl, MVT::f32, SDValue(NewRec, 0),
945       SDValue(newErr, 0), SDValue(NewRec, 0));
946   SDNode *qErr =
947       CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32, SDValue(n, 0),
948                              SDValue(D, 0), SDValue(NewQ, 0));
949   SDNode *NNewQ = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
950                                          SDValue(NewQ, 0), SDValue(qErr, 0),
951                                          SDValue(NNewRec, 0));
952 
953   SDNode *NqErr =
954       CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32, SDValue(n, 0),
955                              SDValue(NNewQ, 0), SDValue(D, 0));
956   std::array<SDValue, 4> temp1 = {SDValue(NNewQ, 0), SDValue(NqErr, 0),
957                                   SDValue(NNewRec, 0), SDValue(ResScale, 1)};
958   ArrayRef<SDValue> OpValue1(temp1);
959   SDNode *FinalNewQ =
960       CurDAG->getMachineNode(Hexagon::F2_sffma_sc, dl, MVT::f32, OpValue1);
961   ReplaceNode(N, FinalNewQ);
962 }
963 
964 void HexagonDAGToDAGISel::FastFDiv(SDNode *N) {
965   const SDLoc &dl(N);
966   ArrayRef<EVT> ResultType(N->value_begin(), N->value_end());
967   SmallVector<SDValue, 2> Ops;
968   Ops = {N->getOperand(0), N->getOperand(1)};
969   SDVTList VTs;
970   VTs = CurDAG->getVTList(MVT::f32, MVT::f32);
971   SDNode *ResScale = CurDAG->getMachineNode(Hexagon::F2_sfrecipa, dl, VTs, Ops);
972   SDNode *D = CurDAG->getMachineNode(Hexagon::F2_sffixupd, dl, MVT::f32, Ops);
973 
974   SDValue C = CurDAG->getTargetConstant(0x3f800000, dl, MVT::i32);
975   SDNode *constNode =
976       CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, C);
977 
978   SDNode *n = CurDAG->getMachineNode(Hexagon::F2_sffixupn, dl, MVT::f32, Ops);
979   SDNode *Err = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
980                                        SDValue(constNode, 0), SDValue(D, 0),
981                                        SDValue(ResScale, 0));
982   SDNode *NewRec = CurDAG->getMachineNode(Hexagon::F2_sffma_lib, dl, MVT::f32,
983                                           SDValue(ResScale, 0), SDValue(Err, 0),
984                                           SDValue(ResScale, 0));
985   SDNode *newErr = CurDAG->getMachineNode(Hexagon::F2_sffms_lib, dl, MVT::f32,
986                                           SDValue(constNode, 0), SDValue(D, 0),
987                                           SDValue(NewRec, 0));
988 
989   SDNode *NNewRec = CurDAG->getMachineNode(
990       Hexagon::F2_sffma_lib, dl, MVT::f32, SDValue(NewRec, 0),
991       SDValue(newErr, 0), SDValue(NewRec, 0));
992   SDNode *FinalNewQ = CurDAG->getMachineNode(
993       Hexagon::F2_sfmpy, dl, MVT::f32, SDValue(NNewRec, 0), SDValue(n, 0));
994   ReplaceNode(N, FinalNewQ);
995 }
996 
997 void HexagonDAGToDAGISel::SelectFDiv(SDNode *N) {
998   if (N->getFlags().hasAllowReassociation())
999     FastFDiv(N);
1000   else
1001     FDiv(N);
1002 }
1003 
1004 void HexagonDAGToDAGISel::Select(SDNode *N) {
1005   if (N->isMachineOpcode())
1006     return N->setNodeId(-1);  // Already selected.
1007 
1008   auto isHvxOp = [this](SDNode *N) {
1009     for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
1010       if (HST->isHVXVectorType(N->getValueType(i), true))
1011         return true;
1012     }
1013     for (SDValue I : N->ops()) {
1014       if (HST->isHVXVectorType(I.getValueType(), true))
1015         return true;
1016     }
1017     return false;
1018   };
1019 
1020   if (HST->useHVXOps() && isHvxOp(N)) {
1021     switch (N->getOpcode()) {
1022     case ISD::EXTRACT_SUBVECTOR:  return SelectHvxExtractSubvector(N);
1023     case ISD::VECTOR_SHUFFLE:     return SelectHvxShuffle(N);
1024 
1025     case HexagonISD::VROR:        return SelectHvxRor(N);
1026     }
1027   }
1028 
1029   switch (N->getOpcode()) {
1030   case ISD::Constant:             return SelectConstant(N);
1031   case ISD::ConstantFP:           return SelectConstantFP(N);
1032   case ISD::FrameIndex:           return SelectFrameIndex(N);
1033   case ISD::SHL:                  return SelectSHL(N);
1034   case ISD::LOAD:                 return SelectLoad(N);
1035   case ISD::STORE:                return SelectStore(N);
1036   case ISD::INTRINSIC_W_CHAIN:    return SelectIntrinsicWChain(N);
1037   case ISD::INTRINSIC_WO_CHAIN:   return SelectIntrinsicWOChain(N);
1038   case ISD::EXTRACT_SUBVECTOR:    return SelectExtractSubvector(N);
1039 
1040   case HexagonISD::ADDC:
1041   case HexagonISD::SUBC:          return SelectAddSubCarry(N);
1042   case HexagonISD::VALIGN:        return SelectVAlign(N);
1043   case HexagonISD::VALIGNADDR:    return SelectVAlignAddr(N);
1044   case HexagonISD::TYPECAST:      return SelectTypecast(N);
1045   case HexagonISD::P2D:           return SelectP2D(N);
1046   case HexagonISD::D2P:           return SelectD2P(N);
1047   case HexagonISD::Q2V:           return SelectQ2V(N);
1048   case HexagonISD::V2Q:           return SelectV2Q(N);
1049   case ISD::FDIV:
1050     return SelectFDiv(N);
1051   }
1052 
1053   SelectCode(N);
1054 }
1055 
1056 bool HexagonDAGToDAGISel::SelectInlineAsmMemoryOperand(
1057     const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
1058     std::vector<SDValue> &OutOps) {
1059   SDValue Inp = Op, Res;
1060 
1061   switch (ConstraintID) {
1062   default:
1063     return true;
1064   case InlineAsm::ConstraintCode::o: // Offsetable.
1065   case InlineAsm::ConstraintCode::v: // Not offsetable.
1066   case InlineAsm::ConstraintCode::m: // Memory.
1067     if (SelectAddrFI(Inp, Res))
1068       OutOps.push_back(Res);
1069     else
1070       OutOps.push_back(Inp);
1071     break;
1072   }
1073 
1074   OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
1075   return false;
1076 }
1077 
1078 static bool isMemOPCandidate(SDNode *I, SDNode *U) {
1079   // I is an operand of U. Check if U is an arithmetic (binary) operation
1080   // usable in a memop, where the other operand is a loaded value, and the
1081   // result of U is stored in the same location.
1082 
1083   if (!U->hasOneUse())
1084     return false;
1085   unsigned Opc = U->getOpcode();
1086   switch (Opc) {
1087     case ISD::ADD:
1088     case ISD::SUB:
1089     case ISD::AND:
1090     case ISD::OR:
1091       break;
1092     default:
1093       return false;
1094   }
1095 
1096   SDValue S0 = U->getOperand(0);
1097   SDValue S1 = U->getOperand(1);
1098   SDValue SY = (S0.getNode() == I) ? S1 : S0;
1099 
1100   SDNode *UUse = *U->user_begin();
1101   if (UUse->getNumValues() != 1)
1102     return false;
1103 
1104   // Check if one of the inputs to U is a load instruction and the output
1105   // is used by a store instruction. If so and they also have the same
1106   // base pointer, then don't preoprocess this node sequence as it
1107   // can be matched to a memop.
1108   SDNode *SYNode = SY.getNode();
1109   if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
1110     SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
1111     SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
1112     if (LDBasePtr == STBasePtr)
1113       return true;
1114   }
1115   return false;
1116 }
1117 
1118 
1119 // Transform: (or (select c x 0) z)  ->  (select c (or x z) z)
1120 //            (or (select c 0 y) z)  ->  (select c z (or y z))
1121 void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
1122   SelectionDAG &DAG = *CurDAG;
1123 
1124   for (auto *I : Nodes) {
1125     if (I->getOpcode() != ISD::OR)
1126       continue;
1127 
1128     auto IsSelect0 = [](const SDValue &Op) -> bool {
1129       if (Op.getOpcode() != ISD::SELECT)
1130         return false;
1131       return isNullConstant(Op.getOperand(1)) ||
1132              isNullConstant(Op.getOperand(2));
1133     };
1134 
1135     SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
1136     EVT VT = I->getValueType(0);
1137     bool SelN0 = IsSelect0(N0);
1138     SDValue SOp = SelN0 ? N0 : N1;
1139     SDValue VOp = SelN0 ? N1 : N0;
1140 
1141     if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
1142       SDValue SC = SOp.getOperand(0);
1143       SDValue SX = SOp.getOperand(1);
1144       SDValue SY = SOp.getOperand(2);
1145       SDLoc DLS = SOp;
1146       if (isNullConstant(SY)) {
1147         SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
1148         SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
1149         DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1150       } else if (isNullConstant(SX)) {
1151         SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
1152         SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
1153         DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1154       }
1155     }
1156   }
1157 }
1158 
1159 // Transform: (store ch val (add x (add (shl y c) e)))
1160 //        to: (store ch val (add x (shl (add y d) c))),
1161 // where e = (shl d c) for some integer d.
1162 // The purpose of this is to enable generation of loads/stores with
1163 // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1164 // value c must be 0, 1 or 2.
1165 void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
1166   SelectionDAG &DAG = *CurDAG;
1167 
1168   for (auto *I : Nodes) {
1169     if (I->getOpcode() != ISD::STORE)
1170       continue;
1171 
1172     // I matched: (store ch val Off)
1173     SDValue Off = I->getOperand(2);
1174     // Off needs to match: (add x (add (shl y c) (shl d c))))
1175     if (Off.getOpcode() != ISD::ADD)
1176       continue;
1177     // Off matched: (add x T0)
1178     SDValue T0 = Off.getOperand(1);
1179     // T0 needs to match: (add T1 T2):
1180     if (T0.getOpcode() != ISD::ADD)
1181       continue;
1182     // T0 matched: (add T1 T2)
1183     SDValue T1 = T0.getOperand(0);
1184     SDValue T2 = T0.getOperand(1);
1185     // T1 needs to match: (shl y c)
1186     if (T1.getOpcode() != ISD::SHL)
1187       continue;
1188     SDValue C = T1.getOperand(1);
1189     ConstantSDNode *CN = dyn_cast<ConstantSDNode>(C.getNode());
1190     if (CN == nullptr)
1191       continue;
1192     unsigned CV = CN->getZExtValue();
1193     if (CV > 2)
1194       continue;
1195     // T2 needs to match e, where e = (shl d c) for some d.
1196     ConstantSDNode *EN = dyn_cast<ConstantSDNode>(T2.getNode());
1197     if (EN == nullptr)
1198       continue;
1199     unsigned EV = EN->getZExtValue();
1200     if (EV % (1 << CV) != 0)
1201       continue;
1202     unsigned DV = EV / (1 << CV);
1203 
1204     // Replace T0 with: (shl (add y d) c)
1205     SDLoc DL = SDLoc(I);
1206     EVT VT = T0.getValueType();
1207     SDValue D = DAG.getConstant(DV, DL, VT);
1208     // NewAdd = (add y d)
1209     SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D);
1210     // NewShl = (shl NewAdd c)
1211     SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
1212     ReplaceNode(T0.getNode(), NewShl.getNode());
1213   }
1214 }
1215 
1216 // Transform: (load ch (add x (and (srl y c) Mask)))
1217 //        to: (load ch (add x (shl (srl y d) d-c)))
1218 // where
1219 // Mask = 00..0 111..1 0.0
1220 //          |     |     +-- d-c 0s, and d-c is 0, 1 or 2.
1221 //          |     +-------- 1s
1222 //          +-------------- at most c 0s
1223 // Motivating example:
1224 // DAG combiner optimizes (add x (shl (srl y 5) 2))
1225 //                     to (add x (and (srl y 3) 1FFFFFFC))
1226 // which results in a constant-extended and(##...,lsr). This transformation
1227 // undoes this simplification for cases where the shl can be folded into
1228 // an addressing mode.
1229 void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
1230   SelectionDAG &DAG = *CurDAG;
1231 
1232   for (SDNode *N : Nodes) {
1233     unsigned Opc = N->getOpcode();
1234     if (Opc != ISD::LOAD && Opc != ISD::STORE)
1235       continue;
1236     SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
1237     // Addr must match: (add x T0)
1238     if (Addr.getOpcode() != ISD::ADD)
1239       continue;
1240     SDValue T0 = Addr.getOperand(1);
1241     // T0 must match: (and T1 Mask)
1242     if (T0.getOpcode() != ISD::AND)
1243       continue;
1244 
1245     // We have an AND.
1246     //
1247     // Check the first operand. It must be: (srl y c).
1248     SDValue S = T0.getOperand(0);
1249     if (S.getOpcode() != ISD::SRL)
1250       continue;
1251     ConstantSDNode *SN = dyn_cast<ConstantSDNode>(S.getOperand(1).getNode());
1252     if (SN == nullptr)
1253       continue;
1254     if (SN->getAPIntValue().getBitWidth() != 32)
1255       continue;
1256     uint32_t CV = SN->getZExtValue();
1257 
1258     // Check the second operand: the supposed mask.
1259     ConstantSDNode *MN = dyn_cast<ConstantSDNode>(T0.getOperand(1).getNode());
1260     if (MN == nullptr)
1261       continue;
1262     if (MN->getAPIntValue().getBitWidth() != 32)
1263       continue;
1264     uint32_t Mask = MN->getZExtValue();
1265     // Examine the mask.
1266     uint32_t TZ = llvm::countr_zero(Mask);
1267     uint32_t M1 = llvm::countr_one(Mask >> TZ);
1268     uint32_t LZ = llvm::countl_zero(Mask);
1269     // Trailing zeros + middle ones + leading zeros must equal the width.
1270     if (TZ + M1 + LZ != 32)
1271       continue;
1272     // The number of trailing zeros will be encoded in the addressing mode.
1273     if (TZ > 2)
1274       continue;
1275     // The number of leading zeros must be at most c.
1276     if (LZ > CV)
1277       continue;
1278 
1279     // All looks good.
1280     SDValue Y = S.getOperand(0);
1281     EVT VT = Addr.getValueType();
1282     SDLoc dl(S);
1283     // TZ = D-C, so D = TZ+C.
1284     SDValue D = DAG.getConstant(TZ+CV, dl, VT);
1285     SDValue DC = DAG.getConstant(TZ, dl, VT);
1286     SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
1287     SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
1288     ReplaceNode(T0.getNode(), NewShl.getNode());
1289   }
1290 }
1291 
1292 // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1293 //                                                  (op ... 1 ...))
1294 void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
1295   SelectionDAG &DAG = *CurDAG;
1296 
1297   for (SDNode *N : Nodes) {
1298     unsigned Opc = N->getOpcode();
1299     if (Opc != ISD::ZERO_EXTEND)
1300       continue;
1301     SDValue OpI1 = N->getOperand(0);
1302     EVT OpVT = OpI1.getValueType();
1303     if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
1304       continue;
1305     for (SDUse &Use : N->uses()) {
1306       SDNode *U = Use.getUser();
1307       if (U->getNumValues() != 1)
1308         continue;
1309       EVT UVT = U->getValueType(0);
1310       if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
1311         continue;
1312       // Do not generate select for all i1 vector type.
1313       if (UVT.isVector() && UVT.getVectorElementType() == MVT::i1)
1314         continue;
1315       if (isMemOPCandidate(N, U))
1316         continue;
1317 
1318       // Potentially simplifiable operation.
1319       unsigned I1N = Use.getOperandNo();
1320       SmallVector<SDValue,2> Ops(U->getNumOperands());
1321       for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
1322         Ops[i] = U->getOperand(i);
1323       EVT BVT = Ops[I1N].getValueType();
1324 
1325       const SDLoc &dl(U);
1326       SDValue C0 = DAG.getConstant(0, dl, BVT);
1327       SDValue C1 = DAG.getConstant(1, dl, BVT);
1328       SDValue If0, If1;
1329 
1330       if (isa<MachineSDNode>(U)) {
1331         unsigned UseOpc = U->getMachineOpcode();
1332         Ops[I1N] = C0;
1333         If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1334         Ops[I1N] = C1;
1335         If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1336       } else {
1337         unsigned UseOpc = U->getOpcode();
1338         Ops[I1N] = C0;
1339         If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
1340         Ops[I1N] = C1;
1341         If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
1342       }
1343       // We're generating a SELECT way after legalization, so keep the types
1344       // simple.
1345       unsigned UW = UVT.getSizeInBits();
1346       EVT SVT = (UW == 32 || UW == 64) ? MVT::getIntegerVT(UW) : UVT;
1347       SDValue Sel = DAG.getNode(ISD::SELECT, dl, SVT, OpI1,
1348                                 DAG.getBitcast(SVT, If1),
1349                                 DAG.getBitcast(SVT, If0));
1350       SDValue Ret = DAG.getBitcast(UVT, Sel);
1351       DAG.ReplaceAllUsesWith(U, Ret.getNode());
1352     }
1353   }
1354 }
1355 
1356 void HexagonDAGToDAGISel::PreprocessISelDAG() {
1357   // Repack all nodes before calling each preprocessing function,
1358   // because each of them can modify the set of nodes.
1359   auto getNodes = [this]() -> std::vector<SDNode *> {
1360     std::vector<SDNode *> T;
1361     T.reserve(CurDAG->allnodes_size());
1362     for (SDNode &N : CurDAG->allnodes())
1363       T.push_back(&N);
1364     return T;
1365   };
1366 
1367   if (HST->useHVXOps())
1368     PreprocessHvxISelDAG();
1369 
1370   // Transform: (or (select c x 0) z)  ->  (select c (or x z) z)
1371   //            (or (select c 0 y) z)  ->  (select c z (or y z))
1372   ppSimplifyOrSelect0(getNodes());
1373 
1374   // Transform: (store ch val (add x (add (shl y c) e)))
1375   //        to: (store ch val (add x (shl (add y d) c))),
1376   // where e = (shl d c) for some integer d.
1377   // The purpose of this is to enable generation of loads/stores with
1378   // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1379   // value c must be 0, 1 or 2.
1380   ppAddrReorderAddShl(getNodes());
1381 
1382   // Transform: (load ch (add x (and (srl y c) Mask)))
1383   //        to: (load ch (add x (shl (srl y d) d-c)))
1384   // where
1385   // Mask = 00..0 111..1 0.0
1386   //          |     |     +-- d-c 0s, and d-c is 0, 1 or 2.
1387   //          |     +-------- 1s
1388   //          +-------------- at most c 0s
1389   // Motivating example:
1390   // DAG combiner optimizes (add x (shl (srl y 5) 2))
1391   //                     to (add x (and (srl y 3) 1FFFFFFC))
1392   // which results in a constant-extended and(##...,lsr). This transformation
1393   // undoes this simplification for cases where the shl can be folded into
1394   // an addressing mode.
1395   ppAddrRewriteAndSrl(getNodes());
1396 
1397   // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1398   //                                                  (op ... 1 ...))
1399   ppHoistZextI1(getNodes());
1400 
1401   DEBUG_WITH_TYPE("isel", {
1402     dbgs() << "Preprocessed (Hexagon) selection DAG:";
1403     CurDAG->dump();
1404   });
1405 
1406   if (EnableAddressRebalancing) {
1407     rebalanceAddressTrees();
1408 
1409     DEBUG_WITH_TYPE("isel", {
1410       dbgs() << "Address tree balanced selection DAG:";
1411       CurDAG->dump();
1412     });
1413   }
1414 }
1415 
1416 void HexagonDAGToDAGISel::emitFunctionEntryCode() {
1417   auto &HST = MF->getSubtarget<HexagonSubtarget>();
1418   auto &HFI = *HST.getFrameLowering();
1419   if (!HFI.needsAligna(*MF))
1420     return;
1421 
1422   MachineFrameInfo &MFI = MF->getFrameInfo();
1423   MachineBasicBlock *EntryBB = &MF->front();
1424   Align EntryMaxA = MFI.getMaxAlign();
1425 
1426   // Reserve the first non-volatile register.
1427   Register AP = 0;
1428   auto &HRI = *HST.getRegisterInfo();
1429   BitVector Reserved = HRI.getReservedRegs(*MF);
1430   for (const MCPhysReg *R = HRI.getCalleeSavedRegs(MF); *R; ++R) {
1431     if (Reserved[*R])
1432       continue;
1433     AP = *R;
1434     break;
1435   }
1436   assert(AP.isValid() && "Couldn't reserve stack align register");
1437   BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AP)
1438       .addImm(EntryMaxA.value());
1439   MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseReg(AP);
1440 }
1441 
1442 void HexagonDAGToDAGISel::updateAligna() {
1443   auto &HFI = *MF->getSubtarget<HexagonSubtarget>().getFrameLowering();
1444   if (!HFI.needsAligna(*MF))
1445     return;
1446   auto *AlignaI = const_cast<MachineInstr*>(HFI.getAlignaInstr(*MF));
1447   assert(AlignaI != nullptr);
1448   unsigned MaxA = MF->getFrameInfo().getMaxAlign().value();
1449   if (AlignaI->getOperand(1).getImm() < MaxA)
1450     AlignaI->getOperand(1).setImm(MaxA);
1451 }
1452 
1453 // Match a frame index that can be used in an addressing mode.
1454 bool HexagonDAGToDAGISel::SelectAddrFI(SDValue &N, SDValue &R) {
1455   if (N.getOpcode() != ISD::FrameIndex)
1456     return false;
1457   auto &HFI = *HST->getFrameLowering();
1458   MachineFrameInfo &MFI = MF->getFrameInfo();
1459   int FX = cast<FrameIndexSDNode>(N)->getIndex();
1460   if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
1461     return false;
1462   R = CurDAG->getTargetFrameIndex(FX, MVT::i32);
1463   return true;
1464 }
1465 
1466 inline bool HexagonDAGToDAGISel::SelectAddrGA(SDValue &N, SDValue &R) {
1467   return SelectGlobalAddress(N, R, false, Align(1));
1468 }
1469 
1470 inline bool HexagonDAGToDAGISel::SelectAddrGP(SDValue &N, SDValue &R) {
1471   return SelectGlobalAddress(N, R, true, Align(1));
1472 }
1473 
1474 inline bool HexagonDAGToDAGISel::SelectAnyImm(SDValue &N, SDValue &R) {
1475   return SelectAnyImmediate(N, R, Align(1));
1476 }
1477 
1478 inline bool HexagonDAGToDAGISel::SelectAnyImm0(SDValue &N, SDValue &R) {
1479   return SelectAnyImmediate(N, R, Align(1));
1480 }
1481 inline bool HexagonDAGToDAGISel::SelectAnyImm1(SDValue &N, SDValue &R) {
1482   return SelectAnyImmediate(N, R, Align(2));
1483 }
1484 inline bool HexagonDAGToDAGISel::SelectAnyImm2(SDValue &N, SDValue &R) {
1485   return SelectAnyImmediate(N, R, Align(4));
1486 }
1487 inline bool HexagonDAGToDAGISel::SelectAnyImm3(SDValue &N, SDValue &R) {
1488   return SelectAnyImmediate(N, R, Align(8));
1489 }
1490 
1491 inline bool HexagonDAGToDAGISel::SelectAnyInt(SDValue &N, SDValue &R) {
1492   EVT T = N.getValueType();
1493   if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N))
1494     return false;
1495   uint32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1496   R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1497   return true;
1498 }
1499 
1500 bool HexagonDAGToDAGISel::SelectAnyImmediate(SDValue &N, SDValue &R,
1501                                              Align Alignment) {
1502   switch (N.getOpcode()) {
1503   case ISD::Constant: {
1504     if (N.getValueType() != MVT::i32)
1505       return false;
1506     uint32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1507     if (!isAligned(Alignment, V))
1508       return false;
1509     R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1510     return true;
1511   }
1512   case HexagonISD::JT:
1513   case HexagonISD::CP:
1514     // These are assumed to always be aligned at least 8-byte boundary.
1515     if (Alignment > Align(8))
1516       return false;
1517     R = N.getOperand(0);
1518     return true;
1519   case ISD::ExternalSymbol:
1520     // Symbols may be aligned at any boundary.
1521     if (Alignment > Align(1))
1522       return false;
1523     R = N;
1524     return true;
1525   case ISD::BlockAddress:
1526     // Block address is always aligned at least 4-byte boundary.
1527     if (Alignment > Align(4) ||
1528         !isAligned(Alignment, cast<BlockAddressSDNode>(N)->getOffset()))
1529       return false;
1530     R = N;
1531     return true;
1532   }
1533 
1534   if (SelectGlobalAddress(N, R, false, Alignment) ||
1535       SelectGlobalAddress(N, R, true, Alignment))
1536     return true;
1537 
1538   return false;
1539 }
1540 
1541 bool HexagonDAGToDAGISel::SelectGlobalAddress(SDValue &N, SDValue &R,
1542                                               bool UseGP, Align Alignment) {
1543   switch (N.getOpcode()) {
1544   case ISD::ADD: {
1545     SDValue N0 = N.getOperand(0);
1546     SDValue N1 = N.getOperand(1);
1547     unsigned GAOpc = N0.getOpcode();
1548     if (UseGP && GAOpc != HexagonISD::CONST32_GP)
1549       return false;
1550     if (!UseGP && GAOpc != HexagonISD::CONST32)
1551       return false;
1552     if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
1553       if (!isAligned(Alignment, Const->getZExtValue()))
1554         return false;
1555       SDValue Addr = N0.getOperand(0);
1556       if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
1557         if (GA->getOpcode() == ISD::TargetGlobalAddress) {
1558           uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
1559           R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
1560                                              N.getValueType(), NewOff);
1561           return true;
1562         }
1563       }
1564     }
1565     break;
1566   }
1567   case HexagonISD::CP:
1568   case HexagonISD::JT:
1569   case HexagonISD::CONST32:
1570     // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
1571     // want in the instruction.
1572     if (!UseGP)
1573       R = N.getOperand(0);
1574     return !UseGP;
1575   case HexagonISD::CONST32_GP:
1576     if (UseGP)
1577       R = N.getOperand(0);
1578     return UseGP;
1579   default:
1580     return false;
1581   }
1582 
1583   return false;
1584 }
1585 
1586 bool HexagonDAGToDAGISel::DetectUseSxtw(SDValue &N, SDValue &R) {
1587   // This (complex pattern) function is meant to detect a sign-extension
1588   // i32->i64 on a per-operand basis. This would allow writing single
1589   // patterns that would cover a number of combinations of different ways
1590   // a sign-extensions could be written. For example:
1591   //   (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y)
1592   // could match either one of these:
1593   //   (mul (sext x) (sext_inreg y))
1594   //   (mul (sext-load *p) (sext_inreg y))
1595   //   (mul (sext_inreg x) (sext y))
1596   // etc.
1597   //
1598   // The returned value will have type i64 and its low word will
1599   // contain the value being extended. The high bits are not specified.
1600   // The returned type is i64 because the original type of N was i64,
1601   // but the users of this function should only use the low-word of the
1602   // result, e.g.
1603   //  (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y))
1604 
1605   if (N.getValueType() != MVT::i64)
1606     return false;
1607   unsigned Opc = N.getOpcode();
1608   switch (Opc) {
1609     case ISD::SIGN_EXTEND:
1610     case ISD::SIGN_EXTEND_INREG: {
1611       // sext_inreg has the source type as a separate operand.
1612       EVT T = Opc == ISD::SIGN_EXTEND
1613                 ? N.getOperand(0).getValueType()
1614                 : cast<VTSDNode>(N.getOperand(1))->getVT();
1615       unsigned SW = T.getSizeInBits();
1616       if (SW == 32)
1617         R = N.getOperand(0);
1618       else if (SW < 32)
1619         R = N;
1620       else
1621         return false;
1622       break;
1623     }
1624     case ISD::LOAD: {
1625       LoadSDNode *L = cast<LoadSDNode>(N);
1626       if (L->getExtensionType() != ISD::SEXTLOAD)
1627         return false;
1628       // All extending loads extend to i32, so even if the value in
1629       // memory is shorter than 32 bits, it will be i32 after the load.
1630       if (L->getMemoryVT().getSizeInBits() > 32)
1631         return false;
1632       R = N;
1633       break;
1634     }
1635     case ISD::SRA: {
1636       auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1));
1637       if (!S || S->getZExtValue() != 32)
1638         return false;
1639       R = N;
1640       break;
1641     }
1642     default:
1643       return false;
1644   }
1645   EVT RT = R.getValueType();
1646   if (RT == MVT::i64)
1647     return true;
1648   assert(RT == MVT::i32);
1649   // This is only to produce a value of type i64. Do not rely on the
1650   // high bits produced by this.
1651   const SDLoc &dl(N);
1652   SDValue Ops[] = {
1653     CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
1654     R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
1655     R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
1656   };
1657   SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
1658                                      MVT::i64, Ops);
1659   R = SDValue(T, 0);
1660   return true;
1661 }
1662 
1663 bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
1664       SDValue &Src) {
1665   unsigned Opc = Val.getOpcode();
1666   switch (Opc) {
1667   case ISD::SIGN_EXTEND:
1668   case ISD::ZERO_EXTEND:
1669   case ISD::ANY_EXTEND: {
1670     const SDValue &Op0 = Val.getOperand(0);
1671     EVT T = Op0.getValueType();
1672     if (T.isInteger() && T.getSizeInBits() == NumBits) {
1673       Src = Op0;
1674       return true;
1675     }
1676     break;
1677   }
1678   case ISD::SIGN_EXTEND_INREG:
1679   case ISD::AssertSext:
1680   case ISD::AssertZext:
1681     if (Val.getOperand(0).getValueType().isInteger()) {
1682       VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
1683       if (T->getVT().getSizeInBits() == NumBits) {
1684         Src = Val.getOperand(0);
1685         return true;
1686       }
1687     }
1688     break;
1689   case ISD::AND: {
1690     // Check if this is an AND with NumBits of lower bits set to 1.
1691     uint64_t Mask = (1ULL << NumBits) - 1;
1692     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1693       if (C->getZExtValue() == Mask) {
1694         Src = Val.getOperand(1);
1695         return true;
1696       }
1697     }
1698     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1699       if (C->getZExtValue() == Mask) {
1700         Src = Val.getOperand(0);
1701         return true;
1702       }
1703     }
1704     break;
1705   }
1706   case ISD::OR:
1707   case ISD::XOR: {
1708     // OR/XOR with the lower NumBits bits set to 0.
1709     uint64_t Mask = (1ULL << NumBits) - 1;
1710     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1711       if ((C->getZExtValue() & Mask) == 0) {
1712         Src = Val.getOperand(1);
1713         return true;
1714       }
1715     }
1716     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1717       if ((C->getZExtValue() & Mask) == 0) {
1718         Src = Val.getOperand(0);
1719         return true;
1720       }
1721     }
1722     break;
1723   }
1724   default:
1725     break;
1726   }
1727   return false;
1728 }
1729 
1730 bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
1731   return N->getAlign().value() >= N->getMemoryVT().getStoreSize();
1732 }
1733 
1734 bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
1735   unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF);
1736   switch (N->getMemoryVT().getStoreSize()) {
1737     case 1:
1738       return StackSize <= 56;   // 1*2^6 - 8
1739     case 2:
1740       return StackSize <= 120;  // 2*2^6 - 8
1741     case 4:
1742       return StackSize <= 248;  // 4*2^6 - 8
1743     default:
1744       return false;
1745   }
1746 }
1747 
1748 // Return true when the given node fits in a positive half word.
1749 bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
1750   if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) {
1751     int64_t V = CN->getSExtValue();
1752     return V > 0 && isInt<16>(V);
1753   }
1754   if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
1755     const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1));
1756     return VN->getVT().getSizeInBits() <= 16;
1757   }
1758   return false;
1759 }
1760 
1761 bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const {
1762   return !CheckSingleUse || N->hasOneUse();
1763 }
1764 
1765 ////////////////////////////////////////////////////////////////////////////////
1766 // Rebalancing of address calculation trees
1767 
1768 static bool isOpcodeHandled(const SDNode *N) {
1769   switch (N->getOpcode()) {
1770     case ISD::ADD:
1771     case ISD::MUL:
1772       return true;
1773     case ISD::SHL:
1774       // We only handle constant shifts because these can be easily flattened
1775       // into multiplications by 2^Op1.
1776       return isa<ConstantSDNode>(N->getOperand(1).getNode());
1777     default:
1778       return false;
1779   }
1780 }
1781 
1782 /// Return the weight of an SDNode
1783 int HexagonDAGToDAGISel::getWeight(SDNode *N) {
1784   if (!isOpcodeHandled(N))
1785     return 1;
1786   assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
1787   assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
1788   assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
1789   return RootWeights[N];
1790 }
1791 
1792 int HexagonDAGToDAGISel::getHeight(SDNode *N) {
1793   if (!isOpcodeHandled(N))
1794     return 0;
1795   assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
1796       "Cannot query height of unvisited/RAUW'd node!");
1797   return RootHeights[N];
1798 }
1799 
1800 namespace {
1801 struct WeightedLeaf {
1802   SDValue Value;
1803   int Weight;
1804   int InsertionOrder;
1805 
1806   WeightedLeaf() {}
1807 
1808   WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
1809     Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
1810     assert(Weight >= 0 && "Weight must be >= 0");
1811   }
1812 
1813   static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
1814     assert(A.Value.getNode() && B.Value.getNode());
1815     return A.Weight == B.Weight ?
1816             (A.InsertionOrder > B.InsertionOrder) :
1817             (A.Weight > B.Weight);
1818   }
1819 };
1820 
1821 /// A specialized priority queue for WeigthedLeaves. It automatically folds
1822 /// constants and allows removal of non-top elements while maintaining the
1823 /// priority order.
1824 class LeafPrioQueue {
1825   SmallVector<WeightedLeaf, 8> Q;
1826   bool HaveConst;
1827   WeightedLeaf ConstElt;
1828   unsigned Opcode;
1829 
1830 public:
1831   bool empty() {
1832     return (!HaveConst && Q.empty());
1833   }
1834 
1835   size_t size() {
1836     return Q.size() + HaveConst;
1837   }
1838 
1839   bool hasConst() {
1840     return HaveConst;
1841   }
1842 
1843   const WeightedLeaf &top() {
1844     if (HaveConst)
1845       return ConstElt;
1846     return Q.front();
1847   }
1848 
1849   WeightedLeaf pop() {
1850     if (HaveConst) {
1851       HaveConst = false;
1852       return ConstElt;
1853     }
1854     std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1855     return Q.pop_back_val();
1856   }
1857 
1858   void push(WeightedLeaf L, bool SeparateConst=true) {
1859     if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
1860       if (Opcode == ISD::MUL &&
1861           cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
1862         return;
1863       if (Opcode == ISD::ADD &&
1864           cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
1865         return;
1866 
1867       HaveConst = true;
1868       ConstElt = L;
1869     } else {
1870       Q.push_back(L);
1871       std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1872     }
1873   }
1874 
1875   /// Push L to the bottom of the queue regardless of its weight. If L is
1876   /// constant, it will not be folded with other constants in the queue.
1877   void pushToBottom(WeightedLeaf L) {
1878     L.Weight = 1000;
1879     push(L, false);
1880   }
1881 
1882   /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
1883   /// lowest weight and remove it from the queue.
1884   WeightedLeaf findSHL(uint64_t MaxAmount);
1885 
1886   WeightedLeaf findMULbyConst();
1887 
1888   LeafPrioQueue(unsigned Opcode) :
1889     HaveConst(false), Opcode(Opcode) { }
1890 };
1891 } // end anonymous namespace
1892 
1893 WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
1894   int ResultPos;
1895   WeightedLeaf Result;
1896 
1897   for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1898     const WeightedLeaf &L = Q[Pos];
1899     const SDValue &Val = L.Value;
1900     if (Val.getOpcode() != ISD::SHL ||
1901         !isa<ConstantSDNode>(Val.getOperand(1)) ||
1902         Val.getConstantOperandVal(1) > MaxAmount)
1903       continue;
1904     if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1905         (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1906     {
1907       Result = L;
1908       ResultPos = Pos;
1909     }
1910   }
1911 
1912   if (Result.Value.getNode()) {
1913     Q.erase(&Q[ResultPos]);
1914     std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1915   }
1916 
1917   return Result;
1918 }
1919 
1920 WeightedLeaf LeafPrioQueue::findMULbyConst() {
1921   int ResultPos;
1922   WeightedLeaf Result;
1923 
1924   for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1925     const WeightedLeaf &L = Q[Pos];
1926     const SDValue &Val = L.Value;
1927     if (Val.getOpcode() != ISD::MUL ||
1928         !isa<ConstantSDNode>(Val.getOperand(1)) ||
1929         Val.getConstantOperandVal(1) > 127)
1930       continue;
1931     if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1932         (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1933     {
1934       Result = L;
1935       ResultPos = Pos;
1936     }
1937   }
1938 
1939   if (Result.Value.getNode()) {
1940     Q.erase(&Q[ResultPos]);
1941     std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1942   }
1943 
1944   return Result;
1945 }
1946 
1947 SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
1948   uint64_t MulFactor = 1ull << N->getConstantOperandVal(1);
1949   return CurDAG->getConstant(MulFactor, SDLoc(N),
1950                              N->getOperand(1).getValueType());
1951 }
1952 
1953 /// @returns the value x for which 2^x is a factor of Val
1954 static unsigned getPowerOf2Factor(SDValue Val) {
1955   if (Val.getOpcode() == ISD::MUL) {
1956     unsigned MaxFactor = 0;
1957     for (int i = 0; i < 2; ++i) {
1958       ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(i));
1959       if (!C)
1960         continue;
1961       const APInt &CInt = C->getAPIntValue();
1962       if (CInt.getBoolValue())
1963         MaxFactor = CInt.countr_zero();
1964     }
1965     return MaxFactor;
1966   }
1967   if (Val.getOpcode() == ISD::SHL) {
1968     if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
1969       return 0;
1970     return (unsigned) Val.getConstantOperandVal(1);
1971   }
1972 
1973   return 0;
1974 }
1975 
1976 /// @returns true if V>>Amount will eliminate V's operation on its child
1977 static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
1978   if (V.getOpcode() == ISD::MUL) {
1979     SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1980     for (int i = 0; i < 2; ++i)
1981       if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1982           V.getConstantOperandVal(i) % (1ULL << Amount) == 0) {
1983         uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
1984         return (NewConst == 1);
1985       }
1986   } else if (V.getOpcode() == ISD::SHL) {
1987     return (Amount == V.getConstantOperandVal(1));
1988   }
1989 
1990   return false;
1991 }
1992 
1993 SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
1994   SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1995   if (V.getOpcode() == ISD::MUL) {
1996     for (int i=0; i < 2; ++i) {
1997       if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1998           V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
1999         uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
2000         if (NewConst == 1)
2001           return Ops[!i];
2002         Ops[i] = CurDAG->getConstant(NewConst,
2003                                      SDLoc(V), V.getValueType());
2004         break;
2005       }
2006     }
2007   } else if (V.getOpcode() == ISD::SHL) {
2008     uint64_t ShiftAmount = V.getConstantOperandVal(1);
2009     if (ShiftAmount == Power)
2010       return Ops[0];
2011     Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
2012                                  SDLoc(V), V.getValueType());
2013   }
2014 
2015   return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
2016 }
2017 
2018 static bool isTargetConstant(const SDValue &V) {
2019   return V.getOpcode() == HexagonISD::CONST32 ||
2020          V.getOpcode() == HexagonISD::CONST32_GP;
2021 }
2022 
2023 unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
2024   if (GAUsesInFunction.count(V))
2025     return GAUsesInFunction[V];
2026 
2027   unsigned Result = 0;
2028   const Function &CurF = CurDAG->getMachineFunction().getFunction();
2029   for (const User *U : V->users()) {
2030     if (isa<Instruction>(U) &&
2031         cast<Instruction>(U)->getParent()->getParent() == &CurF)
2032       ++Result;
2033   }
2034 
2035   GAUsesInFunction[V] = Result;
2036 
2037   return Result;
2038 }
2039 
2040 /// Note - After calling this, N may be dead. It may have been replaced by a
2041 /// new node, so always use the returned value in place of N.
2042 ///
2043 /// @returns The SDValue taking the place of N (which could be N if it is
2044 /// unchanged)
2045 SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
2046   assert(RootWeights.count(N) && "Cannot balance non-root node.");
2047   assert(RootWeights[N] != -2 && "This node was RAUW'd!");
2048   assert(!TopLevel || N->getOpcode() == ISD::ADD);
2049 
2050   // Return early if this node was already visited
2051   if (RootWeights[N] != -1)
2052     return SDValue(N, 0);
2053 
2054   assert(isOpcodeHandled(N));
2055 
2056   SDValue Op0 = N->getOperand(0);
2057   SDValue Op1 = N->getOperand(1);
2058 
2059   // Return early if the operands will remain unchanged or are all roots
2060   if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
2061       (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
2062     SDNode *Op0N = Op0.getNode();
2063     int Weight;
2064     if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
2065       Weight = getWeight(balanceSubTree(Op0N).getNode());
2066       // Weight = calculateWeight(Op0N);
2067     } else
2068       Weight = getWeight(Op0N);
2069 
2070     SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
2071     if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
2072       Weight += getWeight(balanceSubTree(Op1N).getNode());
2073       // Weight += calculateWeight(Op1N);
2074     } else
2075       Weight += getWeight(Op1N);
2076 
2077     RootWeights[N] = Weight;
2078     RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
2079                               getHeight(N->getOperand(1).getNode())) + 1;
2080 
2081     LLVM_DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
2082                       << " Height=" << RootHeights[N] << "): ");
2083     LLVM_DEBUG(N->dump(CurDAG));
2084 
2085     return SDValue(N, 0);
2086   }
2087 
2088   LLVM_DEBUG(dbgs() << "** Balancing root node: ");
2089   LLVM_DEBUG(N->dump(CurDAG));
2090 
2091   unsigned NOpcode = N->getOpcode();
2092 
2093   LeafPrioQueue Leaves(NOpcode);
2094   SmallVector<SDValue, 4> Worklist;
2095   Worklist.push_back(SDValue(N, 0));
2096 
2097   // SHL nodes will be converted to MUL nodes
2098   if (NOpcode == ISD::SHL)
2099     NOpcode = ISD::MUL;
2100 
2101   bool CanFactorize = false;
2102   WeightedLeaf Mul1, Mul2;
2103   unsigned MaxPowerOf2 = 0;
2104   WeightedLeaf GA;
2105 
2106   // Do not try to factor out a shift if there is already a shift at the tip of
2107   // the tree.
2108   bool HaveTopLevelShift = false;
2109   if (TopLevel &&
2110       ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
2111                         Op0.getConstantOperandVal(1) < 4) ||
2112        (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
2113                         Op1.getConstantOperandVal(1) < 4)))
2114     HaveTopLevelShift = true;
2115 
2116   // Flatten the subtree into an ordered list of leaves; at the same time
2117   // determine whether the tree is already balanced.
2118   int InsertionOrder = 0;
2119   SmallDenseMap<SDValue, int> NodeHeights;
2120   bool Imbalanced = false;
2121   int CurrentWeight = 0;
2122   while (!Worklist.empty()) {
2123     SDValue Child = Worklist.pop_back_val();
2124 
2125     if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
2126       // CASE 1: Child is a root note
2127 
2128       int Weight = RootWeights[Child.getNode()];
2129       if (Weight == -1) {
2130         Child = balanceSubTree(Child.getNode());
2131         // calculateWeight(Child.getNode());
2132         Weight = getWeight(Child.getNode());
2133       } else if (Weight == -2) {
2134         // Whoops, this node was RAUWd by one of the balanceSubTree calls we
2135         // made. Our worklist isn't up to date anymore.
2136         // Restart the whole process.
2137         LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2138         return balanceSubTree(N, TopLevel);
2139       }
2140 
2141       NodeHeights[Child] = 1;
2142       CurrentWeight += Weight;
2143 
2144       unsigned PowerOf2;
2145       if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
2146           (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
2147           Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
2148         // Try to identify two factorizable MUL/SHL children greedily. Leave
2149         // them out of the priority queue for now so we can deal with them
2150         // after.
2151         if (!Mul1.Value.getNode()) {
2152           Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
2153           MaxPowerOf2 = PowerOf2;
2154         } else {
2155           Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
2156           MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
2157 
2158           // Our addressing modes can only shift by a maximum of 3
2159           if (MaxPowerOf2 > 3)
2160             MaxPowerOf2 = 3;
2161 
2162           CanFactorize = true;
2163         }
2164       } else
2165         Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2166     } else if (!isOpcodeHandled(Child.getNode())) {
2167       // CASE 2: Child is an unhandled kind of node (e.g. constant)
2168       int Weight = getWeight(Child.getNode());
2169 
2170       NodeHeights[Child] = getHeight(Child.getNode());
2171       CurrentWeight += Weight;
2172 
2173       if (isTargetConstant(Child) && !GA.Value.getNode())
2174         GA = WeightedLeaf(Child, Weight, InsertionOrder++);
2175       else
2176         Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2177     } else {
2178       // CASE 3: Child is a subtree of same opcode
2179       // Visit children first, then flatten.
2180       unsigned ChildOpcode = Child.getOpcode();
2181       assert(ChildOpcode == NOpcode ||
2182              (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
2183 
2184       // Convert SHL to MUL
2185       SDValue Op1;
2186       if (ChildOpcode == ISD::SHL)
2187         Op1 = getMultiplierForSHL(Child.getNode());
2188       else
2189         Op1 = Child->getOperand(1);
2190 
2191       if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
2192         assert(!NodeHeights.count(Child) && "Parent visited before children?");
2193         // Visit children first, then re-visit this node
2194         Worklist.push_back(Child);
2195         Worklist.push_back(Op1);
2196         Worklist.push_back(Child->getOperand(0));
2197       } else {
2198         // Back at this node after visiting the children
2199         if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
2200           Imbalanced = true;
2201 
2202         NodeHeights[Child] = std::max(NodeHeights[Op1],
2203                                       NodeHeights[Child->getOperand(0)]) + 1;
2204       }
2205     }
2206   }
2207 
2208   LLVM_DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
2209                     << " weight=" << CurrentWeight
2210                     << " imbalanced=" << Imbalanced << "\n");
2211 
2212   // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
2213   //  This factors out a shift in order to match memw(a<<Y+b).
2214   if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
2215                        willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
2216     LLVM_DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
2217     int Weight = Mul1.Weight + Mul2.Weight;
2218     int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
2219     SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
2220     SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
2221     SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
2222                                   Mul1Factored, Mul2Factored);
2223     SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
2224                                         Mul1.Value.getValueType());
2225     SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
2226                                   Sum, Const);
2227     NodeHeights[New] = Height;
2228     Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
2229   } else if (Mul1.Value.getNode()) {
2230     // We failed to factorize two MULs, so now the Muls are left outside the
2231     // queue... add them back.
2232     Leaves.push(Mul1);
2233     if (Mul2.Value.getNode())
2234       Leaves.push(Mul2);
2235     CanFactorize = false;
2236   }
2237 
2238   // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
2239   // and the root node itself is not used more than twice. This reduces the
2240   // amount of additional constant extenders introduced by this optimization.
2241   bool CombinedGA = false;
2242   if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
2243       GA.Value.hasOneUse() && N->use_size() < 3) {
2244     GlobalAddressSDNode *GANode =
2245       cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
2246     ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
2247 
2248     if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
2249         getTargetLowering()->isOffsetFoldingLegal(GANode)) {
2250       LLVM_DEBUG(dbgs() << "--> Combining GA and offset ("
2251                         << Offset->getSExtValue() << "): ");
2252       LLVM_DEBUG(GANode->dump(CurDAG));
2253 
2254       SDValue NewTGA =
2255         CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
2256             GANode->getValueType(0),
2257             GANode->getOffset() + (uint64_t)Offset->getSExtValue());
2258       GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
2259           GA.Value.getValueType(), NewTGA);
2260       GA.Weight += Leaves.top().Weight;
2261 
2262       NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
2263       CombinedGA = true;
2264 
2265       Leaves.pop(); // Remove the offset constant from the queue
2266     }
2267   }
2268 
2269   if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
2270       (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
2271     RootWeights[N] = CurrentWeight;
2272     RootHeights[N] = NodeHeights[SDValue(N, 0)];
2273 
2274     return SDValue(N, 0);
2275   }
2276 
2277   // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
2278   if (NOpcode == ISD::ADD && GA.Value.getNode()) {
2279     WeightedLeaf SHL = Leaves.findSHL(31);
2280     if (SHL.Value.getNode()) {
2281       int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
2282       GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
2283                                  GA.Value.getValueType(),
2284                                  GA.Value, SHL.Value);
2285       GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
2286       NodeHeights[GA.Value] = Height;
2287     }
2288   }
2289 
2290   if (GA.Value.getNode())
2291     Leaves.push(GA);
2292 
2293   // If this is the top level and we haven't factored out a shift, we should try
2294   // to move a constant to the bottom to match addressing modes like memw(rX+C)
2295   if (TopLevel && !CanFactorize && Leaves.hasConst()) {
2296     LLVM_DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
2297     Leaves.pushToBottom(Leaves.pop());
2298   }
2299 
2300   const DataLayout &DL = CurDAG->getDataLayout();
2301   const TargetLowering &TLI = *getTargetLowering();
2302 
2303   // Rebuild the tree using Huffman's algorithm
2304   while (Leaves.size() > 1) {
2305     WeightedLeaf L0 = Leaves.pop();
2306 
2307     // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
2308     // otherwise just get the next leaf
2309     WeightedLeaf L1 = Leaves.findMULbyConst();
2310     if (!L1.Value.getNode())
2311       L1 = Leaves.pop();
2312 
2313     assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
2314 
2315     SDValue V0 = L0.Value;
2316     int V0Weight = L0.Weight;
2317     SDValue V1 = L1.Value;
2318     int V1Weight = L1.Weight;
2319 
2320     // Make sure that none of these nodes have been RAUW'd
2321     if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
2322         (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
2323       LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2324       return balanceSubTree(N, TopLevel);
2325     }
2326 
2327     ConstantSDNode *V0C = dyn_cast<ConstantSDNode>(V0);
2328     ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(V1);
2329     EVT VT = N->getValueType(0);
2330     SDValue NewNode;
2331 
2332     if (V0C && !V1C) {
2333       std::swap(V0, V1);
2334       std::swap(V0C, V1C);
2335     }
2336 
2337     // Calculate height of this node
2338     assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
2339            "Children must have been visited before re-combining them!");
2340     int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
2341 
2342     // Rebuild this node (and restore SHL from MUL if needed)
2343     if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
2344       NewNode = CurDAG->getNode(
2345           ISD::SHL, SDLoc(V0), VT, V0,
2346           CurDAG->getConstant(
2347               V1C->getAPIntValue().logBase2(), SDLoc(N),
2348               TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2349     else
2350       NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
2351 
2352     NodeHeights[NewNode] = Height;
2353 
2354     int Weight = V0Weight + V1Weight;
2355     Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
2356 
2357     LLVM_DEBUG(dbgs() << "--> Built new node (Weight=" << Weight
2358                       << ",Height=" << Height << "):\n");
2359     LLVM_DEBUG(NewNode.dump());
2360   }
2361 
2362   assert(Leaves.size() == 1);
2363   SDValue NewRoot = Leaves.top().Value;
2364 
2365   assert(NodeHeights.count(NewRoot));
2366   int Height = NodeHeights[NewRoot];
2367 
2368   // Restore SHL if we earlier converted it to a MUL
2369   if (NewRoot.getOpcode() == ISD::MUL) {
2370     ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
2371     if (V1C && V1C->getAPIntValue().isPowerOf2()) {
2372       EVT VT = NewRoot.getValueType();
2373       SDValue V0 = NewRoot.getOperand(0);
2374       NewRoot = CurDAG->getNode(
2375           ISD::SHL, SDLoc(NewRoot), VT, V0,
2376           CurDAG->getConstant(
2377               V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
2378               TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2379     }
2380   }
2381 
2382   if (N != NewRoot.getNode()) {
2383     LLVM_DEBUG(dbgs() << "--> Root is now: ");
2384     LLVM_DEBUG(NewRoot.dump());
2385 
2386     // Replace all uses of old root by new root
2387     CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
2388     // Mark that we have RAUW'd N
2389     RootWeights[N] = -2;
2390   } else {
2391     LLVM_DEBUG(dbgs() << "--> Root unchanged.\n");
2392   }
2393 
2394   RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
2395   RootHeights[NewRoot.getNode()] = Height;
2396 
2397   return NewRoot;
2398 }
2399 
2400 void HexagonDAGToDAGISel::rebalanceAddressTrees() {
2401   for (SDNode &Node : llvm::make_early_inc_range(CurDAG->allnodes())) {
2402     SDNode *N = &Node;
2403     if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
2404       continue;
2405 
2406     SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
2407     if (BasePtr.getOpcode() != ISD::ADD)
2408       continue;
2409 
2410     // We've already processed this node
2411     if (RootWeights.count(BasePtr.getNode()))
2412       continue;
2413 
2414     LLVM_DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
2415     LLVM_DEBUG(N->dump(CurDAG));
2416 
2417     // FindRoots
2418     SmallVector<SDNode *, 4> Worklist;
2419 
2420     Worklist.push_back(BasePtr.getOperand(0).getNode());
2421     Worklist.push_back(BasePtr.getOperand(1).getNode());
2422 
2423     while (!Worklist.empty()) {
2424       SDNode *N = Worklist.pop_back_val();
2425       unsigned Opcode = N->getOpcode();
2426 
2427       if (!isOpcodeHandled(N))
2428         continue;
2429 
2430       Worklist.push_back(N->getOperand(0).getNode());
2431       Worklist.push_back(N->getOperand(1).getNode());
2432 
2433       // Not a root if it has only one use and same opcode as its parent
2434       if (N->hasOneUse() && Opcode == N->user_begin()->getOpcode())
2435         continue;
2436 
2437       // This root node has already been processed
2438       if (RootWeights.count(N))
2439         continue;
2440 
2441       RootWeights[N] = -1;
2442     }
2443 
2444     // Balance node itself
2445     RootWeights[BasePtr.getNode()] = -1;
2446     SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
2447 
2448     if (N->getOpcode() == ISD::LOAD)
2449       N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
2450             NewBasePtr, N->getOperand(2));
2451     else
2452       N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
2453             NewBasePtr, N->getOperand(3));
2454 
2455     LLVM_DEBUG(dbgs() << "--> Final node: ");
2456     LLVM_DEBUG(N->dump(CurDAG));
2457   }
2458 
2459   CurDAG->RemoveDeadNodes();
2460   GAUsesInFunction.clear();
2461   RootHeights.clear();
2462   RootWeights.clear();
2463 }
2464