10b57cec5SDimitry Andric //===-- AMDGPUISelLowering.cpp - AMDGPU Common DAG lowering functions -----===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric /// \file 100b57cec5SDimitry Andric /// This is the parent TargetLowering class for hardware code gen 110b57cec5SDimitry Andric /// targets. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 140b57cec5SDimitry Andric 150b57cec5SDimitry Andric #include "AMDGPUISelLowering.h" 160b57cec5SDimitry Andric #include "AMDGPU.h" 17e8d8bef9SDimitry Andric #include "AMDGPUInstrInfo.h" 18e8d8bef9SDimitry Andric #include "AMDGPUMachineFunction.h" 190b57cec5SDimitry Andric #include "SIMachineFunctionInfo.h" 200b57cec5SDimitry Andric #include "llvm/CodeGen/Analysis.h" 2106c3fb27SDimitry Andric #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 2281ad6265SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 230b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 24e8d8bef9SDimitry Andric #include "llvm/IR/IntrinsicsAMDGPU.h" 2506c3fb27SDimitry Andric #include "llvm/IR/PatternMatch.h" 26e8d8bef9SDimitry Andric #include "llvm/Support/CommandLine.h" 270b57cec5SDimitry Andric #include "llvm/Support/KnownBits.h" 28e8d8bef9SDimitry Andric #include "llvm/Target/TargetMachine.h" 29e8d8bef9SDimitry Andric 300b57cec5SDimitry Andric using namespace llvm; 310b57cec5SDimitry Andric 320b57cec5SDimitry Andric #include "AMDGPUGenCallingConv.inc" 330b57cec5SDimitry Andric 345ffd83dbSDimitry Andric static cl::opt<bool> AMDGPUBypassSlowDiv( 355ffd83dbSDimitry Andric "amdgpu-bypass-slow-div", 365ffd83dbSDimitry Andric cl::desc("Skip 64-bit divide for dynamic 32-bit values"), 375ffd83dbSDimitry Andric cl::init(true)); 385ffd83dbSDimitry Andric 390b57cec5SDimitry Andric // Find a larger type to do a load / store of a vector with. 400b57cec5SDimitry Andric EVT AMDGPUTargetLowering::getEquivalentMemType(LLVMContext &Ctx, EVT VT) { 410b57cec5SDimitry Andric unsigned StoreSize = VT.getStoreSizeInBits(); 420b57cec5SDimitry Andric if (StoreSize <= 32) 430b57cec5SDimitry Andric return EVT::getIntegerVT(Ctx, StoreSize); 440b57cec5SDimitry Andric 450fca6ea1SDimitry Andric if (StoreSize % 32 == 0) 460b57cec5SDimitry Andric return EVT::getVectorVT(Ctx, MVT::i32, StoreSize / 32); 470fca6ea1SDimitry Andric 480fca6ea1SDimitry Andric return VT; 490b57cec5SDimitry Andric } 500b57cec5SDimitry Andric 510b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::numBitsUnsigned(SDValue Op, SelectionDAG &DAG) { 52349cc55cSDimitry Andric return DAG.computeKnownBits(Op).countMaxActiveBits(); 530b57cec5SDimitry Andric } 540b57cec5SDimitry Andric 550b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::numBitsSigned(SDValue Op, SelectionDAG &DAG) { 560b57cec5SDimitry Andric // In order for this to be a signed 24-bit value, bit 23, must 570b57cec5SDimitry Andric // be a sign bit. 5804eeddc0SDimitry Andric return DAG.ComputeMaxSignificantBits(Op); 590b57cec5SDimitry Andric } 600b57cec5SDimitry Andric 610b57cec5SDimitry Andric AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM, 620b57cec5SDimitry Andric const AMDGPUSubtarget &STI) 630b57cec5SDimitry Andric : TargetLowering(TM), Subtarget(&STI) { 640fca6ea1SDimitry Andric // Always lower memset, memcpy, and memmove intrinsics to load/store 650fca6ea1SDimitry Andric // instructions, rather then generating calls to memset, mempcy or memmove. 660fca6ea1SDimitry Andric MaxStoresPerMemset = MaxStoresPerMemsetOptSize = ~0U; 670fca6ea1SDimitry Andric MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = ~0U; 680fca6ea1SDimitry Andric MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = ~0U; 690fca6ea1SDimitry Andric 700fca6ea1SDimitry Andric // Enable ganging up loads and stores in the memcpy DAG lowering. 710fca6ea1SDimitry Andric MaxGluedStoresPerMemcpy = 16; 720fca6ea1SDimitry Andric 730b57cec5SDimitry Andric // Lower floating point store/load to integer store/load to reduce the number 740b57cec5SDimitry Andric // of patterns in tablegen. 750b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::f32, Promote); 760b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::f32, MVT::i32); 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v2f32, Promote); 790b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v2f32, MVT::v2i32); 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v3f32, Promote); 820b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v3f32, MVT::v3i32); 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v4f32, Promote); 850b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v4f32, MVT::v4i32); 860b57cec5SDimitry Andric 870b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v5f32, Promote); 880b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v5f32, MVT::v5i32); 890b57cec5SDimitry Andric 90fe6060f1SDimitry Andric setOperationAction(ISD::LOAD, MVT::v6f32, Promote); 91fe6060f1SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v6f32, MVT::v6i32); 92fe6060f1SDimitry Andric 93fe6060f1SDimitry Andric setOperationAction(ISD::LOAD, MVT::v7f32, Promote); 94fe6060f1SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v7f32, MVT::v7i32); 95fe6060f1SDimitry Andric 960b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v8f32, Promote); 970b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v8f32, MVT::v8i32); 980b57cec5SDimitry Andric 99bdd1243dSDimitry Andric setOperationAction(ISD::LOAD, MVT::v9f32, Promote); 100bdd1243dSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v9f32, MVT::v9i32); 101bdd1243dSDimitry Andric 102bdd1243dSDimitry Andric setOperationAction(ISD::LOAD, MVT::v10f32, Promote); 103bdd1243dSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v10f32, MVT::v10i32); 104bdd1243dSDimitry Andric 105bdd1243dSDimitry Andric setOperationAction(ISD::LOAD, MVT::v11f32, Promote); 106bdd1243dSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v11f32, MVT::v11i32); 107bdd1243dSDimitry Andric 108bdd1243dSDimitry Andric setOperationAction(ISD::LOAD, MVT::v12f32, Promote); 109bdd1243dSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v12f32, MVT::v12i32); 110bdd1243dSDimitry Andric 1110b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v16f32, Promote); 1120b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v16f32, MVT::v16i32); 1130b57cec5SDimitry Andric 1140b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v32f32, Promote); 1150b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v32f32, MVT::v32i32); 1160b57cec5SDimitry Andric 1170b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::i64, Promote); 1180b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::i64, MVT::v2i32); 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v2i64, Promote); 1210b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v2i64, MVT::v4i32); 1220b57cec5SDimitry Andric 1230b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::f64, Promote); 1240b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::f64, MVT::v2i32); 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric setOperationAction(ISD::LOAD, MVT::v2f64, Promote); 1270b57cec5SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v2f64, MVT::v4i32); 1280b57cec5SDimitry Andric 129fe6060f1SDimitry Andric setOperationAction(ISD::LOAD, MVT::v3i64, Promote); 130fe6060f1SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v3i64, MVT::v6i32); 131fe6060f1SDimitry Andric 1325ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v4i64, Promote); 1335ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v4i64, MVT::v8i32); 1345ffd83dbSDimitry Andric 135fe6060f1SDimitry Andric setOperationAction(ISD::LOAD, MVT::v3f64, Promote); 136fe6060f1SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v3f64, MVT::v6i32); 137fe6060f1SDimitry Andric 1385ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v4f64, Promote); 1395ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v4f64, MVT::v8i32); 1405ffd83dbSDimitry Andric 1415ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v8i64, Promote); 1425ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v8i64, MVT::v16i32); 1435ffd83dbSDimitry Andric 1445ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v8f64, Promote); 1455ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v8f64, MVT::v16i32); 1465ffd83dbSDimitry Andric 1475ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v16i64, Promote); 1485ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v16i64, MVT::v32i32); 1495ffd83dbSDimitry Andric 1505ffd83dbSDimitry Andric setOperationAction(ISD::LOAD, MVT::v16f64, Promote); 1515ffd83dbSDimitry Andric AddPromotedToType(ISD::LOAD, MVT::v16f64, MVT::v32i32); 1525ffd83dbSDimitry Andric 15306c3fb27SDimitry Andric setOperationAction(ISD::LOAD, MVT::i128, Promote); 15406c3fb27SDimitry Andric AddPromotedToType(ISD::LOAD, MVT::i128, MVT::v4i32); 15506c3fb27SDimitry Andric 1560fca6ea1SDimitry Andric // TODO: Would be better to consume as directly legal 1570fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_LOAD, MVT::f32, Promote); 1580fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f32, MVT::i32); 1590fca6ea1SDimitry Andric 1600fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_LOAD, MVT::f64, Promote); 1610fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f64, MVT::i64); 1620fca6ea1SDimitry Andric 1630fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_LOAD, MVT::f16, Promote); 1640fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_LOAD, MVT::f16, MVT::i16); 1650fca6ea1SDimitry Andric 1660fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_LOAD, MVT::bf16, Promote); 1670fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_LOAD, MVT::bf16, MVT::i16); 1680fca6ea1SDimitry Andric 1690fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_STORE, MVT::f32, Promote); 1700fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_STORE, MVT::f32, MVT::i32); 1710fca6ea1SDimitry Andric 1720fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_STORE, MVT::f64, Promote); 1730fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_STORE, MVT::f64, MVT::i64); 1740fca6ea1SDimitry Andric 1750fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_STORE, MVT::f16, Promote); 1760fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_STORE, MVT::f16, MVT::i16); 1770fca6ea1SDimitry Andric 1780fca6ea1SDimitry Andric setOperationAction(ISD::ATOMIC_STORE, MVT::bf16, Promote); 1790fca6ea1SDimitry Andric AddPromotedToType(ISD::ATOMIC_STORE, MVT::bf16, MVT::i16); 1800fca6ea1SDimitry Andric 1810b57cec5SDimitry Andric // There are no 64-bit extloads. These should be done as a 32-bit extload and 1820b57cec5SDimitry Andric // an extension to 64-bit. 18381ad6265SDimitry Andric for (MVT VT : MVT::integer_valuetypes()) 18481ad6265SDimitry Andric setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::i64, VT, 18581ad6265SDimitry Andric Expand); 1860b57cec5SDimitry Andric 1870b57cec5SDimitry Andric for (MVT VT : MVT::integer_valuetypes()) { 1880b57cec5SDimitry Andric if (VT == MVT::i64) 1890b57cec5SDimitry Andric continue; 1900b57cec5SDimitry Andric 19181ad6265SDimitry Andric for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}) { 19281ad6265SDimitry Andric setLoadExtAction(Op, VT, MVT::i1, Promote); 19381ad6265SDimitry Andric setLoadExtAction(Op, VT, MVT::i8, Legal); 19481ad6265SDimitry Andric setLoadExtAction(Op, VT, MVT::i16, Legal); 19581ad6265SDimitry Andric setLoadExtAction(Op, VT, MVT::i32, Expand); 19681ad6265SDimitry Andric } 1970b57cec5SDimitry Andric } 1980b57cec5SDimitry Andric 19981ad6265SDimitry Andric for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) 20081ad6265SDimitry Andric for (auto MemVT : 20181ad6265SDimitry Andric {MVT::v2i8, MVT::v4i8, MVT::v2i16, MVT::v3i16, MVT::v4i16}) 20281ad6265SDimitry Andric setLoadExtAction({ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD}, VT, MemVT, 20381ad6265SDimitry Andric Expand); 2040b57cec5SDimitry Andric 2050b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); 206bdd1243dSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::bf16, Expand); 2070b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2f16, Expand); 208cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f32, MVT::v2bf16, Expand); 2098bcb0991SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3f16, Expand); 210cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f32, MVT::v3bf16, Expand); 2110b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4f16, Expand); 212cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f32, MVT::v4bf16, Expand); 2130b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8f16, Expand); 214cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f32, MVT::v8bf16, Expand); 2158bcb0991SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16f16, Expand); 216cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f32, MVT::v16bf16, Expand); 2178bcb0991SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32f16, Expand); 218cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v32f32, MVT::v32bf16, Expand); 2190b57cec5SDimitry Andric 2200b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); 2210b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f32, Expand); 222fe6060f1SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f32, Expand); 2230b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f32, Expand); 2240b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f32, Expand); 2255ffd83dbSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f32, Expand); 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); 228bdd1243dSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::bf16, Expand); 2290b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2f16, Expand); 230cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v2f64, MVT::v2bf16, Expand); 231fe6060f1SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3f16, Expand); 232cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v3f64, MVT::v3bf16, Expand); 2330b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4f16, Expand); 234cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v4f64, MVT::v4bf16, Expand); 2350b57cec5SDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8f16, Expand); 236cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v8f64, MVT::v8bf16, Expand); 2375ffd83dbSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16f16, Expand); 238cb14a3feSDimitry Andric setLoadExtAction(ISD::EXTLOAD, MVT::v16f64, MVT::v16bf16, Expand); 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::f32, Promote); 2410b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::f32, MVT::i32); 2420b57cec5SDimitry Andric 2430b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v2f32, Promote); 2440b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v2f32, MVT::v2i32); 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v3f32, Promote); 2470b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v3f32, MVT::v3i32); 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v4f32, Promote); 2500b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v4f32, MVT::v4i32); 2510b57cec5SDimitry Andric 2520b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v5f32, Promote); 2530b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v5f32, MVT::v5i32); 2540b57cec5SDimitry Andric 255fe6060f1SDimitry Andric setOperationAction(ISD::STORE, MVT::v6f32, Promote); 256fe6060f1SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v6f32, MVT::v6i32); 257fe6060f1SDimitry Andric 258fe6060f1SDimitry Andric setOperationAction(ISD::STORE, MVT::v7f32, Promote); 259fe6060f1SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v7f32, MVT::v7i32); 260fe6060f1SDimitry Andric 2610b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v8f32, Promote); 2620b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v8f32, MVT::v8i32); 2630b57cec5SDimitry Andric 264bdd1243dSDimitry Andric setOperationAction(ISD::STORE, MVT::v9f32, Promote); 265bdd1243dSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v9f32, MVT::v9i32); 266bdd1243dSDimitry Andric 267bdd1243dSDimitry Andric setOperationAction(ISD::STORE, MVT::v10f32, Promote); 268bdd1243dSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v10f32, MVT::v10i32); 269bdd1243dSDimitry Andric 270bdd1243dSDimitry Andric setOperationAction(ISD::STORE, MVT::v11f32, Promote); 271bdd1243dSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v11f32, MVT::v11i32); 272bdd1243dSDimitry Andric 273bdd1243dSDimitry Andric setOperationAction(ISD::STORE, MVT::v12f32, Promote); 274bdd1243dSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v12f32, MVT::v12i32); 275bdd1243dSDimitry Andric 2760b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v16f32, Promote); 2770b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v16f32, MVT::v16i32); 2780b57cec5SDimitry Andric 2790b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v32f32, Promote); 2800b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v32f32, MVT::v32i32); 2810b57cec5SDimitry Andric 2820b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::i64, Promote); 2830b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::i64, MVT::v2i32); 2840b57cec5SDimitry Andric 2850b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v2i64, Promote); 2860b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v2i64, MVT::v4i32); 2870b57cec5SDimitry Andric 2880b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::f64, Promote); 2890b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::f64, MVT::v2i32); 2900b57cec5SDimitry Andric 2910b57cec5SDimitry Andric setOperationAction(ISD::STORE, MVT::v2f64, Promote); 2920b57cec5SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v2f64, MVT::v4i32); 2930b57cec5SDimitry Andric 294fe6060f1SDimitry Andric setOperationAction(ISD::STORE, MVT::v3i64, Promote); 295fe6060f1SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v3i64, MVT::v6i32); 296fe6060f1SDimitry Andric 297fe6060f1SDimitry Andric setOperationAction(ISD::STORE, MVT::v3f64, Promote); 298fe6060f1SDimitry Andric AddPromotedToType(ISD::STORE, MVT::v3f64, MVT::v6i32); 299fe6060f1SDimitry Andric 3005ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v4i64, Promote); 3015ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v4i64, MVT::v8i32); 3025ffd83dbSDimitry Andric 3035ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v4f64, Promote); 3045ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v4f64, MVT::v8i32); 3055ffd83dbSDimitry Andric 3065ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v8i64, Promote); 3075ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v8i64, MVT::v16i32); 3085ffd83dbSDimitry Andric 3095ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v8f64, Promote); 3105ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v8f64, MVT::v16i32); 3115ffd83dbSDimitry Andric 3125ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v16i64, Promote); 3135ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v16i64, MVT::v32i32); 3145ffd83dbSDimitry Andric 3155ffd83dbSDimitry Andric setOperationAction(ISD::STORE, MVT::v16f64, Promote); 3165ffd83dbSDimitry Andric AddPromotedToType(ISD::STORE, MVT::v16f64, MVT::v32i32); 3175ffd83dbSDimitry Andric 31806c3fb27SDimitry Andric setOperationAction(ISD::STORE, MVT::i128, Promote); 31906c3fb27SDimitry Andric AddPromotedToType(ISD::STORE, MVT::i128, MVT::v4i32); 32006c3fb27SDimitry Andric 3210b57cec5SDimitry Andric setTruncStoreAction(MVT::i64, MVT::i1, Expand); 3220b57cec5SDimitry Andric setTruncStoreAction(MVT::i64, MVT::i8, Expand); 3230b57cec5SDimitry Andric setTruncStoreAction(MVT::i64, MVT::i16, Expand); 3240b57cec5SDimitry Andric setTruncStoreAction(MVT::i64, MVT::i32, Expand); 3250b57cec5SDimitry Andric 3260b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i64, MVT::v2i1, Expand); 3270b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i64, MVT::v2i8, Expand); 3280b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i64, MVT::v2i16, Expand); 3290b57cec5SDimitry Andric setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand); 3300b57cec5SDimitry Andric 331bdd1243dSDimitry Andric setTruncStoreAction(MVT::f32, MVT::bf16, Expand); 3320b57cec5SDimitry Andric setTruncStoreAction(MVT::f32, MVT::f16, Expand); 3330fca6ea1SDimitry Andric setTruncStoreAction(MVT::v2f32, MVT::v2bf16, Expand); 3340b57cec5SDimitry Andric setTruncStoreAction(MVT::v2f32, MVT::v2f16, Expand); 3350fca6ea1SDimitry Andric setTruncStoreAction(MVT::v3f32, MVT::v3bf16, Expand); 3368bcb0991SDimitry Andric setTruncStoreAction(MVT::v3f32, MVT::v3f16, Expand); 3370fca6ea1SDimitry Andric setTruncStoreAction(MVT::v4f32, MVT::v4bf16, Expand); 3380b57cec5SDimitry Andric setTruncStoreAction(MVT::v4f32, MVT::v4f16, Expand); 3390fca6ea1SDimitry Andric setTruncStoreAction(MVT::v8f32, MVT::v8bf16, Expand); 3400b57cec5SDimitry Andric setTruncStoreAction(MVT::v8f32, MVT::v8f16, Expand); 3410fca6ea1SDimitry Andric setTruncStoreAction(MVT::v16f32, MVT::v16bf16, Expand); 3428bcb0991SDimitry Andric setTruncStoreAction(MVT::v16f32, MVT::v16f16, Expand); 3430fca6ea1SDimitry Andric setTruncStoreAction(MVT::v32f32, MVT::v32bf16, Expand); 3448bcb0991SDimitry Andric setTruncStoreAction(MVT::v32f32, MVT::v32f16, Expand); 3450b57cec5SDimitry Andric 346bdd1243dSDimitry Andric setTruncStoreAction(MVT::f64, MVT::bf16, Expand); 3470b57cec5SDimitry Andric setTruncStoreAction(MVT::f64, MVT::f16, Expand); 3480b57cec5SDimitry Andric setTruncStoreAction(MVT::f64, MVT::f32, Expand); 3490b57cec5SDimitry Andric 3500b57cec5SDimitry Andric setTruncStoreAction(MVT::v2f64, MVT::v2f32, Expand); 3510fca6ea1SDimitry Andric setTruncStoreAction(MVT::v2f64, MVT::v2bf16, Expand); 3520b57cec5SDimitry Andric setTruncStoreAction(MVT::v2f64, MVT::v2f16, Expand); 3530b57cec5SDimitry Andric 3540fca6ea1SDimitry Andric setTruncStoreAction(MVT::v3i32, MVT::v3i8, Expand); 3550fca6ea1SDimitry Andric 356fe6060f1SDimitry Andric setTruncStoreAction(MVT::v3i64, MVT::v3i32, Expand); 357fe6060f1SDimitry Andric setTruncStoreAction(MVT::v3i64, MVT::v3i16, Expand); 3580fca6ea1SDimitry Andric setTruncStoreAction(MVT::v3i64, MVT::v3i8, Expand); 3590fca6ea1SDimitry Andric setTruncStoreAction(MVT::v3i64, MVT::v3i1, Expand); 360fe6060f1SDimitry Andric setTruncStoreAction(MVT::v3f64, MVT::v3f32, Expand); 3610fca6ea1SDimitry Andric setTruncStoreAction(MVT::v3f64, MVT::v3bf16, Expand); 362fe6060f1SDimitry Andric setTruncStoreAction(MVT::v3f64, MVT::v3f16, Expand); 363fe6060f1SDimitry Andric 3645ffd83dbSDimitry Andric setTruncStoreAction(MVT::v4i64, MVT::v4i32, Expand); 3655ffd83dbSDimitry Andric setTruncStoreAction(MVT::v4i64, MVT::v4i16, Expand); 3660b57cec5SDimitry Andric setTruncStoreAction(MVT::v4f64, MVT::v4f32, Expand); 3670fca6ea1SDimitry Andric setTruncStoreAction(MVT::v4f64, MVT::v4bf16, Expand); 3680b57cec5SDimitry Andric setTruncStoreAction(MVT::v4f64, MVT::v4f16, Expand); 3690b57cec5SDimitry Andric 3700b57cec5SDimitry Andric setTruncStoreAction(MVT::v8f64, MVT::v8f32, Expand); 3710fca6ea1SDimitry Andric setTruncStoreAction(MVT::v8f64, MVT::v8bf16, Expand); 3720b57cec5SDimitry Andric setTruncStoreAction(MVT::v8f64, MVT::v8f16, Expand); 3730b57cec5SDimitry Andric 3745ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16f64, MVT::v16f32, Expand); 3750fca6ea1SDimitry Andric setTruncStoreAction(MVT::v16f64, MVT::v16bf16, Expand); 3765ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16f64, MVT::v16f16, Expand); 3775ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand); 3785ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i16, Expand); 3795ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand); 3805ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i8, Expand); 3815ffd83dbSDimitry Andric setTruncStoreAction(MVT::v16i64, MVT::v16i1, Expand); 3820b57cec5SDimitry Andric 38381ad6265SDimitry Andric setOperationAction(ISD::Constant, {MVT::i32, MVT::i64}, Legal); 38481ad6265SDimitry Andric setOperationAction(ISD::ConstantFP, {MVT::f32, MVT::f64}, Legal); 3850b57cec5SDimitry Andric 38681ad6265SDimitry Andric setOperationAction({ISD::BR_JT, ISD::BRIND}, MVT::Other, Expand); 3870b57cec5SDimitry Andric 3885f757f3fSDimitry Andric // For R600, this is totally unsupported, just custom lower to produce an 3895f757f3fSDimitry Andric // error. 3900b57cec5SDimitry Andric setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); 3910b57cec5SDimitry Andric 3920b57cec5SDimitry Andric // Library functions. These default to Expand, but we have instructions 3930b57cec5SDimitry Andric // for them. 3945f757f3fSDimitry Andric setOperationAction({ISD::FCEIL, ISD::FPOW, ISD::FABS, ISD::FFLOOR, 3955f757f3fSDimitry Andric ISD::FROUNDEVEN, ISD::FTRUNC, ISD::FMINNUM, ISD::FMAXNUM}, 39681ad6265SDimitry Andric MVT::f32, Legal); 3970b57cec5SDimitry Andric 39806c3fb27SDimitry Andric setOperationAction(ISD::FLOG2, MVT::f32, Custom); 39981ad6265SDimitry Andric setOperationAction(ISD::FROUND, {MVT::f32, MVT::f64}, Custom); 4000b57cec5SDimitry Andric 4015f757f3fSDimitry Andric setOperationAction( 4025f757f3fSDimitry Andric {ISD::FLOG, ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10}, MVT::f32, 40306c3fb27SDimitry Andric Custom); 4040b57cec5SDimitry Andric 405bdd1243dSDimitry Andric setOperationAction(ISD::FNEARBYINT, {MVT::f16, MVT::f32, MVT::f64}, Custom); 406bdd1243dSDimitry Andric 4075f757f3fSDimitry Andric setOperationAction(ISD::FRINT, {MVT::f16, MVT::f32, MVT::f64}, Custom); 4080b57cec5SDimitry Andric 40981ad6265SDimitry Andric setOperationAction(ISD::FREM, {MVT::f16, MVT::f32, MVT::f64}, Custom); 4100b57cec5SDimitry Andric 411bdd1243dSDimitry Andric if (Subtarget->has16BitInsts()) 412bdd1243dSDimitry Andric setOperationAction(ISD::IS_FPCLASS, {MVT::f16, MVT::f32, MVT::f64}, Legal); 41306c3fb27SDimitry Andric else { 414bdd1243dSDimitry Andric setOperationAction(ISD::IS_FPCLASS, {MVT::f32, MVT::f64}, Legal); 41506c3fb27SDimitry Andric setOperationAction({ISD::FLOG2, ISD::FEXP2}, MVT::f16, Custom); 41606c3fb27SDimitry Andric } 41706c3fb27SDimitry Andric 4185f757f3fSDimitry Andric setOperationAction({ISD::FLOG10, ISD::FLOG, ISD::FEXP, ISD::FEXP10}, MVT::f16, 4195f757f3fSDimitry Andric Custom); 420bdd1243dSDimitry Andric 421bdd1243dSDimitry Andric // FIXME: These IS_FPCLASS vector fp types are marked custom so it reaches 422bdd1243dSDimitry Andric // scalarization code. Can be removed when IS_FPCLASS expand isn't called by 423bdd1243dSDimitry Andric // default unless marked custom/legal. 424bdd1243dSDimitry Andric setOperationAction( 425bdd1243dSDimitry Andric ISD::IS_FPCLASS, 426bdd1243dSDimitry Andric {MVT::v2f16, MVT::v3f16, MVT::v4f16, MVT::v16f16, MVT::v2f32, MVT::v3f32, 427bdd1243dSDimitry Andric MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32, 428bdd1243dSDimitry Andric MVT::v2f64, MVT::v3f64, MVT::v4f64, MVT::v8f64, MVT::v16f64}, 429bdd1243dSDimitry Andric Custom); 430bdd1243dSDimitry Andric 4310b57cec5SDimitry Andric // Expand to fneg + fadd. 4320b57cec5SDimitry Andric setOperationAction(ISD::FSUB, MVT::f64, Expand); 4330b57cec5SDimitry Andric 43481ad6265SDimitry Andric setOperationAction(ISD::CONCAT_VECTORS, 43581ad6265SDimitry Andric {MVT::v3i32, MVT::v3f32, MVT::v4i32, MVT::v4f32, 43681ad6265SDimitry Andric MVT::v5i32, MVT::v5f32, MVT::v6i32, MVT::v6f32, 437bdd1243dSDimitry Andric MVT::v7i32, MVT::v7f32, MVT::v8i32, MVT::v8f32, 438bdd1243dSDimitry Andric MVT::v9i32, MVT::v9f32, MVT::v10i32, MVT::v10f32, 439bdd1243dSDimitry Andric MVT::v11i32, MVT::v11f32, MVT::v12i32, MVT::v12f32}, 44081ad6265SDimitry Andric Custom); 4411db9f3b2SDimitry Andric 4421db9f3b2SDimitry Andric // FIXME: Why is v8f16/v8bf16 missing? 44381ad6265SDimitry Andric setOperationAction( 44481ad6265SDimitry Andric ISD::EXTRACT_SUBVECTOR, 4451db9f3b2SDimitry Andric {MVT::v2f16, MVT::v2bf16, MVT::v2i16, MVT::v4f16, MVT::v4bf16, 4461db9f3b2SDimitry Andric MVT::v4i16, MVT::v2f32, MVT::v2i32, MVT::v3f32, MVT::v3i32, 4471db9f3b2SDimitry Andric MVT::v4f32, MVT::v4i32, MVT::v5f32, MVT::v5i32, MVT::v6f32, 4481db9f3b2SDimitry Andric MVT::v6i32, MVT::v7f32, MVT::v7i32, MVT::v8f32, MVT::v8i32, 4491db9f3b2SDimitry Andric MVT::v9f32, MVT::v9i32, MVT::v10i32, MVT::v10f32, MVT::v11i32, 4501db9f3b2SDimitry Andric MVT::v11f32, MVT::v12i32, MVT::v12f32, MVT::v16f16, MVT::v16bf16, 4511db9f3b2SDimitry Andric MVT::v16i16, MVT::v16f32, MVT::v16i32, MVT::v32f32, MVT::v32i32, 4521db9f3b2SDimitry Andric MVT::v2f64, MVT::v2i64, MVT::v3f64, MVT::v3i64, MVT::v4f64, 4531db9f3b2SDimitry Andric MVT::v4i64, MVT::v8f64, MVT::v8i64, MVT::v16f64, MVT::v16i64, 4541db9f3b2SDimitry Andric MVT::v32i16, MVT::v32f16, MVT::v32bf16}, 45581ad6265SDimitry Andric Custom); 4560b57cec5SDimitry Andric 4570b57cec5SDimitry Andric setOperationAction(ISD::FP16_TO_FP, MVT::f64, Expand); 45881ad6265SDimitry Andric setOperationAction(ISD::FP_TO_FP16, {MVT::f64, MVT::f32}, Custom); 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 }; 4610b57cec5SDimitry Andric for (MVT VT : ScalarIntVTs) { 4620b57cec5SDimitry Andric // These should use [SU]DIVREM, so set them to expand 46381ad6265SDimitry Andric setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM}, VT, 46481ad6265SDimitry Andric Expand); 4650b57cec5SDimitry Andric 4660b57cec5SDimitry Andric // GPU does not have divrem function for signed or unsigned. 46781ad6265SDimitry Andric setOperationAction({ISD::SDIVREM, ISD::UDIVREM}, VT, Custom); 4680b57cec5SDimitry Andric 4690b57cec5SDimitry Andric // GPU does not have [S|U]MUL_LOHI functions as a single instruction. 47081ad6265SDimitry Andric setOperationAction({ISD::SMUL_LOHI, ISD::UMUL_LOHI}, VT, Expand); 4710b57cec5SDimitry Andric 47281ad6265SDimitry Andric setOperationAction({ISD::BSWAP, ISD::CTTZ, ISD::CTLZ}, VT, Expand); 4730b57cec5SDimitry Andric 4740b57cec5SDimitry Andric // AMDGPU uses ADDC/SUBC/ADDE/SUBE 47581ad6265SDimitry Andric setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT, Legal); 4760b57cec5SDimitry Andric } 4770b57cec5SDimitry Andric 4785ffd83dbSDimitry Andric // The hardware supports 32-bit FSHR, but not FSHL. 4795ffd83dbSDimitry Andric setOperationAction(ISD::FSHR, MVT::i32, Legal); 4805ffd83dbSDimitry Andric 4810b57cec5SDimitry Andric // The hardware supports 32-bit ROTR, but not ROTL. 48281ad6265SDimitry Andric setOperationAction(ISD::ROTL, {MVT::i32, MVT::i64}, Expand); 4830b57cec5SDimitry Andric setOperationAction(ISD::ROTR, MVT::i64, Expand); 4840b57cec5SDimitry Andric 48581ad6265SDimitry Andric setOperationAction({ISD::MULHU, ISD::MULHS}, MVT::i16, Expand); 486e8d8bef9SDimitry Andric 48781ad6265SDimitry Andric setOperationAction({ISD::MUL, ISD::MULHU, ISD::MULHS}, MVT::i64, Expand); 48881ad6265SDimitry Andric setOperationAction( 48981ad6265SDimitry Andric {ISD::UINT_TO_FP, ISD::SINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT}, 49081ad6265SDimitry Andric MVT::i64, Custom); 4910b57cec5SDimitry Andric setOperationAction(ISD::SELECT_CC, MVT::i64, Expand); 4920b57cec5SDimitry Andric 49381ad6265SDimitry Andric setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, MVT::i32, 49481ad6265SDimitry Andric Legal); 4950b57cec5SDimitry Andric 49681ad6265SDimitry Andric setOperationAction( 49781ad6265SDimitry Andric {ISD::CTTZ, ISD::CTTZ_ZERO_UNDEF, ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, 49881ad6265SDimitry Andric MVT::i64, Custom); 4990b57cec5SDimitry Andric 5007a6dacacSDimitry Andric for (auto VT : {MVT::i8, MVT::i16}) 5017a6dacacSDimitry Andric setOperationAction({ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF}, VT, Custom); 5027a6dacacSDimitry Andric 5030b57cec5SDimitry Andric static const MVT::SimpleValueType VectorIntTypes[] = { 504bdd1243dSDimitry Andric MVT::v2i32, MVT::v3i32, MVT::v4i32, MVT::v5i32, MVT::v6i32, MVT::v7i32, 505bdd1243dSDimitry Andric MVT::v9i32, MVT::v10i32, MVT::v11i32, MVT::v12i32}; 5060b57cec5SDimitry Andric 5070b57cec5SDimitry Andric for (MVT VT : VectorIntTypes) { 5080b57cec5SDimitry Andric // Expand the following operations for the current type by default. 50981ad6265SDimitry Andric setOperationAction({ISD::ADD, ISD::AND, ISD::FP_TO_SINT, 51081ad6265SDimitry Andric ISD::FP_TO_UINT, ISD::MUL, ISD::MULHU, 51181ad6265SDimitry Andric ISD::MULHS, ISD::OR, ISD::SHL, 51281ad6265SDimitry Andric ISD::SRA, ISD::SRL, ISD::ROTL, 51381ad6265SDimitry Andric ISD::ROTR, ISD::SUB, ISD::SINT_TO_FP, 51481ad6265SDimitry Andric ISD::UINT_TO_FP, ISD::SDIV, ISD::UDIV, 51581ad6265SDimitry Andric ISD::SREM, ISD::UREM, ISD::SMUL_LOHI, 51681ad6265SDimitry Andric ISD::UMUL_LOHI, ISD::SDIVREM, ISD::UDIVREM, 51781ad6265SDimitry Andric ISD::SELECT, ISD::VSELECT, ISD::SELECT_CC, 51881ad6265SDimitry Andric ISD::XOR, ISD::BSWAP, ISD::CTPOP, 51981ad6265SDimitry Andric ISD::CTTZ, ISD::CTLZ, ISD::VECTOR_SHUFFLE, 52081ad6265SDimitry Andric ISD::SETCC}, 52181ad6265SDimitry Andric VT, Expand); 5220b57cec5SDimitry Andric } 5230b57cec5SDimitry Andric 5240b57cec5SDimitry Andric static const MVT::SimpleValueType FloatVectorTypes[] = { 525bdd1243dSDimitry Andric MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32, MVT::v6f32, MVT::v7f32, 526bdd1243dSDimitry Andric MVT::v9f32, MVT::v10f32, MVT::v11f32, MVT::v12f32}; 5270b57cec5SDimitry Andric 5280b57cec5SDimitry Andric for (MVT VT : FloatVectorTypes) { 52981ad6265SDimitry Andric setOperationAction( 5305f757f3fSDimitry Andric {ISD::FABS, ISD::FMINNUM, ISD::FMAXNUM, 5315f757f3fSDimitry Andric ISD::FADD, ISD::FCEIL, ISD::FCOS, 5325f757f3fSDimitry Andric ISD::FDIV, ISD::FEXP2, ISD::FEXP, 5335f757f3fSDimitry Andric ISD::FEXP10, ISD::FLOG2, ISD::FREM, 5345f757f3fSDimitry Andric ISD::FLOG, ISD::FLOG10, ISD::FPOW, 5355f757f3fSDimitry Andric ISD::FFLOOR, ISD::FTRUNC, ISD::FMUL, 5365f757f3fSDimitry Andric ISD::FMA, ISD::FRINT, ISD::FNEARBYINT, 5375f757f3fSDimitry Andric ISD::FSQRT, ISD::FSIN, ISD::FSUB, 5385f757f3fSDimitry Andric ISD::FNEG, ISD::VSELECT, ISD::SELECT_CC, 5395f757f3fSDimitry Andric ISD::FCOPYSIGN, ISD::VECTOR_SHUFFLE, ISD::SETCC, 5405f757f3fSDimitry Andric ISD::FCANONICALIZE, ISD::FROUNDEVEN}, 54181ad6265SDimitry Andric VT, Expand); 5420b57cec5SDimitry Andric } 5430b57cec5SDimitry Andric 5440b57cec5SDimitry Andric // This causes using an unrolled select operation rather than expansion with 5450b57cec5SDimitry Andric // bit operations. This is in general better, but the alternative using BFI 5460b57cec5SDimitry Andric // instructions may be better if the select sources are SGPRs. 5470b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::v2f32, Promote); 5480b57cec5SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v2f32, MVT::v2i32); 5490b57cec5SDimitry Andric 5500b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::v3f32, Promote); 5510b57cec5SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v3f32, MVT::v3i32); 5520b57cec5SDimitry Andric 5530b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::v4f32, Promote); 5540b57cec5SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v4f32, MVT::v4i32); 5550b57cec5SDimitry Andric 5560b57cec5SDimitry Andric setOperationAction(ISD::SELECT, MVT::v5f32, Promote); 5570b57cec5SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v5f32, MVT::v5i32); 5580b57cec5SDimitry Andric 559fe6060f1SDimitry Andric setOperationAction(ISD::SELECT, MVT::v6f32, Promote); 560fe6060f1SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v6f32, MVT::v6i32); 561fe6060f1SDimitry Andric 562fe6060f1SDimitry Andric setOperationAction(ISD::SELECT, MVT::v7f32, Promote); 563fe6060f1SDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v7f32, MVT::v7i32); 564fe6060f1SDimitry Andric 565bdd1243dSDimitry Andric setOperationAction(ISD::SELECT, MVT::v9f32, Promote); 566bdd1243dSDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v9f32, MVT::v9i32); 567bdd1243dSDimitry Andric 568bdd1243dSDimitry Andric setOperationAction(ISD::SELECT, MVT::v10f32, Promote); 569bdd1243dSDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v10f32, MVT::v10i32); 570bdd1243dSDimitry Andric 571bdd1243dSDimitry Andric setOperationAction(ISD::SELECT, MVT::v11f32, Promote); 572bdd1243dSDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v11f32, MVT::v11i32); 573bdd1243dSDimitry Andric 574bdd1243dSDimitry Andric setOperationAction(ISD::SELECT, MVT::v12f32, Promote); 575bdd1243dSDimitry Andric AddPromotedToType(ISD::SELECT, MVT::v12f32, MVT::v12i32); 576bdd1243dSDimitry Andric 5770b57cec5SDimitry Andric setSchedulingPreference(Sched::RegPressure); 5780b57cec5SDimitry Andric setJumpIsExpensive(true); 5790b57cec5SDimitry Andric 5800b57cec5SDimitry Andric // FIXME: This is only partially true. If we have to do vector compares, any 5810b57cec5SDimitry Andric // SGPR pair can be a condition register. If we have a uniform condition, we 5820b57cec5SDimitry Andric // are better off doing SALU operations, where there is only one SCC. For now, 5830b57cec5SDimitry Andric // we don't have a way of knowing during instruction selection if a condition 5840b57cec5SDimitry Andric // will be uniform and we always use vector compares. Assume we are using 5850b57cec5SDimitry Andric // vector compares until that is fixed. 5860b57cec5SDimitry Andric setHasMultipleConditionRegisters(true); 5870b57cec5SDimitry Andric 5880b57cec5SDimitry Andric setMinCmpXchgSizeInBits(32); 5890b57cec5SDimitry Andric setSupportsUnalignedAtomics(false); 5900b57cec5SDimitry Andric 5910b57cec5SDimitry Andric PredictableSelectIsExpensive = false; 5920b57cec5SDimitry Andric 5930b57cec5SDimitry Andric // We want to find all load dependencies for long chains of stores to enable 5940b57cec5SDimitry Andric // merging into very wide vectors. The problem is with vectors with > 4 5950b57cec5SDimitry Andric // elements. MergeConsecutiveStores will attempt to merge these because x8/x16 5960b57cec5SDimitry Andric // vectors are a legal type, even though we have to split the loads 5970b57cec5SDimitry Andric // usually. When we can more precisely specify load legality per address 5980b57cec5SDimitry Andric // space, we should be able to make FindBetterChain/MergeConsecutiveStores 5990b57cec5SDimitry Andric // smarter so that they can figure out what to do in 2 iterations without all 6000b57cec5SDimitry Andric // N > 4 stores on the same chain. 6010b57cec5SDimitry Andric GatherAllAliasesMaxDepth = 16; 6020b57cec5SDimitry Andric 6030b57cec5SDimitry Andric // memcpy/memmove/memset are expanded in the IR, so we shouldn't need to worry 6040b57cec5SDimitry Andric // about these during lowering. 6050b57cec5SDimitry Andric MaxStoresPerMemcpy = 0xffffffff; 6060b57cec5SDimitry Andric MaxStoresPerMemmove = 0xffffffff; 6070b57cec5SDimitry Andric MaxStoresPerMemset = 0xffffffff; 6080b57cec5SDimitry Andric 6095ffd83dbSDimitry Andric // The expansion for 64-bit division is enormous. 6105ffd83dbSDimitry Andric if (AMDGPUBypassSlowDiv) 6115ffd83dbSDimitry Andric addBypassSlowDiv(64, 32); 6125ffd83dbSDimitry Andric 61381ad6265SDimitry Andric setTargetDAGCombine({ISD::BITCAST, ISD::SHL, 61481ad6265SDimitry Andric ISD::SRA, ISD::SRL, 61581ad6265SDimitry Andric ISD::TRUNCATE, ISD::MUL, 61681ad6265SDimitry Andric ISD::SMUL_LOHI, ISD::UMUL_LOHI, 61781ad6265SDimitry Andric ISD::MULHU, ISD::MULHS, 61881ad6265SDimitry Andric ISD::SELECT, ISD::SELECT_CC, 61981ad6265SDimitry Andric ISD::STORE, ISD::FADD, 62081ad6265SDimitry Andric ISD::FSUB, ISD::FNEG, 62181ad6265SDimitry Andric ISD::FABS, ISD::AssertZext, 62281ad6265SDimitry Andric ISD::AssertSext, ISD::INTRINSIC_WO_CHAIN}); 623cb14a3feSDimitry Andric 624cb14a3feSDimitry Andric setMaxAtomicSizeInBitsSupported(64); 625b3edf446SDimitry Andric setMaxDivRemBitWidthSupported(64); 6260fca6ea1SDimitry Andric setMaxLargeFPConvertBitWidthSupported(64); 6270b57cec5SDimitry Andric } 6280b57cec5SDimitry Andric 629e8d8bef9SDimitry Andric bool AMDGPUTargetLowering::mayIgnoreSignedZero(SDValue Op) const { 630e8d8bef9SDimitry Andric if (getTargetMachine().Options.NoSignedZerosFPMath) 631e8d8bef9SDimitry Andric return true; 632e8d8bef9SDimitry Andric 633e8d8bef9SDimitry Andric const auto Flags = Op.getNode()->getFlags(); 634e8d8bef9SDimitry Andric if (Flags.hasNoSignedZeros()) 635e8d8bef9SDimitry Andric return true; 636e8d8bef9SDimitry Andric 637e8d8bef9SDimitry Andric return false; 638e8d8bef9SDimitry Andric } 639e8d8bef9SDimitry Andric 6400b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 6410b57cec5SDimitry Andric // Target Information 6420b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 6430b57cec5SDimitry Andric 6440b57cec5SDimitry Andric LLVM_READNONE 64506c3fb27SDimitry Andric static bool fnegFoldsIntoOpcode(unsigned Opc) { 6460b57cec5SDimitry Andric switch (Opc) { 6470b57cec5SDimitry Andric case ISD::FADD: 6480b57cec5SDimitry Andric case ISD::FSUB: 6490b57cec5SDimitry Andric case ISD::FMUL: 6500b57cec5SDimitry Andric case ISD::FMA: 6510b57cec5SDimitry Andric case ISD::FMAD: 6520b57cec5SDimitry Andric case ISD::FMINNUM: 6530b57cec5SDimitry Andric case ISD::FMAXNUM: 6540b57cec5SDimitry Andric case ISD::FMINNUM_IEEE: 6550b57cec5SDimitry Andric case ISD::FMAXNUM_IEEE: 6565f757f3fSDimitry Andric case ISD::FMINIMUM: 6575f757f3fSDimitry Andric case ISD::FMAXIMUM: 65806c3fb27SDimitry Andric case ISD::SELECT: 6590b57cec5SDimitry Andric case ISD::FSIN: 6600b57cec5SDimitry Andric case ISD::FTRUNC: 6610b57cec5SDimitry Andric case ISD::FRINT: 6620b57cec5SDimitry Andric case ISD::FNEARBYINT: 6635f757f3fSDimitry Andric case ISD::FROUNDEVEN: 6640b57cec5SDimitry Andric case ISD::FCANONICALIZE: 6650b57cec5SDimitry Andric case AMDGPUISD::RCP: 6660b57cec5SDimitry Andric case AMDGPUISD::RCP_LEGACY: 6670b57cec5SDimitry Andric case AMDGPUISD::RCP_IFLAG: 6680b57cec5SDimitry Andric case AMDGPUISD::SIN_HW: 6690b57cec5SDimitry Andric case AMDGPUISD::FMUL_LEGACY: 6700b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY: 6710b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY: 6720b57cec5SDimitry Andric case AMDGPUISD::FMED3: 673e8d8bef9SDimitry Andric // TODO: handle llvm.amdgcn.fma.legacy 6740b57cec5SDimitry Andric return true; 67506c3fb27SDimitry Andric case ISD::BITCAST: 67606c3fb27SDimitry Andric llvm_unreachable("bitcast is special cased"); 6770b57cec5SDimitry Andric default: 6780b57cec5SDimitry Andric return false; 6790b57cec5SDimitry Andric } 6800b57cec5SDimitry Andric } 6810b57cec5SDimitry Andric 68206c3fb27SDimitry Andric static bool fnegFoldsIntoOp(const SDNode *N) { 68306c3fb27SDimitry Andric unsigned Opc = N->getOpcode(); 68406c3fb27SDimitry Andric if (Opc == ISD::BITCAST) { 68506c3fb27SDimitry Andric // TODO: Is there a benefit to checking the conditions performFNegCombine 68606c3fb27SDimitry Andric // does? We don't for the other cases. 68706c3fb27SDimitry Andric SDValue BCSrc = N->getOperand(0); 68806c3fb27SDimitry Andric if (BCSrc.getOpcode() == ISD::BUILD_VECTOR) { 68906c3fb27SDimitry Andric return BCSrc.getNumOperands() == 2 && 69006c3fb27SDimitry Andric BCSrc.getOperand(1).getValueSizeInBits() == 32; 69106c3fb27SDimitry Andric } 69206c3fb27SDimitry Andric 69306c3fb27SDimitry Andric return BCSrc.getOpcode() == ISD::SELECT && BCSrc.getValueType() == MVT::f32; 69406c3fb27SDimitry Andric } 69506c3fb27SDimitry Andric 69606c3fb27SDimitry Andric return fnegFoldsIntoOpcode(Opc); 69706c3fb27SDimitry Andric } 69806c3fb27SDimitry Andric 6990b57cec5SDimitry Andric /// \p returns true if the operation will definitely need to use a 64-bit 7000b57cec5SDimitry Andric /// encoding, and thus will use a VOP3 encoding regardless of the source 7010b57cec5SDimitry Andric /// modifiers. 7020b57cec5SDimitry Andric LLVM_READONLY 7030b57cec5SDimitry Andric static bool opMustUseVOP3Encoding(const SDNode *N, MVT VT) { 70406c3fb27SDimitry Andric return (N->getNumOperands() > 2 && N->getOpcode() != ISD::SELECT) || 70506c3fb27SDimitry Andric VT == MVT::f64; 70606c3fb27SDimitry Andric } 70706c3fb27SDimitry Andric 70806c3fb27SDimitry Andric /// Return true if v_cndmask_b32 will support fabs/fneg source modifiers for the 70906c3fb27SDimitry Andric /// type for ISD::SELECT. 71006c3fb27SDimitry Andric LLVM_READONLY 71106c3fb27SDimitry Andric static bool selectSupportsSourceMods(const SDNode *N) { 71206c3fb27SDimitry Andric // TODO: Only applies if select will be vector 71306c3fb27SDimitry Andric return N->getValueType(0) == MVT::f32; 7140b57cec5SDimitry Andric } 7150b57cec5SDimitry Andric 7160b57cec5SDimitry Andric // Most FP instructions support source modifiers, but this could be refined 7170b57cec5SDimitry Andric // slightly. 7180b57cec5SDimitry Andric LLVM_READONLY 7190b57cec5SDimitry Andric static bool hasSourceMods(const SDNode *N) { 7200b57cec5SDimitry Andric if (isa<MemSDNode>(N)) 7210b57cec5SDimitry Andric return false; 7220b57cec5SDimitry Andric 7230b57cec5SDimitry Andric switch (N->getOpcode()) { 7240b57cec5SDimitry Andric case ISD::CopyToReg: 7250b57cec5SDimitry Andric case ISD::FDIV: 7260b57cec5SDimitry Andric case ISD::FREM: 7270b57cec5SDimitry Andric case ISD::INLINEASM: 7280b57cec5SDimitry Andric case ISD::INLINEASM_BR: 7290b57cec5SDimitry Andric case AMDGPUISD::DIV_SCALE: 7308bcb0991SDimitry Andric case ISD::INTRINSIC_W_CHAIN: 7310b57cec5SDimitry Andric 7320b57cec5SDimitry Andric // TODO: Should really be looking at the users of the bitcast. These are 7330b57cec5SDimitry Andric // problematic because bitcasts are used to legalize all stores to integer 7340b57cec5SDimitry Andric // types. 7350b57cec5SDimitry Andric case ISD::BITCAST: 7360b57cec5SDimitry Andric return false; 7378bcb0991SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: { 738647cbc5dSDimitry Andric switch (N->getConstantOperandVal(0)) { 7398bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_p1: 7408bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_p2: 7418bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_mov: 7428bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_p1_f16: 7438bcb0991SDimitry Andric case Intrinsic::amdgcn_interp_p2_f16: 7448bcb0991SDimitry Andric return false; 7458bcb0991SDimitry Andric default: 7468bcb0991SDimitry Andric return true; 7478bcb0991SDimitry Andric } 7488bcb0991SDimitry Andric } 74906c3fb27SDimitry Andric case ISD::SELECT: 75006c3fb27SDimitry Andric return selectSupportsSourceMods(N); 7510b57cec5SDimitry Andric default: 7520b57cec5SDimitry Andric return true; 7530b57cec5SDimitry Andric } 7540b57cec5SDimitry Andric } 7550b57cec5SDimitry Andric 7560b57cec5SDimitry Andric bool AMDGPUTargetLowering::allUsesHaveSourceMods(const SDNode *N, 7570b57cec5SDimitry Andric unsigned CostThreshold) { 7580b57cec5SDimitry Andric // Some users (such as 3-operand FMA/MAD) must use a VOP3 encoding, and thus 7590b57cec5SDimitry Andric // it is truly free to use a source modifier in all cases. If there are 7600b57cec5SDimitry Andric // multiple users but for each one will necessitate using VOP3, there will be 7610b57cec5SDimitry Andric // a code size increase. Try to avoid increasing code size unless we know it 7620b57cec5SDimitry Andric // will save on the instruction count. 7630b57cec5SDimitry Andric unsigned NumMayIncreaseSize = 0; 7640b57cec5SDimitry Andric MVT VT = N->getValueType(0).getScalarType().getSimpleVT(); 7650b57cec5SDimitry Andric 76606c3fb27SDimitry Andric assert(!N->use_empty()); 76706c3fb27SDimitry Andric 7680b57cec5SDimitry Andric // XXX - Should this limit number of uses to check? 7690b57cec5SDimitry Andric for (const SDNode *U : N->uses()) { 7700b57cec5SDimitry Andric if (!hasSourceMods(U)) 7710b57cec5SDimitry Andric return false; 7720b57cec5SDimitry Andric 7730b57cec5SDimitry Andric if (!opMustUseVOP3Encoding(U, VT)) { 7740b57cec5SDimitry Andric if (++NumMayIncreaseSize > CostThreshold) 7750b57cec5SDimitry Andric return false; 7760b57cec5SDimitry Andric } 7770b57cec5SDimitry Andric } 7780b57cec5SDimitry Andric 7790b57cec5SDimitry Andric return true; 7800b57cec5SDimitry Andric } 7810b57cec5SDimitry Andric 7825ffd83dbSDimitry Andric EVT AMDGPUTargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, 7835ffd83dbSDimitry Andric ISD::NodeType ExtendKind) const { 7845ffd83dbSDimitry Andric assert(!VT.isVector() && "only scalar expected"); 7855ffd83dbSDimitry Andric 7865ffd83dbSDimitry Andric // Round to the next multiple of 32-bits. 7875ffd83dbSDimitry Andric unsigned Size = VT.getSizeInBits(); 7885ffd83dbSDimitry Andric if (Size <= 32) 7895ffd83dbSDimitry Andric return MVT::i32; 7905ffd83dbSDimitry Andric return EVT::getIntegerVT(Context, 32 * ((Size + 31) / 32)); 7915ffd83dbSDimitry Andric } 7925ffd83dbSDimitry Andric 7930b57cec5SDimitry Andric MVT AMDGPUTargetLowering::getVectorIdxTy(const DataLayout &) const { 7940b57cec5SDimitry Andric return MVT::i32; 7950b57cec5SDimitry Andric } 7960b57cec5SDimitry Andric 7970b57cec5SDimitry Andric bool AMDGPUTargetLowering::isSelectSupported(SelectSupportKind SelType) const { 7980b57cec5SDimitry Andric return true; 7990b57cec5SDimitry Andric } 8000b57cec5SDimitry Andric 8010b57cec5SDimitry Andric // The backend supports 32 and 64 bit floating point immediates. 8020b57cec5SDimitry Andric // FIXME: Why are we reporting vectors of FP immediates as legal? 8030b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, 8040b57cec5SDimitry Andric bool ForCodeSize) const { 8050b57cec5SDimitry Andric EVT ScalarVT = VT.getScalarType(); 8060b57cec5SDimitry Andric return (ScalarVT == MVT::f32 || ScalarVT == MVT::f64 || 8070b57cec5SDimitry Andric (ScalarVT == MVT::f16 && Subtarget->has16BitInsts())); 8080b57cec5SDimitry Andric } 8090b57cec5SDimitry Andric 8100b57cec5SDimitry Andric // We don't want to shrink f64 / f32 constants. 8110b57cec5SDimitry Andric bool AMDGPUTargetLowering::ShouldShrinkFPConstant(EVT VT) const { 8120b57cec5SDimitry Andric EVT ScalarVT = VT.getScalarType(); 8130b57cec5SDimitry Andric return (ScalarVT != MVT::f32 && ScalarVT != MVT::f64); 8140b57cec5SDimitry Andric } 8150b57cec5SDimitry Andric 8160b57cec5SDimitry Andric bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N, 8170b57cec5SDimitry Andric ISD::LoadExtType ExtTy, 8180b57cec5SDimitry Andric EVT NewVT) const { 8190b57cec5SDimitry Andric // TODO: This may be worth removing. Check regression tests for diffs. 8200b57cec5SDimitry Andric if (!TargetLoweringBase::shouldReduceLoadWidth(N, ExtTy, NewVT)) 8210b57cec5SDimitry Andric return false; 8220b57cec5SDimitry Andric 8230b57cec5SDimitry Andric unsigned NewSize = NewVT.getStoreSizeInBits(); 8240b57cec5SDimitry Andric 8255ffd83dbSDimitry Andric // If we are reducing to a 32-bit load or a smaller multi-dword load, 8265ffd83dbSDimitry Andric // this is always better. 8275ffd83dbSDimitry Andric if (NewSize >= 32) 8280b57cec5SDimitry Andric return true; 8290b57cec5SDimitry Andric 8300b57cec5SDimitry Andric EVT OldVT = N->getValueType(0); 8310b57cec5SDimitry Andric unsigned OldSize = OldVT.getStoreSizeInBits(); 8320b57cec5SDimitry Andric 8330b57cec5SDimitry Andric MemSDNode *MN = cast<MemSDNode>(N); 8340b57cec5SDimitry Andric unsigned AS = MN->getAddressSpace(); 8350b57cec5SDimitry Andric // Do not shrink an aligned scalar load to sub-dword. 8360b57cec5SDimitry Andric // Scalar engine cannot do sub-dword loads. 8377a6dacacSDimitry Andric // TODO: Update this for GFX12 which does have scalar sub-dword loads. 83881ad6265SDimitry Andric if (OldSize >= 32 && NewSize < 32 && MN->getAlign() >= Align(4) && 8390b57cec5SDimitry Andric (AS == AMDGPUAS::CONSTANT_ADDRESS || 8400b57cec5SDimitry Andric AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT || 84181ad6265SDimitry Andric (isa<LoadSDNode>(N) && AS == AMDGPUAS::GLOBAL_ADDRESS && 84281ad6265SDimitry Andric MN->isInvariant())) && 8430b57cec5SDimitry Andric AMDGPUInstrInfo::isUniformMMO(MN->getMemOperand())) 8440b57cec5SDimitry Andric return false; 8450b57cec5SDimitry Andric 8460b57cec5SDimitry Andric // Don't produce extloads from sub 32-bit types. SI doesn't have scalar 8470b57cec5SDimitry Andric // extloads, so doing one requires using a buffer_load. In cases where we 8480b57cec5SDimitry Andric // still couldn't use a scalar load, using the wider load shouldn't really 8490b57cec5SDimitry Andric // hurt anything. 8500b57cec5SDimitry Andric 8510b57cec5SDimitry Andric // If the old size already had to be an extload, there's no harm in continuing 8520b57cec5SDimitry Andric // to reduce the width. 8530b57cec5SDimitry Andric return (OldSize < 32); 8540b57cec5SDimitry Andric } 8550b57cec5SDimitry Andric 8560b57cec5SDimitry Andric bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy, 8570b57cec5SDimitry Andric const SelectionDAG &DAG, 8580b57cec5SDimitry Andric const MachineMemOperand &MMO) const { 8590b57cec5SDimitry Andric 8600b57cec5SDimitry Andric assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits()); 8610b57cec5SDimitry Andric 8620b57cec5SDimitry Andric if (LoadTy.getScalarType() == MVT::i32) 8630b57cec5SDimitry Andric return false; 8640b57cec5SDimitry Andric 8650b57cec5SDimitry Andric unsigned LScalarSize = LoadTy.getScalarSizeInBits(); 8660b57cec5SDimitry Andric unsigned CastScalarSize = CastTy.getScalarSizeInBits(); 8670b57cec5SDimitry Andric 8680b57cec5SDimitry Andric if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32)) 8690b57cec5SDimitry Andric return false; 8700b57cec5SDimitry Andric 871bdd1243dSDimitry Andric unsigned Fast = 0; 8728bcb0991SDimitry Andric return allowsMemoryAccessForAlignment(*DAG.getContext(), DAG.getDataLayout(), 8738bcb0991SDimitry Andric CastTy, MMO, &Fast) && 8748bcb0991SDimitry Andric Fast; 8750b57cec5SDimitry Andric } 8760b57cec5SDimitry Andric 8770b57cec5SDimitry Andric // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also 8780b57cec5SDimitry Andric // profitable with the expansion for 64-bit since it's generally good to 8790b57cec5SDimitry Andric // speculate things. 880bdd1243dSDimitry Andric bool AMDGPUTargetLowering::isCheapToSpeculateCttz(Type *Ty) const { 8810b57cec5SDimitry Andric return true; 8820b57cec5SDimitry Andric } 8830b57cec5SDimitry Andric 884bdd1243dSDimitry Andric bool AMDGPUTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const { 8850b57cec5SDimitry Andric return true; 8860b57cec5SDimitry Andric } 8870b57cec5SDimitry Andric 8880b57cec5SDimitry Andric bool AMDGPUTargetLowering::isSDNodeAlwaysUniform(const SDNode *N) const { 8890b57cec5SDimitry Andric switch (N->getOpcode()) { 8900b57cec5SDimitry Andric case ISD::EntryToken: 8910b57cec5SDimitry Andric case ISD::TokenFactor: 8920b57cec5SDimitry Andric return true; 893e8d8bef9SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: { 894647cbc5dSDimitry Andric unsigned IntrID = N->getConstantOperandVal(0); 8950fca6ea1SDimitry Andric return AMDGPU::isIntrinsicAlwaysUniform(IntrID); 8960b57cec5SDimitry Andric } 8970b57cec5SDimitry Andric case ISD::LOAD: 8988bcb0991SDimitry Andric if (cast<LoadSDNode>(N)->getMemOperand()->getAddrSpace() == 8998bcb0991SDimitry Andric AMDGPUAS::CONSTANT_ADDRESS_32BIT) 9000b57cec5SDimitry Andric return true; 9010b57cec5SDimitry Andric return false; 90281ad6265SDimitry Andric case AMDGPUISD::SETCC: // ballot-style instruction 90381ad6265SDimitry Andric return true; 9040b57cec5SDimitry Andric } 905e8d8bef9SDimitry Andric return false; 9060b57cec5SDimitry Andric } 9070b57cec5SDimitry Andric 9085ffd83dbSDimitry Andric SDValue AMDGPUTargetLowering::getNegatedExpression( 9095ffd83dbSDimitry Andric SDValue Op, SelectionDAG &DAG, bool LegalOperations, bool ForCodeSize, 9105ffd83dbSDimitry Andric NegatibleCost &Cost, unsigned Depth) const { 9115ffd83dbSDimitry Andric 9125ffd83dbSDimitry Andric switch (Op.getOpcode()) { 9135ffd83dbSDimitry Andric case ISD::FMA: 9145ffd83dbSDimitry Andric case ISD::FMAD: { 9155ffd83dbSDimitry Andric // Negating a fma is not free if it has users without source mods. 9165ffd83dbSDimitry Andric if (!allUsesHaveSourceMods(Op.getNode())) 9175ffd83dbSDimitry Andric return SDValue(); 9185ffd83dbSDimitry Andric break; 9195ffd83dbSDimitry Andric } 92006c3fb27SDimitry Andric case AMDGPUISD::RCP: { 92106c3fb27SDimitry Andric SDValue Src = Op.getOperand(0); 92206c3fb27SDimitry Andric EVT VT = Op.getValueType(); 92306c3fb27SDimitry Andric SDLoc SL(Op); 92406c3fb27SDimitry Andric 92506c3fb27SDimitry Andric SDValue NegSrc = getNegatedExpression(Src, DAG, LegalOperations, 92606c3fb27SDimitry Andric ForCodeSize, Cost, Depth + 1); 92706c3fb27SDimitry Andric if (NegSrc) 92806c3fb27SDimitry Andric return DAG.getNode(AMDGPUISD::RCP, SL, VT, NegSrc, Op->getFlags()); 92906c3fb27SDimitry Andric return SDValue(); 93006c3fb27SDimitry Andric } 9315ffd83dbSDimitry Andric default: 9325ffd83dbSDimitry Andric break; 9335ffd83dbSDimitry Andric } 9345ffd83dbSDimitry Andric 9355ffd83dbSDimitry Andric return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations, 9365ffd83dbSDimitry Andric ForCodeSize, Cost, Depth); 9375ffd83dbSDimitry Andric } 9385ffd83dbSDimitry Andric 9390b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 9400b57cec5SDimitry Andric // Target Properties 9410b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 9420b57cec5SDimitry Andric 9430b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFAbsFree(EVT VT) const { 9440b57cec5SDimitry Andric assert(VT.isFloatingPoint()); 9450b57cec5SDimitry Andric 9460b57cec5SDimitry Andric // Packed operations do not have a fabs modifier. 9470b57cec5SDimitry Andric return VT == MVT::f32 || VT == MVT::f64 || 9480fca6ea1SDimitry Andric (Subtarget->has16BitInsts() && (VT == MVT::f16 || VT == MVT::bf16)); 9490b57cec5SDimitry Andric } 9500b57cec5SDimitry Andric 9510b57cec5SDimitry Andric bool AMDGPUTargetLowering::isFNegFree(EVT VT) const { 9520b57cec5SDimitry Andric assert(VT.isFloatingPoint()); 953fe6060f1SDimitry Andric // Report this based on the end legalized type. 954fe6060f1SDimitry Andric VT = VT.getScalarType(); 9550fca6ea1SDimitry Andric return VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f16 || VT == MVT::bf16; 9560b57cec5SDimitry Andric } 9570b57cec5SDimitry Andric 95806c3fb27SDimitry Andric bool AMDGPUTargetLowering:: storeOfVectorConstantIsCheap(bool IsZero, EVT MemVT, 9590b57cec5SDimitry Andric unsigned NumElem, 9600b57cec5SDimitry Andric unsigned AS) const { 9610b57cec5SDimitry Andric return true; 9620b57cec5SDimitry Andric } 9630b57cec5SDimitry Andric 9640b57cec5SDimitry Andric bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const { 9650b57cec5SDimitry Andric // There are few operations which truly have vector input operands. Any vector 9660b57cec5SDimitry Andric // operation is going to involve operations on each component, and a 9670b57cec5SDimitry Andric // build_vector will be a copy per element, so it always makes sense to use a 9680b57cec5SDimitry Andric // build_vector input in place of the extracted element to avoid a copy into a 9690b57cec5SDimitry Andric // super register. 9700b57cec5SDimitry Andric // 9710b57cec5SDimitry Andric // We should probably only do this if all users are extracts only, but this 9720b57cec5SDimitry Andric // should be the common case. 9730b57cec5SDimitry Andric return true; 9740b57cec5SDimitry Andric } 9750b57cec5SDimitry Andric 9760b57cec5SDimitry Andric bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const { 9770b57cec5SDimitry Andric // Truncate is just accessing a subregister. 9780b57cec5SDimitry Andric 9790b57cec5SDimitry Andric unsigned SrcSize = Source.getSizeInBits(); 9800b57cec5SDimitry Andric unsigned DestSize = Dest.getSizeInBits(); 9810b57cec5SDimitry Andric 9820b57cec5SDimitry Andric return DestSize < SrcSize && DestSize % 32 == 0 ; 9830b57cec5SDimitry Andric } 9840b57cec5SDimitry Andric 9850b57cec5SDimitry Andric bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const { 9860b57cec5SDimitry Andric // Truncate is just accessing a subregister. 9870b57cec5SDimitry Andric 9880b57cec5SDimitry Andric unsigned SrcSize = Source->getScalarSizeInBits(); 9890b57cec5SDimitry Andric unsigned DestSize = Dest->getScalarSizeInBits(); 9900b57cec5SDimitry Andric 9910b57cec5SDimitry Andric if (DestSize== 16 && Subtarget->has16BitInsts()) 9920b57cec5SDimitry Andric return SrcSize >= 32; 9930b57cec5SDimitry Andric 9940b57cec5SDimitry Andric return DestSize < SrcSize && DestSize % 32 == 0; 9950b57cec5SDimitry Andric } 9960b57cec5SDimitry Andric 9970b57cec5SDimitry Andric bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const { 9980b57cec5SDimitry Andric unsigned SrcSize = Src->getScalarSizeInBits(); 9990b57cec5SDimitry Andric unsigned DestSize = Dest->getScalarSizeInBits(); 10000b57cec5SDimitry Andric 10010b57cec5SDimitry Andric if (SrcSize == 16 && Subtarget->has16BitInsts()) 10020b57cec5SDimitry Andric return DestSize >= 32; 10030b57cec5SDimitry Andric 10040b57cec5SDimitry Andric return SrcSize == 32 && DestSize == 64; 10050b57cec5SDimitry Andric } 10060b57cec5SDimitry Andric 10070b57cec5SDimitry Andric bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const { 10080b57cec5SDimitry Andric // Any register load of a 64-bit value really requires 2 32-bit moves. For all 10090b57cec5SDimitry Andric // practical purposes, the extra mov 0 to load a 64-bit is free. As used, 10100b57cec5SDimitry Andric // this will enable reducing 64-bit operations the 32-bit, which is always 10110b57cec5SDimitry Andric // good. 10120b57cec5SDimitry Andric 10130b57cec5SDimitry Andric if (Src == MVT::i16) 10140b57cec5SDimitry Andric return Dest == MVT::i32 ||Dest == MVT::i64 ; 10150b57cec5SDimitry Andric 10160b57cec5SDimitry Andric return Src == MVT::i32 && Dest == MVT::i64; 10170b57cec5SDimitry Andric } 10180b57cec5SDimitry Andric 10190b57cec5SDimitry Andric bool AMDGPUTargetLowering::isNarrowingProfitable(EVT SrcVT, EVT DestVT) const { 10200b57cec5SDimitry Andric // There aren't really 64-bit registers, but pairs of 32-bit ones and only a 10210b57cec5SDimitry Andric // limited number of native 64-bit operations. Shrinking an operation to fit 10220b57cec5SDimitry Andric // in a single 32-bit register should always be helpful. As currently used, 10230b57cec5SDimitry Andric // this is much less general than the name suggests, and is only used in 10240b57cec5SDimitry Andric // places trying to reduce the sizes of loads. Shrinking loads to < 32-bits is 10250b57cec5SDimitry Andric // not profitable, and may actually be harmful. 10260b57cec5SDimitry Andric return SrcVT.getSizeInBits() > 32 && DestVT.getSizeInBits() == 32; 10270b57cec5SDimitry Andric } 10280b57cec5SDimitry Andric 1029bdd1243dSDimitry Andric bool AMDGPUTargetLowering::isDesirableToCommuteWithShift( 1030bdd1243dSDimitry Andric const SDNode* N, CombineLevel Level) const { 1031bdd1243dSDimitry Andric assert((N->getOpcode() == ISD::SHL || N->getOpcode() == ISD::SRA || 1032bdd1243dSDimitry Andric N->getOpcode() == ISD::SRL) && 1033bdd1243dSDimitry Andric "Expected shift op"); 1034bdd1243dSDimitry Andric // Always commute pre-type legalization and right shifts. 1035bdd1243dSDimitry Andric // We're looking for shl(or(x,y),z) patterns. 1036bdd1243dSDimitry Andric if (Level < CombineLevel::AfterLegalizeTypes || 1037bdd1243dSDimitry Andric N->getOpcode() != ISD::SHL || N->getOperand(0).getOpcode() != ISD::OR) 1038bdd1243dSDimitry Andric return true; 1039bdd1243dSDimitry Andric 1040bdd1243dSDimitry Andric // If only user is a i32 right-shift, then don't destroy a BFE pattern. 1041bdd1243dSDimitry Andric if (N->getValueType(0) == MVT::i32 && N->use_size() == 1 && 1042bdd1243dSDimitry Andric (N->use_begin()->getOpcode() == ISD::SRA || 1043bdd1243dSDimitry Andric N->use_begin()->getOpcode() == ISD::SRL)) 1044bdd1243dSDimitry Andric return false; 1045bdd1243dSDimitry Andric 1046bdd1243dSDimitry Andric // Don't destroy or(shl(load_zext(),c), load_zext()) patterns. 1047bdd1243dSDimitry Andric auto IsShiftAndLoad = [](SDValue LHS, SDValue RHS) { 1048bdd1243dSDimitry Andric if (LHS.getOpcode() != ISD::SHL) 1049bdd1243dSDimitry Andric return false; 1050bdd1243dSDimitry Andric auto *RHSLd = dyn_cast<LoadSDNode>(RHS); 1051bdd1243dSDimitry Andric auto *LHS0 = dyn_cast<LoadSDNode>(LHS.getOperand(0)); 1052bdd1243dSDimitry Andric auto *LHS1 = dyn_cast<ConstantSDNode>(LHS.getOperand(1)); 1053bdd1243dSDimitry Andric return LHS0 && LHS1 && RHSLd && LHS0->getExtensionType() == ISD::ZEXTLOAD && 1054bdd1243dSDimitry Andric LHS1->getAPIntValue() == LHS0->getMemoryVT().getScalarSizeInBits() && 1055bdd1243dSDimitry Andric RHSLd->getExtensionType() == ISD::ZEXTLOAD; 1056bdd1243dSDimitry Andric }; 1057bdd1243dSDimitry Andric SDValue LHS = N->getOperand(0).getOperand(0); 1058bdd1243dSDimitry Andric SDValue RHS = N->getOperand(0).getOperand(1); 1059bdd1243dSDimitry Andric return !(IsShiftAndLoad(LHS, RHS) || IsShiftAndLoad(RHS, LHS)); 1060bdd1243dSDimitry Andric } 1061bdd1243dSDimitry Andric 10620b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 10630b57cec5SDimitry Andric // TargetLowering Callbacks 10640b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 10650b57cec5SDimitry Andric 10660b57cec5SDimitry Andric CCAssignFn *AMDGPUCallLowering::CCAssignFnForCall(CallingConv::ID CC, 10670b57cec5SDimitry Andric bool IsVarArg) { 10680b57cec5SDimitry Andric switch (CC) { 10690b57cec5SDimitry Andric case CallingConv::AMDGPU_VS: 10700b57cec5SDimitry Andric case CallingConv::AMDGPU_GS: 10710b57cec5SDimitry Andric case CallingConv::AMDGPU_PS: 10720b57cec5SDimitry Andric case CallingConv::AMDGPU_CS: 10730b57cec5SDimitry Andric case CallingConv::AMDGPU_HS: 10740b57cec5SDimitry Andric case CallingConv::AMDGPU_ES: 10750b57cec5SDimitry Andric case CallingConv::AMDGPU_LS: 10760b57cec5SDimitry Andric return CC_AMDGPU; 10775f757f3fSDimitry Andric case CallingConv::AMDGPU_CS_Chain: 10785f757f3fSDimitry Andric case CallingConv::AMDGPU_CS_ChainPreserve: 10795f757f3fSDimitry Andric return CC_AMDGPU_CS_CHAIN; 10800b57cec5SDimitry Andric case CallingConv::C: 10810b57cec5SDimitry Andric case CallingConv::Fast: 10820b57cec5SDimitry Andric case CallingConv::Cold: 10830b57cec5SDimitry Andric return CC_AMDGPU_Func; 1084e8d8bef9SDimitry Andric case CallingConv::AMDGPU_Gfx: 1085e8d8bef9SDimitry Andric return CC_SI_Gfx; 10860b57cec5SDimitry Andric case CallingConv::AMDGPU_KERNEL: 10870b57cec5SDimitry Andric case CallingConv::SPIR_KERNEL: 10880b57cec5SDimitry Andric default: 10890b57cec5SDimitry Andric report_fatal_error("Unsupported calling convention for call"); 10900b57cec5SDimitry Andric } 10910b57cec5SDimitry Andric } 10920b57cec5SDimitry Andric 10930b57cec5SDimitry Andric CCAssignFn *AMDGPUCallLowering::CCAssignFnForReturn(CallingConv::ID CC, 10940b57cec5SDimitry Andric bool IsVarArg) { 10950b57cec5SDimitry Andric switch (CC) { 10960b57cec5SDimitry Andric case CallingConv::AMDGPU_KERNEL: 10970b57cec5SDimitry Andric case CallingConv::SPIR_KERNEL: 10980b57cec5SDimitry Andric llvm_unreachable("kernels should not be handled here"); 10990b57cec5SDimitry Andric case CallingConv::AMDGPU_VS: 11000b57cec5SDimitry Andric case CallingConv::AMDGPU_GS: 11010b57cec5SDimitry Andric case CallingConv::AMDGPU_PS: 11020b57cec5SDimitry Andric case CallingConv::AMDGPU_CS: 11035f757f3fSDimitry Andric case CallingConv::AMDGPU_CS_Chain: 11045f757f3fSDimitry Andric case CallingConv::AMDGPU_CS_ChainPreserve: 11050b57cec5SDimitry Andric case CallingConv::AMDGPU_HS: 11060b57cec5SDimitry Andric case CallingConv::AMDGPU_ES: 11070b57cec5SDimitry Andric case CallingConv::AMDGPU_LS: 11080b57cec5SDimitry Andric return RetCC_SI_Shader; 1109e8d8bef9SDimitry Andric case CallingConv::AMDGPU_Gfx: 1110e8d8bef9SDimitry Andric return RetCC_SI_Gfx; 11110b57cec5SDimitry Andric case CallingConv::C: 11120b57cec5SDimitry Andric case CallingConv::Fast: 11130b57cec5SDimitry Andric case CallingConv::Cold: 11140b57cec5SDimitry Andric return RetCC_AMDGPU_Func; 11150b57cec5SDimitry Andric default: 11160b57cec5SDimitry Andric report_fatal_error("Unsupported calling convention."); 11170b57cec5SDimitry Andric } 11180b57cec5SDimitry Andric } 11190b57cec5SDimitry Andric 11200b57cec5SDimitry Andric /// The SelectionDAGBuilder will automatically promote function arguments 11210b57cec5SDimitry Andric /// with illegal types. However, this does not work for the AMDGPU targets 11220b57cec5SDimitry Andric /// since the function arguments are stored in memory as these illegal types. 11230b57cec5SDimitry Andric /// In order to handle this properly we need to get the original types sizes 11240b57cec5SDimitry Andric /// from the LLVM IR Function and fixup the ISD:InputArg values before 11250b57cec5SDimitry Andric /// passing them to AnalyzeFormalArguments() 11260b57cec5SDimitry Andric 11270b57cec5SDimitry Andric /// When the SelectionDAGBuilder computes the Ins, it takes care of splitting 11280b57cec5SDimitry Andric /// input values across multiple registers. Each item in the Ins array 11290b57cec5SDimitry Andric /// represents a single value that will be stored in registers. Ins[x].VT is 11300b57cec5SDimitry Andric /// the value type of the value that will be stored in the register, so 11310b57cec5SDimitry Andric /// whatever SDNode we lower the argument to needs to be this type. 11320b57cec5SDimitry Andric /// 11330b57cec5SDimitry Andric /// In order to correctly lower the arguments we need to know the size of each 11340b57cec5SDimitry Andric /// argument. Since Ins[x].VT gives us the size of the register that will 11350b57cec5SDimitry Andric /// hold the value, we need to look at Ins[x].ArgVT to see the 'real' type 1136349cc55cSDimitry Andric /// for the original function argument so that we can deduce the correct memory 11370b57cec5SDimitry Andric /// type to use for Ins[x]. In most cases the correct memory type will be 11380b57cec5SDimitry Andric /// Ins[x].ArgVT. However, this will not always be the case. If, for example, 11390b57cec5SDimitry Andric /// we have a kernel argument of type v8i8, this argument will be split into 11400b57cec5SDimitry Andric /// 8 parts and each part will be represented by its own item in the Ins array. 11410b57cec5SDimitry Andric /// For each part the Ins[x].ArgVT will be the v8i8, which is the full type of 11420b57cec5SDimitry Andric /// the argument before it was split. From this, we deduce that the memory type 11430b57cec5SDimitry Andric /// for each individual part is i8. We pass the memory type as LocVT to the 11440b57cec5SDimitry Andric /// calling convention analysis function and the register type (Ins[x].VT) as 11450b57cec5SDimitry Andric /// the ValVT. 11460b57cec5SDimitry Andric void AMDGPUTargetLowering::analyzeFormalArgumentsCompute( 11470b57cec5SDimitry Andric CCState &State, 11480b57cec5SDimitry Andric const SmallVectorImpl<ISD::InputArg> &Ins) const { 11490b57cec5SDimitry Andric const MachineFunction &MF = State.getMachineFunction(); 11500b57cec5SDimitry Andric const Function &Fn = MF.getFunction(); 11510b57cec5SDimitry Andric LLVMContext &Ctx = Fn.getParent()->getContext(); 11520b57cec5SDimitry Andric const AMDGPUSubtarget &ST = AMDGPUSubtarget::get(MF); 115306c3fb27SDimitry Andric const unsigned ExplicitOffset = ST.getExplicitKernelArgOffset(); 11540b57cec5SDimitry Andric CallingConv::ID CC = Fn.getCallingConv(); 11550b57cec5SDimitry Andric 11565ffd83dbSDimitry Andric Align MaxAlign = Align(1); 11570b57cec5SDimitry Andric uint64_t ExplicitArgOffset = 0; 11580fca6ea1SDimitry Andric const DataLayout &DL = Fn.getDataLayout(); 11590b57cec5SDimitry Andric 11600b57cec5SDimitry Andric unsigned InIndex = 0; 11610b57cec5SDimitry Andric 11620b57cec5SDimitry Andric for (const Argument &Arg : Fn.args()) { 1163e8d8bef9SDimitry Andric const bool IsByRef = Arg.hasByRefAttr(); 11640b57cec5SDimitry Andric Type *BaseArgTy = Arg.getType(); 1165e8d8bef9SDimitry Andric Type *MemArgTy = IsByRef ? Arg.getParamByRefType() : BaseArgTy; 116681ad6265SDimitry Andric Align Alignment = DL.getValueOrABITypeAlignment( 1167bdd1243dSDimitry Andric IsByRef ? Arg.getParamAlign() : std::nullopt, MemArgTy); 116881ad6265SDimitry Andric MaxAlign = std::max(Alignment, MaxAlign); 1169e8d8bef9SDimitry Andric uint64_t AllocSize = DL.getTypeAllocSize(MemArgTy); 11700b57cec5SDimitry Andric 11715ffd83dbSDimitry Andric uint64_t ArgOffset = alignTo(ExplicitArgOffset, Alignment) + ExplicitOffset; 11725ffd83dbSDimitry Andric ExplicitArgOffset = alignTo(ExplicitArgOffset, Alignment) + AllocSize; 11730b57cec5SDimitry Andric 11740b57cec5SDimitry Andric // We're basically throwing away everything passed into us and starting over 11750b57cec5SDimitry Andric // to get accurate in-memory offsets. The "PartOffset" is completely useless 11760b57cec5SDimitry Andric // to us as computed in Ins. 11770b57cec5SDimitry Andric // 11780b57cec5SDimitry Andric // We also need to figure out what type legalization is trying to do to get 11790b57cec5SDimitry Andric // the correct memory offsets. 11800b57cec5SDimitry Andric 11810b57cec5SDimitry Andric SmallVector<EVT, 16> ValueVTs; 11820b57cec5SDimitry Andric SmallVector<uint64_t, 16> Offsets; 11830b57cec5SDimitry Andric ComputeValueVTs(*this, DL, BaseArgTy, ValueVTs, &Offsets, ArgOffset); 11840b57cec5SDimitry Andric 11850b57cec5SDimitry Andric for (unsigned Value = 0, NumValues = ValueVTs.size(); 11860b57cec5SDimitry Andric Value != NumValues; ++Value) { 11870b57cec5SDimitry Andric uint64_t BasePartOffset = Offsets[Value]; 11880b57cec5SDimitry Andric 11890b57cec5SDimitry Andric EVT ArgVT = ValueVTs[Value]; 11900b57cec5SDimitry Andric EVT MemVT = ArgVT; 11910b57cec5SDimitry Andric MVT RegisterVT = getRegisterTypeForCallingConv(Ctx, CC, ArgVT); 11920b57cec5SDimitry Andric unsigned NumRegs = getNumRegistersForCallingConv(Ctx, CC, ArgVT); 11930b57cec5SDimitry Andric 11940b57cec5SDimitry Andric if (NumRegs == 1) { 11950b57cec5SDimitry Andric // This argument is not split, so the IR type is the memory type. 11960b57cec5SDimitry Andric if (ArgVT.isExtended()) { 11970b57cec5SDimitry Andric // We have an extended type, like i24, so we should just use the 11980b57cec5SDimitry Andric // register type. 11990b57cec5SDimitry Andric MemVT = RegisterVT; 12000b57cec5SDimitry Andric } else { 12010b57cec5SDimitry Andric MemVT = ArgVT; 12020b57cec5SDimitry Andric } 12030b57cec5SDimitry Andric } else if (ArgVT.isVector() && RegisterVT.isVector() && 12040b57cec5SDimitry Andric ArgVT.getScalarType() == RegisterVT.getScalarType()) { 12050b57cec5SDimitry Andric assert(ArgVT.getVectorNumElements() > RegisterVT.getVectorNumElements()); 12060b57cec5SDimitry Andric // We have a vector value which has been split into a vector with 12070b57cec5SDimitry Andric // the same scalar type, but fewer elements. This should handle 12080b57cec5SDimitry Andric // all the floating-point vector types. 12090b57cec5SDimitry Andric MemVT = RegisterVT; 12100b57cec5SDimitry Andric } else if (ArgVT.isVector() && 12110b57cec5SDimitry Andric ArgVT.getVectorNumElements() == NumRegs) { 12120b57cec5SDimitry Andric // This arg has been split so that each element is stored in a separate 12130b57cec5SDimitry Andric // register. 12140b57cec5SDimitry Andric MemVT = ArgVT.getScalarType(); 12150b57cec5SDimitry Andric } else if (ArgVT.isExtended()) { 12160b57cec5SDimitry Andric // We have an extended type, like i65. 12170b57cec5SDimitry Andric MemVT = RegisterVT; 12180b57cec5SDimitry Andric } else { 12190b57cec5SDimitry Andric unsigned MemoryBits = ArgVT.getStoreSizeInBits() / NumRegs; 12200b57cec5SDimitry Andric assert(ArgVT.getStoreSizeInBits() % NumRegs == 0); 12210b57cec5SDimitry Andric if (RegisterVT.isInteger()) { 12220b57cec5SDimitry Andric MemVT = EVT::getIntegerVT(State.getContext(), MemoryBits); 12230b57cec5SDimitry Andric } else if (RegisterVT.isVector()) { 12240b57cec5SDimitry Andric assert(!RegisterVT.getScalarType().isFloatingPoint()); 12250b57cec5SDimitry Andric unsigned NumElements = RegisterVT.getVectorNumElements(); 12260b57cec5SDimitry Andric assert(MemoryBits % NumElements == 0); 12270b57cec5SDimitry Andric // This vector type has been split into another vector type with 12280b57cec5SDimitry Andric // a different elements size. 12290b57cec5SDimitry Andric EVT ScalarVT = EVT::getIntegerVT(State.getContext(), 12300b57cec5SDimitry Andric MemoryBits / NumElements); 12310b57cec5SDimitry Andric MemVT = EVT::getVectorVT(State.getContext(), ScalarVT, NumElements); 12320b57cec5SDimitry Andric } else { 12330b57cec5SDimitry Andric llvm_unreachable("cannot deduce memory type."); 12340b57cec5SDimitry Andric } 12350b57cec5SDimitry Andric } 12360b57cec5SDimitry Andric 12370b57cec5SDimitry Andric // Convert one element vectors to scalar. 12380b57cec5SDimitry Andric if (MemVT.isVector() && MemVT.getVectorNumElements() == 1) 12390b57cec5SDimitry Andric MemVT = MemVT.getScalarType(); 12400b57cec5SDimitry Andric 12410b57cec5SDimitry Andric // Round up vec3/vec5 argument. 12420b57cec5SDimitry Andric if (MemVT.isVector() && !MemVT.isPow2VectorType()) { 12430b57cec5SDimitry Andric assert(MemVT.getVectorNumElements() == 3 || 1244bdd1243dSDimitry Andric MemVT.getVectorNumElements() == 5 || 1245bdd1243dSDimitry Andric (MemVT.getVectorNumElements() >= 9 && 1246bdd1243dSDimitry Andric MemVT.getVectorNumElements() <= 12)); 12470b57cec5SDimitry Andric MemVT = MemVT.getPow2VectorType(State.getContext()); 12485ffd83dbSDimitry Andric } else if (!MemVT.isSimple() && !MemVT.isVector()) { 12495ffd83dbSDimitry Andric MemVT = MemVT.getRoundIntegerType(State.getContext()); 12500b57cec5SDimitry Andric } 12510b57cec5SDimitry Andric 12520b57cec5SDimitry Andric unsigned PartOffset = 0; 12530b57cec5SDimitry Andric for (unsigned i = 0; i != NumRegs; ++i) { 12540b57cec5SDimitry Andric State.addLoc(CCValAssign::getCustomMem(InIndex++, RegisterVT, 12550b57cec5SDimitry Andric BasePartOffset + PartOffset, 12560b57cec5SDimitry Andric MemVT.getSimpleVT(), 12570b57cec5SDimitry Andric CCValAssign::Full)); 12580b57cec5SDimitry Andric PartOffset += MemVT.getStoreSize(); 12590b57cec5SDimitry Andric } 12600b57cec5SDimitry Andric } 12610b57cec5SDimitry Andric } 12620b57cec5SDimitry Andric } 12630b57cec5SDimitry Andric 12640b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerReturn( 12650b57cec5SDimitry Andric SDValue Chain, CallingConv::ID CallConv, 12660b57cec5SDimitry Andric bool isVarArg, 12670b57cec5SDimitry Andric const SmallVectorImpl<ISD::OutputArg> &Outs, 12680b57cec5SDimitry Andric const SmallVectorImpl<SDValue> &OutVals, 12690b57cec5SDimitry Andric const SDLoc &DL, SelectionDAG &DAG) const { 12700b57cec5SDimitry Andric // FIXME: Fails for r600 tests 12710b57cec5SDimitry Andric //assert(!isVarArg && Outs.empty() && OutVals.empty() && 12720b57cec5SDimitry Andric // "wave terminate should not have return values"); 12730b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::ENDPGM, DL, MVT::Other, Chain); 12740b57cec5SDimitry Andric } 12750b57cec5SDimitry Andric 12760b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 12770b57cec5SDimitry Andric // Target specific lowering 12780b57cec5SDimitry Andric //===---------------------------------------------------------------------===// 12790b57cec5SDimitry Andric 12800b57cec5SDimitry Andric /// Selects the correct CCAssignFn for a given CallingConvention value. 12810b57cec5SDimitry Andric CCAssignFn *AMDGPUTargetLowering::CCAssignFnForCall(CallingConv::ID CC, 12820b57cec5SDimitry Andric bool IsVarArg) { 12830b57cec5SDimitry Andric return AMDGPUCallLowering::CCAssignFnForCall(CC, IsVarArg); 12840b57cec5SDimitry Andric } 12850b57cec5SDimitry Andric 12860b57cec5SDimitry Andric CCAssignFn *AMDGPUTargetLowering::CCAssignFnForReturn(CallingConv::ID CC, 12870b57cec5SDimitry Andric bool IsVarArg) { 12880b57cec5SDimitry Andric return AMDGPUCallLowering::CCAssignFnForReturn(CC, IsVarArg); 12890b57cec5SDimitry Andric } 12900b57cec5SDimitry Andric 12910b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::addTokenForArgument(SDValue Chain, 12920b57cec5SDimitry Andric SelectionDAG &DAG, 12930b57cec5SDimitry Andric MachineFrameInfo &MFI, 12940b57cec5SDimitry Andric int ClobberedFI) const { 12950b57cec5SDimitry Andric SmallVector<SDValue, 8> ArgChains; 12960b57cec5SDimitry Andric int64_t FirstByte = MFI.getObjectOffset(ClobberedFI); 12970b57cec5SDimitry Andric int64_t LastByte = FirstByte + MFI.getObjectSize(ClobberedFI) - 1; 12980b57cec5SDimitry Andric 12990b57cec5SDimitry Andric // Include the original chain at the beginning of the list. When this is 13000b57cec5SDimitry Andric // used by target LowerCall hooks, this helps legalize find the 13010b57cec5SDimitry Andric // CALLSEQ_BEGIN node. 13020b57cec5SDimitry Andric ArgChains.push_back(Chain); 13030b57cec5SDimitry Andric 13040b57cec5SDimitry Andric // Add a chain value for each stack argument corresponding 1305349cc55cSDimitry Andric for (SDNode *U : DAG.getEntryNode().getNode()->uses()) { 1306349cc55cSDimitry Andric if (LoadSDNode *L = dyn_cast<LoadSDNode>(U)) { 13070b57cec5SDimitry Andric if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(L->getBasePtr())) { 13080b57cec5SDimitry Andric if (FI->getIndex() < 0) { 13090b57cec5SDimitry Andric int64_t InFirstByte = MFI.getObjectOffset(FI->getIndex()); 13100b57cec5SDimitry Andric int64_t InLastByte = InFirstByte; 13110b57cec5SDimitry Andric InLastByte += MFI.getObjectSize(FI->getIndex()) - 1; 13120b57cec5SDimitry Andric 13130b57cec5SDimitry Andric if ((InFirstByte <= FirstByte && FirstByte <= InLastByte) || 13140b57cec5SDimitry Andric (FirstByte <= InFirstByte && InFirstByte <= LastByte)) 13150b57cec5SDimitry Andric ArgChains.push_back(SDValue(L, 1)); 13160b57cec5SDimitry Andric } 13170b57cec5SDimitry Andric } 13180b57cec5SDimitry Andric } 13190b57cec5SDimitry Andric } 13200b57cec5SDimitry Andric 13210b57cec5SDimitry Andric // Build a tokenfactor for all the chains. 13220b57cec5SDimitry Andric return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ArgChains); 13230b57cec5SDimitry Andric } 13240b57cec5SDimitry Andric 13250b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::lowerUnhandledCall(CallLoweringInfo &CLI, 13260b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals, 13270b57cec5SDimitry Andric StringRef Reason) const { 13280b57cec5SDimitry Andric SDValue Callee = CLI.Callee; 13290b57cec5SDimitry Andric SelectionDAG &DAG = CLI.DAG; 13300b57cec5SDimitry Andric 13310b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction(); 13320b57cec5SDimitry Andric 13330b57cec5SDimitry Andric StringRef FuncName("<unknown>"); 13340b57cec5SDimitry Andric 13350b57cec5SDimitry Andric if (const ExternalSymbolSDNode *G = dyn_cast<ExternalSymbolSDNode>(Callee)) 13360b57cec5SDimitry Andric FuncName = G->getSymbol(); 13370b57cec5SDimitry Andric else if (const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) 13380b57cec5SDimitry Andric FuncName = G->getGlobal()->getName(); 13390b57cec5SDimitry Andric 13400b57cec5SDimitry Andric DiagnosticInfoUnsupported NoCalls( 13410b57cec5SDimitry Andric Fn, Reason + FuncName, CLI.DL.getDebugLoc()); 13420b57cec5SDimitry Andric DAG.getContext()->diagnose(NoCalls); 13430b57cec5SDimitry Andric 13440b57cec5SDimitry Andric if (!CLI.IsTailCall) { 13450fca6ea1SDimitry Andric for (ISD::InputArg &Arg : CLI.Ins) 13460fca6ea1SDimitry Andric InVals.push_back(DAG.getUNDEF(Arg.VT)); 13470b57cec5SDimitry Andric } 13480b57cec5SDimitry Andric 13490b57cec5SDimitry Andric return DAG.getEntryNode(); 13500b57cec5SDimitry Andric } 13510b57cec5SDimitry Andric 13520b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCall(CallLoweringInfo &CLI, 13530b57cec5SDimitry Andric SmallVectorImpl<SDValue> &InVals) const { 13540b57cec5SDimitry Andric return lowerUnhandledCall(CLI, InVals, "unsupported call to function "); 13550b57cec5SDimitry Andric } 13560b57cec5SDimitry Andric 13570b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, 13580b57cec5SDimitry Andric SelectionDAG &DAG) const { 13590b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction(); 13600b57cec5SDimitry Andric 13610b57cec5SDimitry Andric DiagnosticInfoUnsupported NoDynamicAlloca(Fn, "unsupported dynamic alloca", 13620b57cec5SDimitry Andric SDLoc(Op).getDebugLoc()); 13630b57cec5SDimitry Andric DAG.getContext()->diagnose(NoDynamicAlloca); 13640b57cec5SDimitry Andric auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)}; 13650b57cec5SDimitry Andric return DAG.getMergeValues(Ops, SDLoc()); 13660b57cec5SDimitry Andric } 13670b57cec5SDimitry Andric 13680b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op, 13690b57cec5SDimitry Andric SelectionDAG &DAG) const { 13700b57cec5SDimitry Andric switch (Op.getOpcode()) { 13710b57cec5SDimitry Andric default: 13720b57cec5SDimitry Andric Op->print(errs(), &DAG); 13730b57cec5SDimitry Andric llvm_unreachable("Custom lowering code for this " 13740b57cec5SDimitry Andric "instruction is not implemented yet!"); 13750b57cec5SDimitry Andric break; 13760b57cec5SDimitry Andric case ISD::SIGN_EXTEND_INREG: return LowerSIGN_EXTEND_INREG(Op, DAG); 13770b57cec5SDimitry Andric case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, DAG); 13780b57cec5SDimitry Andric case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op, DAG); 13790b57cec5SDimitry Andric case ISD::UDIVREM: return LowerUDIVREM(Op, DAG); 13800b57cec5SDimitry Andric case ISD::SDIVREM: return LowerSDIVREM(Op, DAG); 13810b57cec5SDimitry Andric case ISD::FREM: return LowerFREM(Op, DAG); 13820b57cec5SDimitry Andric case ISD::FCEIL: return LowerFCEIL(Op, DAG); 13830b57cec5SDimitry Andric case ISD::FTRUNC: return LowerFTRUNC(Op, DAG); 13840b57cec5SDimitry Andric case ISD::FRINT: return LowerFRINT(Op, DAG); 13850b57cec5SDimitry Andric case ISD::FNEARBYINT: return LowerFNEARBYINT(Op, DAG); 1386bdd1243dSDimitry Andric case ISD::FROUNDEVEN: 1387bdd1243dSDimitry Andric return LowerFROUNDEVEN(Op, DAG); 13880b57cec5SDimitry Andric case ISD::FROUND: return LowerFROUND(Op, DAG); 13890b57cec5SDimitry Andric case ISD::FFLOOR: return LowerFFLOOR(Op, DAG); 139006c3fb27SDimitry Andric case ISD::FLOG2: 139106c3fb27SDimitry Andric return LowerFLOG2(Op, DAG); 13920b57cec5SDimitry Andric case ISD::FLOG: 13930b57cec5SDimitry Andric case ISD::FLOG10: 139406c3fb27SDimitry Andric return LowerFLOGCommon(Op, DAG); 13950b57cec5SDimitry Andric case ISD::FEXP: 13965f757f3fSDimitry Andric case ISD::FEXP10: 13970b57cec5SDimitry Andric return lowerFEXP(Op, DAG); 139806c3fb27SDimitry Andric case ISD::FEXP2: 139906c3fb27SDimitry Andric return lowerFEXP2(Op, DAG); 14000b57cec5SDimitry Andric case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); 14010b57cec5SDimitry Andric case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); 14020b57cec5SDimitry Andric case ISD::FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG); 1403fe6060f1SDimitry Andric case ISD::FP_TO_SINT: 1404fe6060f1SDimitry Andric case ISD::FP_TO_UINT: 1405fe6060f1SDimitry Andric return LowerFP_TO_INT(Op, DAG); 14060b57cec5SDimitry Andric case ISD::CTTZ: 14070b57cec5SDimitry Andric case ISD::CTTZ_ZERO_UNDEF: 14080b57cec5SDimitry Andric case ISD::CTLZ: 14090b57cec5SDimitry Andric case ISD::CTLZ_ZERO_UNDEF: 14100b57cec5SDimitry Andric return LowerCTLZ_CTTZ(Op, DAG); 14110b57cec5SDimitry Andric case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); 14120b57cec5SDimitry Andric } 14130b57cec5SDimitry Andric return Op; 14140b57cec5SDimitry Andric } 14150b57cec5SDimitry Andric 14160b57cec5SDimitry Andric void AMDGPUTargetLowering::ReplaceNodeResults(SDNode *N, 14170b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results, 14180b57cec5SDimitry Andric SelectionDAG &DAG) const { 14190b57cec5SDimitry Andric switch (N->getOpcode()) { 14200b57cec5SDimitry Andric case ISD::SIGN_EXTEND_INREG: 14210b57cec5SDimitry Andric // Different parts of legalization seem to interpret which type of 14220b57cec5SDimitry Andric // sign_extend_inreg is the one to check for custom lowering. The extended 14230b57cec5SDimitry Andric // from type is what really matters, but some places check for custom 14240b57cec5SDimitry Andric // lowering of the result type. This results in trying to use 14250b57cec5SDimitry Andric // ReplaceNodeResults to sext_in_reg to an illegal type, so we'll just do 14260b57cec5SDimitry Andric // nothing here and let the illegal result integer be handled normally. 14270b57cec5SDimitry Andric return; 142806c3fb27SDimitry Andric case ISD::FLOG2: 142906c3fb27SDimitry Andric if (SDValue Lowered = LowerFLOG2(SDValue(N, 0), DAG)) 143006c3fb27SDimitry Andric Results.push_back(Lowered); 143106c3fb27SDimitry Andric return; 143206c3fb27SDimitry Andric case ISD::FLOG: 143306c3fb27SDimitry Andric case ISD::FLOG10: 143406c3fb27SDimitry Andric if (SDValue Lowered = LowerFLOGCommon(SDValue(N, 0), DAG)) 143506c3fb27SDimitry Andric Results.push_back(Lowered); 143606c3fb27SDimitry Andric return; 143706c3fb27SDimitry Andric case ISD::FEXP2: 143806c3fb27SDimitry Andric if (SDValue Lowered = lowerFEXP2(SDValue(N, 0), DAG)) 143906c3fb27SDimitry Andric Results.push_back(Lowered); 144006c3fb27SDimitry Andric return; 144106c3fb27SDimitry Andric case ISD::FEXP: 14425f757f3fSDimitry Andric case ISD::FEXP10: 144306c3fb27SDimitry Andric if (SDValue Lowered = lowerFEXP(SDValue(N, 0), DAG)) 144406c3fb27SDimitry Andric Results.push_back(Lowered); 144506c3fb27SDimitry Andric return; 14467a6dacacSDimitry Andric case ISD::CTLZ: 14477a6dacacSDimitry Andric case ISD::CTLZ_ZERO_UNDEF: 14487a6dacacSDimitry Andric if (auto Lowered = lowerCTLZResults(SDValue(N, 0u), DAG)) 14497a6dacacSDimitry Andric Results.push_back(Lowered); 14507a6dacacSDimitry Andric return; 14510b57cec5SDimitry Andric default: 14520b57cec5SDimitry Andric return; 14530b57cec5SDimitry Andric } 14540b57cec5SDimitry Andric } 14550b57cec5SDimitry Andric 14560b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerGlobalAddress(AMDGPUMachineFunction* MFI, 14570b57cec5SDimitry Andric SDValue Op, 14580b57cec5SDimitry Andric SelectionDAG &DAG) const { 14590b57cec5SDimitry Andric 14600b57cec5SDimitry Andric const DataLayout &DL = DAG.getDataLayout(); 14610b57cec5SDimitry Andric GlobalAddressSDNode *G = cast<GlobalAddressSDNode>(Op); 14620b57cec5SDimitry Andric const GlobalValue *GV = G->getGlobal(); 14630b57cec5SDimitry Andric 146406c3fb27SDimitry Andric if (!MFI->isModuleEntryFunction()) { 146506c3fb27SDimitry Andric if (std::optional<uint32_t> Address = 146606c3fb27SDimitry Andric AMDGPUMachineFunction::getLDSAbsoluteAddress(*GV)) { 146706c3fb27SDimitry Andric return DAG.getConstant(*Address, SDLoc(Op), Op.getValueType()); 146806c3fb27SDimitry Andric } 146906c3fb27SDimitry Andric } 147006c3fb27SDimitry Andric 14710b57cec5SDimitry Andric if (G->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS || 14720b57cec5SDimitry Andric G->getAddressSpace() == AMDGPUAS::REGION_ADDRESS) { 1473fe6060f1SDimitry Andric if (!MFI->isModuleEntryFunction() && 14740fca6ea1SDimitry Andric GV->getName() != "llvm.amdgcn.module.lds") { 14755ffd83dbSDimitry Andric SDLoc DL(Op); 14760b57cec5SDimitry Andric const Function &Fn = DAG.getMachineFunction().getFunction(); 14770b57cec5SDimitry Andric DiagnosticInfoUnsupported BadLDSDecl( 14785ffd83dbSDimitry Andric Fn, "local memory global used by non-kernel function", 14795ffd83dbSDimitry Andric DL.getDebugLoc(), DS_Warning); 14800b57cec5SDimitry Andric DAG.getContext()->diagnose(BadLDSDecl); 14815ffd83dbSDimitry Andric 14825ffd83dbSDimitry Andric // We currently don't have a way to correctly allocate LDS objects that 14835ffd83dbSDimitry Andric // aren't directly associated with a kernel. We do force inlining of 14845ffd83dbSDimitry Andric // functions that use local objects. However, if these dead functions are 14855ffd83dbSDimitry Andric // not eliminated, we don't want a compile time error. Just emit a warning 14865ffd83dbSDimitry Andric // and a trap, since there should be no callable path here. 14875ffd83dbSDimitry Andric SDValue Trap = DAG.getNode(ISD::TRAP, DL, MVT::Other, DAG.getEntryNode()); 14885ffd83dbSDimitry Andric SDValue OutputChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, 14895ffd83dbSDimitry Andric Trap, DAG.getRoot()); 14905ffd83dbSDimitry Andric DAG.setRoot(OutputChain); 14915ffd83dbSDimitry Andric return DAG.getUNDEF(Op.getValueType()); 14920b57cec5SDimitry Andric } 14930b57cec5SDimitry Andric 14940b57cec5SDimitry Andric // XXX: What does the value of G->getOffset() mean? 14950b57cec5SDimitry Andric assert(G->getOffset() == 0 && 14960b57cec5SDimitry Andric "Do not know what to do with an non-zero offset"); 14970b57cec5SDimitry Andric 14980b57cec5SDimitry Andric // TODO: We could emit code to handle the initialization somewhere. 1499349cc55cSDimitry Andric // We ignore the initializer for now and legalize it to allow selection. 1500349cc55cSDimitry Andric // The initializer will anyway get errored out during assembly emission. 15015ffd83dbSDimitry Andric unsigned Offset = MFI->allocateLDSGlobal(DL, *cast<GlobalVariable>(GV)); 15020b57cec5SDimitry Andric return DAG.getConstant(Offset, SDLoc(Op), Op.getValueType()); 15030b57cec5SDimitry Andric } 15040b57cec5SDimitry Andric return SDValue(); 15050b57cec5SDimitry Andric } 15060b57cec5SDimitry Andric 15070b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCONCAT_VECTORS(SDValue Op, 15080b57cec5SDimitry Andric SelectionDAG &DAG) const { 15090b57cec5SDimitry Andric SmallVector<SDValue, 8> Args; 1510bdd1243dSDimitry Andric SDLoc SL(Op); 15110b57cec5SDimitry Andric 15120b57cec5SDimitry Andric EVT VT = Op.getValueType(); 1513bdd1243dSDimitry Andric if (VT.getVectorElementType().getSizeInBits() < 32) { 1514bdd1243dSDimitry Andric unsigned OpBitSize = Op.getOperand(0).getValueType().getSizeInBits(); 1515bdd1243dSDimitry Andric if (OpBitSize >= 32 && OpBitSize % 32 == 0) { 1516bdd1243dSDimitry Andric unsigned NewNumElt = OpBitSize / 32; 1517bdd1243dSDimitry Andric EVT NewEltVT = (NewNumElt == 1) ? MVT::i32 1518bdd1243dSDimitry Andric : EVT::getVectorVT(*DAG.getContext(), 1519bdd1243dSDimitry Andric MVT::i32, NewNumElt); 1520bdd1243dSDimitry Andric for (const SDUse &U : Op->ops()) { 1521bdd1243dSDimitry Andric SDValue In = U.get(); 1522bdd1243dSDimitry Andric SDValue NewIn = DAG.getNode(ISD::BITCAST, SL, NewEltVT, In); 1523bdd1243dSDimitry Andric if (NewNumElt > 1) 1524bdd1243dSDimitry Andric DAG.ExtractVectorElements(NewIn, Args); 1525bdd1243dSDimitry Andric else 1526bdd1243dSDimitry Andric Args.push_back(NewIn); 1527bdd1243dSDimitry Andric } 15280b57cec5SDimitry Andric 1529bdd1243dSDimitry Andric EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, 1530bdd1243dSDimitry Andric NewNumElt * Op.getNumOperands()); 1531bdd1243dSDimitry Andric SDValue BV = DAG.getBuildVector(NewVT, SL, Args); 15320b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, VT, BV); 15330b57cec5SDimitry Andric } 1534bdd1243dSDimitry Andric } 15350b57cec5SDimitry Andric 15360b57cec5SDimitry Andric for (const SDUse &U : Op->ops()) 15370b57cec5SDimitry Andric DAG.ExtractVectorElements(U.get(), Args); 15380b57cec5SDimitry Andric 1539bdd1243dSDimitry Andric return DAG.getBuildVector(Op.getValueType(), SL, Args); 15400b57cec5SDimitry Andric } 15410b57cec5SDimitry Andric 15420b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op, 15430b57cec5SDimitry Andric SelectionDAG &DAG) const { 154406c3fb27SDimitry Andric SDLoc SL(Op); 15450b57cec5SDimitry Andric SmallVector<SDValue, 8> Args; 1546647cbc5dSDimitry Andric unsigned Start = Op.getConstantOperandVal(1); 15470b57cec5SDimitry Andric EVT VT = Op.getValueType(); 1548fe6060f1SDimitry Andric EVT SrcVT = Op.getOperand(0).getValueType(); 1549fe6060f1SDimitry Andric 155006c3fb27SDimitry Andric if (VT.getScalarSizeInBits() == 16 && Start % 2 == 0) { 155106c3fb27SDimitry Andric unsigned NumElt = VT.getVectorNumElements(); 155206c3fb27SDimitry Andric unsigned NumSrcElt = SrcVT.getVectorNumElements(); 155306c3fb27SDimitry Andric assert(NumElt % 2 == 0 && NumSrcElt % 2 == 0 && "expect legal types"); 1554fe6060f1SDimitry Andric 155506c3fb27SDimitry Andric // Extract 32-bit registers at a time. 155606c3fb27SDimitry Andric EVT NewSrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumSrcElt / 2); 155706c3fb27SDimitry Andric EVT NewVT = NumElt == 2 155806c3fb27SDimitry Andric ? MVT::i32 155906c3fb27SDimitry Andric : EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElt / 2); 156006c3fb27SDimitry Andric SDValue Tmp = DAG.getNode(ISD::BITCAST, SL, NewSrcVT, Op.getOperand(0)); 156104eeddc0SDimitry Andric 156206c3fb27SDimitry Andric DAG.ExtractVectorElements(Tmp, Args, Start / 2, NumElt / 2); 156306c3fb27SDimitry Andric if (NumElt == 2) 156406c3fb27SDimitry Andric Tmp = Args[0]; 156506c3fb27SDimitry Andric else 156606c3fb27SDimitry Andric Tmp = DAG.getBuildVector(NewVT, SL, Args); 156706c3fb27SDimitry Andric 156806c3fb27SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, VT, Tmp); 156906c3fb27SDimitry Andric } 157081ad6265SDimitry Andric 15710b57cec5SDimitry Andric DAG.ExtractVectorElements(Op.getOperand(0), Args, Start, 15720b57cec5SDimitry Andric VT.getVectorNumElements()); 15730b57cec5SDimitry Andric 157406c3fb27SDimitry Andric return DAG.getBuildVector(Op.getValueType(), SL, Args); 15750b57cec5SDimitry Andric } 15760b57cec5SDimitry Andric 157706c3fb27SDimitry Andric // TODO: Handle fabs too 157806c3fb27SDimitry Andric static SDValue peekFNeg(SDValue Val) { 157906c3fb27SDimitry Andric if (Val.getOpcode() == ISD::FNEG) 158006c3fb27SDimitry Andric return Val.getOperand(0); 15810b57cec5SDimitry Andric 158206c3fb27SDimitry Andric return Val; 158306c3fb27SDimitry Andric } 158406c3fb27SDimitry Andric 158506c3fb27SDimitry Andric static SDValue peekFPSignOps(SDValue Val) { 158606c3fb27SDimitry Andric if (Val.getOpcode() == ISD::FNEG) 158706c3fb27SDimitry Andric Val = Val.getOperand(0); 158806c3fb27SDimitry Andric if (Val.getOpcode() == ISD::FABS) 158906c3fb27SDimitry Andric Val = Val.getOperand(0); 159006c3fb27SDimitry Andric if (Val.getOpcode() == ISD::FCOPYSIGN) 159106c3fb27SDimitry Andric Val = Val.getOperand(0); 159206c3fb27SDimitry Andric return Val; 159306c3fb27SDimitry Andric } 159406c3fb27SDimitry Andric 159506c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::combineFMinMaxLegacyImpl( 159606c3fb27SDimitry Andric const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, 159706c3fb27SDimitry Andric SDValue False, SDValue CC, DAGCombinerInfo &DCI) const { 15980b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 15990b57cec5SDimitry Andric ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get(); 16000b57cec5SDimitry Andric switch (CCOpcode) { 16010b57cec5SDimitry Andric case ISD::SETOEQ: 16020b57cec5SDimitry Andric case ISD::SETONE: 16030b57cec5SDimitry Andric case ISD::SETUNE: 16040b57cec5SDimitry Andric case ISD::SETNE: 16050b57cec5SDimitry Andric case ISD::SETUEQ: 16060b57cec5SDimitry Andric case ISD::SETEQ: 16070b57cec5SDimitry Andric case ISD::SETFALSE: 16080b57cec5SDimitry Andric case ISD::SETFALSE2: 16090b57cec5SDimitry Andric case ISD::SETTRUE: 16100b57cec5SDimitry Andric case ISD::SETTRUE2: 16110b57cec5SDimitry Andric case ISD::SETUO: 16120b57cec5SDimitry Andric case ISD::SETO: 16130b57cec5SDimitry Andric break; 16140b57cec5SDimitry Andric case ISD::SETULE: 16150b57cec5SDimitry Andric case ISD::SETULT: { 16160b57cec5SDimitry Andric if (LHS == True) 16170b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); 16180b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); 16190b57cec5SDimitry Andric } 16200b57cec5SDimitry Andric case ISD::SETOLE: 16210b57cec5SDimitry Andric case ISD::SETOLT: 16220b57cec5SDimitry Andric case ISD::SETLE: 16230b57cec5SDimitry Andric case ISD::SETLT: { 16240b57cec5SDimitry Andric // Ordered. Assume ordered for undefined. 16250b57cec5SDimitry Andric 16260b57cec5SDimitry Andric // Only do this after legalization to avoid interfering with other combines 16270b57cec5SDimitry Andric // which might occur. 16280b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG && 16290b57cec5SDimitry Andric !DCI.isCalledByLegalizer()) 16300b57cec5SDimitry Andric return SDValue(); 16310b57cec5SDimitry Andric 16320b57cec5SDimitry Andric // We need to permute the operands to get the correct NaN behavior. The 16330b57cec5SDimitry Andric // selected operand is the second one based on the failing compare with NaN, 16340b57cec5SDimitry Andric // so permute it based on the compare type the hardware uses. 16350b57cec5SDimitry Andric if (LHS == True) 16360b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); 16370b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); 16380b57cec5SDimitry Andric } 16390b57cec5SDimitry Andric case ISD::SETUGE: 16400b57cec5SDimitry Andric case ISD::SETUGT: { 16410b57cec5SDimitry Andric if (LHS == True) 16420b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, RHS, LHS); 16430b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, LHS, RHS); 16440b57cec5SDimitry Andric } 16450b57cec5SDimitry Andric case ISD::SETGT: 16460b57cec5SDimitry Andric case ISD::SETGE: 16470b57cec5SDimitry Andric case ISD::SETOGE: 16480b57cec5SDimitry Andric case ISD::SETOGT: { 16490b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG && 16500b57cec5SDimitry Andric !DCI.isCalledByLegalizer()) 16510b57cec5SDimitry Andric return SDValue(); 16520b57cec5SDimitry Andric 16530b57cec5SDimitry Andric if (LHS == True) 16540b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMAX_LEGACY, DL, VT, LHS, RHS); 16550b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FMIN_LEGACY, DL, VT, RHS, LHS); 16560b57cec5SDimitry Andric } 16570b57cec5SDimitry Andric case ISD::SETCC_INVALID: 16580b57cec5SDimitry Andric llvm_unreachable("Invalid setcc condcode!"); 16590b57cec5SDimitry Andric } 16600b57cec5SDimitry Andric return SDValue(); 16610b57cec5SDimitry Andric } 16620b57cec5SDimitry Andric 166306c3fb27SDimitry Andric /// Generate Min/Max node 166406c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::combineFMinMaxLegacy(const SDLoc &DL, EVT VT, 166506c3fb27SDimitry Andric SDValue LHS, SDValue RHS, 166606c3fb27SDimitry Andric SDValue True, SDValue False, 166706c3fb27SDimitry Andric SDValue CC, 166806c3fb27SDimitry Andric DAGCombinerInfo &DCI) const { 166906c3fb27SDimitry Andric if ((LHS == True && RHS == False) || (LHS == False && RHS == True)) 167006c3fb27SDimitry Andric return combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, True, False, CC, DCI); 167106c3fb27SDimitry Andric 167206c3fb27SDimitry Andric SelectionDAG &DAG = DCI.DAG; 167306c3fb27SDimitry Andric 167406c3fb27SDimitry Andric // If we can't directly match this, try to see if we can fold an fneg to 167506c3fb27SDimitry Andric // match. 167606c3fb27SDimitry Andric 167706c3fb27SDimitry Andric ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS); 167806c3fb27SDimitry Andric ConstantFPSDNode *CFalse = dyn_cast<ConstantFPSDNode>(False); 167906c3fb27SDimitry Andric SDValue NegTrue = peekFNeg(True); 168006c3fb27SDimitry Andric 168106c3fb27SDimitry Andric // Undo the combine foldFreeOpFromSelect does if it helps us match the 168206c3fb27SDimitry Andric // fmin/fmax. 168306c3fb27SDimitry Andric // 168406c3fb27SDimitry Andric // select (fcmp olt (lhs, K)), (fneg lhs), -K 168506c3fb27SDimitry Andric // -> fneg (fmin_legacy lhs, K) 168606c3fb27SDimitry Andric // 168706c3fb27SDimitry Andric // TODO: Use getNegatedExpression 168806c3fb27SDimitry Andric if (LHS == NegTrue && CFalse && CRHS) { 168906c3fb27SDimitry Andric APFloat NegRHS = neg(CRHS->getValueAPF()); 169006c3fb27SDimitry Andric if (NegRHS == CFalse->getValueAPF()) { 169106c3fb27SDimitry Andric SDValue Combined = 169206c3fb27SDimitry Andric combineFMinMaxLegacyImpl(DL, VT, LHS, RHS, NegTrue, False, CC, DCI); 169306c3fb27SDimitry Andric if (Combined) 169406c3fb27SDimitry Andric return DAG.getNode(ISD::FNEG, DL, VT, Combined); 169506c3fb27SDimitry Andric return SDValue(); 169606c3fb27SDimitry Andric } 169706c3fb27SDimitry Andric } 169806c3fb27SDimitry Andric 169906c3fb27SDimitry Andric return SDValue(); 170006c3fb27SDimitry Andric } 170106c3fb27SDimitry Andric 17020b57cec5SDimitry Andric std::pair<SDValue, SDValue> 17030b57cec5SDimitry Andric AMDGPUTargetLowering::split64BitValue(SDValue Op, SelectionDAG &DAG) const { 17040b57cec5SDimitry Andric SDLoc SL(Op); 17050b57cec5SDimitry Andric 17060b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); 17070b57cec5SDimitry Andric 17080b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 17090b57cec5SDimitry Andric const SDValue One = DAG.getConstant(1, SL, MVT::i32); 17100b57cec5SDimitry Andric 17110b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero); 17120b57cec5SDimitry Andric SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One); 17130b57cec5SDimitry Andric 1714bdd1243dSDimitry Andric return std::pair(Lo, Hi); 17150b57cec5SDimitry Andric } 17160b57cec5SDimitry Andric 17170b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getLoHalf64(SDValue Op, SelectionDAG &DAG) const { 17180b57cec5SDimitry Andric SDLoc SL(Op); 17190b57cec5SDimitry Andric 17200b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); 17210b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 17220b57cec5SDimitry Andric return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero); 17230b57cec5SDimitry Andric } 17240b57cec5SDimitry Andric 17250b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getHiHalf64(SDValue Op, SelectionDAG &DAG) const { 17260b57cec5SDimitry Andric SDLoc SL(Op); 17270b57cec5SDimitry Andric 17280b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Op); 17290b57cec5SDimitry Andric const SDValue One = DAG.getConstant(1, SL, MVT::i32); 17300b57cec5SDimitry Andric return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One); 17310b57cec5SDimitry Andric } 17320b57cec5SDimitry Andric 17330b57cec5SDimitry Andric // Split a vector type into two parts. The first part is a power of two vector. 17340b57cec5SDimitry Andric // The second part is whatever is left over, and is a scalar if it would 17350b57cec5SDimitry Andric // otherwise be a 1-vector. 17360b57cec5SDimitry Andric std::pair<EVT, EVT> 17370b57cec5SDimitry Andric AMDGPUTargetLowering::getSplitDestVTs(const EVT &VT, SelectionDAG &DAG) const { 17380b57cec5SDimitry Andric EVT LoVT, HiVT; 17390b57cec5SDimitry Andric EVT EltVT = VT.getVectorElementType(); 17400b57cec5SDimitry Andric unsigned NumElts = VT.getVectorNumElements(); 17410b57cec5SDimitry Andric unsigned LoNumElts = PowerOf2Ceil((NumElts + 1) / 2); 17420b57cec5SDimitry Andric LoVT = EVT::getVectorVT(*DAG.getContext(), EltVT, LoNumElts); 17430b57cec5SDimitry Andric HiVT = NumElts - LoNumElts == 1 17440b57cec5SDimitry Andric ? EltVT 17450b57cec5SDimitry Andric : EVT::getVectorVT(*DAG.getContext(), EltVT, NumElts - LoNumElts); 1746bdd1243dSDimitry Andric return std::pair(LoVT, HiVT); 17470b57cec5SDimitry Andric } 17480b57cec5SDimitry Andric 17490b57cec5SDimitry Andric // Split a vector value into two parts of types LoVT and HiVT. HiVT could be 17500b57cec5SDimitry Andric // scalar. 17510b57cec5SDimitry Andric std::pair<SDValue, SDValue> 17520b57cec5SDimitry Andric AMDGPUTargetLowering::splitVector(const SDValue &N, const SDLoc &DL, 17530b57cec5SDimitry Andric const EVT &LoVT, const EVT &HiVT, 17540b57cec5SDimitry Andric SelectionDAG &DAG) const { 17550b57cec5SDimitry Andric assert(LoVT.getVectorNumElements() + 17560b57cec5SDimitry Andric (HiVT.isVector() ? HiVT.getVectorNumElements() : 1) <= 17570b57cec5SDimitry Andric N.getValueType().getVectorNumElements() && 17580b57cec5SDimitry Andric "More vector elements requested than available!"); 17590b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, 17605ffd83dbSDimitry Andric DAG.getVectorIdxConstant(0, DL)); 17610b57cec5SDimitry Andric SDValue Hi = DAG.getNode( 17620b57cec5SDimitry Andric HiVT.isVector() ? ISD::EXTRACT_SUBVECTOR : ISD::EXTRACT_VECTOR_ELT, DL, 17635ffd83dbSDimitry Andric HiVT, N, DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), DL)); 1764bdd1243dSDimitry Andric return std::pair(Lo, Hi); 17650b57cec5SDimitry Andric } 17660b57cec5SDimitry Andric 17670b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::SplitVectorLoad(const SDValue Op, 17680b57cec5SDimitry Andric SelectionDAG &DAG) const { 17690b57cec5SDimitry Andric LoadSDNode *Load = cast<LoadSDNode>(Op); 17700b57cec5SDimitry Andric EVT VT = Op.getValueType(); 1771480093f4SDimitry Andric SDLoc SL(Op); 17720b57cec5SDimitry Andric 17730b57cec5SDimitry Andric 17740b57cec5SDimitry Andric // If this is a 2 element vector, we really want to scalarize and not create 17750b57cec5SDimitry Andric // weird 1 element vectors. 1776480093f4SDimitry Andric if (VT.getVectorNumElements() == 2) { 1777480093f4SDimitry Andric SDValue Ops[2]; 1778480093f4SDimitry Andric std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(Load, DAG); 1779480093f4SDimitry Andric return DAG.getMergeValues(Ops, SL); 1780480093f4SDimitry Andric } 17810b57cec5SDimitry Andric 17820b57cec5SDimitry Andric SDValue BasePtr = Load->getBasePtr(); 17830b57cec5SDimitry Andric EVT MemVT = Load->getMemoryVT(); 17840b57cec5SDimitry Andric 17850b57cec5SDimitry Andric const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo(); 17860b57cec5SDimitry Andric 17870b57cec5SDimitry Andric EVT LoVT, HiVT; 17880b57cec5SDimitry Andric EVT LoMemVT, HiMemVT; 17890b57cec5SDimitry Andric SDValue Lo, Hi; 17900b57cec5SDimitry Andric 17910b57cec5SDimitry Andric std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG); 17920b57cec5SDimitry Andric std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG); 17930b57cec5SDimitry Andric std::tie(Lo, Hi) = splitVector(Op, SL, LoVT, HiVT, DAG); 17940b57cec5SDimitry Andric 17950b57cec5SDimitry Andric unsigned Size = LoMemVT.getStoreSize(); 179681ad6265SDimitry Andric Align BaseAlign = Load->getAlign(); 179781ad6265SDimitry Andric Align HiAlign = commonAlignment(BaseAlign, Size); 17980b57cec5SDimitry Andric 17990b57cec5SDimitry Andric SDValue LoLoad = DAG.getExtLoad(Load->getExtensionType(), SL, LoVT, 18000b57cec5SDimitry Andric Load->getChain(), BasePtr, SrcValue, LoMemVT, 18010b57cec5SDimitry Andric BaseAlign, Load->getMemOperand()->getFlags()); 18025f757f3fSDimitry Andric SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, TypeSize::getFixed(Size)); 18030b57cec5SDimitry Andric SDValue HiLoad = 18040b57cec5SDimitry Andric DAG.getExtLoad(Load->getExtensionType(), SL, HiVT, Load->getChain(), 18050b57cec5SDimitry Andric HiPtr, SrcValue.getWithOffset(LoMemVT.getStoreSize()), 18060b57cec5SDimitry Andric HiMemVT, HiAlign, Load->getMemOperand()->getFlags()); 18070b57cec5SDimitry Andric 18080b57cec5SDimitry Andric SDValue Join; 18090b57cec5SDimitry Andric if (LoVT == HiVT) { 18100b57cec5SDimitry Andric // This is the case that the vector is power of two so was evenly split. 18110b57cec5SDimitry Andric Join = DAG.getNode(ISD::CONCAT_VECTORS, SL, VT, LoLoad, HiLoad); 18120b57cec5SDimitry Andric } else { 18130b57cec5SDimitry Andric Join = DAG.getNode(ISD::INSERT_SUBVECTOR, SL, VT, DAG.getUNDEF(VT), LoLoad, 18145ffd83dbSDimitry Andric DAG.getVectorIdxConstant(0, SL)); 18155ffd83dbSDimitry Andric Join = DAG.getNode( 18165ffd83dbSDimitry Andric HiVT.isVector() ? ISD::INSERT_SUBVECTOR : ISD::INSERT_VECTOR_ELT, SL, 18175ffd83dbSDimitry Andric VT, Join, HiLoad, 18185ffd83dbSDimitry Andric DAG.getVectorIdxConstant(LoVT.getVectorNumElements(), SL)); 18190b57cec5SDimitry Andric } 18200b57cec5SDimitry Andric 18210b57cec5SDimitry Andric SDValue Ops[] = {Join, DAG.getNode(ISD::TokenFactor, SL, MVT::Other, 18220b57cec5SDimitry Andric LoLoad.getValue(1), HiLoad.getValue(1))}; 18230b57cec5SDimitry Andric 18240b57cec5SDimitry Andric return DAG.getMergeValues(Ops, SL); 18250b57cec5SDimitry Andric } 18260b57cec5SDimitry Andric 1827e8d8bef9SDimitry Andric SDValue AMDGPUTargetLowering::WidenOrSplitVectorLoad(SDValue Op, 18280b57cec5SDimitry Andric SelectionDAG &DAG) const { 18290b57cec5SDimitry Andric LoadSDNode *Load = cast<LoadSDNode>(Op); 18300b57cec5SDimitry Andric EVT VT = Op.getValueType(); 18310b57cec5SDimitry Andric SDValue BasePtr = Load->getBasePtr(); 18320b57cec5SDimitry Andric EVT MemVT = Load->getMemoryVT(); 18330b57cec5SDimitry Andric SDLoc SL(Op); 18340b57cec5SDimitry Andric const MachinePointerInfo &SrcValue = Load->getMemOperand()->getPointerInfo(); 183581ad6265SDimitry Andric Align BaseAlign = Load->getAlign(); 1836e8d8bef9SDimitry Andric unsigned NumElements = MemVT.getVectorNumElements(); 1837e8d8bef9SDimitry Andric 1838e8d8bef9SDimitry Andric // Widen from vec3 to vec4 when the load is at least 8-byte aligned 1839e8d8bef9SDimitry Andric // or 16-byte fully dereferenceable. Otherwise, split the vector load. 1840e8d8bef9SDimitry Andric if (NumElements != 3 || 184181ad6265SDimitry Andric (BaseAlign < Align(8) && 1842e8d8bef9SDimitry Andric !SrcValue.isDereferenceable(16, *DAG.getContext(), DAG.getDataLayout()))) 1843e8d8bef9SDimitry Andric return SplitVectorLoad(Op, DAG); 1844e8d8bef9SDimitry Andric 1845e8d8bef9SDimitry Andric assert(NumElements == 3); 18460b57cec5SDimitry Andric 18470b57cec5SDimitry Andric EVT WideVT = 18480b57cec5SDimitry Andric EVT::getVectorVT(*DAG.getContext(), VT.getVectorElementType(), 4); 18490b57cec5SDimitry Andric EVT WideMemVT = 18500b57cec5SDimitry Andric EVT::getVectorVT(*DAG.getContext(), MemVT.getVectorElementType(), 4); 18510b57cec5SDimitry Andric SDValue WideLoad = DAG.getExtLoad( 18520b57cec5SDimitry Andric Load->getExtensionType(), SL, WideVT, Load->getChain(), BasePtr, SrcValue, 18530b57cec5SDimitry Andric WideMemVT, BaseAlign, Load->getMemOperand()->getFlags()); 18540b57cec5SDimitry Andric return DAG.getMergeValues( 18550b57cec5SDimitry Andric {DAG.getNode(ISD::EXTRACT_SUBVECTOR, SL, VT, WideLoad, 18565ffd83dbSDimitry Andric DAG.getVectorIdxConstant(0, SL)), 18570b57cec5SDimitry Andric WideLoad.getValue(1)}, 18580b57cec5SDimitry Andric SL); 18590b57cec5SDimitry Andric } 18600b57cec5SDimitry Andric 18610b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op, 18620b57cec5SDimitry Andric SelectionDAG &DAG) const { 18630b57cec5SDimitry Andric StoreSDNode *Store = cast<StoreSDNode>(Op); 18640b57cec5SDimitry Andric SDValue Val = Store->getValue(); 18650b57cec5SDimitry Andric EVT VT = Val.getValueType(); 18660b57cec5SDimitry Andric 18670b57cec5SDimitry Andric // If this is a 2 element vector, we really want to scalarize and not create 18680b57cec5SDimitry Andric // weird 1 element vectors. 18690b57cec5SDimitry Andric if (VT.getVectorNumElements() == 2) 18700b57cec5SDimitry Andric return scalarizeVectorStore(Store, DAG); 18710b57cec5SDimitry Andric 18720b57cec5SDimitry Andric EVT MemVT = Store->getMemoryVT(); 18730b57cec5SDimitry Andric SDValue Chain = Store->getChain(); 18740b57cec5SDimitry Andric SDValue BasePtr = Store->getBasePtr(); 18750b57cec5SDimitry Andric SDLoc SL(Op); 18760b57cec5SDimitry Andric 18770b57cec5SDimitry Andric EVT LoVT, HiVT; 18780b57cec5SDimitry Andric EVT LoMemVT, HiMemVT; 18790b57cec5SDimitry Andric SDValue Lo, Hi; 18800b57cec5SDimitry Andric 18810b57cec5SDimitry Andric std::tie(LoVT, HiVT) = getSplitDestVTs(VT, DAG); 18820b57cec5SDimitry Andric std::tie(LoMemVT, HiMemVT) = getSplitDestVTs(MemVT, DAG); 18830b57cec5SDimitry Andric std::tie(Lo, Hi) = splitVector(Val, SL, LoVT, HiVT, DAG); 18840b57cec5SDimitry Andric 18850b57cec5SDimitry Andric SDValue HiPtr = DAG.getObjectPtrOffset(SL, BasePtr, LoMemVT.getStoreSize()); 18860b57cec5SDimitry Andric 18870b57cec5SDimitry Andric const MachinePointerInfo &SrcValue = Store->getMemOperand()->getPointerInfo(); 188881ad6265SDimitry Andric Align BaseAlign = Store->getAlign(); 18890b57cec5SDimitry Andric unsigned Size = LoMemVT.getStoreSize(); 189081ad6265SDimitry Andric Align HiAlign = commonAlignment(BaseAlign, Size); 18910b57cec5SDimitry Andric 18920b57cec5SDimitry Andric SDValue LoStore = 18930b57cec5SDimitry Andric DAG.getTruncStore(Chain, SL, Lo, BasePtr, SrcValue, LoMemVT, BaseAlign, 18940b57cec5SDimitry Andric Store->getMemOperand()->getFlags()); 18950b57cec5SDimitry Andric SDValue HiStore = 18960b57cec5SDimitry Andric DAG.getTruncStore(Chain, SL, Hi, HiPtr, SrcValue.getWithOffset(Size), 18970b57cec5SDimitry Andric HiMemVT, HiAlign, Store->getMemOperand()->getFlags()); 18980b57cec5SDimitry Andric 18990b57cec5SDimitry Andric return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoStore, HiStore); 19000b57cec5SDimitry Andric } 19010b57cec5SDimitry Andric 19020b57cec5SDimitry Andric // This is a shortcut for integer division because we have fast i32<->f32 19030b57cec5SDimitry Andric // conversions, and fast f32 reciprocal instructions. The fractional part of a 19040b57cec5SDimitry Andric // float is enough to accurately represent up to a 24-bit signed integer. 19050b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerDIVREM24(SDValue Op, SelectionDAG &DAG, 19060b57cec5SDimitry Andric bool Sign) const { 19070b57cec5SDimitry Andric SDLoc DL(Op); 19080b57cec5SDimitry Andric EVT VT = Op.getValueType(); 19090b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0); 19100b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1); 19110b57cec5SDimitry Andric MVT IntVT = MVT::i32; 19120b57cec5SDimitry Andric MVT FltVT = MVT::f32; 19130b57cec5SDimitry Andric 19140b57cec5SDimitry Andric unsigned LHSSignBits = DAG.ComputeNumSignBits(LHS); 19150b57cec5SDimitry Andric if (LHSSignBits < 9) 19160b57cec5SDimitry Andric return SDValue(); 19170b57cec5SDimitry Andric 19180b57cec5SDimitry Andric unsigned RHSSignBits = DAG.ComputeNumSignBits(RHS); 19190b57cec5SDimitry Andric if (RHSSignBits < 9) 19200b57cec5SDimitry Andric return SDValue(); 19210b57cec5SDimitry Andric 19220b57cec5SDimitry Andric unsigned BitSize = VT.getSizeInBits(); 19230b57cec5SDimitry Andric unsigned SignBits = std::min(LHSSignBits, RHSSignBits); 19240b57cec5SDimitry Andric unsigned DivBits = BitSize - SignBits; 19250b57cec5SDimitry Andric if (Sign) 19260b57cec5SDimitry Andric ++DivBits; 19270b57cec5SDimitry Andric 19280b57cec5SDimitry Andric ISD::NodeType ToFp = Sign ? ISD::SINT_TO_FP : ISD::UINT_TO_FP; 19290b57cec5SDimitry Andric ISD::NodeType ToInt = Sign ? ISD::FP_TO_SINT : ISD::FP_TO_UINT; 19300b57cec5SDimitry Andric 19310b57cec5SDimitry Andric SDValue jq = DAG.getConstant(1, DL, IntVT); 19320b57cec5SDimitry Andric 19330b57cec5SDimitry Andric if (Sign) { 19340b57cec5SDimitry Andric // char|short jq = ia ^ ib; 19350b57cec5SDimitry Andric jq = DAG.getNode(ISD::XOR, DL, VT, LHS, RHS); 19360b57cec5SDimitry Andric 19370b57cec5SDimitry Andric // jq = jq >> (bitsize - 2) 19380b57cec5SDimitry Andric jq = DAG.getNode(ISD::SRA, DL, VT, jq, 19390b57cec5SDimitry Andric DAG.getConstant(BitSize - 2, DL, VT)); 19400b57cec5SDimitry Andric 19410b57cec5SDimitry Andric // jq = jq | 0x1 19420b57cec5SDimitry Andric jq = DAG.getNode(ISD::OR, DL, VT, jq, DAG.getConstant(1, DL, VT)); 19430b57cec5SDimitry Andric } 19440b57cec5SDimitry Andric 19450b57cec5SDimitry Andric // int ia = (int)LHS; 19460b57cec5SDimitry Andric SDValue ia = LHS; 19470b57cec5SDimitry Andric 19480b57cec5SDimitry Andric // int ib, (int)RHS; 19490b57cec5SDimitry Andric SDValue ib = RHS; 19500b57cec5SDimitry Andric 19510b57cec5SDimitry Andric // float fa = (float)ia; 19520b57cec5SDimitry Andric SDValue fa = DAG.getNode(ToFp, DL, FltVT, ia); 19530b57cec5SDimitry Andric 19540b57cec5SDimitry Andric // float fb = (float)ib; 19550b57cec5SDimitry Andric SDValue fb = DAG.getNode(ToFp, DL, FltVT, ib); 19560b57cec5SDimitry Andric 19570b57cec5SDimitry Andric SDValue fq = DAG.getNode(ISD::FMUL, DL, FltVT, 19580b57cec5SDimitry Andric fa, DAG.getNode(AMDGPUISD::RCP, DL, FltVT, fb)); 19590b57cec5SDimitry Andric 19600b57cec5SDimitry Andric // fq = trunc(fq); 19610b57cec5SDimitry Andric fq = DAG.getNode(ISD::FTRUNC, DL, FltVT, fq); 19620b57cec5SDimitry Andric 19630b57cec5SDimitry Andric // float fqneg = -fq; 19640b57cec5SDimitry Andric SDValue fqneg = DAG.getNode(ISD::FNEG, DL, FltVT, fq); 19650b57cec5SDimitry Andric 1966480093f4SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 1967bdd1243dSDimitry Andric 1968bdd1243dSDimitry Andric bool UseFmadFtz = false; 1969bdd1243dSDimitry Andric if (Subtarget->isGCN()) { 1970bdd1243dSDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 197106c3fb27SDimitry Andric UseFmadFtz = 197206c3fb27SDimitry Andric MFI->getMode().FP32Denormals != DenormalMode::getPreserveSign(); 1973bdd1243dSDimitry Andric } 1974480093f4SDimitry Andric 19750b57cec5SDimitry Andric // float fr = mad(fqneg, fb, fa); 1976bdd1243dSDimitry Andric unsigned OpCode = !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA 1977bdd1243dSDimitry Andric : UseFmadFtz ? (unsigned)AMDGPUISD::FMAD_FTZ 1978bdd1243dSDimitry Andric : (unsigned)ISD::FMAD; 19790b57cec5SDimitry Andric SDValue fr = DAG.getNode(OpCode, DL, FltVT, fqneg, fb, fa); 19800b57cec5SDimitry Andric 19810b57cec5SDimitry Andric // int iq = (int)fq; 19820b57cec5SDimitry Andric SDValue iq = DAG.getNode(ToInt, DL, IntVT, fq); 19830b57cec5SDimitry Andric 19840b57cec5SDimitry Andric // fr = fabs(fr); 19850b57cec5SDimitry Andric fr = DAG.getNode(ISD::FABS, DL, FltVT, fr); 19860b57cec5SDimitry Andric 19870b57cec5SDimitry Andric // fb = fabs(fb); 19880b57cec5SDimitry Andric fb = DAG.getNode(ISD::FABS, DL, FltVT, fb); 19890b57cec5SDimitry Andric 19900b57cec5SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 19910b57cec5SDimitry Andric 19920b57cec5SDimitry Andric // int cv = fr >= fb; 19930b57cec5SDimitry Andric SDValue cv = DAG.getSetCC(DL, SetCCVT, fr, fb, ISD::SETOGE); 19940b57cec5SDimitry Andric 19950b57cec5SDimitry Andric // jq = (cv ? jq : 0); 19960b57cec5SDimitry Andric jq = DAG.getNode(ISD::SELECT, DL, VT, cv, jq, DAG.getConstant(0, DL, VT)); 19970b57cec5SDimitry Andric 19980b57cec5SDimitry Andric // dst = iq + jq; 19990b57cec5SDimitry Andric SDValue Div = DAG.getNode(ISD::ADD, DL, VT, iq, jq); 20000b57cec5SDimitry Andric 20010b57cec5SDimitry Andric // Rem needs compensation, it's easier to recompute it 20020b57cec5SDimitry Andric SDValue Rem = DAG.getNode(ISD::MUL, DL, VT, Div, RHS); 20030b57cec5SDimitry Andric Rem = DAG.getNode(ISD::SUB, DL, VT, LHS, Rem); 20040b57cec5SDimitry Andric 20050b57cec5SDimitry Andric // Truncate to number of bits this divide really is. 20060b57cec5SDimitry Andric if (Sign) { 20070b57cec5SDimitry Andric SDValue InRegSize 20080b57cec5SDimitry Andric = DAG.getValueType(EVT::getIntegerVT(*DAG.getContext(), DivBits)); 20090b57cec5SDimitry Andric Div = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Div, InRegSize); 20100b57cec5SDimitry Andric Rem = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, Rem, InRegSize); 20110b57cec5SDimitry Andric } else { 20120b57cec5SDimitry Andric SDValue TruncMask = DAG.getConstant((UINT64_C(1) << DivBits) - 1, DL, VT); 20130b57cec5SDimitry Andric Div = DAG.getNode(ISD::AND, DL, VT, Div, TruncMask); 20140b57cec5SDimitry Andric Rem = DAG.getNode(ISD::AND, DL, VT, Rem, TruncMask); 20150b57cec5SDimitry Andric } 20160b57cec5SDimitry Andric 20170b57cec5SDimitry Andric return DAG.getMergeValues({ Div, Rem }, DL); 20180b57cec5SDimitry Andric } 20190b57cec5SDimitry Andric 20200b57cec5SDimitry Andric void AMDGPUTargetLowering::LowerUDIVREM64(SDValue Op, 20210b57cec5SDimitry Andric SelectionDAG &DAG, 20220b57cec5SDimitry Andric SmallVectorImpl<SDValue> &Results) const { 20230b57cec5SDimitry Andric SDLoc DL(Op); 20240b57cec5SDimitry Andric EVT VT = Op.getValueType(); 20250b57cec5SDimitry Andric 20260b57cec5SDimitry Andric assert(VT == MVT::i64 && "LowerUDIVREM64 expects an i64"); 20270b57cec5SDimitry Andric 20280b57cec5SDimitry Andric EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); 20290b57cec5SDimitry Andric 20300b57cec5SDimitry Andric SDValue One = DAG.getConstant(1, DL, HalfVT); 20310b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, DL, HalfVT); 20320b57cec5SDimitry Andric 20330b57cec5SDimitry Andric //HiLo split 203406c3fb27SDimitry Andric SDValue LHS_Lo, LHS_Hi; 20350b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0); 203606c3fb27SDimitry Andric std::tie(LHS_Lo, LHS_Hi) = DAG.SplitScalar(LHS, DL, HalfVT, HalfVT); 20370b57cec5SDimitry Andric 203806c3fb27SDimitry Andric SDValue RHS_Lo, RHS_Hi; 20390b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1); 204006c3fb27SDimitry Andric std::tie(RHS_Lo, RHS_Hi) = DAG.SplitScalar(RHS, DL, HalfVT, HalfVT); 20410b57cec5SDimitry Andric 20420b57cec5SDimitry Andric if (DAG.MaskedValueIsZero(RHS, APInt::getHighBitsSet(64, 32)) && 20430b57cec5SDimitry Andric DAG.MaskedValueIsZero(LHS, APInt::getHighBitsSet(64, 32))) { 20440b57cec5SDimitry Andric 20450b57cec5SDimitry Andric SDValue Res = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(HalfVT, HalfVT), 20460b57cec5SDimitry Andric LHS_Lo, RHS_Lo); 20470b57cec5SDimitry Andric 20480b57cec5SDimitry Andric SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(0), Zero}); 20490b57cec5SDimitry Andric SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {Res.getValue(1), Zero}); 20500b57cec5SDimitry Andric 20510b57cec5SDimitry Andric Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV)); 20520b57cec5SDimitry Andric Results.push_back(DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM)); 20530b57cec5SDimitry Andric return; 20540b57cec5SDimitry Andric } 20550b57cec5SDimitry Andric 20560b57cec5SDimitry Andric if (isTypeLegal(MVT::i64)) { 2057349cc55cSDimitry Andric // The algorithm here is based on ideas from "Software Integer Division", 2058349cc55cSDimitry Andric // Tom Rodeheffer, August 2008. 2059349cc55cSDimitry Andric 2060480093f4SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 2061480093f4SDimitry Andric const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>(); 2062480093f4SDimitry Andric 20630b57cec5SDimitry Andric // Compute denominator reciprocal. 206406c3fb27SDimitry Andric unsigned FMAD = 206506c3fb27SDimitry Andric !Subtarget->hasMadMacF32Insts() ? (unsigned)ISD::FMA 206606c3fb27SDimitry Andric : MFI->getMode().FP32Denormals == DenormalMode::getPreserveSign() 206706c3fb27SDimitry Andric ? (unsigned)ISD::FMAD 206806c3fb27SDimitry Andric : (unsigned)AMDGPUISD::FMAD_FTZ; 20690b57cec5SDimitry Andric 20700b57cec5SDimitry Andric SDValue Cvt_Lo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Lo); 20710b57cec5SDimitry Andric SDValue Cvt_Hi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, RHS_Hi); 20720b57cec5SDimitry Andric SDValue Mad1 = DAG.getNode(FMAD, DL, MVT::f32, Cvt_Hi, 20730b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0x4f800000).bitsToFloat(), DL, MVT::f32), 20740b57cec5SDimitry Andric Cvt_Lo); 20750b57cec5SDimitry Andric SDValue Rcp = DAG.getNode(AMDGPUISD::RCP, DL, MVT::f32, Mad1); 20760b57cec5SDimitry Andric SDValue Mul1 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Rcp, 20770b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0x5f7ffffc).bitsToFloat(), DL, MVT::f32)); 20780b57cec5SDimitry Andric SDValue Mul2 = DAG.getNode(ISD::FMUL, DL, MVT::f32, Mul1, 20790b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0x2f800000).bitsToFloat(), DL, MVT::f32)); 20800b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, DL, MVT::f32, Mul2); 20810b57cec5SDimitry Andric SDValue Mad2 = DAG.getNode(FMAD, DL, MVT::f32, Trunc, 20820b57cec5SDimitry Andric DAG.getConstantFP(APInt(32, 0xcf800000).bitsToFloat(), DL, MVT::f32), 20830b57cec5SDimitry Andric Mul1); 20840b57cec5SDimitry Andric SDValue Rcp_Lo = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Mad2); 20850b57cec5SDimitry Andric SDValue Rcp_Hi = DAG.getNode(ISD::FP_TO_UINT, DL, HalfVT, Trunc); 20860b57cec5SDimitry Andric SDValue Rcp64 = DAG.getBitcast(VT, 20870b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Rcp_Lo, Rcp_Hi})); 20880b57cec5SDimitry Andric 20890b57cec5SDimitry Andric SDValue Zero64 = DAG.getConstant(0, DL, VT); 20900b57cec5SDimitry Andric SDValue One64 = DAG.getConstant(1, DL, VT); 20910b57cec5SDimitry Andric SDValue Zero1 = DAG.getConstant(0, DL, MVT::i1); 20920b57cec5SDimitry Andric SDVTList HalfCarryVT = DAG.getVTList(HalfVT, MVT::i1); 20930b57cec5SDimitry Andric 2094349cc55cSDimitry Andric // First round of UNR (Unsigned integer Newton-Raphson). 20950b57cec5SDimitry Andric SDValue Neg_RHS = DAG.getNode(ISD::SUB, DL, VT, Zero64, RHS); 20960b57cec5SDimitry Andric SDValue Mullo1 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Rcp64); 20970b57cec5SDimitry Andric SDValue Mulhi1 = DAG.getNode(ISD::MULHU, DL, VT, Rcp64, Mullo1); 209806c3fb27SDimitry Andric SDValue Mulhi1_Lo, Mulhi1_Hi; 209906c3fb27SDimitry Andric std::tie(Mulhi1_Lo, Mulhi1_Hi) = 210006c3fb27SDimitry Andric DAG.SplitScalar(Mulhi1, DL, HalfVT, HalfVT); 210106c3fb27SDimitry Andric SDValue Add1_Lo = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Rcp_Lo, 21020b57cec5SDimitry Andric Mulhi1_Lo, Zero1); 210306c3fb27SDimitry Andric SDValue Add1_Hi = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Rcp_Hi, 21040b57cec5SDimitry Andric Mulhi1_Hi, Add1_Lo.getValue(1)); 21050b57cec5SDimitry Andric SDValue Add1 = DAG.getBitcast(VT, 21060b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Add1_Lo, Add1_Hi})); 21070b57cec5SDimitry Andric 2108349cc55cSDimitry Andric // Second round of UNR. 21090b57cec5SDimitry Andric SDValue Mullo2 = DAG.getNode(ISD::MUL, DL, VT, Neg_RHS, Add1); 21100b57cec5SDimitry Andric SDValue Mulhi2 = DAG.getNode(ISD::MULHU, DL, VT, Add1, Mullo2); 211106c3fb27SDimitry Andric SDValue Mulhi2_Lo, Mulhi2_Hi; 211206c3fb27SDimitry Andric std::tie(Mulhi2_Lo, Mulhi2_Hi) = 211306c3fb27SDimitry Andric DAG.SplitScalar(Mulhi2, DL, HalfVT, HalfVT); 211406c3fb27SDimitry Andric SDValue Add2_Lo = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Add1_Lo, 21150b57cec5SDimitry Andric Mulhi2_Lo, Zero1); 211606c3fb27SDimitry Andric SDValue Add2_Hi = DAG.getNode(ISD::UADDO_CARRY, DL, HalfCarryVT, Add1_Hi, 2117349cc55cSDimitry Andric Mulhi2_Hi, Add2_Lo.getValue(1)); 21180b57cec5SDimitry Andric SDValue Add2 = DAG.getBitcast(VT, 21190b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Add2_Lo, Add2_Hi})); 2120349cc55cSDimitry Andric 21210b57cec5SDimitry Andric SDValue Mulhi3 = DAG.getNode(ISD::MULHU, DL, VT, LHS, Add2); 21220b57cec5SDimitry Andric 21230b57cec5SDimitry Andric SDValue Mul3 = DAG.getNode(ISD::MUL, DL, VT, RHS, Mulhi3); 21240b57cec5SDimitry Andric 212506c3fb27SDimitry Andric SDValue Mul3_Lo, Mul3_Hi; 212606c3fb27SDimitry Andric std::tie(Mul3_Lo, Mul3_Hi) = DAG.SplitScalar(Mul3, DL, HalfVT, HalfVT); 212706c3fb27SDimitry Andric SDValue Sub1_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, LHS_Lo, 21280b57cec5SDimitry Andric Mul3_Lo, Zero1); 212906c3fb27SDimitry Andric SDValue Sub1_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, LHS_Hi, 21300b57cec5SDimitry Andric Mul3_Hi, Sub1_Lo.getValue(1)); 21310b57cec5SDimitry Andric SDValue Sub1_Mi = DAG.getNode(ISD::SUB, DL, HalfVT, LHS_Hi, Mul3_Hi); 21320b57cec5SDimitry Andric SDValue Sub1 = DAG.getBitcast(VT, 21330b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Sub1_Lo, Sub1_Hi})); 21340b57cec5SDimitry Andric 21350b57cec5SDimitry Andric SDValue MinusOne = DAG.getConstant(0xffffffffu, DL, HalfVT); 21360b57cec5SDimitry Andric SDValue C1 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, MinusOne, Zero, 21370b57cec5SDimitry Andric ISD::SETUGE); 21380b57cec5SDimitry Andric SDValue C2 = DAG.getSelectCC(DL, Sub1_Lo, RHS_Lo, MinusOne, Zero, 21390b57cec5SDimitry Andric ISD::SETUGE); 21400b57cec5SDimitry Andric SDValue C3 = DAG.getSelectCC(DL, Sub1_Hi, RHS_Hi, C2, C1, ISD::SETEQ); 21410b57cec5SDimitry Andric 21420b57cec5SDimitry Andric // TODO: Here and below portions of the code can be enclosed into if/endif. 21430b57cec5SDimitry Andric // Currently control flow is unconditional and we have 4 selects after 21440b57cec5SDimitry Andric // potential endif to substitute PHIs. 21450b57cec5SDimitry Andric 21460b57cec5SDimitry Andric // if C3 != 0 ... 214706c3fb27SDimitry Andric SDValue Sub2_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub1_Lo, 21480b57cec5SDimitry Andric RHS_Lo, Zero1); 214906c3fb27SDimitry Andric SDValue Sub2_Mi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub1_Mi, 21500b57cec5SDimitry Andric RHS_Hi, Sub1_Lo.getValue(1)); 215106c3fb27SDimitry Andric SDValue Sub2_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Mi, 21520b57cec5SDimitry Andric Zero, Sub2_Lo.getValue(1)); 21530b57cec5SDimitry Andric SDValue Sub2 = DAG.getBitcast(VT, 21540b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Sub2_Lo, Sub2_Hi})); 21550b57cec5SDimitry Andric 21560b57cec5SDimitry Andric SDValue Add3 = DAG.getNode(ISD::ADD, DL, VT, Mulhi3, One64); 21570b57cec5SDimitry Andric 21580b57cec5SDimitry Andric SDValue C4 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, MinusOne, Zero, 21590b57cec5SDimitry Andric ISD::SETUGE); 21600b57cec5SDimitry Andric SDValue C5 = DAG.getSelectCC(DL, Sub2_Lo, RHS_Lo, MinusOne, Zero, 21610b57cec5SDimitry Andric ISD::SETUGE); 21620b57cec5SDimitry Andric SDValue C6 = DAG.getSelectCC(DL, Sub2_Hi, RHS_Hi, C5, C4, ISD::SETEQ); 21630b57cec5SDimitry Andric 21640b57cec5SDimitry Andric // if (C6 != 0) 21650b57cec5SDimitry Andric SDValue Add4 = DAG.getNode(ISD::ADD, DL, VT, Add3, One64); 21660b57cec5SDimitry Andric 216706c3fb27SDimitry Andric SDValue Sub3_Lo = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Lo, 21680b57cec5SDimitry Andric RHS_Lo, Zero1); 216906c3fb27SDimitry Andric SDValue Sub3_Mi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub2_Mi, 21700b57cec5SDimitry Andric RHS_Hi, Sub2_Lo.getValue(1)); 217106c3fb27SDimitry Andric SDValue Sub3_Hi = DAG.getNode(ISD::USUBO_CARRY, DL, HalfCarryVT, Sub3_Mi, 21720b57cec5SDimitry Andric Zero, Sub3_Lo.getValue(1)); 21730b57cec5SDimitry Andric SDValue Sub3 = DAG.getBitcast(VT, 21740b57cec5SDimitry Andric DAG.getBuildVector(MVT::v2i32, DL, {Sub3_Lo, Sub3_Hi})); 21750b57cec5SDimitry Andric 21760b57cec5SDimitry Andric // endif C6 21770b57cec5SDimitry Andric // endif C3 21780b57cec5SDimitry Andric 21790b57cec5SDimitry Andric SDValue Sel1 = DAG.getSelectCC(DL, C6, Zero, Add4, Add3, ISD::SETNE); 21800b57cec5SDimitry Andric SDValue Div = DAG.getSelectCC(DL, C3, Zero, Sel1, Mulhi3, ISD::SETNE); 21810b57cec5SDimitry Andric 21820b57cec5SDimitry Andric SDValue Sel2 = DAG.getSelectCC(DL, C6, Zero, Sub3, Sub2, ISD::SETNE); 21830b57cec5SDimitry Andric SDValue Rem = DAG.getSelectCC(DL, C3, Zero, Sel2, Sub1, ISD::SETNE); 21840b57cec5SDimitry Andric 21850b57cec5SDimitry Andric Results.push_back(Div); 21860b57cec5SDimitry Andric Results.push_back(Rem); 21870b57cec5SDimitry Andric 21880b57cec5SDimitry Andric return; 21890b57cec5SDimitry Andric } 21900b57cec5SDimitry Andric 21910b57cec5SDimitry Andric // r600 expandion. 21920b57cec5SDimitry Andric // Get Speculative values 21930b57cec5SDimitry Andric SDValue DIV_Part = DAG.getNode(ISD::UDIV, DL, HalfVT, LHS_Hi, RHS_Lo); 21940b57cec5SDimitry Andric SDValue REM_Part = DAG.getNode(ISD::UREM, DL, HalfVT, LHS_Hi, RHS_Lo); 21950b57cec5SDimitry Andric 21960b57cec5SDimitry Andric SDValue REM_Lo = DAG.getSelectCC(DL, RHS_Hi, Zero, REM_Part, LHS_Hi, ISD::SETEQ); 21970b57cec5SDimitry Andric SDValue REM = DAG.getBuildVector(MVT::v2i32, DL, {REM_Lo, Zero}); 21980b57cec5SDimitry Andric REM = DAG.getNode(ISD::BITCAST, DL, MVT::i64, REM); 21990b57cec5SDimitry Andric 22000b57cec5SDimitry Andric SDValue DIV_Hi = DAG.getSelectCC(DL, RHS_Hi, Zero, DIV_Part, Zero, ISD::SETEQ); 22010b57cec5SDimitry Andric SDValue DIV_Lo = Zero; 22020b57cec5SDimitry Andric 22030b57cec5SDimitry Andric const unsigned halfBitWidth = HalfVT.getSizeInBits(); 22040b57cec5SDimitry Andric 22050b57cec5SDimitry Andric for (unsigned i = 0; i < halfBitWidth; ++i) { 22060b57cec5SDimitry Andric const unsigned bitPos = halfBitWidth - i - 1; 22070b57cec5SDimitry Andric SDValue POS = DAG.getConstant(bitPos, DL, HalfVT); 22080b57cec5SDimitry Andric // Get value of high bit 22090b57cec5SDimitry Andric SDValue HBit = DAG.getNode(ISD::SRL, DL, HalfVT, LHS_Lo, POS); 22100b57cec5SDimitry Andric HBit = DAG.getNode(ISD::AND, DL, HalfVT, HBit, One); 22110b57cec5SDimitry Andric HBit = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, HBit); 22120b57cec5SDimitry Andric 22130b57cec5SDimitry Andric // Shift 22140b57cec5SDimitry Andric REM = DAG.getNode(ISD::SHL, DL, VT, REM, DAG.getConstant(1, DL, VT)); 22150b57cec5SDimitry Andric // Add LHS high bit 22160b57cec5SDimitry Andric REM = DAG.getNode(ISD::OR, DL, VT, REM, HBit); 22170b57cec5SDimitry Andric 22180b57cec5SDimitry Andric SDValue BIT = DAG.getConstant(1ULL << bitPos, DL, HalfVT); 22190b57cec5SDimitry Andric SDValue realBIT = DAG.getSelectCC(DL, REM, RHS, BIT, Zero, ISD::SETUGE); 22200b57cec5SDimitry Andric 22210b57cec5SDimitry Andric DIV_Lo = DAG.getNode(ISD::OR, DL, HalfVT, DIV_Lo, realBIT); 22220b57cec5SDimitry Andric 22230b57cec5SDimitry Andric // Update REM 22240b57cec5SDimitry Andric SDValue REM_sub = DAG.getNode(ISD::SUB, DL, VT, REM, RHS); 22250b57cec5SDimitry Andric REM = DAG.getSelectCC(DL, REM, RHS, REM_sub, REM, ISD::SETUGE); 22260b57cec5SDimitry Andric } 22270b57cec5SDimitry Andric 22280b57cec5SDimitry Andric SDValue DIV = DAG.getBuildVector(MVT::v2i32, DL, {DIV_Lo, DIV_Hi}); 22290b57cec5SDimitry Andric DIV = DAG.getNode(ISD::BITCAST, DL, MVT::i64, DIV); 22300b57cec5SDimitry Andric Results.push_back(DIV); 22310b57cec5SDimitry Andric Results.push_back(REM); 22320b57cec5SDimitry Andric } 22330b57cec5SDimitry Andric 22340b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerUDIVREM(SDValue Op, 22350b57cec5SDimitry Andric SelectionDAG &DAG) const { 22360b57cec5SDimitry Andric SDLoc DL(Op); 22370b57cec5SDimitry Andric EVT VT = Op.getValueType(); 22380b57cec5SDimitry Andric 22390b57cec5SDimitry Andric if (VT == MVT::i64) { 22400b57cec5SDimitry Andric SmallVector<SDValue, 2> Results; 22410b57cec5SDimitry Andric LowerUDIVREM64(Op, DAG, Results); 22420b57cec5SDimitry Andric return DAG.getMergeValues(Results, DL); 22430b57cec5SDimitry Andric } 22440b57cec5SDimitry Andric 22450b57cec5SDimitry Andric if (VT == MVT::i32) { 22460b57cec5SDimitry Andric if (SDValue Res = LowerDIVREM24(Op, DAG, false)) 22470b57cec5SDimitry Andric return Res; 22480b57cec5SDimitry Andric } 22490b57cec5SDimitry Andric 22505ffd83dbSDimitry Andric SDValue X = Op.getOperand(0); 22515ffd83dbSDimitry Andric SDValue Y = Op.getOperand(1); 22520b57cec5SDimitry Andric 22535ffd83dbSDimitry Andric // See AMDGPUCodeGenPrepare::expandDivRem32 for a description of the 22545ffd83dbSDimitry Andric // algorithm used here. 22550b57cec5SDimitry Andric 22565ffd83dbSDimitry Andric // Initial estimate of inv(y). 22575ffd83dbSDimitry Andric SDValue Z = DAG.getNode(AMDGPUISD::URECIP, DL, VT, Y); 22580b57cec5SDimitry Andric 22595ffd83dbSDimitry Andric // One round of UNR. 22605ffd83dbSDimitry Andric SDValue NegY = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Y); 22615ffd83dbSDimitry Andric SDValue NegYZ = DAG.getNode(ISD::MUL, DL, VT, NegY, Z); 22625ffd83dbSDimitry Andric Z = DAG.getNode(ISD::ADD, DL, VT, Z, 22635ffd83dbSDimitry Andric DAG.getNode(ISD::MULHU, DL, VT, Z, NegYZ)); 22640b57cec5SDimitry Andric 22655ffd83dbSDimitry Andric // Quotient/remainder estimate. 22665ffd83dbSDimitry Andric SDValue Q = DAG.getNode(ISD::MULHU, DL, VT, X, Z); 22675ffd83dbSDimitry Andric SDValue R = 22685ffd83dbSDimitry Andric DAG.getNode(ISD::SUB, DL, VT, X, DAG.getNode(ISD::MUL, DL, VT, Q, Y)); 22690b57cec5SDimitry Andric 22705ffd83dbSDimitry Andric // First quotient/remainder refinement. 22715ffd83dbSDimitry Andric EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 22725ffd83dbSDimitry Andric SDValue One = DAG.getConstant(1, DL, VT); 22735ffd83dbSDimitry Andric SDValue Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE); 22745ffd83dbSDimitry Andric Q = DAG.getNode(ISD::SELECT, DL, VT, Cond, 22755ffd83dbSDimitry Andric DAG.getNode(ISD::ADD, DL, VT, Q, One), Q); 22765ffd83dbSDimitry Andric R = DAG.getNode(ISD::SELECT, DL, VT, Cond, 22775ffd83dbSDimitry Andric DAG.getNode(ISD::SUB, DL, VT, R, Y), R); 22780b57cec5SDimitry Andric 22795ffd83dbSDimitry Andric // Second quotient/remainder refinement. 22805ffd83dbSDimitry Andric Cond = DAG.getSetCC(DL, CCVT, R, Y, ISD::SETUGE); 22815ffd83dbSDimitry Andric Q = DAG.getNode(ISD::SELECT, DL, VT, Cond, 22825ffd83dbSDimitry Andric DAG.getNode(ISD::ADD, DL, VT, Q, One), Q); 22835ffd83dbSDimitry Andric R = DAG.getNode(ISD::SELECT, DL, VT, Cond, 22845ffd83dbSDimitry Andric DAG.getNode(ISD::SUB, DL, VT, R, Y), R); 22850b57cec5SDimitry Andric 22865ffd83dbSDimitry Andric return DAG.getMergeValues({Q, R}, DL); 22870b57cec5SDimitry Andric } 22880b57cec5SDimitry Andric 22890b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSDIVREM(SDValue Op, 22900b57cec5SDimitry Andric SelectionDAG &DAG) const { 22910b57cec5SDimitry Andric SDLoc DL(Op); 22920b57cec5SDimitry Andric EVT VT = Op.getValueType(); 22930b57cec5SDimitry Andric 22940b57cec5SDimitry Andric SDValue LHS = Op.getOperand(0); 22950b57cec5SDimitry Andric SDValue RHS = Op.getOperand(1); 22960b57cec5SDimitry Andric 22970b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, DL, VT); 22980b57cec5SDimitry Andric SDValue NegOne = DAG.getConstant(-1, DL, VT); 22990b57cec5SDimitry Andric 23000b57cec5SDimitry Andric if (VT == MVT::i32) { 23010b57cec5SDimitry Andric if (SDValue Res = LowerDIVREM24(Op, DAG, true)) 23020b57cec5SDimitry Andric return Res; 23030b57cec5SDimitry Andric } 23040b57cec5SDimitry Andric 23050b57cec5SDimitry Andric if (VT == MVT::i64 && 23060b57cec5SDimitry Andric DAG.ComputeNumSignBits(LHS) > 32 && 23070b57cec5SDimitry Andric DAG.ComputeNumSignBits(RHS) > 32) { 23080b57cec5SDimitry Andric EVT HalfVT = VT.getHalfSizedIntegerVT(*DAG.getContext()); 23090b57cec5SDimitry Andric 23100b57cec5SDimitry Andric //HiLo split 23110b57cec5SDimitry Andric SDValue LHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, LHS, Zero); 23120b57cec5SDimitry Andric SDValue RHS_Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, HalfVT, RHS, Zero); 23130b57cec5SDimitry Andric SDValue DIVREM = DAG.getNode(ISD::SDIVREM, DL, DAG.getVTList(HalfVT, HalfVT), 23140b57cec5SDimitry Andric LHS_Lo, RHS_Lo); 23150b57cec5SDimitry Andric SDValue Res[2] = { 23160b57cec5SDimitry Andric DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(0)), 23170b57cec5SDimitry Andric DAG.getNode(ISD::SIGN_EXTEND, DL, VT, DIVREM.getValue(1)) 23180b57cec5SDimitry Andric }; 23190b57cec5SDimitry Andric return DAG.getMergeValues(Res, DL); 23200b57cec5SDimitry Andric } 23210b57cec5SDimitry Andric 23220b57cec5SDimitry Andric SDValue LHSign = DAG.getSelectCC(DL, LHS, Zero, NegOne, Zero, ISD::SETLT); 23230b57cec5SDimitry Andric SDValue RHSign = DAG.getSelectCC(DL, RHS, Zero, NegOne, Zero, ISD::SETLT); 23240b57cec5SDimitry Andric SDValue DSign = DAG.getNode(ISD::XOR, DL, VT, LHSign, RHSign); 23250b57cec5SDimitry Andric SDValue RSign = LHSign; // Remainder sign is the same as LHS 23260b57cec5SDimitry Andric 23270b57cec5SDimitry Andric LHS = DAG.getNode(ISD::ADD, DL, VT, LHS, LHSign); 23280b57cec5SDimitry Andric RHS = DAG.getNode(ISD::ADD, DL, VT, RHS, RHSign); 23290b57cec5SDimitry Andric 23300b57cec5SDimitry Andric LHS = DAG.getNode(ISD::XOR, DL, VT, LHS, LHSign); 23310b57cec5SDimitry Andric RHS = DAG.getNode(ISD::XOR, DL, VT, RHS, RHSign); 23320b57cec5SDimitry Andric 23330b57cec5SDimitry Andric SDValue Div = DAG.getNode(ISD::UDIVREM, DL, DAG.getVTList(VT, VT), LHS, RHS); 23340b57cec5SDimitry Andric SDValue Rem = Div.getValue(1); 23350b57cec5SDimitry Andric 23360b57cec5SDimitry Andric Div = DAG.getNode(ISD::XOR, DL, VT, Div, DSign); 23370b57cec5SDimitry Andric Rem = DAG.getNode(ISD::XOR, DL, VT, Rem, RSign); 23380b57cec5SDimitry Andric 23390b57cec5SDimitry Andric Div = DAG.getNode(ISD::SUB, DL, VT, Div, DSign); 23400b57cec5SDimitry Andric Rem = DAG.getNode(ISD::SUB, DL, VT, Rem, RSign); 23410b57cec5SDimitry Andric 23420b57cec5SDimitry Andric SDValue Res[2] = { 23430b57cec5SDimitry Andric Div, 23440b57cec5SDimitry Andric Rem 23450b57cec5SDimitry Andric }; 23460b57cec5SDimitry Andric return DAG.getMergeValues(Res, DL); 23470b57cec5SDimitry Andric } 23480b57cec5SDimitry Andric 2349e8d8bef9SDimitry Andric // (frem x, y) -> (fma (fneg (ftrunc (fdiv x, y))), y, x) 23500b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFREM(SDValue Op, SelectionDAG &DAG) const { 23510b57cec5SDimitry Andric SDLoc SL(Op); 23520b57cec5SDimitry Andric EVT VT = Op.getValueType(); 2353e8d8bef9SDimitry Andric auto Flags = Op->getFlags(); 23540b57cec5SDimitry Andric SDValue X = Op.getOperand(0); 23550b57cec5SDimitry Andric SDValue Y = Op.getOperand(1); 23560b57cec5SDimitry Andric 2357e8d8bef9SDimitry Andric SDValue Div = DAG.getNode(ISD::FDIV, SL, VT, X, Y, Flags); 2358e8d8bef9SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, VT, Div, Flags); 2359e8d8bef9SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Trunc, Flags); 2360e8d8bef9SDimitry Andric // TODO: For f32 use FMAD instead if !hasFastFMA32? 2361e8d8bef9SDimitry Andric return DAG.getNode(ISD::FMA, SL, VT, Neg, Y, X, Flags); 23620b57cec5SDimitry Andric } 23630b57cec5SDimitry Andric 23640b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFCEIL(SDValue Op, SelectionDAG &DAG) const { 23650b57cec5SDimitry Andric SDLoc SL(Op); 23660b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 23670b57cec5SDimitry Andric 23680b57cec5SDimitry Andric // result = trunc(src) 23690b57cec5SDimitry Andric // if (src > 0.0 && src != result) 23700b57cec5SDimitry Andric // result += 1.0 23710b57cec5SDimitry Andric 23720b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); 23730b57cec5SDimitry Andric 23740b57cec5SDimitry Andric const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); 23750b57cec5SDimitry Andric const SDValue One = DAG.getConstantFP(1.0, SL, MVT::f64); 23760b57cec5SDimitry Andric 23770b57cec5SDimitry Andric EVT SetCCVT = 23780b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); 23790b57cec5SDimitry Andric 23800b57cec5SDimitry Andric SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOGT); 23810b57cec5SDimitry Andric SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); 23820b57cec5SDimitry Andric SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); 23830b57cec5SDimitry Andric 23840b57cec5SDimitry Andric SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, One, Zero); 23850b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 23860b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); 23870b57cec5SDimitry Andric } 23880b57cec5SDimitry Andric 23890b57cec5SDimitry Andric static SDValue extractF64Exponent(SDValue Hi, const SDLoc &SL, 23900b57cec5SDimitry Andric SelectionDAG &DAG) { 23910b57cec5SDimitry Andric const unsigned FractBits = 52; 23920b57cec5SDimitry Andric const unsigned ExpBits = 11; 23930b57cec5SDimitry Andric 23940b57cec5SDimitry Andric SDValue ExpPart = DAG.getNode(AMDGPUISD::BFE_U32, SL, MVT::i32, 23950b57cec5SDimitry Andric Hi, 23960b57cec5SDimitry Andric DAG.getConstant(FractBits - 32, SL, MVT::i32), 23970b57cec5SDimitry Andric DAG.getConstant(ExpBits, SL, MVT::i32)); 23980b57cec5SDimitry Andric SDValue Exp = DAG.getNode(ISD::SUB, SL, MVT::i32, ExpPart, 23990b57cec5SDimitry Andric DAG.getConstant(1023, SL, MVT::i32)); 24000b57cec5SDimitry Andric 24010b57cec5SDimitry Andric return Exp; 24020b57cec5SDimitry Andric } 24030b57cec5SDimitry Andric 24040b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFTRUNC(SDValue Op, SelectionDAG &DAG) const { 24050b57cec5SDimitry Andric SDLoc SL(Op); 24060b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 24070b57cec5SDimitry Andric 24080b57cec5SDimitry Andric assert(Op.getValueType() == MVT::f64); 24090b57cec5SDimitry Andric 24100b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 24110b57cec5SDimitry Andric 24120b57cec5SDimitry Andric // Extract the upper half, since this is where we will find the sign and 24130b57cec5SDimitry Andric // exponent. 2414349cc55cSDimitry Andric SDValue Hi = getHiHalf64(Src, DAG); 24150b57cec5SDimitry Andric 24160b57cec5SDimitry Andric SDValue Exp = extractF64Exponent(Hi, SL, DAG); 24170b57cec5SDimitry Andric 24180b57cec5SDimitry Andric const unsigned FractBits = 52; 24190b57cec5SDimitry Andric 24200b57cec5SDimitry Andric // Extract the sign bit. 24210b57cec5SDimitry Andric const SDValue SignBitMask = DAG.getConstant(UINT32_C(1) << 31, SL, MVT::i32); 24220b57cec5SDimitry Andric SDValue SignBit = DAG.getNode(ISD::AND, SL, MVT::i32, Hi, SignBitMask); 24230b57cec5SDimitry Andric 24240b57cec5SDimitry Andric // Extend back to 64-bits. 24250b57cec5SDimitry Andric SDValue SignBit64 = DAG.getBuildVector(MVT::v2i32, SL, {Zero, SignBit}); 24260b57cec5SDimitry Andric SignBit64 = DAG.getNode(ISD::BITCAST, SL, MVT::i64, SignBit64); 24270b57cec5SDimitry Andric 24280b57cec5SDimitry Andric SDValue BcInt = DAG.getNode(ISD::BITCAST, SL, MVT::i64, Src); 24290b57cec5SDimitry Andric const SDValue FractMask 24300b57cec5SDimitry Andric = DAG.getConstant((UINT64_C(1) << FractBits) - 1, SL, MVT::i64); 24310b57cec5SDimitry Andric 24320b57cec5SDimitry Andric SDValue Shr = DAG.getNode(ISD::SRA, SL, MVT::i64, FractMask, Exp); 24330b57cec5SDimitry Andric SDValue Not = DAG.getNOT(SL, Shr, MVT::i64); 24340b57cec5SDimitry Andric SDValue Tmp0 = DAG.getNode(ISD::AND, SL, MVT::i64, BcInt, Not); 24350b57cec5SDimitry Andric 24360b57cec5SDimitry Andric EVT SetCCVT = 24370b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i32); 24380b57cec5SDimitry Andric 24390b57cec5SDimitry Andric const SDValue FiftyOne = DAG.getConstant(FractBits - 1, SL, MVT::i32); 24400b57cec5SDimitry Andric 24410b57cec5SDimitry Andric SDValue ExpLt0 = DAG.getSetCC(SL, SetCCVT, Exp, Zero, ISD::SETLT); 24420b57cec5SDimitry Andric SDValue ExpGt51 = DAG.getSetCC(SL, SetCCVT, Exp, FiftyOne, ISD::SETGT); 24430b57cec5SDimitry Andric 24440b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpLt0, SignBit64, Tmp0); 24450b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::SELECT, SL, MVT::i64, ExpGt51, BcInt, Tmp1); 24460b57cec5SDimitry Andric 24470b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::f64, Tmp2); 24480b57cec5SDimitry Andric } 24490b57cec5SDimitry Andric 24505f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::LowerFROUNDEVEN(SDValue Op, 24515f757f3fSDimitry Andric SelectionDAG &DAG) const { 24520b57cec5SDimitry Andric SDLoc SL(Op); 24530b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 24540b57cec5SDimitry Andric 24550b57cec5SDimitry Andric assert(Op.getValueType() == MVT::f64); 24560b57cec5SDimitry Andric 24570b57cec5SDimitry Andric APFloat C1Val(APFloat::IEEEdouble(), "0x1.0p+52"); 24580b57cec5SDimitry Andric SDValue C1 = DAG.getConstantFP(C1Val, SL, MVT::f64); 24590b57cec5SDimitry Andric SDValue CopySign = DAG.getNode(ISD::FCOPYSIGN, SL, MVT::f64, C1, Src); 24600b57cec5SDimitry Andric 24610b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 24620b57cec5SDimitry Andric 24630b57cec5SDimitry Andric SDValue Tmp1 = DAG.getNode(ISD::FADD, SL, MVT::f64, Src, CopySign); 24640b57cec5SDimitry Andric SDValue Tmp2 = DAG.getNode(ISD::FSUB, SL, MVT::f64, Tmp1, CopySign); 24650b57cec5SDimitry Andric 24660b57cec5SDimitry Andric SDValue Fabs = DAG.getNode(ISD::FABS, SL, MVT::f64, Src); 24670b57cec5SDimitry Andric 24680b57cec5SDimitry Andric APFloat C2Val(APFloat::IEEEdouble(), "0x1.fffffffffffffp+51"); 24690b57cec5SDimitry Andric SDValue C2 = DAG.getConstantFP(C2Val, SL, MVT::f64); 24700b57cec5SDimitry Andric 24710b57cec5SDimitry Andric EVT SetCCVT = 24720b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); 24730b57cec5SDimitry Andric SDValue Cond = DAG.getSetCC(SL, SetCCVT, Fabs, C2, ISD::SETOGT); 24740b57cec5SDimitry Andric 24750b57cec5SDimitry Andric return DAG.getSelect(SL, MVT::f64, Cond, Src, Tmp2); 24760b57cec5SDimitry Andric } 24770b57cec5SDimitry Andric 24785f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::LowerFNEARBYINT(SDValue Op, 24795f757f3fSDimitry Andric SelectionDAG &DAG) const { 24800b57cec5SDimitry Andric // FNEARBYINT and FRINT are the same, except in their handling of FP 24810b57cec5SDimitry Andric // exceptions. Those aren't really meaningful for us, and OpenCL only has 24820b57cec5SDimitry Andric // rint, so just treat them as equivalent. 24835f757f3fSDimitry Andric return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), Op.getValueType(), 24845f757f3fSDimitry Andric Op.getOperand(0)); 24850b57cec5SDimitry Andric } 24860b57cec5SDimitry Andric 24875f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::LowerFRINT(SDValue Op, SelectionDAG &DAG) const { 2488bdd1243dSDimitry Andric auto VT = Op.getValueType(); 2489bdd1243dSDimitry Andric auto Arg = Op.getOperand(0u); 24905f757f3fSDimitry Andric return DAG.getNode(ISD::FROUNDEVEN, SDLoc(Op), VT, Arg); 2491bdd1243dSDimitry Andric } 2492bdd1243dSDimitry Andric 24930b57cec5SDimitry Andric // XXX - May require not supporting f32 denormals? 24940b57cec5SDimitry Andric 24950b57cec5SDimitry Andric // Don't handle v2f16. The extra instructions to scalarize and repack around the 24960b57cec5SDimitry Andric // compare and vselect end up producing worse code than scalarizing the whole 24970b57cec5SDimitry Andric // operation. 24985ffd83dbSDimitry Andric SDValue AMDGPUTargetLowering::LowerFROUND(SDValue Op, SelectionDAG &DAG) const { 24990b57cec5SDimitry Andric SDLoc SL(Op); 25000b57cec5SDimitry Andric SDValue X = Op.getOperand(0); 25010b57cec5SDimitry Andric EVT VT = Op.getValueType(); 25020b57cec5SDimitry Andric 25030b57cec5SDimitry Andric SDValue T = DAG.getNode(ISD::FTRUNC, SL, VT, X); 25040b57cec5SDimitry Andric 25050b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 25060b57cec5SDimitry Andric 25070b57cec5SDimitry Andric SDValue Diff = DAG.getNode(ISD::FSUB, SL, VT, X, T); 25080b57cec5SDimitry Andric 25090b57cec5SDimitry Andric SDValue AbsDiff = DAG.getNode(ISD::FABS, SL, VT, Diff); 25100b57cec5SDimitry Andric 25110b57cec5SDimitry Andric const SDValue Zero = DAG.getConstantFP(0.0, SL, VT); 25120b57cec5SDimitry Andric const SDValue One = DAG.getConstantFP(1.0, SL, VT); 25130b57cec5SDimitry Andric 25140b57cec5SDimitry Andric EVT SetCCVT = 25150b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 25160b57cec5SDimitry Andric 25175f757f3fSDimitry Andric const SDValue Half = DAG.getConstantFP(0.5, SL, VT); 25180b57cec5SDimitry Andric SDValue Cmp = DAG.getSetCC(SL, SetCCVT, AbsDiff, Half, ISD::SETOGE); 25195f757f3fSDimitry Andric SDValue OneOrZeroFP = DAG.getNode(ISD::SELECT, SL, VT, Cmp, One, Zero); 25200b57cec5SDimitry Andric 25215f757f3fSDimitry Andric SDValue SignedOffset = DAG.getNode(ISD::FCOPYSIGN, SL, VT, OneOrZeroFP, X); 25225f757f3fSDimitry Andric return DAG.getNode(ISD::FADD, SL, VT, T, SignedOffset); 25230b57cec5SDimitry Andric } 25240b57cec5SDimitry Andric 25250b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const { 25260b57cec5SDimitry Andric SDLoc SL(Op); 25270b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 25280b57cec5SDimitry Andric 25290b57cec5SDimitry Andric // result = trunc(src); 25300b57cec5SDimitry Andric // if (src < 0.0 && src != result) 25310b57cec5SDimitry Andric // result += -1.0. 25320b57cec5SDimitry Andric 25330b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, MVT::f64, Src); 25340b57cec5SDimitry Andric 25350b57cec5SDimitry Andric const SDValue Zero = DAG.getConstantFP(0.0, SL, MVT::f64); 25360b57cec5SDimitry Andric const SDValue NegOne = DAG.getConstantFP(-1.0, SL, MVT::f64); 25370b57cec5SDimitry Andric 25380b57cec5SDimitry Andric EVT SetCCVT = 25390b57cec5SDimitry Andric getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::f64); 25400b57cec5SDimitry Andric 25410b57cec5SDimitry Andric SDValue Lt0 = DAG.getSetCC(SL, SetCCVT, Src, Zero, ISD::SETOLT); 25420b57cec5SDimitry Andric SDValue NeTrunc = DAG.getSetCC(SL, SetCCVT, Src, Trunc, ISD::SETONE); 25430b57cec5SDimitry Andric SDValue And = DAG.getNode(ISD::AND, SL, SetCCVT, Lt0, NeTrunc); 25440b57cec5SDimitry Andric 25450b57cec5SDimitry Andric SDValue Add = DAG.getNode(ISD::SELECT, SL, MVT::f64, And, NegOne, Zero); 25460b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 25470b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add); 25480b57cec5SDimitry Andric } 25490b57cec5SDimitry Andric 255006c3fb27SDimitry Andric /// Return true if it's known that \p Src can never be an f32 denormal value. 255106c3fb27SDimitry Andric static bool valueIsKnownNeverF32Denorm(SDValue Src) { 255206c3fb27SDimitry Andric switch (Src.getOpcode()) { 255306c3fb27SDimitry Andric case ISD::FP_EXTEND: 255406c3fb27SDimitry Andric return Src.getOperand(0).getValueType() == MVT::f16; 255506c3fb27SDimitry Andric case ISD::FP16_TO_FP: 25565f757f3fSDimitry Andric case ISD::FFREXP: 255706c3fb27SDimitry Andric return true; 25585f757f3fSDimitry Andric case ISD::INTRINSIC_WO_CHAIN: { 2559647cbc5dSDimitry Andric unsigned IntrinsicID = Src.getConstantOperandVal(0); 25605f757f3fSDimitry Andric switch (IntrinsicID) { 25615f757f3fSDimitry Andric case Intrinsic::amdgcn_frexp_mant: 25625f757f3fSDimitry Andric return true; 25635f757f3fSDimitry Andric default: 25645f757f3fSDimitry Andric return false; 25655f757f3fSDimitry Andric } 25665f757f3fSDimitry Andric } 256706c3fb27SDimitry Andric default: 256806c3fb27SDimitry Andric return false; 25690b57cec5SDimitry Andric } 25700b57cec5SDimitry Andric 257106c3fb27SDimitry Andric llvm_unreachable("covered opcode switch"); 257206c3fb27SDimitry Andric } 257306c3fb27SDimitry Andric 25745f757f3fSDimitry Andric bool AMDGPUTargetLowering::allowApproxFunc(const SelectionDAG &DAG, 25755f757f3fSDimitry Andric SDNodeFlags Flags) { 257606c3fb27SDimitry Andric if (Flags.hasApproximateFuncs()) 257706c3fb27SDimitry Andric return true; 257806c3fb27SDimitry Andric auto &Options = DAG.getTarget().Options; 257906c3fb27SDimitry Andric return Options.UnsafeFPMath || Options.ApproxFuncFPMath; 258006c3fb27SDimitry Andric } 258106c3fb27SDimitry Andric 25825f757f3fSDimitry Andric bool AMDGPUTargetLowering::needsDenormHandlingF32(const SelectionDAG &DAG, 25835f757f3fSDimitry Andric SDValue Src, 258406c3fb27SDimitry Andric SDNodeFlags Flags) { 258506c3fb27SDimitry Andric return !valueIsKnownNeverF32Denorm(Src) && 258606c3fb27SDimitry Andric DAG.getMachineFunction() 258706c3fb27SDimitry Andric .getDenormalMode(APFloat::IEEEsingle()) 258806c3fb27SDimitry Andric .Input != DenormalMode::PreserveSign; 258906c3fb27SDimitry Andric } 259006c3fb27SDimitry Andric 259106c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::getIsLtSmallestNormal(SelectionDAG &DAG, 259206c3fb27SDimitry Andric SDValue Src, 259306c3fb27SDimitry Andric SDNodeFlags Flags) const { 259406c3fb27SDimitry Andric SDLoc SL(Src); 259506c3fb27SDimitry Andric EVT VT = Src.getValueType(); 259606c3fb27SDimitry Andric const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT); 259706c3fb27SDimitry Andric SDValue SmallestNormal = 259806c3fb27SDimitry Andric DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT); 259906c3fb27SDimitry Andric 260006c3fb27SDimitry Andric // Want to scale denormals up, but negatives and 0 work just as well on the 260106c3fb27SDimitry Andric // scaled path. 260206c3fb27SDimitry Andric SDValue IsLtSmallestNormal = DAG.getSetCC( 260306c3fb27SDimitry Andric SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src, 260406c3fb27SDimitry Andric SmallestNormal, ISD::SETOLT); 260506c3fb27SDimitry Andric 260606c3fb27SDimitry Andric return IsLtSmallestNormal; 260706c3fb27SDimitry Andric } 260806c3fb27SDimitry Andric 260906c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::getIsFinite(SelectionDAG &DAG, SDValue Src, 261006c3fb27SDimitry Andric SDNodeFlags Flags) const { 261106c3fb27SDimitry Andric SDLoc SL(Src); 261206c3fb27SDimitry Andric EVT VT = Src.getValueType(); 261306c3fb27SDimitry Andric const fltSemantics &Semantics = SelectionDAG::EVTToAPFloatSemantics(VT); 261406c3fb27SDimitry Andric SDValue Inf = DAG.getConstantFP(APFloat::getInf(Semantics), SL, VT); 261506c3fb27SDimitry Andric 261606c3fb27SDimitry Andric SDValue Fabs = DAG.getNode(ISD::FABS, SL, VT, Src, Flags); 261706c3fb27SDimitry Andric SDValue IsFinite = DAG.getSetCC( 261806c3fb27SDimitry Andric SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Fabs, 261906c3fb27SDimitry Andric Inf, ISD::SETOLT); 262006c3fb27SDimitry Andric return IsFinite; 262106c3fb27SDimitry Andric } 262206c3fb27SDimitry Andric 262306c3fb27SDimitry Andric /// If denormal handling is required return the scaled input to FLOG2, and the 262406c3fb27SDimitry Andric /// check for denormal range. Otherwise, return null values. 262506c3fb27SDimitry Andric std::pair<SDValue, SDValue> 262606c3fb27SDimitry Andric AMDGPUTargetLowering::getScaledLogInput(SelectionDAG &DAG, const SDLoc SL, 262706c3fb27SDimitry Andric SDValue Src, SDNodeFlags Flags) const { 26288a4dda33SDimitry Andric if (!needsDenormHandlingF32(DAG, Src, Flags)) 262906c3fb27SDimitry Andric return {}; 263006c3fb27SDimitry Andric 263106c3fb27SDimitry Andric MVT VT = MVT::f32; 263206c3fb27SDimitry Andric const fltSemantics &Semantics = APFloat::IEEEsingle(); 263306c3fb27SDimitry Andric SDValue SmallestNormal = 263406c3fb27SDimitry Andric DAG.getConstantFP(APFloat::getSmallestNormalized(Semantics), SL, VT); 263506c3fb27SDimitry Andric 263606c3fb27SDimitry Andric SDValue IsLtSmallestNormal = DAG.getSetCC( 263706c3fb27SDimitry Andric SL, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT), Src, 263806c3fb27SDimitry Andric SmallestNormal, ISD::SETOLT); 263906c3fb27SDimitry Andric 264006c3fb27SDimitry Andric SDValue Scale32 = DAG.getConstantFP(0x1.0p+32, SL, VT); 264106c3fb27SDimitry Andric SDValue One = DAG.getConstantFP(1.0, SL, VT); 264206c3fb27SDimitry Andric SDValue ScaleFactor = 264306c3fb27SDimitry Andric DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, Scale32, One, Flags); 264406c3fb27SDimitry Andric 264506c3fb27SDimitry Andric SDValue ScaledInput = DAG.getNode(ISD::FMUL, SL, VT, Src, ScaleFactor, Flags); 264606c3fb27SDimitry Andric return {ScaledInput, IsLtSmallestNormal}; 264706c3fb27SDimitry Andric } 264806c3fb27SDimitry Andric 264906c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOG2(SDValue Op, SelectionDAG &DAG) const { 265006c3fb27SDimitry Andric // v_log_f32 is good enough for OpenCL, except it doesn't handle denormals. 265106c3fb27SDimitry Andric // If we have to handle denormals, scale up the input and adjust the result. 265206c3fb27SDimitry Andric 265306c3fb27SDimitry Andric // scaled = x * (is_denormal ? 0x1.0p+32 : 1.0) 265406c3fb27SDimitry Andric // log2 = amdgpu_log2 - (is_denormal ? 32.0 : 0.0) 265506c3fb27SDimitry Andric 265606c3fb27SDimitry Andric SDLoc SL(Op); 265706c3fb27SDimitry Andric EVT VT = Op.getValueType(); 265806c3fb27SDimitry Andric SDValue Src = Op.getOperand(0); 265906c3fb27SDimitry Andric SDNodeFlags Flags = Op->getFlags(); 266006c3fb27SDimitry Andric 266106c3fb27SDimitry Andric if (VT == MVT::f16) { 266206c3fb27SDimitry Andric // Nothing in half is a denormal when promoted to f32. 266306c3fb27SDimitry Andric assert(!Subtarget->has16BitInsts()); 266406c3fb27SDimitry Andric SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags); 266506c3fb27SDimitry Andric SDValue Log = DAG.getNode(AMDGPUISD::LOG, SL, MVT::f32, Ext, Flags); 266606c3fb27SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, Log, 266706c3fb27SDimitry Andric DAG.getTargetConstant(0, SL, MVT::i32), Flags); 266806c3fb27SDimitry Andric } 266906c3fb27SDimitry Andric 267006c3fb27SDimitry Andric auto [ScaledInput, IsLtSmallestNormal] = 267106c3fb27SDimitry Andric getScaledLogInput(DAG, SL, Src, Flags); 267206c3fb27SDimitry Andric if (!ScaledInput) 267306c3fb27SDimitry Andric return DAG.getNode(AMDGPUISD::LOG, SL, VT, Src, Flags); 267406c3fb27SDimitry Andric 267506c3fb27SDimitry Andric SDValue Log2 = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags); 267606c3fb27SDimitry Andric 267706c3fb27SDimitry Andric SDValue ThirtyTwo = DAG.getConstantFP(32.0, SL, VT); 267806c3fb27SDimitry Andric SDValue Zero = DAG.getConstantFP(0.0, SL, VT); 267906c3fb27SDimitry Andric SDValue ResultOffset = 268006c3fb27SDimitry Andric DAG.getNode(ISD::SELECT, SL, VT, IsLtSmallestNormal, ThirtyTwo, Zero); 268106c3fb27SDimitry Andric return DAG.getNode(ISD::FSUB, SL, VT, Log2, ResultOffset, Flags); 268206c3fb27SDimitry Andric } 268306c3fb27SDimitry Andric 268406c3fb27SDimitry Andric static SDValue getMad(SelectionDAG &DAG, const SDLoc &SL, EVT VT, SDValue X, 268506c3fb27SDimitry Andric SDValue Y, SDValue C, SDNodeFlags Flags = SDNodeFlags()) { 268606c3fb27SDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Y, Flags); 268706c3fb27SDimitry Andric return DAG.getNode(ISD::FADD, SL, VT, Mul, C, Flags); 268806c3fb27SDimitry Andric } 268906c3fb27SDimitry Andric 269006c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOGCommon(SDValue Op, 269106c3fb27SDimitry Andric SelectionDAG &DAG) const { 269206c3fb27SDimitry Andric SDValue X = Op.getOperand(0); 269306c3fb27SDimitry Andric EVT VT = Op.getValueType(); 269406c3fb27SDimitry Andric SDNodeFlags Flags = Op->getFlags(); 269506c3fb27SDimitry Andric SDLoc DL(Op); 269606c3fb27SDimitry Andric 269706c3fb27SDimitry Andric const bool IsLog10 = Op.getOpcode() == ISD::FLOG10; 269806c3fb27SDimitry Andric assert(IsLog10 || Op.getOpcode() == ISD::FLOG); 269906c3fb27SDimitry Andric 270006c3fb27SDimitry Andric const auto &Options = getTargetMachine().Options; 270106c3fb27SDimitry Andric if (VT == MVT::f16 || Flags.hasApproximateFuncs() || 270206c3fb27SDimitry Andric Options.ApproxFuncFPMath || Options.UnsafeFPMath) { 270306c3fb27SDimitry Andric 270406c3fb27SDimitry Andric if (VT == MVT::f16 && !Subtarget->has16BitInsts()) { 270506c3fb27SDimitry Andric // Log and multiply in f32 is good enough for f16. 270606c3fb27SDimitry Andric X = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, X, Flags); 270706c3fb27SDimitry Andric } 270806c3fb27SDimitry Andric 27098a4dda33SDimitry Andric SDValue Lowered = LowerFLOGUnsafe(X, DL, DAG, IsLog10, Flags); 271006c3fb27SDimitry Andric if (VT == MVT::f16 && !Subtarget->has16BitInsts()) { 271106c3fb27SDimitry Andric return DAG.getNode(ISD::FP_ROUND, DL, VT, Lowered, 271206c3fb27SDimitry Andric DAG.getTargetConstant(0, DL, MVT::i32), Flags); 271306c3fb27SDimitry Andric } 271406c3fb27SDimitry Andric 271506c3fb27SDimitry Andric return Lowered; 271606c3fb27SDimitry Andric } 271706c3fb27SDimitry Andric 271806c3fb27SDimitry Andric auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, DL, X, Flags); 271906c3fb27SDimitry Andric if (ScaledInput) 272006c3fb27SDimitry Andric X = ScaledInput; 272106c3fb27SDimitry Andric 272206c3fb27SDimitry Andric SDValue Y = DAG.getNode(AMDGPUISD::LOG, DL, VT, X, Flags); 272306c3fb27SDimitry Andric 272406c3fb27SDimitry Andric SDValue R; 272506c3fb27SDimitry Andric if (Subtarget->hasFastFMAF32()) { 272606c3fb27SDimitry Andric // c+cc are ln(2)/ln(10) to more than 49 bits 272706c3fb27SDimitry Andric const float c_log10 = 0x1.344134p-2f; 272806c3fb27SDimitry Andric const float cc_log10 = 0x1.09f79ep-26f; 272906c3fb27SDimitry Andric 273006c3fb27SDimitry Andric // c + cc is ln(2) to more than 49 bits 273106c3fb27SDimitry Andric const float c_log = 0x1.62e42ep-1f; 273206c3fb27SDimitry Andric const float cc_log = 0x1.efa39ep-25f; 273306c3fb27SDimitry Andric 273406c3fb27SDimitry Andric SDValue C = DAG.getConstantFP(IsLog10 ? c_log10 : c_log, DL, VT); 273506c3fb27SDimitry Andric SDValue CC = DAG.getConstantFP(IsLog10 ? cc_log10 : cc_log, DL, VT); 273606c3fb27SDimitry Andric 273706c3fb27SDimitry Andric R = DAG.getNode(ISD::FMUL, DL, VT, Y, C, Flags); 273806c3fb27SDimitry Andric SDValue NegR = DAG.getNode(ISD::FNEG, DL, VT, R, Flags); 273906c3fb27SDimitry Andric SDValue FMA0 = DAG.getNode(ISD::FMA, DL, VT, Y, C, NegR, Flags); 274006c3fb27SDimitry Andric SDValue FMA1 = DAG.getNode(ISD::FMA, DL, VT, Y, CC, FMA0, Flags); 274106c3fb27SDimitry Andric R = DAG.getNode(ISD::FADD, DL, VT, R, FMA1, Flags); 274206c3fb27SDimitry Andric } else { 274306c3fb27SDimitry Andric // ch+ct is ln(2)/ln(10) to more than 36 bits 274406c3fb27SDimitry Andric const float ch_log10 = 0x1.344000p-2f; 274506c3fb27SDimitry Andric const float ct_log10 = 0x1.3509f6p-18f; 274606c3fb27SDimitry Andric 274706c3fb27SDimitry Andric // ch + ct is ln(2) to more than 36 bits 274806c3fb27SDimitry Andric const float ch_log = 0x1.62e000p-1f; 274906c3fb27SDimitry Andric const float ct_log = 0x1.0bfbe8p-15f; 275006c3fb27SDimitry Andric 275106c3fb27SDimitry Andric SDValue CH = DAG.getConstantFP(IsLog10 ? ch_log10 : ch_log, DL, VT); 275206c3fb27SDimitry Andric SDValue CT = DAG.getConstantFP(IsLog10 ? ct_log10 : ct_log, DL, VT); 275306c3fb27SDimitry Andric 275406c3fb27SDimitry Andric SDValue YAsInt = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Y); 275506c3fb27SDimitry Andric SDValue MaskConst = DAG.getConstant(0xfffff000, DL, MVT::i32); 275606c3fb27SDimitry Andric SDValue YHInt = DAG.getNode(ISD::AND, DL, MVT::i32, YAsInt, MaskConst); 275706c3fb27SDimitry Andric SDValue YH = DAG.getNode(ISD::BITCAST, DL, MVT::f32, YHInt); 275806c3fb27SDimitry Andric SDValue YT = DAG.getNode(ISD::FSUB, DL, VT, Y, YH, Flags); 275906c3fb27SDimitry Andric 276006c3fb27SDimitry Andric SDValue YTCT = DAG.getNode(ISD::FMUL, DL, VT, YT, CT, Flags); 276106c3fb27SDimitry Andric SDValue Mad0 = getMad(DAG, DL, VT, YH, CT, YTCT, Flags); 276206c3fb27SDimitry Andric SDValue Mad1 = getMad(DAG, DL, VT, YT, CH, Mad0, Flags); 276306c3fb27SDimitry Andric R = getMad(DAG, DL, VT, YH, CH, Mad1); 276406c3fb27SDimitry Andric } 276506c3fb27SDimitry Andric 276606c3fb27SDimitry Andric const bool IsFiniteOnly = (Flags.hasNoNaNs() || Options.NoNaNsFPMath) && 276706c3fb27SDimitry Andric (Flags.hasNoInfs() || Options.NoInfsFPMath); 276806c3fb27SDimitry Andric 276906c3fb27SDimitry Andric // TODO: Check if known finite from source value. 277006c3fb27SDimitry Andric if (!IsFiniteOnly) { 277106c3fb27SDimitry Andric SDValue IsFinite = getIsFinite(DAG, Y, Flags); 277206c3fb27SDimitry Andric R = DAG.getNode(ISD::SELECT, DL, VT, IsFinite, R, Y, Flags); 277306c3fb27SDimitry Andric } 277406c3fb27SDimitry Andric 277506c3fb27SDimitry Andric if (IsScaled) { 277606c3fb27SDimitry Andric SDValue Zero = DAG.getConstantFP(0.0f, DL, VT); 277706c3fb27SDimitry Andric SDValue ShiftK = 277806c3fb27SDimitry Andric DAG.getConstantFP(IsLog10 ? 0x1.344136p+3f : 0x1.62e430p+4f, DL, VT); 277906c3fb27SDimitry Andric SDValue Shift = 278006c3fb27SDimitry Andric DAG.getNode(ISD::SELECT, DL, VT, IsScaled, ShiftK, Zero, Flags); 278106c3fb27SDimitry Andric R = DAG.getNode(ISD::FSUB, DL, VT, R, Shift, Flags); 278206c3fb27SDimitry Andric } 278306c3fb27SDimitry Andric 278406c3fb27SDimitry Andric return R; 278506c3fb27SDimitry Andric } 278606c3fb27SDimitry Andric 278706c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOG10(SDValue Op, SelectionDAG &DAG) const { 278806c3fb27SDimitry Andric return LowerFLOGCommon(Op, DAG); 278906c3fb27SDimitry Andric } 279006c3fb27SDimitry Andric 279106c3fb27SDimitry Andric // Do f32 fast math expansion for flog2 or flog10. This is accurate enough for a 279206c3fb27SDimitry Andric // promote f16 operation. 279306c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::LowerFLOGUnsafe(SDValue Src, const SDLoc &SL, 27948a4dda33SDimitry Andric SelectionDAG &DAG, bool IsLog10, 279506c3fb27SDimitry Andric SDNodeFlags Flags) const { 279606c3fb27SDimitry Andric EVT VT = Src.getValueType(); 27975f757f3fSDimitry Andric unsigned LogOp = 27985f757f3fSDimitry Andric VT == MVT::f32 ? (unsigned)AMDGPUISD::LOG : (unsigned)ISD::FLOG2; 27998a4dda33SDimitry Andric 28008a4dda33SDimitry Andric double Log2BaseInverted = 28018a4dda33SDimitry Andric IsLog10 ? numbers::ln2 / numbers::ln10 : numbers::ln2; 28028a4dda33SDimitry Andric 28038a4dda33SDimitry Andric if (VT == MVT::f32) { 28048a4dda33SDimitry Andric auto [ScaledInput, IsScaled] = getScaledLogInput(DAG, SL, Src, Flags); 28058a4dda33SDimitry Andric if (ScaledInput) { 28068a4dda33SDimitry Andric SDValue LogSrc = DAG.getNode(AMDGPUISD::LOG, SL, VT, ScaledInput, Flags); 28078a4dda33SDimitry Andric SDValue ScaledResultOffset = 28088a4dda33SDimitry Andric DAG.getConstantFP(-32.0 * Log2BaseInverted, SL, VT); 28098a4dda33SDimitry Andric 28108a4dda33SDimitry Andric SDValue Zero = DAG.getConstantFP(0.0f, SL, VT); 28118a4dda33SDimitry Andric 28128a4dda33SDimitry Andric SDValue ResultOffset = DAG.getNode(ISD::SELECT, SL, VT, IsScaled, 28138a4dda33SDimitry Andric ScaledResultOffset, Zero, Flags); 28148a4dda33SDimitry Andric 28158a4dda33SDimitry Andric SDValue Log2Inv = DAG.getConstantFP(Log2BaseInverted, SL, VT); 28168a4dda33SDimitry Andric 28178a4dda33SDimitry Andric if (Subtarget->hasFastFMAF32()) 28188a4dda33SDimitry Andric return DAG.getNode(ISD::FMA, SL, VT, LogSrc, Log2Inv, ResultOffset, 28198a4dda33SDimitry Andric Flags); 28208a4dda33SDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, LogSrc, Log2Inv, Flags); 28218a4dda33SDimitry Andric return DAG.getNode(ISD::FADD, SL, VT, Mul, ResultOffset); 28228a4dda33SDimitry Andric } 28238a4dda33SDimitry Andric } 28248a4dda33SDimitry Andric 282506c3fb27SDimitry Andric SDValue Log2Operand = DAG.getNode(LogOp, SL, VT, Src, Flags); 282606c3fb27SDimitry Andric SDValue Log2BaseInvertedOperand = DAG.getConstantFP(Log2BaseInverted, SL, VT); 282706c3fb27SDimitry Andric 282806c3fb27SDimitry Andric return DAG.getNode(ISD::FMUL, SL, VT, Log2Operand, Log2BaseInvertedOperand, 282906c3fb27SDimitry Andric Flags); 283006c3fb27SDimitry Andric } 283106c3fb27SDimitry Andric 283206c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXP2(SDValue Op, SelectionDAG &DAG) const { 283306c3fb27SDimitry Andric // v_exp_f32 is good enough for OpenCL, except it doesn't handle denormals. 283406c3fb27SDimitry Andric // If we have to handle denormals, scale up the input and adjust the result. 283506c3fb27SDimitry Andric 283606c3fb27SDimitry Andric SDLoc SL(Op); 283706c3fb27SDimitry Andric EVT VT = Op.getValueType(); 283806c3fb27SDimitry Andric SDValue Src = Op.getOperand(0); 283906c3fb27SDimitry Andric SDNodeFlags Flags = Op->getFlags(); 284006c3fb27SDimitry Andric 284106c3fb27SDimitry Andric if (VT == MVT::f16) { 284206c3fb27SDimitry Andric // Nothing in half is a denormal when promoted to f32. 284306c3fb27SDimitry Andric assert(!Subtarget->has16BitInsts()); 284406c3fb27SDimitry Andric SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, Src, Flags); 284506c3fb27SDimitry Andric SDValue Log = DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Ext, Flags); 284606c3fb27SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, Log, 284706c3fb27SDimitry Andric DAG.getTargetConstant(0, SL, MVT::i32), Flags); 284806c3fb27SDimitry Andric } 284906c3fb27SDimitry Andric 285006c3fb27SDimitry Andric assert(VT == MVT::f32); 285106c3fb27SDimitry Andric 28528a4dda33SDimitry Andric if (!needsDenormHandlingF32(DAG, Src, Flags)) 285306c3fb27SDimitry Andric return DAG.getNode(AMDGPUISD::EXP, SL, MVT::f32, Src, Flags); 285406c3fb27SDimitry Andric 285506c3fb27SDimitry Andric // bool needs_scaling = x < -0x1.f80000p+6f; 285606c3fb27SDimitry Andric // v_exp_f32(x + (s ? 0x1.0p+6f : 0.0f)) * (s ? 0x1.0p-64f : 1.0f); 285706c3fb27SDimitry Andric 285806c3fb27SDimitry Andric // -nextafter(128.0, -1) 285906c3fb27SDimitry Andric SDValue RangeCheckConst = DAG.getConstantFP(-0x1.f80000p+6f, SL, VT); 286006c3fb27SDimitry Andric 286106c3fb27SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 286206c3fb27SDimitry Andric 286306c3fb27SDimitry Andric SDValue NeedsScaling = 286406c3fb27SDimitry Andric DAG.getSetCC(SL, SetCCVT, Src, RangeCheckConst, ISD::SETOLT); 286506c3fb27SDimitry Andric 286606c3fb27SDimitry Andric SDValue SixtyFour = DAG.getConstantFP(0x1.0p+6f, SL, VT); 286706c3fb27SDimitry Andric SDValue Zero = DAG.getConstantFP(0.0, SL, VT); 286806c3fb27SDimitry Andric 286906c3fb27SDimitry Andric SDValue AddOffset = 287006c3fb27SDimitry Andric DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, SixtyFour, Zero); 287106c3fb27SDimitry Andric 287206c3fb27SDimitry Andric SDValue AddInput = DAG.getNode(ISD::FADD, SL, VT, Src, AddOffset, Flags); 287306c3fb27SDimitry Andric SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, AddInput, Flags); 287406c3fb27SDimitry Andric 287506c3fb27SDimitry Andric SDValue TwoExpNeg64 = DAG.getConstantFP(0x1.0p-64f, SL, VT); 287606c3fb27SDimitry Andric SDValue One = DAG.getConstantFP(1.0, SL, VT); 287706c3fb27SDimitry Andric SDValue ResultScale = 287806c3fb27SDimitry Andric DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, TwoExpNeg64, One); 287906c3fb27SDimitry Andric 288006c3fb27SDimitry Andric return DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScale, Flags); 288106c3fb27SDimitry Andric } 288206c3fb27SDimitry Andric 28835f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXPUnsafe(SDValue X, const SDLoc &SL, 288406c3fb27SDimitry Andric SelectionDAG &DAG, 288506c3fb27SDimitry Andric SDNodeFlags Flags) const { 28865f757f3fSDimitry Andric EVT VT = X.getValueType(); 28875f757f3fSDimitry Andric const SDValue Log2E = DAG.getConstantFP(numbers::log2e, SL, VT); 28885f757f3fSDimitry Andric 28895f757f3fSDimitry Andric if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) { 28900b57cec5SDimitry Andric // exp2(M_LOG2E_F * f); 28915f757f3fSDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, VT, X, Log2E, Flags); 28925f757f3fSDimitry Andric return DAG.getNode(VT == MVT::f32 ? (unsigned)AMDGPUISD::EXP 28935f757f3fSDimitry Andric : (unsigned)ISD::FEXP2, 28945f757f3fSDimitry Andric SL, VT, Mul, Flags); 28955f757f3fSDimitry Andric } 28965f757f3fSDimitry Andric 28975f757f3fSDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 28985f757f3fSDimitry Andric 28995f757f3fSDimitry Andric SDValue Threshold = DAG.getConstantFP(-0x1.5d58a0p+6f, SL, VT); 29005f757f3fSDimitry Andric SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT); 29015f757f3fSDimitry Andric 29025f757f3fSDimitry Andric SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+6f, SL, VT); 29035f757f3fSDimitry Andric 29045f757f3fSDimitry Andric SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags); 29055f757f3fSDimitry Andric 29065f757f3fSDimitry Andric SDValue AdjustedX = 29075f757f3fSDimitry Andric DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X); 29085f757f3fSDimitry Andric 29095f757f3fSDimitry Andric SDValue ExpInput = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, Log2E, Flags); 29105f757f3fSDimitry Andric 29115f757f3fSDimitry Andric SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, ExpInput, Flags); 29125f757f3fSDimitry Andric 29135f757f3fSDimitry Andric SDValue ResultScaleFactor = DAG.getConstantFP(0x1.969d48p-93f, SL, VT); 29145f757f3fSDimitry Andric SDValue AdjustedResult = 29155f757f3fSDimitry Andric DAG.getNode(ISD::FMUL, SL, VT, Exp2, ResultScaleFactor, Flags); 29165f757f3fSDimitry Andric 29175f757f3fSDimitry Andric return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, Exp2, 29185f757f3fSDimitry Andric Flags); 29195f757f3fSDimitry Andric } 29205f757f3fSDimitry Andric 29215f757f3fSDimitry Andric /// Emit approx-funcs appropriate lowering for exp10. inf/nan should still be 29225f757f3fSDimitry Andric /// handled correctly. 29235f757f3fSDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXP10Unsafe(SDValue X, const SDLoc &SL, 29245f757f3fSDimitry Andric SelectionDAG &DAG, 29255f757f3fSDimitry Andric SDNodeFlags Flags) const { 29265f757f3fSDimitry Andric const EVT VT = X.getValueType(); 29275f757f3fSDimitry Andric const unsigned Exp2Op = VT == MVT::f32 ? AMDGPUISD::EXP : ISD::FEXP2; 29285f757f3fSDimitry Andric 29295f757f3fSDimitry Andric if (VT != MVT::f32 || !needsDenormHandlingF32(DAG, X, Flags)) { 29305f757f3fSDimitry Andric // exp2(x * 0x1.a92000p+1f) * exp2(x * 0x1.4f0978p-11f); 29315f757f3fSDimitry Andric SDValue K0 = DAG.getConstantFP(0x1.a92000p+1f, SL, VT); 29325f757f3fSDimitry Andric SDValue K1 = DAG.getConstantFP(0x1.4f0978p-11f, SL, VT); 29335f757f3fSDimitry Andric 29345f757f3fSDimitry Andric SDValue Mul0 = DAG.getNode(ISD::FMUL, SL, VT, X, K0, Flags); 29355f757f3fSDimitry Andric SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags); 29365f757f3fSDimitry Andric SDValue Mul1 = DAG.getNode(ISD::FMUL, SL, VT, X, K1, Flags); 29375f757f3fSDimitry Andric SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags); 29385f757f3fSDimitry Andric return DAG.getNode(ISD::FMUL, SL, VT, Exp2_0, Exp2_1); 29395f757f3fSDimitry Andric } 29405f757f3fSDimitry Andric 29415f757f3fSDimitry Andric // bool s = x < -0x1.2f7030p+5f; 29425f757f3fSDimitry Andric // x += s ? 0x1.0p+5f : 0.0f; 29435f757f3fSDimitry Andric // exp10 = exp2(x * 0x1.a92000p+1f) * 29445f757f3fSDimitry Andric // exp2(x * 0x1.4f0978p-11f) * 29455f757f3fSDimitry Andric // (s ? 0x1.9f623ep-107f : 1.0f); 29465f757f3fSDimitry Andric 29475f757f3fSDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 29485f757f3fSDimitry Andric 29495f757f3fSDimitry Andric SDValue Threshold = DAG.getConstantFP(-0x1.2f7030p+5f, SL, VT); 29505f757f3fSDimitry Andric SDValue NeedsScaling = DAG.getSetCC(SL, SetCCVT, X, Threshold, ISD::SETOLT); 29515f757f3fSDimitry Andric 29525f757f3fSDimitry Andric SDValue ScaleOffset = DAG.getConstantFP(0x1.0p+5f, SL, VT); 29535f757f3fSDimitry Andric SDValue ScaledX = DAG.getNode(ISD::FADD, SL, VT, X, ScaleOffset, Flags); 29545f757f3fSDimitry Andric SDValue AdjustedX = 29555f757f3fSDimitry Andric DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, ScaledX, X); 29565f757f3fSDimitry Andric 29575f757f3fSDimitry Andric SDValue K0 = DAG.getConstantFP(0x1.a92000p+1f, SL, VT); 29585f757f3fSDimitry Andric SDValue K1 = DAG.getConstantFP(0x1.4f0978p-11f, SL, VT); 29595f757f3fSDimitry Andric 29605f757f3fSDimitry Andric SDValue Mul0 = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, K0, Flags); 29615f757f3fSDimitry Andric SDValue Exp2_0 = DAG.getNode(Exp2Op, SL, VT, Mul0, Flags); 29625f757f3fSDimitry Andric SDValue Mul1 = DAG.getNode(ISD::FMUL, SL, VT, AdjustedX, K1, Flags); 29635f757f3fSDimitry Andric SDValue Exp2_1 = DAG.getNode(Exp2Op, SL, VT, Mul1, Flags); 29645f757f3fSDimitry Andric 29655f757f3fSDimitry Andric SDValue MulExps = DAG.getNode(ISD::FMUL, SL, VT, Exp2_0, Exp2_1, Flags); 29665f757f3fSDimitry Andric 29675f757f3fSDimitry Andric SDValue ResultScaleFactor = DAG.getConstantFP(0x1.9f623ep-107f, SL, VT); 29685f757f3fSDimitry Andric SDValue AdjustedResult = 29695f757f3fSDimitry Andric DAG.getNode(ISD::FMUL, SL, VT, MulExps, ResultScaleFactor, Flags); 29705f757f3fSDimitry Andric 29715f757f3fSDimitry Andric return DAG.getNode(ISD::SELECT, SL, VT, NeedsScaling, AdjustedResult, MulExps, 297206c3fb27SDimitry Andric Flags); 297306c3fb27SDimitry Andric } 297406c3fb27SDimitry Andric 29750b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::lowerFEXP(SDValue Op, SelectionDAG &DAG) const { 29760b57cec5SDimitry Andric EVT VT = Op.getValueType(); 29770b57cec5SDimitry Andric SDLoc SL(Op); 297806c3fb27SDimitry Andric SDValue X = Op.getOperand(0); 297906c3fb27SDimitry Andric SDNodeFlags Flags = Op->getFlags(); 29805f757f3fSDimitry Andric const bool IsExp10 = Op.getOpcode() == ISD::FEXP10; 29810b57cec5SDimitry Andric 298206c3fb27SDimitry Andric if (VT.getScalarType() == MVT::f16) { 298306c3fb27SDimitry Andric // v_exp_f16 (fmul x, log2e) 298406c3fb27SDimitry Andric if (allowApproxFunc(DAG, Flags)) // TODO: Does this really require fast? 298506c3fb27SDimitry Andric return lowerFEXPUnsafe(X, SL, DAG, Flags); 298606c3fb27SDimitry Andric 298706c3fb27SDimitry Andric if (VT.isVector()) 298806c3fb27SDimitry Andric return SDValue(); 298906c3fb27SDimitry Andric 299006c3fb27SDimitry Andric // exp(f16 x) -> 299106c3fb27SDimitry Andric // fptrunc (v_exp_f32 (fmul (fpext x), log2e)) 299206c3fb27SDimitry Andric 299306c3fb27SDimitry Andric // Nothing in half is a denormal when promoted to f32. 299406c3fb27SDimitry Andric SDValue Ext = DAG.getNode(ISD::FP_EXTEND, SL, MVT::f32, X, Flags); 299506c3fb27SDimitry Andric SDValue Lowered = lowerFEXPUnsafe(Ext, SL, DAG, Flags); 299606c3fb27SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, Lowered, 299706c3fb27SDimitry Andric DAG.getTargetConstant(0, SL, MVT::i32), Flags); 299806c3fb27SDimitry Andric } 299906c3fb27SDimitry Andric 300006c3fb27SDimitry Andric assert(VT == MVT::f32); 300106c3fb27SDimitry Andric 300206c3fb27SDimitry Andric // TODO: Interpret allowApproxFunc as ignoring DAZ. This is currently copying 300306c3fb27SDimitry Andric // library behavior. Also, is known-not-daz source sufficient? 30045f757f3fSDimitry Andric if (allowApproxFunc(DAG, Flags)) { 30055f757f3fSDimitry Andric return IsExp10 ? lowerFEXP10Unsafe(X, SL, DAG, Flags) 30065f757f3fSDimitry Andric : lowerFEXPUnsafe(X, SL, DAG, Flags); 300706c3fb27SDimitry Andric } 300806c3fb27SDimitry Andric 300906c3fb27SDimitry Andric // Algorithm: 301006c3fb27SDimitry Andric // 301106c3fb27SDimitry Andric // e^x = 2^(x/ln(2)) = 2^(x*(64/ln(2))/64) 301206c3fb27SDimitry Andric // 301306c3fb27SDimitry Andric // x*(64/ln(2)) = n + f, |f| <= 0.5, n is integer 301406c3fb27SDimitry Andric // n = 64*m + j, 0 <= j < 64 301506c3fb27SDimitry Andric // 301606c3fb27SDimitry Andric // e^x = 2^((64*m + j + f)/64) 301706c3fb27SDimitry Andric // = (2^m) * (2^(j/64)) * 2^(f/64) 301806c3fb27SDimitry Andric // = (2^m) * (2^(j/64)) * e^(f*(ln(2)/64)) 301906c3fb27SDimitry Andric // 302006c3fb27SDimitry Andric // f = x*(64/ln(2)) - n 302106c3fb27SDimitry Andric // r = f*(ln(2)/64) = x - n*(ln(2)/64) 302206c3fb27SDimitry Andric // 302306c3fb27SDimitry Andric // e^x = (2^m) * (2^(j/64)) * e^r 302406c3fb27SDimitry Andric // 302506c3fb27SDimitry Andric // (2^(j/64)) is precomputed 302606c3fb27SDimitry Andric // 302706c3fb27SDimitry Andric // e^r = 1 + r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5! 302806c3fb27SDimitry Andric // e^r = 1 + q 302906c3fb27SDimitry Andric // 303006c3fb27SDimitry Andric // q = r + (r^2)/2! + (r^3)/3! + (r^4)/4! + (r^5)/5! 303106c3fb27SDimitry Andric // 303206c3fb27SDimitry Andric // e^x = (2^m) * ( (2^(j/64)) + q*(2^(j/64)) ) 303306c3fb27SDimitry Andric SDNodeFlags FlagsNoContract = Flags; 303406c3fb27SDimitry Andric FlagsNoContract.setAllowContract(false); 303506c3fb27SDimitry Andric 303606c3fb27SDimitry Andric SDValue PH, PL; 303706c3fb27SDimitry Andric if (Subtarget->hasFastFMAF32()) { 303806c3fb27SDimitry Andric const float c_exp = numbers::log2ef; 303906c3fb27SDimitry Andric const float cc_exp = 0x1.4ae0bep-26f; // c+cc are 49 bits 304006c3fb27SDimitry Andric const float c_exp10 = 0x1.a934f0p+1f; 304106c3fb27SDimitry Andric const float cc_exp10 = 0x1.2f346ep-24f; 304206c3fb27SDimitry Andric 304306c3fb27SDimitry Andric SDValue C = DAG.getConstantFP(IsExp10 ? c_exp10 : c_exp, SL, VT); 304406c3fb27SDimitry Andric SDValue CC = DAG.getConstantFP(IsExp10 ? cc_exp10 : cc_exp, SL, VT); 304506c3fb27SDimitry Andric 304606c3fb27SDimitry Andric PH = DAG.getNode(ISD::FMUL, SL, VT, X, C, Flags); 304706c3fb27SDimitry Andric SDValue NegPH = DAG.getNode(ISD::FNEG, SL, VT, PH, Flags); 304806c3fb27SDimitry Andric SDValue FMA0 = DAG.getNode(ISD::FMA, SL, VT, X, C, NegPH, Flags); 304906c3fb27SDimitry Andric PL = DAG.getNode(ISD::FMA, SL, VT, X, CC, FMA0, Flags); 305006c3fb27SDimitry Andric } else { 305106c3fb27SDimitry Andric const float ch_exp = 0x1.714000p+0f; 305206c3fb27SDimitry Andric const float cl_exp = 0x1.47652ap-12f; // ch + cl are 36 bits 305306c3fb27SDimitry Andric 305406c3fb27SDimitry Andric const float ch_exp10 = 0x1.a92000p+1f; 305506c3fb27SDimitry Andric const float cl_exp10 = 0x1.4f0978p-11f; 305606c3fb27SDimitry Andric 305706c3fb27SDimitry Andric SDValue CH = DAG.getConstantFP(IsExp10 ? ch_exp10 : ch_exp, SL, VT); 305806c3fb27SDimitry Andric SDValue CL = DAG.getConstantFP(IsExp10 ? cl_exp10 : cl_exp, SL, VT); 305906c3fb27SDimitry Andric 306006c3fb27SDimitry Andric SDValue XAsInt = DAG.getNode(ISD::BITCAST, SL, MVT::i32, X); 306106c3fb27SDimitry Andric SDValue MaskConst = DAG.getConstant(0xfffff000, SL, MVT::i32); 306206c3fb27SDimitry Andric SDValue XHAsInt = DAG.getNode(ISD::AND, SL, MVT::i32, XAsInt, MaskConst); 306306c3fb27SDimitry Andric SDValue XH = DAG.getNode(ISD::BITCAST, SL, VT, XHAsInt); 306406c3fb27SDimitry Andric SDValue XL = DAG.getNode(ISD::FSUB, SL, VT, X, XH, Flags); 306506c3fb27SDimitry Andric 306606c3fb27SDimitry Andric PH = DAG.getNode(ISD::FMUL, SL, VT, XH, CH, Flags); 306706c3fb27SDimitry Andric 306806c3fb27SDimitry Andric SDValue XLCL = DAG.getNode(ISD::FMUL, SL, VT, XL, CL, Flags); 306906c3fb27SDimitry Andric SDValue Mad0 = getMad(DAG, SL, VT, XL, CH, XLCL, Flags); 307006c3fb27SDimitry Andric PL = getMad(DAG, SL, VT, XH, CL, Mad0, Flags); 307106c3fb27SDimitry Andric } 307206c3fb27SDimitry Andric 30735f757f3fSDimitry Andric SDValue E = DAG.getNode(ISD::FROUNDEVEN, SL, VT, PH, Flags); 307406c3fb27SDimitry Andric 307506c3fb27SDimitry Andric // It is unsafe to contract this fsub into the PH multiply. 307606c3fb27SDimitry Andric SDValue PHSubE = DAG.getNode(ISD::FSUB, SL, VT, PH, E, FlagsNoContract); 307706c3fb27SDimitry Andric 307806c3fb27SDimitry Andric SDValue A = DAG.getNode(ISD::FADD, SL, VT, PHSubE, PL, Flags); 307906c3fb27SDimitry Andric SDValue IntE = DAG.getNode(ISD::FP_TO_SINT, SL, MVT::i32, E); 308006c3fb27SDimitry Andric SDValue Exp2 = DAG.getNode(AMDGPUISD::EXP, SL, VT, A, Flags); 308106c3fb27SDimitry Andric 308206c3fb27SDimitry Andric SDValue R = DAG.getNode(ISD::FLDEXP, SL, VT, Exp2, IntE, Flags); 308306c3fb27SDimitry Andric 308406c3fb27SDimitry Andric SDValue UnderflowCheckConst = 308506c3fb27SDimitry Andric DAG.getConstantFP(IsExp10 ? -0x1.66d3e8p+5f : -0x1.9d1da0p+6f, SL, VT); 308606c3fb27SDimitry Andric 308706c3fb27SDimitry Andric EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 308806c3fb27SDimitry Andric SDValue Zero = DAG.getConstantFP(0.0, SL, VT); 308906c3fb27SDimitry Andric SDValue Underflow = 309006c3fb27SDimitry Andric DAG.getSetCC(SL, SetCCVT, X, UnderflowCheckConst, ISD::SETOLT); 309106c3fb27SDimitry Andric 309206c3fb27SDimitry Andric R = DAG.getNode(ISD::SELECT, SL, VT, Underflow, Zero, R); 309306c3fb27SDimitry Andric const auto &Options = getTargetMachine().Options; 309406c3fb27SDimitry Andric 309506c3fb27SDimitry Andric if (!Flags.hasNoInfs() && !Options.NoInfsFPMath) { 309606c3fb27SDimitry Andric SDValue OverflowCheckConst = 309706c3fb27SDimitry Andric DAG.getConstantFP(IsExp10 ? 0x1.344136p+5f : 0x1.62e430p+6f, SL, VT); 309806c3fb27SDimitry Andric SDValue Overflow = 309906c3fb27SDimitry Andric DAG.getSetCC(SL, SetCCVT, X, OverflowCheckConst, ISD::SETOGT); 310006c3fb27SDimitry Andric SDValue Inf = 310106c3fb27SDimitry Andric DAG.getConstantFP(APFloat::getInf(APFloat::IEEEsingle()), SL, VT); 310206c3fb27SDimitry Andric R = DAG.getNode(ISD::SELECT, SL, VT, Overflow, Inf, R); 310306c3fb27SDimitry Andric } 310406c3fb27SDimitry Andric 310506c3fb27SDimitry Andric return R; 31060b57cec5SDimitry Andric } 31070b57cec5SDimitry Andric 31080b57cec5SDimitry Andric static bool isCtlzOpc(unsigned Opc) { 31090b57cec5SDimitry Andric return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF; 31100b57cec5SDimitry Andric } 31110b57cec5SDimitry Andric 31120b57cec5SDimitry Andric static bool isCttzOpc(unsigned Opc) { 31130b57cec5SDimitry Andric return Opc == ISD::CTTZ || Opc == ISD::CTTZ_ZERO_UNDEF; 31140b57cec5SDimitry Andric } 31150b57cec5SDimitry Andric 31167a6dacacSDimitry Andric SDValue AMDGPUTargetLowering::lowerCTLZResults(SDValue Op, 31177a6dacacSDimitry Andric SelectionDAG &DAG) const { 31187a6dacacSDimitry Andric auto SL = SDLoc(Op); 31190fca6ea1SDimitry Andric auto Opc = Op.getOpcode(); 31207a6dacacSDimitry Andric auto Arg = Op.getOperand(0u); 31217a6dacacSDimitry Andric auto ResultVT = Op.getValueType(); 31227a6dacacSDimitry Andric 31237a6dacacSDimitry Andric if (ResultVT != MVT::i8 && ResultVT != MVT::i16) 31247a6dacacSDimitry Andric return {}; 31257a6dacacSDimitry Andric 31260fca6ea1SDimitry Andric assert(isCtlzOpc(Opc)); 31277a6dacacSDimitry Andric assert(ResultVT == Arg.getValueType()); 31287a6dacacSDimitry Andric 31290fca6ea1SDimitry Andric const uint64_t NumBits = ResultVT.getFixedSizeInBits(); 31300fca6ea1SDimitry Andric SDValue NumExtBits = DAG.getConstant(32u - NumBits, SL, MVT::i32); 31310fca6ea1SDimitry Andric SDValue NewOp; 31320fca6ea1SDimitry Andric 31330fca6ea1SDimitry Andric if (Opc == ISD::CTLZ_ZERO_UNDEF) { 31340fca6ea1SDimitry Andric NewOp = DAG.getNode(ISD::ANY_EXTEND, SL, MVT::i32, Arg); 31350fca6ea1SDimitry Andric NewOp = DAG.getNode(ISD::SHL, SL, MVT::i32, NewOp, NumExtBits); 31360fca6ea1SDimitry Andric NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp); 31370fca6ea1SDimitry Andric } else { 31380fca6ea1SDimitry Andric NewOp = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Arg); 31390fca6ea1SDimitry Andric NewOp = DAG.getNode(Opc, SL, MVT::i32, NewOp); 31400fca6ea1SDimitry Andric NewOp = DAG.getNode(ISD::SUB, SL, MVT::i32, NewOp, NumExtBits); 31410fca6ea1SDimitry Andric } 31420fca6ea1SDimitry Andric 31437a6dacacSDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, ResultVT, NewOp); 31447a6dacacSDimitry Andric } 31457a6dacacSDimitry Andric 31460b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerCTLZ_CTTZ(SDValue Op, SelectionDAG &DAG) const { 31470b57cec5SDimitry Andric SDLoc SL(Op); 31480b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 31490b57cec5SDimitry Andric 3150349cc55cSDimitry Andric assert(isCtlzOpc(Op.getOpcode()) || isCttzOpc(Op.getOpcode())); 3151349cc55cSDimitry Andric bool Ctlz = isCtlzOpc(Op.getOpcode()); 3152349cc55cSDimitry Andric unsigned NewOpc = Ctlz ? AMDGPUISD::FFBH_U32 : AMDGPUISD::FFBL_B32; 31530b57cec5SDimitry Andric 3154349cc55cSDimitry Andric bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF || 3155349cc55cSDimitry Andric Op.getOpcode() == ISD::CTTZ_ZERO_UNDEF; 3156cb14a3feSDimitry Andric bool Is64BitScalar = !Src->isDivergent() && Src.getValueType() == MVT::i64; 31570b57cec5SDimitry Andric 3158cb14a3feSDimitry Andric if (Src.getValueType() == MVT::i32 || Is64BitScalar) { 3159349cc55cSDimitry Andric // (ctlz hi:lo) -> (umin (ffbh src), 32) 3160349cc55cSDimitry Andric // (cttz hi:lo) -> (umin (ffbl src), 32) 3161349cc55cSDimitry Andric // (ctlz_zero_undef src) -> (ffbh src) 3162349cc55cSDimitry Andric // (cttz_zero_undef src) -> (ffbl src) 3163cb14a3feSDimitry Andric 3164cb14a3feSDimitry Andric // 64-bit scalar version produce 32-bit result 3165cb14a3feSDimitry Andric // (ctlz hi:lo) -> (umin (S_FLBIT_I32_B64 src), 64) 3166cb14a3feSDimitry Andric // (cttz hi:lo) -> (umin (S_FF1_I32_B64 src), 64) 3167cb14a3feSDimitry Andric // (ctlz_zero_undef src) -> (S_FLBIT_I32_B64 src) 3168cb14a3feSDimitry Andric // (cttz_zero_undef src) -> (S_FF1_I32_B64 src) 3169349cc55cSDimitry Andric SDValue NewOpr = DAG.getNode(NewOpc, SL, MVT::i32, Src); 3170349cc55cSDimitry Andric if (!ZeroUndef) { 3171cb14a3feSDimitry Andric const SDValue ConstVal = DAG.getConstant( 3172cb14a3feSDimitry Andric Op.getValueType().getScalarSizeInBits(), SL, MVT::i32); 3173cb14a3feSDimitry Andric NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, ConstVal); 3174349cc55cSDimitry Andric } 3175cb14a3feSDimitry Andric return DAG.getNode(ISD::ZERO_EXTEND, SL, Src.getValueType(), NewOpr); 31760b57cec5SDimitry Andric } 31770b57cec5SDimitry Andric 3178349cc55cSDimitry Andric SDValue Lo, Hi; 3179349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Src, DAG); 3180349cc55cSDimitry Andric 3181349cc55cSDimitry Andric SDValue OprLo = DAG.getNode(NewOpc, SL, MVT::i32, Lo); 3182349cc55cSDimitry Andric SDValue OprHi = DAG.getNode(NewOpc, SL, MVT::i32, Hi); 3183349cc55cSDimitry Andric 3184349cc55cSDimitry Andric // (ctlz hi:lo) -> (umin3 (ffbh hi), (uaddsat (ffbh lo), 32), 64) 3185349cc55cSDimitry Andric // (cttz hi:lo) -> (umin3 (uaddsat (ffbl hi), 32), (ffbl lo), 64) 3186349cc55cSDimitry Andric // (ctlz_zero_undef hi:lo) -> (umin (ffbh hi), (add (ffbh lo), 32)) 3187349cc55cSDimitry Andric // (cttz_zero_undef hi:lo) -> (umin (add (ffbl hi), 32), (ffbl lo)) 3188349cc55cSDimitry Andric 3189349cc55cSDimitry Andric unsigned AddOpc = ZeroUndef ? ISD::ADD : ISD::UADDSAT; 3190349cc55cSDimitry Andric const SDValue Const32 = DAG.getConstant(32, SL, MVT::i32); 3191349cc55cSDimitry Andric if (Ctlz) 3192349cc55cSDimitry Andric OprLo = DAG.getNode(AddOpc, SL, MVT::i32, OprLo, Const32); 3193349cc55cSDimitry Andric else 3194349cc55cSDimitry Andric OprHi = DAG.getNode(AddOpc, SL, MVT::i32, OprHi, Const32); 3195349cc55cSDimitry Andric 3196349cc55cSDimitry Andric SDValue NewOpr; 3197349cc55cSDimitry Andric NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, OprLo, OprHi); 31980b57cec5SDimitry Andric if (!ZeroUndef) { 3199349cc55cSDimitry Andric const SDValue Const64 = DAG.getConstant(64, SL, MVT::i32); 3200349cc55cSDimitry Andric NewOpr = DAG.getNode(ISD::UMIN, SL, MVT::i32, NewOpr, Const64); 32010b57cec5SDimitry Andric } 32020b57cec5SDimitry Andric 32030b57cec5SDimitry Andric return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewOpr); 32040b57cec5SDimitry Andric } 32050b57cec5SDimitry Andric 32060b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, 32070b57cec5SDimitry Andric bool Signed) const { 3208349cc55cSDimitry Andric // The regular method converting a 64-bit integer to float roughly consists of 3209349cc55cSDimitry Andric // 2 steps: normalization and rounding. In fact, after normalization, the 3210349cc55cSDimitry Andric // conversion from a 64-bit integer to a float is essentially the same as the 3211349cc55cSDimitry Andric // one from a 32-bit integer. The only difference is that it has more 3212349cc55cSDimitry Andric // trailing bits to be rounded. To leverage the native 32-bit conversion, a 3213349cc55cSDimitry Andric // 64-bit integer could be preprocessed and fit into a 32-bit integer then 3214349cc55cSDimitry Andric // converted into the correct float number. The basic steps for the unsigned 3215349cc55cSDimitry Andric // conversion are illustrated in the following pseudo code: 3216349cc55cSDimitry Andric // 3217349cc55cSDimitry Andric // f32 uitofp(i64 u) { 3218349cc55cSDimitry Andric // i32 hi, lo = split(u); 3219349cc55cSDimitry Andric // // Only count the leading zeros in hi as we have native support of the 3220349cc55cSDimitry Andric // // conversion from i32 to f32. If hi is all 0s, the conversion is 3221349cc55cSDimitry Andric // // reduced to a 32-bit one automatically. 3222349cc55cSDimitry Andric // i32 shamt = clz(hi); // Return 32 if hi is all 0s. 3223349cc55cSDimitry Andric // u <<= shamt; 3224349cc55cSDimitry Andric // hi, lo = split(u); 3225349cc55cSDimitry Andric // hi |= (lo != 0) ? 1 : 0; // Adjust rounding bit in hi based on lo. 3226349cc55cSDimitry Andric // // convert it as a 32-bit integer and scale the result back. 3227349cc55cSDimitry Andric // return uitofp(hi) * 2^(32 - shamt); 32280b57cec5SDimitry Andric // } 3229349cc55cSDimitry Andric // 3230349cc55cSDimitry Andric // The signed one follows the same principle but uses 'ffbh_i32' to count its 3231349cc55cSDimitry Andric // sign bits instead. If 'ffbh_i32' is not available, its absolute value is 3232349cc55cSDimitry Andric // converted instead followed by negation based its sign bit. 32330b57cec5SDimitry Andric 32340b57cec5SDimitry Andric SDLoc SL(Op); 32350b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 32360b57cec5SDimitry Andric 3237349cc55cSDimitry Andric SDValue Lo, Hi; 3238349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Src, DAG); 3239349cc55cSDimitry Andric SDValue Sign; 3240349cc55cSDimitry Andric SDValue ShAmt; 3241349cc55cSDimitry Andric if (Signed && Subtarget->isGCN()) { 3242349cc55cSDimitry Andric // We also need to consider the sign bit in Lo if Hi has just sign bits, 3243349cc55cSDimitry Andric // i.e. Hi is 0 or -1. However, that only needs to take the MSB into 3244349cc55cSDimitry Andric // account. That is, the maximal shift is 3245349cc55cSDimitry Andric // - 32 if Lo and Hi have opposite signs; 3246349cc55cSDimitry Andric // - 33 if Lo and Hi have the same sign. 3247349cc55cSDimitry Andric // 3248349cc55cSDimitry Andric // Or, MaxShAmt = 33 + OppositeSign, where 3249349cc55cSDimitry Andric // 3250349cc55cSDimitry Andric // OppositeSign is defined as ((Lo ^ Hi) >> 31), which is 3251349cc55cSDimitry Andric // - -1 if Lo and Hi have opposite signs; and 3252349cc55cSDimitry Andric // - 0 otherwise. 3253349cc55cSDimitry Andric // 3254349cc55cSDimitry Andric // All in all, ShAmt is calculated as 3255349cc55cSDimitry Andric // 3256349cc55cSDimitry Andric // umin(sffbh(Hi), 33 + (Lo^Hi)>>31) - 1. 3257349cc55cSDimitry Andric // 3258349cc55cSDimitry Andric // or 3259349cc55cSDimitry Andric // 3260349cc55cSDimitry Andric // umin(sffbh(Hi) - 1, 32 + (Lo^Hi)>>31). 3261349cc55cSDimitry Andric // 3262349cc55cSDimitry Andric // to reduce the critical path. 3263349cc55cSDimitry Andric SDValue OppositeSign = DAG.getNode( 3264349cc55cSDimitry Andric ISD::SRA, SL, MVT::i32, DAG.getNode(ISD::XOR, SL, MVT::i32, Lo, Hi), 3265349cc55cSDimitry Andric DAG.getConstant(31, SL, MVT::i32)); 3266349cc55cSDimitry Andric SDValue MaxShAmt = 3267349cc55cSDimitry Andric DAG.getNode(ISD::ADD, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32), 3268349cc55cSDimitry Andric OppositeSign); 3269349cc55cSDimitry Andric // Count the leading sign bits. 3270349cc55cSDimitry Andric ShAmt = DAG.getNode(AMDGPUISD::FFBH_I32, SL, MVT::i32, Hi); 3271349cc55cSDimitry Andric // Different from unsigned conversion, the shift should be one bit less to 3272349cc55cSDimitry Andric // preserve the sign bit. 3273349cc55cSDimitry Andric ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, ShAmt, 3274349cc55cSDimitry Andric DAG.getConstant(1, SL, MVT::i32)); 3275349cc55cSDimitry Andric ShAmt = DAG.getNode(ISD::UMIN, SL, MVT::i32, ShAmt, MaxShAmt); 3276349cc55cSDimitry Andric } else { 32770b57cec5SDimitry Andric if (Signed) { 3278349cc55cSDimitry Andric // Without 'ffbh_i32', only leading zeros could be counted. Take the 3279349cc55cSDimitry Andric // absolute value first. 3280349cc55cSDimitry Andric Sign = DAG.getNode(ISD::SRA, SL, MVT::i64, Src, 3281349cc55cSDimitry Andric DAG.getConstant(63, SL, MVT::i64)); 3282349cc55cSDimitry Andric SDValue Abs = 3283349cc55cSDimitry Andric DAG.getNode(ISD::XOR, SL, MVT::i64, 3284349cc55cSDimitry Andric DAG.getNode(ISD::ADD, SL, MVT::i64, Src, Sign), Sign); 3285349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Abs, DAG); 32860b57cec5SDimitry Andric } 3287349cc55cSDimitry Andric // Count the leading zeros. 3288349cc55cSDimitry Andric ShAmt = DAG.getNode(ISD::CTLZ, SL, MVT::i32, Hi); 3289349cc55cSDimitry Andric // The shift amount for signed integers is [0, 32]. 3290349cc55cSDimitry Andric } 3291349cc55cSDimitry Andric // Normalize the given 64-bit integer. 3292349cc55cSDimitry Andric SDValue Norm = DAG.getNode(ISD::SHL, SL, MVT::i64, Src, ShAmt); 3293349cc55cSDimitry Andric // Split it again. 3294349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Norm, DAG); 3295349cc55cSDimitry Andric // Calculate the adjust bit for rounding. 3296349cc55cSDimitry Andric // (lo != 0) ? 1 : 0 => (lo >= 1) ? 1 : 0 => umin(1, lo) 3297349cc55cSDimitry Andric SDValue Adjust = DAG.getNode(ISD::UMIN, SL, MVT::i32, 3298349cc55cSDimitry Andric DAG.getConstant(1, SL, MVT::i32), Lo); 3299349cc55cSDimitry Andric // Get the 32-bit normalized integer. 3300349cc55cSDimitry Andric Norm = DAG.getNode(ISD::OR, SL, MVT::i32, Hi, Adjust); 3301349cc55cSDimitry Andric // Convert the normalized 32-bit integer into f32. 3302349cc55cSDimitry Andric unsigned Opc = 3303349cc55cSDimitry Andric (Signed && Subtarget->isGCN()) ? ISD::SINT_TO_FP : ISD::UINT_TO_FP; 3304349cc55cSDimitry Andric SDValue FVal = DAG.getNode(Opc, SL, MVT::f32, Norm); 33050b57cec5SDimitry Andric 3306349cc55cSDimitry Andric // Finally, need to scale back the converted floating number as the original 3307349cc55cSDimitry Andric // 64-bit integer is converted as a 32-bit one. 3308349cc55cSDimitry Andric ShAmt = DAG.getNode(ISD::SUB, SL, MVT::i32, DAG.getConstant(32, SL, MVT::i32), 3309349cc55cSDimitry Andric ShAmt); 3310349cc55cSDimitry Andric // On GCN, use LDEXP directly. 3311349cc55cSDimitry Andric if (Subtarget->isGCN()) 331206c3fb27SDimitry Andric return DAG.getNode(ISD::FLDEXP, SL, MVT::f32, FVal, ShAmt); 33130b57cec5SDimitry Andric 3314349cc55cSDimitry Andric // Otherwise, align 'ShAmt' to the exponent part and add it into the exponent 3315349cc55cSDimitry Andric // part directly to emulate the multiplication of 2^ShAmt. That 8-bit 3316349cc55cSDimitry Andric // exponent is enough to avoid overflowing into the sign bit. 3317349cc55cSDimitry Andric SDValue Exp = DAG.getNode(ISD::SHL, SL, MVT::i32, ShAmt, 3318349cc55cSDimitry Andric DAG.getConstant(23, SL, MVT::i32)); 3319349cc55cSDimitry Andric SDValue IVal = 3320349cc55cSDimitry Andric DAG.getNode(ISD::ADD, SL, MVT::i32, 3321349cc55cSDimitry Andric DAG.getNode(ISD::BITCAST, SL, MVT::i32, FVal), Exp); 3322349cc55cSDimitry Andric if (Signed) { 3323349cc55cSDimitry Andric // Set the sign bit. 3324349cc55cSDimitry Andric Sign = DAG.getNode(ISD::SHL, SL, MVT::i32, 3325349cc55cSDimitry Andric DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, Sign), 3326349cc55cSDimitry Andric DAG.getConstant(31, SL, MVT::i32)); 3327349cc55cSDimitry Andric IVal = DAG.getNode(ISD::OR, SL, MVT::i32, IVal, Sign); 3328349cc55cSDimitry Andric } 3329349cc55cSDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::f32, IVal); 33300b57cec5SDimitry Andric } 33310b57cec5SDimitry Andric 33320b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, 33330b57cec5SDimitry Andric bool Signed) const { 33340b57cec5SDimitry Andric SDLoc SL(Op); 33350b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 33360b57cec5SDimitry Andric 3337349cc55cSDimitry Andric SDValue Lo, Hi; 3338349cc55cSDimitry Andric std::tie(Lo, Hi) = split64BitValue(Src, DAG); 33390b57cec5SDimitry Andric 33400b57cec5SDimitry Andric SDValue CvtHi = DAG.getNode(Signed ? ISD::SINT_TO_FP : ISD::UINT_TO_FP, 33410b57cec5SDimitry Andric SL, MVT::f64, Hi); 33420b57cec5SDimitry Andric 33430b57cec5SDimitry Andric SDValue CvtLo = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f64, Lo); 33440b57cec5SDimitry Andric 334506c3fb27SDimitry Andric SDValue LdExp = DAG.getNode(ISD::FLDEXP, SL, MVT::f64, CvtHi, 33460b57cec5SDimitry Andric DAG.getConstant(32, SL, MVT::i32)); 33470b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 33480b57cec5SDimitry Andric return DAG.getNode(ISD::FADD, SL, MVT::f64, LdExp, CvtLo); 33490b57cec5SDimitry Andric } 33500b57cec5SDimitry Andric 33510b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op, 33520b57cec5SDimitry Andric SelectionDAG &DAG) const { 33530b57cec5SDimitry Andric // TODO: Factor out code common with LowerSINT_TO_FP. 33540b57cec5SDimitry Andric EVT DestVT = Op.getValueType(); 3355480093f4SDimitry Andric SDValue Src = Op.getOperand(0); 3356480093f4SDimitry Andric EVT SrcVT = Src.getValueType(); 3357480093f4SDimitry Andric 3358480093f4SDimitry Andric if (SrcVT == MVT::i16) { 3359480093f4SDimitry Andric if (DestVT == MVT::f16) 3360480093f4SDimitry Andric return Op; 3361480093f4SDimitry Andric SDLoc DL(Op); 3362480093f4SDimitry Andric 3363480093f4SDimitry Andric // Promote src to i32 3364480093f4SDimitry Andric SDValue Ext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Src); 3365480093f4SDimitry Andric return DAG.getNode(ISD::UINT_TO_FP, DL, DestVT, Ext); 3366480093f4SDimitry Andric } 3367480093f4SDimitry Andric 33681db9f3b2SDimitry Andric if (DestVT == MVT::bf16) { 33691db9f3b2SDimitry Andric SDLoc SL(Op); 33701db9f3b2SDimitry Andric SDValue ToF32 = DAG.getNode(ISD::UINT_TO_FP, SL, MVT::f32, Src); 33711db9f3b2SDimitry Andric SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SL, /*isTarget=*/true); 33721db9f3b2SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, MVT::bf16, ToF32, FPRoundFlag); 33731db9f3b2SDimitry Andric } 33741db9f3b2SDimitry Andric 33751db9f3b2SDimitry Andric if (SrcVT != MVT::i64) 33761db9f3b2SDimitry Andric return Op; 3377480093f4SDimitry Andric 33780b57cec5SDimitry Andric if (Subtarget->has16BitInsts() && DestVT == MVT::f16) { 33790b57cec5SDimitry Andric SDLoc DL(Op); 33800b57cec5SDimitry Andric 33810b57cec5SDimitry Andric SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src); 3382bdd1243dSDimitry Andric SDValue FPRoundFlag = 3383bdd1243dSDimitry Andric DAG.getIntPtrConstant(0, SDLoc(Op), /*isTarget=*/true); 33840b57cec5SDimitry Andric SDValue FPRound = 33850b57cec5SDimitry Andric DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag); 33860b57cec5SDimitry Andric 33870b57cec5SDimitry Andric return FPRound; 33880b57cec5SDimitry Andric } 33890b57cec5SDimitry Andric 33900b57cec5SDimitry Andric if (DestVT == MVT::f32) 33910b57cec5SDimitry Andric return LowerINT_TO_FP32(Op, DAG, false); 33920b57cec5SDimitry Andric 33930b57cec5SDimitry Andric assert(DestVT == MVT::f64); 33940b57cec5SDimitry Andric return LowerINT_TO_FP64(Op, DAG, false); 33950b57cec5SDimitry Andric } 33960b57cec5SDimitry Andric 33970b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op, 33980b57cec5SDimitry Andric SelectionDAG &DAG) const { 3399480093f4SDimitry Andric EVT DestVT = Op.getValueType(); 3400480093f4SDimitry Andric 3401480093f4SDimitry Andric SDValue Src = Op.getOperand(0); 3402480093f4SDimitry Andric EVT SrcVT = Src.getValueType(); 3403480093f4SDimitry Andric 3404480093f4SDimitry Andric if (SrcVT == MVT::i16) { 3405480093f4SDimitry Andric if (DestVT == MVT::f16) 3406480093f4SDimitry Andric return Op; 3407480093f4SDimitry Andric 3408480093f4SDimitry Andric SDLoc DL(Op); 3409480093f4SDimitry Andric // Promote src to i32 3410480093f4SDimitry Andric SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32, Src); 3411480093f4SDimitry Andric return DAG.getNode(ISD::SINT_TO_FP, DL, DestVT, Ext); 3412480093f4SDimitry Andric } 3413480093f4SDimitry Andric 34141db9f3b2SDimitry Andric if (DestVT == MVT::bf16) { 34151db9f3b2SDimitry Andric SDLoc SL(Op); 34161db9f3b2SDimitry Andric SDValue ToF32 = DAG.getNode(ISD::SINT_TO_FP, SL, MVT::f32, Src); 34171db9f3b2SDimitry Andric SDValue FPRoundFlag = DAG.getIntPtrConstant(0, SL, /*isTarget=*/true); 34181db9f3b2SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, MVT::bf16, ToF32, FPRoundFlag); 34191db9f3b2SDimitry Andric } 34201db9f3b2SDimitry Andric 34211db9f3b2SDimitry Andric if (SrcVT != MVT::i64) 34221db9f3b2SDimitry Andric return Op; 34230b57cec5SDimitry Andric 34240b57cec5SDimitry Andric // TODO: Factor out code common with LowerUINT_TO_FP. 34250b57cec5SDimitry Andric 34260b57cec5SDimitry Andric if (Subtarget->has16BitInsts() && DestVT == MVT::f16) { 34270b57cec5SDimitry Andric SDLoc DL(Op); 34280b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 34290b57cec5SDimitry Andric 34300b57cec5SDimitry Andric SDValue IntToFp32 = DAG.getNode(Op.getOpcode(), DL, MVT::f32, Src); 3431bdd1243dSDimitry Andric SDValue FPRoundFlag = 3432bdd1243dSDimitry Andric DAG.getIntPtrConstant(0, SDLoc(Op), /*isTarget=*/true); 34330b57cec5SDimitry Andric SDValue FPRound = 34340b57cec5SDimitry Andric DAG.getNode(ISD::FP_ROUND, DL, MVT::f16, IntToFp32, FPRoundFlag); 34350b57cec5SDimitry Andric 34360b57cec5SDimitry Andric return FPRound; 34370b57cec5SDimitry Andric } 34380b57cec5SDimitry Andric 34390b57cec5SDimitry Andric if (DestVT == MVT::f32) 34400b57cec5SDimitry Andric return LowerINT_TO_FP32(Op, DAG, true); 34410b57cec5SDimitry Andric 34420b57cec5SDimitry Andric assert(DestVT == MVT::f64); 34430b57cec5SDimitry Andric return LowerINT_TO_FP64(Op, DAG, true); 34440b57cec5SDimitry Andric } 34450b57cec5SDimitry Andric 3446fe6060f1SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_INT64(SDValue Op, SelectionDAG &DAG, 34470b57cec5SDimitry Andric bool Signed) const { 34480b57cec5SDimitry Andric SDLoc SL(Op); 34490b57cec5SDimitry Andric 34500b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 3451fe6060f1SDimitry Andric EVT SrcVT = Src.getValueType(); 34520b57cec5SDimitry Andric 3453fe6060f1SDimitry Andric assert(SrcVT == MVT::f32 || SrcVT == MVT::f64); 34540b57cec5SDimitry Andric 3455fe6060f1SDimitry Andric // The basic idea of converting a floating point number into a pair of 32-bit 3456fe6060f1SDimitry Andric // integers is illustrated as follows: 3457fe6060f1SDimitry Andric // 3458fe6060f1SDimitry Andric // tf := trunc(val); 3459fe6060f1SDimitry Andric // hif := floor(tf * 2^-32); 3460fe6060f1SDimitry Andric // lof := tf - hif * 2^32; // lof is always positive due to floor. 3461fe6060f1SDimitry Andric // hi := fptoi(hif); 3462fe6060f1SDimitry Andric // lo := fptoi(lof); 3463fe6060f1SDimitry Andric // 3464fe6060f1SDimitry Andric SDValue Trunc = DAG.getNode(ISD::FTRUNC, SL, SrcVT, Src); 3465fe6060f1SDimitry Andric SDValue Sign; 3466fe6060f1SDimitry Andric if (Signed && SrcVT == MVT::f32) { 3467fe6060f1SDimitry Andric // However, a 32-bit floating point number has only 23 bits mantissa and 3468fe6060f1SDimitry Andric // it's not enough to hold all the significant bits of `lof` if val is 3469fe6060f1SDimitry Andric // negative. To avoid the loss of precision, We need to take the absolute 3470fe6060f1SDimitry Andric // value after truncating and flip the result back based on the original 3471fe6060f1SDimitry Andric // signedness. 3472fe6060f1SDimitry Andric Sign = DAG.getNode(ISD::SRA, SL, MVT::i32, 3473fe6060f1SDimitry Andric DAG.getNode(ISD::BITCAST, SL, MVT::i32, Trunc), 3474fe6060f1SDimitry Andric DAG.getConstant(31, SL, MVT::i32)); 3475fe6060f1SDimitry Andric Trunc = DAG.getNode(ISD::FABS, SL, SrcVT, Trunc); 3476fe6060f1SDimitry Andric } 3477fe6060f1SDimitry Andric 3478fe6060f1SDimitry Andric SDValue K0, K1; 3479fe6060f1SDimitry Andric if (SrcVT == MVT::f64) { 348006c3fb27SDimitry Andric K0 = DAG.getConstantFP( 348106c3fb27SDimitry Andric llvm::bit_cast<double>(UINT64_C(/*2^-32*/ 0x3df0000000000000)), SL, 348206c3fb27SDimitry Andric SrcVT); 348306c3fb27SDimitry Andric K1 = DAG.getConstantFP( 348406c3fb27SDimitry Andric llvm::bit_cast<double>(UINT64_C(/*-2^32*/ 0xc1f0000000000000)), SL, 348506c3fb27SDimitry Andric SrcVT); 3486fe6060f1SDimitry Andric } else { 348706c3fb27SDimitry Andric K0 = DAG.getConstantFP( 348806c3fb27SDimitry Andric llvm::bit_cast<float>(UINT32_C(/*2^-32*/ 0x2f800000)), SL, SrcVT); 348906c3fb27SDimitry Andric K1 = DAG.getConstantFP( 349006c3fb27SDimitry Andric llvm::bit_cast<float>(UINT32_C(/*-2^32*/ 0xcf800000)), SL, SrcVT); 3491fe6060f1SDimitry Andric } 34920b57cec5SDimitry Andric // TODO: Should this propagate fast-math-flags? 3493fe6060f1SDimitry Andric SDValue Mul = DAG.getNode(ISD::FMUL, SL, SrcVT, Trunc, K0); 34940b57cec5SDimitry Andric 3495fe6060f1SDimitry Andric SDValue FloorMul = DAG.getNode(ISD::FFLOOR, SL, SrcVT, Mul); 34960b57cec5SDimitry Andric 3497fe6060f1SDimitry Andric SDValue Fma = DAG.getNode(ISD::FMA, SL, SrcVT, FloorMul, K1, Trunc); 34980b57cec5SDimitry Andric 3499fe6060f1SDimitry Andric SDValue Hi = DAG.getNode((Signed && SrcVT == MVT::f64) ? ISD::FP_TO_SINT 3500fe6060f1SDimitry Andric : ISD::FP_TO_UINT, 3501fe6060f1SDimitry Andric SL, MVT::i32, FloorMul); 35020b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::FP_TO_UINT, SL, MVT::i32, Fma); 35030b57cec5SDimitry Andric 3504fe6060f1SDimitry Andric SDValue Result = DAG.getNode(ISD::BITCAST, SL, MVT::i64, 3505fe6060f1SDimitry Andric DAG.getBuildVector(MVT::v2i32, SL, {Lo, Hi})); 35060b57cec5SDimitry Andric 3507fe6060f1SDimitry Andric if (Signed && SrcVT == MVT::f32) { 3508fe6060f1SDimitry Andric assert(Sign); 3509fe6060f1SDimitry Andric // Flip the result based on the signedness, which is either all 0s or 1s. 3510fe6060f1SDimitry Andric Sign = DAG.getNode(ISD::BITCAST, SL, MVT::i64, 3511fe6060f1SDimitry Andric DAG.getBuildVector(MVT::v2i32, SL, {Sign, Sign})); 3512fe6060f1SDimitry Andric // r := xor(r, sign) - sign; 3513fe6060f1SDimitry Andric Result = 3514fe6060f1SDimitry Andric DAG.getNode(ISD::SUB, SL, MVT::i64, 3515fe6060f1SDimitry Andric DAG.getNode(ISD::XOR, SL, MVT::i64, Result, Sign), Sign); 3516fe6060f1SDimitry Andric } 3517fe6060f1SDimitry Andric 3518fe6060f1SDimitry Andric return Result; 35190b57cec5SDimitry Andric } 35200b57cec5SDimitry Andric 35210b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) const { 35220b57cec5SDimitry Andric SDLoc DL(Op); 35230b57cec5SDimitry Andric SDValue N0 = Op.getOperand(0); 35240b57cec5SDimitry Andric 35250b57cec5SDimitry Andric // Convert to target node to get known bits 35260b57cec5SDimitry Andric if (N0.getValueType() == MVT::f32) 35270b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::FP_TO_FP16, DL, Op.getValueType(), N0); 35280b57cec5SDimitry Andric 35290b57cec5SDimitry Andric if (getTargetMachine().Options.UnsafeFPMath) { 35300b57cec5SDimitry Andric // There is a generic expand for FP_TO_FP16 with unsafe fast math. 35310b57cec5SDimitry Andric return SDValue(); 35320b57cec5SDimitry Andric } 35330b57cec5SDimitry Andric 35340b57cec5SDimitry Andric assert(N0.getSimpleValueType() == MVT::f64); 35350b57cec5SDimitry Andric 35360b57cec5SDimitry Andric // f64 -> f16 conversion using round-to-nearest-even rounding mode. 35370b57cec5SDimitry Andric const unsigned ExpMask = 0x7ff; 35380b57cec5SDimitry Andric const unsigned ExpBiasf64 = 1023; 35390b57cec5SDimitry Andric const unsigned ExpBiasf16 = 15; 35400b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, DL, MVT::i32); 35410b57cec5SDimitry Andric SDValue One = DAG.getConstant(1, DL, MVT::i32); 35420b57cec5SDimitry Andric SDValue U = DAG.getNode(ISD::BITCAST, DL, MVT::i64, N0); 35430b57cec5SDimitry Andric SDValue UH = DAG.getNode(ISD::SRL, DL, MVT::i64, U, 35440b57cec5SDimitry Andric DAG.getConstant(32, DL, MVT::i64)); 35450b57cec5SDimitry Andric UH = DAG.getZExtOrTrunc(UH, DL, MVT::i32); 35460b57cec5SDimitry Andric U = DAG.getZExtOrTrunc(U, DL, MVT::i32); 35470b57cec5SDimitry Andric SDValue E = DAG.getNode(ISD::SRL, DL, MVT::i32, UH, 35480b57cec5SDimitry Andric DAG.getConstant(20, DL, MVT::i64)); 35490b57cec5SDimitry Andric E = DAG.getNode(ISD::AND, DL, MVT::i32, E, 35500b57cec5SDimitry Andric DAG.getConstant(ExpMask, DL, MVT::i32)); 35510b57cec5SDimitry Andric // Subtract the fp64 exponent bias (1023) to get the real exponent and 35520b57cec5SDimitry Andric // add the f16 bias (15) to get the biased exponent for the f16 format. 35530b57cec5SDimitry Andric E = DAG.getNode(ISD::ADD, DL, MVT::i32, E, 35540b57cec5SDimitry Andric DAG.getConstant(-ExpBiasf64 + ExpBiasf16, DL, MVT::i32)); 35550b57cec5SDimitry Andric 35560b57cec5SDimitry Andric SDValue M = DAG.getNode(ISD::SRL, DL, MVT::i32, UH, 35570b57cec5SDimitry Andric DAG.getConstant(8, DL, MVT::i32)); 35580b57cec5SDimitry Andric M = DAG.getNode(ISD::AND, DL, MVT::i32, M, 35590b57cec5SDimitry Andric DAG.getConstant(0xffe, DL, MVT::i32)); 35600b57cec5SDimitry Andric 35610b57cec5SDimitry Andric SDValue MaskedSig = DAG.getNode(ISD::AND, DL, MVT::i32, UH, 35620b57cec5SDimitry Andric DAG.getConstant(0x1ff, DL, MVT::i32)); 35630b57cec5SDimitry Andric MaskedSig = DAG.getNode(ISD::OR, DL, MVT::i32, MaskedSig, U); 35640b57cec5SDimitry Andric 35650b57cec5SDimitry Andric SDValue Lo40Set = DAG.getSelectCC(DL, MaskedSig, Zero, Zero, One, ISD::SETEQ); 35660b57cec5SDimitry Andric M = DAG.getNode(ISD::OR, DL, MVT::i32, M, Lo40Set); 35670b57cec5SDimitry Andric 35680b57cec5SDimitry Andric // (M != 0 ? 0x0200 : 0) | 0x7c00; 35690b57cec5SDimitry Andric SDValue I = DAG.getNode(ISD::OR, DL, MVT::i32, 35700b57cec5SDimitry Andric DAG.getSelectCC(DL, M, Zero, DAG.getConstant(0x0200, DL, MVT::i32), 35710b57cec5SDimitry Andric Zero, ISD::SETNE), DAG.getConstant(0x7c00, DL, MVT::i32)); 35720b57cec5SDimitry Andric 35730b57cec5SDimitry Andric // N = M | (E << 12); 35740b57cec5SDimitry Andric SDValue N = DAG.getNode(ISD::OR, DL, MVT::i32, M, 35750b57cec5SDimitry Andric DAG.getNode(ISD::SHL, DL, MVT::i32, E, 35760b57cec5SDimitry Andric DAG.getConstant(12, DL, MVT::i32))); 35770b57cec5SDimitry Andric 35780b57cec5SDimitry Andric // B = clamp(1-E, 0, 13); 35790b57cec5SDimitry Andric SDValue OneSubExp = DAG.getNode(ISD::SUB, DL, MVT::i32, 35800b57cec5SDimitry Andric One, E); 35810b57cec5SDimitry Andric SDValue B = DAG.getNode(ISD::SMAX, DL, MVT::i32, OneSubExp, Zero); 35820b57cec5SDimitry Andric B = DAG.getNode(ISD::SMIN, DL, MVT::i32, B, 35830b57cec5SDimitry Andric DAG.getConstant(13, DL, MVT::i32)); 35840b57cec5SDimitry Andric 35850b57cec5SDimitry Andric SDValue SigSetHigh = DAG.getNode(ISD::OR, DL, MVT::i32, M, 35860b57cec5SDimitry Andric DAG.getConstant(0x1000, DL, MVT::i32)); 35870b57cec5SDimitry Andric 35880b57cec5SDimitry Andric SDValue D = DAG.getNode(ISD::SRL, DL, MVT::i32, SigSetHigh, B); 35890b57cec5SDimitry Andric SDValue D0 = DAG.getNode(ISD::SHL, DL, MVT::i32, D, B); 35900b57cec5SDimitry Andric SDValue D1 = DAG.getSelectCC(DL, D0, SigSetHigh, One, Zero, ISD::SETNE); 35910b57cec5SDimitry Andric D = DAG.getNode(ISD::OR, DL, MVT::i32, D, D1); 35920b57cec5SDimitry Andric 35930b57cec5SDimitry Andric SDValue V = DAG.getSelectCC(DL, E, One, D, N, ISD::SETLT); 35940b57cec5SDimitry Andric SDValue VLow3 = DAG.getNode(ISD::AND, DL, MVT::i32, V, 35950b57cec5SDimitry Andric DAG.getConstant(0x7, DL, MVT::i32)); 35960b57cec5SDimitry Andric V = DAG.getNode(ISD::SRL, DL, MVT::i32, V, 35970b57cec5SDimitry Andric DAG.getConstant(2, DL, MVT::i32)); 35980b57cec5SDimitry Andric SDValue V0 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(3, DL, MVT::i32), 35990b57cec5SDimitry Andric One, Zero, ISD::SETEQ); 36000b57cec5SDimitry Andric SDValue V1 = DAG.getSelectCC(DL, VLow3, DAG.getConstant(5, DL, MVT::i32), 36010b57cec5SDimitry Andric One, Zero, ISD::SETGT); 36020b57cec5SDimitry Andric V1 = DAG.getNode(ISD::OR, DL, MVT::i32, V0, V1); 36030b57cec5SDimitry Andric V = DAG.getNode(ISD::ADD, DL, MVT::i32, V, V1); 36040b57cec5SDimitry Andric 36050b57cec5SDimitry Andric V = DAG.getSelectCC(DL, E, DAG.getConstant(30, DL, MVT::i32), 36060b57cec5SDimitry Andric DAG.getConstant(0x7c00, DL, MVT::i32), V, ISD::SETGT); 36070b57cec5SDimitry Andric V = DAG.getSelectCC(DL, E, DAG.getConstant(1039, DL, MVT::i32), 36080b57cec5SDimitry Andric I, V, ISD::SETEQ); 36090b57cec5SDimitry Andric 36100b57cec5SDimitry Andric // Extract the sign bit. 36110b57cec5SDimitry Andric SDValue Sign = DAG.getNode(ISD::SRL, DL, MVT::i32, UH, 36120b57cec5SDimitry Andric DAG.getConstant(16, DL, MVT::i32)); 36130b57cec5SDimitry Andric Sign = DAG.getNode(ISD::AND, DL, MVT::i32, Sign, 36140b57cec5SDimitry Andric DAG.getConstant(0x8000, DL, MVT::i32)); 36150b57cec5SDimitry Andric 36160b57cec5SDimitry Andric V = DAG.getNode(ISD::OR, DL, MVT::i32, Sign, V); 36170b57cec5SDimitry Andric return DAG.getZExtOrTrunc(V, DL, Op.getValueType()); 36180b57cec5SDimitry Andric } 36190b57cec5SDimitry Andric 36201db9f3b2SDimitry Andric SDValue AMDGPUTargetLowering::LowerFP_TO_INT(const SDValue Op, 36210b57cec5SDimitry Andric SelectionDAG &DAG) const { 36220b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 3623fe6060f1SDimitry Andric unsigned OpOpcode = Op.getOpcode(); 36240b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 3625fe6060f1SDimitry Andric EVT DestVT = Op.getValueType(); 3626fe6060f1SDimitry Andric 3627fe6060f1SDimitry Andric // Will be selected natively 3628fe6060f1SDimitry Andric if (SrcVT == MVT::f16 && DestVT == MVT::i16) 3629fe6060f1SDimitry Andric return Op; 3630fe6060f1SDimitry Andric 36311db9f3b2SDimitry Andric if (SrcVT == MVT::bf16) { 36321db9f3b2SDimitry Andric SDLoc DL(Op); 36331db9f3b2SDimitry Andric SDValue PromotedSrc = DAG.getNode(ISD::FP_EXTEND, DL, MVT::f32, Src); 36341db9f3b2SDimitry Andric return DAG.getNode(Op.getOpcode(), DL, DestVT, PromotedSrc); 36351db9f3b2SDimitry Andric } 36361db9f3b2SDimitry Andric 3637fe6060f1SDimitry Andric // Promote i16 to i32 3638fe6060f1SDimitry Andric if (DestVT == MVT::i16 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) { 3639fe6060f1SDimitry Andric SDLoc DL(Op); 3640fe6060f1SDimitry Andric 3641fe6060f1SDimitry Andric SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src); 3642fe6060f1SDimitry Andric return DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FpToInt32); 3643fe6060f1SDimitry Andric } 3644fe6060f1SDimitry Andric 36451db9f3b2SDimitry Andric if (DestVT != MVT::i64) 36461db9f3b2SDimitry Andric return Op; 36471db9f3b2SDimitry Andric 3648e8d8bef9SDimitry Andric if (SrcVT == MVT::f16 || 3649e8d8bef9SDimitry Andric (SrcVT == MVT::f32 && Src.getOpcode() == ISD::FP16_TO_FP)) { 36500b57cec5SDimitry Andric SDLoc DL(Op); 36510b57cec5SDimitry Andric 3652fe6060f1SDimitry Andric SDValue FpToInt32 = DAG.getNode(OpOpcode, DL, MVT::i32, Src); 3653fe6060f1SDimitry Andric unsigned Ext = 3654fe6060f1SDimitry Andric OpOpcode == ISD::FP_TO_SINT ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 3655fe6060f1SDimitry Andric return DAG.getNode(Ext, DL, MVT::i64, FpToInt32); 36560b57cec5SDimitry Andric } 36570b57cec5SDimitry Andric 36581db9f3b2SDimitry Andric if (SrcVT == MVT::f32 || SrcVT == MVT::f64) 3659fe6060f1SDimitry Andric return LowerFP_TO_INT64(Op, DAG, OpOpcode == ISD::FP_TO_SINT); 36600b57cec5SDimitry Andric 36610b57cec5SDimitry Andric return SDValue(); 36620b57cec5SDimitry Andric } 36630b57cec5SDimitry Andric 36640b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op, 36650b57cec5SDimitry Andric SelectionDAG &DAG) const { 36660b57cec5SDimitry Andric EVT ExtraVT = cast<VTSDNode>(Op.getOperand(1))->getVT(); 36670b57cec5SDimitry Andric MVT VT = Op.getSimpleValueType(); 36680b57cec5SDimitry Andric MVT ScalarVT = VT.getScalarType(); 36690b57cec5SDimitry Andric 36700b57cec5SDimitry Andric assert(VT.isVector()); 36710b57cec5SDimitry Andric 36720b57cec5SDimitry Andric SDValue Src = Op.getOperand(0); 36730b57cec5SDimitry Andric SDLoc DL(Op); 36740b57cec5SDimitry Andric 36750b57cec5SDimitry Andric // TODO: Don't scalarize on Evergreen? 36760b57cec5SDimitry Andric unsigned NElts = VT.getVectorNumElements(); 36770b57cec5SDimitry Andric SmallVector<SDValue, 8> Args; 36780b57cec5SDimitry Andric DAG.ExtractVectorElements(Src, Args, 0, NElts); 36790b57cec5SDimitry Andric 36800b57cec5SDimitry Andric SDValue VTOp = DAG.getValueType(ExtraVT.getScalarType()); 36810b57cec5SDimitry Andric for (unsigned I = 0; I < NElts; ++I) 36820b57cec5SDimitry Andric Args[I] = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, ScalarVT, Args[I], VTOp); 36830b57cec5SDimitry Andric 36840b57cec5SDimitry Andric return DAG.getBuildVector(VT, DL, Args); 36850b57cec5SDimitry Andric } 36860b57cec5SDimitry Andric 36870b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 36880b57cec5SDimitry Andric // Custom DAG optimizations 36890b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 36900b57cec5SDimitry Andric 36910b57cec5SDimitry Andric static bool isU24(SDValue Op, SelectionDAG &DAG) { 36920b57cec5SDimitry Andric return AMDGPUTargetLowering::numBitsUnsigned(Op, DAG) <= 24; 36930b57cec5SDimitry Andric } 36940b57cec5SDimitry Andric 36950b57cec5SDimitry Andric static bool isI24(SDValue Op, SelectionDAG &DAG) { 36960b57cec5SDimitry Andric EVT VT = Op.getValueType(); 36970b57cec5SDimitry Andric return VT.getSizeInBits() >= 24 && // Types less than 24-bit should be treated 36980b57cec5SDimitry Andric // as unsigned 24-bit values. 3699349cc55cSDimitry Andric AMDGPUTargetLowering::numBitsSigned(Op, DAG) <= 24; 37000b57cec5SDimitry Andric } 37010b57cec5SDimitry Andric 3702fe6060f1SDimitry Andric static SDValue simplifyMul24(SDNode *Node24, 37030b57cec5SDimitry Andric TargetLowering::DAGCombinerInfo &DCI) { 37040b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 37055ffd83dbSDimitry Andric const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 37068bcb0991SDimitry Andric bool IsIntrin = Node24->getOpcode() == ISD::INTRINSIC_WO_CHAIN; 37078bcb0991SDimitry Andric 37088bcb0991SDimitry Andric SDValue LHS = IsIntrin ? Node24->getOperand(1) : Node24->getOperand(0); 37098bcb0991SDimitry Andric SDValue RHS = IsIntrin ? Node24->getOperand(2) : Node24->getOperand(1); 37108bcb0991SDimitry Andric unsigned NewOpcode = Node24->getOpcode(); 37118bcb0991SDimitry Andric if (IsIntrin) { 3712647cbc5dSDimitry Andric unsigned IID = Node24->getConstantOperandVal(0); 3713349cc55cSDimitry Andric switch (IID) { 3714349cc55cSDimitry Andric case Intrinsic::amdgcn_mul_i24: 3715349cc55cSDimitry Andric NewOpcode = AMDGPUISD::MUL_I24; 3716349cc55cSDimitry Andric break; 3717349cc55cSDimitry Andric case Intrinsic::amdgcn_mul_u24: 3718349cc55cSDimitry Andric NewOpcode = AMDGPUISD::MUL_U24; 3719349cc55cSDimitry Andric break; 3720349cc55cSDimitry Andric case Intrinsic::amdgcn_mulhi_i24: 3721349cc55cSDimitry Andric NewOpcode = AMDGPUISD::MULHI_I24; 3722349cc55cSDimitry Andric break; 3723349cc55cSDimitry Andric case Intrinsic::amdgcn_mulhi_u24: 3724349cc55cSDimitry Andric NewOpcode = AMDGPUISD::MULHI_U24; 3725349cc55cSDimitry Andric break; 3726349cc55cSDimitry Andric default: 3727349cc55cSDimitry Andric llvm_unreachable("Expected 24-bit mul intrinsic"); 3728349cc55cSDimitry Andric } 37298bcb0991SDimitry Andric } 37300b57cec5SDimitry Andric 37310b57cec5SDimitry Andric APInt Demanded = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 24); 37320b57cec5SDimitry Andric 37335ffd83dbSDimitry Andric // First try to simplify using SimplifyMultipleUseDemandedBits which allows 37345ffd83dbSDimitry Andric // the operands to have other uses, but will only perform simplifications that 37355ffd83dbSDimitry Andric // involve bypassing some nodes for this user. 37365ffd83dbSDimitry Andric SDValue DemandedLHS = TLI.SimplifyMultipleUseDemandedBits(LHS, Demanded, DAG); 37375ffd83dbSDimitry Andric SDValue DemandedRHS = TLI.SimplifyMultipleUseDemandedBits(RHS, Demanded, DAG); 37380b57cec5SDimitry Andric if (DemandedLHS || DemandedRHS) 37398bcb0991SDimitry Andric return DAG.getNode(NewOpcode, SDLoc(Node24), Node24->getVTList(), 37400b57cec5SDimitry Andric DemandedLHS ? DemandedLHS : LHS, 37410b57cec5SDimitry Andric DemandedRHS ? DemandedRHS : RHS); 37420b57cec5SDimitry Andric 37430b57cec5SDimitry Andric // Now try SimplifyDemandedBits which can simplify the nodes used by our 37440b57cec5SDimitry Andric // operands if this node is the only user. 37450b57cec5SDimitry Andric if (TLI.SimplifyDemandedBits(LHS, Demanded, DCI)) 37460b57cec5SDimitry Andric return SDValue(Node24, 0); 37470b57cec5SDimitry Andric if (TLI.SimplifyDemandedBits(RHS, Demanded, DCI)) 37480b57cec5SDimitry Andric return SDValue(Node24, 0); 37490b57cec5SDimitry Andric 37500b57cec5SDimitry Andric return SDValue(); 37510b57cec5SDimitry Andric } 37520b57cec5SDimitry Andric 37530b57cec5SDimitry Andric template <typename IntTy> 37540b57cec5SDimitry Andric static SDValue constantFoldBFE(SelectionDAG &DAG, IntTy Src0, uint32_t Offset, 37550b57cec5SDimitry Andric uint32_t Width, const SDLoc &DL) { 37560b57cec5SDimitry Andric if (Width + Offset < 32) { 37570b57cec5SDimitry Andric uint32_t Shl = static_cast<uint32_t>(Src0) << (32 - Offset - Width); 37580b57cec5SDimitry Andric IntTy Result = static_cast<IntTy>(Shl) >> (32 - Width); 37590b57cec5SDimitry Andric return DAG.getConstant(Result, DL, MVT::i32); 37600b57cec5SDimitry Andric } 37610b57cec5SDimitry Andric 37620b57cec5SDimitry Andric return DAG.getConstant(Src0 >> Offset, DL, MVT::i32); 37630b57cec5SDimitry Andric } 37640b57cec5SDimitry Andric 37650b57cec5SDimitry Andric static bool hasVolatileUser(SDNode *Val) { 37660b57cec5SDimitry Andric for (SDNode *U : Val->uses()) { 37670b57cec5SDimitry Andric if (MemSDNode *M = dyn_cast<MemSDNode>(U)) { 37680b57cec5SDimitry Andric if (M->isVolatile()) 37690b57cec5SDimitry Andric return true; 37700b57cec5SDimitry Andric } 37710b57cec5SDimitry Andric } 37720b57cec5SDimitry Andric 37730b57cec5SDimitry Andric return false; 37740b57cec5SDimitry Andric } 37750b57cec5SDimitry Andric 37760b57cec5SDimitry Andric bool AMDGPUTargetLowering::shouldCombineMemoryType(EVT VT) const { 37770b57cec5SDimitry Andric // i32 vectors are the canonical memory type. 37780b57cec5SDimitry Andric if (VT.getScalarType() == MVT::i32 || isTypeLegal(VT)) 37790b57cec5SDimitry Andric return false; 37800b57cec5SDimitry Andric 37810b57cec5SDimitry Andric if (!VT.isByteSized()) 37820b57cec5SDimitry Andric return false; 37830b57cec5SDimitry Andric 37840b57cec5SDimitry Andric unsigned Size = VT.getStoreSize(); 37850b57cec5SDimitry Andric 37860b57cec5SDimitry Andric if ((Size == 1 || Size == 2 || Size == 4) && !VT.isVector()) 37870b57cec5SDimitry Andric return false; 37880b57cec5SDimitry Andric 37890b57cec5SDimitry Andric if (Size == 3 || (Size > 4 && (Size % 4 != 0))) 37900b57cec5SDimitry Andric return false; 37910b57cec5SDimitry Andric 37920b57cec5SDimitry Andric return true; 37930b57cec5SDimitry Andric } 37940b57cec5SDimitry Andric 37950b57cec5SDimitry Andric // Replace load of an illegal type with a store of a bitcast to a friendlier 37960b57cec5SDimitry Andric // type. 37970b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performLoadCombine(SDNode *N, 37980b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 37990b57cec5SDimitry Andric if (!DCI.isBeforeLegalize()) 38000b57cec5SDimitry Andric return SDValue(); 38010b57cec5SDimitry Andric 38020b57cec5SDimitry Andric LoadSDNode *LN = cast<LoadSDNode>(N); 38035ffd83dbSDimitry Andric if (!LN->isSimple() || !ISD::isNormalLoad(LN) || hasVolatileUser(LN)) 38040b57cec5SDimitry Andric return SDValue(); 38050b57cec5SDimitry Andric 38060b57cec5SDimitry Andric SDLoc SL(N); 38070b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 38080b57cec5SDimitry Andric EVT VT = LN->getMemoryVT(); 38090b57cec5SDimitry Andric 38100b57cec5SDimitry Andric unsigned Size = VT.getStoreSize(); 38115ffd83dbSDimitry Andric Align Alignment = LN->getAlign(); 38125ffd83dbSDimitry Andric if (Alignment < Size && isTypeLegal(VT)) { 3813bdd1243dSDimitry Andric unsigned IsFast; 38140b57cec5SDimitry Andric unsigned AS = LN->getAddressSpace(); 38150b57cec5SDimitry Andric 38160b57cec5SDimitry Andric // Expand unaligned loads earlier than legalization. Due to visitation order 38170b57cec5SDimitry Andric // problems during legalization, the emitted instructions to pack and unpack 38180b57cec5SDimitry Andric // the bytes again are not eliminated in the case of an unaligned copy. 3819fe6060f1SDimitry Andric if (!allowsMisalignedMemoryAccesses( 3820fe6060f1SDimitry Andric VT, AS, Alignment, LN->getMemOperand()->getFlags(), &IsFast)) { 3821480093f4SDimitry Andric if (VT.isVector()) 382281ad6265SDimitry Andric return SplitVectorLoad(SDValue(LN, 0), DAG); 382381ad6265SDimitry Andric 382481ad6265SDimitry Andric SDValue Ops[2]; 38250b57cec5SDimitry Andric std::tie(Ops[0], Ops[1]) = expandUnalignedLoad(LN, DAG); 3826480093f4SDimitry Andric 38270b57cec5SDimitry Andric return DAG.getMergeValues(Ops, SDLoc(N)); 38280b57cec5SDimitry Andric } 38290b57cec5SDimitry Andric 38300b57cec5SDimitry Andric if (!IsFast) 38310b57cec5SDimitry Andric return SDValue(); 38320b57cec5SDimitry Andric } 38330b57cec5SDimitry Andric 38340b57cec5SDimitry Andric if (!shouldCombineMemoryType(VT)) 38350b57cec5SDimitry Andric return SDValue(); 38360b57cec5SDimitry Andric 38370b57cec5SDimitry Andric EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT); 38380b57cec5SDimitry Andric 38390b57cec5SDimitry Andric SDValue NewLoad 38400b57cec5SDimitry Andric = DAG.getLoad(NewVT, SL, LN->getChain(), 38410b57cec5SDimitry Andric LN->getBasePtr(), LN->getMemOperand()); 38420b57cec5SDimitry Andric 38430b57cec5SDimitry Andric SDValue BC = DAG.getNode(ISD::BITCAST, SL, VT, NewLoad); 38440b57cec5SDimitry Andric DCI.CombineTo(N, BC, NewLoad.getValue(1)); 38450b57cec5SDimitry Andric return SDValue(N, 0); 38460b57cec5SDimitry Andric } 38470b57cec5SDimitry Andric 38480b57cec5SDimitry Andric // Replace store of an illegal type with a store of a bitcast to a friendlier 38490b57cec5SDimitry Andric // type. 38500b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performStoreCombine(SDNode *N, 38510b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 38520b57cec5SDimitry Andric if (!DCI.isBeforeLegalize()) 38530b57cec5SDimitry Andric return SDValue(); 38540b57cec5SDimitry Andric 38550b57cec5SDimitry Andric StoreSDNode *SN = cast<StoreSDNode>(N); 38565ffd83dbSDimitry Andric if (!SN->isSimple() || !ISD::isNormalStore(SN)) 38570b57cec5SDimitry Andric return SDValue(); 38580b57cec5SDimitry Andric 38590b57cec5SDimitry Andric EVT VT = SN->getMemoryVT(); 38600b57cec5SDimitry Andric unsigned Size = VT.getStoreSize(); 38610b57cec5SDimitry Andric 38620b57cec5SDimitry Andric SDLoc SL(N); 38630b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 38645ffd83dbSDimitry Andric Align Alignment = SN->getAlign(); 38655ffd83dbSDimitry Andric if (Alignment < Size && isTypeLegal(VT)) { 3866bdd1243dSDimitry Andric unsigned IsFast; 38670b57cec5SDimitry Andric unsigned AS = SN->getAddressSpace(); 38680b57cec5SDimitry Andric 38690b57cec5SDimitry Andric // Expand unaligned stores earlier than legalization. Due to visitation 38700b57cec5SDimitry Andric // order problems during legalization, the emitted instructions to pack and 38710b57cec5SDimitry Andric // unpack the bytes again are not eliminated in the case of an unaligned 38720b57cec5SDimitry Andric // copy. 3873fe6060f1SDimitry Andric if (!allowsMisalignedMemoryAccesses( 3874fe6060f1SDimitry Andric VT, AS, Alignment, SN->getMemOperand()->getFlags(), &IsFast)) { 38750b57cec5SDimitry Andric if (VT.isVector()) 387681ad6265SDimitry Andric return SplitVectorStore(SDValue(SN, 0), DAG); 38770b57cec5SDimitry Andric 38780b57cec5SDimitry Andric return expandUnalignedStore(SN, DAG); 38790b57cec5SDimitry Andric } 38800b57cec5SDimitry Andric 38810b57cec5SDimitry Andric if (!IsFast) 38820b57cec5SDimitry Andric return SDValue(); 38830b57cec5SDimitry Andric } 38840b57cec5SDimitry Andric 38850b57cec5SDimitry Andric if (!shouldCombineMemoryType(VT)) 38860b57cec5SDimitry Andric return SDValue(); 38870b57cec5SDimitry Andric 38880b57cec5SDimitry Andric EVT NewVT = getEquivalentMemType(*DAG.getContext(), VT); 38890b57cec5SDimitry Andric SDValue Val = SN->getValue(); 38900b57cec5SDimitry Andric 38910b57cec5SDimitry Andric //DCI.AddToWorklist(Val.getNode()); 38920b57cec5SDimitry Andric 38930b57cec5SDimitry Andric bool OtherUses = !Val.hasOneUse(); 38940b57cec5SDimitry Andric SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NewVT, Val); 38950b57cec5SDimitry Andric if (OtherUses) { 38960b57cec5SDimitry Andric SDValue CastBack = DAG.getNode(ISD::BITCAST, SL, VT, CastVal); 38970b57cec5SDimitry Andric DAG.ReplaceAllUsesOfValueWith(Val, CastBack); 38980b57cec5SDimitry Andric } 38990b57cec5SDimitry Andric 39000b57cec5SDimitry Andric return DAG.getStore(SN->getChain(), SL, CastVal, 39010b57cec5SDimitry Andric SN->getBasePtr(), SN->getMemOperand()); 39020b57cec5SDimitry Andric } 39030b57cec5SDimitry Andric 39040b57cec5SDimitry Andric // FIXME: This should go in generic DAG combiner with an isTruncateFree check, 39050b57cec5SDimitry Andric // but isTruncateFree is inaccurate for i16 now because of SALU vs. VALU 39060b57cec5SDimitry Andric // issues. 39070b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performAssertSZExtCombine(SDNode *N, 39080b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 39090b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 39100b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 39110b57cec5SDimitry Andric 39120b57cec5SDimitry Andric // (vt2 (assertzext (truncate vt0:x), vt1)) -> 39130b57cec5SDimitry Andric // (vt2 (truncate (assertzext vt0:x, vt1))) 39140b57cec5SDimitry Andric if (N0.getOpcode() == ISD::TRUNCATE) { 39150b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 39160b57cec5SDimitry Andric EVT ExtVT = cast<VTSDNode>(N1)->getVT(); 39170b57cec5SDimitry Andric SDLoc SL(N); 39180b57cec5SDimitry Andric 39190b57cec5SDimitry Andric SDValue Src = N0.getOperand(0); 39200b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 39210b57cec5SDimitry Andric if (SrcVT.bitsGE(ExtVT)) { 39220b57cec5SDimitry Andric SDValue NewInReg = DAG.getNode(N->getOpcode(), SL, SrcVT, Src, N1); 39230b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, N->getValueType(0), NewInReg); 39240b57cec5SDimitry Andric } 39250b57cec5SDimitry Andric } 39260b57cec5SDimitry Andric 39270b57cec5SDimitry Andric return SDValue(); 39280b57cec5SDimitry Andric } 39298bcb0991SDimitry Andric 39308bcb0991SDimitry Andric SDValue AMDGPUTargetLowering::performIntrinsicWOChainCombine( 39318bcb0991SDimitry Andric SDNode *N, DAGCombinerInfo &DCI) const { 3932647cbc5dSDimitry Andric unsigned IID = N->getConstantOperandVal(0); 39338bcb0991SDimitry Andric switch (IID) { 39348bcb0991SDimitry Andric case Intrinsic::amdgcn_mul_i24: 39358bcb0991SDimitry Andric case Intrinsic::amdgcn_mul_u24: 3936349cc55cSDimitry Andric case Intrinsic::amdgcn_mulhi_i24: 3937349cc55cSDimitry Andric case Intrinsic::amdgcn_mulhi_u24: 3938fe6060f1SDimitry Andric return simplifyMul24(N, DCI); 39395ffd83dbSDimitry Andric case Intrinsic::amdgcn_fract: 39405ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq: 39415ffd83dbSDimitry Andric case Intrinsic::amdgcn_rcp_legacy: 39425ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq_legacy: 39435f757f3fSDimitry Andric case Intrinsic::amdgcn_rsq_clamp: { 39445ffd83dbSDimitry Andric // FIXME: This is probably wrong. If src is an sNaN, it won't be quieted 39455ffd83dbSDimitry Andric SDValue Src = N->getOperand(1); 39465ffd83dbSDimitry Andric return Src.isUndef() ? Src : SDValue(); 39475ffd83dbSDimitry Andric } 394806c3fb27SDimitry Andric case Intrinsic::amdgcn_frexp_exp: { 394906c3fb27SDimitry Andric // frexp_exp (fneg x) -> frexp_exp x 395006c3fb27SDimitry Andric // frexp_exp (fabs x) -> frexp_exp x 395106c3fb27SDimitry Andric // frexp_exp (fneg (fabs x)) -> frexp_exp x 395206c3fb27SDimitry Andric SDValue Src = N->getOperand(1); 395306c3fb27SDimitry Andric SDValue PeekSign = peekFPSignOps(Src); 395406c3fb27SDimitry Andric if (PeekSign == Src) 395506c3fb27SDimitry Andric return SDValue(); 395606c3fb27SDimitry Andric return SDValue(DCI.DAG.UpdateNodeOperands(N, N->getOperand(0), PeekSign), 395706c3fb27SDimitry Andric 0); 395806c3fb27SDimitry Andric } 39598bcb0991SDimitry Andric default: 39608bcb0991SDimitry Andric return SDValue(); 39618bcb0991SDimitry Andric } 39628bcb0991SDimitry Andric } 39638bcb0991SDimitry Andric 39640b57cec5SDimitry Andric /// Split the 64-bit value \p LHS into two 32-bit components, and perform the 39650b57cec5SDimitry Andric /// binary operation \p Opc to it with the corresponding constant operands. 39660b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::splitBinaryBitConstantOpImpl( 39670b57cec5SDimitry Andric DAGCombinerInfo &DCI, const SDLoc &SL, 39680b57cec5SDimitry Andric unsigned Opc, SDValue LHS, 39690b57cec5SDimitry Andric uint32_t ValLo, uint32_t ValHi) const { 39700b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 39710b57cec5SDimitry Andric SDValue Lo, Hi; 39720b57cec5SDimitry Andric std::tie(Lo, Hi) = split64BitValue(LHS, DAG); 39730b57cec5SDimitry Andric 39740b57cec5SDimitry Andric SDValue LoRHS = DAG.getConstant(ValLo, SL, MVT::i32); 39750b57cec5SDimitry Andric SDValue HiRHS = DAG.getConstant(ValHi, SL, MVT::i32); 39760b57cec5SDimitry Andric 39770b57cec5SDimitry Andric SDValue LoAnd = DAG.getNode(Opc, SL, MVT::i32, Lo, LoRHS); 39780b57cec5SDimitry Andric SDValue HiAnd = DAG.getNode(Opc, SL, MVT::i32, Hi, HiRHS); 39790b57cec5SDimitry Andric 39800b57cec5SDimitry Andric // Re-visit the ands. It's possible we eliminated one of them and it could 39810b57cec5SDimitry Andric // simplify the vector. 39820b57cec5SDimitry Andric DCI.AddToWorklist(Lo.getNode()); 39830b57cec5SDimitry Andric DCI.AddToWorklist(Hi.getNode()); 39840b57cec5SDimitry Andric 39850b57cec5SDimitry Andric SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {LoAnd, HiAnd}); 39860b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); 39870b57cec5SDimitry Andric } 39880b57cec5SDimitry Andric 39890b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performShlCombine(SDNode *N, 39900b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 39910b57cec5SDimitry Andric EVT VT = N->getValueType(0); 39920b57cec5SDimitry Andric 39930b57cec5SDimitry Andric ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); 39940b57cec5SDimitry Andric if (!RHS) 39950b57cec5SDimitry Andric return SDValue(); 39960b57cec5SDimitry Andric 39970b57cec5SDimitry Andric SDValue LHS = N->getOperand(0); 39980b57cec5SDimitry Andric unsigned RHSVal = RHS->getZExtValue(); 39990b57cec5SDimitry Andric if (!RHSVal) 40000b57cec5SDimitry Andric return LHS; 40010b57cec5SDimitry Andric 40020b57cec5SDimitry Andric SDLoc SL(N); 40030b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 40040b57cec5SDimitry Andric 40050b57cec5SDimitry Andric switch (LHS->getOpcode()) { 40060b57cec5SDimitry Andric default: 40070b57cec5SDimitry Andric break; 40080b57cec5SDimitry Andric case ISD::ZERO_EXTEND: 40090b57cec5SDimitry Andric case ISD::SIGN_EXTEND: 40100b57cec5SDimitry Andric case ISD::ANY_EXTEND: { 40110b57cec5SDimitry Andric SDValue X = LHS->getOperand(0); 40120b57cec5SDimitry Andric 40130b57cec5SDimitry Andric if (VT == MVT::i32 && RHSVal == 16 && X.getValueType() == MVT::i16 && 40140b57cec5SDimitry Andric isOperationLegal(ISD::BUILD_VECTOR, MVT::v2i16)) { 40150b57cec5SDimitry Andric // Prefer build_vector as the canonical form if packed types are legal. 40160b57cec5SDimitry Andric // (shl ([asz]ext i16:x), 16 -> build_vector 0, x 40170b57cec5SDimitry Andric SDValue Vec = DAG.getBuildVector(MVT::v2i16, SL, 40180b57cec5SDimitry Andric { DAG.getConstant(0, SL, MVT::i16), LHS->getOperand(0) }); 40190b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i32, Vec); 40200b57cec5SDimitry Andric } 40210b57cec5SDimitry Andric 40220b57cec5SDimitry Andric // shl (ext x) => zext (shl x), if shift does not overflow int 40230b57cec5SDimitry Andric if (VT != MVT::i64) 40240b57cec5SDimitry Andric break; 40250b57cec5SDimitry Andric KnownBits Known = DAG.computeKnownBits(X); 40260b57cec5SDimitry Andric unsigned LZ = Known.countMinLeadingZeros(); 40270b57cec5SDimitry Andric if (LZ < RHSVal) 40280b57cec5SDimitry Andric break; 40290b57cec5SDimitry Andric EVT XVT = X.getValueType(); 40300b57cec5SDimitry Andric SDValue Shl = DAG.getNode(ISD::SHL, SL, XVT, X, SDValue(RHS, 0)); 40310b57cec5SDimitry Andric return DAG.getZExtOrTrunc(Shl, SL, VT); 40320b57cec5SDimitry Andric } 40330b57cec5SDimitry Andric } 40340b57cec5SDimitry Andric 40350b57cec5SDimitry Andric if (VT != MVT::i64) 40360b57cec5SDimitry Andric return SDValue(); 40370b57cec5SDimitry Andric 40380b57cec5SDimitry Andric // i64 (shl x, C) -> (build_pair 0, (shl x, C -32)) 40390b57cec5SDimitry Andric 40400b57cec5SDimitry Andric // On some subtargets, 64-bit shift is a quarter rate instruction. In the 40410b57cec5SDimitry Andric // common case, splitting this into a move and a 32-bit shift is faster and 40420b57cec5SDimitry Andric // the same code size. 40430b57cec5SDimitry Andric if (RHSVal < 32) 40440b57cec5SDimitry Andric return SDValue(); 40450b57cec5SDimitry Andric 40460b57cec5SDimitry Andric SDValue ShiftAmt = DAG.getConstant(RHSVal - 32, SL, MVT::i32); 40470b57cec5SDimitry Andric 40480b57cec5SDimitry Andric SDValue Lo = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LHS); 40490b57cec5SDimitry Andric SDValue NewShift = DAG.getNode(ISD::SHL, SL, MVT::i32, Lo, ShiftAmt); 40500b57cec5SDimitry Andric 40510b57cec5SDimitry Andric const SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 40520b57cec5SDimitry Andric 40530b57cec5SDimitry Andric SDValue Vec = DAG.getBuildVector(MVT::v2i32, SL, {Zero, NewShift}); 40540b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, Vec); 40550b57cec5SDimitry Andric } 40560b57cec5SDimitry Andric 40570b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSraCombine(SDNode *N, 40580b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 40590b57cec5SDimitry Andric if (N->getValueType(0) != MVT::i64) 40600b57cec5SDimitry Andric return SDValue(); 40610b57cec5SDimitry Andric 40620b57cec5SDimitry Andric const ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); 40630b57cec5SDimitry Andric if (!RHS) 40640b57cec5SDimitry Andric return SDValue(); 40650b57cec5SDimitry Andric 40660b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 40670b57cec5SDimitry Andric SDLoc SL(N); 40680b57cec5SDimitry Andric unsigned RHSVal = RHS->getZExtValue(); 40690b57cec5SDimitry Andric 40700b57cec5SDimitry Andric // (sra i64:x, 32) -> build_pair x, (sra hi_32(x), 31) 40710b57cec5SDimitry Andric if (RHSVal == 32) { 40720b57cec5SDimitry Andric SDValue Hi = getHiHalf64(N->getOperand(0), DAG); 40730b57cec5SDimitry Andric SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi, 40740b57cec5SDimitry Andric DAG.getConstant(31, SL, MVT::i32)); 40750b57cec5SDimitry Andric 40760b57cec5SDimitry Andric SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {Hi, NewShift}); 40770b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec); 40780b57cec5SDimitry Andric } 40790b57cec5SDimitry Andric 40800b57cec5SDimitry Andric // (sra i64:x, 63) -> build_pair (sra hi_32(x), 31), (sra hi_32(x), 31) 40810b57cec5SDimitry Andric if (RHSVal == 63) { 40820b57cec5SDimitry Andric SDValue Hi = getHiHalf64(N->getOperand(0), DAG); 40830b57cec5SDimitry Andric SDValue NewShift = DAG.getNode(ISD::SRA, SL, MVT::i32, Hi, 40840b57cec5SDimitry Andric DAG.getConstant(31, SL, MVT::i32)); 40850b57cec5SDimitry Andric SDValue BuildVec = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, NewShift}); 40860b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildVec); 40870b57cec5SDimitry Andric } 40880b57cec5SDimitry Andric 40890b57cec5SDimitry Andric return SDValue(); 40900b57cec5SDimitry Andric } 40910b57cec5SDimitry Andric 40920b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N, 40930b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 40940b57cec5SDimitry Andric auto *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1)); 40950b57cec5SDimitry Andric if (!RHS) 40960b57cec5SDimitry Andric return SDValue(); 40970b57cec5SDimitry Andric 40980b57cec5SDimitry Andric EVT VT = N->getValueType(0); 40990b57cec5SDimitry Andric SDValue LHS = N->getOperand(0); 41000b57cec5SDimitry Andric unsigned ShiftAmt = RHS->getZExtValue(); 41010b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 41020b57cec5SDimitry Andric SDLoc SL(N); 41030b57cec5SDimitry Andric 41040b57cec5SDimitry Andric // fold (srl (and x, c1 << c2), c2) -> (and (srl(x, c2), c1) 41050b57cec5SDimitry Andric // this improves the ability to match BFE patterns in isel. 41060b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::AND) { 41070b57cec5SDimitry Andric if (auto *Mask = dyn_cast<ConstantSDNode>(LHS.getOperand(1))) { 410881ad6265SDimitry Andric unsigned MaskIdx, MaskLen; 410981ad6265SDimitry Andric if (Mask->getAPIntValue().isShiftedMask(MaskIdx, MaskLen) && 411081ad6265SDimitry Andric MaskIdx == ShiftAmt) { 41110b57cec5SDimitry Andric return DAG.getNode( 41120b57cec5SDimitry Andric ISD::AND, SL, VT, 41130b57cec5SDimitry Andric DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(0), N->getOperand(1)), 41140b57cec5SDimitry Andric DAG.getNode(ISD::SRL, SL, VT, LHS.getOperand(1), N->getOperand(1))); 41150b57cec5SDimitry Andric } 41160b57cec5SDimitry Andric } 41170b57cec5SDimitry Andric } 41180b57cec5SDimitry Andric 41190b57cec5SDimitry Andric if (VT != MVT::i64) 41200b57cec5SDimitry Andric return SDValue(); 41210b57cec5SDimitry Andric 41220b57cec5SDimitry Andric if (ShiftAmt < 32) 41230b57cec5SDimitry Andric return SDValue(); 41240b57cec5SDimitry Andric 41250b57cec5SDimitry Andric // srl i64:x, C for C >= 32 41260b57cec5SDimitry Andric // => 41270b57cec5SDimitry Andric // build_pair (srl hi_32(x), C - 32), 0 41280b57cec5SDimitry Andric SDValue Zero = DAG.getConstant(0, SL, MVT::i32); 41290b57cec5SDimitry Andric 4130349cc55cSDimitry Andric SDValue Hi = getHiHalf64(LHS, DAG); 41310b57cec5SDimitry Andric 41320b57cec5SDimitry Andric SDValue NewConst = DAG.getConstant(ShiftAmt - 32, SL, MVT::i32); 41330b57cec5SDimitry Andric SDValue NewShift = DAG.getNode(ISD::SRL, SL, MVT::i32, Hi, NewConst); 41340b57cec5SDimitry Andric 41350b57cec5SDimitry Andric SDValue BuildPair = DAG.getBuildVector(MVT::v2i32, SL, {NewShift, Zero}); 41360b57cec5SDimitry Andric 41370b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair); 41380b57cec5SDimitry Andric } 41390b57cec5SDimitry Andric 41400b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performTruncateCombine( 41410b57cec5SDimitry Andric SDNode *N, DAGCombinerInfo &DCI) const { 41420b57cec5SDimitry Andric SDLoc SL(N); 41430b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 41440b57cec5SDimitry Andric EVT VT = N->getValueType(0); 41450b57cec5SDimitry Andric SDValue Src = N->getOperand(0); 41460b57cec5SDimitry Andric 41470b57cec5SDimitry Andric // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x) 41480b57cec5SDimitry Andric if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) { 41490b57cec5SDimitry Andric SDValue Vec = Src.getOperand(0); 41500b57cec5SDimitry Andric if (Vec.getOpcode() == ISD::BUILD_VECTOR) { 41510b57cec5SDimitry Andric SDValue Elt0 = Vec.getOperand(0); 41520b57cec5SDimitry Andric EVT EltVT = Elt0.getValueType(); 4153e8d8bef9SDimitry Andric if (VT.getFixedSizeInBits() <= EltVT.getFixedSizeInBits()) { 41540b57cec5SDimitry Andric if (EltVT.isFloatingPoint()) { 41550b57cec5SDimitry Andric Elt0 = DAG.getNode(ISD::BITCAST, SL, 41560b57cec5SDimitry Andric EltVT.changeTypeToInteger(), Elt0); 41570b57cec5SDimitry Andric } 41580b57cec5SDimitry Andric 41590b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, VT, Elt0); 41600b57cec5SDimitry Andric } 41610b57cec5SDimitry Andric } 41620b57cec5SDimitry Andric } 41630b57cec5SDimitry Andric 41640b57cec5SDimitry Andric // Equivalent of above for accessing the high element of a vector as an 41650b57cec5SDimitry Andric // integer operation. 41660b57cec5SDimitry Andric // trunc (srl (bitcast (build_vector x, y))), 16 -> trunc (bitcast y) 41670b57cec5SDimitry Andric if (Src.getOpcode() == ISD::SRL && !VT.isVector()) { 41680b57cec5SDimitry Andric if (auto K = isConstOrConstSplat(Src.getOperand(1))) { 41690b57cec5SDimitry Andric if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) { 41700b57cec5SDimitry Andric SDValue BV = stripBitcast(Src.getOperand(0)); 41710b57cec5SDimitry Andric if (BV.getOpcode() == ISD::BUILD_VECTOR && 41720b57cec5SDimitry Andric BV.getValueType().getVectorNumElements() == 2) { 41730b57cec5SDimitry Andric SDValue SrcElt = BV.getOperand(1); 41740b57cec5SDimitry Andric EVT SrcEltVT = SrcElt.getValueType(); 41750b57cec5SDimitry Andric if (SrcEltVT.isFloatingPoint()) { 41760b57cec5SDimitry Andric SrcElt = DAG.getNode(ISD::BITCAST, SL, 41770b57cec5SDimitry Andric SrcEltVT.changeTypeToInteger(), SrcElt); 41780b57cec5SDimitry Andric } 41790b57cec5SDimitry Andric 41800b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, VT, SrcElt); 41810b57cec5SDimitry Andric } 41820b57cec5SDimitry Andric } 41830b57cec5SDimitry Andric } 41840b57cec5SDimitry Andric } 41850b57cec5SDimitry Andric 41860b57cec5SDimitry Andric // Partially shrink 64-bit shifts to 32-bit if reduced to 16-bit. 41870b57cec5SDimitry Andric // 41880b57cec5SDimitry Andric // i16 (trunc (srl i64:x, K)), K <= 16 -> 41890b57cec5SDimitry Andric // i16 (trunc (srl (i32 (trunc x), K))) 41900b57cec5SDimitry Andric if (VT.getScalarSizeInBits() < 32) { 41910b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 41920b57cec5SDimitry Andric if (SrcVT.getScalarSizeInBits() > 32 && 41930b57cec5SDimitry Andric (Src.getOpcode() == ISD::SRL || 41940b57cec5SDimitry Andric Src.getOpcode() == ISD::SRA || 41950b57cec5SDimitry Andric Src.getOpcode() == ISD::SHL)) { 41960b57cec5SDimitry Andric SDValue Amt = Src.getOperand(1); 41970b57cec5SDimitry Andric KnownBits Known = DAG.computeKnownBits(Amt); 4198bdd1243dSDimitry Andric 4199bdd1243dSDimitry Andric // - For left shifts, do the transform as long as the shift 4200bdd1243dSDimitry Andric // amount is still legal for i32, so when ShiftAmt < 32 (<= 31) 4201bdd1243dSDimitry Andric // - For right shift, do it if ShiftAmt <= (32 - Size) to avoid 4202bdd1243dSDimitry Andric // losing information stored in the high bits when truncating. 4203bdd1243dSDimitry Andric const unsigned MaxCstSize = 4204bdd1243dSDimitry Andric (Src.getOpcode() == ISD::SHL) ? 31 : (32 - VT.getScalarSizeInBits()); 4205bdd1243dSDimitry Andric if (Known.getMaxValue().ule(MaxCstSize)) { 42060b57cec5SDimitry Andric EVT MidVT = VT.isVector() ? 42070b57cec5SDimitry Andric EVT::getVectorVT(*DAG.getContext(), MVT::i32, 42080b57cec5SDimitry Andric VT.getVectorNumElements()) : MVT::i32; 42090b57cec5SDimitry Andric 42100b57cec5SDimitry Andric EVT NewShiftVT = getShiftAmountTy(MidVT, DAG.getDataLayout()); 42110b57cec5SDimitry Andric SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, MidVT, 42120b57cec5SDimitry Andric Src.getOperand(0)); 42130b57cec5SDimitry Andric DCI.AddToWorklist(Trunc.getNode()); 42140b57cec5SDimitry Andric 42150b57cec5SDimitry Andric if (Amt.getValueType() != NewShiftVT) { 42160b57cec5SDimitry Andric Amt = DAG.getZExtOrTrunc(Amt, SL, NewShiftVT); 42170b57cec5SDimitry Andric DCI.AddToWorklist(Amt.getNode()); 42180b57cec5SDimitry Andric } 42190b57cec5SDimitry Andric 42200b57cec5SDimitry Andric SDValue ShrunkShift = DAG.getNode(Src.getOpcode(), SL, MidVT, 42210b57cec5SDimitry Andric Trunc, Amt); 42220b57cec5SDimitry Andric return DAG.getNode(ISD::TRUNCATE, SL, VT, ShrunkShift); 42230b57cec5SDimitry Andric } 42240b57cec5SDimitry Andric } 42250b57cec5SDimitry Andric } 42260b57cec5SDimitry Andric 42270b57cec5SDimitry Andric return SDValue(); 42280b57cec5SDimitry Andric } 42290b57cec5SDimitry Andric 42300b57cec5SDimitry Andric // We need to specifically handle i64 mul here to avoid unnecessary conversion 42310b57cec5SDimitry Andric // instructions. If we only match on the legalized i64 mul expansion, 42320b57cec5SDimitry Andric // SimplifyDemandedBits will be unable to remove them because there will be 42330b57cec5SDimitry Andric // multiple uses due to the separate mul + mulh[su]. 42340b57cec5SDimitry Andric static SDValue getMul24(SelectionDAG &DAG, const SDLoc &SL, 42350b57cec5SDimitry Andric SDValue N0, SDValue N1, unsigned Size, bool Signed) { 42360b57cec5SDimitry Andric if (Size <= 32) { 42370b57cec5SDimitry Andric unsigned MulOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24; 42380b57cec5SDimitry Andric return DAG.getNode(MulOpc, SL, MVT::i32, N0, N1); 42390b57cec5SDimitry Andric } 42400b57cec5SDimitry Andric 4241e8d8bef9SDimitry Andric unsigned MulLoOpc = Signed ? AMDGPUISD::MUL_I24 : AMDGPUISD::MUL_U24; 4242e8d8bef9SDimitry Andric unsigned MulHiOpc = Signed ? AMDGPUISD::MULHI_I24 : AMDGPUISD::MULHI_U24; 42430b57cec5SDimitry Andric 4244e8d8bef9SDimitry Andric SDValue MulLo = DAG.getNode(MulLoOpc, SL, MVT::i32, N0, N1); 4245e8d8bef9SDimitry Andric SDValue MulHi = DAG.getNode(MulHiOpc, SL, MVT::i32, N0, N1); 42460b57cec5SDimitry Andric 4247e8d8bef9SDimitry Andric return DAG.getNode(ISD::BUILD_PAIR, SL, MVT::i64, MulLo, MulHi); 42480b57cec5SDimitry Andric } 42490b57cec5SDimitry Andric 425006c3fb27SDimitry Andric /// If \p V is an add of a constant 1, returns the other operand. Otherwise 425106c3fb27SDimitry Andric /// return SDValue(). 425206c3fb27SDimitry Andric static SDValue getAddOneOp(const SDNode *V) { 425306c3fb27SDimitry Andric if (V->getOpcode() != ISD::ADD) 425406c3fb27SDimitry Andric return SDValue(); 425506c3fb27SDimitry Andric 42565f757f3fSDimitry Andric return isOneConstant(V->getOperand(1)) ? V->getOperand(0) : SDValue(); 425706c3fb27SDimitry Andric } 425806c3fb27SDimitry Andric 42590b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N, 42600b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 42610fca6ea1SDimitry Andric assert(N->getOpcode() == ISD::MUL); 42620b57cec5SDimitry Andric EVT VT = N->getValueType(0); 42630b57cec5SDimitry Andric 4264fe6060f1SDimitry Andric // Don't generate 24-bit multiplies on values that are in SGPRs, since 4265fe6060f1SDimitry Andric // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs 4266fe6060f1SDimitry Andric // unnecessarily). isDivergent() is used as an approximation of whether the 4267fe6060f1SDimitry Andric // value is in an SGPR. 4268fe6060f1SDimitry Andric if (!N->isDivergent()) 4269fe6060f1SDimitry Andric return SDValue(); 4270fe6060f1SDimitry Andric 42710b57cec5SDimitry Andric unsigned Size = VT.getSizeInBits(); 42720b57cec5SDimitry Andric if (VT.isVector() || Size > 64) 42730b57cec5SDimitry Andric return SDValue(); 42740b57cec5SDimitry Andric 42750b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 42760b57cec5SDimitry Andric SDLoc DL(N); 42770b57cec5SDimitry Andric 42780b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 42790b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 42800b57cec5SDimitry Andric 428106c3fb27SDimitry Andric // Undo InstCombine canonicalize X * (Y + 1) -> X * Y + X to enable mad 428206c3fb27SDimitry Andric // matching. 428306c3fb27SDimitry Andric 428406c3fb27SDimitry Andric // mul x, (add y, 1) -> add (mul x, y), x 428506c3fb27SDimitry Andric auto IsFoldableAdd = [](SDValue V) -> SDValue { 428606c3fb27SDimitry Andric SDValue AddOp = getAddOneOp(V.getNode()); 428706c3fb27SDimitry Andric if (!AddOp) 428806c3fb27SDimitry Andric return SDValue(); 428906c3fb27SDimitry Andric 429006c3fb27SDimitry Andric if (V.hasOneUse() || all_of(V->uses(), [](const SDNode *U) -> bool { 429106c3fb27SDimitry Andric return U->getOpcode() == ISD::MUL; 429206c3fb27SDimitry Andric })) 429306c3fb27SDimitry Andric return AddOp; 429406c3fb27SDimitry Andric 429506c3fb27SDimitry Andric return SDValue(); 429606c3fb27SDimitry Andric }; 429706c3fb27SDimitry Andric 429806c3fb27SDimitry Andric // FIXME: The selection pattern is not properly checking for commuted 429906c3fb27SDimitry Andric // operands, so we have to place the mul in the LHS 430006c3fb27SDimitry Andric if (SDValue MulOper = IsFoldableAdd(N0)) { 430106c3fb27SDimitry Andric SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N1, MulOper); 430206c3fb27SDimitry Andric return DAG.getNode(ISD::ADD, DL, VT, MulVal, N1); 430306c3fb27SDimitry Andric } 430406c3fb27SDimitry Andric 430506c3fb27SDimitry Andric if (SDValue MulOper = IsFoldableAdd(N1)) { 430606c3fb27SDimitry Andric SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N0, MulOper); 430706c3fb27SDimitry Andric return DAG.getNode(ISD::ADD, DL, VT, MulVal, N0); 430806c3fb27SDimitry Andric } 430906c3fb27SDimitry Andric 431006c3fb27SDimitry Andric // There are i16 integer mul/mad. 431106c3fb27SDimitry Andric if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16)) 431206c3fb27SDimitry Andric return SDValue(); 431306c3fb27SDimitry Andric 43140b57cec5SDimitry Andric // SimplifyDemandedBits has the annoying habit of turning useful zero_extends 43150b57cec5SDimitry Andric // in the source into any_extends if the result of the mul is truncated. Since 43160b57cec5SDimitry Andric // we can assume the high bits are whatever we want, use the underlying value 43170b57cec5SDimitry Andric // to avoid the unknown high bits from interfering. 43180b57cec5SDimitry Andric if (N0.getOpcode() == ISD::ANY_EXTEND) 43190b57cec5SDimitry Andric N0 = N0.getOperand(0); 43200b57cec5SDimitry Andric 43210b57cec5SDimitry Andric if (N1.getOpcode() == ISD::ANY_EXTEND) 43220b57cec5SDimitry Andric N1 = N1.getOperand(0); 43230b57cec5SDimitry Andric 43240b57cec5SDimitry Andric SDValue Mul; 43250b57cec5SDimitry Andric 43260b57cec5SDimitry Andric if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) { 43270b57cec5SDimitry Andric N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); 43280b57cec5SDimitry Andric N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); 43290b57cec5SDimitry Andric Mul = getMul24(DAG, DL, N0, N1, Size, false); 43300b57cec5SDimitry Andric } else if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) { 43310b57cec5SDimitry Andric N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); 43320b57cec5SDimitry Andric N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); 43330b57cec5SDimitry Andric Mul = getMul24(DAG, DL, N0, N1, Size, true); 43340b57cec5SDimitry Andric } else { 43350b57cec5SDimitry Andric return SDValue(); 43360b57cec5SDimitry Andric } 43370b57cec5SDimitry Andric 43380b57cec5SDimitry Andric // We need to use sext even for MUL_U24, because MUL_U24 is used 43390b57cec5SDimitry Andric // for signed multiply of 8 and 16-bit types. 43400b57cec5SDimitry Andric return DAG.getSExtOrTrunc(Mul, DL, VT); 43410b57cec5SDimitry Andric } 43420b57cec5SDimitry Andric 43434824e7fdSDimitry Andric SDValue 43444824e7fdSDimitry Andric AMDGPUTargetLowering::performMulLoHiCombine(SDNode *N, 43454824e7fdSDimitry Andric DAGCombinerInfo &DCI) const { 43464824e7fdSDimitry Andric if (N->getValueType(0) != MVT::i32) 43474824e7fdSDimitry Andric return SDValue(); 43484824e7fdSDimitry Andric 43494824e7fdSDimitry Andric SelectionDAG &DAG = DCI.DAG; 43504824e7fdSDimitry Andric SDLoc DL(N); 43514824e7fdSDimitry Andric 4352*c80e69b0SDimitry Andric bool Signed = N->getOpcode() == ISD::SMUL_LOHI; 43534824e7fdSDimitry Andric SDValue N0 = N->getOperand(0); 43544824e7fdSDimitry Andric SDValue N1 = N->getOperand(1); 43554824e7fdSDimitry Andric 43564824e7fdSDimitry Andric // SimplifyDemandedBits has the annoying habit of turning useful zero_extends 43574824e7fdSDimitry Andric // in the source into any_extends if the result of the mul is truncated. Since 43584824e7fdSDimitry Andric // we can assume the high bits are whatever we want, use the underlying value 43594824e7fdSDimitry Andric // to avoid the unknown high bits from interfering. 43604824e7fdSDimitry Andric if (N0.getOpcode() == ISD::ANY_EXTEND) 43614824e7fdSDimitry Andric N0 = N0.getOperand(0); 43624824e7fdSDimitry Andric if (N1.getOpcode() == ISD::ANY_EXTEND) 43634824e7fdSDimitry Andric N1 = N1.getOperand(0); 43644824e7fdSDimitry Andric 43654824e7fdSDimitry Andric // Try to use two fast 24-bit multiplies (one for each half of the result) 43664824e7fdSDimitry Andric // instead of one slow extending multiply. 4367*c80e69b0SDimitry Andric unsigned LoOpcode = 0; 4368*c80e69b0SDimitry Andric unsigned HiOpcode = 0; 4369*c80e69b0SDimitry Andric if (Signed) { 4370*c80e69b0SDimitry Andric if (Subtarget->hasMulI24() && isI24(N0, DAG) && isI24(N1, DAG)) { 4371*c80e69b0SDimitry Andric N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); 4372*c80e69b0SDimitry Andric N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); 4373*c80e69b0SDimitry Andric LoOpcode = AMDGPUISD::MUL_I24; 4374*c80e69b0SDimitry Andric HiOpcode = AMDGPUISD::MULHI_I24; 4375*c80e69b0SDimitry Andric } 4376*c80e69b0SDimitry Andric } else { 43774824e7fdSDimitry Andric if (Subtarget->hasMulU24() && isU24(N0, DAG) && isU24(N1, DAG)) { 43784824e7fdSDimitry Andric N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); 43794824e7fdSDimitry Andric N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); 43804824e7fdSDimitry Andric LoOpcode = AMDGPUISD::MUL_U24; 43814824e7fdSDimitry Andric HiOpcode = AMDGPUISD::MULHI_U24; 43824824e7fdSDimitry Andric } 4383*c80e69b0SDimitry Andric } 4384*c80e69b0SDimitry Andric if (!LoOpcode) 4385*c80e69b0SDimitry Andric return SDValue(); 43864824e7fdSDimitry Andric 43874824e7fdSDimitry Andric SDValue Lo = DAG.getNode(LoOpcode, DL, MVT::i32, N0, N1); 43884824e7fdSDimitry Andric SDValue Hi = DAG.getNode(HiOpcode, DL, MVT::i32, N0, N1); 43894824e7fdSDimitry Andric DCI.CombineTo(N, Lo, Hi); 43904824e7fdSDimitry Andric return SDValue(N, 0); 43914824e7fdSDimitry Andric } 43924824e7fdSDimitry Andric 43930b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulhsCombine(SDNode *N, 43940b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 43950b57cec5SDimitry Andric EVT VT = N->getValueType(0); 43960b57cec5SDimitry Andric 43970b57cec5SDimitry Andric if (!Subtarget->hasMulI24() || VT.isVector()) 43980b57cec5SDimitry Andric return SDValue(); 43990b57cec5SDimitry Andric 4400fe6060f1SDimitry Andric // Don't generate 24-bit multiplies on values that are in SGPRs, since 4401fe6060f1SDimitry Andric // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs 4402fe6060f1SDimitry Andric // unnecessarily). isDivergent() is used as an approximation of whether the 4403fe6060f1SDimitry Andric // value is in an SGPR. 4404fe6060f1SDimitry Andric // This doesn't apply if no s_mul_hi is available (since we'll end up with a 4405fe6060f1SDimitry Andric // valu op anyway) 4406fe6060f1SDimitry Andric if (Subtarget->hasSMulHi() && !N->isDivergent()) 4407fe6060f1SDimitry Andric return SDValue(); 4408fe6060f1SDimitry Andric 44090b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 44100b57cec5SDimitry Andric SDLoc DL(N); 44110b57cec5SDimitry Andric 44120b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 44130b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 44140b57cec5SDimitry Andric 44150b57cec5SDimitry Andric if (!isI24(N0, DAG) || !isI24(N1, DAG)) 44160b57cec5SDimitry Andric return SDValue(); 44170b57cec5SDimitry Andric 44180b57cec5SDimitry Andric N0 = DAG.getSExtOrTrunc(N0, DL, MVT::i32); 44190b57cec5SDimitry Andric N1 = DAG.getSExtOrTrunc(N1, DL, MVT::i32); 44200b57cec5SDimitry Andric 44210b57cec5SDimitry Andric SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_I24, DL, MVT::i32, N0, N1); 44220b57cec5SDimitry Andric DCI.AddToWorklist(Mulhi.getNode()); 44230b57cec5SDimitry Andric return DAG.getSExtOrTrunc(Mulhi, DL, VT); 44240b57cec5SDimitry Andric } 44250b57cec5SDimitry Andric 44260b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performMulhuCombine(SDNode *N, 44270b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 44280b57cec5SDimitry Andric EVT VT = N->getValueType(0); 44290b57cec5SDimitry Andric 44300b57cec5SDimitry Andric if (!Subtarget->hasMulU24() || VT.isVector() || VT.getSizeInBits() > 32) 44310b57cec5SDimitry Andric return SDValue(); 44320b57cec5SDimitry Andric 4433fe6060f1SDimitry Andric // Don't generate 24-bit multiplies on values that are in SGPRs, since 4434fe6060f1SDimitry Andric // we only have a 32-bit scalar multiply (avoid values being moved to VGPRs 4435fe6060f1SDimitry Andric // unnecessarily). isDivergent() is used as an approximation of whether the 4436fe6060f1SDimitry Andric // value is in an SGPR. 4437fe6060f1SDimitry Andric // This doesn't apply if no s_mul_hi is available (since we'll end up with a 4438fe6060f1SDimitry Andric // valu op anyway) 4439fe6060f1SDimitry Andric if (Subtarget->hasSMulHi() && !N->isDivergent()) 4440fe6060f1SDimitry Andric return SDValue(); 4441fe6060f1SDimitry Andric 44420b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 44430b57cec5SDimitry Andric SDLoc DL(N); 44440b57cec5SDimitry Andric 44450b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 44460b57cec5SDimitry Andric SDValue N1 = N->getOperand(1); 44470b57cec5SDimitry Andric 44480b57cec5SDimitry Andric if (!isU24(N0, DAG) || !isU24(N1, DAG)) 44490b57cec5SDimitry Andric return SDValue(); 44500b57cec5SDimitry Andric 44510b57cec5SDimitry Andric N0 = DAG.getZExtOrTrunc(N0, DL, MVT::i32); 44520b57cec5SDimitry Andric N1 = DAG.getZExtOrTrunc(N1, DL, MVT::i32); 44530b57cec5SDimitry Andric 44540b57cec5SDimitry Andric SDValue Mulhi = DAG.getNode(AMDGPUISD::MULHI_U24, DL, MVT::i32, N0, N1); 44550b57cec5SDimitry Andric DCI.AddToWorklist(Mulhi.getNode()); 44560b57cec5SDimitry Andric return DAG.getZExtOrTrunc(Mulhi, DL, VT); 44570b57cec5SDimitry Andric } 44580b57cec5SDimitry Andric 44590b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getFFBX_U32(SelectionDAG &DAG, 44600b57cec5SDimitry Andric SDValue Op, 44610b57cec5SDimitry Andric const SDLoc &DL, 44620b57cec5SDimitry Andric unsigned Opc) const { 44630b57cec5SDimitry Andric EVT VT = Op.getValueType(); 44640b57cec5SDimitry Andric EVT LegalVT = getTypeToTransformTo(*DAG.getContext(), VT); 44650b57cec5SDimitry Andric if (LegalVT != MVT::i32 && (Subtarget->has16BitInsts() && 44660b57cec5SDimitry Andric LegalVT != MVT::i16)) 44670b57cec5SDimitry Andric return SDValue(); 44680b57cec5SDimitry Andric 44690b57cec5SDimitry Andric if (VT != MVT::i32) 44700b57cec5SDimitry Andric Op = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Op); 44710b57cec5SDimitry Andric 44720b57cec5SDimitry Andric SDValue FFBX = DAG.getNode(Opc, DL, MVT::i32, Op); 44730b57cec5SDimitry Andric if (VT != MVT::i32) 44740b57cec5SDimitry Andric FFBX = DAG.getNode(ISD::TRUNCATE, DL, VT, FFBX); 44750b57cec5SDimitry Andric 44760b57cec5SDimitry Andric return FFBX; 44770b57cec5SDimitry Andric } 44780b57cec5SDimitry Andric 44790b57cec5SDimitry Andric // The native instructions return -1 on 0 input. Optimize out a select that 44800b57cec5SDimitry Andric // produces -1 on 0. 44810b57cec5SDimitry Andric // 44820b57cec5SDimitry Andric // TODO: If zero is not undef, we could also do this if the output is compared 44830b57cec5SDimitry Andric // against the bitwidth. 44840b57cec5SDimitry Andric // 44850b57cec5SDimitry Andric // TODO: Should probably combine against FFBH_U32 instead of ctlz directly. 44860b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performCtlz_CttzCombine(const SDLoc &SL, SDValue Cond, 44870b57cec5SDimitry Andric SDValue LHS, SDValue RHS, 44880b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 44895f757f3fSDimitry Andric if (!isNullConstant(Cond.getOperand(1))) 44900b57cec5SDimitry Andric return SDValue(); 44910b57cec5SDimitry Andric 44920b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 44930b57cec5SDimitry Andric ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); 44940b57cec5SDimitry Andric SDValue CmpLHS = Cond.getOperand(0); 44950b57cec5SDimitry Andric 44960b57cec5SDimitry Andric // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x 44970b57cec5SDimitry Andric // select (setcc x, 0, eq), -1, (cttz_zero_undef x) -> ffbl_u32 x 44980b57cec5SDimitry Andric if (CCOpcode == ISD::SETEQ && 44990b57cec5SDimitry Andric (isCtlzOpc(RHS.getOpcode()) || isCttzOpc(RHS.getOpcode())) && 450006c3fb27SDimitry Andric RHS.getOperand(0) == CmpLHS && isAllOnesConstant(LHS)) { 45015ffd83dbSDimitry Andric unsigned Opc = 45025ffd83dbSDimitry Andric isCttzOpc(RHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32; 45030b57cec5SDimitry Andric return getFFBX_U32(DAG, CmpLHS, SL, Opc); 45040b57cec5SDimitry Andric } 45050b57cec5SDimitry Andric 45060b57cec5SDimitry Andric // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x 45070b57cec5SDimitry Andric // select (setcc x, 0, ne), (cttz_zero_undef x), -1 -> ffbl_u32 x 45080b57cec5SDimitry Andric if (CCOpcode == ISD::SETNE && 45095ffd83dbSDimitry Andric (isCtlzOpc(LHS.getOpcode()) || isCttzOpc(LHS.getOpcode())) && 451006c3fb27SDimitry Andric LHS.getOperand(0) == CmpLHS && isAllOnesConstant(RHS)) { 45115ffd83dbSDimitry Andric unsigned Opc = 45125ffd83dbSDimitry Andric isCttzOpc(LHS.getOpcode()) ? AMDGPUISD::FFBL_B32 : AMDGPUISD::FFBH_U32; 45135ffd83dbSDimitry Andric 45140b57cec5SDimitry Andric return getFFBX_U32(DAG, CmpLHS, SL, Opc); 45150b57cec5SDimitry Andric } 45160b57cec5SDimitry Andric 45170b57cec5SDimitry Andric return SDValue(); 45180b57cec5SDimitry Andric } 45190b57cec5SDimitry Andric 45200b57cec5SDimitry Andric static SDValue distributeOpThroughSelect(TargetLowering::DAGCombinerInfo &DCI, 45210b57cec5SDimitry Andric unsigned Op, 45220b57cec5SDimitry Andric const SDLoc &SL, 45230b57cec5SDimitry Andric SDValue Cond, 45240b57cec5SDimitry Andric SDValue N1, 45250b57cec5SDimitry Andric SDValue N2) { 45260b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 45270b57cec5SDimitry Andric EVT VT = N1.getValueType(); 45280b57cec5SDimitry Andric 45290b57cec5SDimitry Andric SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, Cond, 45300b57cec5SDimitry Andric N1.getOperand(0), N2.getOperand(0)); 45310b57cec5SDimitry Andric DCI.AddToWorklist(NewSelect.getNode()); 45320b57cec5SDimitry Andric return DAG.getNode(Op, SL, VT, NewSelect); 45330b57cec5SDimitry Andric } 45340b57cec5SDimitry Andric 45350b57cec5SDimitry Andric // Pull a free FP operation out of a select so it may fold into uses. 45360b57cec5SDimitry Andric // 45370b57cec5SDimitry Andric // select c, (fneg x), (fneg y) -> fneg (select c, x, y) 45380b57cec5SDimitry Andric // select c, (fneg x), k -> fneg (select c, x, (fneg k)) 45390b57cec5SDimitry Andric // 45400b57cec5SDimitry Andric // select c, (fabs x), (fabs y) -> fabs (select c, x, y) 45410b57cec5SDimitry Andric // select c, (fabs x), +k -> fabs (select c, x, k) 454206c3fb27SDimitry Andric SDValue 454306c3fb27SDimitry Andric AMDGPUTargetLowering::foldFreeOpFromSelect(TargetLowering::DAGCombinerInfo &DCI, 454406c3fb27SDimitry Andric SDValue N) const { 45450b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 45460b57cec5SDimitry Andric SDValue Cond = N.getOperand(0); 45470b57cec5SDimitry Andric SDValue LHS = N.getOperand(1); 45480b57cec5SDimitry Andric SDValue RHS = N.getOperand(2); 45490b57cec5SDimitry Andric 45500b57cec5SDimitry Andric EVT VT = N.getValueType(); 45510b57cec5SDimitry Andric if ((LHS.getOpcode() == ISD::FABS && RHS.getOpcode() == ISD::FABS) || 45520b57cec5SDimitry Andric (LHS.getOpcode() == ISD::FNEG && RHS.getOpcode() == ISD::FNEG)) { 455306c3fb27SDimitry Andric if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode())) 455406c3fb27SDimitry Andric return SDValue(); 455506c3fb27SDimitry Andric 45560b57cec5SDimitry Andric return distributeOpThroughSelect(DCI, LHS.getOpcode(), 45570b57cec5SDimitry Andric SDLoc(N), Cond, LHS, RHS); 45580b57cec5SDimitry Andric } 45590b57cec5SDimitry Andric 45600b57cec5SDimitry Andric bool Inv = false; 45610b57cec5SDimitry Andric if (RHS.getOpcode() == ISD::FABS || RHS.getOpcode() == ISD::FNEG) { 45620b57cec5SDimitry Andric std::swap(LHS, RHS); 45630b57cec5SDimitry Andric Inv = true; 45640b57cec5SDimitry Andric } 45650b57cec5SDimitry Andric 45660b57cec5SDimitry Andric // TODO: Support vector constants. 45670b57cec5SDimitry Andric ConstantFPSDNode *CRHS = dyn_cast<ConstantFPSDNode>(RHS); 456806c3fb27SDimitry Andric if ((LHS.getOpcode() == ISD::FNEG || LHS.getOpcode() == ISD::FABS) && CRHS && 456906c3fb27SDimitry Andric !selectSupportsSourceMods(N.getNode())) { 45700b57cec5SDimitry Andric SDLoc SL(N); 45710b57cec5SDimitry Andric // If one side is an fneg/fabs and the other is a constant, we can push the 45720b57cec5SDimitry Andric // fneg/fabs down. If it's an fabs, the constant needs to be non-negative. 45730b57cec5SDimitry Andric SDValue NewLHS = LHS.getOperand(0); 45740b57cec5SDimitry Andric SDValue NewRHS = RHS; 45750b57cec5SDimitry Andric 45760b57cec5SDimitry Andric // Careful: if the neg can be folded up, don't try to pull it back down. 45770b57cec5SDimitry Andric bool ShouldFoldNeg = true; 45780b57cec5SDimitry Andric 45790b57cec5SDimitry Andric if (NewLHS.hasOneUse()) { 45800b57cec5SDimitry Andric unsigned Opc = NewLHS.getOpcode(); 458106c3fb27SDimitry Andric if (LHS.getOpcode() == ISD::FNEG && fnegFoldsIntoOp(NewLHS.getNode())) 45820b57cec5SDimitry Andric ShouldFoldNeg = false; 45830b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FABS && Opc == ISD::FMUL) 45840b57cec5SDimitry Andric ShouldFoldNeg = false; 45850b57cec5SDimitry Andric } 45860b57cec5SDimitry Andric 45870b57cec5SDimitry Andric if (ShouldFoldNeg) { 458806c3fb27SDimitry Andric if (LHS.getOpcode() == ISD::FABS && CRHS->isNegative()) 458906c3fb27SDimitry Andric return SDValue(); 459006c3fb27SDimitry Andric 459106c3fb27SDimitry Andric // We're going to be forced to use a source modifier anyway, there's no 459206c3fb27SDimitry Andric // point to pulling the negate out unless we can get a size reduction by 459306c3fb27SDimitry Andric // negating the constant. 459406c3fb27SDimitry Andric // 459506c3fb27SDimitry Andric // TODO: Generalize to use getCheaperNegatedExpression which doesn't know 459606c3fb27SDimitry Andric // about cheaper constants. 459706c3fb27SDimitry Andric if (NewLHS.getOpcode() == ISD::FABS && 459806c3fb27SDimitry Andric getConstantNegateCost(CRHS) != NegatibleCost::Cheaper) 459906c3fb27SDimitry Andric return SDValue(); 460006c3fb27SDimitry Andric 460106c3fb27SDimitry Andric if (!AMDGPUTargetLowering::allUsesHaveSourceMods(N.getNode())) 460206c3fb27SDimitry Andric return SDValue(); 460306c3fb27SDimitry Andric 46040b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG) 46050b57cec5SDimitry Andric NewRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 46060b57cec5SDimitry Andric 46070b57cec5SDimitry Andric if (Inv) 46080b57cec5SDimitry Andric std::swap(NewLHS, NewRHS); 46090b57cec5SDimitry Andric 46100b57cec5SDimitry Andric SDValue NewSelect = DAG.getNode(ISD::SELECT, SL, VT, 46110b57cec5SDimitry Andric Cond, NewLHS, NewRHS); 46120b57cec5SDimitry Andric DCI.AddToWorklist(NewSelect.getNode()); 46130b57cec5SDimitry Andric return DAG.getNode(LHS.getOpcode(), SL, VT, NewSelect); 46140b57cec5SDimitry Andric } 46150b57cec5SDimitry Andric } 46160b57cec5SDimitry Andric 46170b57cec5SDimitry Andric return SDValue(); 46180b57cec5SDimitry Andric } 46190b57cec5SDimitry Andric 46200b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N, 46210b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 46220b57cec5SDimitry Andric if (SDValue Folded = foldFreeOpFromSelect(DCI, SDValue(N, 0))) 46230b57cec5SDimitry Andric return Folded; 46240b57cec5SDimitry Andric 46250b57cec5SDimitry Andric SDValue Cond = N->getOperand(0); 46260b57cec5SDimitry Andric if (Cond.getOpcode() != ISD::SETCC) 46270b57cec5SDimitry Andric return SDValue(); 46280b57cec5SDimitry Andric 46290b57cec5SDimitry Andric EVT VT = N->getValueType(0); 46300b57cec5SDimitry Andric SDValue LHS = Cond.getOperand(0); 46310b57cec5SDimitry Andric SDValue RHS = Cond.getOperand(1); 46320b57cec5SDimitry Andric SDValue CC = Cond.getOperand(2); 46330b57cec5SDimitry Andric 46340b57cec5SDimitry Andric SDValue True = N->getOperand(1); 46350b57cec5SDimitry Andric SDValue False = N->getOperand(2); 46360b57cec5SDimitry Andric 46370b57cec5SDimitry Andric if (Cond.hasOneUse()) { // TODO: Look for multiple select uses. 46380b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 46390b57cec5SDimitry Andric if (DAG.isConstantValueOfAnyType(True) && 46400b57cec5SDimitry Andric !DAG.isConstantValueOfAnyType(False)) { 46410b57cec5SDimitry Andric // Swap cmp + select pair to move constant to false input. 46420b57cec5SDimitry Andric // This will allow using VOPC cndmasks more often. 46430b57cec5SDimitry Andric // select (setcc x, y), k, x -> select (setccinv x, y), x, k 46440b57cec5SDimitry Andric 46450b57cec5SDimitry Andric SDLoc SL(N); 4646480093f4SDimitry Andric ISD::CondCode NewCC = 4647480093f4SDimitry Andric getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), LHS.getValueType()); 46480b57cec5SDimitry Andric 46490b57cec5SDimitry Andric SDValue NewCond = DAG.getSetCC(SL, Cond.getValueType(), LHS, RHS, NewCC); 46500b57cec5SDimitry Andric return DAG.getNode(ISD::SELECT, SL, VT, NewCond, False, True); 46510b57cec5SDimitry Andric } 46520b57cec5SDimitry Andric 46530b57cec5SDimitry Andric if (VT == MVT::f32 && Subtarget->hasFminFmaxLegacy()) { 46540b57cec5SDimitry Andric SDValue MinMax 46550b57cec5SDimitry Andric = combineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI); 46560b57cec5SDimitry Andric // Revisit this node so we can catch min3/max3/med3 patterns. 46570b57cec5SDimitry Andric //DCI.AddToWorklist(MinMax.getNode()); 46580b57cec5SDimitry Andric return MinMax; 46590b57cec5SDimitry Andric } 46600b57cec5SDimitry Andric } 46610b57cec5SDimitry Andric 46620b57cec5SDimitry Andric // There's no reason to not do this if the condition has other uses. 46630b57cec5SDimitry Andric return performCtlz_CttzCombine(SDLoc(N), Cond, True, False, DCI); 46640b57cec5SDimitry Andric } 46650b57cec5SDimitry Andric 46660b57cec5SDimitry Andric static bool isInv2Pi(const APFloat &APF) { 46670b57cec5SDimitry Andric static const APFloat KF16(APFloat::IEEEhalf(), APInt(16, 0x3118)); 46680b57cec5SDimitry Andric static const APFloat KF32(APFloat::IEEEsingle(), APInt(32, 0x3e22f983)); 46690b57cec5SDimitry Andric static const APFloat KF64(APFloat::IEEEdouble(), APInt(64, 0x3fc45f306dc9c882)); 46700b57cec5SDimitry Andric 46710b57cec5SDimitry Andric return APF.bitwiseIsEqual(KF16) || 46720b57cec5SDimitry Andric APF.bitwiseIsEqual(KF32) || 46730b57cec5SDimitry Andric APF.bitwiseIsEqual(KF64); 46740b57cec5SDimitry Andric } 46750b57cec5SDimitry Andric 46760b57cec5SDimitry Andric // 0 and 1.0 / (0.5 * pi) do not have inline immmediates, so there is an 46770b57cec5SDimitry Andric // additional cost to negate them. 467806c3fb27SDimitry Andric TargetLowering::NegatibleCost 467906c3fb27SDimitry Andric AMDGPUTargetLowering::getConstantNegateCost(const ConstantFPSDNode *C) const { 468006c3fb27SDimitry Andric if (C->isZero()) 468106c3fb27SDimitry Andric return C->isNegative() ? NegatibleCost::Cheaper : NegatibleCost::Expensive; 46820b57cec5SDimitry Andric 46830b57cec5SDimitry Andric if (Subtarget->hasInv2PiInlineImm() && isInv2Pi(C->getValueAPF())) 468406c3fb27SDimitry Andric return C->isNegative() ? NegatibleCost::Cheaper : NegatibleCost::Expensive; 468506c3fb27SDimitry Andric 468606c3fb27SDimitry Andric return NegatibleCost::Neutral; 46870b57cec5SDimitry Andric } 46880b57cec5SDimitry Andric 468906c3fb27SDimitry Andric bool AMDGPUTargetLowering::isConstantCostlierToNegate(SDValue N) const { 469006c3fb27SDimitry Andric if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N)) 469106c3fb27SDimitry Andric return getConstantNegateCost(C) == NegatibleCost::Expensive; 469206c3fb27SDimitry Andric return false; 469306c3fb27SDimitry Andric } 469406c3fb27SDimitry Andric 469506c3fb27SDimitry Andric bool AMDGPUTargetLowering::isConstantCheaperToNegate(SDValue N) const { 469606c3fb27SDimitry Andric if (const ConstantFPSDNode *C = isConstOrConstSplatFP(N)) 469706c3fb27SDimitry Andric return getConstantNegateCost(C) == NegatibleCost::Cheaper; 46980b57cec5SDimitry Andric return false; 46990b57cec5SDimitry Andric } 47000b57cec5SDimitry Andric 47010b57cec5SDimitry Andric static unsigned inverseMinMax(unsigned Opc) { 47020b57cec5SDimitry Andric switch (Opc) { 47030b57cec5SDimitry Andric case ISD::FMAXNUM: 47040b57cec5SDimitry Andric return ISD::FMINNUM; 47050b57cec5SDimitry Andric case ISD::FMINNUM: 47060b57cec5SDimitry Andric return ISD::FMAXNUM; 47070b57cec5SDimitry Andric case ISD::FMAXNUM_IEEE: 47080b57cec5SDimitry Andric return ISD::FMINNUM_IEEE; 47090b57cec5SDimitry Andric case ISD::FMINNUM_IEEE: 47100b57cec5SDimitry Andric return ISD::FMAXNUM_IEEE; 47115f757f3fSDimitry Andric case ISD::FMAXIMUM: 47125f757f3fSDimitry Andric return ISD::FMINIMUM; 47135f757f3fSDimitry Andric case ISD::FMINIMUM: 47145f757f3fSDimitry Andric return ISD::FMAXIMUM; 47150b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY: 47160b57cec5SDimitry Andric return AMDGPUISD::FMIN_LEGACY; 47170b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY: 47180b57cec5SDimitry Andric return AMDGPUISD::FMAX_LEGACY; 47190b57cec5SDimitry Andric default: 47200b57cec5SDimitry Andric llvm_unreachable("invalid min/max opcode"); 47210b57cec5SDimitry Andric } 47220b57cec5SDimitry Andric } 47230b57cec5SDimitry Andric 472406c3fb27SDimitry Andric /// \return true if it's profitable to try to push an fneg into its source 472506c3fb27SDimitry Andric /// instruction. 472606c3fb27SDimitry Andric bool AMDGPUTargetLowering::shouldFoldFNegIntoSrc(SDNode *N, SDValue N0) { 47270b57cec5SDimitry Andric // If the input has multiple uses and we can either fold the negate down, or 47280b57cec5SDimitry Andric // the other uses cannot, give up. This both prevents unprofitable 47290b57cec5SDimitry Andric // transformations and infinite loops: we won't repeatedly try to fold around 47300b57cec5SDimitry Andric // a negate that has no 'good' form. 47310b57cec5SDimitry Andric if (N0.hasOneUse()) { 47320b57cec5SDimitry Andric // This may be able to fold into the source, but at a code size cost. Don't 47330b57cec5SDimitry Andric // fold if the fold into the user is free. 47340b57cec5SDimitry Andric if (allUsesHaveSourceMods(N, 0)) 473506c3fb27SDimitry Andric return false; 47360b57cec5SDimitry Andric } else { 473706c3fb27SDimitry Andric if (fnegFoldsIntoOp(N0.getNode()) && 47380b57cec5SDimitry Andric (allUsesHaveSourceMods(N) || !allUsesHaveSourceMods(N0.getNode()))) 473906c3fb27SDimitry Andric return false; 47400b57cec5SDimitry Andric } 47410b57cec5SDimitry Andric 474206c3fb27SDimitry Andric return true; 474306c3fb27SDimitry Andric } 474406c3fb27SDimitry Andric 474506c3fb27SDimitry Andric SDValue AMDGPUTargetLowering::performFNegCombine(SDNode *N, 474606c3fb27SDimitry Andric DAGCombinerInfo &DCI) const { 474706c3fb27SDimitry Andric SelectionDAG &DAG = DCI.DAG; 474806c3fb27SDimitry Andric SDValue N0 = N->getOperand(0); 474906c3fb27SDimitry Andric EVT VT = N->getValueType(0); 475006c3fb27SDimitry Andric 475106c3fb27SDimitry Andric unsigned Opc = N0.getOpcode(); 475206c3fb27SDimitry Andric 475306c3fb27SDimitry Andric if (!shouldFoldFNegIntoSrc(N, N0)) 475406c3fb27SDimitry Andric return SDValue(); 475506c3fb27SDimitry Andric 47560b57cec5SDimitry Andric SDLoc SL(N); 47570b57cec5SDimitry Andric switch (Opc) { 47580b57cec5SDimitry Andric case ISD::FADD: { 47590b57cec5SDimitry Andric if (!mayIgnoreSignedZero(N0)) 47600b57cec5SDimitry Andric return SDValue(); 47610b57cec5SDimitry Andric 47620b57cec5SDimitry Andric // (fneg (fadd x, y)) -> (fadd (fneg x), (fneg y)) 47630b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0); 47640b57cec5SDimitry Andric SDValue RHS = N0.getOperand(1); 47650b57cec5SDimitry Andric 47660b57cec5SDimitry Andric if (LHS.getOpcode() != ISD::FNEG) 47670b57cec5SDimitry Andric LHS = DAG.getNode(ISD::FNEG, SL, VT, LHS); 47680b57cec5SDimitry Andric else 47690b57cec5SDimitry Andric LHS = LHS.getOperand(0); 47700b57cec5SDimitry Andric 47710b57cec5SDimitry Andric if (RHS.getOpcode() != ISD::FNEG) 47720b57cec5SDimitry Andric RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 47730b57cec5SDimitry Andric else 47740b57cec5SDimitry Andric RHS = RHS.getOperand(0); 47750b57cec5SDimitry Andric 47760b57cec5SDimitry Andric SDValue Res = DAG.getNode(ISD::FADD, SL, VT, LHS, RHS, N0->getFlags()); 47770b57cec5SDimitry Andric if (Res.getOpcode() != ISD::FADD) 47780b57cec5SDimitry Andric return SDValue(); // Op got folded away. 47790b57cec5SDimitry Andric if (!N0.hasOneUse()) 47800b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 47810b57cec5SDimitry Andric return Res; 47820b57cec5SDimitry Andric } 47830b57cec5SDimitry Andric case ISD::FMUL: 47840b57cec5SDimitry Andric case AMDGPUISD::FMUL_LEGACY: { 47850b57cec5SDimitry Andric // (fneg (fmul x, y)) -> (fmul x, (fneg y)) 47860b57cec5SDimitry Andric // (fneg (fmul_legacy x, y)) -> (fmul_legacy x, (fneg y)) 47870b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0); 47880b57cec5SDimitry Andric SDValue RHS = N0.getOperand(1); 47890b57cec5SDimitry Andric 47900b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG) 47910b57cec5SDimitry Andric LHS = LHS.getOperand(0); 47920b57cec5SDimitry Andric else if (RHS.getOpcode() == ISD::FNEG) 47930b57cec5SDimitry Andric RHS = RHS.getOperand(0); 47940b57cec5SDimitry Andric else 47950b57cec5SDimitry Andric RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 47960b57cec5SDimitry Andric 47970b57cec5SDimitry Andric SDValue Res = DAG.getNode(Opc, SL, VT, LHS, RHS, N0->getFlags()); 47980b57cec5SDimitry Andric if (Res.getOpcode() != Opc) 47990b57cec5SDimitry Andric return SDValue(); // Op got folded away. 48000b57cec5SDimitry Andric if (!N0.hasOneUse()) 48010b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 48020b57cec5SDimitry Andric return Res; 48030b57cec5SDimitry Andric } 48040b57cec5SDimitry Andric case ISD::FMA: 48050b57cec5SDimitry Andric case ISD::FMAD: { 4806e8d8bef9SDimitry Andric // TODO: handle llvm.amdgcn.fma.legacy 48070b57cec5SDimitry Andric if (!mayIgnoreSignedZero(N0)) 48080b57cec5SDimitry Andric return SDValue(); 48090b57cec5SDimitry Andric 48100b57cec5SDimitry Andric // (fneg (fma x, y, z)) -> (fma x, (fneg y), (fneg z)) 48110b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0); 48120b57cec5SDimitry Andric SDValue MHS = N0.getOperand(1); 48130b57cec5SDimitry Andric SDValue RHS = N0.getOperand(2); 48140b57cec5SDimitry Andric 48150b57cec5SDimitry Andric if (LHS.getOpcode() == ISD::FNEG) 48160b57cec5SDimitry Andric LHS = LHS.getOperand(0); 48170b57cec5SDimitry Andric else if (MHS.getOpcode() == ISD::FNEG) 48180b57cec5SDimitry Andric MHS = MHS.getOperand(0); 48190b57cec5SDimitry Andric else 48200b57cec5SDimitry Andric MHS = DAG.getNode(ISD::FNEG, SL, VT, MHS); 48210b57cec5SDimitry Andric 48220b57cec5SDimitry Andric if (RHS.getOpcode() != ISD::FNEG) 48230b57cec5SDimitry Andric RHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 48240b57cec5SDimitry Andric else 48250b57cec5SDimitry Andric RHS = RHS.getOperand(0); 48260b57cec5SDimitry Andric 48270b57cec5SDimitry Andric SDValue Res = DAG.getNode(Opc, SL, VT, LHS, MHS, RHS); 48280b57cec5SDimitry Andric if (Res.getOpcode() != Opc) 48290b57cec5SDimitry Andric return SDValue(); // Op got folded away. 48300b57cec5SDimitry Andric if (!N0.hasOneUse()) 48310b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 48320b57cec5SDimitry Andric return Res; 48330b57cec5SDimitry Andric } 48340b57cec5SDimitry Andric case ISD::FMAXNUM: 48350b57cec5SDimitry Andric case ISD::FMINNUM: 48360b57cec5SDimitry Andric case ISD::FMAXNUM_IEEE: 48370b57cec5SDimitry Andric case ISD::FMINNUM_IEEE: 48385f757f3fSDimitry Andric case ISD::FMINIMUM: 48395f757f3fSDimitry Andric case ISD::FMAXIMUM: 48400b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY: 48410b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY: { 48420b57cec5SDimitry Andric // fneg (fmaxnum x, y) -> fminnum (fneg x), (fneg y) 48430b57cec5SDimitry Andric // fneg (fminnum x, y) -> fmaxnum (fneg x), (fneg y) 48440b57cec5SDimitry Andric // fneg (fmax_legacy x, y) -> fmin_legacy (fneg x), (fneg y) 48450b57cec5SDimitry Andric // fneg (fmin_legacy x, y) -> fmax_legacy (fneg x), (fneg y) 48460b57cec5SDimitry Andric 48470b57cec5SDimitry Andric SDValue LHS = N0.getOperand(0); 48480b57cec5SDimitry Andric SDValue RHS = N0.getOperand(1); 48490b57cec5SDimitry Andric 48500b57cec5SDimitry Andric // 0 doesn't have a negated inline immediate. 48510b57cec5SDimitry Andric // TODO: This constant check should be generalized to other operations. 48520b57cec5SDimitry Andric if (isConstantCostlierToNegate(RHS)) 48530b57cec5SDimitry Andric return SDValue(); 48540b57cec5SDimitry Andric 48550b57cec5SDimitry Andric SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, VT, LHS); 48560b57cec5SDimitry Andric SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, VT, RHS); 48570b57cec5SDimitry Andric unsigned Opposite = inverseMinMax(Opc); 48580b57cec5SDimitry Andric 48590b57cec5SDimitry Andric SDValue Res = DAG.getNode(Opposite, SL, VT, NegLHS, NegRHS, N0->getFlags()); 48600b57cec5SDimitry Andric if (Res.getOpcode() != Opposite) 48610b57cec5SDimitry Andric return SDValue(); // Op got folded away. 48620b57cec5SDimitry Andric if (!N0.hasOneUse()) 48630b57cec5SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Res)); 48640b57cec5SDimitry Andric return Res; 48650b57cec5SDimitry Andric } 48660b57cec5SDimitry Andric case AMDGPUISD::FMED3: { 48670b57cec5SDimitry Andric SDValue Ops[3]; 48680b57cec5SDimitry Andric for (unsigned I = 0; I < 3; ++I) 48690b57cec5SDimitry Andric Ops[I] = DAG.getNode(ISD::FNEG, SL, VT, N0->getOperand(I), N0->getFlags()); 48700b57cec5SDimitry Andric 48710b57cec5SDimitry Andric SDValue Res = DAG.getNode(AMDGPUISD::FMED3, SL, VT, Ops, N0->getFlags()); 48720b57cec5SDimitry Andric if (Res.getOpcode() != AMDGPUISD::FMED3) 48730b57cec5SDimitry Andric return SDValue(); // Op got folded away. 4874e8d8bef9SDimitry Andric 4875e8d8bef9SDimitry Andric if (!N0.hasOneUse()) { 4876e8d8bef9SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, VT, Res); 4877e8d8bef9SDimitry Andric DAG.ReplaceAllUsesWith(N0, Neg); 4878e8d8bef9SDimitry Andric 4879e8d8bef9SDimitry Andric for (SDNode *U : Neg->uses()) 4880e8d8bef9SDimitry Andric DCI.AddToWorklist(U); 4881e8d8bef9SDimitry Andric } 4882e8d8bef9SDimitry Andric 48830b57cec5SDimitry Andric return Res; 48840b57cec5SDimitry Andric } 48850b57cec5SDimitry Andric case ISD::FP_EXTEND: 48860b57cec5SDimitry Andric case ISD::FTRUNC: 48870b57cec5SDimitry Andric case ISD::FRINT: 48880b57cec5SDimitry Andric case ISD::FNEARBYINT: // XXX - Should fround be handled? 48895f757f3fSDimitry Andric case ISD::FROUNDEVEN: 48900b57cec5SDimitry Andric case ISD::FSIN: 48910b57cec5SDimitry Andric case ISD::FCANONICALIZE: 48920b57cec5SDimitry Andric case AMDGPUISD::RCP: 48930b57cec5SDimitry Andric case AMDGPUISD::RCP_LEGACY: 48940b57cec5SDimitry Andric case AMDGPUISD::RCP_IFLAG: 48950b57cec5SDimitry Andric case AMDGPUISD::SIN_HW: { 48960b57cec5SDimitry Andric SDValue CvtSrc = N0.getOperand(0); 48970b57cec5SDimitry Andric if (CvtSrc.getOpcode() == ISD::FNEG) { 48980b57cec5SDimitry Andric // (fneg (fp_extend (fneg x))) -> (fp_extend x) 48990b57cec5SDimitry Andric // (fneg (rcp (fneg x))) -> (rcp x) 49000b57cec5SDimitry Andric return DAG.getNode(Opc, SL, VT, CvtSrc.getOperand(0)); 49010b57cec5SDimitry Andric } 49020b57cec5SDimitry Andric 49030b57cec5SDimitry Andric if (!N0.hasOneUse()) 49040b57cec5SDimitry Andric return SDValue(); 49050b57cec5SDimitry Andric 49060b57cec5SDimitry Andric // (fneg (fp_extend x)) -> (fp_extend (fneg x)) 49070b57cec5SDimitry Andric // (fneg (rcp x)) -> (rcp (fneg x)) 49080b57cec5SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc); 49090b57cec5SDimitry Andric return DAG.getNode(Opc, SL, VT, Neg, N0->getFlags()); 49100b57cec5SDimitry Andric } 49110b57cec5SDimitry Andric case ISD::FP_ROUND: { 49120b57cec5SDimitry Andric SDValue CvtSrc = N0.getOperand(0); 49130b57cec5SDimitry Andric 49140b57cec5SDimitry Andric if (CvtSrc.getOpcode() == ISD::FNEG) { 49150b57cec5SDimitry Andric // (fneg (fp_round (fneg x))) -> (fp_round x) 49160b57cec5SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, 49170b57cec5SDimitry Andric CvtSrc.getOperand(0), N0.getOperand(1)); 49180b57cec5SDimitry Andric } 49190b57cec5SDimitry Andric 49200b57cec5SDimitry Andric if (!N0.hasOneUse()) 49210b57cec5SDimitry Andric return SDValue(); 49220b57cec5SDimitry Andric 49230b57cec5SDimitry Andric // (fneg (fp_round x)) -> (fp_round (fneg x)) 49240b57cec5SDimitry Andric SDValue Neg = DAG.getNode(ISD::FNEG, SL, CvtSrc.getValueType(), CvtSrc); 49250b57cec5SDimitry Andric return DAG.getNode(ISD::FP_ROUND, SL, VT, Neg, N0.getOperand(1)); 49260b57cec5SDimitry Andric } 49270b57cec5SDimitry Andric case ISD::FP16_TO_FP: { 49280b57cec5SDimitry Andric // v_cvt_f32_f16 supports source modifiers on pre-VI targets without legal 49290b57cec5SDimitry Andric // f16, but legalization of f16 fneg ends up pulling it out of the source. 49300b57cec5SDimitry Andric // Put the fneg back as a legal source operation that can be matched later. 49310b57cec5SDimitry Andric SDLoc SL(N); 49320b57cec5SDimitry Andric 49330b57cec5SDimitry Andric SDValue Src = N0.getOperand(0); 49340b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 49350b57cec5SDimitry Andric 49360b57cec5SDimitry Andric // fneg (fp16_to_fp x) -> fp16_to_fp (xor x, 0x8000) 49370b57cec5SDimitry Andric SDValue IntFNeg = DAG.getNode(ISD::XOR, SL, SrcVT, Src, 49380b57cec5SDimitry Andric DAG.getConstant(0x8000, SL, SrcVT)); 49390b57cec5SDimitry Andric return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFNeg); 49400b57cec5SDimitry Andric } 494106c3fb27SDimitry Andric case ISD::SELECT: { 494206c3fb27SDimitry Andric // fneg (select c, a, b) -> select c, (fneg a), (fneg b) 494306c3fb27SDimitry Andric // TODO: Invert conditions of foldFreeOpFromSelect 494406c3fb27SDimitry Andric return SDValue(); 494506c3fb27SDimitry Andric } 494606c3fb27SDimitry Andric case ISD::BITCAST: { 494706c3fb27SDimitry Andric SDLoc SL(N); 494806c3fb27SDimitry Andric SDValue BCSrc = N0.getOperand(0); 494906c3fb27SDimitry Andric if (BCSrc.getOpcode() == ISD::BUILD_VECTOR) { 495006c3fb27SDimitry Andric SDValue HighBits = BCSrc.getOperand(BCSrc.getNumOperands() - 1); 495106c3fb27SDimitry Andric if (HighBits.getValueType().getSizeInBits() != 32 || 495206c3fb27SDimitry Andric !fnegFoldsIntoOp(HighBits.getNode())) 495306c3fb27SDimitry Andric return SDValue(); 495406c3fb27SDimitry Andric 495506c3fb27SDimitry Andric // f64 fneg only really needs to operate on the high half of of the 495606c3fb27SDimitry Andric // register, so try to force it to an f32 operation to help make use of 495706c3fb27SDimitry Andric // source modifiers. 495806c3fb27SDimitry Andric // 495906c3fb27SDimitry Andric // 496006c3fb27SDimitry Andric // fneg (f64 (bitcast (build_vector x, y))) -> 496106c3fb27SDimitry Andric // f64 (bitcast (build_vector (bitcast i32:x to f32), 496206c3fb27SDimitry Andric // (fneg (bitcast i32:y to f32))) 496306c3fb27SDimitry Andric 496406c3fb27SDimitry Andric SDValue CastHi = DAG.getNode(ISD::BITCAST, SL, MVT::f32, HighBits); 496506c3fb27SDimitry Andric SDValue NegHi = DAG.getNode(ISD::FNEG, SL, MVT::f32, CastHi); 496606c3fb27SDimitry Andric SDValue CastBack = 496706c3fb27SDimitry Andric DAG.getNode(ISD::BITCAST, SL, HighBits.getValueType(), NegHi); 496806c3fb27SDimitry Andric 496906c3fb27SDimitry Andric SmallVector<SDValue, 8> Ops(BCSrc->op_begin(), BCSrc->op_end()); 497006c3fb27SDimitry Andric Ops.back() = CastBack; 497106c3fb27SDimitry Andric DCI.AddToWorklist(NegHi.getNode()); 497206c3fb27SDimitry Andric SDValue Build = 497306c3fb27SDimitry Andric DAG.getNode(ISD::BUILD_VECTOR, SL, BCSrc.getValueType(), Ops); 497406c3fb27SDimitry Andric SDValue Result = DAG.getNode(ISD::BITCAST, SL, VT, Build); 497506c3fb27SDimitry Andric 497606c3fb27SDimitry Andric if (!N0.hasOneUse()) 497706c3fb27SDimitry Andric DAG.ReplaceAllUsesWith(N0, DAG.getNode(ISD::FNEG, SL, VT, Result)); 497806c3fb27SDimitry Andric return Result; 497906c3fb27SDimitry Andric } 498006c3fb27SDimitry Andric 498106c3fb27SDimitry Andric if (BCSrc.getOpcode() == ISD::SELECT && VT == MVT::f32 && 498206c3fb27SDimitry Andric BCSrc.hasOneUse()) { 498306c3fb27SDimitry Andric // fneg (bitcast (f32 (select cond, i32:lhs, i32:rhs))) -> 498406c3fb27SDimitry Andric // select cond, (bitcast i32:lhs to f32), (bitcast i32:rhs to f32) 498506c3fb27SDimitry Andric 498606c3fb27SDimitry Andric // TODO: Cast back result for multiple uses is beneficial in some cases. 498706c3fb27SDimitry Andric 498806c3fb27SDimitry Andric SDValue LHS = 498906c3fb27SDimitry Andric DAG.getNode(ISD::BITCAST, SL, MVT::f32, BCSrc.getOperand(1)); 499006c3fb27SDimitry Andric SDValue RHS = 499106c3fb27SDimitry Andric DAG.getNode(ISD::BITCAST, SL, MVT::f32, BCSrc.getOperand(2)); 499206c3fb27SDimitry Andric 499306c3fb27SDimitry Andric SDValue NegLHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, LHS); 499406c3fb27SDimitry Andric SDValue NegRHS = DAG.getNode(ISD::FNEG, SL, MVT::f32, RHS); 499506c3fb27SDimitry Andric 499606c3fb27SDimitry Andric return DAG.getNode(ISD::SELECT, SL, MVT::f32, BCSrc.getOperand(0), NegLHS, 499706c3fb27SDimitry Andric NegRHS); 499806c3fb27SDimitry Andric } 499906c3fb27SDimitry Andric 500006c3fb27SDimitry Andric return SDValue(); 500106c3fb27SDimitry Andric } 50020b57cec5SDimitry Andric default: 50030b57cec5SDimitry Andric return SDValue(); 50040b57cec5SDimitry Andric } 50050b57cec5SDimitry Andric } 50060b57cec5SDimitry Andric 50070b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performFAbsCombine(SDNode *N, 50080b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 50090b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 50100b57cec5SDimitry Andric SDValue N0 = N->getOperand(0); 50110b57cec5SDimitry Andric 50120b57cec5SDimitry Andric if (!N0.hasOneUse()) 50130b57cec5SDimitry Andric return SDValue(); 50140b57cec5SDimitry Andric 50150b57cec5SDimitry Andric switch (N0.getOpcode()) { 50160b57cec5SDimitry Andric case ISD::FP16_TO_FP: { 50170b57cec5SDimitry Andric assert(!Subtarget->has16BitInsts() && "should only see if f16 is illegal"); 50180b57cec5SDimitry Andric SDLoc SL(N); 50190b57cec5SDimitry Andric SDValue Src = N0.getOperand(0); 50200b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 50210b57cec5SDimitry Andric 50220b57cec5SDimitry Andric // fabs (fp16_to_fp x) -> fp16_to_fp (and x, 0x7fff) 50230b57cec5SDimitry Andric SDValue IntFAbs = DAG.getNode(ISD::AND, SL, SrcVT, Src, 50240b57cec5SDimitry Andric DAG.getConstant(0x7fff, SL, SrcVT)); 50250b57cec5SDimitry Andric return DAG.getNode(ISD::FP16_TO_FP, SL, N->getValueType(0), IntFAbs); 50260b57cec5SDimitry Andric } 50270b57cec5SDimitry Andric default: 50280b57cec5SDimitry Andric return SDValue(); 50290b57cec5SDimitry Andric } 50300b57cec5SDimitry Andric } 50310b57cec5SDimitry Andric 50320b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::performRcpCombine(SDNode *N, 50330b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 50340b57cec5SDimitry Andric const auto *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)); 50350b57cec5SDimitry Andric if (!CFP) 50360b57cec5SDimitry Andric return SDValue(); 50370b57cec5SDimitry Andric 50380b57cec5SDimitry Andric // XXX - Should this flush denormals? 50390b57cec5SDimitry Andric const APFloat &Val = CFP->getValueAPF(); 50400b57cec5SDimitry Andric APFloat One(Val.getSemantics(), "1.0"); 50410b57cec5SDimitry Andric return DCI.DAG.getConstantFP(One / Val, SDLoc(N), N->getValueType(0)); 50420b57cec5SDimitry Andric } 50430b57cec5SDimitry Andric 50440b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N, 50450b57cec5SDimitry Andric DAGCombinerInfo &DCI) const { 50460b57cec5SDimitry Andric SelectionDAG &DAG = DCI.DAG; 50470b57cec5SDimitry Andric SDLoc DL(N); 50480b57cec5SDimitry Andric 50490b57cec5SDimitry Andric switch(N->getOpcode()) { 50500b57cec5SDimitry Andric default: 50510b57cec5SDimitry Andric break; 50520b57cec5SDimitry Andric case ISD::BITCAST: { 50530b57cec5SDimitry Andric EVT DestVT = N->getValueType(0); 50540b57cec5SDimitry Andric 50550b57cec5SDimitry Andric // Push casts through vector builds. This helps avoid emitting a large 50560b57cec5SDimitry Andric // number of copies when materializing floating point vector constants. 50570b57cec5SDimitry Andric // 50580b57cec5SDimitry Andric // vNt1 bitcast (vNt0 (build_vector t0:x, t0:y)) => 50590b57cec5SDimitry Andric // vnt1 = build_vector (t1 (bitcast t0:x)), (t1 (bitcast t0:y)) 50600b57cec5SDimitry Andric if (DestVT.isVector()) { 50610b57cec5SDimitry Andric SDValue Src = N->getOperand(0); 50621db9f3b2SDimitry Andric if (Src.getOpcode() == ISD::BUILD_VECTOR && 50631db9f3b2SDimitry Andric (DCI.getDAGCombineLevel() < AfterLegalizeDAG || 50641db9f3b2SDimitry Andric isOperationLegal(ISD::BUILD_VECTOR, DestVT))) { 50650b57cec5SDimitry Andric EVT SrcVT = Src.getValueType(); 50660b57cec5SDimitry Andric unsigned NElts = DestVT.getVectorNumElements(); 50670b57cec5SDimitry Andric 50680b57cec5SDimitry Andric if (SrcVT.getVectorNumElements() == NElts) { 50690b57cec5SDimitry Andric EVT DestEltVT = DestVT.getVectorElementType(); 50700b57cec5SDimitry Andric 50710b57cec5SDimitry Andric SmallVector<SDValue, 8> CastedElts; 50720b57cec5SDimitry Andric SDLoc SL(N); 50730b57cec5SDimitry Andric for (unsigned I = 0, E = SrcVT.getVectorNumElements(); I != E; ++I) { 50740b57cec5SDimitry Andric SDValue Elt = Src.getOperand(I); 50750b57cec5SDimitry Andric CastedElts.push_back(DAG.getNode(ISD::BITCAST, DL, DestEltVT, Elt)); 50760b57cec5SDimitry Andric } 50770b57cec5SDimitry Andric 50780b57cec5SDimitry Andric return DAG.getBuildVector(DestVT, SL, CastedElts); 50790b57cec5SDimitry Andric } 50800b57cec5SDimitry Andric } 50810b57cec5SDimitry Andric } 50820b57cec5SDimitry Andric 5083e8d8bef9SDimitry Andric if (DestVT.getSizeInBits() != 64 || !DestVT.isVector()) 50840b57cec5SDimitry Andric break; 50850b57cec5SDimitry Andric 50860b57cec5SDimitry Andric // Fold bitcasts of constants. 50870b57cec5SDimitry Andric // 50880b57cec5SDimitry Andric // v2i32 (bitcast i64:k) -> build_vector lo_32(k), hi_32(k) 50890b57cec5SDimitry Andric // TODO: Generalize and move to DAGCombiner 50900b57cec5SDimitry Andric SDValue Src = N->getOperand(0); 50910b57cec5SDimitry Andric if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src)) { 50920b57cec5SDimitry Andric SDLoc SL(N); 50930b57cec5SDimitry Andric uint64_t CVal = C->getZExtValue(); 50940b57cec5SDimitry Andric SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, 50950b57cec5SDimitry Andric DAG.getConstant(Lo_32(CVal), SL, MVT::i32), 50960b57cec5SDimitry Andric DAG.getConstant(Hi_32(CVal), SL, MVT::i32)); 50970b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, DestVT, BV); 50980b57cec5SDimitry Andric } 50990b57cec5SDimitry Andric 51000b57cec5SDimitry Andric if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Src)) { 51010b57cec5SDimitry Andric const APInt &Val = C->getValueAPF().bitcastToAPInt(); 51020b57cec5SDimitry Andric SDLoc SL(N); 51030b57cec5SDimitry Andric uint64_t CVal = Val.getZExtValue(); 51040b57cec5SDimitry Andric SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, SL, MVT::v2i32, 51050b57cec5SDimitry Andric DAG.getConstant(Lo_32(CVal), SL, MVT::i32), 51060b57cec5SDimitry Andric DAG.getConstant(Hi_32(CVal), SL, MVT::i32)); 51070b57cec5SDimitry Andric 51080b57cec5SDimitry Andric return DAG.getNode(ISD::BITCAST, SL, DestVT, Vec); 51090b57cec5SDimitry Andric } 51100b57cec5SDimitry Andric 51110b57cec5SDimitry Andric break; 51120b57cec5SDimitry Andric } 51130b57cec5SDimitry Andric case ISD::SHL: { 51140b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) 51150b57cec5SDimitry Andric break; 51160b57cec5SDimitry Andric 51170b57cec5SDimitry Andric return performShlCombine(N, DCI); 51180b57cec5SDimitry Andric } 51190b57cec5SDimitry Andric case ISD::SRL: { 51200b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) 51210b57cec5SDimitry Andric break; 51220b57cec5SDimitry Andric 51230b57cec5SDimitry Andric return performSrlCombine(N, DCI); 51240b57cec5SDimitry Andric } 51250b57cec5SDimitry Andric case ISD::SRA: { 51260b57cec5SDimitry Andric if (DCI.getDAGCombineLevel() < AfterLegalizeDAG) 51270b57cec5SDimitry Andric break; 51280b57cec5SDimitry Andric 51290b57cec5SDimitry Andric return performSraCombine(N, DCI); 51300b57cec5SDimitry Andric } 51310b57cec5SDimitry Andric case ISD::TRUNCATE: 51320b57cec5SDimitry Andric return performTruncateCombine(N, DCI); 51330b57cec5SDimitry Andric case ISD::MUL: 51340b57cec5SDimitry Andric return performMulCombine(N, DCI); 513506c3fb27SDimitry Andric case AMDGPUISD::MUL_U24: 513606c3fb27SDimitry Andric case AMDGPUISD::MUL_I24: { 513706c3fb27SDimitry Andric if (SDValue Simplified = simplifyMul24(N, DCI)) 513806c3fb27SDimitry Andric return Simplified; 51390fca6ea1SDimitry Andric break; 514006c3fb27SDimitry Andric } 514106c3fb27SDimitry Andric case AMDGPUISD::MULHI_I24: 514206c3fb27SDimitry Andric case AMDGPUISD::MULHI_U24: 514306c3fb27SDimitry Andric return simplifyMul24(N, DCI); 51444824e7fdSDimitry Andric case ISD::SMUL_LOHI: 51454824e7fdSDimitry Andric case ISD::UMUL_LOHI: 51464824e7fdSDimitry Andric return performMulLoHiCombine(N, DCI); 51470b57cec5SDimitry Andric case ISD::MULHS: 51480b57cec5SDimitry Andric return performMulhsCombine(N, DCI); 51490b57cec5SDimitry Andric case ISD::MULHU: 51500b57cec5SDimitry Andric return performMulhuCombine(N, DCI); 51510b57cec5SDimitry Andric case ISD::SELECT: 51520b57cec5SDimitry Andric return performSelectCombine(N, DCI); 51530b57cec5SDimitry Andric case ISD::FNEG: 51540b57cec5SDimitry Andric return performFNegCombine(N, DCI); 51550b57cec5SDimitry Andric case ISD::FABS: 51560b57cec5SDimitry Andric return performFAbsCombine(N, DCI); 51570b57cec5SDimitry Andric case AMDGPUISD::BFE_I32: 51580b57cec5SDimitry Andric case AMDGPUISD::BFE_U32: { 51590b57cec5SDimitry Andric assert(!N->getValueType(0).isVector() && 51600b57cec5SDimitry Andric "Vector handling of BFE not implemented"); 51610b57cec5SDimitry Andric ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2)); 51620b57cec5SDimitry Andric if (!Width) 51630b57cec5SDimitry Andric break; 51640b57cec5SDimitry Andric 51650b57cec5SDimitry Andric uint32_t WidthVal = Width->getZExtValue() & 0x1f; 51660b57cec5SDimitry Andric if (WidthVal == 0) 51670b57cec5SDimitry Andric return DAG.getConstant(0, DL, MVT::i32); 51680b57cec5SDimitry Andric 51690b57cec5SDimitry Andric ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 51700b57cec5SDimitry Andric if (!Offset) 51710b57cec5SDimitry Andric break; 51720b57cec5SDimitry Andric 51730b57cec5SDimitry Andric SDValue BitsFrom = N->getOperand(0); 51740b57cec5SDimitry Andric uint32_t OffsetVal = Offset->getZExtValue() & 0x1f; 51750b57cec5SDimitry Andric 51760b57cec5SDimitry Andric bool Signed = N->getOpcode() == AMDGPUISD::BFE_I32; 51770b57cec5SDimitry Andric 51780b57cec5SDimitry Andric if (OffsetVal == 0) { 51790b57cec5SDimitry Andric // This is already sign / zero extended, so try to fold away extra BFEs. 51800b57cec5SDimitry Andric unsigned SignBits = Signed ? (32 - WidthVal + 1) : (32 - WidthVal); 51810b57cec5SDimitry Andric 51820b57cec5SDimitry Andric unsigned OpSignBits = DAG.ComputeNumSignBits(BitsFrom); 51830b57cec5SDimitry Andric if (OpSignBits >= SignBits) 51840b57cec5SDimitry Andric return BitsFrom; 51850b57cec5SDimitry Andric 51860b57cec5SDimitry Andric EVT SmallVT = EVT::getIntegerVT(*DAG.getContext(), WidthVal); 51870b57cec5SDimitry Andric if (Signed) { 51880b57cec5SDimitry Andric // This is a sign_extend_inreg. Replace it to take advantage of existing 51890b57cec5SDimitry Andric // DAG Combines. If not eliminated, we will match back to BFE during 51900b57cec5SDimitry Andric // selection. 51910b57cec5SDimitry Andric 51920b57cec5SDimitry Andric // TODO: The sext_inreg of extended types ends, although we can could 51930b57cec5SDimitry Andric // handle them in a single BFE. 51940b57cec5SDimitry Andric return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, BitsFrom, 51950b57cec5SDimitry Andric DAG.getValueType(SmallVT)); 51960b57cec5SDimitry Andric } 51970b57cec5SDimitry Andric 51980b57cec5SDimitry Andric return DAG.getZeroExtendInReg(BitsFrom, DL, SmallVT); 51990b57cec5SDimitry Andric } 52000b57cec5SDimitry Andric 52010b57cec5SDimitry Andric if (ConstantSDNode *CVal = dyn_cast<ConstantSDNode>(BitsFrom)) { 52020b57cec5SDimitry Andric if (Signed) { 52030b57cec5SDimitry Andric return constantFoldBFE<int32_t>(DAG, 52040b57cec5SDimitry Andric CVal->getSExtValue(), 52050b57cec5SDimitry Andric OffsetVal, 52060b57cec5SDimitry Andric WidthVal, 52070b57cec5SDimitry Andric DL); 52080b57cec5SDimitry Andric } 52090b57cec5SDimitry Andric 52100b57cec5SDimitry Andric return constantFoldBFE<uint32_t>(DAG, 52110b57cec5SDimitry Andric CVal->getZExtValue(), 52120b57cec5SDimitry Andric OffsetVal, 52130b57cec5SDimitry Andric WidthVal, 52140b57cec5SDimitry Andric DL); 52150b57cec5SDimitry Andric } 52160b57cec5SDimitry Andric 52170b57cec5SDimitry Andric if ((OffsetVal + WidthVal) >= 32 && 52180b57cec5SDimitry Andric !(Subtarget->hasSDWA() && OffsetVal == 16 && WidthVal == 16)) { 52190b57cec5SDimitry Andric SDValue ShiftVal = DAG.getConstant(OffsetVal, DL, MVT::i32); 52200b57cec5SDimitry Andric return DAG.getNode(Signed ? ISD::SRA : ISD::SRL, DL, MVT::i32, 52210b57cec5SDimitry Andric BitsFrom, ShiftVal); 52220b57cec5SDimitry Andric } 52230b57cec5SDimitry Andric 52240b57cec5SDimitry Andric if (BitsFrom.hasOneUse()) { 52250b57cec5SDimitry Andric APInt Demanded = APInt::getBitsSet(32, 52260b57cec5SDimitry Andric OffsetVal, 52270b57cec5SDimitry Andric OffsetVal + WidthVal); 52280b57cec5SDimitry Andric 52290b57cec5SDimitry Andric KnownBits Known; 52300b57cec5SDimitry Andric TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), 52310b57cec5SDimitry Andric !DCI.isBeforeLegalizeOps()); 52320b57cec5SDimitry Andric const TargetLowering &TLI = DAG.getTargetLoweringInfo(); 52330b57cec5SDimitry Andric if (TLI.ShrinkDemandedConstant(BitsFrom, Demanded, TLO) || 52340b57cec5SDimitry Andric TLI.SimplifyDemandedBits(BitsFrom, Demanded, Known, TLO)) { 52350b57cec5SDimitry Andric DCI.CommitTargetLoweringOpt(TLO); 52360b57cec5SDimitry Andric } 52370b57cec5SDimitry Andric } 52380b57cec5SDimitry Andric 52390b57cec5SDimitry Andric break; 52400b57cec5SDimitry Andric } 52410b57cec5SDimitry Andric case ISD::LOAD: 52420b57cec5SDimitry Andric return performLoadCombine(N, DCI); 52430b57cec5SDimitry Andric case ISD::STORE: 52440b57cec5SDimitry Andric return performStoreCombine(N, DCI); 52450b57cec5SDimitry Andric case AMDGPUISD::RCP: 52460b57cec5SDimitry Andric case AMDGPUISD::RCP_IFLAG: 52470b57cec5SDimitry Andric return performRcpCombine(N, DCI); 52480b57cec5SDimitry Andric case ISD::AssertZext: 52490b57cec5SDimitry Andric case ISD::AssertSext: 52500b57cec5SDimitry Andric return performAssertSZExtCombine(N, DCI); 52518bcb0991SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: 52528bcb0991SDimitry Andric return performIntrinsicWOChainCombine(N, DCI); 52535f757f3fSDimitry Andric case AMDGPUISD::FMAD_FTZ: { 52545f757f3fSDimitry Andric SDValue N0 = N->getOperand(0); 52555f757f3fSDimitry Andric SDValue N1 = N->getOperand(1); 52565f757f3fSDimitry Andric SDValue N2 = N->getOperand(2); 52575f757f3fSDimitry Andric EVT VT = N->getValueType(0); 52585f757f3fSDimitry Andric 52595f757f3fSDimitry Andric // FMAD_FTZ is a FMAD + flush denormals to zero. 52605f757f3fSDimitry Andric // We flush the inputs, the intermediate step, and the output. 52615f757f3fSDimitry Andric ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 52625f757f3fSDimitry Andric ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 52635f757f3fSDimitry Andric ConstantFPSDNode *N2CFP = dyn_cast<ConstantFPSDNode>(N2); 52645f757f3fSDimitry Andric if (N0CFP && N1CFP && N2CFP) { 52655f757f3fSDimitry Andric const auto FTZ = [](const APFloat &V) { 52665f757f3fSDimitry Andric if (V.isDenormal()) { 52675f757f3fSDimitry Andric APFloat Zero(V.getSemantics(), 0); 52685f757f3fSDimitry Andric return V.isNegative() ? -Zero : Zero; 52695f757f3fSDimitry Andric } 52705f757f3fSDimitry Andric return V; 52715f757f3fSDimitry Andric }; 52725f757f3fSDimitry Andric 52735f757f3fSDimitry Andric APFloat V0 = FTZ(N0CFP->getValueAPF()); 52745f757f3fSDimitry Andric APFloat V1 = FTZ(N1CFP->getValueAPF()); 52755f757f3fSDimitry Andric APFloat V2 = FTZ(N2CFP->getValueAPF()); 52765f757f3fSDimitry Andric V0.multiply(V1, APFloat::rmNearestTiesToEven); 52775f757f3fSDimitry Andric V0 = FTZ(V0); 52785f757f3fSDimitry Andric V0.add(V2, APFloat::rmNearestTiesToEven); 52795f757f3fSDimitry Andric return DAG.getConstantFP(FTZ(V0), DL, VT); 52805f757f3fSDimitry Andric } 52815f757f3fSDimitry Andric break; 52825f757f3fSDimitry Andric } 52830b57cec5SDimitry Andric } 52840b57cec5SDimitry Andric return SDValue(); 52850b57cec5SDimitry Andric } 52860b57cec5SDimitry Andric 52870b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 52880b57cec5SDimitry Andric // Helper functions 52890b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 52900b57cec5SDimitry Andric 52910b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::CreateLiveInRegister(SelectionDAG &DAG, 52920b57cec5SDimitry Andric const TargetRegisterClass *RC, 52935ffd83dbSDimitry Andric Register Reg, EVT VT, 52940b57cec5SDimitry Andric const SDLoc &SL, 52950b57cec5SDimitry Andric bool RawReg) const { 52960b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 52970b57cec5SDimitry Andric MachineRegisterInfo &MRI = MF.getRegInfo(); 52985ffd83dbSDimitry Andric Register VReg; 52990b57cec5SDimitry Andric 53000b57cec5SDimitry Andric if (!MRI.isLiveIn(Reg)) { 53010b57cec5SDimitry Andric VReg = MRI.createVirtualRegister(RC); 53020b57cec5SDimitry Andric MRI.addLiveIn(Reg, VReg); 53030b57cec5SDimitry Andric } else { 53040b57cec5SDimitry Andric VReg = MRI.getLiveInVirtReg(Reg); 53050b57cec5SDimitry Andric } 53060b57cec5SDimitry Andric 53070b57cec5SDimitry Andric if (RawReg) 53080b57cec5SDimitry Andric return DAG.getRegister(VReg, VT); 53090b57cec5SDimitry Andric 53100b57cec5SDimitry Andric return DAG.getCopyFromReg(DAG.getEntryNode(), SL, VReg, VT); 53110b57cec5SDimitry Andric } 53120b57cec5SDimitry Andric 53138bcb0991SDimitry Andric // This may be called multiple times, and nothing prevents creating multiple 53148bcb0991SDimitry Andric // objects at the same offset. See if we already defined this object. 53158bcb0991SDimitry Andric static int getOrCreateFixedStackObject(MachineFrameInfo &MFI, unsigned Size, 53168bcb0991SDimitry Andric int64_t Offset) { 53178bcb0991SDimitry Andric for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) { 53188bcb0991SDimitry Andric if (MFI.getObjectOffset(I) == Offset) { 53198bcb0991SDimitry Andric assert(MFI.getObjectSize(I) == Size); 53208bcb0991SDimitry Andric return I; 53218bcb0991SDimitry Andric } 53228bcb0991SDimitry Andric } 53238bcb0991SDimitry Andric 53248bcb0991SDimitry Andric return MFI.CreateFixedObject(Size, Offset, true); 53258bcb0991SDimitry Andric } 53268bcb0991SDimitry Andric 53270b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::loadStackInputValue(SelectionDAG &DAG, 53280b57cec5SDimitry Andric EVT VT, 53290b57cec5SDimitry Andric const SDLoc &SL, 53300b57cec5SDimitry Andric int64_t Offset) const { 53310b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 53320b57cec5SDimitry Andric MachineFrameInfo &MFI = MF.getFrameInfo(); 53338bcb0991SDimitry Andric int FI = getOrCreateFixedStackObject(MFI, VT.getStoreSize(), Offset); 53340b57cec5SDimitry Andric 53350b57cec5SDimitry Andric auto SrcPtrInfo = MachinePointerInfo::getStack(MF, Offset); 53360b57cec5SDimitry Andric SDValue Ptr = DAG.getFrameIndex(FI, MVT::i32); 53370b57cec5SDimitry Andric 5338e8d8bef9SDimitry Andric return DAG.getLoad(VT, SL, DAG.getEntryNode(), Ptr, SrcPtrInfo, Align(4), 53390b57cec5SDimitry Andric MachineMemOperand::MODereferenceable | 53400b57cec5SDimitry Andric MachineMemOperand::MOInvariant); 53410b57cec5SDimitry Andric } 53420b57cec5SDimitry Andric 53430b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::storeStackInputValue(SelectionDAG &DAG, 53440b57cec5SDimitry Andric const SDLoc &SL, 53450b57cec5SDimitry Andric SDValue Chain, 53460b57cec5SDimitry Andric SDValue ArgVal, 53470b57cec5SDimitry Andric int64_t Offset) const { 53480b57cec5SDimitry Andric MachineFunction &MF = DAG.getMachineFunction(); 53490b57cec5SDimitry Andric MachinePointerInfo DstInfo = MachinePointerInfo::getStack(MF, Offset); 5350fe6060f1SDimitry Andric const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); 53510b57cec5SDimitry Andric 53520b57cec5SDimitry Andric SDValue Ptr = DAG.getConstant(Offset, SL, MVT::i32); 5353fe6060f1SDimitry Andric // Stores to the argument stack area are relative to the stack pointer. 5354fe6060f1SDimitry Andric SDValue SP = 5355fe6060f1SDimitry Andric DAG.getCopyFromReg(Chain, SL, Info->getStackPtrOffsetReg(), MVT::i32); 5356fe6060f1SDimitry Andric Ptr = DAG.getNode(ISD::ADD, SL, MVT::i32, SP, Ptr); 5357e8d8bef9SDimitry Andric SDValue Store = DAG.getStore(Chain, SL, ArgVal, Ptr, DstInfo, Align(4), 53580b57cec5SDimitry Andric MachineMemOperand::MODereferenceable); 53590b57cec5SDimitry Andric return Store; 53600b57cec5SDimitry Andric } 53610b57cec5SDimitry Andric 53620b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::loadInputValue(SelectionDAG &DAG, 53630b57cec5SDimitry Andric const TargetRegisterClass *RC, 53640b57cec5SDimitry Andric EVT VT, const SDLoc &SL, 53650b57cec5SDimitry Andric const ArgDescriptor &Arg) const { 53660b57cec5SDimitry Andric assert(Arg && "Attempting to load missing argument"); 53670b57cec5SDimitry Andric 53680b57cec5SDimitry Andric SDValue V = Arg.isRegister() ? 53690b57cec5SDimitry Andric CreateLiveInRegister(DAG, RC, Arg.getRegister(), VT, SL) : 53700b57cec5SDimitry Andric loadStackInputValue(DAG, VT, SL, Arg.getStackOffset()); 53710b57cec5SDimitry Andric 53720b57cec5SDimitry Andric if (!Arg.isMasked()) 53730b57cec5SDimitry Andric return V; 53740b57cec5SDimitry Andric 53750b57cec5SDimitry Andric unsigned Mask = Arg.getMask(); 537606c3fb27SDimitry Andric unsigned Shift = llvm::countr_zero<unsigned>(Mask); 53770b57cec5SDimitry Andric V = DAG.getNode(ISD::SRL, SL, VT, V, 53780b57cec5SDimitry Andric DAG.getShiftAmountConstant(Shift, VT, SL)); 53790b57cec5SDimitry Andric return DAG.getNode(ISD::AND, SL, VT, V, 53800b57cec5SDimitry Andric DAG.getConstant(Mask >> Shift, SL, VT)); 53810b57cec5SDimitry Andric } 53820b57cec5SDimitry Andric 53830b57cec5SDimitry Andric uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( 538406c3fb27SDimitry Andric uint64_t ExplicitKernArgSize, const ImplicitParameter Param) const { 538506c3fb27SDimitry Andric unsigned ExplicitArgOffset = Subtarget->getExplicitKernelArgOffset(); 538606c3fb27SDimitry Andric const Align Alignment = Subtarget->getAlignmentForImplicitArgPtr(); 538706c3fb27SDimitry Andric uint64_t ArgOffset = 538806c3fb27SDimitry Andric alignTo(ExplicitKernArgSize, Alignment) + ExplicitArgOffset; 53890b57cec5SDimitry Andric switch (Param) { 539081ad6265SDimitry Andric case FIRST_IMPLICIT: 53910b57cec5SDimitry Andric return ArgOffset; 539281ad6265SDimitry Andric case PRIVATE_BASE: 539381ad6265SDimitry Andric return ArgOffset + AMDGPU::ImplicitArg::PRIVATE_BASE_OFFSET; 539481ad6265SDimitry Andric case SHARED_BASE: 539581ad6265SDimitry Andric return ArgOffset + AMDGPU::ImplicitArg::SHARED_BASE_OFFSET; 539681ad6265SDimitry Andric case QUEUE_PTR: 539781ad6265SDimitry Andric return ArgOffset + AMDGPU::ImplicitArg::QUEUE_PTR_OFFSET; 53980b57cec5SDimitry Andric } 53990b57cec5SDimitry Andric llvm_unreachable("unexpected implicit parameter type"); 54000b57cec5SDimitry Andric } 54010b57cec5SDimitry Andric 540206c3fb27SDimitry Andric uint32_t AMDGPUTargetLowering::getImplicitParameterOffset( 540306c3fb27SDimitry Andric const MachineFunction &MF, const ImplicitParameter Param) const { 540406c3fb27SDimitry Andric const AMDGPUMachineFunction *MFI = MF.getInfo<AMDGPUMachineFunction>(); 540506c3fb27SDimitry Andric return getImplicitParameterOffset(MFI->getExplicitKernArgSize(), Param); 540606c3fb27SDimitry Andric } 540706c3fb27SDimitry Andric 54080b57cec5SDimitry Andric #define NODE_NAME_CASE(node) case AMDGPUISD::node: return #node; 54090b57cec5SDimitry Andric 54100b57cec5SDimitry Andric const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const { 54110b57cec5SDimitry Andric switch ((AMDGPUISD::NodeType)Opcode) { 54120b57cec5SDimitry Andric case AMDGPUISD::FIRST_NUMBER: break; 54130b57cec5SDimitry Andric // AMDIL DAG nodes 54140b57cec5SDimitry Andric NODE_NAME_CASE(UMUL); 54150b57cec5SDimitry Andric NODE_NAME_CASE(BRANCH_COND); 54160b57cec5SDimitry Andric 54170b57cec5SDimitry Andric // AMDGPU DAG nodes 54180b57cec5SDimitry Andric NODE_NAME_CASE(IF) 54190b57cec5SDimitry Andric NODE_NAME_CASE(ELSE) 54200b57cec5SDimitry Andric NODE_NAME_CASE(LOOP) 54210b57cec5SDimitry Andric NODE_NAME_CASE(CALL) 54220b57cec5SDimitry Andric NODE_NAME_CASE(TC_RETURN) 542306c3fb27SDimitry Andric NODE_NAME_CASE(TC_RETURN_GFX) 54245f757f3fSDimitry Andric NODE_NAME_CASE(TC_RETURN_CHAIN) 54250b57cec5SDimitry Andric NODE_NAME_CASE(TRAP) 542606c3fb27SDimitry Andric NODE_NAME_CASE(RET_GLUE) 54275f757f3fSDimitry Andric NODE_NAME_CASE(WAVE_ADDRESS) 54280b57cec5SDimitry Andric NODE_NAME_CASE(RETURN_TO_EPILOG) 54290b57cec5SDimitry Andric NODE_NAME_CASE(ENDPGM) 543006c3fb27SDimitry Andric NODE_NAME_CASE(ENDPGM_TRAP) 54310fca6ea1SDimitry Andric NODE_NAME_CASE(SIMULATED_TRAP) 54320b57cec5SDimitry Andric NODE_NAME_CASE(DWORDADDR) 54330b57cec5SDimitry Andric NODE_NAME_CASE(FRACT) 54340b57cec5SDimitry Andric NODE_NAME_CASE(SETCC) 54350b57cec5SDimitry Andric NODE_NAME_CASE(SETREG) 54368bcb0991SDimitry Andric NODE_NAME_CASE(DENORM_MODE) 54370b57cec5SDimitry Andric NODE_NAME_CASE(FMA_W_CHAIN) 54380b57cec5SDimitry Andric NODE_NAME_CASE(FMUL_W_CHAIN) 54390b57cec5SDimitry Andric NODE_NAME_CASE(CLAMP) 54400b57cec5SDimitry Andric NODE_NAME_CASE(COS_HW) 54410b57cec5SDimitry Andric NODE_NAME_CASE(SIN_HW) 54420b57cec5SDimitry Andric NODE_NAME_CASE(FMAX_LEGACY) 54430b57cec5SDimitry Andric NODE_NAME_CASE(FMIN_LEGACY) 54440b57cec5SDimitry Andric NODE_NAME_CASE(FMAX3) 54450b57cec5SDimitry Andric NODE_NAME_CASE(SMAX3) 54460b57cec5SDimitry Andric NODE_NAME_CASE(UMAX3) 54470b57cec5SDimitry Andric NODE_NAME_CASE(FMIN3) 54480b57cec5SDimitry Andric NODE_NAME_CASE(SMIN3) 54490b57cec5SDimitry Andric NODE_NAME_CASE(UMIN3) 54500b57cec5SDimitry Andric NODE_NAME_CASE(FMED3) 54510b57cec5SDimitry Andric NODE_NAME_CASE(SMED3) 54520b57cec5SDimitry Andric NODE_NAME_CASE(UMED3) 54535f757f3fSDimitry Andric NODE_NAME_CASE(FMAXIMUM3) 54545f757f3fSDimitry Andric NODE_NAME_CASE(FMINIMUM3) 54550b57cec5SDimitry Andric NODE_NAME_CASE(FDOT2) 54560b57cec5SDimitry Andric NODE_NAME_CASE(URECIP) 54570b57cec5SDimitry Andric NODE_NAME_CASE(DIV_SCALE) 54580b57cec5SDimitry Andric NODE_NAME_CASE(DIV_FMAS) 54590b57cec5SDimitry Andric NODE_NAME_CASE(DIV_FIXUP) 54600b57cec5SDimitry Andric NODE_NAME_CASE(FMAD_FTZ) 54610b57cec5SDimitry Andric NODE_NAME_CASE(RCP) 54620b57cec5SDimitry Andric NODE_NAME_CASE(RSQ) 54630b57cec5SDimitry Andric NODE_NAME_CASE(RCP_LEGACY) 54640b57cec5SDimitry Andric NODE_NAME_CASE(RCP_IFLAG) 546506c3fb27SDimitry Andric NODE_NAME_CASE(LOG) 546606c3fb27SDimitry Andric NODE_NAME_CASE(EXP) 54670b57cec5SDimitry Andric NODE_NAME_CASE(FMUL_LEGACY) 54680b57cec5SDimitry Andric NODE_NAME_CASE(RSQ_CLAMP) 54690b57cec5SDimitry Andric NODE_NAME_CASE(FP_CLASS) 54700b57cec5SDimitry Andric NODE_NAME_CASE(DOT4) 54710b57cec5SDimitry Andric NODE_NAME_CASE(CARRY) 54720b57cec5SDimitry Andric NODE_NAME_CASE(BORROW) 54730b57cec5SDimitry Andric NODE_NAME_CASE(BFE_U32) 54740b57cec5SDimitry Andric NODE_NAME_CASE(BFE_I32) 54750b57cec5SDimitry Andric NODE_NAME_CASE(BFI) 54760b57cec5SDimitry Andric NODE_NAME_CASE(BFM) 54770b57cec5SDimitry Andric NODE_NAME_CASE(FFBH_U32) 54780b57cec5SDimitry Andric NODE_NAME_CASE(FFBH_I32) 54790b57cec5SDimitry Andric NODE_NAME_CASE(FFBL_B32) 54800b57cec5SDimitry Andric NODE_NAME_CASE(MUL_U24) 54810b57cec5SDimitry Andric NODE_NAME_CASE(MUL_I24) 54820b57cec5SDimitry Andric NODE_NAME_CASE(MULHI_U24) 54830b57cec5SDimitry Andric NODE_NAME_CASE(MULHI_I24) 54840b57cec5SDimitry Andric NODE_NAME_CASE(MAD_U24) 54850b57cec5SDimitry Andric NODE_NAME_CASE(MAD_I24) 54860b57cec5SDimitry Andric NODE_NAME_CASE(MAD_I64_I32) 54870b57cec5SDimitry Andric NODE_NAME_CASE(MAD_U64_U32) 54880b57cec5SDimitry Andric NODE_NAME_CASE(PERM) 54890b57cec5SDimitry Andric NODE_NAME_CASE(TEXTURE_FETCH) 54900b57cec5SDimitry Andric NODE_NAME_CASE(R600_EXPORT) 54910b57cec5SDimitry Andric NODE_NAME_CASE(CONST_ADDRESS) 54920b57cec5SDimitry Andric NODE_NAME_CASE(REGISTER_LOAD) 54930b57cec5SDimitry Andric NODE_NAME_CASE(REGISTER_STORE) 54940b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLE) 54950b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLEB) 54960b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLED) 54970b57cec5SDimitry Andric NODE_NAME_CASE(SAMPLEL) 54980b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE0) 54990b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE1) 55000b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE2) 55010b57cec5SDimitry Andric NODE_NAME_CASE(CVT_F32_UBYTE3) 55020b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PKRTZ_F16_F32) 55030b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PKNORM_I16_F32) 55040b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PKNORM_U16_F32) 55050b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PK_I16_I32) 55060b57cec5SDimitry Andric NODE_NAME_CASE(CVT_PK_U16_U32) 55070b57cec5SDimitry Andric NODE_NAME_CASE(FP_TO_FP16) 55080b57cec5SDimitry Andric NODE_NAME_CASE(BUILD_VERTICAL_VECTOR) 55090b57cec5SDimitry Andric NODE_NAME_CASE(CONST_DATA_PTR) 55100b57cec5SDimitry Andric NODE_NAME_CASE(PC_ADD_REL_OFFSET) 55110b57cec5SDimitry Andric NODE_NAME_CASE(LDS) 551281ad6265SDimitry Andric NODE_NAME_CASE(FPTRUNC_ROUND_UPWARD) 551381ad6265SDimitry Andric NODE_NAME_CASE(FPTRUNC_ROUND_DOWNWARD) 55140b57cec5SDimitry Andric NODE_NAME_CASE(DUMMY_CHAIN) 55150b57cec5SDimitry Andric case AMDGPUISD::FIRST_MEM_OPCODE_NUMBER: break; 55160b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_HI) 55170b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_LO) 55180b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_HI_I8) 55190b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_HI_U8) 55200b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_LO_I8) 55210b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_D16_LO_U8) 55220b57cec5SDimitry Andric NODE_NAME_CASE(STORE_MSKOR) 55230b57cec5SDimitry Andric NODE_NAME_CASE(LOAD_CONSTANT) 55240b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_STORE_FORMAT) 55250b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_STORE_FORMAT_D16) 55260b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_LOAD_FORMAT) 55270b57cec5SDimitry Andric NODE_NAME_CASE(TBUFFER_LOAD_FORMAT_D16) 55280b57cec5SDimitry Andric NODE_NAME_CASE(DS_ORDERED_COUNT) 55290b57cec5SDimitry Andric NODE_NAME_CASE(ATOMIC_CMP_SWAP) 55300b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD) 55310b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_UBYTE) 55320b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_USHORT) 55330b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_BYTE) 55340b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_SHORT) 55350fca6ea1SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_TFE) 55360fca6ea1SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_UBYTE_TFE) 55370fca6ea1SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_USHORT_TFE) 55380fca6ea1SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_BYTE_TFE) 55390fca6ea1SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_SHORT_TFE) 55400b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_FORMAT) 5541bdd1243dSDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_FORMAT_TFE) 55420b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_LOAD_FORMAT_D16) 55430b57cec5SDimitry Andric NODE_NAME_CASE(SBUFFER_LOAD) 55447a6dacacSDimitry Andric NODE_NAME_CASE(SBUFFER_LOAD_BYTE) 55457a6dacacSDimitry Andric NODE_NAME_CASE(SBUFFER_LOAD_UBYTE) 55467a6dacacSDimitry Andric NODE_NAME_CASE(SBUFFER_LOAD_SHORT) 55477a6dacacSDimitry Andric NODE_NAME_CASE(SBUFFER_LOAD_USHORT) 55480b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE) 55490b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_BYTE) 55500b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_SHORT) 55510b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_FORMAT) 55520b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_STORE_FORMAT_D16) 55530b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SWAP) 55540b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_ADD) 55550b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SUB) 55560b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SMIN) 55570b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_UMIN) 55580b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_SMAX) 55590b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_UMAX) 55600b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_AND) 55610b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_OR) 55620b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_XOR) 55638bcb0991SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_INC) 55648bcb0991SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_DEC) 55650b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_CMPSWAP) 55665ffd83dbSDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_CSUB) 55670b57cec5SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_FADD) 5568fe6060f1SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_FMIN) 5569fe6060f1SDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_FMAX) 55707a6dacacSDimitry Andric NODE_NAME_CASE(BUFFER_ATOMIC_COND_SUB_U32) 55710b57cec5SDimitry Andric 55720b57cec5SDimitry Andric case AMDGPUISD::LAST_AMDGPU_ISD_NUMBER: break; 55730b57cec5SDimitry Andric } 55740b57cec5SDimitry Andric return nullptr; 55750b57cec5SDimitry Andric } 55760b57cec5SDimitry Andric 55770b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getSqrtEstimate(SDValue Operand, 55780b57cec5SDimitry Andric SelectionDAG &DAG, int Enabled, 55790b57cec5SDimitry Andric int &RefinementSteps, 55800b57cec5SDimitry Andric bool &UseOneConstNR, 55810b57cec5SDimitry Andric bool Reciprocal) const { 55820b57cec5SDimitry Andric EVT VT = Operand.getValueType(); 55830b57cec5SDimitry Andric 55840b57cec5SDimitry Andric if (VT == MVT::f32) { 55850b57cec5SDimitry Andric RefinementSteps = 0; 55860b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::RSQ, SDLoc(Operand), VT, Operand); 55870b57cec5SDimitry Andric } 55880b57cec5SDimitry Andric 55890b57cec5SDimitry Andric // TODO: There is also f64 rsq instruction, but the documentation is less 55900b57cec5SDimitry Andric // clear on its precision. 55910b57cec5SDimitry Andric 55920b57cec5SDimitry Andric return SDValue(); 55930b57cec5SDimitry Andric } 55940b57cec5SDimitry Andric 55950b57cec5SDimitry Andric SDValue AMDGPUTargetLowering::getRecipEstimate(SDValue Operand, 55960b57cec5SDimitry Andric SelectionDAG &DAG, int Enabled, 55970b57cec5SDimitry Andric int &RefinementSteps) const { 55980b57cec5SDimitry Andric EVT VT = Operand.getValueType(); 55990b57cec5SDimitry Andric 56000b57cec5SDimitry Andric if (VT == MVT::f32) { 56010b57cec5SDimitry Andric // Reciprocal, < 1 ulp error. 56020b57cec5SDimitry Andric // 56030b57cec5SDimitry Andric // This reciprocal approximation converges to < 0.5 ulp error with one 56040b57cec5SDimitry Andric // newton rhapson performed with two fused multiple adds (FMAs). 56050b57cec5SDimitry Andric 56060b57cec5SDimitry Andric RefinementSteps = 0; 56070b57cec5SDimitry Andric return DAG.getNode(AMDGPUISD::RCP, SDLoc(Operand), VT, Operand); 56080b57cec5SDimitry Andric } 56090b57cec5SDimitry Andric 56100b57cec5SDimitry Andric // TODO: There is also f64 rcp instruction, but the documentation is less 56110b57cec5SDimitry Andric // clear on its precision. 56120b57cec5SDimitry Andric 56130b57cec5SDimitry Andric return SDValue(); 56140b57cec5SDimitry Andric } 56150b57cec5SDimitry Andric 561681ad6265SDimitry Andric static unsigned workitemIntrinsicDim(unsigned ID) { 561781ad6265SDimitry Andric switch (ID) { 561881ad6265SDimitry Andric case Intrinsic::amdgcn_workitem_id_x: 561981ad6265SDimitry Andric return 0; 562081ad6265SDimitry Andric case Intrinsic::amdgcn_workitem_id_y: 562181ad6265SDimitry Andric return 1; 562281ad6265SDimitry Andric case Intrinsic::amdgcn_workitem_id_z: 562381ad6265SDimitry Andric return 2; 562481ad6265SDimitry Andric default: 562581ad6265SDimitry Andric llvm_unreachable("not a workitem intrinsic"); 562681ad6265SDimitry Andric } 562781ad6265SDimitry Andric } 562881ad6265SDimitry Andric 56290b57cec5SDimitry Andric void AMDGPUTargetLowering::computeKnownBitsForTargetNode( 56300b57cec5SDimitry Andric const SDValue Op, KnownBits &Known, 56310b57cec5SDimitry Andric const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth) const { 56320b57cec5SDimitry Andric 56330b57cec5SDimitry Andric Known.resetAll(); // Don't know anything. 56340b57cec5SDimitry Andric 56350b57cec5SDimitry Andric unsigned Opc = Op.getOpcode(); 56360b57cec5SDimitry Andric 56370b57cec5SDimitry Andric switch (Opc) { 56380b57cec5SDimitry Andric default: 56390b57cec5SDimitry Andric break; 56400b57cec5SDimitry Andric case AMDGPUISD::CARRY: 56410b57cec5SDimitry Andric case AMDGPUISD::BORROW: { 56420b57cec5SDimitry Andric Known.Zero = APInt::getHighBitsSet(32, 31); 56430b57cec5SDimitry Andric break; 56440b57cec5SDimitry Andric } 56450b57cec5SDimitry Andric 56460b57cec5SDimitry Andric case AMDGPUISD::BFE_I32: 56470b57cec5SDimitry Andric case AMDGPUISD::BFE_U32: { 56480b57cec5SDimitry Andric ConstantSDNode *CWidth = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 56490b57cec5SDimitry Andric if (!CWidth) 56500b57cec5SDimitry Andric return; 56510b57cec5SDimitry Andric 56520b57cec5SDimitry Andric uint32_t Width = CWidth->getZExtValue() & 0x1f; 56530b57cec5SDimitry Andric 56540b57cec5SDimitry Andric if (Opc == AMDGPUISD::BFE_U32) 56550b57cec5SDimitry Andric Known.Zero = APInt::getHighBitsSet(32, 32 - Width); 56560b57cec5SDimitry Andric 56570b57cec5SDimitry Andric break; 56580b57cec5SDimitry Andric } 5659fe6060f1SDimitry Andric case AMDGPUISD::FP_TO_FP16: { 56600b57cec5SDimitry Andric unsigned BitWidth = Known.getBitWidth(); 56610b57cec5SDimitry Andric 56620b57cec5SDimitry Andric // High bits are zero. 56630b57cec5SDimitry Andric Known.Zero = APInt::getHighBitsSet(BitWidth, BitWidth - 16); 56640b57cec5SDimitry Andric break; 56650b57cec5SDimitry Andric } 56660b57cec5SDimitry Andric case AMDGPUISD::MUL_U24: 56670b57cec5SDimitry Andric case AMDGPUISD::MUL_I24: { 56680b57cec5SDimitry Andric KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 56690b57cec5SDimitry Andric KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); 56700b57cec5SDimitry Andric unsigned TrailZ = LHSKnown.countMinTrailingZeros() + 56710b57cec5SDimitry Andric RHSKnown.countMinTrailingZeros(); 56720b57cec5SDimitry Andric Known.Zero.setLowBits(std::min(TrailZ, 32u)); 5673480093f4SDimitry Andric // Skip extra check if all bits are known zeros. 5674480093f4SDimitry Andric if (TrailZ >= 32) 5675480093f4SDimitry Andric break; 56760b57cec5SDimitry Andric 56770b57cec5SDimitry Andric // Truncate to 24 bits. 56780b57cec5SDimitry Andric LHSKnown = LHSKnown.trunc(24); 56790b57cec5SDimitry Andric RHSKnown = RHSKnown.trunc(24); 56800b57cec5SDimitry Andric 56810b57cec5SDimitry Andric if (Opc == AMDGPUISD::MUL_I24) { 568204eeddc0SDimitry Andric unsigned LHSValBits = LHSKnown.countMaxSignificantBits(); 568304eeddc0SDimitry Andric unsigned RHSValBits = RHSKnown.countMaxSignificantBits(); 568404eeddc0SDimitry Andric unsigned MaxValBits = LHSValBits + RHSValBits; 568504eeddc0SDimitry Andric if (MaxValBits > 32) 56860b57cec5SDimitry Andric break; 568704eeddc0SDimitry Andric unsigned SignBits = 32 - MaxValBits + 1; 56880b57cec5SDimitry Andric bool LHSNegative = LHSKnown.isNegative(); 5689480093f4SDimitry Andric bool LHSNonNegative = LHSKnown.isNonNegative(); 5690480093f4SDimitry Andric bool LHSPositive = LHSKnown.isStrictlyPositive(); 56910b57cec5SDimitry Andric bool RHSNegative = RHSKnown.isNegative(); 5692480093f4SDimitry Andric bool RHSNonNegative = RHSKnown.isNonNegative(); 5693480093f4SDimitry Andric bool RHSPositive = RHSKnown.isStrictlyPositive(); 5694480093f4SDimitry Andric 5695480093f4SDimitry Andric if ((LHSNonNegative && RHSNonNegative) || (LHSNegative && RHSNegative)) 569604eeddc0SDimitry Andric Known.Zero.setHighBits(SignBits); 5697480093f4SDimitry Andric else if ((LHSNegative && RHSPositive) || (LHSPositive && RHSNegative)) 569804eeddc0SDimitry Andric Known.One.setHighBits(SignBits); 56990b57cec5SDimitry Andric } else { 570004eeddc0SDimitry Andric unsigned LHSValBits = LHSKnown.countMaxActiveBits(); 570104eeddc0SDimitry Andric unsigned RHSValBits = RHSKnown.countMaxActiveBits(); 570204eeddc0SDimitry Andric unsigned MaxValBits = LHSValBits + RHSValBits; 57030b57cec5SDimitry Andric if (MaxValBits >= 32) 57040b57cec5SDimitry Andric break; 570504eeddc0SDimitry Andric Known.Zero.setBitsFrom(MaxValBits); 57060b57cec5SDimitry Andric } 57070b57cec5SDimitry Andric break; 57080b57cec5SDimitry Andric } 57090b57cec5SDimitry Andric case AMDGPUISD::PERM: { 57100b57cec5SDimitry Andric ConstantSDNode *CMask = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 57110b57cec5SDimitry Andric if (!CMask) 57120b57cec5SDimitry Andric return; 57130b57cec5SDimitry Andric 57140b57cec5SDimitry Andric KnownBits LHSKnown = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 57150b57cec5SDimitry Andric KnownBits RHSKnown = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); 57160b57cec5SDimitry Andric unsigned Sel = CMask->getZExtValue(); 57170b57cec5SDimitry Andric 57180b57cec5SDimitry Andric for (unsigned I = 0; I < 32; I += 8) { 57190b57cec5SDimitry Andric unsigned SelBits = Sel & 0xff; 57200b57cec5SDimitry Andric if (SelBits < 4) { 57210b57cec5SDimitry Andric SelBits *= 8; 57220b57cec5SDimitry Andric Known.One |= ((RHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I; 57230b57cec5SDimitry Andric Known.Zero |= ((RHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I; 57240b57cec5SDimitry Andric } else if (SelBits < 7) { 57250b57cec5SDimitry Andric SelBits = (SelBits & 3) * 8; 57260b57cec5SDimitry Andric Known.One |= ((LHSKnown.One.getZExtValue() >> SelBits) & 0xff) << I; 57270b57cec5SDimitry Andric Known.Zero |= ((LHSKnown.Zero.getZExtValue() >> SelBits) & 0xff) << I; 57280b57cec5SDimitry Andric } else if (SelBits == 0x0c) { 57298bcb0991SDimitry Andric Known.Zero |= 0xFFull << I; 57300b57cec5SDimitry Andric } else if (SelBits > 0x0c) { 57318bcb0991SDimitry Andric Known.One |= 0xFFull << I; 57320b57cec5SDimitry Andric } 57330b57cec5SDimitry Andric Sel >>= 8; 57340b57cec5SDimitry Andric } 57350b57cec5SDimitry Andric break; 57360b57cec5SDimitry Andric } 57370b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_UBYTE: { 57380b57cec5SDimitry Andric Known.Zero.setHighBits(24); 57390b57cec5SDimitry Andric break; 57400b57cec5SDimitry Andric } 57410b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_USHORT: { 57420b57cec5SDimitry Andric Known.Zero.setHighBits(16); 57430b57cec5SDimitry Andric break; 57440b57cec5SDimitry Andric } 57450b57cec5SDimitry Andric case AMDGPUISD::LDS: { 57460b57cec5SDimitry Andric auto GA = cast<GlobalAddressSDNode>(Op.getOperand(0).getNode()); 57475ffd83dbSDimitry Andric Align Alignment = GA->getGlobal()->getPointerAlignment(DAG.getDataLayout()); 57480b57cec5SDimitry Andric 57490b57cec5SDimitry Andric Known.Zero.setHighBits(16); 57505ffd83dbSDimitry Andric Known.Zero.setLowBits(Log2(Alignment)); 57510b57cec5SDimitry Andric break; 57520b57cec5SDimitry Andric } 575306c3fb27SDimitry Andric case AMDGPUISD::SMIN3: 575406c3fb27SDimitry Andric case AMDGPUISD::SMAX3: 575506c3fb27SDimitry Andric case AMDGPUISD::SMED3: 575606c3fb27SDimitry Andric case AMDGPUISD::UMIN3: 575706c3fb27SDimitry Andric case AMDGPUISD::UMAX3: 575806c3fb27SDimitry Andric case AMDGPUISD::UMED3: { 575906c3fb27SDimitry Andric KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1); 576006c3fb27SDimitry Andric if (Known2.isUnknown()) 576106c3fb27SDimitry Andric break; 576206c3fb27SDimitry Andric 576306c3fb27SDimitry Andric KnownBits Known1 = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); 576406c3fb27SDimitry Andric if (Known1.isUnknown()) 576506c3fb27SDimitry Andric break; 576606c3fb27SDimitry Andric 576706c3fb27SDimitry Andric KnownBits Known0 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); 576806c3fb27SDimitry Andric if (Known0.isUnknown()) 576906c3fb27SDimitry Andric break; 577006c3fb27SDimitry Andric 577106c3fb27SDimitry Andric // TODO: Handle LeadZero/LeadOne from UMIN/UMAX handling. 577206c3fb27SDimitry Andric Known.Zero = Known0.Zero & Known1.Zero & Known2.Zero; 577306c3fb27SDimitry Andric Known.One = Known0.One & Known1.One & Known2.One; 577406c3fb27SDimitry Andric break; 577506c3fb27SDimitry Andric } 57760b57cec5SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: { 5777647cbc5dSDimitry Andric unsigned IID = Op.getConstantOperandVal(0); 57780b57cec5SDimitry Andric switch (IID) { 577981ad6265SDimitry Andric case Intrinsic::amdgcn_workitem_id_x: 578081ad6265SDimitry Andric case Intrinsic::amdgcn_workitem_id_y: 578181ad6265SDimitry Andric case Intrinsic::amdgcn_workitem_id_z: { 578281ad6265SDimitry Andric unsigned MaxValue = Subtarget->getMaxWorkitemID( 578381ad6265SDimitry Andric DAG.getMachineFunction().getFunction(), workitemIntrinsicDim(IID)); 578406c3fb27SDimitry Andric Known.Zero.setHighBits(llvm::countl_zero(MaxValue)); 578581ad6265SDimitry Andric break; 578681ad6265SDimitry Andric } 57870b57cec5SDimitry Andric default: 57880b57cec5SDimitry Andric break; 57890b57cec5SDimitry Andric } 57900b57cec5SDimitry Andric } 57910b57cec5SDimitry Andric } 57920b57cec5SDimitry Andric } 57930b57cec5SDimitry Andric 57940b57cec5SDimitry Andric unsigned AMDGPUTargetLowering::ComputeNumSignBitsForTargetNode( 57950b57cec5SDimitry Andric SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, 57960b57cec5SDimitry Andric unsigned Depth) const { 57970b57cec5SDimitry Andric switch (Op.getOpcode()) { 57980b57cec5SDimitry Andric case AMDGPUISD::BFE_I32: { 57990b57cec5SDimitry Andric ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 58000b57cec5SDimitry Andric if (!Width) 58010b57cec5SDimitry Andric return 1; 58020b57cec5SDimitry Andric 58030b57cec5SDimitry Andric unsigned SignBits = 32 - Width->getZExtValue() + 1; 58040b57cec5SDimitry Andric if (!isNullConstant(Op.getOperand(1))) 58050b57cec5SDimitry Andric return SignBits; 58060b57cec5SDimitry Andric 58070b57cec5SDimitry Andric // TODO: Could probably figure something out with non-0 offsets. 58080b57cec5SDimitry Andric unsigned Op0SignBits = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); 58090b57cec5SDimitry Andric return std::max(SignBits, Op0SignBits); 58100b57cec5SDimitry Andric } 58110b57cec5SDimitry Andric 58120b57cec5SDimitry Andric case AMDGPUISD::BFE_U32: { 58130b57cec5SDimitry Andric ConstantSDNode *Width = dyn_cast<ConstantSDNode>(Op.getOperand(2)); 58140b57cec5SDimitry Andric return Width ? 32 - (Width->getZExtValue() & 0x1f) : 1; 58150b57cec5SDimitry Andric } 58160b57cec5SDimitry Andric 58170b57cec5SDimitry Andric case AMDGPUISD::CARRY: 58180b57cec5SDimitry Andric case AMDGPUISD::BORROW: 58190b57cec5SDimitry Andric return 31; 58200b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_BYTE: 58210b57cec5SDimitry Andric return 25; 58220b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_SHORT: 58230b57cec5SDimitry Andric return 17; 58240b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_UBYTE: 58250b57cec5SDimitry Andric return 24; 58260b57cec5SDimitry Andric case AMDGPUISD::BUFFER_LOAD_USHORT: 58270b57cec5SDimitry Andric return 16; 58280b57cec5SDimitry Andric case AMDGPUISD::FP_TO_FP16: 58290b57cec5SDimitry Andric return 16; 583006c3fb27SDimitry Andric case AMDGPUISD::SMIN3: 583106c3fb27SDimitry Andric case AMDGPUISD::SMAX3: 583206c3fb27SDimitry Andric case AMDGPUISD::SMED3: 583306c3fb27SDimitry Andric case AMDGPUISD::UMIN3: 583406c3fb27SDimitry Andric case AMDGPUISD::UMAX3: 583506c3fb27SDimitry Andric case AMDGPUISD::UMED3: { 583606c3fb27SDimitry Andric unsigned Tmp2 = DAG.ComputeNumSignBits(Op.getOperand(2), Depth + 1); 583706c3fb27SDimitry Andric if (Tmp2 == 1) 583806c3fb27SDimitry Andric return 1; // Early out. 583906c3fb27SDimitry Andric 584006c3fb27SDimitry Andric unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth + 1); 584106c3fb27SDimitry Andric if (Tmp1 == 1) 584206c3fb27SDimitry Andric return 1; // Early out. 584306c3fb27SDimitry Andric 584406c3fb27SDimitry Andric unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1); 584506c3fb27SDimitry Andric if (Tmp0 == 1) 584606c3fb27SDimitry Andric return 1; // Early out. 584706c3fb27SDimitry Andric 58480fca6ea1SDimitry Andric return std::min({Tmp0, Tmp1, Tmp2}); 584906c3fb27SDimitry Andric } 58500b57cec5SDimitry Andric default: 58510b57cec5SDimitry Andric return 1; 58520b57cec5SDimitry Andric } 58530b57cec5SDimitry Andric } 58540b57cec5SDimitry Andric 58555ffd83dbSDimitry Andric unsigned AMDGPUTargetLowering::computeNumSignBitsForTargetInstr( 58565ffd83dbSDimitry Andric GISelKnownBits &Analysis, Register R, 58575ffd83dbSDimitry Andric const APInt &DemandedElts, const MachineRegisterInfo &MRI, 58585ffd83dbSDimitry Andric unsigned Depth) const { 58595ffd83dbSDimitry Andric const MachineInstr *MI = MRI.getVRegDef(R); 58605ffd83dbSDimitry Andric if (!MI) 58615ffd83dbSDimitry Andric return 1; 58625ffd83dbSDimitry Andric 58635ffd83dbSDimitry Andric // TODO: Check range metadata on MMO. 58645ffd83dbSDimitry Andric switch (MI->getOpcode()) { 58655ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE: 58665ffd83dbSDimitry Andric return 25; 58675ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT: 58685ffd83dbSDimitry Andric return 17; 58695ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE: 58705ffd83dbSDimitry Andric return 24; 58715ffd83dbSDimitry Andric case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT: 58725ffd83dbSDimitry Andric return 16; 587306c3fb27SDimitry Andric case AMDGPU::G_AMDGPU_SMED3: 587406c3fb27SDimitry Andric case AMDGPU::G_AMDGPU_UMED3: { 587506c3fb27SDimitry Andric auto [Dst, Src0, Src1, Src2] = MI->getFirst4Regs(); 587606c3fb27SDimitry Andric unsigned Tmp2 = Analysis.computeNumSignBits(Src2, DemandedElts, Depth + 1); 587706c3fb27SDimitry Andric if (Tmp2 == 1) 587806c3fb27SDimitry Andric return 1; 587906c3fb27SDimitry Andric unsigned Tmp1 = Analysis.computeNumSignBits(Src1, DemandedElts, Depth + 1); 588006c3fb27SDimitry Andric if (Tmp1 == 1) 588106c3fb27SDimitry Andric return 1; 588206c3fb27SDimitry Andric unsigned Tmp0 = Analysis.computeNumSignBits(Src0, DemandedElts, Depth + 1); 588306c3fb27SDimitry Andric if (Tmp0 == 1) 588406c3fb27SDimitry Andric return 1; 58850fca6ea1SDimitry Andric return std::min({Tmp0, Tmp1, Tmp2}); 588606c3fb27SDimitry Andric } 58875ffd83dbSDimitry Andric default: 58885ffd83dbSDimitry Andric return 1; 58895ffd83dbSDimitry Andric } 58905ffd83dbSDimitry Andric } 58915ffd83dbSDimitry Andric 58920b57cec5SDimitry Andric bool AMDGPUTargetLowering::isKnownNeverNaNForTargetNode(SDValue Op, 58930b57cec5SDimitry Andric const SelectionDAG &DAG, 58940b57cec5SDimitry Andric bool SNaN, 58950b57cec5SDimitry Andric unsigned Depth) const { 58960b57cec5SDimitry Andric unsigned Opcode = Op.getOpcode(); 58970b57cec5SDimitry Andric switch (Opcode) { 58980b57cec5SDimitry Andric case AMDGPUISD::FMIN_LEGACY: 58990b57cec5SDimitry Andric case AMDGPUISD::FMAX_LEGACY: { 59000b57cec5SDimitry Andric if (SNaN) 59010b57cec5SDimitry Andric return true; 59020b57cec5SDimitry Andric 59030b57cec5SDimitry Andric // TODO: Can check no nans on one of the operands for each one, but which 59040b57cec5SDimitry Andric // one? 59050b57cec5SDimitry Andric return false; 59060b57cec5SDimitry Andric } 59070b57cec5SDimitry Andric case AMDGPUISD::FMUL_LEGACY: 59080b57cec5SDimitry Andric case AMDGPUISD::CVT_PKRTZ_F16_F32: { 59090b57cec5SDimitry Andric if (SNaN) 59100b57cec5SDimitry Andric return true; 59110b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && 59120b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); 59130b57cec5SDimitry Andric } 59140b57cec5SDimitry Andric case AMDGPUISD::FMED3: 59150b57cec5SDimitry Andric case AMDGPUISD::FMIN3: 59160b57cec5SDimitry Andric case AMDGPUISD::FMAX3: 59175f757f3fSDimitry Andric case AMDGPUISD::FMINIMUM3: 59185f757f3fSDimitry Andric case AMDGPUISD::FMAXIMUM3: 59190b57cec5SDimitry Andric case AMDGPUISD::FMAD_FTZ: { 59200b57cec5SDimitry Andric if (SNaN) 59210b57cec5SDimitry Andric return true; 59220b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1) && 59230b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && 59240b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); 59250b57cec5SDimitry Andric } 59260b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE0: 59270b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE1: 59280b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE2: 59290b57cec5SDimitry Andric case AMDGPUISD::CVT_F32_UBYTE3: 59300b57cec5SDimitry Andric return true; 59310b57cec5SDimitry Andric 59320b57cec5SDimitry Andric case AMDGPUISD::RCP: 59330b57cec5SDimitry Andric case AMDGPUISD::RSQ: 59340b57cec5SDimitry Andric case AMDGPUISD::RCP_LEGACY: 59350b57cec5SDimitry Andric case AMDGPUISD::RSQ_CLAMP: { 59360b57cec5SDimitry Andric if (SNaN) 59370b57cec5SDimitry Andric return true; 59380b57cec5SDimitry Andric 59390b57cec5SDimitry Andric // TODO: Need is known positive check. 59400b57cec5SDimitry Andric return false; 59410b57cec5SDimitry Andric } 594206c3fb27SDimitry Andric case ISD::FLDEXP: 59430b57cec5SDimitry Andric case AMDGPUISD::FRACT: { 59440b57cec5SDimitry Andric if (SNaN) 59450b57cec5SDimitry Andric return true; 59460b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(0), SNaN, Depth + 1); 59470b57cec5SDimitry Andric } 59480b57cec5SDimitry Andric case AMDGPUISD::DIV_SCALE: 59490b57cec5SDimitry Andric case AMDGPUISD::DIV_FMAS: 59500b57cec5SDimitry Andric case AMDGPUISD::DIV_FIXUP: 59510b57cec5SDimitry Andric // TODO: Refine on operands. 59520b57cec5SDimitry Andric return SNaN; 59530b57cec5SDimitry Andric case AMDGPUISD::SIN_HW: 59540b57cec5SDimitry Andric case AMDGPUISD::COS_HW: { 59550b57cec5SDimitry Andric // TODO: Need check for infinity 59560b57cec5SDimitry Andric return SNaN; 59570b57cec5SDimitry Andric } 59580b57cec5SDimitry Andric case ISD::INTRINSIC_WO_CHAIN: { 5959647cbc5dSDimitry Andric unsigned IntrinsicID = Op.getConstantOperandVal(0); 59600b57cec5SDimitry Andric // TODO: Handle more intrinsics 59610b57cec5SDimitry Andric switch (IntrinsicID) { 59620b57cec5SDimitry Andric case Intrinsic::amdgcn_cubeid: 59630b57cec5SDimitry Andric return true; 59640b57cec5SDimitry Andric 59650b57cec5SDimitry Andric case Intrinsic::amdgcn_frexp_mant: { 59660b57cec5SDimitry Andric if (SNaN) 59670b57cec5SDimitry Andric return true; 59680b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1); 59690b57cec5SDimitry Andric } 59700b57cec5SDimitry Andric case Intrinsic::amdgcn_cvt_pkrtz: { 59710b57cec5SDimitry Andric if (SNaN) 59720b57cec5SDimitry Andric return true; 59730b57cec5SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && 59740b57cec5SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1); 59750b57cec5SDimitry Andric } 59765ffd83dbSDimitry Andric case Intrinsic::amdgcn_rcp: 59775ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq: 59785ffd83dbSDimitry Andric case Intrinsic::amdgcn_rcp_legacy: 59795ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq_legacy: 59805ffd83dbSDimitry Andric case Intrinsic::amdgcn_rsq_clamp: { 59815ffd83dbSDimitry Andric if (SNaN) 59825ffd83dbSDimitry Andric return true; 59835ffd83dbSDimitry Andric 59845ffd83dbSDimitry Andric // TODO: Need is known positive check. 59855ffd83dbSDimitry Andric return false; 59865ffd83dbSDimitry Andric } 59875ffd83dbSDimitry Andric case Intrinsic::amdgcn_trig_preop: 59880b57cec5SDimitry Andric case Intrinsic::amdgcn_fdot2: 59890b57cec5SDimitry Andric // TODO: Refine on operand 59900b57cec5SDimitry Andric return SNaN; 5991e8d8bef9SDimitry Andric case Intrinsic::amdgcn_fma_legacy: 5992e8d8bef9SDimitry Andric if (SNaN) 5993e8d8bef9SDimitry Andric return true; 5994e8d8bef9SDimitry Andric return DAG.isKnownNeverNaN(Op.getOperand(1), SNaN, Depth + 1) && 5995e8d8bef9SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(2), SNaN, Depth + 1) && 5996e8d8bef9SDimitry Andric DAG.isKnownNeverNaN(Op.getOperand(3), SNaN, Depth + 1); 59970b57cec5SDimitry Andric default: 59980b57cec5SDimitry Andric return false; 59990b57cec5SDimitry Andric } 60000b57cec5SDimitry Andric } 60010b57cec5SDimitry Andric default: 60020b57cec5SDimitry Andric return false; 60030b57cec5SDimitry Andric } 60040b57cec5SDimitry Andric } 60050b57cec5SDimitry Andric 600606c3fb27SDimitry Andric bool AMDGPUTargetLowering::isReassocProfitable(MachineRegisterInfo &MRI, 600706c3fb27SDimitry Andric Register N0, Register N1) const { 600806c3fb27SDimitry Andric return MRI.hasOneNonDBGUse(N0); // FIXME: handle regbanks 600906c3fb27SDimitry Andric } 601006c3fb27SDimitry Andric 60110b57cec5SDimitry Andric TargetLowering::AtomicExpansionKind 60120b57cec5SDimitry Andric AMDGPUTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { 60130b57cec5SDimitry Andric switch (RMW->getOperation()) { 60140b57cec5SDimitry Andric case AtomicRMWInst::Nand: 60150b57cec5SDimitry Andric case AtomicRMWInst::FAdd: 60160b57cec5SDimitry Andric case AtomicRMWInst::FSub: 6017753f127fSDimitry Andric case AtomicRMWInst::FMax: 6018753f127fSDimitry Andric case AtomicRMWInst::FMin: 60190b57cec5SDimitry Andric return AtomicExpansionKind::CmpXChg; 60200fca6ea1SDimitry Andric case AtomicRMWInst::Xchg: { 60210fca6ea1SDimitry Andric const DataLayout &DL = RMW->getFunction()->getDataLayout(); 60220fca6ea1SDimitry Andric unsigned ValSize = DL.getTypeSizeInBits(RMW->getType()); 60230fca6ea1SDimitry Andric if (ValSize == 32 || ValSize == 64) 60240fca6ea1SDimitry Andric return AtomicExpansionKind::None; 60250fca6ea1SDimitry Andric return AtomicExpansionKind::CmpXChg; 60260fca6ea1SDimitry Andric } 6027bdd1243dSDimitry Andric default: { 6028bdd1243dSDimitry Andric if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) { 6029bdd1243dSDimitry Andric unsigned Size = IntTy->getBitWidth(); 6030bdd1243dSDimitry Andric if (Size == 32 || Size == 64) 60310b57cec5SDimitry Andric return AtomicExpansionKind::None; 60320b57cec5SDimitry Andric } 6033bdd1243dSDimitry Andric 6034bdd1243dSDimitry Andric return AtomicExpansionKind::CmpXChg; 6035bdd1243dSDimitry Andric } 6036bdd1243dSDimitry Andric } 60370b57cec5SDimitry Andric } 6038fe6060f1SDimitry Andric 603906c3fb27SDimitry Andric /// Whether it is profitable to sink the operands of an 604006c3fb27SDimitry Andric /// Instruction I to the basic block of I. 604106c3fb27SDimitry Andric /// This helps using several modifiers (like abs and neg) more often. 604206c3fb27SDimitry Andric bool AMDGPUTargetLowering::shouldSinkOperands( 604306c3fb27SDimitry Andric Instruction *I, SmallVectorImpl<Use *> &Ops) const { 604406c3fb27SDimitry Andric using namespace PatternMatch; 604506c3fb27SDimitry Andric 604606c3fb27SDimitry Andric for (auto &Op : I->operands()) { 604706c3fb27SDimitry Andric // Ensure we are not already sinking this operand. 604806c3fb27SDimitry Andric if (any_of(Ops, [&](Use *U) { return U->get() == Op.get(); })) 604906c3fb27SDimitry Andric continue; 605006c3fb27SDimitry Andric 605106c3fb27SDimitry Andric if (match(&Op, m_FAbs(m_Value())) || match(&Op, m_FNeg(m_Value()))) 605206c3fb27SDimitry Andric Ops.push_back(&Op); 605306c3fb27SDimitry Andric } 605406c3fb27SDimitry Andric 605506c3fb27SDimitry Andric return !Ops.empty(); 605606c3fb27SDimitry Andric } 6057