10b57cec5SDimitry Andric //===-- NVPTXAsmPrinter.cpp - NVPTX LLVM assembly writer ------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file contains a printer that converts from our internal representation 100b57cec5SDimitry Andric // of machine-dependent LLVM code to NVPTX assembly language. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "NVPTXAsmPrinter.h" 150b57cec5SDimitry Andric #include "MCTargetDesc/NVPTXBaseInfo.h" 160b57cec5SDimitry Andric #include "MCTargetDesc/NVPTXInstPrinter.h" 170b57cec5SDimitry Andric #include "MCTargetDesc/NVPTXMCAsmInfo.h" 180b57cec5SDimitry Andric #include "MCTargetDesc/NVPTXTargetStreamer.h" 190b57cec5SDimitry Andric #include "NVPTX.h" 200b57cec5SDimitry Andric #include "NVPTXMCExpr.h" 210b57cec5SDimitry Andric #include "NVPTXMachineFunctionInfo.h" 220b57cec5SDimitry Andric #include "NVPTXRegisterInfo.h" 230b57cec5SDimitry Andric #include "NVPTXSubtarget.h" 240b57cec5SDimitry Andric #include "NVPTXTargetMachine.h" 250b57cec5SDimitry Andric #include "NVPTXUtilities.h" 260b57cec5SDimitry Andric #include "TargetInfo/NVPTXTargetInfo.h" 270b57cec5SDimitry Andric #include "cl_common_defines.h" 280b57cec5SDimitry Andric #include "llvm/ADT/APFloat.h" 290b57cec5SDimitry Andric #include "llvm/ADT/APInt.h" 300b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 310b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h" 
320b57cec5SDimitry Andric #include "llvm/ADT/SmallString.h" 330b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 340b57cec5SDimitry Andric #include "llvm/ADT/StringExtras.h" 350b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h" 360b57cec5SDimitry Andric #include "llvm/ADT/Twine.h" 370b57cec5SDimitry Andric #include "llvm/Analysis/ConstantFolding.h" 380b57cec5SDimitry Andric #include "llvm/CodeGen/Analysis.h" 390b57cec5SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 400b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFrameInfo.h" 410b57cec5SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 420b57cec5SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 430b57cec5SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 440b57cec5SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h" 450b57cec5SDimitry Andric #include "llvm/CodeGen/MachineOperand.h" 460b57cec5SDimitry Andric #include "llvm/CodeGen/MachineRegisterInfo.h" 470b57cec5SDimitry Andric #include "llvm/CodeGen/TargetRegisterInfo.h" 480b57cec5SDimitry Andric #include "llvm/CodeGen/ValueTypes.h" 49*0fca6ea1SDimitry Andric #include "llvm/CodeGenTypes/MachineValueType.h" 500b57cec5SDimitry Andric #include "llvm/IR/Attributes.h" 510b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 520b57cec5SDimitry Andric #include "llvm/IR/Constant.h" 530b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 540b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 550b57cec5SDimitry Andric #include "llvm/IR/DebugInfo.h" 560b57cec5SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h" 570b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 580b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 590b57cec5SDimitry Andric #include "llvm/IR/Function.h" 60*0fca6ea1SDimitry Andric #include "llvm/IR/GlobalAlias.h" 610b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 620b57cec5SDimitry Andric #include "llvm/IR/GlobalVariable.h" 630b57cec5SDimitry Andric #include 
"llvm/IR/Instruction.h" 640b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h" 650b57cec5SDimitry Andric #include "llvm/IR/Module.h" 660b57cec5SDimitry Andric #include "llvm/IR/Operator.h" 670b57cec5SDimitry Andric #include "llvm/IR/Type.h" 680b57cec5SDimitry Andric #include "llvm/IR/User.h" 690b57cec5SDimitry Andric #include "llvm/MC/MCExpr.h" 700b57cec5SDimitry Andric #include "llvm/MC/MCInst.h" 710b57cec5SDimitry Andric #include "llvm/MC/MCInstrDesc.h" 720b57cec5SDimitry Andric #include "llvm/MC/MCStreamer.h" 730b57cec5SDimitry Andric #include "llvm/MC/MCSymbol.h" 74349cc55cSDimitry Andric #include "llvm/MC/TargetRegistry.h" 75*0fca6ea1SDimitry Andric #include "llvm/Support/Alignment.h" 760b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 770b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 78fcaf7f86SDimitry Andric #include "llvm/Support/Endian.h" 790b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 80fcaf7f86SDimitry Andric #include "llvm/Support/NativeFormatting.h" 810b57cec5SDimitry Andric #include "llvm/Support/Path.h" 820b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 830b57cec5SDimitry Andric #include "llvm/Target/TargetLoweringObjectFile.h" 840b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h" 8506c3fb27SDimitry Andric #include "llvm/TargetParser/Triple.h" 860b57cec5SDimitry Andric #include "llvm/Transforms/Utils/UnrollLoop.h" 870b57cec5SDimitry Andric #include <cassert> 880b57cec5SDimitry Andric #include <cstdint> 890b57cec5SDimitry Andric #include <cstring> 900b57cec5SDimitry Andric #include <new> 910b57cec5SDimitry Andric #include <string> 920b57cec5SDimitry Andric #include <utility> 930b57cec5SDimitry Andric #include <vector> 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric using namespace llvm; 960b57cec5SDimitry Andric 9706c3fb27SDimitry Andric static cl::opt<bool> 9806c3fb27SDimitry Andric LowerCtorDtor("nvptx-lower-global-ctor-dtor", 9906c3fb27SDimitry Andric cl::desc("Lower GPU ctor / 
dtors to globals on the device."), 10006c3fb27SDimitry Andric cl::init(false), cl::Hidden); 10106c3fb27SDimitry Andric 1020b57cec5SDimitry Andric #define DEPOTNAME "__local_depot" 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V 1050b57cec5SDimitry Andric /// depends. 1060b57cec5SDimitry Andric static void 1070b57cec5SDimitry Andric DiscoverDependentGlobals(const Value *V, 1080b57cec5SDimitry Andric DenseSet<const GlobalVariable *> &Globals) { 1090b57cec5SDimitry Andric if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) 1100b57cec5SDimitry Andric Globals.insert(GV); 1110b57cec5SDimitry Andric else { 1120b57cec5SDimitry Andric if (const User *U = dyn_cast<User>(V)) { 1130b57cec5SDimitry Andric for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { 1140b57cec5SDimitry Andric DiscoverDependentGlobals(U->getOperand(i), Globals); 1150b57cec5SDimitry Andric } 1160b57cec5SDimitry Andric } 1170b57cec5SDimitry Andric } 1180b57cec5SDimitry Andric } 1190b57cec5SDimitry Andric 1200b57cec5SDimitry Andric /// VisitGlobalVariableForEmission - Add \p GV to the list of GlobalVariable 1210b57cec5SDimitry Andric /// instances to be emitted, but only after any dependents have been added 1220b57cec5SDimitry Andric /// first.s 1230b57cec5SDimitry Andric static void 1240b57cec5SDimitry Andric VisitGlobalVariableForEmission(const GlobalVariable *GV, 1250b57cec5SDimitry Andric SmallVectorImpl<const GlobalVariable *> &Order, 1260b57cec5SDimitry Andric DenseSet<const GlobalVariable *> &Visited, 1270b57cec5SDimitry Andric DenseSet<const GlobalVariable *> &Visiting) { 1280b57cec5SDimitry Andric // Have we already visited this one? 1290b57cec5SDimitry Andric if (Visited.count(GV)) 1300b57cec5SDimitry Andric return; 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric // Do we have a circular dependency? 
1330b57cec5SDimitry Andric if (!Visiting.insert(GV).second) 1340b57cec5SDimitry Andric report_fatal_error("Circular dependency found in global variable set"); 1350b57cec5SDimitry Andric 1360b57cec5SDimitry Andric // Make sure we visit all dependents first 1370b57cec5SDimitry Andric DenseSet<const GlobalVariable *> Others; 1380b57cec5SDimitry Andric for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) 1390b57cec5SDimitry Andric DiscoverDependentGlobals(GV->getOperand(i), Others); 1400b57cec5SDimitry Andric 1414824e7fdSDimitry Andric for (const GlobalVariable *GV : Others) 1424824e7fdSDimitry Andric VisitGlobalVariableForEmission(GV, Order, Visited, Visiting); 1430b57cec5SDimitry Andric 1440b57cec5SDimitry Andric // Now we can visit ourself 1450b57cec5SDimitry Andric Order.push_back(GV); 1460b57cec5SDimitry Andric Visited.insert(GV); 1470b57cec5SDimitry Andric Visiting.erase(GV); 1480b57cec5SDimitry Andric } 1490b57cec5SDimitry Andric 1505ffd83dbSDimitry Andric void NVPTXAsmPrinter::emitInstruction(const MachineInstr *MI) { 151753f127fSDimitry Andric NVPTX_MC::verifyInstructionPredicates(MI->getOpcode(), 152753f127fSDimitry Andric getSubtargetInfo().getFeatureBits()); 153753f127fSDimitry Andric 1540b57cec5SDimitry Andric MCInst Inst; 1550b57cec5SDimitry Andric lowerToMCInst(MI, Inst); 1560b57cec5SDimitry Andric EmitToStreamer(*OutStreamer, Inst); 1570b57cec5SDimitry Andric } 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric // Handle symbol backtracking for targets that do not support image handles 1600b57cec5SDimitry Andric bool NVPTXAsmPrinter::lowerImageHandleOperand(const MachineInstr *MI, 1610b57cec5SDimitry Andric unsigned OpNo, MCOperand &MCOp) { 1620b57cec5SDimitry Andric const MachineOperand &MO = MI->getOperand(OpNo); 1630b57cec5SDimitry Andric const MCInstrDesc &MCID = MI->getDesc(); 1640b57cec5SDimitry Andric 1650b57cec5SDimitry Andric if (MCID.TSFlags & NVPTXII::IsTexFlag) { 1660b57cec5SDimitry Andric // This is a texture fetch, so operand 4 
is a texref and operand 5 is 1670b57cec5SDimitry Andric // a samplerref 1680b57cec5SDimitry Andric if (OpNo == 4 && MO.isImm()) { 1690b57cec5SDimitry Andric lowerImageHandleSymbol(MO.getImm(), MCOp); 1700b57cec5SDimitry Andric return true; 1710b57cec5SDimitry Andric } 1720b57cec5SDimitry Andric if (OpNo == 5 && MO.isImm() && !(MCID.TSFlags & NVPTXII::IsTexModeUnifiedFlag)) { 1730b57cec5SDimitry Andric lowerImageHandleSymbol(MO.getImm(), MCOp); 1740b57cec5SDimitry Andric return true; 1750b57cec5SDimitry Andric } 1760b57cec5SDimitry Andric 1770b57cec5SDimitry Andric return false; 1780b57cec5SDimitry Andric } else if (MCID.TSFlags & NVPTXII::IsSuldMask) { 1790b57cec5SDimitry Andric unsigned VecSize = 1800b57cec5SDimitry Andric 1 << (((MCID.TSFlags & NVPTXII::IsSuldMask) >> NVPTXII::IsSuldShift) - 1); 1810b57cec5SDimitry Andric 1820b57cec5SDimitry Andric // For a surface load of vector size N, the Nth operand will be the surfref 1830b57cec5SDimitry Andric if (OpNo == VecSize && MO.isImm()) { 1840b57cec5SDimitry Andric lowerImageHandleSymbol(MO.getImm(), MCOp); 1850b57cec5SDimitry Andric return true; 1860b57cec5SDimitry Andric } 1870b57cec5SDimitry Andric 1880b57cec5SDimitry Andric return false; 1890b57cec5SDimitry Andric } else if (MCID.TSFlags & NVPTXII::IsSustFlag) { 1900b57cec5SDimitry Andric // This is a surface store, so operand 0 is a surfref 1910b57cec5SDimitry Andric if (OpNo == 0 && MO.isImm()) { 1920b57cec5SDimitry Andric lowerImageHandleSymbol(MO.getImm(), MCOp); 1930b57cec5SDimitry Andric return true; 1940b57cec5SDimitry Andric } 1950b57cec5SDimitry Andric 1960b57cec5SDimitry Andric return false; 1970b57cec5SDimitry Andric } else if (MCID.TSFlags & NVPTXII::IsSurfTexQueryFlag) { 1980b57cec5SDimitry Andric // This is a query, so operand 1 is a surfref/texref 1990b57cec5SDimitry Andric if (OpNo == 1 && MO.isImm()) { 2000b57cec5SDimitry Andric lowerImageHandleSymbol(MO.getImm(), MCOp); 2010b57cec5SDimitry Andric return true; 2020b57cec5SDimitry Andric } 
2030b57cec5SDimitry Andric 2040b57cec5SDimitry Andric return false; 2050b57cec5SDimitry Andric } 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric return false; 2080b57cec5SDimitry Andric } 2090b57cec5SDimitry Andric 2100b57cec5SDimitry Andric void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { 2110b57cec5SDimitry Andric // Ewwww 2120b57cec5SDimitry Andric LLVMTargetMachine &TM = const_cast<LLVMTargetMachine&>(MF->getTarget()); 2130b57cec5SDimitry Andric NVPTXTargetMachine &nvTM = static_cast<NVPTXTargetMachine&>(TM); 2140b57cec5SDimitry Andric const NVPTXMachineFunctionInfo *MFI = MF->getInfo<NVPTXMachineFunctionInfo>(); 2150b57cec5SDimitry Andric const char *Sym = MFI->getImageHandleSymbol(Index); 216bdd1243dSDimitry Andric StringRef SymName = nvTM.getStrPool().save(Sym); 217bdd1243dSDimitry Andric MCOp = GetSymbolRef(OutContext.getOrCreateSymbol(SymName)); 2180b57cec5SDimitry Andric } 2190b57cec5SDimitry Andric 2200b57cec5SDimitry Andric void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { 2210b57cec5SDimitry Andric OutMI.setOpcode(MI->getOpcode()); 2220b57cec5SDimitry Andric // Special: Do not mangle symbol operand of CALL_PROTOTYPE 2230b57cec5SDimitry Andric if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { 2240b57cec5SDimitry Andric const MachineOperand &MO = MI->getOperand(0); 2250b57cec5SDimitry Andric OutMI.addOperand(GetSymbolRef( 2260b57cec5SDimitry Andric OutContext.getOrCreateSymbol(Twine(MO.getSymbolName())))); 2270b57cec5SDimitry Andric return; 2280b57cec5SDimitry Andric } 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>(); 2310b57cec5SDimitry Andric for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 2320b57cec5SDimitry Andric const MachineOperand &MO = MI->getOperand(i); 2330b57cec5SDimitry Andric 2340b57cec5SDimitry Andric MCOperand MCOp; 2350b57cec5SDimitry Andric if (!STI.hasImageHandles()) { 
2360b57cec5SDimitry Andric if (lowerImageHandleOperand(MI, i, MCOp)) { 2370b57cec5SDimitry Andric OutMI.addOperand(MCOp); 2380b57cec5SDimitry Andric continue; 2390b57cec5SDimitry Andric } 2400b57cec5SDimitry Andric } 2410b57cec5SDimitry Andric 2420b57cec5SDimitry Andric if (lowerOperand(MO, MCOp)) 2430b57cec5SDimitry Andric OutMI.addOperand(MCOp); 2440b57cec5SDimitry Andric } 2450b57cec5SDimitry Andric } 2460b57cec5SDimitry Andric 2470b57cec5SDimitry Andric bool NVPTXAsmPrinter::lowerOperand(const MachineOperand &MO, 2480b57cec5SDimitry Andric MCOperand &MCOp) { 2490b57cec5SDimitry Andric switch (MO.getType()) { 2500b57cec5SDimitry Andric default: llvm_unreachable("unknown operand type"); 2510b57cec5SDimitry Andric case MachineOperand::MO_Register: 2520b57cec5SDimitry Andric MCOp = MCOperand::createReg(encodeVirtualRegister(MO.getReg())); 2530b57cec5SDimitry Andric break; 2540b57cec5SDimitry Andric case MachineOperand::MO_Immediate: 2550b57cec5SDimitry Andric MCOp = MCOperand::createImm(MO.getImm()); 2560b57cec5SDimitry Andric break; 2570b57cec5SDimitry Andric case MachineOperand::MO_MachineBasicBlock: 2580b57cec5SDimitry Andric MCOp = MCOperand::createExpr(MCSymbolRefExpr::create( 2590b57cec5SDimitry Andric MO.getMBB()->getSymbol(), OutContext)); 2600b57cec5SDimitry Andric break; 2610b57cec5SDimitry Andric case MachineOperand::MO_ExternalSymbol: 2620b57cec5SDimitry Andric MCOp = GetSymbolRef(GetExternalSymbolSymbol(MO.getSymbolName())); 2630b57cec5SDimitry Andric break; 2640b57cec5SDimitry Andric case MachineOperand::MO_GlobalAddress: 2650b57cec5SDimitry Andric MCOp = GetSymbolRef(getSymbol(MO.getGlobal())); 2660b57cec5SDimitry Andric break; 2670b57cec5SDimitry Andric case MachineOperand::MO_FPImmediate: { 2680b57cec5SDimitry Andric const ConstantFP *Cnt = MO.getFPImm(); 2690b57cec5SDimitry Andric const APFloat &Val = Cnt->getValueAPF(); 2700b57cec5SDimitry Andric 2710b57cec5SDimitry Andric switch (Cnt->getType()->getTypeID()) { 2720b57cec5SDimitry Andric default: 
report_fatal_error("Unsupported FP type"); break; 2730b57cec5SDimitry Andric case Type::HalfTyID: 2740b57cec5SDimitry Andric MCOp = MCOperand::createExpr( 2750b57cec5SDimitry Andric NVPTXFloatMCExpr::createConstantFPHalf(Val, OutContext)); 2760b57cec5SDimitry Andric break; 27706c3fb27SDimitry Andric case Type::BFloatTyID: 27806c3fb27SDimitry Andric MCOp = MCOperand::createExpr( 27906c3fb27SDimitry Andric NVPTXFloatMCExpr::createConstantBFPHalf(Val, OutContext)); 28006c3fb27SDimitry Andric break; 2810b57cec5SDimitry Andric case Type::FloatTyID: 2820b57cec5SDimitry Andric MCOp = MCOperand::createExpr( 2830b57cec5SDimitry Andric NVPTXFloatMCExpr::createConstantFPSingle(Val, OutContext)); 2840b57cec5SDimitry Andric break; 2850b57cec5SDimitry Andric case Type::DoubleTyID: 2860b57cec5SDimitry Andric MCOp = MCOperand::createExpr( 2870b57cec5SDimitry Andric NVPTXFloatMCExpr::createConstantFPDouble(Val, OutContext)); 2880b57cec5SDimitry Andric break; 2890b57cec5SDimitry Andric } 2900b57cec5SDimitry Andric break; 2910b57cec5SDimitry Andric } 2920b57cec5SDimitry Andric } 2930b57cec5SDimitry Andric return true; 2940b57cec5SDimitry Andric } 2950b57cec5SDimitry Andric 2960b57cec5SDimitry Andric unsigned NVPTXAsmPrinter::encodeVirtualRegister(unsigned Reg) { 2978bcb0991SDimitry Andric if (Register::isVirtualRegister(Reg)) { 2980b57cec5SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(Reg); 2990b57cec5SDimitry Andric 3000b57cec5SDimitry Andric DenseMap<unsigned, unsigned> &RegMap = VRegMapping[RC]; 3010b57cec5SDimitry Andric unsigned RegNum = RegMap[Reg]; 3020b57cec5SDimitry Andric 3030b57cec5SDimitry Andric // Encode the register class in the upper 4 bits 3040b57cec5SDimitry Andric // Must be kept in sync with NVPTXInstPrinter::printRegName 3050b57cec5SDimitry Andric unsigned Ret = 0; 3060b57cec5SDimitry Andric if (RC == &NVPTX::Int1RegsRegClass) { 3070b57cec5SDimitry Andric Ret = (1 << 28); 3080b57cec5SDimitry Andric } else if (RC == &NVPTX::Int16RegsRegClass) { 
3090b57cec5SDimitry Andric Ret = (2 << 28); 3100b57cec5SDimitry Andric } else if (RC == &NVPTX::Int32RegsRegClass) { 3110b57cec5SDimitry Andric Ret = (3 << 28); 3120b57cec5SDimitry Andric } else if (RC == &NVPTX::Int64RegsRegClass) { 3130b57cec5SDimitry Andric Ret = (4 << 28); 3140b57cec5SDimitry Andric } else if (RC == &NVPTX::Float32RegsRegClass) { 3150b57cec5SDimitry Andric Ret = (5 << 28); 3160b57cec5SDimitry Andric } else if (RC == &NVPTX::Float64RegsRegClass) { 3170b57cec5SDimitry Andric Ret = (6 << 28); 318*0fca6ea1SDimitry Andric } else if (RC == &NVPTX::Int128RegsRegClass) { 319*0fca6ea1SDimitry Andric Ret = (7 << 28); 3200b57cec5SDimitry Andric } else { 3210b57cec5SDimitry Andric report_fatal_error("Bad register class"); 3220b57cec5SDimitry Andric } 3230b57cec5SDimitry Andric 3240b57cec5SDimitry Andric // Insert the vreg number 3250b57cec5SDimitry Andric Ret |= (RegNum & 0x0FFFFFFF); 3260b57cec5SDimitry Andric return Ret; 3270b57cec5SDimitry Andric } else { 3280b57cec5SDimitry Andric // Some special-use registers are actually physical registers. 3290b57cec5SDimitry Andric // Encode this as the register class ID of 0 and the real register ID. 
3300b57cec5SDimitry Andric return Reg & 0x0FFFFFFF; 3310b57cec5SDimitry Andric } 3320b57cec5SDimitry Andric } 3330b57cec5SDimitry Andric 3340b57cec5SDimitry Andric MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { 3350b57cec5SDimitry Andric const MCExpr *Expr; 3360b57cec5SDimitry Andric Expr = MCSymbolRefExpr::create(Symbol, MCSymbolRefExpr::VK_None, 3370b57cec5SDimitry Andric OutContext); 3380b57cec5SDimitry Andric return MCOperand::createExpr(Expr); 3390b57cec5SDimitry Andric } 3400b57cec5SDimitry Andric 34106c3fb27SDimitry Andric static bool ShouldPassAsArray(Type *Ty) { 34206c3fb27SDimitry Andric return Ty->isAggregateType() || Ty->isVectorTy() || Ty->isIntegerTy(128) || 34306c3fb27SDimitry Andric Ty->isHalfTy() || Ty->isBFloatTy(); 34406c3fb27SDimitry Andric } 34506c3fb27SDimitry Andric 3460b57cec5SDimitry Andric void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { 3470b57cec5SDimitry Andric const DataLayout &DL = getDataLayout(); 3480b57cec5SDimitry Andric const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F); 34981ad6265SDimitry Andric const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering()); 3500b57cec5SDimitry Andric 3510b57cec5SDimitry Andric Type *Ty = F->getReturnType(); 3520b57cec5SDimitry Andric 3530b57cec5SDimitry Andric bool isABI = (STI.getSmVersion() >= 20); 3540b57cec5SDimitry Andric 3550b57cec5SDimitry Andric if (Ty->getTypeID() == Type::VoidTyID) 3560b57cec5SDimitry Andric return; 3570b57cec5SDimitry Andric O << " ("; 3580b57cec5SDimitry Andric 3590b57cec5SDimitry Andric if (isABI) { 36006c3fb27SDimitry Andric if ((Ty->isFloatingPointTy() || Ty->isIntegerTy()) && 36106c3fb27SDimitry Andric !ShouldPassAsArray(Ty)) { 3620b57cec5SDimitry Andric unsigned size = 0; 3630b57cec5SDimitry Andric if (auto *ITy = dyn_cast<IntegerType>(Ty)) { 3640b57cec5SDimitry Andric size = ITy->getBitWidth(); 3650b57cec5SDimitry Andric } else { 3660b57cec5SDimitry Andric assert(Ty->isFloatingPointTy() && 
"Floating point type expected here"); 3670b57cec5SDimitry Andric size = Ty->getPrimitiveSizeInBits(); 3680b57cec5SDimitry Andric } 369fcaf7f86SDimitry Andric size = promoteScalarArgumentSize(size); 3700b57cec5SDimitry Andric O << ".param .b" << size << " func_retval0"; 3710b57cec5SDimitry Andric } else if (isa<PointerType>(Ty)) { 3720b57cec5SDimitry Andric O << ".param .b" << TLI->getPointerTy(DL).getSizeInBits() 3730b57cec5SDimitry Andric << " func_retval0"; 37406c3fb27SDimitry Andric } else if (ShouldPassAsArray(Ty)) { 3750b57cec5SDimitry Andric unsigned totalsz = DL.getTypeAllocSize(Ty); 376*0fca6ea1SDimitry Andric Align RetAlignment = TLI->getFunctionArgumentAlignment( 377*0fca6ea1SDimitry Andric F, Ty, AttributeList::ReturnIndex, DL); 378*0fca6ea1SDimitry Andric O << ".param .align " << RetAlignment.value() << " .b8 func_retval0[" 379*0fca6ea1SDimitry Andric << totalsz << "]"; 3800b57cec5SDimitry Andric } else 3810b57cec5SDimitry Andric llvm_unreachable("Unknown return type"); 3820b57cec5SDimitry Andric } else { 3830b57cec5SDimitry Andric SmallVector<EVT, 16> vtparts; 3840b57cec5SDimitry Andric ComputeValueVTs(*TLI, DL, Ty, vtparts); 3850b57cec5SDimitry Andric unsigned idx = 0; 3860b57cec5SDimitry Andric for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 3870b57cec5SDimitry Andric unsigned elems = 1; 3880b57cec5SDimitry Andric EVT elemtype = vtparts[i]; 3890b57cec5SDimitry Andric if (vtparts[i].isVector()) { 3900b57cec5SDimitry Andric elems = vtparts[i].getVectorNumElements(); 3910b57cec5SDimitry Andric elemtype = vtparts[i].getVectorElementType(); 3920b57cec5SDimitry Andric } 3930b57cec5SDimitry Andric 3940b57cec5SDimitry Andric for (unsigned j = 0, je = elems; j != je; ++j) { 3950b57cec5SDimitry Andric unsigned sz = elemtype.getSizeInBits(); 396fcaf7f86SDimitry Andric if (elemtype.isInteger()) 397fcaf7f86SDimitry Andric sz = promoteScalarArgumentSize(sz); 3980b57cec5SDimitry Andric O << ".reg .b" << sz << " func_retval" << idx; 3990b57cec5SDimitry 
Andric if (j < je - 1) 4000b57cec5SDimitry Andric O << ", "; 4010b57cec5SDimitry Andric ++idx; 4020b57cec5SDimitry Andric } 4030b57cec5SDimitry Andric if (i < e - 1) 4040b57cec5SDimitry Andric O << ", "; 4050b57cec5SDimitry Andric } 4060b57cec5SDimitry Andric } 4070b57cec5SDimitry Andric O << ") "; 4080b57cec5SDimitry Andric } 4090b57cec5SDimitry Andric 4100b57cec5SDimitry Andric void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF, 4110b57cec5SDimitry Andric raw_ostream &O) { 4120b57cec5SDimitry Andric const Function &F = MF.getFunction(); 4130b57cec5SDimitry Andric printReturnValStr(&F, O); 4140b57cec5SDimitry Andric } 4150b57cec5SDimitry Andric 4160b57cec5SDimitry Andric // Return true if MBB is the header of a loop marked with 417bdd1243dSDimitry Andric // llvm.loop.unroll.disable or llvm.loop.unroll.count=1. 4180b57cec5SDimitry Andric bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll( 4190b57cec5SDimitry Andric const MachineBasicBlock &MBB) const { 420*0fca6ea1SDimitry Andric MachineLoopInfo &LI = getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 4210b57cec5SDimitry Andric // We insert .pragma "nounroll" only to the loop header. 4220b57cec5SDimitry Andric if (!LI.isLoopHeader(&MBB)) 4230b57cec5SDimitry Andric return false; 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric // llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore, 4260b57cec5SDimitry Andric // we iterate through each back edge of the loop with header MBB, and check 4270b57cec5SDimitry Andric // whether its metadata contains llvm.loop.unroll.disable. 428349cc55cSDimitry Andric for (const MachineBasicBlock *PMBB : MBB.predecessors()) { 4290b57cec5SDimitry Andric if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) { 4300b57cec5SDimitry Andric // Edges from other loops to MBB are not back edges. 
4310b57cec5SDimitry Andric continue; 4320b57cec5SDimitry Andric } 4330b57cec5SDimitry Andric if (const BasicBlock *PBB = PMBB->getBasicBlock()) { 4340b57cec5SDimitry Andric if (MDNode *LoopID = 4350b57cec5SDimitry Andric PBB->getTerminator()->getMetadata(LLVMContext::MD_loop)) { 4360b57cec5SDimitry Andric if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable")) 4370b57cec5SDimitry Andric return true; 438bdd1243dSDimitry Andric if (MDNode *UnrollCountMD = 439bdd1243dSDimitry Andric GetUnrollMetadata(LoopID, "llvm.loop.unroll.count")) { 440bdd1243dSDimitry Andric if (mdconst::extract<ConstantInt>(UnrollCountMD->getOperand(1)) 44106c3fb27SDimitry Andric ->isOne()) 442bdd1243dSDimitry Andric return true; 443bdd1243dSDimitry Andric } 4440b57cec5SDimitry Andric } 4450b57cec5SDimitry Andric } 4460b57cec5SDimitry Andric } 4470b57cec5SDimitry Andric return false; 4480b57cec5SDimitry Andric } 4490b57cec5SDimitry Andric 4505ffd83dbSDimitry Andric void NVPTXAsmPrinter::emitBasicBlockStart(const MachineBasicBlock &MBB) { 4515ffd83dbSDimitry Andric AsmPrinter::emitBasicBlockStart(MBB); 4520b57cec5SDimitry Andric if (isLoopHeaderOfNoUnroll(MBB)) 4535ffd83dbSDimitry Andric OutStreamer->emitRawText(StringRef("\t.pragma \"nounroll\";\n")); 4540b57cec5SDimitry Andric } 4550b57cec5SDimitry Andric 4565ffd83dbSDimitry Andric void NVPTXAsmPrinter::emitFunctionEntryLabel() { 4570b57cec5SDimitry Andric SmallString<128> Str; 4580b57cec5SDimitry Andric raw_svector_ostream O(Str); 4590b57cec5SDimitry Andric 4600b57cec5SDimitry Andric if (!GlobalsEmitted) { 4610b57cec5SDimitry Andric emitGlobals(*MF->getFunction().getParent()); 4620b57cec5SDimitry Andric GlobalsEmitted = true; 4630b57cec5SDimitry Andric } 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric // Set up 4660b57cec5SDimitry Andric MRI = &MF->getRegInfo(); 4670b57cec5SDimitry Andric F = &MF->getFunction(); 4680b57cec5SDimitry Andric emitLinkageDirective(F, O); 4690b57cec5SDimitry Andric if (isKernelFunction(*F)) 
4700b57cec5SDimitry Andric O << ".entry "; 4710b57cec5SDimitry Andric else { 4720b57cec5SDimitry Andric O << ".func "; 4730b57cec5SDimitry Andric printReturnValStr(*MF, O); 4740b57cec5SDimitry Andric } 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric CurrentFnSym->print(O, MAI); 4770b57cec5SDimitry Andric 47806c3fb27SDimitry Andric emitFunctionParamList(F, O); 47906c3fb27SDimitry Andric O << "\n"; 4800b57cec5SDimitry Andric 4810b57cec5SDimitry Andric if (isKernelFunction(*F)) 4820b57cec5SDimitry Andric emitKernelFunctionDirectives(*F, O); 4830b57cec5SDimitry Andric 484bdd1243dSDimitry Andric if (shouldEmitPTXNoReturn(F, TM)) 485bdd1243dSDimitry Andric O << ".noreturn"; 486bdd1243dSDimitry Andric 4875ffd83dbSDimitry Andric OutStreamer->emitRawText(O.str()); 4880b57cec5SDimitry Andric 4890b57cec5SDimitry Andric VRegMapping.clear(); 4900b57cec5SDimitry Andric // Emit open brace for function body. 4915ffd83dbSDimitry Andric OutStreamer->emitRawText(StringRef("{\n")); 4920b57cec5SDimitry Andric setAndEmitFunctionVirtualRegisters(*MF); 4930b57cec5SDimitry Andric // Emit initial .loc debug directive for correct relocation symbol data. 494*0fca6ea1SDimitry Andric if (const DISubprogram *SP = MF->getFunction().getSubprogram()) { 495*0fca6ea1SDimitry Andric assert(SP->getUnit()); 496*0fca6ea1SDimitry Andric if (!SP->getUnit()->isDebugDirectivesOnly() && MMI && MMI->hasDebugInfo()) 4970b57cec5SDimitry Andric emitInitialRawDwarfLocDirective(*MF); 4980b57cec5SDimitry Andric } 499*0fca6ea1SDimitry Andric } 5000b57cec5SDimitry Andric 5010b57cec5SDimitry Andric bool NVPTXAsmPrinter::runOnMachineFunction(MachineFunction &F) { 5020b57cec5SDimitry Andric bool Result = AsmPrinter::runOnMachineFunction(F); 5030b57cec5SDimitry Andric // Emit closing brace for the body of function F. 5040b57cec5SDimitry Andric // The closing brace must be emitted here because we need to emit additional 5050b57cec5SDimitry Andric // debug labels/data after the last basic block. 
5060b57cec5SDimitry Andric // We need to emit the closing brace here because we don't have function that 5070b57cec5SDimitry Andric // finished emission of the function body. 5085ffd83dbSDimitry Andric OutStreamer->emitRawText(StringRef("}\n")); 5090b57cec5SDimitry Andric return Result; 5100b57cec5SDimitry Andric } 5110b57cec5SDimitry Andric 5125ffd83dbSDimitry Andric void NVPTXAsmPrinter::emitFunctionBodyStart() { 5130b57cec5SDimitry Andric SmallString<128> Str; 5140b57cec5SDimitry Andric raw_svector_ostream O(Str); 5150b57cec5SDimitry Andric emitDemotedVars(&MF->getFunction(), O); 5165ffd83dbSDimitry Andric OutStreamer->emitRawText(O.str()); 5170b57cec5SDimitry Andric } 5180b57cec5SDimitry Andric 5195ffd83dbSDimitry Andric void NVPTXAsmPrinter::emitFunctionBodyEnd() { 5200b57cec5SDimitry Andric VRegMapping.clear(); 5210b57cec5SDimitry Andric } 5220b57cec5SDimitry Andric 5230b57cec5SDimitry Andric const MCSymbol *NVPTXAsmPrinter::getFunctionFrameSymbol() const { 5240b57cec5SDimitry Andric SmallString<128> Str; 5250b57cec5SDimitry Andric raw_svector_ostream(Str) << DEPOTNAME << getFunctionNumber(); 5260b57cec5SDimitry Andric return OutContext.getOrCreateSymbol(Str); 5270b57cec5SDimitry Andric } 5280b57cec5SDimitry Andric 5290b57cec5SDimitry Andric void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { 5308bcb0991SDimitry Andric Register RegNo = MI->getOperand(0).getReg(); 531bdd1243dSDimitry Andric if (RegNo.isVirtual()) { 5320b57cec5SDimitry Andric OutStreamer->AddComment(Twine("implicit-def: ") + 5330b57cec5SDimitry Andric getVirtualRegisterName(RegNo)); 5340b57cec5SDimitry Andric } else { 5350b57cec5SDimitry Andric const NVPTXSubtarget &STI = MI->getMF()->getSubtarget<NVPTXSubtarget>(); 5360b57cec5SDimitry Andric OutStreamer->AddComment(Twine("implicit-def: ") + 5370b57cec5SDimitry Andric STI.getRegisterInfo()->getName(RegNo)); 5380b57cec5SDimitry Andric } 53981ad6265SDimitry Andric OutStreamer->addBlankLine(); 5400b57cec5SDimitry Andric } 
610*0fca6ea1SDimitry Andric const Function *F = dyn_cast_or_null<Function>(GA->getAliaseeObject()); 611*0fca6ea1SDimitry Andric if (!F || isKernelFunction(*F) || F->isDeclaration()) 612*0fca6ea1SDimitry Andric report_fatal_error( 613*0fca6ea1SDimitry Andric "NVPTX aliasee must be a non-kernel function definition"); 614*0fca6ea1SDimitry Andric 615*0fca6ea1SDimitry Andric if (GA->hasLinkOnceLinkage() || GA->hasWeakLinkage() || 616*0fca6ea1SDimitry Andric GA->hasAvailableExternallyLinkage() || GA->hasCommonLinkage()) 617*0fca6ea1SDimitry Andric report_fatal_error("NVPTX aliasee must not be '.weak'"); 618*0fca6ea1SDimitry Andric 619*0fca6ea1SDimitry Andric emitDeclarationWithName(F, getSymbol(GA), O); 620*0fca6ea1SDimitry Andric } 621*0fca6ea1SDimitry Andric 6220b57cec5SDimitry Andric void NVPTXAsmPrinter::emitDeclaration(const Function *F, raw_ostream &O) { 623*0fca6ea1SDimitry Andric emitDeclarationWithName(F, getSymbol(F), O); 624*0fca6ea1SDimitry Andric } 625*0fca6ea1SDimitry Andric 626*0fca6ea1SDimitry Andric void NVPTXAsmPrinter::emitDeclarationWithName(const Function *F, MCSymbol *S, 627*0fca6ea1SDimitry Andric raw_ostream &O) { 6280b57cec5SDimitry Andric emitLinkageDirective(F, O); 6290b57cec5SDimitry Andric if (isKernelFunction(*F)) 6300b57cec5SDimitry Andric O << ".entry "; 6310b57cec5SDimitry Andric else 6320b57cec5SDimitry Andric O << ".func "; 6330b57cec5SDimitry Andric printReturnValStr(F, O); 634*0fca6ea1SDimitry Andric S->print(O, MAI); 6350b57cec5SDimitry Andric O << "\n"; 6360b57cec5SDimitry Andric emitFunctionParamList(F, O); 63706c3fb27SDimitry Andric O << "\n"; 638bdd1243dSDimitry Andric if (shouldEmitPTXNoReturn(F, TM)) 639bdd1243dSDimitry Andric O << ".noreturn"; 6400b57cec5SDimitry Andric O << ";\n"; 6410b57cec5SDimitry Andric } 6420b57cec5SDimitry Andric 6430b57cec5SDimitry Andric static bool usedInGlobalVarDef(const Constant *C) { 6440b57cec5SDimitry Andric if (!C) 6450b57cec5SDimitry Andric return false; 6460b57cec5SDimitry Andric 
6470b57cec5SDimitry Andric if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(C)) { 6480b57cec5SDimitry Andric return GV->getName() != "llvm.used"; 6490b57cec5SDimitry Andric } 6500b57cec5SDimitry Andric 6510b57cec5SDimitry Andric for (const User *U : C->users()) 6520b57cec5SDimitry Andric if (const Constant *C = dyn_cast<Constant>(U)) 6530b57cec5SDimitry Andric if (usedInGlobalVarDef(C)) 6540b57cec5SDimitry Andric return true; 6550b57cec5SDimitry Andric 6560b57cec5SDimitry Andric return false; 6570b57cec5SDimitry Andric } 6580b57cec5SDimitry Andric 6590b57cec5SDimitry Andric static bool usedInOneFunc(const User *U, Function const *&oneFunc) { 6600b57cec5SDimitry Andric if (const GlobalVariable *othergv = dyn_cast<GlobalVariable>(U)) { 6610b57cec5SDimitry Andric if (othergv->getName() == "llvm.used") 6620b57cec5SDimitry Andric return true; 6630b57cec5SDimitry Andric } 6640b57cec5SDimitry Andric 6650b57cec5SDimitry Andric if (const Instruction *instr = dyn_cast<Instruction>(U)) { 6660b57cec5SDimitry Andric if (instr->getParent() && instr->getParent()->getParent()) { 6670b57cec5SDimitry Andric const Function *curFunc = instr->getParent()->getParent(); 6680b57cec5SDimitry Andric if (oneFunc && (curFunc != oneFunc)) 6690b57cec5SDimitry Andric return false; 6700b57cec5SDimitry Andric oneFunc = curFunc; 6710b57cec5SDimitry Andric return true; 6720b57cec5SDimitry Andric } else 6730b57cec5SDimitry Andric return false; 6740b57cec5SDimitry Andric } 6750b57cec5SDimitry Andric 6760b57cec5SDimitry Andric for (const User *UU : U->users()) 6770b57cec5SDimitry Andric if (!usedInOneFunc(UU, oneFunc)) 6780b57cec5SDimitry Andric return false; 6790b57cec5SDimitry Andric 6800b57cec5SDimitry Andric return true; 6810b57cec5SDimitry Andric } 6820b57cec5SDimitry Andric 6830b57cec5SDimitry Andric /* Find out if a global variable can be demoted to local scope. 
6840b57cec5SDimitry Andric * Currently, this is valid for CUDA shared variables, which have local 6850b57cec5SDimitry Andric * scope and global lifetime. So the conditions to check are : 6860b57cec5SDimitry Andric * 1. Is the global variable in shared address space? 6875f757f3fSDimitry Andric * 2. Does it have local linkage? 6880b57cec5SDimitry Andric * 3. Is the global variable referenced only in one function? 6890b57cec5SDimitry Andric */ 6900b57cec5SDimitry Andric static bool canDemoteGlobalVar(const GlobalVariable *gv, Function const *&f) { 6915f757f3fSDimitry Andric if (!gv->hasLocalLinkage()) 6920b57cec5SDimitry Andric return false; 6930b57cec5SDimitry Andric PointerType *Pty = gv->getType(); 6940b57cec5SDimitry Andric if (Pty->getAddressSpace() != ADDRESS_SPACE_SHARED) 6950b57cec5SDimitry Andric return false; 6960b57cec5SDimitry Andric 6970b57cec5SDimitry Andric const Function *oneFunc = nullptr; 6980b57cec5SDimitry Andric 6990b57cec5SDimitry Andric bool flag = usedInOneFunc(gv, oneFunc); 7000b57cec5SDimitry Andric if (!flag) 7010b57cec5SDimitry Andric return false; 7020b57cec5SDimitry Andric if (!oneFunc) 7030b57cec5SDimitry Andric return false; 7040b57cec5SDimitry Andric f = oneFunc; 7050b57cec5SDimitry Andric return true; 7060b57cec5SDimitry Andric } 7070b57cec5SDimitry Andric 7080b57cec5SDimitry Andric static bool useFuncSeen(const Constant *C, 7090b57cec5SDimitry Andric DenseMap<const Function *, bool> &seenMap) { 7100b57cec5SDimitry Andric for (const User *U : C->users()) { 7110b57cec5SDimitry Andric if (const Constant *cu = dyn_cast<Constant>(U)) { 7120b57cec5SDimitry Andric if (useFuncSeen(cu, seenMap)) 7130b57cec5SDimitry Andric return true; 7140b57cec5SDimitry Andric } else if (const Instruction *I = dyn_cast<Instruction>(U)) { 7150b57cec5SDimitry Andric const BasicBlock *bb = I->getParent(); 7160b57cec5SDimitry Andric if (!bb) 7170b57cec5SDimitry Andric continue; 7180b57cec5SDimitry Andric const Function *caller = bb->getParent(); 
7190b57cec5SDimitry Andric if (!caller) 7200b57cec5SDimitry Andric continue; 72106c3fb27SDimitry Andric if (seenMap.contains(caller)) 7220b57cec5SDimitry Andric return true; 7230b57cec5SDimitry Andric } 7240b57cec5SDimitry Andric } 7250b57cec5SDimitry Andric return false; 7260b57cec5SDimitry Andric } 7270b57cec5SDimitry Andric 7280b57cec5SDimitry Andric void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { 7290b57cec5SDimitry Andric DenseMap<const Function *, bool> seenMap; 7304824e7fdSDimitry Andric for (const Function &F : M) { 7314824e7fdSDimitry Andric if (F.getAttributes().hasFnAttr("nvptx-libcall-callee")) { 7324824e7fdSDimitry Andric emitDeclaration(&F, O); 7330b57cec5SDimitry Andric continue; 7340b57cec5SDimitry Andric } 7350b57cec5SDimitry Andric 7364824e7fdSDimitry Andric if (F.isDeclaration()) { 7374824e7fdSDimitry Andric if (F.use_empty()) 7380b57cec5SDimitry Andric continue; 7394824e7fdSDimitry Andric if (F.getIntrinsicID()) 7400b57cec5SDimitry Andric continue; 7414824e7fdSDimitry Andric emitDeclaration(&F, O); 7420b57cec5SDimitry Andric continue; 7430b57cec5SDimitry Andric } 7444824e7fdSDimitry Andric for (const User *U : F.users()) { 7450b57cec5SDimitry Andric if (const Constant *C = dyn_cast<Constant>(U)) { 7460b57cec5SDimitry Andric if (usedInGlobalVarDef(C)) { 7470b57cec5SDimitry Andric // The use is in the initialization of a global variable 7480b57cec5SDimitry Andric // that is a function pointer, so print a declaration 7490b57cec5SDimitry Andric // for the original function 7504824e7fdSDimitry Andric emitDeclaration(&F, O); 7510b57cec5SDimitry Andric break; 7520b57cec5SDimitry Andric } 7530b57cec5SDimitry Andric // Emit a declaration of this function if the function that 7540b57cec5SDimitry Andric // uses this constant expr has already been seen. 
7550b57cec5SDimitry Andric if (useFuncSeen(C, seenMap)) { 7564824e7fdSDimitry Andric emitDeclaration(&F, O); 7570b57cec5SDimitry Andric break; 7580b57cec5SDimitry Andric } 7590b57cec5SDimitry Andric } 7600b57cec5SDimitry Andric 7610b57cec5SDimitry Andric if (!isa<Instruction>(U)) 7620b57cec5SDimitry Andric continue; 7630b57cec5SDimitry Andric const Instruction *instr = cast<Instruction>(U); 7640b57cec5SDimitry Andric const BasicBlock *bb = instr->getParent(); 7650b57cec5SDimitry Andric if (!bb) 7660b57cec5SDimitry Andric continue; 7670b57cec5SDimitry Andric const Function *caller = bb->getParent(); 7680b57cec5SDimitry Andric if (!caller) 7690b57cec5SDimitry Andric continue; 7700b57cec5SDimitry Andric 7710b57cec5SDimitry Andric // If a caller has already been seen, then the caller is 7720b57cec5SDimitry Andric // appearing in the module before the callee. so print out 7730b57cec5SDimitry Andric // a declaration for the callee. 77406c3fb27SDimitry Andric if (seenMap.contains(caller)) { 7754824e7fdSDimitry Andric emitDeclaration(&F, O); 7760b57cec5SDimitry Andric break; 7770b57cec5SDimitry Andric } 7780b57cec5SDimitry Andric } 7794824e7fdSDimitry Andric seenMap[&F] = true; 7800b57cec5SDimitry Andric } 781*0fca6ea1SDimitry Andric for (const GlobalAlias &GA : M.aliases()) 782*0fca6ea1SDimitry Andric emitAliasDeclaration(&GA, O); 7830b57cec5SDimitry Andric } 7840b57cec5SDimitry Andric 7850b57cec5SDimitry Andric static bool isEmptyXXStructor(GlobalVariable *GV) { 7860b57cec5SDimitry Andric if (!GV) return true; 7870b57cec5SDimitry Andric const ConstantArray *InitList = dyn_cast<ConstantArray>(GV->getInitializer()); 7880b57cec5SDimitry Andric if (!InitList) return true; // Not an array; we don't know how to parse. 
7890b57cec5SDimitry Andric return InitList->getNumOperands() == 0; 7900b57cec5SDimitry Andric } 7910b57cec5SDimitry Andric 7925ffd83dbSDimitry Andric void NVPTXAsmPrinter::emitStartOfAsmFile(Module &M) { 7930b57cec5SDimitry Andric // Construct a default subtarget off of the TargetMachine defaults. The 7940b57cec5SDimitry Andric // rest of NVPTX isn't friendly to change subtargets per function and 7950b57cec5SDimitry Andric // so the default TargetMachine will have all of the options. 7960b57cec5SDimitry Andric const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 7970b57cec5SDimitry Andric const auto* STI = static_cast<const NVPTXSubtarget*>(NTM.getSubtargetImpl()); 7985ffd83dbSDimitry Andric SmallString<128> Str1; 7995ffd83dbSDimitry Andric raw_svector_ostream OS1(Str1); 8000b57cec5SDimitry Andric 8015ffd83dbSDimitry Andric // Emit header before any dwarf directives are emitted below. 8025ffd83dbSDimitry Andric emitHeader(M, OS1, *STI); 8035ffd83dbSDimitry Andric OutStreamer->emitRawText(OS1.str()); 8045ffd83dbSDimitry Andric } 8055ffd83dbSDimitry Andric 8065ffd83dbSDimitry Andric bool NVPTXAsmPrinter::doInitialization(Module &M) { 80706c3fb27SDimitry Andric const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 80806c3fb27SDimitry Andric const NVPTXSubtarget &STI = 80906c3fb27SDimitry Andric *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl()); 81006c3fb27SDimitry Andric if (M.alias_size() && (STI.getPTXVersion() < 63 || STI.getSmVersion() < 30)) 81106c3fb27SDimitry Andric report_fatal_error(".alias requires PTX version >= 6.3 and sm_30"); 81206c3fb27SDimitry Andric 8135f757f3fSDimitry Andric // OpenMP supports NVPTX global constructors and destructors. 
8145f757f3fSDimitry Andric bool IsOpenMP = M.getModuleFlag("openmp") != nullptr; 8155f757f3fSDimitry Andric 81606c3fb27SDimitry Andric if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_ctors")) && 8175f757f3fSDimitry Andric !LowerCtorDtor && !IsOpenMP) { 8180b57cec5SDimitry Andric report_fatal_error( 8190b57cec5SDimitry Andric "Module has a nontrivial global ctor, which NVPTX does not support."); 8200b57cec5SDimitry Andric return true; // error 8210b57cec5SDimitry Andric } 82206c3fb27SDimitry Andric if (!isEmptyXXStructor(M.getNamedGlobal("llvm.global_dtors")) && 8235f757f3fSDimitry Andric !LowerCtorDtor && !IsOpenMP) { 8240b57cec5SDimitry Andric report_fatal_error( 8250b57cec5SDimitry Andric "Module has a nontrivial global dtor, which NVPTX does not support."); 8260b57cec5SDimitry Andric return true; // error 8270b57cec5SDimitry Andric } 8280b57cec5SDimitry Andric 8290b57cec5SDimitry Andric // We need to call the parent's one explicitly. 8300b57cec5SDimitry Andric bool Result = AsmPrinter::doInitialization(M); 8310b57cec5SDimitry Andric 8320b57cec5SDimitry Andric GlobalsEmitted = false; 8330b57cec5SDimitry Andric 8340b57cec5SDimitry Andric return Result; 8350b57cec5SDimitry Andric } 8360b57cec5SDimitry Andric 8370b57cec5SDimitry Andric void NVPTXAsmPrinter::emitGlobals(const Module &M) { 8380b57cec5SDimitry Andric SmallString<128> Str2; 8390b57cec5SDimitry Andric raw_svector_ostream OS2(Str2); 8400b57cec5SDimitry Andric 8410b57cec5SDimitry Andric emitDeclarations(M, OS2); 8420b57cec5SDimitry Andric 8430b57cec5SDimitry Andric // As ptxas does not support forward references of globals, we need to first 8440b57cec5SDimitry Andric // sort the list of module-level globals in def-use order. We visit each 8450b57cec5SDimitry Andric // global variable in order, and ensure that we emit it *after* its dependent 8460b57cec5SDimitry Andric // globals. 
We use a little extra memory maintaining both a set and a list to 8470b57cec5SDimitry Andric // have fast searches while maintaining a strict ordering. 8480b57cec5SDimitry Andric SmallVector<const GlobalVariable *, 8> Globals; 8490b57cec5SDimitry Andric DenseSet<const GlobalVariable *> GVVisited; 8500b57cec5SDimitry Andric DenseSet<const GlobalVariable *> GVVisiting; 8510b57cec5SDimitry Andric 8520b57cec5SDimitry Andric // Visit each global variable, in order 8530b57cec5SDimitry Andric for (const GlobalVariable &I : M.globals()) 8540b57cec5SDimitry Andric VisitGlobalVariableForEmission(&I, Globals, GVVisited, GVVisiting); 8550b57cec5SDimitry Andric 85606c3fb27SDimitry Andric assert(GVVisited.size() == M.global_size() && "Missed a global variable"); 8570b57cec5SDimitry Andric assert(GVVisiting.size() == 0 && "Did not fully process a global variable"); 8580b57cec5SDimitry Andric 85981ad6265SDimitry Andric const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 86081ad6265SDimitry Andric const NVPTXSubtarget &STI = 86181ad6265SDimitry Andric *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl()); 86281ad6265SDimitry Andric 8630b57cec5SDimitry Andric // Print out module-level global variables in proper order 864*0fca6ea1SDimitry Andric for (const GlobalVariable *GV : Globals) 865*0fca6ea1SDimitry Andric printModuleLevelGV(GV, OS2, /*processDemoted=*/false, STI); 8660b57cec5SDimitry Andric 8670b57cec5SDimitry Andric OS2 << '\n'; 8680b57cec5SDimitry Andric 8695ffd83dbSDimitry Andric OutStreamer->emitRawText(OS2.str()); 8700b57cec5SDimitry Andric } 8710b57cec5SDimitry Andric 87206c3fb27SDimitry Andric void NVPTXAsmPrinter::emitGlobalAlias(const Module &M, const GlobalAlias &GA) { 87306c3fb27SDimitry Andric SmallString<128> Str; 87406c3fb27SDimitry Andric raw_svector_ostream OS(Str); 87506c3fb27SDimitry Andric 87606c3fb27SDimitry Andric MCSymbol *Name = getSymbol(&GA); 87706c3fb27SDimitry Andric 878*0fca6ea1SDimitry Andric OS << ".alias " << 
Name->getName() << ", " << GA.getAliaseeObject()->getName() 879*0fca6ea1SDimitry Andric << ";\n"; 88006c3fb27SDimitry Andric 88106c3fb27SDimitry Andric OutStreamer->emitRawText(OS.str()); 88206c3fb27SDimitry Andric } 88306c3fb27SDimitry Andric 8840b57cec5SDimitry Andric void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O, 8850b57cec5SDimitry Andric const NVPTXSubtarget &STI) { 8860b57cec5SDimitry Andric O << "//\n"; 8870b57cec5SDimitry Andric O << "// Generated by LLVM NVPTX Back-End\n"; 8880b57cec5SDimitry Andric O << "//\n"; 8890b57cec5SDimitry Andric O << "\n"; 8900b57cec5SDimitry Andric 8910b57cec5SDimitry Andric unsigned PTXVersion = STI.getPTXVersion(); 8920b57cec5SDimitry Andric O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; 8930b57cec5SDimitry Andric 8940b57cec5SDimitry Andric O << ".target "; 8950b57cec5SDimitry Andric O << STI.getTargetName(); 8960b57cec5SDimitry Andric 8970b57cec5SDimitry Andric const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 8980b57cec5SDimitry Andric if (NTM.getDrvInterface() == NVPTX::NVCL) 8990b57cec5SDimitry Andric O << ", texmode_independent"; 9000b57cec5SDimitry Andric 9010b57cec5SDimitry Andric bool HasFullDebugInfo = false; 9020b57cec5SDimitry Andric for (DICompileUnit *CU : M.debug_compile_units()) { 9030b57cec5SDimitry Andric switch(CU->getEmissionKind()) { 9040b57cec5SDimitry Andric case DICompileUnit::NoDebug: 9050b57cec5SDimitry Andric case DICompileUnit::DebugDirectivesOnly: 9060b57cec5SDimitry Andric break; 9070b57cec5SDimitry Andric case DICompileUnit::LineTablesOnly: 9080b57cec5SDimitry Andric case DICompileUnit::FullDebug: 9090b57cec5SDimitry Andric HasFullDebugInfo = true; 9100b57cec5SDimitry Andric break; 9110b57cec5SDimitry Andric } 9120b57cec5SDimitry Andric if (HasFullDebugInfo) 9130b57cec5SDimitry Andric break; 9140b57cec5SDimitry Andric } 9150b57cec5SDimitry Andric if (MMI && MMI->hasDebugInfo() && HasFullDebugInfo) 9160b57cec5SDimitry Andric O << 
", debug"; 9170b57cec5SDimitry Andric 9180b57cec5SDimitry Andric O << "\n"; 9190b57cec5SDimitry Andric 9200b57cec5SDimitry Andric O << ".address_size "; 9210b57cec5SDimitry Andric if (NTM.is64Bit()) 9220b57cec5SDimitry Andric O << "64"; 9230b57cec5SDimitry Andric else 9240b57cec5SDimitry Andric O << "32"; 9250b57cec5SDimitry Andric O << "\n"; 9260b57cec5SDimitry Andric 9270b57cec5SDimitry Andric O << "\n"; 9280b57cec5SDimitry Andric } 9290b57cec5SDimitry Andric 9300b57cec5SDimitry Andric bool NVPTXAsmPrinter::doFinalization(Module &M) { 9310b57cec5SDimitry Andric bool HasDebugInfo = MMI && MMI->hasDebugInfo(); 9320b57cec5SDimitry Andric 9330b57cec5SDimitry Andric // If we did not emit any functions, then the global declarations have not 9340b57cec5SDimitry Andric // yet been emitted. 9350b57cec5SDimitry Andric if (!GlobalsEmitted) { 9360b57cec5SDimitry Andric emitGlobals(M); 9370b57cec5SDimitry Andric GlobalsEmitted = true; 9380b57cec5SDimitry Andric } 9390b57cec5SDimitry Andric 9400b57cec5SDimitry Andric // call doFinalization 9410b57cec5SDimitry Andric bool ret = AsmPrinter::doFinalization(M); 9420b57cec5SDimitry Andric 9430b57cec5SDimitry Andric clearAnnotationCache(&M); 9440b57cec5SDimitry Andric 945bdd1243dSDimitry Andric auto *TS = 946bdd1243dSDimitry Andric static_cast<NVPTXTargetStreamer *>(OutStreamer->getTargetStreamer()); 9470b57cec5SDimitry Andric // Close the last emitted section 9480b57cec5SDimitry Andric if (HasDebugInfo) { 94981ad6265SDimitry Andric TS->closeLastSection(); 9500b57cec5SDimitry Andric // Emit empty .debug_loc section for better support of the empty files. 9515ffd83dbSDimitry Andric OutStreamer->emitRawText("\t.section\t.debug_loc\t{\t}"); 9520b57cec5SDimitry Andric } 9530b57cec5SDimitry Andric 9540b57cec5SDimitry Andric // Output last DWARF .file directives, if any. 
95581ad6265SDimitry Andric TS->outputDwarfFileDirectives(); 9560b57cec5SDimitry Andric 9570b57cec5SDimitry Andric return ret; 9580b57cec5SDimitry Andric } 9590b57cec5SDimitry Andric 9600b57cec5SDimitry Andric // This function emits appropriate linkage directives for 9610b57cec5SDimitry Andric // functions and global variables. 9620b57cec5SDimitry Andric // 9630b57cec5SDimitry Andric // extern function declaration -> .extern 9640b57cec5SDimitry Andric // extern function definition -> .visible 9650b57cec5SDimitry Andric // external global variable with init -> .visible 9660b57cec5SDimitry Andric // external without init -> .extern 9670b57cec5SDimitry Andric // appending -> not allowed, assert. 9680b57cec5SDimitry Andric // for any linkage other than 9690b57cec5SDimitry Andric // internal, private, linker_private, 9700b57cec5SDimitry Andric // linker_private_weak, linker_private_weak_def_auto, 9710b57cec5SDimitry Andric // we emit -> .weak. 9720b57cec5SDimitry Andric 9730b57cec5SDimitry Andric void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, 9740b57cec5SDimitry Andric raw_ostream &O) { 9750b57cec5SDimitry Andric if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == NVPTX::CUDA) { 9760b57cec5SDimitry Andric if (V->hasExternalLinkage()) { 9770b57cec5SDimitry Andric if (isa<GlobalVariable>(V)) { 9780b57cec5SDimitry Andric const GlobalVariable *GVar = cast<GlobalVariable>(V); 9790b57cec5SDimitry Andric if (GVar) { 9800b57cec5SDimitry Andric if (GVar->hasInitializer()) 9810b57cec5SDimitry Andric O << ".visible "; 9820b57cec5SDimitry Andric else 9830b57cec5SDimitry Andric O << ".extern "; 9840b57cec5SDimitry Andric } 9850b57cec5SDimitry Andric } else if (V->isDeclaration()) 9860b57cec5SDimitry Andric O << ".extern "; 9870b57cec5SDimitry Andric else 9880b57cec5SDimitry Andric O << ".visible "; 9890b57cec5SDimitry Andric } else if (V->hasAppendingLinkage()) { 9900b57cec5SDimitry Andric std::string msg; 9910b57cec5SDimitry Andric msg.append("Error: "); 
9920b57cec5SDimitry Andric msg.append("Symbol "); 9930b57cec5SDimitry Andric if (V->hasName()) 9945ffd83dbSDimitry Andric msg.append(std::string(V->getName())); 9950b57cec5SDimitry Andric msg.append("has unsupported appending linkage type"); 9960b57cec5SDimitry Andric llvm_unreachable(msg.c_str()); 9970b57cec5SDimitry Andric } else if (!V->hasInternalLinkage() && 9980b57cec5SDimitry Andric !V->hasPrivateLinkage()) { 9990b57cec5SDimitry Andric O << ".weak "; 10000b57cec5SDimitry Andric } 10010b57cec5SDimitry Andric } 10020b57cec5SDimitry Andric } 10030b57cec5SDimitry Andric 10040b57cec5SDimitry Andric void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, 100581ad6265SDimitry Andric raw_ostream &O, bool processDemoted, 100681ad6265SDimitry Andric const NVPTXSubtarget &STI) { 10070b57cec5SDimitry Andric // Skip meta data 10080b57cec5SDimitry Andric if (GVar->hasSection()) { 10090b57cec5SDimitry Andric if (GVar->getSection() == "llvm.metadata") 10100b57cec5SDimitry Andric return; 10110b57cec5SDimitry Andric } 10120b57cec5SDimitry Andric 10130b57cec5SDimitry Andric // Skip LLVM intrinsic global variables 10145f757f3fSDimitry Andric if (GVar->getName().starts_with("llvm.") || 10155f757f3fSDimitry Andric GVar->getName().starts_with("nvvm.")) 10160b57cec5SDimitry Andric return; 10170b57cec5SDimitry Andric 10180b57cec5SDimitry Andric const DataLayout &DL = getDataLayout(); 10190b57cec5SDimitry Andric 10200b57cec5SDimitry Andric // GlobalVariables are always constant pointers themselves. 
10210b57cec5SDimitry Andric Type *ETy = GVar->getValueType(); 10220b57cec5SDimitry Andric 10230b57cec5SDimitry Andric if (GVar->hasExternalLinkage()) { 10240b57cec5SDimitry Andric if (GVar->hasInitializer()) 10250b57cec5SDimitry Andric O << ".visible "; 10260b57cec5SDimitry Andric else 10270b57cec5SDimitry Andric O << ".extern "; 1028*0fca6ea1SDimitry Andric } else if (STI.getPTXVersion() >= 50 && GVar->hasCommonLinkage() && 1029*0fca6ea1SDimitry Andric GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) { 1030*0fca6ea1SDimitry Andric O << ".common "; 10310b57cec5SDimitry Andric } else if (GVar->hasLinkOnceLinkage() || GVar->hasWeakLinkage() || 10320b57cec5SDimitry Andric GVar->hasAvailableExternallyLinkage() || 10330b57cec5SDimitry Andric GVar->hasCommonLinkage()) { 10340b57cec5SDimitry Andric O << ".weak "; 10350b57cec5SDimitry Andric } 10360b57cec5SDimitry Andric 10370b57cec5SDimitry Andric if (isTexture(*GVar)) { 10380b57cec5SDimitry Andric O << ".global .texref " << getTextureName(*GVar) << ";\n"; 10390b57cec5SDimitry Andric return; 10400b57cec5SDimitry Andric } 10410b57cec5SDimitry Andric 10420b57cec5SDimitry Andric if (isSurface(*GVar)) { 10430b57cec5SDimitry Andric O << ".global .surfref " << getSurfaceName(*GVar) << ";\n"; 10440b57cec5SDimitry Andric return; 10450b57cec5SDimitry Andric } 10460b57cec5SDimitry Andric 10470b57cec5SDimitry Andric if (GVar->isDeclaration()) { 10480b57cec5SDimitry Andric // (extern) declarations, no definition or initializer 10490b57cec5SDimitry Andric // Currently the only known declaration is for an automatic __local 10500b57cec5SDimitry Andric // (.shared) promoted to global. 
105181ad6265SDimitry Andric emitPTXGlobalVariable(GVar, O, STI); 10520b57cec5SDimitry Andric O << ";\n"; 10530b57cec5SDimitry Andric return; 10540b57cec5SDimitry Andric } 10550b57cec5SDimitry Andric 10560b57cec5SDimitry Andric if (isSampler(*GVar)) { 10570b57cec5SDimitry Andric O << ".global .samplerref " << getSamplerName(*GVar); 10580b57cec5SDimitry Andric 10590b57cec5SDimitry Andric const Constant *Initializer = nullptr; 10600b57cec5SDimitry Andric if (GVar->hasInitializer()) 10610b57cec5SDimitry Andric Initializer = GVar->getInitializer(); 10620b57cec5SDimitry Andric const ConstantInt *CI = nullptr; 10630b57cec5SDimitry Andric if (Initializer) 10640b57cec5SDimitry Andric CI = dyn_cast<ConstantInt>(Initializer); 10650b57cec5SDimitry Andric if (CI) { 10660b57cec5SDimitry Andric unsigned sample = CI->getZExtValue(); 10670b57cec5SDimitry Andric 10680b57cec5SDimitry Andric O << " = { "; 10690b57cec5SDimitry Andric 10700b57cec5SDimitry Andric for (int i = 0, 10710b57cec5SDimitry Andric addr = ((sample & __CLK_ADDRESS_MASK) >> __CLK_ADDRESS_BASE); 10720b57cec5SDimitry Andric i < 3; i++) { 10730b57cec5SDimitry Andric O << "addr_mode_" << i << " = "; 10740b57cec5SDimitry Andric switch (addr) { 10750b57cec5SDimitry Andric case 0: 10760b57cec5SDimitry Andric O << "wrap"; 10770b57cec5SDimitry Andric break; 10780b57cec5SDimitry Andric case 1: 10790b57cec5SDimitry Andric O << "clamp_to_border"; 10800b57cec5SDimitry Andric break; 10810b57cec5SDimitry Andric case 2: 10820b57cec5SDimitry Andric O << "clamp_to_edge"; 10830b57cec5SDimitry Andric break; 10840b57cec5SDimitry Andric case 3: 10850b57cec5SDimitry Andric O << "wrap"; 10860b57cec5SDimitry Andric break; 10870b57cec5SDimitry Andric case 4: 10880b57cec5SDimitry Andric O << "mirror"; 10890b57cec5SDimitry Andric break; 10900b57cec5SDimitry Andric } 10910b57cec5SDimitry Andric O << ", "; 10920b57cec5SDimitry Andric } 10930b57cec5SDimitry Andric O << "filter_mode = "; 10940b57cec5SDimitry Andric switch ((sample & 
__CLK_FILTER_MASK) >> __CLK_FILTER_BASE) { 10950b57cec5SDimitry Andric case 0: 10960b57cec5SDimitry Andric O << "nearest"; 10970b57cec5SDimitry Andric break; 10980b57cec5SDimitry Andric case 1: 10990b57cec5SDimitry Andric O << "linear"; 11000b57cec5SDimitry Andric break; 11010b57cec5SDimitry Andric case 2: 11020b57cec5SDimitry Andric llvm_unreachable("Anisotropic filtering is not supported"); 11030b57cec5SDimitry Andric default: 11040b57cec5SDimitry Andric O << "nearest"; 11050b57cec5SDimitry Andric break; 11060b57cec5SDimitry Andric } 11070b57cec5SDimitry Andric if (!((sample & __CLK_NORMALIZED_MASK) >> __CLK_NORMALIZED_BASE)) { 11080b57cec5SDimitry Andric O << ", force_unnormalized_coords = 1"; 11090b57cec5SDimitry Andric } 11100b57cec5SDimitry Andric O << " }"; 11110b57cec5SDimitry Andric } 11120b57cec5SDimitry Andric 11130b57cec5SDimitry Andric O << ";\n"; 11140b57cec5SDimitry Andric return; 11150b57cec5SDimitry Andric } 11160b57cec5SDimitry Andric 11170b57cec5SDimitry Andric if (GVar->hasPrivateLinkage()) { 11180b57cec5SDimitry Andric if (strncmp(GVar->getName().data(), "unrollpragma", 12) == 0) 11190b57cec5SDimitry Andric return; 11200b57cec5SDimitry Andric 11210b57cec5SDimitry Andric // FIXME - need better way (e.g. 
Metadata) to avoid generating this global 11220b57cec5SDimitry Andric if (strncmp(GVar->getName().data(), "filename", 8) == 0) 11230b57cec5SDimitry Andric return; 11240b57cec5SDimitry Andric if (GVar->use_empty()) 11250b57cec5SDimitry Andric return; 11260b57cec5SDimitry Andric } 11270b57cec5SDimitry Andric 11280b57cec5SDimitry Andric const Function *demotedFunc = nullptr; 11290b57cec5SDimitry Andric if (!processDemoted && canDemoteGlobalVar(GVar, demotedFunc)) { 11300b57cec5SDimitry Andric O << "// " << GVar->getName() << " has been demoted\n"; 11310b57cec5SDimitry Andric if (localDecls.find(demotedFunc) != localDecls.end()) 11320b57cec5SDimitry Andric localDecls[demotedFunc].push_back(GVar); 11330b57cec5SDimitry Andric else { 11340b57cec5SDimitry Andric std::vector<const GlobalVariable *> temp; 11350b57cec5SDimitry Andric temp.push_back(GVar); 11360b57cec5SDimitry Andric localDecls[demotedFunc] = temp; 11370b57cec5SDimitry Andric } 11380b57cec5SDimitry Andric return; 11390b57cec5SDimitry Andric } 11400b57cec5SDimitry Andric 11410b57cec5SDimitry Andric O << "."; 1142*0fca6ea1SDimitry Andric emitPTXAddressSpace(GVar->getAddressSpace(), O); 11430b57cec5SDimitry Andric 11440b57cec5SDimitry Andric if (isManaged(*GVar)) { 114581ad6265SDimitry Andric if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) { 114681ad6265SDimitry Andric report_fatal_error( 114781ad6265SDimitry Andric ".attribute(.managed) requires PTX version >= 4.0 and sm_30"); 114881ad6265SDimitry Andric } 11490b57cec5SDimitry Andric O << " .attribute(.managed)"; 11500b57cec5SDimitry Andric } 11510b57cec5SDimitry Andric 11520eae32dcSDimitry Andric if (MaybeAlign A = GVar->getAlign()) 11530eae32dcSDimitry Andric O << " .align " << A->value(); 11540b57cec5SDimitry Andric else 1155bdd1243dSDimitry Andric O << " .align " << (int)DL.getPrefTypeAlign(ETy).value(); 11560b57cec5SDimitry Andric 11570b57cec5SDimitry Andric if (ETy->isFloatingPointTy() || ETy->isPointerTy() || 11580b57cec5SDimitry Andric 
(ETy->isIntegerTy() && ETy->getScalarSizeInBits() <= 64)) { 11590b57cec5SDimitry Andric O << " ."; 11600b57cec5SDimitry Andric // Special case: ABI requires that we use .u8 for predicates 11610b57cec5SDimitry Andric if (ETy->isIntegerTy(1)) 11620b57cec5SDimitry Andric O << "u8"; 11630b57cec5SDimitry Andric else 11640b57cec5SDimitry Andric O << getPTXFundamentalTypeStr(ETy, false); 11650b57cec5SDimitry Andric O << " "; 11660b57cec5SDimitry Andric getSymbol(GVar)->print(O, MAI); 11670b57cec5SDimitry Andric 11680b57cec5SDimitry Andric // Ptx allows variable initilization only for constant and global state 11690b57cec5SDimitry Andric // spaces. 11700b57cec5SDimitry Andric if (GVar->hasInitializer()) { 1171*0fca6ea1SDimitry Andric if ((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) || 1172*0fca6ea1SDimitry Andric (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) { 11730b57cec5SDimitry Andric const Constant *Initializer = GVar->getInitializer(); 11740b57cec5SDimitry Andric // 'undef' is treated as there is no value specified. 11750b57cec5SDimitry Andric if (!Initializer->isNullValue() && !isa<UndefValue>(Initializer)) { 11760b57cec5SDimitry Andric O << " = "; 11770b57cec5SDimitry Andric printScalarConstant(Initializer, O); 11780b57cec5SDimitry Andric } 11790b57cec5SDimitry Andric } else { 11800b57cec5SDimitry Andric // The frontend adds zero-initializer to device and constant variables 11810b57cec5SDimitry Andric // that don't have an initial value, and UndefValue to shared 11820b57cec5SDimitry Andric // variables, so skip warning for this case. 
11830b57cec5SDimitry Andric if (!GVar->getInitializer()->isNullValue() && 11840b57cec5SDimitry Andric !isa<UndefValue>(GVar->getInitializer())) { 11850b57cec5SDimitry Andric report_fatal_error("initial value of '" + GVar->getName() + 11860b57cec5SDimitry Andric "' is not allowed in addrspace(" + 1187*0fca6ea1SDimitry Andric Twine(GVar->getAddressSpace()) + ")"); 11880b57cec5SDimitry Andric } 11890b57cec5SDimitry Andric } 11900b57cec5SDimitry Andric } 11910b57cec5SDimitry Andric } else { 119206c3fb27SDimitry Andric uint64_t ElementSize = 0; 11930b57cec5SDimitry Andric 11940b57cec5SDimitry Andric // Although PTX has direct support for struct type and array type and 11950b57cec5SDimitry Andric // LLVM IR is very similar to PTX, the LLVM CodeGen does not support for 11960b57cec5SDimitry Andric // targets that support these high level field accesses. Structs, arrays 11970b57cec5SDimitry Andric // and vectors are lowered into arrays of bytes. 11980b57cec5SDimitry Andric switch (ETy->getTypeID()) { 11990b57cec5SDimitry Andric case Type::IntegerTyID: // Integers larger than 64 bits 12000b57cec5SDimitry Andric case Type::StructTyID: 12010b57cec5SDimitry Andric case Type::ArrayTyID: 12025ffd83dbSDimitry Andric case Type::FixedVectorTyID: 12030b57cec5SDimitry Andric ElementSize = DL.getTypeStoreSize(ETy); 12040b57cec5SDimitry Andric // Ptx allows variable initilization only for constant and 12050b57cec5SDimitry Andric // global state spaces. 
1206*0fca6ea1SDimitry Andric if (((GVar->getAddressSpace() == ADDRESS_SPACE_GLOBAL) || 1207*0fca6ea1SDimitry Andric (GVar->getAddressSpace() == ADDRESS_SPACE_CONST)) && 12080b57cec5SDimitry Andric GVar->hasInitializer()) { 12090b57cec5SDimitry Andric const Constant *Initializer = GVar->getInitializer(); 12100b57cec5SDimitry Andric if (!isa<UndefValue>(Initializer) && !Initializer->isNullValue()) { 1211fcaf7f86SDimitry Andric AggBuffer aggBuffer(ElementSize, *this); 12120b57cec5SDimitry Andric bufferAggregateConstant(Initializer, &aggBuffer); 1213fcaf7f86SDimitry Andric if (aggBuffer.numSymbols()) { 1214fcaf7f86SDimitry Andric unsigned int ptrSize = MAI->getCodePointerSize(); 1215fcaf7f86SDimitry Andric if (ElementSize % ptrSize || 1216fcaf7f86SDimitry Andric !aggBuffer.allSymbolsAligned(ptrSize)) { 1217fcaf7f86SDimitry Andric // Print in bytes and use the mask() operator for pointers. 1218fcaf7f86SDimitry Andric if (!STI.hasMaskOperator()) 1219fcaf7f86SDimitry Andric report_fatal_error( 1220fcaf7f86SDimitry Andric "initialized packed aggregate with pointers '" + 1221fcaf7f86SDimitry Andric GVar->getName() + 1222fcaf7f86SDimitry Andric "' requires at least PTX ISA version 7.1"); 1223fcaf7f86SDimitry Andric O << " .u8 "; 12240b57cec5SDimitry Andric getSymbol(GVar)->print(O, MAI); 1225fcaf7f86SDimitry Andric O << "[" << ElementSize << "] = {"; 1226fcaf7f86SDimitry Andric aggBuffer.printBytes(O); 1227fcaf7f86SDimitry Andric O << "}"; 12280b57cec5SDimitry Andric } else { 1229fcaf7f86SDimitry Andric O << " .u" << ptrSize * 8 << " "; 12300b57cec5SDimitry Andric getSymbol(GVar)->print(O, MAI); 1231fcaf7f86SDimitry Andric O << "[" << ElementSize / ptrSize << "] = {"; 1232fcaf7f86SDimitry Andric aggBuffer.printWords(O); 1233fcaf7f86SDimitry Andric O << "}"; 12340b57cec5SDimitry Andric } 12350b57cec5SDimitry Andric } else { 12360b57cec5SDimitry Andric O << " .b8 "; 12370b57cec5SDimitry Andric getSymbol(GVar)->print(O, MAI); 1238fcaf7f86SDimitry Andric O << "[" << ElementSize 
<< "] = {"; 1239fcaf7f86SDimitry Andric aggBuffer.printBytes(O); 12400b57cec5SDimitry Andric O << "}"; 1241fcaf7f86SDimitry Andric } 12420b57cec5SDimitry Andric } else { 12430b57cec5SDimitry Andric O << " .b8 "; 12440b57cec5SDimitry Andric getSymbol(GVar)->print(O, MAI); 12450b57cec5SDimitry Andric if (ElementSize) { 12460b57cec5SDimitry Andric O << "["; 12470b57cec5SDimitry Andric O << ElementSize; 12480b57cec5SDimitry Andric O << "]"; 12490b57cec5SDimitry Andric } 12500b57cec5SDimitry Andric } 12510b57cec5SDimitry Andric } else { 12520b57cec5SDimitry Andric O << " .b8 "; 12530b57cec5SDimitry Andric getSymbol(GVar)->print(O, MAI); 12540b57cec5SDimitry Andric if (ElementSize) { 12550b57cec5SDimitry Andric O << "["; 12560b57cec5SDimitry Andric O << ElementSize; 12570b57cec5SDimitry Andric O << "]"; 12580b57cec5SDimitry Andric } 12590b57cec5SDimitry Andric } 12600b57cec5SDimitry Andric break; 12610b57cec5SDimitry Andric default: 12620b57cec5SDimitry Andric llvm_unreachable("type not supported yet"); 12630b57cec5SDimitry Andric } 12640b57cec5SDimitry Andric } 12650b57cec5SDimitry Andric O << ";\n"; 12660b57cec5SDimitry Andric } 12670b57cec5SDimitry Andric 1268fcaf7f86SDimitry Andric void NVPTXAsmPrinter::AggBuffer::printSymbol(unsigned nSym, raw_ostream &os) { 1269fcaf7f86SDimitry Andric const Value *v = Symbols[nSym]; 1270fcaf7f86SDimitry Andric const Value *v0 = SymbolsBeforeStripping[nSym]; 1271fcaf7f86SDimitry Andric if (const GlobalValue *GVar = dyn_cast<GlobalValue>(v)) { 1272fcaf7f86SDimitry Andric MCSymbol *Name = AP.getSymbol(GVar); 1273fcaf7f86SDimitry Andric PointerType *PTy = dyn_cast<PointerType>(v0->getType()); 1274fcaf7f86SDimitry Andric // Is v0 a generic pointer? 
1275fcaf7f86SDimitry Andric bool isGenericPointer = PTy && PTy->getAddressSpace() == 0; 1276fcaf7f86SDimitry Andric if (EmitGeneric && isGenericPointer && !isa<Function>(v)) { 1277fcaf7f86SDimitry Andric os << "generic("; 1278fcaf7f86SDimitry Andric Name->print(os, AP.MAI); 1279fcaf7f86SDimitry Andric os << ")"; 1280fcaf7f86SDimitry Andric } else { 1281fcaf7f86SDimitry Andric Name->print(os, AP.MAI); 1282fcaf7f86SDimitry Andric } 1283fcaf7f86SDimitry Andric } else if (const ConstantExpr *CExpr = dyn_cast<ConstantExpr>(v0)) { 1284fcaf7f86SDimitry Andric const MCExpr *Expr = AP.lowerConstantForGV(cast<Constant>(CExpr), false); 1285fcaf7f86SDimitry Andric AP.printMCExpr(*Expr, os); 1286fcaf7f86SDimitry Andric } else 1287fcaf7f86SDimitry Andric llvm_unreachable("symbol type unknown"); 1288fcaf7f86SDimitry Andric } 1289fcaf7f86SDimitry Andric 1290fcaf7f86SDimitry Andric void NVPTXAsmPrinter::AggBuffer::printBytes(raw_ostream &os) { 1291fcaf7f86SDimitry Andric unsigned int ptrSize = AP.MAI->getCodePointerSize(); 1292*0fca6ea1SDimitry Andric // Do not emit trailing zero initializers. They will be zero-initialized by 1293*0fca6ea1SDimitry Andric // ptxas. This saves on both space requirements for the generated PTX and on 1294*0fca6ea1SDimitry Andric // memory use by ptxas. (See: 1295*0fca6ea1SDimitry Andric // https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#global-state-space) 1296*0fca6ea1SDimitry Andric unsigned int InitializerCount = size; 1297*0fca6ea1SDimitry Andric // TODO: symbols make this harder, but it would still be good to trim trailing 1298*0fca6ea1SDimitry Andric // 0s for aggs with symbols as well. 
1299*0fca6ea1SDimitry Andric if (numSymbols() == 0) 1300*0fca6ea1SDimitry Andric while (InitializerCount >= 1 && !buffer[InitializerCount - 1]) 1301*0fca6ea1SDimitry Andric InitializerCount--; 1302*0fca6ea1SDimitry Andric 1303*0fca6ea1SDimitry Andric symbolPosInBuffer.push_back(InitializerCount); 1304fcaf7f86SDimitry Andric unsigned int nSym = 0; 1305fcaf7f86SDimitry Andric unsigned int nextSymbolPos = symbolPosInBuffer[nSym]; 1306*0fca6ea1SDimitry Andric for (unsigned int pos = 0; pos < InitializerCount;) { 1307fcaf7f86SDimitry Andric if (pos) 1308fcaf7f86SDimitry Andric os << ", "; 1309fcaf7f86SDimitry Andric if (pos != nextSymbolPos) { 1310fcaf7f86SDimitry Andric os << (unsigned int)buffer[pos]; 1311fcaf7f86SDimitry Andric ++pos; 1312fcaf7f86SDimitry Andric continue; 1313fcaf7f86SDimitry Andric } 1314fcaf7f86SDimitry Andric // Generate a per-byte mask() operator for the symbol, which looks like: 1315fcaf7f86SDimitry Andric // .global .u8 addr[] = {0xFF(foo), 0xFF00(foo), 0xFF0000(foo), ...}; 1316fcaf7f86SDimitry Andric // See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#initializers 1317fcaf7f86SDimitry Andric std::string symText; 1318fcaf7f86SDimitry Andric llvm::raw_string_ostream oss(symText); 1319fcaf7f86SDimitry Andric printSymbol(nSym, oss); 1320fcaf7f86SDimitry Andric for (unsigned i = 0; i < ptrSize; ++i) { 1321fcaf7f86SDimitry Andric if (i) 1322fcaf7f86SDimitry Andric os << ", "; 1323fcaf7f86SDimitry Andric llvm::write_hex(os, 0xFFULL << i * 8, HexPrintStyle::PrefixUpper); 1324fcaf7f86SDimitry Andric os << "(" << symText << ")"; 1325fcaf7f86SDimitry Andric } 1326fcaf7f86SDimitry Andric pos += ptrSize; 1327fcaf7f86SDimitry Andric nextSymbolPos = symbolPosInBuffer[++nSym]; 1328fcaf7f86SDimitry Andric assert(nextSymbolPos >= pos); 1329fcaf7f86SDimitry Andric } 1330fcaf7f86SDimitry Andric } 1331fcaf7f86SDimitry Andric 1332fcaf7f86SDimitry Andric void NVPTXAsmPrinter::AggBuffer::printWords(raw_ostream &os) { 1333fcaf7f86SDimitry Andric 
unsigned int ptrSize = AP.MAI->getCodePointerSize(); 1334fcaf7f86SDimitry Andric symbolPosInBuffer.push_back(size); 1335fcaf7f86SDimitry Andric unsigned int nSym = 0; 1336fcaf7f86SDimitry Andric unsigned int nextSymbolPos = symbolPosInBuffer[nSym]; 1337fcaf7f86SDimitry Andric assert(nextSymbolPos % ptrSize == 0); 1338fcaf7f86SDimitry Andric for (unsigned int pos = 0; pos < size; pos += ptrSize) { 1339fcaf7f86SDimitry Andric if (pos) 1340fcaf7f86SDimitry Andric os << ", "; 1341fcaf7f86SDimitry Andric if (pos == nextSymbolPos) { 1342fcaf7f86SDimitry Andric printSymbol(nSym, os); 1343fcaf7f86SDimitry Andric nextSymbolPos = symbolPosInBuffer[++nSym]; 1344fcaf7f86SDimitry Andric assert(nextSymbolPos % ptrSize == 0); 1345fcaf7f86SDimitry Andric assert(nextSymbolPos >= pos + ptrSize); 1346fcaf7f86SDimitry Andric } else if (ptrSize == 4) 1347fcaf7f86SDimitry Andric os << support::endian::read32le(&buffer[pos]); 1348fcaf7f86SDimitry Andric else 1349fcaf7f86SDimitry Andric os << support::endian::read64le(&buffer[pos]); 1350fcaf7f86SDimitry Andric } 1351fcaf7f86SDimitry Andric } 1352fcaf7f86SDimitry Andric 13530b57cec5SDimitry Andric void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { 13540b57cec5SDimitry Andric if (localDecls.find(f) == localDecls.end()) 13550b57cec5SDimitry Andric return; 13560b57cec5SDimitry Andric 13570b57cec5SDimitry Andric std::vector<const GlobalVariable *> &gvars = localDecls[f]; 13580b57cec5SDimitry Andric 135981ad6265SDimitry Andric const NVPTXTargetMachine &NTM = static_cast<const NVPTXTargetMachine &>(TM); 136081ad6265SDimitry Andric const NVPTXSubtarget &STI = 136181ad6265SDimitry Andric *static_cast<const NVPTXSubtarget *>(NTM.getSubtargetImpl()); 136281ad6265SDimitry Andric 136304eeddc0SDimitry Andric for (const GlobalVariable *GV : gvars) { 13640b57cec5SDimitry Andric O << "\t// demoted variable\n\t"; 136581ad6265SDimitry Andric printModuleLevelGV(GV, O, /*processDemoted=*/true, STI); 13660b57cec5SDimitry Andric } 
13670b57cec5SDimitry Andric } 13680b57cec5SDimitry Andric 13690b57cec5SDimitry Andric void NVPTXAsmPrinter::emitPTXAddressSpace(unsigned int AddressSpace, 13700b57cec5SDimitry Andric raw_ostream &O) const { 13710b57cec5SDimitry Andric switch (AddressSpace) { 13720b57cec5SDimitry Andric case ADDRESS_SPACE_LOCAL: 13730b57cec5SDimitry Andric O << "local"; 13740b57cec5SDimitry Andric break; 13750b57cec5SDimitry Andric case ADDRESS_SPACE_GLOBAL: 13760b57cec5SDimitry Andric O << "global"; 13770b57cec5SDimitry Andric break; 13780b57cec5SDimitry Andric case ADDRESS_SPACE_CONST: 13790b57cec5SDimitry Andric O << "const"; 13800b57cec5SDimitry Andric break; 13810b57cec5SDimitry Andric case ADDRESS_SPACE_SHARED: 13820b57cec5SDimitry Andric O << "shared"; 13830b57cec5SDimitry Andric break; 13840b57cec5SDimitry Andric default: 13850b57cec5SDimitry Andric report_fatal_error("Bad address space found while emitting PTX: " + 13860b57cec5SDimitry Andric llvm::Twine(AddressSpace)); 13870b57cec5SDimitry Andric break; 13880b57cec5SDimitry Andric } 13890b57cec5SDimitry Andric } 13900b57cec5SDimitry Andric 13910b57cec5SDimitry Andric std::string 13920b57cec5SDimitry Andric NVPTXAsmPrinter::getPTXFundamentalTypeStr(Type *Ty, bool useB4PTR) const { 13930b57cec5SDimitry Andric switch (Ty->getTypeID()) { 13940b57cec5SDimitry Andric case Type::IntegerTyID: { 13950b57cec5SDimitry Andric unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth(); 13960b57cec5SDimitry Andric if (NumBits == 1) 13970b57cec5SDimitry Andric return "pred"; 13980b57cec5SDimitry Andric else if (NumBits <= 64) { 13990b57cec5SDimitry Andric std::string name = "u"; 14000b57cec5SDimitry Andric return name + utostr(NumBits); 14010b57cec5SDimitry Andric } else { 14020b57cec5SDimitry Andric llvm_unreachable("Integer too large"); 14030b57cec5SDimitry Andric break; 14040b57cec5SDimitry Andric } 14050b57cec5SDimitry Andric break; 14060b57cec5SDimitry Andric } 140706c3fb27SDimitry Andric case Type::BFloatTyID: 14080b57cec5SDimitry 
Andric case Type::HalfTyID: 140906c3fb27SDimitry Andric // fp16 and bf16 are stored as .b16 for compatibility with pre-sm_53 141006c3fb27SDimitry Andric // PTX assembly. 14110b57cec5SDimitry Andric return "b16"; 14120b57cec5SDimitry Andric case Type::FloatTyID: 14130b57cec5SDimitry Andric return "f32"; 14140b57cec5SDimitry Andric case Type::DoubleTyID: 14150b57cec5SDimitry Andric return "f64"; 1416bdd1243dSDimitry Andric case Type::PointerTyID: { 1417bdd1243dSDimitry Andric unsigned PtrSize = TM.getPointerSizeInBits(Ty->getPointerAddressSpace()); 1418bdd1243dSDimitry Andric assert((PtrSize == 64 || PtrSize == 32) && "Unexpected pointer size"); 1419bdd1243dSDimitry Andric 1420bdd1243dSDimitry Andric if (PtrSize == 64) 14210b57cec5SDimitry Andric if (useB4PTR) 14220b57cec5SDimitry Andric return "b64"; 14230b57cec5SDimitry Andric else 14240b57cec5SDimitry Andric return "u64"; 14250b57cec5SDimitry Andric else if (useB4PTR) 14260b57cec5SDimitry Andric return "b32"; 14270b57cec5SDimitry Andric else 14280b57cec5SDimitry Andric return "u32"; 1429bdd1243dSDimitry Andric } 1430e8d8bef9SDimitry Andric default: 1431e8d8bef9SDimitry Andric break; 14320b57cec5SDimitry Andric } 14330b57cec5SDimitry Andric llvm_unreachable("unexpected type"); 14340b57cec5SDimitry Andric } 14350b57cec5SDimitry Andric 14360b57cec5SDimitry Andric void NVPTXAsmPrinter::emitPTXGlobalVariable(const GlobalVariable *GVar, 143781ad6265SDimitry Andric raw_ostream &O, 143881ad6265SDimitry Andric const NVPTXSubtarget &STI) { 14390b57cec5SDimitry Andric const DataLayout &DL = getDataLayout(); 14400b57cec5SDimitry Andric 14410b57cec5SDimitry Andric // GlobalVariables are always constant pointers themselves. 
14420b57cec5SDimitry Andric Type *ETy = GVar->getValueType(); 14430b57cec5SDimitry Andric 14440b57cec5SDimitry Andric O << "."; 14450b57cec5SDimitry Andric emitPTXAddressSpace(GVar->getType()->getAddressSpace(), O); 144681ad6265SDimitry Andric if (isManaged(*GVar)) { 144781ad6265SDimitry Andric if (STI.getPTXVersion() < 40 || STI.getSmVersion() < 30) { 144881ad6265SDimitry Andric report_fatal_error( 144981ad6265SDimitry Andric ".attribute(.managed) requires PTX version >= 4.0 and sm_30"); 145081ad6265SDimitry Andric } 145181ad6265SDimitry Andric O << " .attribute(.managed)"; 145281ad6265SDimitry Andric } 14530eae32dcSDimitry Andric if (MaybeAlign A = GVar->getAlign()) 14540eae32dcSDimitry Andric O << " .align " << A->value(); 14550b57cec5SDimitry Andric else 1456bdd1243dSDimitry Andric O << " .align " << (int)DL.getPrefTypeAlign(ETy).value(); 14570b57cec5SDimitry Andric 14580b57cec5SDimitry Andric // Special case for i128 14590b57cec5SDimitry Andric if (ETy->isIntegerTy(128)) { 14600b57cec5SDimitry Andric O << " .b8 "; 14610b57cec5SDimitry Andric getSymbol(GVar)->print(O, MAI); 14620b57cec5SDimitry Andric O << "[16]"; 14630b57cec5SDimitry Andric return; 14640b57cec5SDimitry Andric } 14650b57cec5SDimitry Andric 14660b57cec5SDimitry Andric if (ETy->isFloatingPointTy() || ETy->isIntOrPtrTy()) { 14670b57cec5SDimitry Andric O << " ."; 14680b57cec5SDimitry Andric O << getPTXFundamentalTypeStr(ETy); 14690b57cec5SDimitry Andric O << " "; 14700b57cec5SDimitry Andric getSymbol(GVar)->print(O, MAI); 14710b57cec5SDimitry Andric return; 14720b57cec5SDimitry Andric } 14730b57cec5SDimitry Andric 14740b57cec5SDimitry Andric int64_t ElementSize = 0; 14750b57cec5SDimitry Andric 14760b57cec5SDimitry Andric // Although PTX has direct support for struct type and array type and LLVM IR 14770b57cec5SDimitry Andric // is very similar to PTX, the LLVM CodeGen does not support for targets that 14780b57cec5SDimitry Andric // support these high level field accesses. 
Structs and arrays are lowered 14790b57cec5SDimitry Andric // into arrays of bytes. 14800b57cec5SDimitry Andric switch (ETy->getTypeID()) { 14810b57cec5SDimitry Andric case Type::StructTyID: 14820b57cec5SDimitry Andric case Type::ArrayTyID: 14835ffd83dbSDimitry Andric case Type::FixedVectorTyID: 14840b57cec5SDimitry Andric ElementSize = DL.getTypeStoreSize(ETy); 14850b57cec5SDimitry Andric O << " .b8 "; 14860b57cec5SDimitry Andric getSymbol(GVar)->print(O, MAI); 14870b57cec5SDimitry Andric O << "["; 14880b57cec5SDimitry Andric if (ElementSize) { 14890b57cec5SDimitry Andric O << ElementSize; 14900b57cec5SDimitry Andric } 14910b57cec5SDimitry Andric O << "]"; 14920b57cec5SDimitry Andric break; 14930b57cec5SDimitry Andric default: 14940b57cec5SDimitry Andric llvm_unreachable("type not supported yet"); 14950b57cec5SDimitry Andric } 14960b57cec5SDimitry Andric } 14970b57cec5SDimitry Andric 14980b57cec5SDimitry Andric void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { 14990b57cec5SDimitry Andric const DataLayout &DL = getDataLayout(); 15000b57cec5SDimitry Andric const AttributeList &PAL = F->getAttributes(); 15010b57cec5SDimitry Andric const NVPTXSubtarget &STI = TM.getSubtarget<NVPTXSubtarget>(*F); 150281ad6265SDimitry Andric const auto *TLI = cast<NVPTXTargetLowering>(STI.getTargetLowering()); 150381ad6265SDimitry Andric 15040b57cec5SDimitry Andric Function::const_arg_iterator I, E; 15050b57cec5SDimitry Andric unsigned paramIndex = 0; 15060b57cec5SDimitry Andric bool first = true; 15070b57cec5SDimitry Andric bool isKernelFunc = isKernelFunction(*F); 15080b57cec5SDimitry Andric bool isABI = (STI.getSmVersion() >= 20); 15090b57cec5SDimitry Andric bool hasImageHandles = STI.hasImageHandles(); 15100b57cec5SDimitry Andric 1511bdd1243dSDimitry Andric if (F->arg_empty() && !F->isVarArg()) { 151206c3fb27SDimitry Andric O << "()"; 15130b57cec5SDimitry Andric return; 15140b57cec5SDimitry Andric } 15150b57cec5SDimitry Andric 15160b57cec5SDimitry 
Andric O << "(\n"; 15170b57cec5SDimitry Andric 15180b57cec5SDimitry Andric for (I = F->arg_begin(), E = F->arg_end(); I != E; ++I, paramIndex++) { 15190b57cec5SDimitry Andric Type *Ty = I->getType(); 15200b57cec5SDimitry Andric 15210b57cec5SDimitry Andric if (!first) 15220b57cec5SDimitry Andric O << ",\n"; 15230b57cec5SDimitry Andric 15240b57cec5SDimitry Andric first = false; 15250b57cec5SDimitry Andric 15260b57cec5SDimitry Andric // Handle image/sampler parameters 15270b57cec5SDimitry Andric if (isKernelFunction(*F)) { 15280b57cec5SDimitry Andric if (isSampler(*I) || isImage(*I)) { 15290b57cec5SDimitry Andric if (isImage(*I)) { 15300b57cec5SDimitry Andric if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { 15310b57cec5SDimitry Andric if (hasImageHandles) 15320b57cec5SDimitry Andric O << "\t.param .u64 .ptr .surfref "; 15330b57cec5SDimitry Andric else 15340b57cec5SDimitry Andric O << "\t.param .surfref "; 153506c3fb27SDimitry Andric O << TLI->getParamName(F, paramIndex); 15360b57cec5SDimitry Andric } 15370b57cec5SDimitry Andric else { // Default image is read_only 15380b57cec5SDimitry Andric if (hasImageHandles) 15390b57cec5SDimitry Andric O << "\t.param .u64 .ptr .texref "; 15400b57cec5SDimitry Andric else 15410b57cec5SDimitry Andric O << "\t.param .texref "; 154206c3fb27SDimitry Andric O << TLI->getParamName(F, paramIndex); 15430b57cec5SDimitry Andric } 15440b57cec5SDimitry Andric } else { 15450b57cec5SDimitry Andric if (hasImageHandles) 15460b57cec5SDimitry Andric O << "\t.param .u64 .ptr .samplerref "; 15470b57cec5SDimitry Andric else 15480b57cec5SDimitry Andric O << "\t.param .samplerref "; 154906c3fb27SDimitry Andric O << TLI->getParamName(F, paramIndex); 15500b57cec5SDimitry Andric } 15510b57cec5SDimitry Andric continue; 15520b57cec5SDimitry Andric } 15530b57cec5SDimitry Andric } 15540b57cec5SDimitry Andric 155581ad6265SDimitry Andric auto getOptimalAlignForParam = [TLI, &DL, &PAL, F, 155681ad6265SDimitry Andric paramIndex](Type *Ty) -> Align { 
1557*0fca6ea1SDimitry Andric if (MaybeAlign StackAlign = 1558*0fca6ea1SDimitry Andric getAlign(*F, paramIndex + AttributeList::FirstArgIndex)) 1559*0fca6ea1SDimitry Andric return StackAlign.value(); 1560*0fca6ea1SDimitry Andric 156181ad6265SDimitry Andric Align TypeAlign = TLI->getFunctionParamOptimizedAlign(F, Ty, DL); 156281ad6265SDimitry Andric MaybeAlign ParamAlign = PAL.getParamAlignment(paramIndex); 156381ad6265SDimitry Andric return std::max(TypeAlign, ParamAlign.valueOrOne()); 156481ad6265SDimitry Andric }; 156581ad6265SDimitry Andric 1566349cc55cSDimitry Andric if (!PAL.hasParamAttr(paramIndex, Attribute::ByVal)) { 156706c3fb27SDimitry Andric if (ShouldPassAsArray(Ty)) { 15680b57cec5SDimitry Andric // Just print .param .align <a> .b8 .param[size]; 156981ad6265SDimitry Andric // <a> = optimal alignment for the element type; always multiple of 157081ad6265SDimitry Andric // PAL.getParamAlignment 15710b57cec5SDimitry Andric // size = typeallocsize of element type 157281ad6265SDimitry Andric Align OptimalAlign = getOptimalAlignForParam(Ty); 15730b57cec5SDimitry Andric 157481ad6265SDimitry Andric O << "\t.param .align " << OptimalAlign.value() << " .b8 "; 157506c3fb27SDimitry Andric O << TLI->getParamName(F, paramIndex); 157681ad6265SDimitry Andric O << "[" << DL.getTypeAllocSize(Ty) << "]"; 15770b57cec5SDimitry Andric 15780b57cec5SDimitry Andric continue; 15790b57cec5SDimitry Andric } 15800b57cec5SDimitry Andric // Just a scalar 15810b57cec5SDimitry Andric auto *PTy = dyn_cast<PointerType>(Ty); 1582bdd1243dSDimitry Andric unsigned PTySizeInBits = 0; 1583bdd1243dSDimitry Andric if (PTy) { 1584bdd1243dSDimitry Andric PTySizeInBits = 1585bdd1243dSDimitry Andric TLI->getPointerTy(DL, PTy->getAddressSpace()).getSizeInBits(); 1586bdd1243dSDimitry Andric assert(PTySizeInBits && "Invalid pointer size"); 1587bdd1243dSDimitry Andric } 1588bdd1243dSDimitry Andric 15890b57cec5SDimitry Andric if (isKernelFunc) { 15900b57cec5SDimitry Andric if (PTy) { 15910b57cec5SDimitry 
Andric // Special handling for pointer arguments to kernel 1592bdd1243dSDimitry Andric O << "\t.param .u" << PTySizeInBits << " "; 15930b57cec5SDimitry Andric 15940b57cec5SDimitry Andric if (static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() != 15950b57cec5SDimitry Andric NVPTX::CUDA) { 15960b57cec5SDimitry Andric int addrSpace = PTy->getAddressSpace(); 15970b57cec5SDimitry Andric switch (addrSpace) { 15980b57cec5SDimitry Andric default: 15990b57cec5SDimitry Andric O << ".ptr "; 16000b57cec5SDimitry Andric break; 16010b57cec5SDimitry Andric case ADDRESS_SPACE_CONST: 16020b57cec5SDimitry Andric O << ".ptr .const "; 16030b57cec5SDimitry Andric break; 16040b57cec5SDimitry Andric case ADDRESS_SPACE_SHARED: 16050b57cec5SDimitry Andric O << ".ptr .shared "; 16060b57cec5SDimitry Andric break; 16070b57cec5SDimitry Andric case ADDRESS_SPACE_GLOBAL: 16080b57cec5SDimitry Andric O << ".ptr .global "; 16090b57cec5SDimitry Andric break; 16100b57cec5SDimitry Andric } 161181ad6265SDimitry Andric Align ParamAlign = I->getParamAlign().valueOrOne(); 161281ad6265SDimitry Andric O << ".align " << ParamAlign.value() << " "; 16130b57cec5SDimitry Andric } 161406c3fb27SDimitry Andric O << TLI->getParamName(F, paramIndex); 16150b57cec5SDimitry Andric continue; 16160b57cec5SDimitry Andric } 16170b57cec5SDimitry Andric 16180b57cec5SDimitry Andric // non-pointer scalar to kernel func 16190b57cec5SDimitry Andric O << "\t.param ."; 16200b57cec5SDimitry Andric // Special case: predicate operands become .u8 types 16210b57cec5SDimitry Andric if (Ty->isIntegerTy(1)) 16220b57cec5SDimitry Andric O << "u8"; 16230b57cec5SDimitry Andric else 16240b57cec5SDimitry Andric O << getPTXFundamentalTypeStr(Ty); 16250b57cec5SDimitry Andric O << " "; 162606c3fb27SDimitry Andric O << TLI->getParamName(F, paramIndex); 16270b57cec5SDimitry Andric continue; 16280b57cec5SDimitry Andric } 16290b57cec5SDimitry Andric // Non-kernel function, just print .param .b<size> for ABI 16300b57cec5SDimitry Andric // and .reg 
.b<size> for non-ABI 16310b57cec5SDimitry Andric unsigned sz = 0; 16320b57cec5SDimitry Andric if (isa<IntegerType>(Ty)) { 16330b57cec5SDimitry Andric sz = cast<IntegerType>(Ty)->getBitWidth(); 1634fcaf7f86SDimitry Andric sz = promoteScalarArgumentSize(sz); 1635bdd1243dSDimitry Andric } else if (PTy) { 1636bdd1243dSDimitry Andric assert(PTySizeInBits && "Invalid pointer size"); 1637bdd1243dSDimitry Andric sz = PTySizeInBits; 163806c3fb27SDimitry Andric } else 16390b57cec5SDimitry Andric sz = Ty->getPrimitiveSizeInBits(); 16400b57cec5SDimitry Andric if (isABI) 16410b57cec5SDimitry Andric O << "\t.param .b" << sz << " "; 16420b57cec5SDimitry Andric else 16430b57cec5SDimitry Andric O << "\t.reg .b" << sz << " "; 164406c3fb27SDimitry Andric O << TLI->getParamName(F, paramIndex); 16450b57cec5SDimitry Andric continue; 16460b57cec5SDimitry Andric } 16470b57cec5SDimitry Andric 164881ad6265SDimitry Andric // param has byVal attribute. 164981ad6265SDimitry Andric Type *ETy = PAL.getParamByValType(paramIndex); 165081ad6265SDimitry Andric assert(ETy && "Param should have byval type"); 16510b57cec5SDimitry Andric 16520b57cec5SDimitry Andric if (isABI || isKernelFunc) { 16530b57cec5SDimitry Andric // Just print .param .align <a> .b8 .param[size]; 165481ad6265SDimitry Andric // <a> = optimal alignment for the element type; always multiple of 165581ad6265SDimitry Andric // PAL.getParamAlignment 16560b57cec5SDimitry Andric // size = typeallocsize of element type 1657bdd1243dSDimitry Andric Align OptimalAlign = 1658bdd1243dSDimitry Andric isKernelFunc 1659bdd1243dSDimitry Andric ? 
getOptimalAlignForParam(ETy) 1660bdd1243dSDimitry Andric : TLI->getFunctionByValParamAlign( 1661bdd1243dSDimitry Andric F, ETy, PAL.getParamAlignment(paramIndex).valueOrOne(), DL); 166281ad6265SDimitry Andric 16630b57cec5SDimitry Andric unsigned sz = DL.getTypeAllocSize(ETy); 166481ad6265SDimitry Andric O << "\t.param .align " << OptimalAlign.value() << " .b8 "; 166506c3fb27SDimitry Andric O << TLI->getParamName(F, paramIndex); 16660b57cec5SDimitry Andric O << "[" << sz << "]"; 16670b57cec5SDimitry Andric continue; 16680b57cec5SDimitry Andric } else { 16690b57cec5SDimitry Andric // Split the ETy into constituent parts and 16700b57cec5SDimitry Andric // print .param .b<size> <name> for each part. 16710b57cec5SDimitry Andric // Further, if a part is vector, print the above for 16720b57cec5SDimitry Andric // each vector element. 16730b57cec5SDimitry Andric SmallVector<EVT, 16> vtparts; 16740b57cec5SDimitry Andric ComputeValueVTs(*TLI, DL, ETy, vtparts); 16750b57cec5SDimitry Andric for (unsigned i = 0, e = vtparts.size(); i != e; ++i) { 16760b57cec5SDimitry Andric unsigned elems = 1; 16770b57cec5SDimitry Andric EVT elemtype = vtparts[i]; 16780b57cec5SDimitry Andric if (vtparts[i].isVector()) { 16790b57cec5SDimitry Andric elems = vtparts[i].getVectorNumElements(); 16800b57cec5SDimitry Andric elemtype = vtparts[i].getVectorElementType(); 16810b57cec5SDimitry Andric } 16820b57cec5SDimitry Andric 16830b57cec5SDimitry Andric for (unsigned j = 0, je = elems; j != je; ++j) { 16840b57cec5SDimitry Andric unsigned sz = elemtype.getSizeInBits(); 1685fcaf7f86SDimitry Andric if (elemtype.isInteger()) 1686fcaf7f86SDimitry Andric sz = promoteScalarArgumentSize(sz); 16870b57cec5SDimitry Andric O << "\t.reg .b" << sz << " "; 168806c3fb27SDimitry Andric O << TLI->getParamName(F, paramIndex); 16890b57cec5SDimitry Andric if (j < je - 1) 16900b57cec5SDimitry Andric O << ",\n"; 16910b57cec5SDimitry Andric ++paramIndex; 16920b57cec5SDimitry Andric } 16930b57cec5SDimitry Andric if (i < e - 1) 
16940b57cec5SDimitry Andric O << ",\n"; 16950b57cec5SDimitry Andric } 16960b57cec5SDimitry Andric --paramIndex; 16970b57cec5SDimitry Andric continue; 16980b57cec5SDimitry Andric } 16990b57cec5SDimitry Andric } 17000b57cec5SDimitry Andric 1701bdd1243dSDimitry Andric if (F->isVarArg()) { 1702bdd1243dSDimitry Andric if (!first) 1703bdd1243dSDimitry Andric O << ",\n"; 1704bdd1243dSDimitry Andric O << "\t.param .align " << STI.getMaxRequiredAlignment(); 1705bdd1243dSDimitry Andric O << " .b8 "; 170606c3fb27SDimitry Andric O << TLI->getParamName(F, /* vararg */ -1) << "[]"; 1707bdd1243dSDimitry Andric } 1708bdd1243dSDimitry Andric 170906c3fb27SDimitry Andric O << "\n)"; 17100b57cec5SDimitry Andric } 17110b57cec5SDimitry Andric 17120b57cec5SDimitry Andric void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( 17130b57cec5SDimitry Andric const MachineFunction &MF) { 17140b57cec5SDimitry Andric SmallString<128> Str; 17150b57cec5SDimitry Andric raw_svector_ostream O(Str); 17160b57cec5SDimitry Andric 17170b57cec5SDimitry Andric // Map the global virtual register number to a register class specific 17180b57cec5SDimitry Andric // virtual register number starting from 1 with that class. 
17190b57cec5SDimitry Andric const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); 17200b57cec5SDimitry Andric //unsigned numRegClasses = TRI->getNumRegClasses(); 17210b57cec5SDimitry Andric 17220b57cec5SDimitry Andric // Emit the Fake Stack Object 17230b57cec5SDimitry Andric const MachineFrameInfo &MFI = MF.getFrameInfo(); 1724*0fca6ea1SDimitry Andric int64_t NumBytes = MFI.getStackSize(); 17250b57cec5SDimitry Andric if (NumBytes) { 17265ffd83dbSDimitry Andric O << "\t.local .align " << MFI.getMaxAlign().value() << " .b8 \t" 17275ffd83dbSDimitry Andric << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n"; 17280b57cec5SDimitry Andric if (static_cast<const NVPTXTargetMachine &>(MF.getTarget()).is64Bit()) { 17290b57cec5SDimitry Andric O << "\t.reg .b64 \t%SP;\n"; 17300b57cec5SDimitry Andric O << "\t.reg .b64 \t%SPL;\n"; 17310b57cec5SDimitry Andric } else { 17320b57cec5SDimitry Andric O << "\t.reg .b32 \t%SP;\n"; 17330b57cec5SDimitry Andric O << "\t.reg .b32 \t%SPL;\n"; 17340b57cec5SDimitry Andric } 17350b57cec5SDimitry Andric } 17360b57cec5SDimitry Andric 17370b57cec5SDimitry Andric // Go through all virtual registers to establish the mapping between the 17380b57cec5SDimitry Andric // global virtual 17390b57cec5SDimitry Andric // register number and the per class virtual register number. 17400b57cec5SDimitry Andric // We use the per class virtual register number in the ptx output. 
17410b57cec5SDimitry Andric unsigned int numVRs = MRI->getNumVirtRegs(); 17420b57cec5SDimitry Andric for (unsigned i = 0; i < numVRs; i++) { 174304eeddc0SDimitry Andric Register vr = Register::index2VirtReg(i); 17440b57cec5SDimitry Andric const TargetRegisterClass *RC = MRI->getRegClass(vr); 17450b57cec5SDimitry Andric DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 17460b57cec5SDimitry Andric int n = regmap.size(); 17470b57cec5SDimitry Andric regmap.insert(std::make_pair(vr, n + 1)); 17480b57cec5SDimitry Andric } 17490b57cec5SDimitry Andric 17500b57cec5SDimitry Andric // Emit register declarations 17510b57cec5SDimitry Andric // @TODO: Extract out the real register usage 17520b57cec5SDimitry Andric // O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n"; 17530b57cec5SDimitry Andric // O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n"; 17540b57cec5SDimitry Andric // O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n"; 17550b57cec5SDimitry Andric // O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n"; 17560b57cec5SDimitry Andric // O << "\t.reg .s64 %rd<" << NVPTXNumRegisters << ">;\n"; 17570b57cec5SDimitry Andric // O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n"; 17580b57cec5SDimitry Andric // O << "\t.reg .f64 %fd<" << NVPTXNumRegisters << ">;\n"; 17590b57cec5SDimitry Andric 17600b57cec5SDimitry Andric // Emit declaration of the virtual registers or 'physical' registers for 17610b57cec5SDimitry Andric // each register class 17620b57cec5SDimitry Andric for (unsigned i=0; i< TRI->getNumRegClasses(); i++) { 17630b57cec5SDimitry Andric const TargetRegisterClass *RC = TRI->getRegClass(i); 17640b57cec5SDimitry Andric DenseMap<unsigned, unsigned> ®map = VRegMapping[RC]; 17650b57cec5SDimitry Andric std::string rcname = getNVPTXRegClassName(RC); 17660b57cec5SDimitry Andric std::string rcStr = getNVPTXRegClassStr(RC); 17670b57cec5SDimitry Andric int n = regmap.size(); 17680b57cec5SDimitry Andric 17690b57cec5SDimitry Andric // Only declare those 
registers that may be used. 17700b57cec5SDimitry Andric if (n) { 17710b57cec5SDimitry Andric O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1) 17720b57cec5SDimitry Andric << ">;\n"; 17730b57cec5SDimitry Andric } 17740b57cec5SDimitry Andric } 17750b57cec5SDimitry Andric 17765ffd83dbSDimitry Andric OutStreamer->emitRawText(O.str()); 17770b57cec5SDimitry Andric } 17780b57cec5SDimitry Andric 17790b57cec5SDimitry Andric void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { 17800b57cec5SDimitry Andric APFloat APF = APFloat(Fp->getValueAPF()); // make a copy 17810b57cec5SDimitry Andric bool ignored; 17820b57cec5SDimitry Andric unsigned int numHex; 17830b57cec5SDimitry Andric const char *lead; 17840b57cec5SDimitry Andric 17850b57cec5SDimitry Andric if (Fp->getType()->getTypeID() == Type::FloatTyID) { 17860b57cec5SDimitry Andric numHex = 8; 17870b57cec5SDimitry Andric lead = "0f"; 17880b57cec5SDimitry Andric APF.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven, &ignored); 17890b57cec5SDimitry Andric } else if (Fp->getType()->getTypeID() == Type::DoubleTyID) { 17900b57cec5SDimitry Andric numHex = 16; 17910b57cec5SDimitry Andric lead = "0d"; 17920b57cec5SDimitry Andric APF.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &ignored); 17930b57cec5SDimitry Andric } else 17940b57cec5SDimitry Andric llvm_unreachable("unsupported fp type"); 17950b57cec5SDimitry Andric 17960b57cec5SDimitry Andric APInt API = APF.bitcastToAPInt(); 17970b57cec5SDimitry Andric O << lead << format_hex_no_prefix(API.getZExtValue(), numHex, /*Upper=*/true); 17980b57cec5SDimitry Andric } 17990b57cec5SDimitry Andric 18000b57cec5SDimitry Andric void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { 18010b57cec5SDimitry Andric if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 18020b57cec5SDimitry Andric O << CI->getValue(); 18030b57cec5SDimitry Andric return; 18040b57cec5SDimitry Andric } 18050b57cec5SDimitry Andric if 
(const ConstantFP *CFP = dyn_cast<ConstantFP>(CPV)) { 18060b57cec5SDimitry Andric printFPConstant(CFP, O); 18070b57cec5SDimitry Andric return; 18080b57cec5SDimitry Andric } 18090b57cec5SDimitry Andric if (isa<ConstantPointerNull>(CPV)) { 18100b57cec5SDimitry Andric O << "0"; 18110b57cec5SDimitry Andric return; 18120b57cec5SDimitry Andric } 18130b57cec5SDimitry Andric if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 18140b57cec5SDimitry Andric bool IsNonGenericPointer = false; 18150b57cec5SDimitry Andric if (GVar->getType()->getAddressSpace() != 0) { 18160b57cec5SDimitry Andric IsNonGenericPointer = true; 18170b57cec5SDimitry Andric } 18180b57cec5SDimitry Andric if (EmitGeneric && !isa<Function>(CPV) && !IsNonGenericPointer) { 18190b57cec5SDimitry Andric O << "generic("; 18200b57cec5SDimitry Andric getSymbol(GVar)->print(O, MAI); 18210b57cec5SDimitry Andric O << ")"; 18220b57cec5SDimitry Andric } else { 18230b57cec5SDimitry Andric getSymbol(GVar)->print(O, MAI); 18240b57cec5SDimitry Andric } 18250b57cec5SDimitry Andric return; 18260b57cec5SDimitry Andric } 18270b57cec5SDimitry Andric if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1828bdd1243dSDimitry Andric const MCExpr *E = lowerConstantForGV(cast<Constant>(Cexpr), false); 1829bdd1243dSDimitry Andric printMCExpr(*E, O); 18300b57cec5SDimitry Andric return; 18310b57cec5SDimitry Andric } 18320b57cec5SDimitry Andric llvm_unreachable("Not scalar type found in printScalarConstant()"); 18330b57cec5SDimitry Andric } 18340b57cec5SDimitry Andric 18350b57cec5SDimitry Andric void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, 1836349cc55cSDimitry Andric AggBuffer *AggBuffer) { 18370b57cec5SDimitry Andric const DataLayout &DL = getDataLayout(); 1838349cc55cSDimitry Andric int AllocSize = DL.getTypeAllocSize(CPV->getType()); 18390b57cec5SDimitry Andric if (isa<UndefValue>(CPV) || CPV->isNullValue()) { 1840349cc55cSDimitry Andric // Non-zero Bytes indicates that we need to zero-fill 
everything. Otherwise, 1841349cc55cSDimitry Andric // only the space allocated by CPV. 1842349cc55cSDimitry Andric AggBuffer->addZeros(Bytes ? Bytes : AllocSize); 18430b57cec5SDimitry Andric return; 18440b57cec5SDimitry Andric } 18450b57cec5SDimitry Andric 1846349cc55cSDimitry Andric // Helper for filling AggBuffer with APInts. 1847349cc55cSDimitry Andric auto AddIntToBuffer = [AggBuffer, Bytes](const APInt &Val) { 1848349cc55cSDimitry Andric size_t NumBytes = (Val.getBitWidth() + 7) / 8; 1849349cc55cSDimitry Andric SmallVector<unsigned char, 16> Buf(NumBytes); 1850*0fca6ea1SDimitry Andric // `extractBitsAsZExtValue` does not allow the extraction of bits beyond the 1851*0fca6ea1SDimitry Andric // input's bit width, and i1 arrays may not have a length that is a multuple 1852*0fca6ea1SDimitry Andric // of 8. We handle the last byte separately, so we never request out of 1853*0fca6ea1SDimitry Andric // bounds bits. 1854*0fca6ea1SDimitry Andric for (unsigned I = 0; I < NumBytes - 1; ++I) { 1855349cc55cSDimitry Andric Buf[I] = Val.extractBitsAsZExtValue(8, I * 8); 1856349cc55cSDimitry Andric } 1857*0fca6ea1SDimitry Andric size_t LastBytePosition = (NumBytes - 1) * 8; 1858*0fca6ea1SDimitry Andric size_t LastByteBits = Val.getBitWidth() - LastBytePosition; 1859*0fca6ea1SDimitry Andric Buf[NumBytes - 1] = 1860*0fca6ea1SDimitry Andric Val.extractBitsAsZExtValue(LastByteBits, LastBytePosition); 1861349cc55cSDimitry Andric AggBuffer->addBytes(Buf.data(), NumBytes, Bytes); 1862349cc55cSDimitry Andric }; 18630b57cec5SDimitry Andric 1864349cc55cSDimitry Andric switch (CPV->getType()->getTypeID()) { 1865349cc55cSDimitry Andric case Type::IntegerTyID: 1866349cc55cSDimitry Andric if (const auto CI = dyn_cast<ConstantInt>(CPV)) { 1867349cc55cSDimitry Andric AddIntToBuffer(CI->getValue()); 18680b57cec5SDimitry Andric break; 1869349cc55cSDimitry Andric } 1870349cc55cSDimitry Andric if (const auto *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 1871349cc55cSDimitry Andric if (const auto *CI = 
1872349cc55cSDimitry Andric dyn_cast<ConstantInt>(ConstantFoldConstant(Cexpr, DL))) { 1873349cc55cSDimitry Andric AddIntToBuffer(CI->getValue()); 18740b57cec5SDimitry Andric break; 18750b57cec5SDimitry Andric } 18760b57cec5SDimitry Andric if (Cexpr->getOpcode() == Instruction::PtrToInt) { 1877349cc55cSDimitry Andric Value *V = Cexpr->getOperand(0)->stripPointerCasts(); 1878349cc55cSDimitry Andric AggBuffer->addSymbol(V, Cexpr->getOperand(0)); 1879349cc55cSDimitry Andric AggBuffer->addZeros(AllocSize); 18800b57cec5SDimitry Andric break; 18810b57cec5SDimitry Andric } 18820b57cec5SDimitry Andric } 18830b57cec5SDimitry Andric llvm_unreachable("unsupported integer const type"); 18840b57cec5SDimitry Andric break; 1885349cc55cSDimitry Andric 18860b57cec5SDimitry Andric case Type::HalfTyID: 1887bdd1243dSDimitry Andric case Type::BFloatTyID: 18880b57cec5SDimitry Andric case Type::FloatTyID: 1889349cc55cSDimitry Andric case Type::DoubleTyID: 1890349cc55cSDimitry Andric AddIntToBuffer(cast<ConstantFP>(CPV)->getValueAPF().bitcastToAPInt()); 18910b57cec5SDimitry Andric break; 1892349cc55cSDimitry Andric 18930b57cec5SDimitry Andric case Type::PointerTyID: { 18940b57cec5SDimitry Andric if (const GlobalValue *GVar = dyn_cast<GlobalValue>(CPV)) { 1895349cc55cSDimitry Andric AggBuffer->addSymbol(GVar, GVar); 18960b57cec5SDimitry Andric } else if (const ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(CPV)) { 18970b57cec5SDimitry Andric const Value *v = Cexpr->stripPointerCasts(); 1898349cc55cSDimitry Andric AggBuffer->addSymbol(v, Cexpr); 18990b57cec5SDimitry Andric } 1900349cc55cSDimitry Andric AggBuffer->addZeros(AllocSize); 19010b57cec5SDimitry Andric break; 19020b57cec5SDimitry Andric } 19030b57cec5SDimitry Andric 19040b57cec5SDimitry Andric case Type::ArrayTyID: 19055ffd83dbSDimitry Andric case Type::FixedVectorTyID: 19060b57cec5SDimitry Andric case Type::StructTyID: { 19070b57cec5SDimitry Andric if (isa<ConstantAggregate>(CPV) || isa<ConstantDataSequential>(CPV)) { 
1908349cc55cSDimitry Andric bufferAggregateConstant(CPV, AggBuffer); 1909349cc55cSDimitry Andric if (Bytes > AllocSize) 1910349cc55cSDimitry Andric AggBuffer->addZeros(Bytes - AllocSize); 19110b57cec5SDimitry Andric } else if (isa<ConstantAggregateZero>(CPV)) 1912349cc55cSDimitry Andric AggBuffer->addZeros(Bytes); 19130b57cec5SDimitry Andric else 19140b57cec5SDimitry Andric llvm_unreachable("Unexpected Constant type"); 19150b57cec5SDimitry Andric break; 19160b57cec5SDimitry Andric } 19170b57cec5SDimitry Andric 19180b57cec5SDimitry Andric default: 19190b57cec5SDimitry Andric llvm_unreachable("unsupported type"); 19200b57cec5SDimitry Andric } 19210b57cec5SDimitry Andric } 19220b57cec5SDimitry Andric 19230b57cec5SDimitry Andric void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, 19240b57cec5SDimitry Andric AggBuffer *aggBuffer) { 19250b57cec5SDimitry Andric const DataLayout &DL = getDataLayout(); 19260b57cec5SDimitry Andric int Bytes; 19270b57cec5SDimitry Andric 19280b57cec5SDimitry Andric // Integers of arbitrary width 19290b57cec5SDimitry Andric if (const ConstantInt *CI = dyn_cast<ConstantInt>(CPV)) { 19300b57cec5SDimitry Andric APInt Val = CI->getValue(); 19310b57cec5SDimitry Andric for (unsigned I = 0, E = DL.getTypeAllocSize(CPV->getType()); I < E; ++I) { 19320b57cec5SDimitry Andric uint8_t Byte = Val.getLoBits(8).getZExtValue(); 19330b57cec5SDimitry Andric aggBuffer->addBytes(&Byte, 1, 1); 19340b57cec5SDimitry Andric Val.lshrInPlace(8); 19350b57cec5SDimitry Andric } 19360b57cec5SDimitry Andric return; 19370b57cec5SDimitry Andric } 19380b57cec5SDimitry Andric 19390b57cec5SDimitry Andric // Old constants 19400b57cec5SDimitry Andric if (isa<ConstantArray>(CPV) || isa<ConstantVector>(CPV)) { 19410b57cec5SDimitry Andric if (CPV->getNumOperands()) 19420b57cec5SDimitry Andric for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) 19430b57cec5SDimitry Andric bufferLEByte(cast<Constant>(CPV->getOperand(i)), 0, aggBuffer); 19440b57cec5SDimitry Andric 
return; 19450b57cec5SDimitry Andric } 19460b57cec5SDimitry Andric 19470b57cec5SDimitry Andric if (const ConstantDataSequential *CDS = 19480b57cec5SDimitry Andric dyn_cast<ConstantDataSequential>(CPV)) { 19490b57cec5SDimitry Andric if (CDS->getNumElements()) 19500b57cec5SDimitry Andric for (unsigned i = 0; i < CDS->getNumElements(); ++i) 19510b57cec5SDimitry Andric bufferLEByte(cast<Constant>(CDS->getElementAsConstant(i)), 0, 19520b57cec5SDimitry Andric aggBuffer); 19530b57cec5SDimitry Andric return; 19540b57cec5SDimitry Andric } 19550b57cec5SDimitry Andric 19560b57cec5SDimitry Andric if (isa<ConstantStruct>(CPV)) { 19570b57cec5SDimitry Andric if (CPV->getNumOperands()) { 19580b57cec5SDimitry Andric StructType *ST = cast<StructType>(CPV->getType()); 19590b57cec5SDimitry Andric for (unsigned i = 0, e = CPV->getNumOperands(); i != e; ++i) { 19600b57cec5SDimitry Andric if (i == (e - 1)) 19610b57cec5SDimitry Andric Bytes = DL.getStructLayout(ST)->getElementOffset(0) + 19620b57cec5SDimitry Andric DL.getTypeAllocSize(ST) - 19630b57cec5SDimitry Andric DL.getStructLayout(ST)->getElementOffset(i); 19640b57cec5SDimitry Andric else 19650b57cec5SDimitry Andric Bytes = DL.getStructLayout(ST)->getElementOffset(i + 1) - 19660b57cec5SDimitry Andric DL.getStructLayout(ST)->getElementOffset(i); 19670b57cec5SDimitry Andric bufferLEByte(cast<Constant>(CPV->getOperand(i)), Bytes, aggBuffer); 19680b57cec5SDimitry Andric } 19690b57cec5SDimitry Andric } 19700b57cec5SDimitry Andric return; 19710b57cec5SDimitry Andric } 19720b57cec5SDimitry Andric llvm_unreachable("unsupported constant type in printAggregateConstant()"); 19730b57cec5SDimitry Andric } 19740b57cec5SDimitry Andric 19750b57cec5SDimitry Andric /// lowerConstantForGV - Return an MCExpr for the given Constant. 
This is mostly 19760b57cec5SDimitry Andric /// a copy from AsmPrinter::lowerConstant, except customized to only handle 19770b57cec5SDimitry Andric /// expressions that are representable in PTX and create 19780b57cec5SDimitry Andric /// NVPTXGenericMCSymbolRefExpr nodes for addrspacecast instructions. 19790b57cec5SDimitry Andric const MCExpr * 19800b57cec5SDimitry Andric NVPTXAsmPrinter::lowerConstantForGV(const Constant *CV, bool ProcessingGeneric) { 19810b57cec5SDimitry Andric MCContext &Ctx = OutContext; 19820b57cec5SDimitry Andric 19830b57cec5SDimitry Andric if (CV->isNullValue() || isa<UndefValue>(CV)) 19840b57cec5SDimitry Andric return MCConstantExpr::create(0, Ctx); 19850b57cec5SDimitry Andric 19860b57cec5SDimitry Andric if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) 19870b57cec5SDimitry Andric return MCConstantExpr::create(CI->getZExtValue(), Ctx); 19880b57cec5SDimitry Andric 19890b57cec5SDimitry Andric if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV)) { 19900b57cec5SDimitry Andric const MCSymbolRefExpr *Expr = 19910b57cec5SDimitry Andric MCSymbolRefExpr::create(getSymbol(GV), Ctx); 19920b57cec5SDimitry Andric if (ProcessingGeneric) { 19930b57cec5SDimitry Andric return NVPTXGenericMCSymbolRefExpr::create(Expr, Ctx); 19940b57cec5SDimitry Andric } else { 19950b57cec5SDimitry Andric return Expr; 19960b57cec5SDimitry Andric } 19970b57cec5SDimitry Andric } 19980b57cec5SDimitry Andric 19990b57cec5SDimitry Andric const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV); 20000b57cec5SDimitry Andric if (!CE) { 20010b57cec5SDimitry Andric llvm_unreachable("Unknown constant value to lower!"); 20020b57cec5SDimitry Andric } 20030b57cec5SDimitry Andric 20040b57cec5SDimitry Andric switch (CE->getOpcode()) { 20055f757f3fSDimitry Andric default: 20065f757f3fSDimitry Andric break; // Error 20070b57cec5SDimitry Andric 20080b57cec5SDimitry Andric case Instruction::AddrSpaceCast: { 20090b57cec5SDimitry Andric // Strip the addrspacecast and pass along the operand 
20100b57cec5SDimitry Andric PointerType *DstTy = cast<PointerType>(CE->getType()); 20115f757f3fSDimitry Andric if (DstTy->getAddressSpace() == 0) 20120b57cec5SDimitry Andric return lowerConstantForGV(cast<const Constant>(CE->getOperand(0)), true); 20135f757f3fSDimitry Andric 20145f757f3fSDimitry Andric break; // Error 20150b57cec5SDimitry Andric } 20160b57cec5SDimitry Andric 20170b57cec5SDimitry Andric case Instruction::GetElementPtr: { 20180b57cec5SDimitry Andric const DataLayout &DL = getDataLayout(); 20190b57cec5SDimitry Andric 20200b57cec5SDimitry Andric // Generate a symbolic expression for the byte address 20210b57cec5SDimitry Andric APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); 20220b57cec5SDimitry Andric cast<GEPOperator>(CE)->accumulateConstantOffset(DL, OffsetAI); 20230b57cec5SDimitry Andric 20240b57cec5SDimitry Andric const MCExpr *Base = lowerConstantForGV(CE->getOperand(0), 20250b57cec5SDimitry Andric ProcessingGeneric); 20260b57cec5SDimitry Andric if (!OffsetAI) 20270b57cec5SDimitry Andric return Base; 20280b57cec5SDimitry Andric 20290b57cec5SDimitry Andric int64_t Offset = OffsetAI.getSExtValue(); 20300b57cec5SDimitry Andric return MCBinaryExpr::createAdd(Base, MCConstantExpr::create(Offset, Ctx), 20310b57cec5SDimitry Andric Ctx); 20320b57cec5SDimitry Andric } 20330b57cec5SDimitry Andric 20340b57cec5SDimitry Andric case Instruction::Trunc: 20350b57cec5SDimitry Andric // We emit the value and depend on the assembler to truncate the generated 20360b57cec5SDimitry Andric // expression properly. This is important for differences between 20370b57cec5SDimitry Andric // blockaddress labels. Since the two labels are in the same function, it 20380b57cec5SDimitry Andric // is reasonable to treat their delta as a 32-bit value. 
2039bdd1243dSDimitry Andric [[fallthrough]]; 20400b57cec5SDimitry Andric case Instruction::BitCast: 20410b57cec5SDimitry Andric return lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); 20420b57cec5SDimitry Andric 20430b57cec5SDimitry Andric case Instruction::IntToPtr: { 20440b57cec5SDimitry Andric const DataLayout &DL = getDataLayout(); 20450b57cec5SDimitry Andric 20460b57cec5SDimitry Andric // Handle casts to pointers by changing them into casts to the appropriate 20470b57cec5SDimitry Andric // integer type. This promotes constant folding and simplifies this code. 20480b57cec5SDimitry Andric Constant *Op = CE->getOperand(0); 20495f757f3fSDimitry Andric Op = ConstantFoldIntegerCast(Op, DL.getIntPtrType(CV->getType()), 20505f757f3fSDimitry Andric /*IsSigned*/ false, DL); 20515f757f3fSDimitry Andric if (Op) 20520b57cec5SDimitry Andric return lowerConstantForGV(Op, ProcessingGeneric); 20535f757f3fSDimitry Andric 20545f757f3fSDimitry Andric break; // Error 20550b57cec5SDimitry Andric } 20560b57cec5SDimitry Andric 20570b57cec5SDimitry Andric case Instruction::PtrToInt: { 20580b57cec5SDimitry Andric const DataLayout &DL = getDataLayout(); 20590b57cec5SDimitry Andric 20600b57cec5SDimitry Andric // Support only foldable casts to/from pointers that can be eliminated by 20610b57cec5SDimitry Andric // changing the pointer to the appropriately sized integer type. 20620b57cec5SDimitry Andric Constant *Op = CE->getOperand(0); 20630b57cec5SDimitry Andric Type *Ty = CE->getType(); 20640b57cec5SDimitry Andric 20650b57cec5SDimitry Andric const MCExpr *OpExpr = lowerConstantForGV(Op, ProcessingGeneric); 20660b57cec5SDimitry Andric 20670b57cec5SDimitry Andric // We can emit the pointer value into this slot if the slot is an 20680b57cec5SDimitry Andric // integer slot equal to the size of the pointer. 
20690b57cec5SDimitry Andric if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType())) 20700b57cec5SDimitry Andric return OpExpr; 20710b57cec5SDimitry Andric 20720b57cec5SDimitry Andric // Otherwise the pointer is smaller than the resultant integer, mask off 20730b57cec5SDimitry Andric // the high bits so we are sure to get a proper truncation if the input is 20740b57cec5SDimitry Andric // a constant expr. 20750b57cec5SDimitry Andric unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); 20760b57cec5SDimitry Andric const MCExpr *MaskExpr = MCConstantExpr::create(~0ULL >> (64-InBits), Ctx); 20770b57cec5SDimitry Andric return MCBinaryExpr::createAnd(OpExpr, MaskExpr, Ctx); 20780b57cec5SDimitry Andric } 20790b57cec5SDimitry Andric 20800b57cec5SDimitry Andric // The MC library also has a right-shift operator, but it isn't consistently 20810b57cec5SDimitry Andric // signed or unsigned between different targets. 20820b57cec5SDimitry Andric case Instruction::Add: { 20830b57cec5SDimitry Andric const MCExpr *LHS = lowerConstantForGV(CE->getOperand(0), ProcessingGeneric); 20840b57cec5SDimitry Andric const MCExpr *RHS = lowerConstantForGV(CE->getOperand(1), ProcessingGeneric); 20850b57cec5SDimitry Andric switch (CE->getOpcode()) { 20860b57cec5SDimitry Andric default: llvm_unreachable("Unknown binary operator constant cast expr"); 20870b57cec5SDimitry Andric case Instruction::Add: return MCBinaryExpr::createAdd(LHS, RHS, Ctx); 20880b57cec5SDimitry Andric } 20890b57cec5SDimitry Andric } 20900b57cec5SDimitry Andric } 20915f757f3fSDimitry Andric 20925f757f3fSDimitry Andric // If the code isn't optimized, there may be outstanding folding 20935f757f3fSDimitry Andric // opportunities. Attempt to fold the expression using DataLayout as a 20945f757f3fSDimitry Andric // last resort before giving up. 
20955f757f3fSDimitry Andric Constant *C = ConstantFoldConstant(CE, getDataLayout()); 20965f757f3fSDimitry Andric if (C != CE) 20975f757f3fSDimitry Andric return lowerConstantForGV(C, ProcessingGeneric); 20985f757f3fSDimitry Andric 20995f757f3fSDimitry Andric // Otherwise report the problem to the user. 21005f757f3fSDimitry Andric std::string S; 21015f757f3fSDimitry Andric raw_string_ostream OS(S); 21025f757f3fSDimitry Andric OS << "Unsupported expression in static initializer: "; 21035f757f3fSDimitry Andric CE->printAsOperand(OS, /*PrintType=*/false, 21045f757f3fSDimitry Andric !MF ? nullptr : MF->getFunction().getParent()); 21055f757f3fSDimitry Andric report_fatal_error(Twine(OS.str())); 21060b57cec5SDimitry Andric } 21070b57cec5SDimitry Andric 21080b57cec5SDimitry Andric // Copy of MCExpr::print customized for NVPTX 21090b57cec5SDimitry Andric void NVPTXAsmPrinter::printMCExpr(const MCExpr &Expr, raw_ostream &OS) { 21100b57cec5SDimitry Andric switch (Expr.getKind()) { 21110b57cec5SDimitry Andric case MCExpr::Target: 21120b57cec5SDimitry Andric return cast<MCTargetExpr>(&Expr)->printImpl(OS, MAI); 21130b57cec5SDimitry Andric case MCExpr::Constant: 21140b57cec5SDimitry Andric OS << cast<MCConstantExpr>(Expr).getValue(); 21150b57cec5SDimitry Andric return; 21160b57cec5SDimitry Andric 21170b57cec5SDimitry Andric case MCExpr::SymbolRef: { 21180b57cec5SDimitry Andric const MCSymbolRefExpr &SRE = cast<MCSymbolRefExpr>(Expr); 21190b57cec5SDimitry Andric const MCSymbol &Sym = SRE.getSymbol(); 21200b57cec5SDimitry Andric Sym.print(OS, MAI); 21210b57cec5SDimitry Andric return; 21220b57cec5SDimitry Andric } 21230b57cec5SDimitry Andric 21240b57cec5SDimitry Andric case MCExpr::Unary: { 21250b57cec5SDimitry Andric const MCUnaryExpr &UE = cast<MCUnaryExpr>(Expr); 21260b57cec5SDimitry Andric switch (UE.getOpcode()) { 21270b57cec5SDimitry Andric case MCUnaryExpr::LNot: OS << '!'; break; 21280b57cec5SDimitry Andric case MCUnaryExpr::Minus: OS << '-'; break; 21290b57cec5SDimitry 
Andric case MCUnaryExpr::Not: OS << '~'; break; 21300b57cec5SDimitry Andric case MCUnaryExpr::Plus: OS << '+'; break; 21310b57cec5SDimitry Andric } 21320b57cec5SDimitry Andric printMCExpr(*UE.getSubExpr(), OS); 21330b57cec5SDimitry Andric return; 21340b57cec5SDimitry Andric } 21350b57cec5SDimitry Andric 21360b57cec5SDimitry Andric case MCExpr::Binary: { 21370b57cec5SDimitry Andric const MCBinaryExpr &BE = cast<MCBinaryExpr>(Expr); 21380b57cec5SDimitry Andric 21390b57cec5SDimitry Andric // Only print parens around the LHS if it is non-trivial. 21400b57cec5SDimitry Andric if (isa<MCConstantExpr>(BE.getLHS()) || isa<MCSymbolRefExpr>(BE.getLHS()) || 21410b57cec5SDimitry Andric isa<NVPTXGenericMCSymbolRefExpr>(BE.getLHS())) { 21420b57cec5SDimitry Andric printMCExpr(*BE.getLHS(), OS); 21430b57cec5SDimitry Andric } else { 21440b57cec5SDimitry Andric OS << '('; 21450b57cec5SDimitry Andric printMCExpr(*BE.getLHS(), OS); 21460b57cec5SDimitry Andric OS<< ')'; 21470b57cec5SDimitry Andric } 21480b57cec5SDimitry Andric 21490b57cec5SDimitry Andric switch (BE.getOpcode()) { 21500b57cec5SDimitry Andric case MCBinaryExpr::Add: 21510b57cec5SDimitry Andric // Print "X-42" instead of "X+-42". 21520b57cec5SDimitry Andric if (const MCConstantExpr *RHSC = dyn_cast<MCConstantExpr>(BE.getRHS())) { 21530b57cec5SDimitry Andric if (RHSC->getValue() < 0) { 21540b57cec5SDimitry Andric OS << RHSC->getValue(); 21550b57cec5SDimitry Andric return; 21560b57cec5SDimitry Andric } 21570b57cec5SDimitry Andric } 21580b57cec5SDimitry Andric 21590b57cec5SDimitry Andric OS << '+'; 21600b57cec5SDimitry Andric break; 21610b57cec5SDimitry Andric default: llvm_unreachable("Unhandled binary operator"); 21620b57cec5SDimitry Andric } 21630b57cec5SDimitry Andric 21640b57cec5SDimitry Andric // Only print parens around the LHS if it is non-trivial. 
21650b57cec5SDimitry Andric if (isa<MCConstantExpr>(BE.getRHS()) || isa<MCSymbolRefExpr>(BE.getRHS())) { 21660b57cec5SDimitry Andric printMCExpr(*BE.getRHS(), OS); 21670b57cec5SDimitry Andric } else { 21680b57cec5SDimitry Andric OS << '('; 21690b57cec5SDimitry Andric printMCExpr(*BE.getRHS(), OS); 21700b57cec5SDimitry Andric OS << ')'; 21710b57cec5SDimitry Andric } 21720b57cec5SDimitry Andric return; 21730b57cec5SDimitry Andric } 21740b57cec5SDimitry Andric } 21750b57cec5SDimitry Andric 21760b57cec5SDimitry Andric llvm_unreachable("Invalid expression kind!"); 21770b57cec5SDimitry Andric } 21780b57cec5SDimitry Andric 21790b57cec5SDimitry Andric /// PrintAsmOperand - Print out an operand for an inline asm expression. 21800b57cec5SDimitry Andric /// 21810b57cec5SDimitry Andric bool NVPTXAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, 21820b57cec5SDimitry Andric const char *ExtraCode, raw_ostream &O) { 21830b57cec5SDimitry Andric if (ExtraCode && ExtraCode[0]) { 21840b57cec5SDimitry Andric if (ExtraCode[1] != 0) 21850b57cec5SDimitry Andric return true; // Unknown modifier. 
21860b57cec5SDimitry Andric 21870b57cec5SDimitry Andric switch (ExtraCode[0]) { 21880b57cec5SDimitry Andric default: 21890b57cec5SDimitry Andric // See if this is a generic print operand 21900b57cec5SDimitry Andric return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); 21910b57cec5SDimitry Andric case 'r': 21920b57cec5SDimitry Andric break; 21930b57cec5SDimitry Andric } 21940b57cec5SDimitry Andric } 21950b57cec5SDimitry Andric 21960b57cec5SDimitry Andric printOperand(MI, OpNo, O); 21970b57cec5SDimitry Andric 21980b57cec5SDimitry Andric return false; 21990b57cec5SDimitry Andric } 22000b57cec5SDimitry Andric 22010b57cec5SDimitry Andric bool NVPTXAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, 22020b57cec5SDimitry Andric unsigned OpNo, 22030b57cec5SDimitry Andric const char *ExtraCode, 22040b57cec5SDimitry Andric raw_ostream &O) { 22050b57cec5SDimitry Andric if (ExtraCode && ExtraCode[0]) 22060b57cec5SDimitry Andric return true; // Unknown modifier 22070b57cec5SDimitry Andric 22080b57cec5SDimitry Andric O << '['; 22090b57cec5SDimitry Andric printMemOperand(MI, OpNo, O); 22100b57cec5SDimitry Andric O << ']'; 22110b57cec5SDimitry Andric 22120b57cec5SDimitry Andric return false; 22130b57cec5SDimitry Andric } 22140b57cec5SDimitry Andric 22155f757f3fSDimitry Andric void NVPTXAsmPrinter::printOperand(const MachineInstr *MI, unsigned OpNum, 22160b57cec5SDimitry Andric raw_ostream &O) { 22175f757f3fSDimitry Andric const MachineOperand &MO = MI->getOperand(OpNum); 22180b57cec5SDimitry Andric switch (MO.getType()) { 22190b57cec5SDimitry Andric case MachineOperand::MO_Register: 2220bdd1243dSDimitry Andric if (MO.getReg().isPhysical()) { 22210b57cec5SDimitry Andric if (MO.getReg() == NVPTX::VRDepot) 22220b57cec5SDimitry Andric O << DEPOTNAME << getFunctionNumber(); 22230b57cec5SDimitry Andric else 22240b57cec5SDimitry Andric O << NVPTXInstPrinter::getRegisterName(MO.getReg()); 22250b57cec5SDimitry Andric } else { 22260b57cec5SDimitry Andric 
emitVirtualRegister(MO.getReg(), O); 22270b57cec5SDimitry Andric } 22280b57cec5SDimitry Andric break; 22290b57cec5SDimitry Andric 22300b57cec5SDimitry Andric case MachineOperand::MO_Immediate: 22310b57cec5SDimitry Andric O << MO.getImm(); 22320b57cec5SDimitry Andric break; 22330b57cec5SDimitry Andric 22340b57cec5SDimitry Andric case MachineOperand::MO_FPImmediate: 22350b57cec5SDimitry Andric printFPConstant(MO.getFPImm(), O); 22360b57cec5SDimitry Andric break; 22370b57cec5SDimitry Andric 22380b57cec5SDimitry Andric case MachineOperand::MO_GlobalAddress: 22390b57cec5SDimitry Andric PrintSymbolOperand(MO, O); 22400b57cec5SDimitry Andric break; 22410b57cec5SDimitry Andric 22420b57cec5SDimitry Andric case MachineOperand::MO_MachineBasicBlock: 22430b57cec5SDimitry Andric MO.getMBB()->getSymbol()->print(O, MAI); 22440b57cec5SDimitry Andric break; 22450b57cec5SDimitry Andric 22460b57cec5SDimitry Andric default: 22470b57cec5SDimitry Andric llvm_unreachable("Operand type not supported."); 22480b57cec5SDimitry Andric } 22490b57cec5SDimitry Andric } 22500b57cec5SDimitry Andric 22515f757f3fSDimitry Andric void NVPTXAsmPrinter::printMemOperand(const MachineInstr *MI, unsigned OpNum, 22520b57cec5SDimitry Andric raw_ostream &O, const char *Modifier) { 22535f757f3fSDimitry Andric printOperand(MI, OpNum, O); 22540b57cec5SDimitry Andric 22550b57cec5SDimitry Andric if (Modifier && strcmp(Modifier, "add") == 0) { 22560b57cec5SDimitry Andric O << ", "; 22575f757f3fSDimitry Andric printOperand(MI, OpNum + 1, O); 22580b57cec5SDimitry Andric } else { 22595f757f3fSDimitry Andric if (MI->getOperand(OpNum + 1).isImm() && 22605f757f3fSDimitry Andric MI->getOperand(OpNum + 1).getImm() == 0) 22610b57cec5SDimitry Andric return; // don't print ',0' or '+0' 22620b57cec5SDimitry Andric O << "+"; 22635f757f3fSDimitry Andric printOperand(MI, OpNum + 1, O); 22640b57cec5SDimitry Andric } 22650b57cec5SDimitry Andric } 22660b57cec5SDimitry Andric 22670b57cec5SDimitry Andric // Force static 
initialization. 2268480093f4SDimitry Andric extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXAsmPrinter() { 22690b57cec5SDimitry Andric RegisterAsmPrinter<NVPTXAsmPrinter> X(getTheNVPTXTarget32()); 22700b57cec5SDimitry Andric RegisterAsmPrinter<NVPTXAsmPrinter> Y(getTheNVPTXTarget64()); 22710b57cec5SDimitry Andric } 2272