10b57cec5SDimitry Andric //===- ConstantMerge.cpp - Merge duplicate global constants ---------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file defines the interface to a pass that merges duplicate global 100b57cec5SDimitry Andric // constants together into a single constant that is shared. This is useful 110b57cec5SDimitry Andric // because some passes (ie TraceValues) insert a lot of string constants into 120b57cec5SDimitry Andric // the program, regardless of whether or not an existing string is available. 130b57cec5SDimitry Andric // 140b57cec5SDimitry Andric // Algorithm: ConstantMerge is designed to build up a map of available constants 150b57cec5SDimitry Andric // and eliminate duplicates when it is initialized. 160b57cec5SDimitry Andric // 170b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 180b57cec5SDimitry Andric 190b57cec5SDimitry Andric #include "llvm/Transforms/IPO/ConstantMerge.h" 200b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 210b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 220b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 230b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h" 240b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 250b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 260b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 270b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h" 280b57cec5SDimitry Andric #include "llvm/IR/GlobalVariable.h" 290b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h" 300b57cec5SDimitry Andric #include "llvm/IR/Module.h" 310b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 32*0fca6ea1SDimitry Andric #include "llvm/Support/Debug.h" 330b57cec5SDimitry Andric #include "llvm/Transforms/IPO.h" 340b57cec5SDimitry Andric #include <algorithm> 350b57cec5SDimitry Andric #include <cassert> 360b57cec5SDimitry Andric #include <utility> 370b57cec5SDimitry Andric 380b57cec5SDimitry Andric using namespace llvm; 390b57cec5SDimitry Andric 400b57cec5SDimitry Andric #define DEBUG_TYPE "constmerge" 410b57cec5SDimitry Andric 420b57cec5SDimitry Andric STATISTIC(NumIdenticalMerged, "Number of identical global constants merged"); 430b57cec5SDimitry Andric 440b57cec5SDimitry Andric /// Find values that are marked as llvm.used. 450b57cec5SDimitry Andric static void FindUsedValues(GlobalVariable *LLVMUsed, 460b57cec5SDimitry Andric SmallPtrSetImpl<const GlobalValue*> &UsedValues) { 470b57cec5SDimitry Andric if (!LLVMUsed) return; 480b57cec5SDimitry Andric ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer()); 490b57cec5SDimitry Andric 500b57cec5SDimitry Andric for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) { 518bcb0991SDimitry Andric Value *Operand = Inits->getOperand(i)->stripPointerCasts(); 520b57cec5SDimitry Andric GlobalValue *GV = cast<GlobalValue>(Operand); 530b57cec5SDimitry Andric UsedValues.insert(GV); 540b57cec5SDimitry Andric } 550b57cec5SDimitry Andric } 560b57cec5SDimitry Andric 570b57cec5SDimitry Andric // True if A is better than B. 580b57cec5SDimitry Andric static bool IsBetterCanonical(const GlobalVariable &A, 590b57cec5SDimitry Andric const GlobalVariable &B) { 600b57cec5SDimitry Andric if (!A.hasLocalLinkage() && B.hasLocalLinkage()) 610b57cec5SDimitry Andric return true; 620b57cec5SDimitry Andric 630b57cec5SDimitry Andric if (A.hasLocalLinkage() && !B.hasLocalLinkage()) 640b57cec5SDimitry Andric return false; 650b57cec5SDimitry Andric 660b57cec5SDimitry Andric return A.hasGlobalUnnamedAddr(); 670b57cec5SDimitry Andric } 680b57cec5SDimitry Andric 690b57cec5SDimitry Andric static bool hasMetadataOtherThanDebugLoc(const GlobalVariable *GV) { 700b57cec5SDimitry Andric SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; 710b57cec5SDimitry Andric GV->getAllMetadata(MDs); 720b57cec5SDimitry Andric for (const auto &V : MDs) 730b57cec5SDimitry Andric if (V.first != LLVMContext::MD_dbg) 740b57cec5SDimitry Andric return true; 750b57cec5SDimitry Andric return false; 760b57cec5SDimitry Andric } 770b57cec5SDimitry Andric 780b57cec5SDimitry Andric static void copyDebugLocMetadata(const GlobalVariable *From, 790b57cec5SDimitry Andric GlobalVariable *To) { 800b57cec5SDimitry Andric SmallVector<DIGlobalVariableExpression *, 1> MDs; 810b57cec5SDimitry Andric From->getDebugInfo(MDs); 82bdd1243dSDimitry Andric for (auto *MD : MDs) 830b57cec5SDimitry Andric To->addDebugInfo(MD); 840b57cec5SDimitry Andric } 850b57cec5SDimitry Andric 865ffd83dbSDimitry Andric static Align getAlign(GlobalVariable *GV) { 8781ad6265SDimitry Andric return GV->getAlign().value_or( 88*0fca6ea1SDimitry Andric GV->getDataLayout().getPreferredAlign(GV)); 890b57cec5SDimitry Andric } 900b57cec5SDimitry Andric 910b57cec5SDimitry Andric static bool 920b57cec5SDimitry Andric isUnmergeableGlobal(GlobalVariable *GV, 930b57cec5SDimitry Andric const SmallPtrSetImpl<const GlobalValue *> &UsedGlobals) { 940b57cec5SDimitry Andric // Only process constants with initializers in the default address space. 950b57cec5SDimitry Andric return !GV->isConstant() || !GV->hasDefinitiveInitializer() || 960b57cec5SDimitry Andric GV->getType()->getAddressSpace() != 0 || GV->hasSection() || 974652422eSDimitry Andric // Don't touch thread-local variables. 984652422eSDimitry Andric GV->isThreadLocal() || 990b57cec5SDimitry Andric // Don't touch values marked with attribute(used). 1000b57cec5SDimitry Andric UsedGlobals.count(GV); 1010b57cec5SDimitry Andric } 1020b57cec5SDimitry Andric 1030b57cec5SDimitry Andric enum class CanMerge { No, Yes }; 1040b57cec5SDimitry Andric static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New) { 1050b57cec5SDimitry Andric if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr()) 1060b57cec5SDimitry Andric return CanMerge::No; 1070b57cec5SDimitry Andric if (hasMetadataOtherThanDebugLoc(Old)) 1080b57cec5SDimitry Andric return CanMerge::No; 1090b57cec5SDimitry Andric assert(!hasMetadataOtherThanDebugLoc(New)); 1100b57cec5SDimitry Andric if (!Old->hasGlobalUnnamedAddr()) 1110b57cec5SDimitry Andric New->setUnnamedAddr(GlobalValue::UnnamedAddr::None); 1120b57cec5SDimitry Andric return CanMerge::Yes; 1130b57cec5SDimitry Andric } 1140b57cec5SDimitry Andric 1150b57cec5SDimitry Andric static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) { 1160b57cec5SDimitry Andric Constant *NewConstant = New; 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing global: @" << Old->getName() << " -> @" 1190b57cec5SDimitry Andric << New->getName() << "\n"); 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric // Bump the alignment if necessary. 1225ffd83dbSDimitry Andric if (Old->getAlign() || New->getAlign()) 1235ffd83dbSDimitry Andric New->setAlignment(std::max(getAlign(Old), getAlign(New))); 1240b57cec5SDimitry Andric 1250b57cec5SDimitry Andric copyDebugLocMetadata(Old, New); 1260b57cec5SDimitry Andric Old->replaceAllUsesWith(NewConstant); 1270b57cec5SDimitry Andric 1280b57cec5SDimitry Andric // Delete the global value from the module. 1290b57cec5SDimitry Andric assert(Old->hasLocalLinkage() && 1300b57cec5SDimitry Andric "Refusing to delete an externally visible global variable."); 1310b57cec5SDimitry Andric Old->eraseFromParent(); 1320b57cec5SDimitry Andric } 1330b57cec5SDimitry Andric 1340b57cec5SDimitry Andric static bool mergeConstants(Module &M) { 1350b57cec5SDimitry Andric // Find all the globals that are marked "used". These cannot be merged. 1360b57cec5SDimitry Andric SmallPtrSet<const GlobalValue*, 8> UsedGlobals; 1370b57cec5SDimitry Andric FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals); 1380b57cec5SDimitry Andric FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals); 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric // Map unique constants to globals. 1410b57cec5SDimitry Andric DenseMap<Constant *, GlobalVariable *> CMap; 1420b57cec5SDimitry Andric 1430b57cec5SDimitry Andric SmallVector<std::pair<GlobalVariable *, GlobalVariable *>, 32> 1440b57cec5SDimitry Andric SameContentReplacements; 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric size_t ChangesMade = 0; 1470b57cec5SDimitry Andric size_t OldChangesMade = 0; 1480b57cec5SDimitry Andric 1490b57cec5SDimitry Andric // Iterate constant merging while we are still making progress. Merging two 1500b57cec5SDimitry Andric // constants together may allow us to merge other constants together if the 1510b57cec5SDimitry Andric // second level constants have initializers which point to the globals that 1520b57cec5SDimitry Andric // were just merged. 1530b57cec5SDimitry Andric while (true) { 1540b57cec5SDimitry Andric // Find the canonical constants others will be merged with. 155349cc55cSDimitry Andric for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) { 1560b57cec5SDimitry Andric // If this GV is dead, remove it. 157349cc55cSDimitry Andric GV.removeDeadConstantUsers(); 158349cc55cSDimitry Andric if (GV.use_empty() && GV.hasLocalLinkage()) { 159349cc55cSDimitry Andric GV.eraseFromParent(); 1600b57cec5SDimitry Andric ++ChangesMade; 1610b57cec5SDimitry Andric continue; 1620b57cec5SDimitry Andric } 1630b57cec5SDimitry Andric 164349cc55cSDimitry Andric if (isUnmergeableGlobal(&GV, UsedGlobals)) 1650b57cec5SDimitry Andric continue; 1660b57cec5SDimitry Andric 1670b57cec5SDimitry Andric // This transformation is legal for weak ODR globals in the sense it 1680b57cec5SDimitry Andric // doesn't change semantics, but we really don't want to perform it 1690b57cec5SDimitry Andric // anyway; it's likely to pessimize code generation, and some tools 1700b57cec5SDimitry Andric // (like the Darwin linker in cases involving CFString) don't expect it. 171349cc55cSDimitry Andric if (GV.isWeakForLinker()) 1720b57cec5SDimitry Andric continue; 1730b57cec5SDimitry Andric 1740b57cec5SDimitry Andric // Don't touch globals with metadata other then !dbg. 175349cc55cSDimitry Andric if (hasMetadataOtherThanDebugLoc(&GV)) 1760b57cec5SDimitry Andric continue; 1770b57cec5SDimitry Andric 178349cc55cSDimitry Andric Constant *Init = GV.getInitializer(); 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric // Check to see if the initializer is already known. 1810b57cec5SDimitry Andric GlobalVariable *&Slot = CMap[Init]; 1820b57cec5SDimitry Andric 1830b57cec5SDimitry Andric // If this is the first constant we find or if the old one is local, 1840b57cec5SDimitry Andric // replace with the current one. If the current is externally visible 1850b57cec5SDimitry Andric // it cannot be replace, but can be the canonical constant we merge with. 1860b57cec5SDimitry Andric bool FirstConstantFound = !Slot; 187349cc55cSDimitry Andric if (FirstConstantFound || IsBetterCanonical(GV, *Slot)) { 188349cc55cSDimitry Andric Slot = &GV; 189349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV.getName() 1900b57cec5SDimitry Andric << (FirstConstantFound ? "\n" : " (updated)\n")); 1910b57cec5SDimitry Andric } 1920b57cec5SDimitry Andric } 1930b57cec5SDimitry Andric 1940b57cec5SDimitry Andric // Identify all globals that can be merged together, filling in the 1950b57cec5SDimitry Andric // SameContentReplacements vector. We cannot do the replacement in this pass 1960b57cec5SDimitry Andric // because doing so may cause initializers of other globals to be rewritten, 1970b57cec5SDimitry Andric // invalidating the Constant* pointers in CMap. 198349cc55cSDimitry Andric for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) { 199349cc55cSDimitry Andric if (isUnmergeableGlobal(&GV, UsedGlobals)) 2000b57cec5SDimitry Andric continue; 2010b57cec5SDimitry Andric 2020b57cec5SDimitry Andric // We can only replace constant with local linkage. 203349cc55cSDimitry Andric if (!GV.hasLocalLinkage()) 2040b57cec5SDimitry Andric continue; 2050b57cec5SDimitry Andric 206349cc55cSDimitry Andric Constant *Init = GV.getInitializer(); 2070b57cec5SDimitry Andric 2080b57cec5SDimitry Andric // Check to see if the initializer is already known. 2090b57cec5SDimitry Andric auto Found = CMap.find(Init); 2100b57cec5SDimitry Andric if (Found == CMap.end()) 2110b57cec5SDimitry Andric continue; 2120b57cec5SDimitry Andric 2130b57cec5SDimitry Andric GlobalVariable *Slot = Found->second; 214349cc55cSDimitry Andric if (Slot == &GV) 2150b57cec5SDimitry Andric continue; 2160b57cec5SDimitry Andric 217349cc55cSDimitry Andric if (makeMergeable(&GV, Slot) == CanMerge::No) 2180b57cec5SDimitry Andric continue; 2190b57cec5SDimitry Andric 2200b57cec5SDimitry Andric // Make all uses of the duplicate constant use the canonical version. 221349cc55cSDimitry Andric LLVM_DEBUG(dbgs() << "Will replace: @" << GV.getName() << " -> @" 2220b57cec5SDimitry Andric << Slot->getName() << "\n"); 223349cc55cSDimitry Andric SameContentReplacements.push_back(std::make_pair(&GV, Slot)); 2240b57cec5SDimitry Andric } 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric // Now that we have figured out which replacements must be made, do them all 2270b57cec5SDimitry Andric // now. This avoid invalidating the pointers in CMap, which are unneeded 2280b57cec5SDimitry Andric // now. 2290b57cec5SDimitry Andric for (unsigned i = 0, e = SameContentReplacements.size(); i != e; ++i) { 2300b57cec5SDimitry Andric GlobalVariable *Old = SameContentReplacements[i].first; 2310b57cec5SDimitry Andric GlobalVariable *New = SameContentReplacements[i].second; 2320b57cec5SDimitry Andric replace(M, Old, New); 2330b57cec5SDimitry Andric ++ChangesMade; 2340b57cec5SDimitry Andric ++NumIdenticalMerged; 2350b57cec5SDimitry Andric } 2360b57cec5SDimitry Andric 2370b57cec5SDimitry Andric if (ChangesMade == OldChangesMade) 2380b57cec5SDimitry Andric break; 2390b57cec5SDimitry Andric OldChangesMade = ChangesMade; 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric SameContentReplacements.clear(); 2420b57cec5SDimitry Andric CMap.clear(); 2430b57cec5SDimitry Andric } 2440b57cec5SDimitry Andric 2450b57cec5SDimitry Andric return ChangesMade; 2460b57cec5SDimitry Andric } 2470b57cec5SDimitry Andric 2480b57cec5SDimitry Andric PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) { 2490b57cec5SDimitry Andric if (!mergeConstants(M)) 2500b57cec5SDimitry Andric return PreservedAnalyses::all(); 2510b57cec5SDimitry Andric return PreservedAnalyses::none(); 2520b57cec5SDimitry Andric } 253