xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/IPO/ConstantMerge.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- ConstantMerge.cpp - Merge duplicate global constants ---------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file defines the interface to a pass that merges duplicate global
100b57cec5SDimitry Andric // constants together into a single constant that is shared.  This is useful
110b57cec5SDimitry Andric // because some passes (ie TraceValues) insert a lot of string constants into
120b57cec5SDimitry Andric // the program, regardless of whether or not an existing string is available.
130b57cec5SDimitry Andric //
140b57cec5SDimitry Andric // Algorithm: ConstantMerge is designed to build up a map of available constants
150b57cec5SDimitry Andric // and eliminate duplicates when it is initialized.
160b57cec5SDimitry Andric //
170b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
180b57cec5SDimitry Andric 
190b57cec5SDimitry Andric #include "llvm/Transforms/IPO/ConstantMerge.h"
200b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
210b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
220b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
230b57cec5SDimitry Andric #include "llvm/ADT/Statistic.h"
240b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
250b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h"
260b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
270b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h"
280b57cec5SDimitry Andric #include "llvm/IR/GlobalVariable.h"
290b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h"
300b57cec5SDimitry Andric #include "llvm/IR/Module.h"
310b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
32*0fca6ea1SDimitry Andric #include "llvm/Support/Debug.h"
330b57cec5SDimitry Andric #include "llvm/Transforms/IPO.h"
340b57cec5SDimitry Andric #include <algorithm>
350b57cec5SDimitry Andric #include <cassert>
360b57cec5SDimitry Andric #include <utility>
370b57cec5SDimitry Andric 
380b57cec5SDimitry Andric using namespace llvm;
390b57cec5SDimitry Andric 
400b57cec5SDimitry Andric #define DEBUG_TYPE "constmerge"
410b57cec5SDimitry Andric 
420b57cec5SDimitry Andric STATISTIC(NumIdenticalMerged, "Number of identical global constants merged");
430b57cec5SDimitry Andric 
440b57cec5SDimitry Andric /// Find values that are marked as llvm.used.
450b57cec5SDimitry Andric static void FindUsedValues(GlobalVariable *LLVMUsed,
460b57cec5SDimitry Andric                            SmallPtrSetImpl<const GlobalValue*> &UsedValues) {
470b57cec5SDimitry Andric   if (!LLVMUsed) return;
480b57cec5SDimitry Andric   ConstantArray *Inits = cast<ConstantArray>(LLVMUsed->getInitializer());
490b57cec5SDimitry Andric 
500b57cec5SDimitry Andric   for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) {
518bcb0991SDimitry Andric     Value *Operand = Inits->getOperand(i)->stripPointerCasts();
520b57cec5SDimitry Andric     GlobalValue *GV = cast<GlobalValue>(Operand);
530b57cec5SDimitry Andric     UsedValues.insert(GV);
540b57cec5SDimitry Andric   }
550b57cec5SDimitry Andric }
560b57cec5SDimitry Andric 
570b57cec5SDimitry Andric // True if A is better than B.
580b57cec5SDimitry Andric static bool IsBetterCanonical(const GlobalVariable &A,
590b57cec5SDimitry Andric                               const GlobalVariable &B) {
600b57cec5SDimitry Andric   if (!A.hasLocalLinkage() && B.hasLocalLinkage())
610b57cec5SDimitry Andric     return true;
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric   if (A.hasLocalLinkage() && !B.hasLocalLinkage())
640b57cec5SDimitry Andric     return false;
650b57cec5SDimitry Andric 
660b57cec5SDimitry Andric   return A.hasGlobalUnnamedAddr();
670b57cec5SDimitry Andric }
680b57cec5SDimitry Andric 
690b57cec5SDimitry Andric static bool hasMetadataOtherThanDebugLoc(const GlobalVariable *GV) {
700b57cec5SDimitry Andric   SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
710b57cec5SDimitry Andric   GV->getAllMetadata(MDs);
720b57cec5SDimitry Andric   for (const auto &V : MDs)
730b57cec5SDimitry Andric     if (V.first != LLVMContext::MD_dbg)
740b57cec5SDimitry Andric       return true;
750b57cec5SDimitry Andric   return false;
760b57cec5SDimitry Andric }
770b57cec5SDimitry Andric 
780b57cec5SDimitry Andric static void copyDebugLocMetadata(const GlobalVariable *From,
790b57cec5SDimitry Andric                                  GlobalVariable *To) {
800b57cec5SDimitry Andric   SmallVector<DIGlobalVariableExpression *, 1> MDs;
810b57cec5SDimitry Andric   From->getDebugInfo(MDs);
82bdd1243dSDimitry Andric   for (auto *MD : MDs)
830b57cec5SDimitry Andric     To->addDebugInfo(MD);
840b57cec5SDimitry Andric }
850b57cec5SDimitry Andric 
865ffd83dbSDimitry Andric static Align getAlign(GlobalVariable *GV) {
8781ad6265SDimitry Andric   return GV->getAlign().value_or(
88*0fca6ea1SDimitry Andric       GV->getDataLayout().getPreferredAlign(GV));
890b57cec5SDimitry Andric }
900b57cec5SDimitry Andric 
910b57cec5SDimitry Andric static bool
920b57cec5SDimitry Andric isUnmergeableGlobal(GlobalVariable *GV,
930b57cec5SDimitry Andric                     const SmallPtrSetImpl<const GlobalValue *> &UsedGlobals) {
940b57cec5SDimitry Andric   // Only process constants with initializers in the default address space.
950b57cec5SDimitry Andric   return !GV->isConstant() || !GV->hasDefinitiveInitializer() ||
960b57cec5SDimitry Andric          GV->getType()->getAddressSpace() != 0 || GV->hasSection() ||
974652422eSDimitry Andric          // Don't touch thread-local variables.
984652422eSDimitry Andric          GV->isThreadLocal() ||
990b57cec5SDimitry Andric          // Don't touch values marked with attribute(used).
1000b57cec5SDimitry Andric          UsedGlobals.count(GV);
1010b57cec5SDimitry Andric }
1020b57cec5SDimitry Andric 
1030b57cec5SDimitry Andric enum class CanMerge { No, Yes };
1040b57cec5SDimitry Andric static CanMerge makeMergeable(GlobalVariable *Old, GlobalVariable *New) {
1050b57cec5SDimitry Andric   if (!Old->hasGlobalUnnamedAddr() && !New->hasGlobalUnnamedAddr())
1060b57cec5SDimitry Andric     return CanMerge::No;
1070b57cec5SDimitry Andric   if (hasMetadataOtherThanDebugLoc(Old))
1080b57cec5SDimitry Andric     return CanMerge::No;
1090b57cec5SDimitry Andric   assert(!hasMetadataOtherThanDebugLoc(New));
1100b57cec5SDimitry Andric   if (!Old->hasGlobalUnnamedAddr())
1110b57cec5SDimitry Andric     New->setUnnamedAddr(GlobalValue::UnnamedAddr::None);
1120b57cec5SDimitry Andric   return CanMerge::Yes;
1130b57cec5SDimitry Andric }
1140b57cec5SDimitry Andric 
1150b57cec5SDimitry Andric static void replace(Module &M, GlobalVariable *Old, GlobalVariable *New) {
1160b57cec5SDimitry Andric   Constant *NewConstant = New;
1170b57cec5SDimitry Andric 
1180b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Replacing global: @" << Old->getName() << " -> @"
1190b57cec5SDimitry Andric                     << New->getName() << "\n");
1200b57cec5SDimitry Andric 
1210b57cec5SDimitry Andric   // Bump the alignment if necessary.
1225ffd83dbSDimitry Andric   if (Old->getAlign() || New->getAlign())
1235ffd83dbSDimitry Andric     New->setAlignment(std::max(getAlign(Old), getAlign(New)));
1240b57cec5SDimitry Andric 
1250b57cec5SDimitry Andric   copyDebugLocMetadata(Old, New);
1260b57cec5SDimitry Andric   Old->replaceAllUsesWith(NewConstant);
1270b57cec5SDimitry Andric 
1280b57cec5SDimitry Andric   // Delete the global value from the module.
1290b57cec5SDimitry Andric   assert(Old->hasLocalLinkage() &&
1300b57cec5SDimitry Andric          "Refusing to delete an externally visible global variable.");
1310b57cec5SDimitry Andric   Old->eraseFromParent();
1320b57cec5SDimitry Andric }
1330b57cec5SDimitry Andric 
1340b57cec5SDimitry Andric static bool mergeConstants(Module &M) {
1350b57cec5SDimitry Andric   // Find all the globals that are marked "used".  These cannot be merged.
1360b57cec5SDimitry Andric   SmallPtrSet<const GlobalValue*, 8> UsedGlobals;
1370b57cec5SDimitry Andric   FindUsedValues(M.getGlobalVariable("llvm.used"), UsedGlobals);
1380b57cec5SDimitry Andric   FindUsedValues(M.getGlobalVariable("llvm.compiler.used"), UsedGlobals);
1390b57cec5SDimitry Andric 
1400b57cec5SDimitry Andric   // Map unique constants to globals.
1410b57cec5SDimitry Andric   DenseMap<Constant *, GlobalVariable *> CMap;
1420b57cec5SDimitry Andric 
1430b57cec5SDimitry Andric   SmallVector<std::pair<GlobalVariable *, GlobalVariable *>, 32>
1440b57cec5SDimitry Andric       SameContentReplacements;
1450b57cec5SDimitry Andric 
1460b57cec5SDimitry Andric   size_t ChangesMade = 0;
1470b57cec5SDimitry Andric   size_t OldChangesMade = 0;
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric   // Iterate constant merging while we are still making progress.  Merging two
1500b57cec5SDimitry Andric   // constants together may allow us to merge other constants together if the
1510b57cec5SDimitry Andric   // second level constants have initializers which point to the globals that
1520b57cec5SDimitry Andric   // were just merged.
1530b57cec5SDimitry Andric   while (true) {
1540b57cec5SDimitry Andric     // Find the canonical constants others will be merged with.
155349cc55cSDimitry Andric     for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
1560b57cec5SDimitry Andric       // If this GV is dead, remove it.
157349cc55cSDimitry Andric       GV.removeDeadConstantUsers();
158349cc55cSDimitry Andric       if (GV.use_empty() && GV.hasLocalLinkage()) {
159349cc55cSDimitry Andric         GV.eraseFromParent();
1600b57cec5SDimitry Andric         ++ChangesMade;
1610b57cec5SDimitry Andric         continue;
1620b57cec5SDimitry Andric       }
1630b57cec5SDimitry Andric 
164349cc55cSDimitry Andric       if (isUnmergeableGlobal(&GV, UsedGlobals))
1650b57cec5SDimitry Andric         continue;
1660b57cec5SDimitry Andric 
1670b57cec5SDimitry Andric       // This transformation is legal for weak ODR globals in the sense it
1680b57cec5SDimitry Andric       // doesn't change semantics, but we really don't want to perform it
1690b57cec5SDimitry Andric       // anyway; it's likely to pessimize code generation, and some tools
1700b57cec5SDimitry Andric       // (like the Darwin linker in cases involving CFString) don't expect it.
171349cc55cSDimitry Andric       if (GV.isWeakForLinker())
1720b57cec5SDimitry Andric         continue;
1730b57cec5SDimitry Andric 
1740b57cec5SDimitry Andric       // Don't touch globals with metadata other then !dbg.
175349cc55cSDimitry Andric       if (hasMetadataOtherThanDebugLoc(&GV))
1760b57cec5SDimitry Andric         continue;
1770b57cec5SDimitry Andric 
178349cc55cSDimitry Andric       Constant *Init = GV.getInitializer();
1790b57cec5SDimitry Andric 
1800b57cec5SDimitry Andric       // Check to see if the initializer is already known.
1810b57cec5SDimitry Andric       GlobalVariable *&Slot = CMap[Init];
1820b57cec5SDimitry Andric 
1830b57cec5SDimitry Andric       // If this is the first constant we find or if the old one is local,
1840b57cec5SDimitry Andric       // replace with the current one. If the current is externally visible
1850b57cec5SDimitry Andric       // it cannot be replace, but can be the canonical constant we merge with.
1860b57cec5SDimitry Andric       bool FirstConstantFound = !Slot;
187349cc55cSDimitry Andric       if (FirstConstantFound || IsBetterCanonical(GV, *Slot)) {
188349cc55cSDimitry Andric         Slot = &GV;
189349cc55cSDimitry Andric         LLVM_DEBUG(dbgs() << "Cmap[" << *Init << "] = " << GV.getName()
1900b57cec5SDimitry Andric                           << (FirstConstantFound ? "\n" : " (updated)\n"));
1910b57cec5SDimitry Andric       }
1920b57cec5SDimitry Andric     }
1930b57cec5SDimitry Andric 
1940b57cec5SDimitry Andric     // Identify all globals that can be merged together, filling in the
1950b57cec5SDimitry Andric     // SameContentReplacements vector. We cannot do the replacement in this pass
1960b57cec5SDimitry Andric     // because doing so may cause initializers of other globals to be rewritten,
1970b57cec5SDimitry Andric     // invalidating the Constant* pointers in CMap.
198349cc55cSDimitry Andric     for (GlobalVariable &GV : llvm::make_early_inc_range(M.globals())) {
199349cc55cSDimitry Andric       if (isUnmergeableGlobal(&GV, UsedGlobals))
2000b57cec5SDimitry Andric         continue;
2010b57cec5SDimitry Andric 
2020b57cec5SDimitry Andric       // We can only replace constant with local linkage.
203349cc55cSDimitry Andric       if (!GV.hasLocalLinkage())
2040b57cec5SDimitry Andric         continue;
2050b57cec5SDimitry Andric 
206349cc55cSDimitry Andric       Constant *Init = GV.getInitializer();
2070b57cec5SDimitry Andric 
2080b57cec5SDimitry Andric       // Check to see if the initializer is already known.
2090b57cec5SDimitry Andric       auto Found = CMap.find(Init);
2100b57cec5SDimitry Andric       if (Found == CMap.end())
2110b57cec5SDimitry Andric         continue;
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric       GlobalVariable *Slot = Found->second;
214349cc55cSDimitry Andric       if (Slot == &GV)
2150b57cec5SDimitry Andric         continue;
2160b57cec5SDimitry Andric 
217349cc55cSDimitry Andric       if (makeMergeable(&GV, Slot) == CanMerge::No)
2180b57cec5SDimitry Andric         continue;
2190b57cec5SDimitry Andric 
2200b57cec5SDimitry Andric       // Make all uses of the duplicate constant use the canonical version.
221349cc55cSDimitry Andric       LLVM_DEBUG(dbgs() << "Will replace: @" << GV.getName() << " -> @"
2220b57cec5SDimitry Andric                         << Slot->getName() << "\n");
223349cc55cSDimitry Andric       SameContentReplacements.push_back(std::make_pair(&GV, Slot));
2240b57cec5SDimitry Andric     }
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric     // Now that we have figured out which replacements must be made, do them all
2270b57cec5SDimitry Andric     // now.  This avoid invalidating the pointers in CMap, which are unneeded
2280b57cec5SDimitry Andric     // now.
2290b57cec5SDimitry Andric     for (unsigned i = 0, e = SameContentReplacements.size(); i != e; ++i) {
2300b57cec5SDimitry Andric       GlobalVariable *Old = SameContentReplacements[i].first;
2310b57cec5SDimitry Andric       GlobalVariable *New = SameContentReplacements[i].second;
2320b57cec5SDimitry Andric       replace(M, Old, New);
2330b57cec5SDimitry Andric       ++ChangesMade;
2340b57cec5SDimitry Andric       ++NumIdenticalMerged;
2350b57cec5SDimitry Andric     }
2360b57cec5SDimitry Andric 
2370b57cec5SDimitry Andric     if (ChangesMade == OldChangesMade)
2380b57cec5SDimitry Andric       break;
2390b57cec5SDimitry Andric     OldChangesMade = ChangesMade;
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric     SameContentReplacements.clear();
2420b57cec5SDimitry Andric     CMap.clear();
2430b57cec5SDimitry Andric   }
2440b57cec5SDimitry Andric 
2450b57cec5SDimitry Andric   return ChangesMade;
2460b57cec5SDimitry Andric }
2470b57cec5SDimitry Andric 
2480b57cec5SDimitry Andric PreservedAnalyses ConstantMergePass::run(Module &M, ModuleAnalysisManager &) {
2490b57cec5SDimitry Andric   if (!mergeConstants(M))
2500b57cec5SDimitry Andric     return PreservedAnalyses::all();
2510b57cec5SDimitry Andric   return PreservedAnalyses::none();
2520b57cec5SDimitry Andric }
253