xref: /llvm-project/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp (revision 6053b37e454c056d25a31b39a06279cafd35edc8)
1 //===- TypeBasedAliasAnalysis.cpp - Type-Based Alias Analysis -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the TypeBasedAliasAnalysis pass, which implements
10 // metadata-based TBAA.
11 //
12 // In LLVM IR, memory does not have types, so LLVM's own type system is not
13 // suitable for doing TBAA. Instead, metadata is added to the IR to describe
14 // a type system of a higher level language. This can be used to implement
15 // typical C/C++ TBAA, but it can also be used to implement custom alias
16 // analysis behavior for other languages.
17 //
18 // We now support two types of metadata format: scalar TBAA and struct-path
19 // aware TBAA. After all testing cases are upgraded to use struct-path aware
20 // TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA
21 // can be dropped.
22 //
23 // The scalar TBAA metadata format is very simple. TBAA MDNodes have up to
24 // three fields, e.g.:
25 //   !0 = !{ !"an example type tree" }
26 //   !1 = !{ !"int", !0 }
27 //   !2 = !{ !"float", !0 }
28 //   !3 = !{ !"const float", !2, i64 1 }
29 //
30 // The first field is an identity field. It can be any value, usually
31 // an MDString, which uniquely identifies the type. The most important
32 // name in the tree is the name of the root node. Two trees with
33 // different root node names are entirely disjoint, even if they
34 // have leaves with common names.
35 //
36 // The second field identifies the type's parent node in the tree, or
37 // is null or omitted for a root node. A type is considered to alias
38 // all of its descendants and all of its ancestors in the tree. Also,
39 // a type is considered to alias all types in other trees, so that
40 // bitcode produced from multiple front-ends is handled conservatively.
41 //
42 // If the third field is present, it's an integer which if equal to 1
43 // indicates that the type is "constant" (meaning pointsToConstantMemory
44 // should return true; see
45 // http://llvm.org/docs/AliasAnalysis.html#OtherItfs).
46 //
47 // With struct-path aware TBAA, the MDNodes attached to an instruction using
48 // "!tbaa" are called path tag nodes.
49 //
50 // The path tag node has 4 fields with the last field being optional.
51 //
52 // The first field is the base type node, it can be a struct type node
53 // or a scalar type node. The second field is the access type node, it
54 // must be a scalar type node. The third field is the offset into the base type.
55 // The last field has the same meaning as the last field of our scalar TBAA:
56 // it's an integer which if equal to 1 indicates that the access is "constant".
57 //
58 // The struct type node has a name and a list of pairs, one pair for each member
59 // of the struct. The first element of each pair is a type node (a struct type
60 // node or a scalar type node), specifying the type of the member, the second
61 // element of each pair is the offset of the member.
62 //
63 // Given an example
64 // typedef struct {
65 //   short s;
66 // } A;
67 // typedef struct {
68 //   uint16_t s;
69 //   A a;
70 // } B;
71 //
72 // For an access to B.a.s, we attach !5 (a path tag node) to the load/store
73 // instruction. The base type is !4 (struct B), the access type is !2 (scalar
74 // type short) and the offset is 4.
75 //
76 // !0 = !{!"Simple C/C++ TBAA"}
77 // !1 = !{!"omnipotent char", !0} // Scalar type node
78 // !2 = !{!"short", !1}           // Scalar type node
79 // !3 = !{!"A", !2, i64 0}        // Struct type node
80 // !4 = !{!"B", !2, i64 0, !3, i64 4}
81 //                                                           // Struct type node
82 // !5 = !{!4, !2, i64 4}          // Path tag node
83 //
84 // The struct type nodes and the scalar type nodes form a type DAG.
85 //         Root (!0)
86 //         char (!1)  -- edge to Root
87 //         short (!2) -- edge to char
88 //         A (!3) -- edge with offset 0 to short
89 //         B (!4) -- edge with offset 0 to short and edge with offset 4 to A
90 //
91 // To check if two tags (tagX and tagY) can alias, we start from the base type
92 // of tagX, follow the edge with the correct offset in the type DAG and adjust
93 // the offset until we reach the base type of tagY or until we reach the Root
94 // node.
95 // If we reach the base type of tagY, compare the adjusted offset with
96 // offset of tagY, return Alias if the offsets are the same, return NoAlias
97 // otherwise.
98 // If we reach the Root node, perform the above starting from base type of tagY
99 // to see if we reach base type of tagX.
100 //
101 // If they have different roots, they're part of different potentially
102 // unrelated type systems, so we return Alias to be conservative.
103 // If neither node is an ancestor of the other and they have the same root,
104 // then we say NoAlias.
105 //
106 //===----------------------------------------------------------------------===//
107 
108 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
109 #include "llvm/ADT/SetVector.h"
110 #include "llvm/Analysis/AliasAnalysis.h"
111 #include "llvm/Analysis/MemoryLocation.h"
112 #include "llvm/IR/Constants.h"
113 #include "llvm/IR/DerivedTypes.h"
114 #include "llvm/IR/InstrTypes.h"
115 #include "llvm/IR/LLVMContext.h"
116 #include "llvm/IR/Metadata.h"
117 #include "llvm/InitializePasses.h"
118 #include "llvm/Pass.h"
119 #include "llvm/Support/Casting.h"
120 #include "llvm/Support/CommandLine.h"
121 #include "llvm/Support/ErrorHandling.h"
122 #include <cassert>
123 #include <cstdint>
124 
125 using namespace llvm;
126 
127 // A handy option for disabling TBAA functionality. The same effect can also be
128 // achieved by stripping the !tbaa tags from IR, but this option is sometimes
129 // more convenient.
130 static cl::opt<bool> EnableTBAA("enable-tbaa", cl::init(true), cl::Hidden);
131 
132 namespace {
133 
134 /// isNewFormatTypeNode - Return true iff the given type node is in the new
135 /// size-aware format.
136 static bool isNewFormatTypeNode(const MDNode *N) {
137   if (N->getNumOperands() < 3)
138     return false;
139   // In the old format the first operand is a string.
140   if (!isa<MDNode>(N->getOperand(0)))
141     return false;
142   return true;
143 }
144 
145 /// This is a simple wrapper around an MDNode which provides a higher-level
146 /// interface by hiding the details of how alias analysis information is encoded
147 /// in its operands.
148 template<typename MDNodeTy>
149 class TBAANodeImpl {
150   MDNodeTy *Node = nullptr;
151 
152 public:
153   TBAANodeImpl() = default;
154   explicit TBAANodeImpl(MDNodeTy *N) : Node(N) {}
155 
156   /// getNode - Get the MDNode for this TBAANode.
157   MDNodeTy *getNode() const { return Node; }
158 
159   /// isNewFormat - Return true iff the wrapped type node is in the new
160   /// size-aware format.
161   bool isNewFormat() const { return isNewFormatTypeNode(Node); }
162 
163   /// getParent - Get this TBAANode's Alias tree parent.
164   TBAANodeImpl<MDNodeTy> getParent() const {
165     if (isNewFormat())
166       return TBAANodeImpl(cast<MDNodeTy>(Node->getOperand(0)));
167 
168     if (Node->getNumOperands() < 2)
169       return TBAANodeImpl<MDNodeTy>();
170     MDNodeTy *P = dyn_cast_or_null<MDNodeTy>(Node->getOperand(1));
171     if (!P)
172       return TBAANodeImpl<MDNodeTy>();
173     // Ok, this node has a valid parent. Return it.
174     return TBAANodeImpl<MDNodeTy>(P);
175   }
176 
177   /// Test if this TBAANode represents a type for objects which are
178   /// not modified (by any means) in the context where this
179   /// AliasAnalysis is relevant.
180   bool isTypeImmutable() const {
181     if (Node->getNumOperands() < 3)
182       return false;
183     ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(2));
184     if (!CI)
185       return false;
186     return CI->getValue()[0];
187   }
188 };
189 
190 /// \name Specializations of \c TBAANodeImpl for const and non const qualified
191 /// \c MDNode.
192 /// @{
193 using TBAANode = TBAANodeImpl<const MDNode>;
194 using MutableTBAANode = TBAANodeImpl<MDNode>;
195 /// @}
196 
197 /// This is a simple wrapper around an MDNode which provides a
198 /// higher-level interface by hiding the details of how alias analysis
199 /// information is encoded in its operands.
200 template<typename MDNodeTy>
201 class TBAAStructTagNodeImpl {
202   /// This node should be created with createTBAAAccessTag().
203   MDNodeTy *Node;
204 
205 public:
206   explicit TBAAStructTagNodeImpl(MDNodeTy *N) : Node(N) {}
207 
208   /// Get the MDNode for this TBAAStructTagNode.
209   MDNodeTy *getNode() const { return Node; }
210 
211   /// isNewFormat - Return true iff the wrapped access tag is in the new
212   /// size-aware format.
213   bool isNewFormat() const {
214     if (Node->getNumOperands() < 4)
215       return false;
216     if (MDNodeTy *AccessType = getAccessType())
217       if (!TBAANodeImpl<MDNodeTy>(AccessType).isNewFormat())
218         return false;
219     return true;
220   }
221 
222   MDNodeTy *getBaseType() const {
223     return dyn_cast_or_null<MDNode>(Node->getOperand(0));
224   }
225 
226   MDNodeTy *getAccessType() const {
227     return dyn_cast_or_null<MDNode>(Node->getOperand(1));
228   }
229 
230   uint64_t getOffset() const {
231     return mdconst::extract<ConstantInt>(Node->getOperand(2))->getZExtValue();
232   }
233 
234   uint64_t getSize() const {
235     if (!isNewFormat())
236       return UINT64_MAX;
237     return mdconst::extract<ConstantInt>(Node->getOperand(3))->getZExtValue();
238   }
239 
240   /// Test if this TBAAStructTagNode represents a type for objects
241   /// which are not modified (by any means) in the context where this
242   /// AliasAnalysis is relevant.
243   bool isTypeImmutable() const {
244     unsigned OpNo = isNewFormat() ? 4 : 3;
245     if (Node->getNumOperands() < OpNo + 1)
246       return false;
247     ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Node->getOperand(OpNo));
248     if (!CI)
249       return false;
250     return CI->getValue()[0];
251   }
252 };
253 
/// \name Specializations of \c TBAAStructTagNodeImpl for const and non const
/// qualified \c MDNode.
256 /// @{
257 using TBAAStructTagNode = TBAAStructTagNodeImpl<const MDNode>;
258 using MutableTBAAStructTagNode = TBAAStructTagNodeImpl<MDNode>;
259 /// @}
260 
261 /// This is a simple wrapper around an MDNode which provides a
262 /// higher-level interface by hiding the details of how alias analysis
263 /// information is encoded in its operands.
264 class TBAAStructTypeNode {
265   /// This node should be created with createTBAATypeNode().
266   const MDNode *Node = nullptr;
267 
268 public:
269   TBAAStructTypeNode() = default;
270   explicit TBAAStructTypeNode(const MDNode *N) : Node(N) {}
271 
272   /// Get the MDNode for this TBAAStructTypeNode.
273   const MDNode *getNode() const { return Node; }
274 
275   /// isNewFormat - Return true iff the wrapped type node is in the new
276   /// size-aware format.
277   bool isNewFormat() const { return isNewFormatTypeNode(Node); }
278 
279   bool operator==(const TBAAStructTypeNode &Other) const {
280     return getNode() == Other.getNode();
281   }
282 
283   /// getId - Return type identifier.
284   Metadata *getId() const {
285     return Node->getOperand(isNewFormat() ? 2 : 0);
286   }
287 
288   unsigned getNumFields() const {
289     unsigned FirstFieldOpNo = isNewFormat() ? 3 : 1;
290     unsigned NumOpsPerField = isNewFormat() ? 3 : 2;
291     return (getNode()->getNumOperands() - FirstFieldOpNo) / NumOpsPerField;
292   }
293 
294   TBAAStructTypeNode getFieldType(unsigned FieldIndex) const {
295     unsigned FirstFieldOpNo = isNewFormat() ? 3 : 1;
296     unsigned NumOpsPerField = isNewFormat() ? 3 : 2;
297     unsigned OpIndex = FirstFieldOpNo + FieldIndex * NumOpsPerField;
298     auto *TypeNode = cast<MDNode>(getNode()->getOperand(OpIndex));
299     return TBAAStructTypeNode(TypeNode);
300   }
301 
302   /// Get this TBAAStructTypeNode's field in the type DAG with
303   /// given offset. Update the offset to be relative to the field type.
304   TBAAStructTypeNode getField(uint64_t &Offset) const {
305     bool NewFormat = isNewFormat();
306     const ArrayRef<MDOperand> Operands = Node->operands();
307     const unsigned NumOperands = Operands.size();
308 
309     if (NewFormat) {
310       // New-format root and scalar type nodes have no fields.
311       if (NumOperands < 6)
312         return TBAAStructTypeNode();
313     } else {
314       // Parent can be omitted for the root node.
315       if (NumOperands < 2)
316         return TBAAStructTypeNode();
317 
318       // Fast path for a scalar type node and a struct type node with a single
319       // field.
320       if (NumOperands <= 3) {
321         uint64_t Cur =
322             NumOperands == 2
323                 ? 0
324                 : mdconst::extract<ConstantInt>(Operands[2])->getZExtValue();
325         Offset -= Cur;
326         MDNode *P = dyn_cast_or_null<MDNode>(Operands[1]);
327         if (!P)
328           return TBAAStructTypeNode();
329         return TBAAStructTypeNode(P);
330       }
331     }
332 
333     // Assume the offsets are in order. We return the previous field if
334     // the current offset is bigger than the given offset.
335     unsigned FirstFieldOpNo = NewFormat ? 3 : 1;
336     unsigned NumOpsPerField = NewFormat ? 3 : 2;
337     unsigned TheIdx = 0;
338 
339     for (unsigned Idx = FirstFieldOpNo; Idx < NumOperands;
340          Idx += NumOpsPerField) {
341       uint64_t Cur =
342           mdconst::extract<ConstantInt>(Operands[Idx + 1])->getZExtValue();
343       if (Cur > Offset) {
344         assert(Idx >= FirstFieldOpNo + NumOpsPerField &&
345                "TBAAStructTypeNode::getField should have an offset match!");
346         TheIdx = Idx - NumOpsPerField;
347         break;
348       }
349     }
350     // Move along the last field.
351     if (TheIdx == 0)
352       TheIdx = NumOperands - NumOpsPerField;
353     uint64_t Cur =
354         mdconst::extract<ConstantInt>(Operands[TheIdx + 1])->getZExtValue();
355     Offset -= Cur;
356     MDNode *P = dyn_cast_or_null<MDNode>(Operands[TheIdx]);
357     if (!P)
358       return TBAAStructTypeNode();
359     return TBAAStructTypeNode(P);
360   }
361 };
362 
363 } // end anonymous namespace
364 
365 /// Check the first operand of the tbaa tag node, if it is a MDNode, we treat
366 /// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA
367 /// format.
368 static bool isStructPathTBAA(const MDNode *MD) {
369   // Anonymous TBAA root starts with a MDNode and dragonegg uses it as
370   // a TBAA tag.
371   return isa<MDNode>(MD->getOperand(0)) && MD->getNumOperands() >= 3;
372 }
373 
374 AliasResult TypeBasedAAResult::alias(const MemoryLocation &LocA,
375                                      const MemoryLocation &LocB,
376                                      AAQueryInfo &AAQI) {
377   if (!EnableTBAA)
378     return AAResultBase::alias(LocA, LocB, AAQI);
379 
380   // If accesses may alias, chain to the next AliasAnalysis.
381   if (Aliases(LocA.AATags.TBAA, LocB.AATags.TBAA))
382     return AAResultBase::alias(LocA, LocB, AAQI);
383 
384   // Otherwise return a definitive result.
385   return AliasResult::NoAlias;
386 }
387 
388 bool TypeBasedAAResult::pointsToConstantMemory(const MemoryLocation &Loc,
389                                                AAQueryInfo &AAQI,
390                                                bool OrLocal) {
391   if (!EnableTBAA)
392     return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
393 
394   const MDNode *M = Loc.AATags.TBAA;
395   if (!M)
396     return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
397 
398   // If this is an "immutable" type, we can assume the pointer is pointing
399   // to constant memory.
400   if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
401       (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
402     return true;
403 
404   return AAResultBase::pointsToConstantMemory(Loc, AAQI, OrLocal);
405 }
406 
407 FunctionModRefBehavior
408 TypeBasedAAResult::getModRefBehavior(const CallBase *Call,
409                                      AAQueryInfo &AAQI) {
410   if (!EnableTBAA)
411     return AAResultBase::getModRefBehavior(Call, AAQI);
412 
413   // If this is an "immutable" type, we can assume the call doesn't write
414   // to memory.
415   if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
416     if ((!isStructPathTBAA(M) && TBAANode(M).isTypeImmutable()) ||
417         (isStructPathTBAA(M) && TBAAStructTagNode(M).isTypeImmutable()))
418       return FunctionModRefBehavior::readOnly();
419 
420   return AAResultBase::getModRefBehavior(Call, AAQI);
421 }
422 
/// Function-level query. TBAA has nothing to add here, so the answer of
/// AAResultBase::getModRefBehavior is returned unchanged, regardless of the
/// enable-tbaa option.
FunctionModRefBehavior TypeBasedAAResult::getModRefBehavior(const Function *F) {
  // Functions don't have metadata. Just chain to the next implementation.
  return AAResultBase::getModRefBehavior(F);
}
427 
428 ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call,
429                                             const MemoryLocation &Loc,
430                                             AAQueryInfo &AAQI) {
431   if (!EnableTBAA)
432     return AAResultBase::getModRefInfo(Call, Loc, AAQI);
433 
434   if (const MDNode *L = Loc.AATags.TBAA)
435     if (const MDNode *M = Call->getMetadata(LLVMContext::MD_tbaa))
436       if (!Aliases(L, M))
437         return ModRefInfo::NoModRef;
438 
439   return AAResultBase::getModRefInfo(Call, Loc, AAQI);
440 }
441 
442 ModRefInfo TypeBasedAAResult::getModRefInfo(const CallBase *Call1,
443                                             const CallBase *Call2,
444                                             AAQueryInfo &AAQI) {
445   if (!EnableTBAA)
446     return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
447 
448   if (const MDNode *M1 = Call1->getMetadata(LLVMContext::MD_tbaa))
449     if (const MDNode *M2 = Call2->getMetadata(LLVMContext::MD_tbaa))
450       if (!Aliases(M1, M2))
451         return ModRefInfo::NoModRef;
452 
453   return AAResultBase::getModRefInfo(Call1, Call2, AAQI);
454 }
455 
456 bool MDNode::isTBAAVtableAccess() const {
457   if (!isStructPathTBAA(this)) {
458     if (getNumOperands() < 1)
459       return false;
460     if (MDString *Tag1 = dyn_cast<MDString>(getOperand(0))) {
461       if (Tag1->getString() == "vtable pointer")
462         return true;
463     }
464     return false;
465   }
466 
467   // For struct-path aware TBAA, we use the access type of the tag.
468   TBAAStructTagNode Tag(this);
469   TBAAStructTypeNode AccessType(Tag.getAccessType());
470   if(auto *Id = dyn_cast<MDString>(AccessType.getId()))
471     if (Id->getString() == "vtable pointer")
472       return true;
473   return false;
474 }
475 
476 static bool matchAccessTags(const MDNode *A, const MDNode *B,
477                             const MDNode **GenericTag = nullptr);
478 
479 MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) {
480   const MDNode *GenericTag;
481   matchAccessTags(A, B, &GenericTag);
482   return const_cast<MDNode*>(GenericTag);
483 }
484 
485 static const MDNode *getLeastCommonType(const MDNode *A, const MDNode *B) {
486   if (!A || !B)
487     return nullptr;
488 
489   if (A == B)
490     return A;
491 
492   SmallSetVector<const MDNode *, 4> PathA;
493   TBAANode TA(A);
494   while (TA.getNode()) {
495     if (PathA.count(TA.getNode()))
496       report_fatal_error("Cycle found in TBAA metadata.");
497     PathA.insert(TA.getNode());
498     TA = TA.getParent();
499   }
500 
501   SmallSetVector<const MDNode *, 4> PathB;
502   TBAANode TB(B);
503   while (TB.getNode()) {
504     if (PathB.count(TB.getNode()))
505       report_fatal_error("Cycle found in TBAA metadata.");
506     PathB.insert(TB.getNode());
507     TB = TB.getParent();
508   }
509 
510   int IA = PathA.size() - 1;
511   int IB = PathB.size() - 1;
512 
513   const MDNode *Ret = nullptr;
514   while (IA >= 0 && IB >= 0) {
515     if (PathA[IA] == PathB[IB])
516       Ret = PathA[IA];
517     else
518       break;
519     --IA;
520     --IB;
521   }
522 
523   return Ret;
524 }
525 
526 AAMDNodes AAMDNodes::merge(const AAMDNodes &Other) const {
527   AAMDNodes Result;
528   Result.TBAA = MDNode::getMostGenericTBAA(TBAA, Other.TBAA);
529   Result.TBAAStruct = nullptr;
530   Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
531   Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
532   return Result;
533 }
534 
535 AAMDNodes AAMDNodes::concat(const AAMDNodes &Other) const {
536   AAMDNodes Result;
537   Result.TBAA = Result.TBAAStruct = nullptr;
538   Result.Scope = MDNode::getMostGenericAliasScope(Scope, Other.Scope);
539   Result.NoAlias = MDNode::intersect(NoAlias, Other.NoAlias);
540   return Result;
541 }
542 
543 static const MDNode *createAccessTag(const MDNode *AccessType) {
544   // If there is no access type or the access type is the root node, then
545   // we don't have any useful access tag to return.
546   if (!AccessType || AccessType->getNumOperands() < 2)
547     return nullptr;
548 
549   Type *Int64 = IntegerType::get(AccessType->getContext(), 64);
550   auto *OffsetNode = ConstantAsMetadata::get(ConstantInt::get(Int64, 0));
551 
552   if (TBAAStructTypeNode(AccessType).isNewFormat()) {
553     // TODO: Take access ranges into account when matching access tags and
554     // fix this code to generate actual access sizes for generic tags.
555     uint64_t AccessSize = UINT64_MAX;
556     auto *SizeNode =
557         ConstantAsMetadata::get(ConstantInt::get(Int64, AccessSize));
558     Metadata *Ops[] = {const_cast<MDNode*>(AccessType),
559                        const_cast<MDNode*>(AccessType),
560                        OffsetNode, SizeNode};
561     return MDNode::get(AccessType->getContext(), Ops);
562   }
563 
564   Metadata *Ops[] = {const_cast<MDNode*>(AccessType),
565                      const_cast<MDNode*>(AccessType),
566                      OffsetNode};
567   return MDNode::get(AccessType->getContext(), Ops);
568 }
569 
570 static bool hasField(TBAAStructTypeNode BaseType,
571                      TBAAStructTypeNode FieldType) {
572   for (unsigned I = 0, E = BaseType.getNumFields(); I != E; ++I) {
573     TBAAStructTypeNode T = BaseType.getFieldType(I);
574     if (T == FieldType || hasField(T, FieldType))
575       return true;
576   }
577   return false;
578 }
579 
580 /// Return true if for two given accesses, one of the accessed objects may be a
581 /// subobject of the other. The \p BaseTag and \p SubobjectTag parameters
582 /// describe the accesses to the base object and the subobject respectively.
583 /// \p CommonType must be the metadata node describing the common type of the
584 /// accessed objects. On return, \p MayAlias is set to true iff these accesses
585 /// may alias and \p Generic, if not null, points to the most generic access
586 /// tag for the given two.
587 static bool mayBeAccessToSubobjectOf(TBAAStructTagNode BaseTag,
588                                      TBAAStructTagNode SubobjectTag,
589                                      const MDNode *CommonType,
590                                      const MDNode **GenericTag,
591                                      bool &MayAlias) {
592   // If the base object is of the least common type, then this may be an access
593   // to its subobject.
594   if (BaseTag.getAccessType() == BaseTag.getBaseType() &&
595       BaseTag.getAccessType() == CommonType) {
596     if (GenericTag)
597       *GenericTag = createAccessTag(CommonType);
598     MayAlias = true;
599     return true;
600   }
601 
602   // If the access to the base object is through a field of the subobject's
603   // type, then this may be an access to that field. To check for that we start
604   // from the base type, follow the edge with the correct offset in the type DAG
605   // and adjust the offset until we reach the field type or until we reach the
606   // access type.
607   bool NewFormat = BaseTag.isNewFormat();
608   TBAAStructTypeNode BaseType(BaseTag.getBaseType());
609   uint64_t OffsetInBase = BaseTag.getOffset();
610 
611   for (;;) {
612     // In the old format there is no distinction between fields and parent
613     // types, so in this case we consider all nodes up to the root.
614     if (!BaseType.getNode()) {
615       assert(!NewFormat && "Did not see access type in access path!");
616       break;
617     }
618 
619     if (BaseType.getNode() == SubobjectTag.getBaseType()) {
620       bool SameMemberAccess = OffsetInBase == SubobjectTag.getOffset();
621       if (GenericTag) {
622         *GenericTag = SameMemberAccess ? SubobjectTag.getNode() :
623                                          createAccessTag(CommonType);
624       }
625       MayAlias = SameMemberAccess;
626       return true;
627     }
628 
629     // With new-format nodes we stop at the access type.
630     if (NewFormat && BaseType.getNode() == BaseTag.getAccessType())
631       break;
632 
633     // Follow the edge with the correct offset. Offset will be adjusted to
634     // be relative to the field type.
635     BaseType = BaseType.getField(OffsetInBase);
636   }
637 
638   // If the base object has a direct or indirect field of the subobject's type,
639   // then this may be an access to that field. We need this to check now that
640   // we support aggregates as access types.
641   if (NewFormat) {
642     // TBAAStructTypeNode BaseAccessType(BaseTag.getAccessType());
643     TBAAStructTypeNode FieldType(SubobjectTag.getBaseType());
644     if (hasField(BaseType, FieldType)) {
645       if (GenericTag)
646         *GenericTag = createAccessTag(CommonType);
647       MayAlias = true;
648       return true;
649     }
650   }
651 
652   return false;
653 }
654 
655 /// matchTags - Return true if the given couple of accesses are allowed to
656 /// overlap. If \arg GenericTag is not null, then on return it points to the
657 /// most generic access descriptor for the given two.
658 static bool matchAccessTags(const MDNode *A, const MDNode *B,
659                             const MDNode **GenericTag) {
660   if (A == B) {
661     if (GenericTag)
662       *GenericTag = A;
663     return true;
664   }
665 
666   // Accesses with no TBAA information may alias with any other accesses.
667   if (!A || !B) {
668     if (GenericTag)
669       *GenericTag = nullptr;
670     return true;
671   }
672 
673   // Verify that both input nodes are struct-path aware.  Auto-upgrade should
674   // have taken care of this.
675   assert(isStructPathTBAA(A) && "Access A is not struct-path aware!");
676   assert(isStructPathTBAA(B) && "Access B is not struct-path aware!");
677 
678   TBAAStructTagNode TagA(A), TagB(B);
679   const MDNode *CommonType = getLeastCommonType(TagA.getAccessType(),
680                                                 TagB.getAccessType());
681 
682   // If the final access types have different roots, they're part of different
683   // potentially unrelated type systems, so we must be conservative.
684   if (!CommonType) {
685     if (GenericTag)
686       *GenericTag = nullptr;
687     return true;
688   }
689 
690   // If one of the accessed objects may be a subobject of the other, then such
691   // accesses may alias.
692   bool MayAlias;
693   if (mayBeAccessToSubobjectOf(/* BaseTag= */ TagA, /* SubobjectTag= */ TagB,
694                                CommonType, GenericTag, MayAlias) ||
695       mayBeAccessToSubobjectOf(/* BaseTag= */ TagB, /* SubobjectTag= */ TagA,
696                                CommonType, GenericTag, MayAlias))
697     return MayAlias;
698 
699   // Otherwise, we've proved there's no alias.
700   if (GenericTag)
701     *GenericTag = createAccessTag(CommonType);
702   return false;
703 }
704 
705 /// Aliases - Test whether the access represented by tag A may alias the
706 /// access represented by tag B.
707 bool TypeBasedAAResult::Aliases(const MDNode *A, const MDNode *B) const {
708   return matchAccessTags(A, B);
709 }
710 
711 AnalysisKey TypeBasedAA::Key;
712 
/// Produce the analysis result. The result is default-constructed; neither
/// \p F nor \p AM is consulted.
TypeBasedAAResult TypeBasedAA::run(Function &F, FunctionAnalysisManager &AM) {
  return TypeBasedAAResult();
}
716 
717 char TypeBasedAAWrapperPass::ID = 0;
718 INITIALIZE_PASS(TypeBasedAAWrapperPass, "tbaa", "Type-Based Alias Analysis",
719                 false, true)
720 
/// Allocate a new TypeBasedAAWrapperPass and return it through its
/// ImmutablePass base pointer.
ImmutablePass *llvm::createTypeBasedAAWrapperPass() {
  return new TypeBasedAAWrapperPass();
}
724 
/// Construct the wrapper pass under its static ID and run the pass's
/// initialization routine against the global pass registry.
TypeBasedAAWrapperPass::TypeBasedAAWrapperPass() : ImmutablePass(ID) {
  initializeTypeBasedAAWrapperPassPass(*PassRegistry::getPassRegistry());
}
728 
729 bool TypeBasedAAWrapperPass::doInitialization(Module &M) {
730   Result.reset(new TypeBasedAAResult());
731   return false;
732 }
733 
/// Release the result created in doInitialization(). \p M is not consulted.
/// Always returns false.
bool TypeBasedAAWrapperPass::doFinalization(Module &M) {
  Result.reset();
  return false;
}
738 
/// Declare that this pass preserves all analyses.
void TypeBasedAAWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
}
742 
743 MDNode *AAMDNodes::shiftTBAA(MDNode *MD, size_t Offset) {
744   // Fast path if there's no offset
745   if (Offset == 0)
746     return MD;
747   // Fast path if there's no path tbaa node (and thus scalar)
748   if (!isStructPathTBAA(MD))
749     return MD;
750 
751   // The correct behavior here is to add the offset into the TBAA
752   // struct node offset. The base type, however may not have defined
753   // a type at this additional offset, resulting in errors. Since
754   // this method is only used within a given load/store access
755   // the offset provided is only used to subdivide the previous load
756   // maintaining the validity of the previous TBAA.
757   //
758   // This, however, should be revisited in the future.
759   return MD;
760 }
761 
762 MDNode *AAMDNodes::shiftTBAAStruct(MDNode *MD, size_t Offset) {
763   // Fast path if there's no offset
764   if (Offset == 0)
765     return MD;
766   SmallVector<Metadata *, 3> Sub;
767   for (size_t i = 0, size = MD->getNumOperands(); i < size; i += 3) {
768     ConstantInt *InnerOffset = mdconst::extract<ConstantInt>(MD->getOperand(i));
769     ConstantInt *InnerSize =
770         mdconst::extract<ConstantInt>(MD->getOperand(i + 1));
771     // Don't include any triples that aren't in bounds
772     if (InnerOffset->getZExtValue() + InnerSize->getZExtValue() <= Offset)
773       continue;
774 
775     uint64_t NewSize = InnerSize->getZExtValue();
776     uint64_t NewOffset = InnerOffset->getZExtValue() - Offset;
777     if (InnerOffset->getZExtValue() < Offset) {
778       NewOffset = 0;
779       NewSize -= Offset - InnerOffset->getZExtValue();
780     }
781 
782     // Shift the offset of the triple
783     Sub.push_back(ConstantAsMetadata::get(
784         ConstantInt::get(InnerOffset->getType(), NewOffset)));
785     Sub.push_back(ConstantAsMetadata::get(
786         ConstantInt::get(InnerSize->getType(), NewSize)));
787     Sub.push_back(MD->getOperand(i + 2));
788   }
789   return MDNode::get(MD->getContext(), Sub);
790 }
791 
792 MDNode *AAMDNodes::extendToTBAA(MDNode *MD, ssize_t Len) {
793   // Fast path if 0-length
794   if (Len == 0)
795     return nullptr;
796 
797   // Regular TBAA is invariant of length, so we only need to consider
798   // struct-path TBAA.
799   if (!isStructPathTBAA(MD))
800     return MD;
801 
802   TBAAStructTagNode Tag(MD);
803 
804   // Only new format TBAA has a size
805   if (!Tag.isNewFormat())
806     return MD;
807 
808   // If unknown size, drop the TBAA.
809   if (Len == -1)
810     return nullptr;
811 
812   // Otherwise, create TBAA with the new Len
813   ArrayRef<MDOperand> MDOperands = MD->operands();
814   SmallVector<Metadata *, 4> NextNodes(MDOperands.begin(), MDOperands.end());
815   ConstantInt *PreviousSize = mdconst::extract<ConstantInt>(NextNodes[3]);
816 
817   // Don't create a new MDNode if it is the same length.
818   if (PreviousSize->equalsInt(Len))
819     return MD;
820 
821   NextNodes[3] =
822       ConstantAsMetadata::get(ConstantInt::get(PreviousSize->getType(), Len));
823   return MDNode::get(MD->getContext(), NextNodes);
824 }
825