xref: /llvm-project/flang/include/flang/Optimizer/CodeGen/TBAABuilder.h (revision 6df4e7c25ffb15ed8cba8ccb9cf9fa18b082013d)
1 //===-- TBAABuilder.h -- TBAA builder declarations --------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef FORTRAN_OPTIMIZER_CODEGEN_TBAABUILDER_H
14 #define FORTRAN_OPTIMIZER_CODEGEN_TBAABUILDER_H
15 
16 #include "flang/Optimizer/Analysis/TBAAForest.h"
17 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
18 
19 namespace fir {
20 
21 // TBAA builder provides mapping between FIR types and their TBAA type
22 // descriptors, and methods to populate that mapping during FIR to LLVM
23 // type conversion and to attach llvm.tbaa attributes to memory access
24 // instructions.
25 //
26 // TBAA type information is represented with LLVM::MetadataOp operation
27 // with specific symbol name `TBAABuilder::tbaaMetaOpName`. The basic
28 // TBAA trees used for Flang consist of the following nodes:
29 //   llvm.metadata @__flang_tbaa {
30 //     llvm.tbaa_root @root_0 {id = "Flang Type TBAA Function Root funcName"}
31 //     llvm.tbaa_type_desc @type_desc_1 {id = "any access",
32 //                                       members = {<@root_0, 0>}}
33 //     llvm.tbaa_type_desc @type_desc_2 {id = "any data access",
34 //                                       members = {<@type_desc_1, 0>}}
35 //     llvm.tbaa_type_desc @type_desc_3 {id = "descriptor member",
36 //                                       members = {<@type_desc_1, 0>}}
37 //   }
38 //
39 // The `<any data access>` and `<descriptor member>` type descriptors
40 // are two sub-roots of the basic TBAA tree, and they allow representing
41 // box and non-box accesses, which can never alias in the current Flang
42 // implementation. The `<any access>` type descriptor is their common parent
43 // that can be used for representing accesses that may alias box and non-box
44 // accesses if an access cannot be classified strictly as box or non-box.
45 // In the current implementation `<any access>` is not used by TBAA access tags,
46 // because it is always known whether an operation accesses box or non-box.
47 //
48 // Given this basic TBAA tree structure, the box/descriptor types may
49 // be represented like this:
50 //   llvm.tbaa_type_desc @type_desc_4 {
51 //       id = "CFI_cdesc_t_dim0",
52 //       members = {<@type_desc_3, 0>, // base_addr
53 //                  <@type_desc_3, 8>, // elem_len
54 //                  <@type_desc_3, 16>, // version
55 //                  <@type_desc_3, 20>, // rank
56 //                  <@type_desc_3, 21>, // type
57 //                  <@type_desc_3, 22>, // attribute
58 //                  <@type_desc_3, 23>} // extra
59 //   }
60 //   llvm.tbaa_type_desc @type_desc_5 {
61 //       id = "CFI_cdesc_t_dim1",
62 //       members = {<@type_desc_3, 0>, // base_addr
63 //                  <@type_desc_3, 8>, // elem_len
64 //                  <@type_desc_3, 16>, // version
65 //                  <@type_desc_3, 20>, // rank
66 //                  <@type_desc_3, 21>, // type
67 //                  <@type_desc_3, 22>, // attribute
68 //                  <@type_desc_3, 23>, // extra
69 //                  <@type_desc_3, 24>, // dim[0].lower_bound
70 //                  <@type_desc_3, 32>, // dim[0].extent
71 //                  <@type_desc_3, 40>} // dim[0].sm
72 //   }
73 //   llvm.tbaa_type_desc @type_desc_6 {
74 //       id = "CFI_cdesc_t_dim2",
75 //       members = {<@type_desc_3, 0>, // base_addr
76 //                  <@type_desc_3, 8>, // elem_len
77 //                  <@type_desc_3, 16>, // version
78 //                  <@type_desc_3, 20>, // rank
79 //                  <@type_desc_3, 21>, // type
80 //                  <@type_desc_3, 22>, // attribute
81 //                  <@type_desc_3, 23>, // extra
82 //                  <@type_desc_3, 24>, // dim[0].lower_bound
83 //                  <@type_desc_3, 32>, // dim[0].extent
84 //                  <@type_desc_3, 40>, // dim[0].sm
85 //                  <@type_desc_3, 48>, // dim[1].lower_bound
86 //                  <@type_desc_3, 56>, // dim[1].extent
87 //                  <@type_desc_3, 64>} // dim[1].sm
88 //   }
89 // etc.
90 //
91 // Note that the TBAA type descriptors cannot represent array members
92 // of structures, so the `dim` array in the descriptor structure
93 // has to be represented as a linear set of members.
94 //
95 // We can use the same TBAA type descriptor for all members of the F18
96 // descriptor structure, because the actual accesses of the F18 descriptor
97 // members will be disambiguated based on their offset off the beginning
98 // of the descriptor. Thus, all members have the same `<descriptor member>`
99 // type in the TBAA graph.
100 //
101 // The TBAA type descriptors have to be created during FIR to LLVM type
102 // conversion, so fir::LLVMTypeConverter has to provide the member offsets
103 // to TBAABuilder - the offsets must be computed based on the LLVM type
104 // to which the FIR type is being converted.
105 //
106 // TBAABuilder keeps a map between the FIR type and its TBAA type descriptor.
107 // The map is used when a TBAA tag needs to be attached to a memory accessing
108 // operation given the FIR types identifying the access's base and access type
109 // and the offset within the base type, e.g. an access of one dimensional
110 // descriptor's `base_addr` member may be defined by:
111 //   * base FIR type: !fir.box<!fir.array<?xf32>> - the resulting
112 //     access tag will use `<CFI_cdesc_t_dim1>` type descriptor for the base
113 //     type.
114 //   * access FIR type: <undefined> - all accesses within descriptors
115 //     are always represented with `<descriptor member>` type descriptor.
116 //   * offset:
117 //       llvm.getelementptr %arg0[0, 0] :
118 //           (!llvm.ptr<struct<(ptr<f32>, i64, i32, i8, i8, i8, i8,
119 //                              array<1 x array<3 x i64>>)>>) ->
120 //           !llvm.ptr<ptr<f32>>
121 //     The offset is computed based on the LLVM::GEPOp's indices and the LLVM
122 //     type layout.
123 //
124 // Detailed representation of the layout of the F18 descriptors is required
125 // to disambiguate accesses of the different members of the descriptors,
126 // e.g. a read of `base_addr` member (of one box) can never alias with
127 // a write of `rank` member (of another box).
128 //
129 // TODO: define handling of assumed-rank arrays' boxes (they can always
130 // be represented with a conservative tag:
131 //   < `<descriptor member>`, `<descriptor member>`, 0 >
132 // so that they alias with any other box accesses).
133 //
134 // The same representation can be used for user-defined types, though,
135 // strict type aliasing cannot be applied for Fortran programs without
136 // additional guarantees from the user. Fortran's storage association
137 // constructs provide a way to alias data of different types, so using
138 // TBAA would be incorrect, e.g.:
139 //   subroutine test()
140 //     real :: a
141 //     integer :: b
142 //     equivalence (a, b)
143 //     a = 1.0
144 //     call test2(b)
145 //   end subroutine test
146 //
147 // The store operation for `a = 1.0` has the base/access type `f32`,
148 // while a load from `b` inside `test2` will have base/access type
149 // `i32`. Due to the storage association the store and the load alias,
150 // so using the access types to create TBAA access tags may result
151 // in an incorrect result if `test2` was inlined. Moreover, in the scope
152 // of `test2` Flang is not able to identify whether `b` is part
153 // of an equivalence.
154 //
155 // TBAA may still be applied for programs not using storage association
156 // for objects of different data types (e.g. under an opt-in compiler option).
157 //
158 // The initial implementation does not create detailed type descriptors
159 // for box types and always uses the conservative box access tag:
160 //   < `<descriptor member>`, `<descriptor member>`, 0 >
161 //
162 // Given the storage association, all non-box accesses are represented
163 // with the conservative data access tag:
164 //   < `<any data access>`, `<any data access>`, 0 >
165 
166 // Additional tags are added in flang/Optimizer/Transforms/AddAliasTags.cpp
167 // (before CodeGen)
168 class TBAABuilder {
169 public:
170   /// If forceUnifiedTree is true, functions will not have different TBAA trees.
171   TBAABuilder(mlir::MLIRContext *context, bool applyTBAA,
172               bool forceUnifiedTree = false);
173   TBAABuilder(TBAABuilder const &) = delete; // Not copy-constructible.
174   TBAABuilder &operator=(TBAABuilder const &) = delete; // Not copy-assignable.
175 
176   // Attach the llvm.tbaa attribute to the given memory accessing operation
177   // based on the provided base/access FIR types and the GEPOp.
178   void attachTBAATag(mlir::LLVM::AliasAnalysisOpInterface op,
179                      mlir::Type baseFIRType, mlir::Type accessFIRType,
180                      mlir::LLVM::GEPOp gep);
181 
182 private:
183   // Find or create a TBAATagAttr attribute (TBAA access tag) with the
184   // specified components and return it.
185   mlir::LLVM::TBAATagAttr
186   getAccessTag(mlir::LLVM::TBAATypeDescriptorAttr baseTypeDesc,
187                mlir::LLVM::TBAATypeDescriptorAttr accessTypeDesc,
188                int64_t offset);
189 
190   // Returns the TBAATagAttr representing the access tag:
191   //   < <descriptor member>, <descriptor member>, 0 >
192   mlir::LLVM::TBAATagAttr getAnyBoxAccessTag(mlir::LLVM::LLVMFuncOp func);
193   // Returns the TBAATagAttr representing the access tag:
194   //   < <any data access>, <any data access>, 0 >
195   mlir::LLVM::TBAATagAttr getAnyDataAccessTag(mlir::LLVM::LLVMFuncOp func);
196   // Returns the TBAATagAttr representing the access tag:
197   //   < <any access>, <any access>, 0 >
198   mlir::LLVM::TBAATagAttr getAnyAccessTag(mlir::LLVM::LLVMFuncOp func);
199 
200   // Returns the TBAATagAttr representing the access tag described by the base
201   // and access FIR types and the LLVM::GEPOp representing the access in terms
202   // of the FIR types converted to LLVM types. The base type must be derived
203   // from fir::BaseBoxType.
204   mlir::LLVM::TBAATagAttr getBoxAccessTag(mlir::Type baseFIRType,
205                                           mlir::Type accessFIRType,
206                                           mlir::LLVM::GEPOp gep,
207                                           mlir::LLVM::LLVMFuncOp func);
208 
209   // Returns the TBAATagAttr representing the access tag described by the base
210   // and access FIR types and the LLVM::GEPOp representing the access in terms
211   // of the FIR types converted to LLVM types. The FIR types must describe the
212   // "data" access, i.e. not an access of any box/descriptor member.
213   mlir::LLVM::TBAATagAttr getDataAccessTag(mlir::Type baseFIRType,
214                                            mlir::Type accessFIRType,
215                                            mlir::LLVM::GEPOp gep,
216                                            mlir::LLVM::LLVMFuncOp func);
217 
218   // Set to true if the TBAA builder is active; otherwise, all public
219   // methods are no-ops.
220   bool enableTBAA;
221 
222   // Number of attached TBAA tags (used for debugging).
223   unsigned tagAttachmentCounter = 0;
224 
225   // Mapping from a FIR type to the corresponding TBAATypeDescriptorAttr. It
226   // must be populated during the type conversion. Currently unused.
227   llvm::DenseMap<mlir::Type, mlir::LLVM::TBAATypeDescriptorAttr> typeDescMap;
228 
229   // Each TBAA tag is a tuple of <baseTypeSym, accessTypeSym, offset>.
230   // This map holds a TBAATagAttr for each unique tuple.
231   llvm::DenseMap<
232       std::tuple<mlir::LLVM::TBAANodeAttr, mlir::LLVM::TBAANodeAttr, int64_t>,
233       mlir::LLVM::TBAATagAttr>
234       tagsMap;
235 
236   TBAAForrest trees; // NOTE(review): presumably the TBAA trees; confirm.
237 };
238 
239 } // namespace fir
240 
241 #endif // FORTRAN_OPTIMIZER_CODEGEN_TBAABUILDER_H
242