xref: /llvm-project/flang/include/flang/Optimizer/Builder/Character.h (revision 7046202c3dde093420c08e40116568e76a48ee59)
1 //===-- Character.h -- lowering of characters -------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef FORTRAN_OPTIMIZER_BUILDER_CHARACTER_H
14 #define FORTRAN_OPTIMIZER_BUILDER_CHARACTER_H
15 
16 #include "flang/Optimizer/Builder/BoxValue.h"
17 #include "flang/Optimizer/Builder/LowLevelIntrinsics.h"
18 #include "flang/Optimizer/Builder/Runtime/Character.h"
19 
20 namespace fir {
21 class FirOpBuilder; // Forward declaration; only used by reference in this header.
22 } // namespace fir
23 
24 namespace fir::factory {
25 
26 /// Helper to facilitate lowering of CHARACTER in FIR.
27 class CharacterExprHelper {
28 public:
29   /// Constructor.
CharacterExprHelper(FirOpBuilder & builder,mlir::Location loc)30   explicit CharacterExprHelper(FirOpBuilder &builder, mlir::Location loc)
31       : builder{builder}, loc{loc} {}
32   CharacterExprHelper(const CharacterExprHelper &) = delete;
33 
34   /// Copy the \p count first characters of \p src into \p dest.
35   /// \p count can have any integer type.
36   void createCopy(const fir::CharBoxValue &dest, const fir::CharBoxValue &src,
37                   mlir::Value count);
38 
39   /// Set characters of \p str at position [\p lower, \p upper) to blanks.
40   /// \p lower and \upper bounds are zero based.
41   /// If \p upper <= \p lower, no padding is done.
42   /// \p upper and \p lower can have any integer type.
43   void createPadding(const fir::CharBoxValue &str, mlir::Value lower,
44                      mlir::Value upper);
45 
46   /// Create str(lb:ub), lower bounds must always be specified, upper
47   /// bound is optional.
48   fir::CharBoxValue createSubstring(const fir::CharBoxValue &str,
49                                     llvm::ArrayRef<mlir::Value> bounds);
50 
51   /// Compute substring base address given the raw address (not fir.boxchar) of
52   /// a scalar string, a substring / lower bound, and the substring type.
53   mlir::Value genSubstringBase(mlir::Value stringRawAddr,
54                                mlir::Value lowerBound,
55                                mlir::Type substringAddrType,
56                                mlir::Value one = {});
57 
58   /// Return blank character of given \p type !fir.char<kind>
59   mlir::Value createBlankConstant(fir::CharacterType type);
60 
61   /// Lower \p lhs = \p rhs where \p lhs and \p rhs are scalar characters.
62   /// It handles cases where \p lhs and \p rhs may overlap.
63   void createAssign(const fir::ExtendedValue &lhs,
64                     const fir::ExtendedValue &rhs);
65 
66   /// Create lhs // rhs in temp obtained with fir.alloca
67   fir::CharBoxValue createConcatenate(const fir::CharBoxValue &lhs,
68                                       const fir::CharBoxValue &rhs);
69 
70   /// Create {max,min}(lhs,rhs) in temp obtained with fir.alloca
71   fir::CharBoxValue
72   createCharExtremum(bool predIsMin, llvm::ArrayRef<fir::CharBoxValue> opCBVs);
73 
74   /// LEN_TRIM intrinsic.
75   mlir::Value createLenTrim(const fir::CharBoxValue &str);
76 
77   /// Embox \p addr and \p len and return fir.boxchar.
78   /// Take care of type conversions before emboxing.
79   /// \p len is converted to the integer type for character lengths if needed.
80   mlir::Value createEmboxChar(mlir::Value addr, mlir::Value len);
81   /// Create a fir.boxchar for \p str. If \p str is not in memory, a temp is
82   /// allocated to create the fir.boxchar.
83   mlir::Value createEmbox(const fir::CharBoxValue &str);
84   /// Embox a string array. Note that the size/shape of the array is not
85   /// retrievable from the resulting mlir::Value.
86   mlir::Value createEmbox(const fir::CharArrayBoxValue &str);
87 
88   /// Convert character array to a scalar by reducing the extents into the
89   /// length. Will fail if call on non reference like base.
90   fir::CharBoxValue toScalarCharacter(const fir::CharArrayBoxValue &);
91 
92   /// Unbox \p boxchar into (fir.ref<fir.char<kind>>, character length type).
93   std::pair<mlir::Value, mlir::Value> createUnboxChar(mlir::Value boxChar);
94 
95   /// Allocate a temp of fir::CharacterType type and length len.
96   /// Returns related fir.ref<fir.array<? x fir.char<kind>>>.
97   fir::CharBoxValue createCharacterTemp(mlir::Type type, mlir::Value len);
98 
99   /// Allocate a temp of compile time constant length.
100   /// Returns related fir.ref<fir.array<len x fir.char<kind>>>.
101   fir::CharBoxValue createCharacterTemp(mlir::Type type, int len);
102 
103   /// Create a temporary with the same kind, length, and value as source.
104   fir::CharBoxValue createTempFrom(const fir::ExtendedValue &source);
105 
106   /// Return true if \p type is a character literal type (is
107   /// `fir.array<len x fir.char<kind>>`).;
108   static bool isCharacterLiteral(mlir::Type type);
109 
110   /// Return true if \p type is one of the following type
111   /// - fir.boxchar<kind>
112   /// - fir.ref<fir.char<kind,len>>
113   /// - fir.char<kind,len>
114   static bool isCharacterScalar(mlir::Type type);
115 
116   /// Does this extended value base type is fir.char<kind,len>
117   /// where len is not the unknown extent ?
118   static bool hasConstantLengthInType(const fir::ExtendedValue &);
119 
120   /// Extract the kind of a character type
121   static fir::KindTy getCharacterKind(mlir::Type type);
122 
123   /// Extract the kind of a character or array of character type.
124   static fir::KindTy getCharacterOrSequenceKind(mlir::Type type);
125 
126   // TODO: Do we really need all these flavors of unwrapping to get the fir.char
127   // type? Or can we merge these? It would be better to merge them and eliminate
128   // the confusion.
129 
130   /// Determine the inner character type. Unwraps references, boxes, and
131   /// sequences to find the !fir.char element type.
132   static fir::CharacterType getCharType(mlir::Type type);
133 
134   /// Get fir.char<kind> type with the same kind as inside str.
135   static fir::CharacterType getCharacterType(mlir::Type type);
136   static fir::CharacterType getCharacterType(const fir::CharBoxValue &box);
137   static fir::CharacterType getCharacterType(mlir::Value str);
138 
139   /// Create an extended value from a value of type:
140   /// - fir.boxchar<kind>
141   /// - fir.ref<fir.char<kind,len>>
142   /// - fir.char<kind,len>
143   /// or the array versions:
144   /// - fir.ref<fir.array<n x...x fir.char<kind,len>>>
145   /// - fir.array<n x...x fir.char<kind,len>>
146   ///
147   /// Does the heavy lifting of converting the value \p character (along with an
148   /// optional \p len value) to an extended value. If \p len is null, a length
149   /// value is extracted from \p character (or its type). This will produce an
150   /// error if it's not possible. The returned value is a CharBoxValue if \p
151   /// character is a scalar, otherwise it is a CharArrayBoxValue.
152   fir::ExtendedValue toExtendedValue(mlir::Value character,
153                                      mlir::Value len = {});
154 
155   /// Is `type` a sequence (array) of CHARACTER type? Return true for any of the
156   /// following cases:
157   ///   - !fir.array<dim x ... x !fir.char<kind, len>>
158   ///   - !fir.ref<T>  where T is either of the first case
159   ///   - !fir.box<T>  where T is either of the first case
160   ///
161   /// In certain contexts, Fortran allows an array of CHARACTERs to be treated
162   /// as if it were one longer CHARACTER scalar, each element append to the
163   /// previous.
164   static bool isArray(mlir::Type type);
165 
166   /// Temporary helper to help migrating towards properties of
167   /// ExtendedValue containing characters.
168   /// Mainly, this ensure that characters are always CharArrayBoxValue,
169   /// CharBoxValue, or BoxValue and that the base address is not a boxchar.
170   /// Return the argument if this is not a character.
171   /// TODO: Create and propagate ExtendedValue according to properties listed
172   /// above instead of fixing it when needed.
173   fir::ExtendedValue cleanUpCharacterExtendedValue(const fir::ExtendedValue &);
174 
175   /// Create fir.char<kind> singleton from \p code integer value.
176   mlir::Value createSingletonFromCode(mlir::Value code, int kind);
177   /// Returns integer value held in a character singleton.
178   mlir::Value extractCodeFromSingleton(mlir::Value singleton);
179 
180   /// Create a value for the length of a character based on its memory reference
181   /// that may be a boxchar, box or !fir.[ptr|ref|heap]<fir.char<kind, len>>. If
182   /// the memref is a simple address and the length is not constant in type, the
183   /// returned length will be empty.
184   mlir::Value getLength(mlir::Value memref);
185 
186   /// Compute length given a fir.box describing a character entity.
187   /// It adjusts the length from the number of bytes per the descriptor
188   /// to the number of characters per the Fortran KIND.
189   mlir::Value readLengthFromBox(mlir::Value box);
190 
191   /// Same as readLengthFromBox but the CharacterType is provided.
192   mlir::Value readLengthFromBox(mlir::Value box, fir::CharacterType charTy);
193 
194 private:
195   /// FIXME: the implementation also needs a clean-up now that
196   /// CharBoxValue are better propagated.
197   fir::CharBoxValue materializeValue(mlir::Value str);
198   mlir::Value getCharBoxBuffer(const fir::CharBoxValue &box);
199   mlir::Value createElementAddr(mlir::Value buffer, mlir::Value index);
200   mlir::Value createLoadCharAt(mlir::Value buff, mlir::Value index);
201   void createStoreCharAt(mlir::Value str, mlir::Value index, mlir::Value c);
202   void createLengthOneAssign(const fir::CharBoxValue &lhs,
203                              const fir::CharBoxValue &rhs);
204   void createAssign(const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs);
205   mlir::Value createBlankConstantCode(fir::CharacterType type);
206 
207 private:
208   FirOpBuilder &builder;
209   mlir::Location loc;
210 };
211 
212 //===----------------------------------------------------------------------===//
213 // Tools to work with Character dummy procedures
214 //===----------------------------------------------------------------------===//
215 
216 /// Create a tuple<function type, length type> type used to pass character
217 /// functions as arguments along with their length. The function type set in
218 /// the tuple is the one provided by \p funcPointerType.
219 mlir::Type getCharacterProcedureTupleType(mlir::Type funcPointerType);
220 
221 /// Create a tuple<addr, len> given \p addr and \p len as well as the tuple
222 /// type \p tupleType. \p addr must be any function address, and \p len may be
223 /// any integer or nullptr. Conversions are inserted as needed when the types
224 /// of \p addr and \p len differ from the ones inside \p tupleType.
225 mlir::Value createCharacterProcedureTuple(fir::FirOpBuilder &builder,
226                                           mlir::Location loc,
227                                           mlir::Type tupleType,
228                                           mlir::Value addr, mlir::Value len);
229 
230 /// Given a tuple containing a character function address and its result length,
231 /// extract the tuple into a pair of values <function address, result length>.
232 /// If \p openBoxProc is true, the function address is extracted from the
233 /// fir.boxproc; otherwise, the returned function address is the fir.boxproc.
234 std::pair<mlir::Value, mlir::Value>
235 extractCharacterProcedureTuple(fir::FirOpBuilder &builder, mlir::Location loc,
236                                mlir::Value tuple, bool openBoxProc = true);
237 
/// Takes a character box value \p srcBoxChar and a target kind \p toKind and
/// returns a fir::CharBoxValue.
/// NOTE(review): presumably the result holds the contents of \p srcBoxChar
/// converted to character kind \p toKind; the implementation is not visible
/// from this header -- confirm before relying on it.
238 fir::CharBoxValue convertCharacterKind(fir::FirOpBuilder &builder,
239                                        mlir::Location loc,
240                                        fir::CharBoxValue srcBoxChar,
241                                        int toKind);
242 
243 } // namespace fir::factory
244 
245 #endif // FORTRAN_OPTIMIZER_BUILDER_CHARACTER_H
246