xref: /llvm-project/flang/lib/Optimizer/Builder/Character.cpp (revision fac349a169976f822fb27f03e623fa0d28aec1f3)
1 //===-- Character.cpp -----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "flang/Optimizer/Builder/Character.h"
14 #include "flang/Optimizer/Builder/DoLoopHelper.h"
15 #include "flang/Optimizer/Builder/FIRBuilder.h"
16 #include "flang/Optimizer/Builder/Todo.h"
17 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
18 #include "llvm/Support/Debug.h"
19 #include <optional>
20 
21 #define DEBUG_TYPE "flang-lower-character"
22 
23 //===----------------------------------------------------------------------===//
24 // CharacterExprHelper implementation
25 //===----------------------------------------------------------------------===//
26 
27 /// Unwrap all the ref and box types and return the inner element type.
unwrapBoxAndRef(mlir::Type type)28 static mlir::Type unwrapBoxAndRef(mlir::Type type) {
29   if (auto boxType = mlir::dyn_cast<fir::BoxCharType>(type))
30     return boxType.getEleTy();
31   while (true) {
32     type = fir::unwrapRefType(type);
33     if (auto boxTy = mlir::dyn_cast<fir::BoxType>(type))
34       type = boxTy.getEleTy();
35     else
36       break;
37   }
38   return type;
39 }
40 
41 /// Unwrap base fir.char<kind,len> type.
recoverCharacterType(mlir::Type type)42 static fir::CharacterType recoverCharacterType(mlir::Type type) {
43   type = fir::unwrapSequenceType(unwrapBoxAndRef(type));
44   if (auto charTy = mlir::dyn_cast<fir::CharacterType>(type))
45     return charTy;
46   llvm::report_fatal_error("expected a character type");
47 }
48 
isCharacterScalar(mlir::Type type)49 bool fir::factory::CharacterExprHelper::isCharacterScalar(mlir::Type type) {
50   type = unwrapBoxAndRef(type);
51   return !mlir::isa<fir::SequenceType>(type) && fir::isa_char(type);
52 }
53 
isArray(mlir::Type type)54 bool fir::factory::CharacterExprHelper::isArray(mlir::Type type) {
55   type = unwrapBoxAndRef(type);
56   if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(type))
57     return fir::isa_char(seqTy.getEleTy());
58   return false;
59 }
60 
61 fir::CharacterType
getCharacterType(mlir::Type type)62 fir::factory::CharacterExprHelper::getCharacterType(mlir::Type type) {
63   assert(isCharacterScalar(type) && "expected scalar character");
64   return recoverCharacterType(type);
65 }
66 
67 fir::CharacterType
getCharType(mlir::Type type)68 fir::factory::CharacterExprHelper::getCharType(mlir::Type type) {
69   return recoverCharacterType(type);
70 }
71 
getCharacterType(const fir::CharBoxValue & box)72 fir::CharacterType fir::factory::CharacterExprHelper::getCharacterType(
73     const fir::CharBoxValue &box) {
74   return getCharacterType(box.getBuffer().getType());
75 }
76 
77 fir::CharacterType
getCharacterType(mlir::Value str)78 fir::factory::CharacterExprHelper::getCharacterType(mlir::Value str) {
79   return getCharacterType(str.getType());
80 }
81 
82 /// Determine the static size of the character. Returns the computed size, not
83 /// an IR Value.
84 static std::optional<fir::CharacterType::LenType>
getCompileTimeLength(const fir::CharBoxValue & box)85 getCompileTimeLength(const fir::CharBoxValue &box) {
86   auto len = recoverCharacterType(box.getBuffer().getType()).getLen();
87   if (len == fir::CharacterType::unknownLen())
88     return {};
89   return len;
90 }
91 
92 /// Detect the precondition that the value `str` does not reside in memory. Such
93 /// values will have a type `!fir.array<...x!fir.char<N>>` or `!fir.char<N>`.
needToMaterialize(mlir::Value str)94 LLVM_ATTRIBUTE_UNUSED static bool needToMaterialize(mlir::Value str) {
95   return mlir::isa<fir::SequenceType>(str.getType()) ||
96          fir::isa_char(str.getType());
97 }
98 
99 /// This is called only if `str` does not reside in memory. Such a bare string
100 /// value will be converted into a memory-based temporary and an extended
101 /// boxchar value returned.
102 fir::CharBoxValue
materializeValue(mlir::Value str)103 fir::factory::CharacterExprHelper::materializeValue(mlir::Value str) {
104   assert(needToMaterialize(str));
105   auto ty = str.getType();
106   assert(isCharacterScalar(ty) && "expected scalar character");
107   auto charTy = mlir::dyn_cast<fir::CharacterType>(ty);
108   if (!charTy || charTy.getLen() == fir::CharacterType::unknownLen()) {
109     LLVM_DEBUG(llvm::dbgs() << "cannot materialize: " << str << '\n');
110     llvm_unreachable("must be a !fir.char<N> type");
111   }
112   auto len = builder.createIntegerConstant(
113       loc, builder.getCharacterLengthType(), charTy.getLen());
114   auto temp = builder.create<fir::AllocaOp>(loc, charTy);
115   builder.create<fir::StoreOp>(loc, str, temp);
116   LLVM_DEBUG(llvm::dbgs() << "materialized as local: " << str << " -> (" << temp
117                           << ", " << len << ")\n");
118   return {temp, len};
119 }
120 
121 fir::ExtendedValue
toExtendedValue(mlir::Value character,mlir::Value len)122 fir::factory::CharacterExprHelper::toExtendedValue(mlir::Value character,
123                                                    mlir::Value len) {
124   auto lenType = builder.getCharacterLengthType();
125   auto type = character.getType();
126   auto base = fir::isa_passbyref_type(type) ? character : mlir::Value{};
127   auto resultLen = len;
128   llvm::SmallVector<mlir::Value> extents;
129 
130   if (auto eleType = fir::dyn_cast_ptrEleTy(type))
131     type = eleType;
132 
133   if (auto arrayType = mlir::dyn_cast<fir::SequenceType>(type)) {
134     type = arrayType.getEleTy();
135     auto indexType = builder.getIndexType();
136     for (auto extent : arrayType.getShape()) {
137       if (extent == fir::SequenceType::getUnknownExtent())
138         break;
139       extents.emplace_back(
140           builder.createIntegerConstant(loc, indexType, extent));
141     }
142     // Last extent might be missing in case of assumed-size. If more extents
143     // could not be deduced from type, that's an error (a fir.box should
144     // have been used in the interface).
145     if (extents.size() + 1 < arrayType.getShape().size())
146       mlir::emitError(loc, "cannot retrieve array extents from type");
147   }
148 
149   if (auto charTy = mlir::dyn_cast<fir::CharacterType>(type)) {
150     if (!resultLen && charTy.getLen() != fir::CharacterType::unknownLen())
151       resultLen = builder.createIntegerConstant(loc, lenType, charTy.getLen());
152   } else if (auto boxCharType = mlir::dyn_cast<fir::BoxCharType>(type)) {
153     auto refType = builder.getRefType(boxCharType.getEleTy());
154     // If the embox is accessible, use its operand to avoid filling
155     // the generated fir with embox/unbox.
156     mlir::Value boxCharLen;
157     if (auto definingOp = character.getDefiningOp()) {
158       if (auto box = mlir::dyn_cast<fir::EmboxCharOp>(definingOp)) {
159         base = box.getMemref();
160         boxCharLen = box.getLen();
161       }
162     }
163     if (!boxCharLen) {
164       auto unboxed =
165           builder.create<fir::UnboxCharOp>(loc, refType, lenType, character);
166       base = builder.createConvert(loc, refType, unboxed.getResult(0));
167       boxCharLen = unboxed.getResult(1);
168     }
169     if (!resultLen) {
170       resultLen = boxCharLen;
171     }
172   } else if (mlir::isa<fir::BoxType>(type)) {
173     mlir::emitError(loc, "descriptor or derived type not yet handled");
174   } else {
175     llvm_unreachable("Cannot translate mlir::Value to character ExtendedValue");
176   }
177 
178   if (!base) {
179     if (auto load =
180             mlir::dyn_cast_or_null<fir::LoadOp>(character.getDefiningOp())) {
181       base = load.getOperand();
182     } else {
183       return materializeValue(fir::getBase(character));
184     }
185   }
186   if (!resultLen)
187     llvm::report_fatal_error("no dynamic length found for character");
188   if (!extents.empty())
189     return fir::CharArrayBoxValue{base, resultLen, extents};
190   return fir::CharBoxValue{base, resultLen};
191 }
192 
getSingletonCharType(mlir::MLIRContext * ctxt,int kind)193 static mlir::Type getSingletonCharType(mlir::MLIRContext *ctxt, int kind) {
194   return fir::CharacterType::getSingleton(ctxt, kind);
195 }
196 
197 mlir::Value
createEmbox(const fir::CharBoxValue & box)198 fir::factory::CharacterExprHelper::createEmbox(const fir::CharBoxValue &box) {
199   // Base CharBoxValue of CharArrayBoxValue are ok here (do not require a scalar
200   // type)
201   auto charTy = recoverCharacterType(box.getBuffer().getType());
202   auto boxCharType =
203       fir::BoxCharType::get(builder.getContext(), charTy.getFKind());
204   auto refType = fir::ReferenceType::get(boxCharType.getEleTy());
205   mlir::Value buff = box.getBuffer();
206   // fir.boxchar requires a memory reference. Allocate temp if the character is
207   // not in memory.
208   if (!fir::isa_ref_type(buff.getType())) {
209     auto temp = builder.createTemporary(loc, buff.getType());
210     builder.create<fir::StoreOp>(loc, buff, temp);
211     buff = temp;
212   }
213   // fir.emboxchar only accepts scalar, cast array buffer to a scalar buffer.
214   if (mlir::isa<fir::SequenceType>(fir::dyn_cast_ptrEleTy(buff.getType())))
215     buff = builder.createConvert(loc, refType, buff);
216   // Convert in case the provided length is not of the integer type that must
217   // be used in boxchar.
218   auto len = builder.createConvert(loc, builder.getCharacterLengthType(),
219                                    box.getLen());
220   return builder.create<fir::EmboxCharOp>(loc, boxCharType, buff, len);
221 }
222 
toScalarCharacter(const fir::CharArrayBoxValue & box)223 fir::CharBoxValue fir::factory::CharacterExprHelper::toScalarCharacter(
224     const fir::CharArrayBoxValue &box) {
225   if (mlir::isa<fir::PointerType>(box.getBuffer().getType()))
226     TODO(loc, "concatenating non contiguous character array into a scalar");
227 
228   // TODO: add a fast path multiplying new length at compile time if the info is
229   // in the array type.
230   auto lenType = builder.getCharacterLengthType();
231   auto len = builder.createConvert(loc, lenType, box.getLen());
232   for (auto extent : box.getExtents())
233     len = builder.create<mlir::arith::MulIOp>(
234         loc, len, builder.createConvert(loc, lenType, extent));
235 
236   // TODO: typeLen can be improved in compiled constant cases
237   // TODO: allow bare fir.array<> (no ref) conversion here ?
238   auto typeLen = fir::CharacterType::unknownLen();
239   auto kind = recoverCharacterType(box.getBuffer().getType()).getFKind();
240   auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen);
241   auto type = fir::ReferenceType::get(charTy);
242   auto buffer = builder.createConvert(loc, type, box.getBuffer());
243   return {buffer, len};
244 }
245 
createEmbox(const fir::CharArrayBoxValue & box)246 mlir::Value fir::factory::CharacterExprHelper::createEmbox(
247     const fir::CharArrayBoxValue &box) {
248   // Use same embox as for scalar. It's losing the actual data size information
249   // (We do not multiply the length by the array size), but that is what Fortran
250   // call interfaces using boxchar expect.
251   return createEmbox(static_cast<const fir::CharBoxValue &>(box));
252 }
253 
254 /// Get the address of the element at position \p index of the scalar character
255 /// \p buffer.
256 /// \p buffer must be of type !fir.ref<fir.char<k, len>>. The length may be
257 /// unknown. \p index must have any integer type, and is zero based. The return
258 /// value is a singleton address (!fir.ref<!fir.char<kind>>)
259 mlir::Value
createElementAddr(mlir::Value buffer,mlir::Value index)260 fir::factory::CharacterExprHelper::createElementAddr(mlir::Value buffer,
261                                                      mlir::Value index) {
262   // The only way to address an element of a fir.ref<char<kind, len>> is to cast
263   // it to a fir.array<len x fir.char<kind>> and use fir.coordinate_of.
264   auto bufferType = buffer.getType();
265   assert(fir::isa_ref_type(bufferType));
266   assert(isCharacterScalar(bufferType));
267   auto charTy = recoverCharacterType(bufferType);
268   auto singleTy = getSingletonCharType(builder.getContext(), charTy.getFKind());
269   auto singleRefTy = builder.getRefType(singleTy);
270   auto extent = fir::SequenceType::getUnknownExtent();
271   if (charTy.getLen() != fir::CharacterType::unknownLen())
272     extent = charTy.getLen();
273   auto coorTy = builder.getRefType(fir::SequenceType::get({extent}, singleTy));
274 
275   auto coor = builder.createConvert(loc, coorTy, buffer);
276   auto i = builder.createConvert(loc, builder.getIndexType(), index);
277   return builder.create<fir::CoordinateOp>(loc, singleRefTy, coor, i);
278 }
279 
280 /// Load a character out of `buff` from offset `index`.
281 /// `buff` must be a reference to memory.
282 mlir::Value
createLoadCharAt(mlir::Value buff,mlir::Value index)283 fir::factory::CharacterExprHelper::createLoadCharAt(mlir::Value buff,
284                                                     mlir::Value index) {
285   LLVM_DEBUG(llvm::dbgs() << "load a char: " << buff << " type: "
286                           << buff.getType() << " at: " << index << '\n');
287   return builder.create<fir::LoadOp>(loc, createElementAddr(buff, index));
288 }
289 
290 /// Store the singleton character `c` to `str` at offset `index`.
291 /// `str` must be a reference to memory.
createStoreCharAt(mlir::Value str,mlir::Value index,mlir::Value c)292 void fir::factory::CharacterExprHelper::createStoreCharAt(mlir::Value str,
293                                                           mlir::Value index,
294                                                           mlir::Value c) {
295   LLVM_DEBUG(llvm::dbgs() << "store the char: " << c << " into: " << str
296                           << " type: " << str.getType() << " at: " << index
297                           << '\n');
298   auto addr = createElementAddr(str, index);
299   builder.create<fir::StoreOp>(loc, c, addr);
300 }
301 
302 // FIXME: this temp is useless... either fir.coordinate_of needs to
303 // work on "loaded" characters (!fir.array<len x fir.char<kind>>) or
304 // character should never be loaded.
305 // If this is a fir.array<>, allocate and store the value so that
306 // fir.cooridnate_of can be use on the value.
getCharBoxBuffer(const fir::CharBoxValue & box)307 mlir::Value fir::factory::CharacterExprHelper::getCharBoxBuffer(
308     const fir::CharBoxValue &box) {
309   auto buff = box.getBuffer();
310   if (fir::isa_char(buff.getType())) {
311     auto newBuff = builder.create<fir::AllocaOp>(loc, buff.getType());
312     builder.create<fir::StoreOp>(loc, buff, newBuff);
313     return newBuff;
314   }
315   return buff;
316 }
317 
318 /// Create a loop to copy `count` characters from `src` to `dest`. Note that the
319 /// KIND indicates the number of bits in a code point. (ASCII, UCS-2, or UCS-4.)
createCopy(const fir::CharBoxValue & dest,const fir::CharBoxValue & src,mlir::Value count)320 void fir::factory::CharacterExprHelper::createCopy(
321     const fir::CharBoxValue &dest, const fir::CharBoxValue &src,
322     mlir::Value count) {
323   auto fromBuff = getCharBoxBuffer(src);
324   auto toBuff = getCharBoxBuffer(dest);
325   LLVM_DEBUG(llvm::dbgs() << "create char copy from: "; src.dump();
326              llvm::dbgs() << " to: "; dest.dump();
327              llvm::dbgs() << " count: " << count << '\n');
328   auto kind = getCharacterKind(src.getBuffer().getType());
329   // If the src and dest are the same KIND, then use memmove to move the bits.
330   // We don't have to worry about overlapping ranges with memmove.
331   if (getCharacterKind(dest.getBuffer().getType()) == kind) {
332     auto bytes = builder.getKindMap().getCharacterBitsize(kind) / 8;
333     auto i64Ty = builder.getI64Type();
334     auto kindBytes = builder.createIntegerConstant(loc, i64Ty, bytes);
335     auto castCount = builder.createConvert(loc, i64Ty, count);
336     auto totalBytes =
337         builder.create<mlir::arith::MulIOp>(loc, kindBytes, castCount);
338     auto notVolatile = builder.createBool(loc, false);
339     auto memmv = getLlvmMemmove(builder);
340     auto argTys = memmv.getFunctionType().getInputs();
341     auto toPtr = builder.createConvert(loc, argTys[0], toBuff);
342     auto fromPtr = builder.createConvert(loc, argTys[1], fromBuff);
343     builder.create<fir::CallOp>(
344         loc, memmv, mlir::ValueRange{toPtr, fromPtr, totalBytes, notVolatile});
345     return;
346   }
347 
348   // Convert a CHARACTER of one KIND into a CHARACTER of another KIND.
349   builder.create<fir::CharConvertOp>(loc, src.getBuffer(), count,
350                                      dest.getBuffer());
351 }
352 
createPadding(const fir::CharBoxValue & str,mlir::Value lower,mlir::Value upper)353 void fir::factory::CharacterExprHelper::createPadding(
354     const fir::CharBoxValue &str, mlir::Value lower, mlir::Value upper) {
355   auto blank = createBlankConstant(getCharacterType(str));
356   // Always create the loop, if upper < lower, no iteration will be
357   // executed.
358   auto toBuff = getCharBoxBuffer(str);
359   fir::factory::DoLoopHelper{builder, loc}.createLoop(
360       lower, upper, [&](fir::FirOpBuilder &, mlir::Value index) {
361         createStoreCharAt(toBuff, index, blank);
362       });
363 }
364 
365 fir::CharBoxValue
createCharacterTemp(mlir::Type type,mlir::Value len)366 fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
367                                                        mlir::Value len) {
368   auto kind = recoverCharacterType(type).getFKind();
369   auto typeLen = fir::CharacterType::unknownLen();
370   // If len is a constant, reflect the length in the type.
371   if (auto cstLen = getIntIfConstant(len))
372     typeLen = *cstLen;
373   auto *ctxt = builder.getContext();
374   auto charTy = fir::CharacterType::get(ctxt, kind, typeLen);
375   llvm::SmallVector<mlir::Value> lenParams;
376   if (typeLen == fir::CharacterType::unknownLen())
377     lenParams.push_back(len);
378   auto ref = builder.allocateLocal(loc, charTy, "", ".chrtmp",
379                                    /*shape=*/std::nullopt, lenParams);
380   return {ref, len};
381 }
382 
createTempFrom(const fir::ExtendedValue & source)383 fir::CharBoxValue fir::factory::CharacterExprHelper::createTempFrom(
384     const fir::ExtendedValue &source) {
385   const auto *charBox = source.getCharBox();
386   if (!charBox)
387     fir::emitFatalError(loc, "source must be a fir::CharBoxValue");
388   auto len = charBox->getLen();
389   auto sourceTy = charBox->getBuffer().getType();
390   auto temp = createCharacterTemp(sourceTy, len);
391   if (fir::isa_ref_type(sourceTy)) {
392     createCopy(temp, *charBox, len);
393   } else {
394     auto ref = builder.createConvert(loc, builder.getRefType(sourceTy),
395                                      temp.getBuffer());
396     builder.create<fir::StoreOp>(loc, charBox->getBuffer(), ref);
397   }
398   return temp;
399 }
400 
401 // Simple length one character assignment without loops.
createLengthOneAssign(const fir::CharBoxValue & lhs,const fir::CharBoxValue & rhs)402 void fir::factory::CharacterExprHelper::createLengthOneAssign(
403     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
404   auto addr = lhs.getBuffer();
405   auto toTy = fir::unwrapRefType(addr.getType());
406   mlir::Value val = rhs.getBuffer();
407   if (fir::isa_ref_type(val.getType())) {
408     auto fromCharLen1RefTy = builder.getRefType(getSingletonCharType(
409         builder.getContext(),
410         getCharacterKind(fir::unwrapRefType(val.getType()))));
411     val = builder.create<fir::LoadOp>(
412         loc, builder.createConvert(loc, fromCharLen1RefTy, val));
413   }
414   auto toCharLen1Ty =
415       getSingletonCharType(builder.getContext(), getCharacterKind(toTy));
416   val = builder.createConvert(loc, toCharLen1Ty, val);
417   builder.create<fir::StoreOp>(
418       loc, val,
419       builder.createConvert(loc, builder.getRefType(toCharLen1Ty), addr));
420 }
421 
422 /// Returns the minimum of integer mlir::Value \p a and \b.
genMin(fir::FirOpBuilder & builder,mlir::Location loc,mlir::Value a,mlir::Value b)423 mlir::Value genMin(fir::FirOpBuilder &builder, mlir::Location loc,
424                    mlir::Value a, mlir::Value b) {
425   auto cmp = builder.create<mlir::arith::CmpIOp>(
426       loc, mlir::arith::CmpIPredicate::slt, a, b);
427   return builder.create<mlir::arith::SelectOp>(loc, cmp, a, b);
428 }
429 
createAssign(const fir::CharBoxValue & lhs,const fir::CharBoxValue & rhs)430 void fir::factory::CharacterExprHelper::createAssign(
431     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
432   auto rhsCstLen = getCompileTimeLength(rhs);
433   auto lhsCstLen = getCompileTimeLength(lhs);
434   bool compileTimeSameLength = false;
435   bool isLengthOneAssign = false;
436 
437   if (lhsCstLen && rhsCstLen && *lhsCstLen == *rhsCstLen) {
438     compileTimeSameLength = true;
439     if (*lhsCstLen == 1)
440       isLengthOneAssign = true;
441   } else if (rhs.getLen() == lhs.getLen()) {
442     compileTimeSameLength = true;
443 
444     // If the length values are the same for LHS and RHS,
445     // then we can rely on the constant length deduced from
446     // any of the two types.
447     if (lhsCstLen && *lhsCstLen == 1)
448       isLengthOneAssign = true;
449     if (rhsCstLen && *rhsCstLen == 1)
450       isLengthOneAssign = true;
451 
452     // We could have recognized constant operations here (e.g.
453     // two different arith.constant ops may produce the same value),
454     // but for now leave it to CSE to get rid of the duplicates.
455   }
456   if (isLengthOneAssign) {
457     createLengthOneAssign(lhs, rhs);
458     return;
459   }
460 
461   // Copy the minimum of the lhs and rhs lengths and pad the lhs remainder
462   // if needed.
463   auto copyCount = lhs.getLen();
464   auto idxTy = builder.getIndexType();
465   if (!compileTimeSameLength) {
466     auto lhsLen = builder.createConvert(loc, idxTy, lhs.getLen());
467     auto rhsLen = builder.createConvert(loc, idxTy, rhs.getLen());
468     copyCount = genMin(builder, loc, lhsLen, rhsLen);
469   }
470 
471   // Actual copy
472   createCopy(lhs, rhs, copyCount);
473 
474   // Pad if needed.
475   if (!compileTimeSameLength) {
476     auto one = builder.createIntegerConstant(loc, lhs.getLen().getType(), 1);
477     auto maxPadding =
478         builder.create<mlir::arith::SubIOp>(loc, lhs.getLen(), one);
479     createPadding(lhs, copyCount, maxPadding);
480   }
481 }
482 
createConcatenate(const fir::CharBoxValue & lhs,const fir::CharBoxValue & rhs)483 fir::CharBoxValue fir::factory::CharacterExprHelper::createConcatenate(
484     const fir::CharBoxValue &lhs, const fir::CharBoxValue &rhs) {
485   auto lhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
486                                       lhs.getLen());
487   auto rhsLen = builder.createConvert(loc, builder.getCharacterLengthType(),
488                                       rhs.getLen());
489   mlir::Value len = builder.create<mlir::arith::AddIOp>(loc, lhsLen, rhsLen);
490   auto temp = createCharacterTemp(getCharacterType(rhs), len);
491   createCopy(temp, lhs, lhsLen);
492   auto one = builder.createIntegerConstant(loc, len.getType(), 1);
493   auto upperBound = builder.create<mlir::arith::SubIOp>(loc, len, one);
494   auto lhsLenIdx = builder.createConvert(loc, builder.getIndexType(), lhsLen);
495   auto fromBuff = getCharBoxBuffer(rhs);
496   auto toBuff = getCharBoxBuffer(temp);
497   fir::factory::DoLoopHelper{builder, loc}.createLoop(
498       lhsLenIdx, upperBound, one,
499       [&](fir::FirOpBuilder &bldr, mlir::Value index) {
500         auto rhsIndex = bldr.create<mlir::arith::SubIOp>(loc, index, lhsLenIdx);
501         auto charVal = createLoadCharAt(fromBuff, rhsIndex);
502         createStoreCharAt(toBuff, index, charVal);
503       });
504   return temp;
505 }
506 
genSubstringBase(mlir::Value stringRawAddr,mlir::Value lowerBound,mlir::Type substringAddrType,mlir::Value one)507 mlir::Value fir::factory::CharacterExprHelper::genSubstringBase(
508     mlir::Value stringRawAddr, mlir::Value lowerBound,
509     mlir::Type substringAddrType, mlir::Value one) {
510   if (!one)
511     one = builder.createIntegerConstant(loc, lowerBound.getType(), 1);
512   auto offset =
513       builder.create<mlir::arith::SubIOp>(loc, lowerBound, one).getResult();
514   auto addr = createElementAddr(stringRawAddr, offset);
515   return builder.createConvert(loc, substringAddrType, addr);
516 }
517 
createSubstring(const fir::CharBoxValue & box,llvm::ArrayRef<mlir::Value> bounds)518 fir::CharBoxValue fir::factory::CharacterExprHelper::createSubstring(
519     const fir::CharBoxValue &box, llvm::ArrayRef<mlir::Value> bounds) {
520   // Constant need to be materialize in memory to use fir.coordinate_of.
521   auto nbounds = bounds.size();
522   if (nbounds < 1 || nbounds > 2) {
523     mlir::emitError(loc, "Incorrect number of bounds in substring");
524     return {mlir::Value{}, mlir::Value{}};
525   }
526   mlir::SmallVector<mlir::Value> castBounds;
527   // Convert bounds to length type to do safe arithmetic on it.
528   for (auto bound : bounds)
529     castBounds.push_back(
530         builder.createConvert(loc, builder.getCharacterLengthType(), bound));
531   auto lowerBound = castBounds[0];
532   // FIR CoordinateOp is zero based but Fortran substring are one based.
533   auto kind = getCharacterKind(box.getBuffer().getType());
534   auto charTy = fir::CharacterType::getUnknownLen(builder.getContext(), kind);
535   auto resultType = builder.getRefType(charTy);
536   auto one = builder.createIntegerConstant(loc, lowerBound.getType(), 1);
537   auto substringRef =
538       genSubstringBase(box.getBuffer(), lowerBound, resultType, one);
539 
540   // Compute the length.
541   mlir::Value substringLen;
542   if (nbounds < 2) {
543     substringLen =
544         builder.create<mlir::arith::SubIOp>(loc, box.getLen(), castBounds[0]);
545   } else {
546     substringLen =
547         builder.create<mlir::arith::SubIOp>(loc, castBounds[1], castBounds[0]);
548   }
549   substringLen = builder.create<mlir::arith::AddIOp>(loc, substringLen, one);
550 
551   // Set length to zero if bounds were reversed (Fortran 2018 9.4.1)
552   auto zero = builder.createIntegerConstant(loc, substringLen.getType(), 0);
553   auto cdt = builder.create<mlir::arith::CmpIOp>(
554       loc, mlir::arith::CmpIPredicate::slt, substringLen, zero);
555   substringLen =
556       builder.create<mlir::arith::SelectOp>(loc, cdt, zero, substringLen);
557 
558   return {substringRef, substringLen};
559 }
560 
561 mlir::Value
createLenTrim(const fir::CharBoxValue & str)562 fir::factory::CharacterExprHelper::createLenTrim(const fir::CharBoxValue &str) {
563   // Note: Runtime for LEN_TRIM should also be available at some
564   // point. For now use an inlined implementation.
565   auto indexType = builder.getIndexType();
566   auto len = builder.createConvert(loc, indexType, str.getLen());
567   auto one = builder.createIntegerConstant(loc, indexType, 1);
568   auto minusOne = builder.createIntegerConstant(loc, indexType, -1);
569   auto zero = builder.createIntegerConstant(loc, indexType, 0);
570   auto trueVal = builder.createIntegerConstant(loc, builder.getI1Type(), 1);
571   auto blank = createBlankConstantCode(getCharacterType(str));
572   mlir::Value lastChar = builder.create<mlir::arith::SubIOp>(loc, len, one);
573 
574   auto iterWhile =
575       builder.create<fir::IterWhileOp>(loc, lastChar, zero, minusOne, trueVal,
576                                        /*returnFinalCount=*/false, lastChar);
577   auto insPt = builder.saveInsertionPoint();
578   builder.setInsertionPointToStart(iterWhile.getBody());
579   auto index = iterWhile.getInductionVar();
580   // Look for first non-blank from the right of the character.
581   auto fromBuff = getCharBoxBuffer(str);
582   auto elemAddr = createElementAddr(fromBuff, index);
583   auto codeAddr =
584       builder.createConvert(loc, builder.getRefType(blank.getType()), elemAddr);
585   auto c = builder.create<fir::LoadOp>(loc, codeAddr);
586   auto isBlank = builder.create<mlir::arith::CmpIOp>(
587       loc, mlir::arith::CmpIPredicate::eq, blank, c);
588   llvm::SmallVector<mlir::Value> results = {isBlank, index};
589   builder.create<fir::ResultOp>(loc, results);
590   builder.restoreInsertionPoint(insPt);
591   // Compute length after iteration (zero if all blanks)
592   mlir::Value newLen =
593       builder.create<mlir::arith::AddIOp>(loc, iterWhile.getResult(1), one);
594   auto result = builder.create<mlir::arith::SelectOp>(
595       loc, iterWhile.getResult(0), zero, newLen);
596   return builder.createConvert(loc, builder.getCharacterLengthType(), result);
597 }
598 
599 fir::CharBoxValue
createCharacterTemp(mlir::Type type,int len)600 fir::factory::CharacterExprHelper::createCharacterTemp(mlir::Type type,
601                                                        int len) {
602   assert(len >= 0 && "expected positive length");
603   auto kind = recoverCharacterType(type).getFKind();
604   auto charType = fir::CharacterType::get(builder.getContext(), kind, len);
605   auto addr = builder.create<fir::AllocaOp>(loc, charType);
606   auto mlirLen =
607       builder.createIntegerConstant(loc, builder.getCharacterLengthType(), len);
608   return {addr, mlirLen};
609 }
610 
611 // Returns integer with code for blank. The integer has the same
612 // size as the character. Blank has ascii space code for all kinds.
createBlankConstantCode(fir::CharacterType type)613 mlir::Value fir::factory::CharacterExprHelper::createBlankConstantCode(
614     fir::CharacterType type) {
615   auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
616   auto intType = builder.getIntegerType(bits);
617   return builder.createIntegerConstant(loc, intType, ' ');
618 }
619 
createBlankConstant(fir::CharacterType type)620 mlir::Value fir::factory::CharacterExprHelper::createBlankConstant(
621     fir::CharacterType type) {
622   return createSingletonFromCode(createBlankConstantCode(type),
623                                  type.getFKind());
624 }
625 
createAssign(const fir::ExtendedValue & lhs,const fir::ExtendedValue & rhs)626 void fir::factory::CharacterExprHelper::createAssign(
627     const fir::ExtendedValue &lhs, const fir::ExtendedValue &rhs) {
628   if (auto *str = rhs.getBoxOf<fir::CharBoxValue>()) {
629     if (auto *to = lhs.getBoxOf<fir::CharBoxValue>()) {
630       createAssign(*to, *str);
631       return;
632     }
633   }
634   TODO(loc, "character array assignment");
635   // Note that it is not sure the array aspect should be handled
636   // by this utility.
637 }
638 
639 mlir::Value
createEmboxChar(mlir::Value addr,mlir::Value len)640 fir::factory::CharacterExprHelper::createEmboxChar(mlir::Value addr,
641                                                    mlir::Value len) {
642   return createEmbox(fir::CharBoxValue{addr, len});
643 }
644 
645 std::pair<mlir::Value, mlir::Value>
createUnboxChar(mlir::Value boxChar)646 fir::factory::CharacterExprHelper::createUnboxChar(mlir::Value boxChar) {
647   using T = std::pair<mlir::Value, mlir::Value>;
648   return toExtendedValue(boxChar).match(
649       [](const fir::CharBoxValue &b) -> T {
650         return {b.getBuffer(), b.getLen()};
651       },
652       [](const fir::CharArrayBoxValue &b) -> T {
653         return {b.getBuffer(), b.getLen()};
654       },
655       [](const auto &) -> T { llvm::report_fatal_error("not a character"); });
656 }
657 
isCharacterLiteral(mlir::Type type)658 bool fir::factory::CharacterExprHelper::isCharacterLiteral(mlir::Type type) {
659   if (auto seqType = mlir::dyn_cast<fir::SequenceType>(type))
660     return (seqType.getShape().size() == 1) &&
661            fir::isa_char(seqType.getEleTy());
662   return false;
663 }
664 
665 fir::KindTy
getCharacterKind(mlir::Type type)666 fir::factory::CharacterExprHelper::getCharacterKind(mlir::Type type) {
667   assert(isCharacterScalar(type) && "expected scalar character");
668   return recoverCharacterType(type).getFKind();
669 }
670 
671 fir::KindTy
getCharacterOrSequenceKind(mlir::Type type)672 fir::factory::CharacterExprHelper::getCharacterOrSequenceKind(mlir::Type type) {
673   return recoverCharacterType(type).getFKind();
674 }
675 
hasConstantLengthInType(const fir::ExtendedValue & exv)676 bool fir::factory::CharacterExprHelper::hasConstantLengthInType(
677     const fir::ExtendedValue &exv) {
678   auto charTy = recoverCharacterType(fir::getBase(exv).getType());
679   return charTy.hasConstantLen();
680 }
681 
682 mlir::Value
createSingletonFromCode(mlir::Value code,int kind)683 fir::factory::CharacterExprHelper::createSingletonFromCode(mlir::Value code,
684                                                            int kind) {
685   auto charType = fir::CharacterType::get(builder.getContext(), kind, 1);
686   auto bits = builder.getKindMap().getCharacterBitsize(kind);
687   auto intType = builder.getIntegerType(bits);
688   auto cast = builder.createConvert(loc, intType, code);
689   auto undef = builder.create<fir::UndefOp>(loc, charType);
690   auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
691   return builder.create<fir::InsertValueOp>(loc, charType, undef, cast,
692                                             builder.getArrayAttr(zero));
693 }
694 
extractCodeFromSingleton(mlir::Value singleton)695 mlir::Value fir::factory::CharacterExprHelper::extractCodeFromSingleton(
696     mlir::Value singleton) {
697   auto type = getCharacterType(singleton);
698   assert(type.getLen() == 1);
699   auto bits = builder.getKindMap().getCharacterBitsize(type.getFKind());
700   auto intType = builder.getIntegerType(bits);
701   auto zero = builder.getIntegerAttr(builder.getIndexType(), 0);
702   return builder.create<fir::ExtractValueOp>(loc, intType, singleton,
703                                              builder.getArrayAttr(zero));
704 }
705 
706 mlir::Value
readLengthFromBox(mlir::Value box)707 fir::factory::CharacterExprHelper::readLengthFromBox(mlir::Value box) {
708   auto charTy = recoverCharacterType(box.getType());
709   return readLengthFromBox(box, charTy);
710 }
711 
readLengthFromBox(mlir::Value box,fir::CharacterType charTy)712 mlir::Value fir::factory::CharacterExprHelper::readLengthFromBox(
713     mlir::Value box, fir::CharacterType charTy) {
714   auto lenTy = builder.getCharacterLengthType();
715   auto size = builder.create<fir::BoxEleSizeOp>(loc, lenTy, box);
716   auto bits = builder.getKindMap().getCharacterBitsize(charTy.getFKind());
717   auto width = bits / 8;
718   if (width > 1) {
719     auto widthVal = builder.createIntegerConstant(loc, lenTy, width);
720     return builder.create<mlir::arith::DivSIOp>(loc, size, widthVal);
721   }
722   return size;
723 }
724 
getLength(mlir::Value memref)725 mlir::Value fir::factory::CharacterExprHelper::getLength(mlir::Value memref) {
726   auto memrefType = memref.getType();
727   auto charType = recoverCharacterType(memrefType);
728   assert(charType && "must be a character type");
729   if (charType.hasConstantLen())
730     return builder.createIntegerConstant(loc, builder.getCharacterLengthType(),
731                                          charType.getLen());
732   if (mlir::isa<fir::BoxType>(memrefType))
733     return readLengthFromBox(memref);
734   if (mlir::isa<fir::BoxCharType>(memrefType))
735     return createUnboxChar(memref).second;
736 
737   // Length cannot be deduced from memref.
738   return {};
739 }
740 
741 std::pair<mlir::Value, mlir::Value>
extractCharacterProcedureTuple(fir::FirOpBuilder & builder,mlir::Location loc,mlir::Value tuple,bool openBoxProc)742 fir::factory::extractCharacterProcedureTuple(fir::FirOpBuilder &builder,
743                                              mlir::Location loc,
744                                              mlir::Value tuple,
745                                              bool openBoxProc) {
746   mlir::TupleType tupleType = mlir::cast<mlir::TupleType>(tuple.getType());
747   mlir::Value addr = builder.create<fir::ExtractValueOp>(
748       loc, tupleType.getType(0), tuple,
749       builder.getArrayAttr(
750           {builder.getIntegerAttr(builder.getIndexType(), 0)}));
751   mlir::Value proc = [&]() -> mlir::Value {
752     if (openBoxProc)
753       if (auto addrTy = mlir::dyn_cast<fir::BoxProcType>(addr.getType()))
754         return builder.create<fir::BoxAddrOp>(loc, addrTy.getEleTy(), addr);
755     return addr;
756   }();
757   mlir::Value len = builder.create<fir::ExtractValueOp>(
758       loc, tupleType.getType(1), tuple,
759       builder.getArrayAttr(
760           {builder.getIntegerAttr(builder.getIndexType(), 1)}));
761   return {proc, len};
762 }
763 
createCharacterProcedureTuple(fir::FirOpBuilder & builder,mlir::Location loc,mlir::Type argTy,mlir::Value addr,mlir::Value len)764 mlir::Value fir::factory::createCharacterProcedureTuple(
765     fir::FirOpBuilder &builder, mlir::Location loc, mlir::Type argTy,
766     mlir::Value addr, mlir::Value len) {
767   mlir::TupleType tupleType = mlir::cast<mlir::TupleType>(argTy);
768   addr = builder.createConvert(loc, tupleType.getType(0), addr);
769   if (len)
770     len = builder.createConvert(loc, tupleType.getType(1), len);
771   else
772     len = builder.create<fir::UndefOp>(loc, tupleType.getType(1));
773   mlir::Value tuple = builder.create<fir::UndefOp>(loc, tupleType);
774   tuple = builder.create<fir::InsertValueOp>(
775       loc, tupleType, tuple, addr,
776       builder.getArrayAttr(
777           {builder.getIntegerAttr(builder.getIndexType(), 0)}));
778   tuple = builder.create<fir::InsertValueOp>(
779       loc, tupleType, tuple, len,
780       builder.getArrayAttr(
781           {builder.getIntegerAttr(builder.getIndexType(), 1)}));
782   return tuple;
783 }
784 
785 mlir::Type
getCharacterProcedureTupleType(mlir::Type funcPointerType)786 fir::factory::getCharacterProcedureTupleType(mlir::Type funcPointerType) {
787   mlir::MLIRContext *context = funcPointerType.getContext();
788   mlir::Type lenType = mlir::IntegerType::get(context, 64);
789   return mlir::TupleType::get(context, {funcPointerType, lenType});
790 }
791 
createCharExtremum(bool predIsMin,llvm::ArrayRef<fir::CharBoxValue> opCBVs)792 fir::CharBoxValue fir::factory::CharacterExprHelper::createCharExtremum(
793     bool predIsMin, llvm::ArrayRef<fir::CharBoxValue> opCBVs) {
794   // inputs: we are given a vector of all of the charboxes of the arguments
795   // passed to hlfir.char_extremum, as well as the predicate for whether we
796   // want llt or lgt
797   //
798   // note: we know that, regardless of whether we're looking at smallest or
799   // largest char, the size of the output buffer will be the same size as the
800   // largest character out of all of the operands. so, we find the biggest
801   // length first. It's okay if these char lengths are not known at compile
802   // time.
803 
804   fir::CharBoxValue firstCBV = opCBVs[0];
805   mlir::Value firstBuf = getCharBoxBuffer(firstCBV);
806   auto firstLen = builder.createConvert(loc, builder.getCharacterLengthType(),
807                                         firstCBV.getLen());
808 
809   mlir::Value resultBuf = firstBuf;
810   mlir::Value resultLen = firstLen;
811   mlir::Value biggestLen = firstLen;
812 
813   // values for casting buf type and len type
814   auto typeLen = fir::CharacterType::unknownLen();
815   auto kind = recoverCharacterType(firstBuf.getType()).getFKind();
816   auto charTy = fir::CharacterType::get(builder.getContext(), kind, typeLen);
817   auto type = fir::ReferenceType::get(charTy);
818 
819   size_t numOperands = opCBVs.size();
820   for (size_t cbv_idx = 1; cbv_idx < numOperands; ++cbv_idx) {
821     auto currChar = opCBVs[cbv_idx];
822     auto currBuf = getCharBoxBuffer(currChar);
823     auto currLen = builder.createConvert(loc, builder.getCharacterLengthType(),
824                                          currChar.getLen());
825     // biggest len result
826     mlir::Value lhsBigger = builder.create<mlir::arith::CmpIOp>(
827         loc, mlir::arith::CmpIPredicate::uge, biggestLen, currLen);
828     biggestLen = builder.create<mlir::arith::SelectOp>(loc, lhsBigger,
829                                                        biggestLen, currLen);
830 
831     auto cmp = predIsMin ? mlir::arith::CmpIPredicate::slt
832                          : mlir::arith::CmpIPredicate::sgt;
833 
834     // lexical compare result
835     mlir::Value resultCmp = fir::runtime::genCharCompare(
836         builder, loc, cmp, currBuf, currLen, resultBuf, resultLen);
837 
838     // it's casting (to unknown size) time!
839     resultBuf = builder.createConvert(loc, type, resultBuf);
840     currBuf = builder.createConvert(loc, type, currBuf);
841 
842     resultBuf = builder.create<mlir::arith::SelectOp>(loc, resultCmp, currBuf,
843                                                       resultBuf);
844     resultLen = builder.create<mlir::arith::SelectOp>(loc, resultCmp, currLen,
845                                                       resultLen);
846   }
847 
848   // now that we know the lexicographically biggest/smallest char and which char
849   // had the biggest len, we can populate a temp CBV and return it
850   fir::CharBoxValue temp = createCharacterTemp(resultBuf.getType(), biggestLen);
851   auto toBuf = temp;
852   fir::CharBoxValue fromBuf{resultBuf, resultLen};
853   createAssign(toBuf, fromBuf);
854   return temp;
855 }
856 
857 fir::CharBoxValue
convertCharacterKind(fir::FirOpBuilder & builder,mlir::Location loc,fir::CharBoxValue srcBoxChar,int toKind)858 fir::factory::convertCharacterKind(fir::FirOpBuilder &builder,
859                                    mlir::Location loc,
860                                    fir::CharBoxValue srcBoxChar, int toKind) {
861   // Use char_convert. Each code point is translated from a
862   // narrower/wider encoding to the target encoding. For example, 'A'
863   // may be translated from 0x41 : i8 to 0x0041 : i16. The symbol
864   // for euro (0x20AC : i16) may be translated from a wide character
865   // to "0xE2 0x82 0xAC" : UTF-8.
866   mlir::Value bufferSize = srcBoxChar.getLen();
867   auto kindMap = builder.getKindMap();
868   mlir::Value boxCharAddr = srcBoxChar.getAddr();
869   auto fromTy = boxCharAddr.getType();
870   if (auto charTy = mlir::dyn_cast<fir::CharacterType>(fromTy)) {
871     // boxchar is a value, not a variable. Turn it into a temporary.
872     // As a value, it ought to have a constant LEN value.
873     assert(charTy.hasConstantLen() && "must have constant length");
874     mlir::Value tmp = builder.createTemporary(loc, charTy);
875     builder.create<fir::StoreOp>(loc, boxCharAddr, tmp);
876     boxCharAddr = tmp;
877   }
878   auto fromBits = kindMap.getCharacterBitsize(
879       mlir::cast<fir::CharacterType>(fir::unwrapRefType(fromTy)).getFKind());
880   auto toBits = kindMap.getCharacterBitsize(toKind);
881   if (toBits < fromBits) {
882     // Scale by relative ratio to give a buffer of the same length.
883     auto ratio = builder.createIntegerConstant(loc, bufferSize.getType(),
884                                                fromBits / toBits);
885     bufferSize = builder.create<mlir::arith::MulIOp>(loc, bufferSize, ratio);
886   }
887   mlir::Type toType =
888       fir::CharacterType::getUnknownLen(builder.getContext(), toKind);
889   auto dest = builder.createTemporary(loc, toType, /*name=*/{}, /*shape=*/{},
890                                       mlir::ValueRange{bufferSize});
891   builder.create<fir::CharConvertOp>(loc, boxCharAddr, srcBoxChar.getLen(),
892                                      dest);
893   return fir::CharBoxValue{dest, srcBoxChar.getLen()};
894 }
895