xref: /llvm-project/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp (revision 27cfe7a07fc858bd890f2e0980f530a8573748b0)
1 //===-- ArrayValueCopy.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "flang/Optimizer/Builder/BoxValue.h"
10 #include "flang/Optimizer/Builder/FIRBuilder.h"
11 #include "flang/Optimizer/Builder/Factory.h"
12 #include "flang/Optimizer/Builder/Runtime/Derived.h"
13 #include "flang/Optimizer/Builder/Todo.h"
14 #include "flang/Optimizer/Dialect/FIRDialect.h"
15 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
16 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
17 #include "flang/Optimizer/Transforms/Passes.h"
18 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
19 #include "mlir/Dialect/SCF/IR/SCF.h"
20 #include "mlir/Transforms/DialectConversion.h"
21 #include "llvm/Support/Debug.h"
22 
23 namespace fir {
24 #define GEN_PASS_DEF_ARRAYVALUECOPY
25 #include "flang/Optimizer/Transforms/Passes.h.inc"
26 } // namespace fir
27 
28 #define DEBUG_TYPE "flang-array-value-copy"
29 
30 using namespace fir;
31 using namespace mlir;
32 
33 using OperationUseMapT = llvm::DenseMap<mlir::Operation *, mlir::Operation *>;
34 
35 namespace {
36 
37 /// Array copy analysis.
38 /// Perform an interference analysis between array values.
39 ///
40 /// Lowering will generate a sequence of the following form.
41 /// ```mlir
42 ///   %a_1 = fir.array_load %array_1(%shape) : ...
43 ///   ...
44 ///   %a_j = fir.array_load %array_j(%shape) : ...
45 ///   ...
46 ///   %a_n = fir.array_load %array_n(%shape) : ...
47 ///     ...
48 ///     %v_i = fir.array_fetch %a_i, ...
49 ///     %a_j1 = fir.array_update %a_j, ...
50 ///     ...
51 ///   fir.array_merge_store %a_j, %a_jn to %array_j : ...
52 /// ```
53 ///
54 /// The analysis is to determine if there are any conflicts. A conflict is when
55 /// one the following cases occurs.
56 ///
57 /// 1. There is an `array_update` to an array value, a_j, such that a_j was
58 /// loaded from the same array memory reference (array_j) but with a different
59 /// shape as the other array values a_i, where i != j. [Possible overlapping
60 /// arrays.]
61 ///
62 /// 2. There is either an array_fetch or array_update of a_j with a different
63 /// set of index values. [Possible loop-carried dependence.]
64 ///
65 /// If none of the array values overlap in storage and the accesses are not
66 /// loop-carried, then the arrays are conflict-free and no copies are required.
class ArrayCopyAnalysisBase {
public:
  using ConflictSetT = llvm::SmallPtrSet<mlir::Operation *, 16>;
  using UseSetT = llvm::SmallPtrSet<mlir::OpOperand *, 8>;
  using LoadMapSetsT = llvm::DenseMap<mlir::Operation *, UseSetT>;
  using AmendAccessSetT = llvm::SmallPtrSet<mlir::Operation *, 4>;

  /// Runs the analysis over `op` at construction time. When `optimized` is
  /// set, Fortran variable attributes may be used to rule out conflicts (see
  /// conflictOnLoad's `optimize` parameter).
  ArrayCopyAnalysisBase(mlir::Operation *op, bool optimized)
      : operation{op}, optimizeConflicts(optimized) {
    construct(op);
  }
  virtual ~ArrayCopyAnalysisBase() = default;

  /// Return the operation the analysis was run on.
  mlir::Operation *getOperation() const { return operation; }

  /// Return true iff the `array_merge_store` has potential conflicts.
  bool hasPotentialConflict(mlir::Operation *op) const {
    LLVM_DEBUG(llvm::dbgs()
               << "looking for a conflict on " << *op
               << " and the set has a total of " << conflicts.size() << '\n');
    return conflicts.contains(op);
  }

  /// Return the use map.
  /// The use map maps array access, amend, fetch and update operations back to
  /// the array load that is the original source of the array value.
  /// It maps an array_load to an array_merge_store, if and only if the loaded
  /// array value has pending modifications to be merged.
  const OperationUseMapT &getUseMap() const { return useMap; }

  /// Return the set of array_access ops directly associated with array_amend
  /// ops.
  bool inAmendAccessSet(mlir::Operation *op) const {
    return amendAccesses.count(op);
  }

  /// For ArrayLoad `load`, return the transitive set of all OpOperands.
  UseSetT getLoadUseSet(mlir::Operation *load) const {
    assert(loadMapSets.count(load) && "analysis missed an array load?");
    return loadMapSets.lookup(load);
  }

  /// Populate `mentions` with all array ops that access the array value
  /// produced by `load` (defined out-of-line below).
  void arrayMentions(llvm::SmallVectorImpl<mlir::Operation *> &mentions,
                     ArrayLoadOp load);

private:
  // Performs the analysis: walks `topLevelOp` and fills in the members below.
  void construct(mlir::Operation *topLevelOp);

  mlir::Operation *operation; // operation that analysis ran upon
  ConflictSetT conflicts;     // set of conflicts (loads and merge stores)
  OperationUseMapT useMap;
  LoadMapSetsT loadMapSets;
  // Set of array_access ops associated with array_amend ops.
  AmendAccessSetT amendAccesses;
  bool optimizeConflicts;
};
123 
124 // Optimized array copy analysis that takes into account Fortran
125 // variable attributes to prove that no conflict is possible
126 // and reduce the number of temporary arrays.
class ArrayCopyAnalysisOptimized : public ArrayCopyAnalysisBase {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ArrayCopyAnalysisOptimized)

  /// Run the base analysis with attribute-based conflict pruning enabled.
  ArrayCopyAnalysisOptimized(mlir::Operation *op)
      : ArrayCopyAnalysisBase(op, /*optimized=*/true) {}
};
134 
135 // Unoptimized array copy analysis used at O0.
class ArrayCopyAnalysis : public ArrayCopyAnalysisBase {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ArrayCopyAnalysis)

  /// Run the base analysis without attribute-based pruning (conservative).
  ArrayCopyAnalysis(mlir::Operation *op)
      : ArrayCopyAnalysisBase(op, /*optimized=*/false) {}
};
143 } // namespace
144 
145 namespace {
146 /// Helper class to collect all array operations that produced an array value.
/// Helper class to collect all array operations that produced an array value.
class ReachCollector {
public:
  /// `reach` is the output vector of collected operations; `loopRegion` is
  /// the region of the loop nest that produced the array value (may be null).
  ReachCollector(llvm::SmallVectorImpl<mlir::Operation *> &reach,
                 mlir::Region *loopRegion)
      : reach{reach}, loopRegion{loopRegion} {}

  /// Visit each value in `range`. An empty range still visits `op` itself
  /// (with a null value) so its defining semantics are processed.
  void collectArrayMentionFrom(mlir::Operation *op, mlir::ValueRange range) {
    if (range.empty()) {
      collectArrayMentionFrom(op, mlir::Value{});
      return;
    }
    for (mlir::Value v : range)
      collectArrayMentionFrom(v);
  }

  // Collect all the array_access ops in `block`. This recursively looks into
  // blocks in ops with regions.
  // FIXME: This is temporarily relying on the array_amend appearing in a
  // do_loop Region.  This phase ordering assumption can be eliminated by using
  // dominance information to find the array_access ops or by scanning the
  // transitive closure of the amending array_access's users and the defs that
  // reach them.
  void collectAccesses(llvm::SmallVector<ArrayAccessOp> &result,
                       mlir::Block *block) {
    for (auto &op : *block) {
      if (auto access = mlir::dyn_cast<ArrayAccessOp>(op)) {
        LLVM_DEBUG(llvm::dbgs() << "adding access: " << access << '\n');
        result.push_back(access);
        continue;
      }
      // Not an access: recurse into any nested regions.
      for (auto &region : op.getRegions())
        for (auto &bb : region.getBlocks())
          collectAccesses(result, &bb);
    }
  }

  /// Process operation `op`, which defines (or forwards) `val`, recording
  /// reachable array operations into `reach`. `val` may be null when `op`
  /// has no results (see the ValueRange overload above).
  void collectArrayMentionFrom(mlir::Operation *op, mlir::Value val) {
    // `val` is defined by an Op, process the defining Op.
    // If `val` is defined by a region containing Op, we want to drill down
    // and through that Op's region(s).
    LLVM_DEBUG(llvm::dbgs() << "popset: " << *op << '\n');
    // For region ops, chase the source values that flow into result `val`.
    auto popFn = [&](auto rop) {
      assert(val && "op must have a result value");
      auto resNum = val.cast<mlir::OpResult>().getResultNumber();
      llvm::SmallVector<mlir::Value> results;
      rop.resultToSourceOps(results, resNum);
      for (auto u : results)
        collectArrayMentionFrom(u);
    };
    if (auto rop = mlir::dyn_cast<DoLoopOp>(op)) {
      popFn(rop);
      return;
    }
    if (auto rop = mlir::dyn_cast<IterWhileOp>(op)) {
      popFn(rop);
      return;
    }
    if (auto rop = mlir::dyn_cast<fir::IfOp>(op)) {
      popFn(rop);
      return;
    }
    if (auto box = mlir::dyn_cast<EmboxOp>(op)) {
      // The box aliases its memref: follow all other users of that memref.
      for (auto *user : box.getMemref().getUsers())
        if (user != op)
          collectArrayMentionFrom(user, user->getResults());
      return;
    }
    if (auto mergeStore = mlir::dyn_cast<ArrayMergeStoreOp>(op)) {
      // Only merge stores inside the loop nest contribute to the value.
      if (opIsInsideLoops(mergeStore))
        collectArrayMentionFrom(mergeStore.getSequence());
      return;
    }

    if (mlir::isa<AllocaOp, AllocMemOp>(op)) {
      // Look for any stores inside the loops, and collect an array operation
      // that produced the value being stored to it.
      for (auto *user : op->getUsers())
        if (auto store = mlir::dyn_cast<fir::StoreOp>(user))
          if (opIsInsideLoops(store))
            collectArrayMentionFrom(store.getValue());
      return;
    }

    // Scan the uses of amend's memref
    if (auto amend = mlir::dyn_cast<ArrayAmendOp>(op)) {
      reach.push_back(op);
      llvm::SmallVector<ArrayAccessOp> accesses;
      collectAccesses(accesses, op->getBlock());
      for (auto access : accesses)
        collectArrayMentionFrom(access.getResult());
    }

    // Otherwise, Op does not contain a region so just chase its operands.
    if (mlir::isa<ArrayAccessOp, ArrayLoadOp, ArrayUpdateOp, ArrayModifyOp,
                  ArrayFetchOp>(op)) {
      LLVM_DEBUG(llvm::dbgs() << "add " << *op << " to reachable set\n");
      reach.push_back(op);
    }

    // Include all array_access ops using an array_load.
    if (auto arrLd = mlir::dyn_cast<ArrayLoadOp>(op))
      for (auto *user : arrLd.getResult().getUsers())
        if (mlir::isa<ArrayAccessOp>(user)) {
          LLVM_DEBUG(llvm::dbgs() << "add " << *user << " to reachable set\n");
          reach.push_back(user);
        }

    // Array modify assignment is performed on the result. So the analysis must
    // look at the what is done with the result.
    if (mlir::isa<ArrayModifyOp>(op))
      for (auto *user : op->getResult(0).getUsers())
        followUsers(user);

    // Conservatively treat calls as part of the reaching set.
    if (mlir::isa<fir::CallOp>(op)) {
      LLVM_DEBUG(llvm::dbgs() << "add " << *op << " to reachable set\n");
      reach.push_back(op);
    }

    // Finally, chase all operands of the op itself.
    for (auto u : op->getOperands())
      collectArrayMentionFrom(u);
  }

  /// Process a block argument by chasing the values that feed it.
  void collectArrayMentionFrom(mlir::BlockArgument ba) {
    auto *parent = ba.getOwner()->getParentOp();
    // If inside an Op holding a region, the block argument corresponds to an
    // argument passed to the containing Op.
    auto popFn = [&](auto rop) {
      collectArrayMentionFrom(rop.blockArgToSourceOp(ba.getArgNumber()));
    };
    if (auto rop = mlir::dyn_cast<DoLoopOp>(parent)) {
      popFn(rop);
      return;
    }
    if (auto rop = mlir::dyn_cast<IterWhileOp>(parent)) {
      popFn(rop);
      return;
    }
    // Otherwise, a block argument is provided via the pred blocks.
    for (auto *pred : ba.getOwner()->getPredecessors()) {
      auto u = pred->getTerminator()->getOperand(ba.getArgNumber());
      collectArrayMentionFrom(u);
    }
  }

  // Recursively trace operands to find all array operations relating to the
  // values merged. The `visited` set guarantees termination on cyclic
  // def-use webs (loop-carried values).
  void collectArrayMentionFrom(mlir::Value val) {
    if (!val || visited.contains(val))
      return;
    visited.insert(val);

    // Process a block argument.
    if (auto ba = val.dyn_cast<mlir::BlockArgument>()) {
      collectArrayMentionFrom(ba);
      return;
    }

    // Process an Op.
    if (auto *op = val.getDefiningOp()) {
      collectArrayMentionFrom(op, val);
      return;
    }

    // A value is either a block argument or op result; anything else is a
    // malformed IR invariant violation.
    emitFatalError(val.getLoc(), "unhandled value");
  }

  /// Return all ops that produce the array value that is stored into the
  /// `array_merge_store`.
  static void reachingValues(llvm::SmallVectorImpl<mlir::Operation *> &reach,
                             mlir::Value seq) {
    reach.clear();
    mlir::Region *loopRegion = nullptr;
    // When `seq` is produced by a fir.do_loop, remember the loop body region
    // so stores within the nest are considered (see opIsInsideLoops).
    if (auto doLoop = mlir::dyn_cast_or_null<DoLoopOp>(seq.getDefiningOp()))
      loopRegion = &doLoop->getRegion(0);
    ReachCollector collector(reach, loopRegion);
    collector.collectArrayMentionFrom(seq);
  }

private:
  /// Is \op inside the loop nest region ?
  /// FIXME: replace this structural dependence with graph properties.
  bool opIsInsideLoops(mlir::Operation *op) const {
    auto *region = op->getParentRegion();
    while (region) {
      if (region == loopRegion)
        return true;
      region = region->getParentRegion();
    }
    return false;
  }

  /// Recursively trace the use of an operation results, calling
  /// collectArrayMentionFrom on the direct and indirect user operands.
  void followUsers(mlir::Operation *op) {
    for (auto userOperand : op->getOperands())
      collectArrayMentionFrom(userOperand);
    // Go through potential converts/coordinate_op.
    for (auto indirectUser : op->getUsers())
      followUsers(indirectUser);
  }

  llvm::SmallVectorImpl<mlir::Operation *> &reach;
  llvm::SmallPtrSet<mlir::Value, 16> visited;
  /// Region of the loops nest that produced the array value.
  mlir::Region *loopRegion;
};
353 } // namespace
354 
/// Find all the array operations that access the array value that is loaded by
/// the array load operation, `load`.
void ArrayCopyAnalysisBase::arrayMentions(
    llvm::SmallVectorImpl<mlir::Operation *> &mentions, ArrayLoadOp load) {
  mentions.clear();
  // Fast path: if this load was already analyzed, rebuild the mention list
  // from the cached transitive use set.
  auto lmIter = loadMapSets.find(load);
  if (lmIter != loadMapSets.end()) {
    for (auto *opnd : lmIter->second) {
      auto *owner = opnd->getOwner();
      if (mlir::isa<ArrayAccessOp, ArrayAmendOp, ArrayFetchOp, ArrayUpdateOp,
                    ArrayModifyOp>(owner))
        mentions.push_back(owner);
    }
    return;
  }

  UseSetT visited;
  llvm::SmallVector<mlir::OpOperand *> queue; // uses of ArrayLoad[orig]

  // Enqueue every not-yet-visited use of `val`.
  auto appendToQueue = [&](mlir::Value val) {
    for (auto &use : val.getUses())
      if (!visited.count(&use)) {
        visited.insert(&use);
        queue.push_back(&use);
      }
  };

  // Build the set of uses of `original`.
  // let USES = { uses of original fir.load }
  appendToQueue(load);

  // Process the worklist until done.
  while (!queue.empty()) {
    mlir::OpOperand *operand = queue.pop_back_val();
    mlir::Operation *owner = operand->getOwner();
    if (!owner)
      continue;
    // An array value carried as a loop iter-arg continues both through the
    // loop result and through the corresponding block argument.
    auto structuredLoop = [&](auto ro) {
      if (auto blockArg = ro.iterArgToBlockArg(operand->get())) {
        int64_t arg = blockArg.getArgNumber();
        // The result list is offset relative to block arguments when the loop
        // does not also return its final iteration value.
        mlir::Value output = ro.getResult(ro.getFinalValue() ? arg : arg - 1);
        appendToQueue(output);
        appendToQueue(blockArg);
      }
    };
    // TODO: this need to be updated to use the control-flow interface.
    auto branchOp = [&](mlir::Block *dest, OperandRange operands) {
      if (operands.empty())
        return;

      // Check if this operand is within the range.
      unsigned operandIndex = operand->getOperandNumber();
      unsigned operandsStart = operands.getBeginOperandIndex();
      if (operandIndex < operandsStart ||
          operandIndex >= (operandsStart + operands.size()))
        return;

      // Index the successor.
      unsigned argIndex = operandIndex - operandsStart;
      appendToQueue(dest->getArgument(argIndex));
    };
    // Thread uses into structured loop bodies and return value uses.
    if (auto ro = mlir::dyn_cast<DoLoopOp>(owner)) {
      structuredLoop(ro);
    } else if (auto ro = mlir::dyn_cast<IterWhileOp>(owner)) {
      structuredLoop(ro);
    } else if (auto rs = mlir::dyn_cast<ResultOp>(owner)) {
      // Thread any uses of fir.if that return the marked array value.
      mlir::Operation *parent = rs->getParentRegion()->getParentOp();
      if (auto ifOp = mlir::dyn_cast<fir::IfOp>(parent))
        appendToQueue(ifOp.getResult(operand->getOperandNumber()));
    } else if (mlir::isa<ArrayFetchOp>(owner)) {
      // Keep track of array value fetches.
      LLVM_DEBUG(llvm::dbgs()
                 << "add fetch {" << *owner << "} to array value set\n");
      mentions.push_back(owner);
    } else if (auto update = mlir::dyn_cast<ArrayUpdateOp>(owner)) {
      // Keep track of array value updates and thread the return value uses.
      LLVM_DEBUG(llvm::dbgs()
                 << "add update {" << *owner << "} to array value set\n");
      mentions.push_back(owner);
      appendToQueue(update.getResult());
    } else if (auto update = mlir::dyn_cast<ArrayModifyOp>(owner)) {
      // Keep track of array value modification and thread the return value
      // uses. Result #1 carries the array value being threaded.
      LLVM_DEBUG(llvm::dbgs()
                 << "add modify {" << *owner << "} to array value set\n");
      mentions.push_back(owner);
      appendToQueue(update.getResult(1));
    } else if (auto mention = mlir::dyn_cast<ArrayAccessOp>(owner)) {
      mentions.push_back(owner);
    } else if (auto amend = mlir::dyn_cast<ArrayAmendOp>(owner)) {
      mentions.push_back(owner);
      appendToQueue(amend.getResult());
    } else if (auto br = mlir::dyn_cast<mlir::cf::BranchOp>(owner)) {
      branchOp(br.getDest(), br.getDestOperands());
    } else if (auto br = mlir::dyn_cast<mlir::cf::CondBranchOp>(owner)) {
      branchOp(br.getTrueDest(), br.getTrueOperands());
      branchOp(br.getFalseDest(), br.getFalseOperands());
    } else if (mlir::isa<ArrayMergeStoreOp>(owner)) {
      // do nothing
    } else {
      llvm::report_fatal_error("array value reached unexpected op");
    }
  }
  // Cache the transitive use set for later queries on the same load.
  loadMapSets.insert({load, visited});
}
462 
463 static bool hasPointerType(mlir::Type type) {
464   if (auto boxTy = type.dyn_cast<BoxType>())
465     type = boxTy.getEleTy();
466   return type.isa<fir::PointerType>();
467 }
468 
// This is a NF performance hack. It makes a simple test that the slices of the
// load, \p ld, and the merge store, \p st, are trivially mutually exclusive.
// Returns true only when the slice ranges provably do not overlap; returning
// false just means "cannot prove disjointness" (conservative).
static bool mutuallyExclusiveSliceRange(ArrayLoadOp ld, ArrayMergeStoreOp st) {
  // If the same array_load, then no further testing is warranted.
  if (ld.getResult() == st.getOriginal())
    return false;

  // Return the fir.slice op defining `val`, or a null op otherwise.
  auto getSliceOp = [](mlir::Value val) -> SliceOp {
    if (!val)
      return {};
    auto sliceOp = mlir::dyn_cast_or_null<SliceOp>(val.getDefiningOp());
    if (!sliceOp)
      return {};
    return sliceOp;
  };

  auto ldSlice = getSliceOp(ld.getSlice());
  auto stSlice = getSliceOp(st.getSlice());
  if (!ldSlice || !stSlice)
    return false;

  // Resign on subobject slices.
  if (!ldSlice.getFields().empty() || !stSlice.getFields().empty() ||
      !ldSlice.getSubstr().empty() || !stSlice.getSubstr().empty())
    return false;

  // Crudely test that the two slices do not overlap by looking for the
  // following general condition. If the slices look like (i:j) and (j+1:k) then
  // these ranges do not overlap. The addend must be a constant.
  auto ldTriples = ldSlice.getTriples();
  auto stTriples = stSlice.getTriples();
  const auto size = ldTriples.size();
  if (size != stTriples.size())
    return false;

  // True when `v2` is provably `v1` plus a positive constant (modulo
  // fir.convert wrappers), i.e. the bounds are displaced by a known amount.
  auto displacedByConstant = [](mlir::Value v1, mlir::Value v2) {
    // Strip any fir.convert chain to reach the underlying defining op.
    auto removeConvert = [](mlir::Value v) -> mlir::Operation * {
      auto *op = v.getDefiningOp();
      while (auto conv = mlir::dyn_cast_or_null<ConvertOp>(op))
        op = conv.getValue().getDefiningOp();
      return op;
    };

    // Is `v` an integer constant strictly greater than zero?
    auto isPositiveConstant = [](mlir::Value v) -> bool {
      if (auto conOp =
              mlir::dyn_cast<mlir::arith::ConstantOp>(v.getDefiningOp()))
        if (auto iattr = conOp.getValue().dyn_cast<mlir::IntegerAttr>())
          return iattr.getInt() > 0;
      return false;
    };

    auto *op1 = removeConvert(v1);
    auto *op2 = removeConvert(v2);
    if (!op1 || !op2)
      return false;
    // Match v2 == v1 + c with c > 0 (either operand order).
    if (auto addi = mlir::dyn_cast<mlir::arith::AddIOp>(op2))
      if ((addi.getLhs().getDefiningOp() == op1 &&
           isPositiveConstant(addi.getRhs())) ||
          (addi.getRhs().getDefiningOp() == op1 &&
           isPositiveConstant(addi.getLhs())))
        return true;
    // Match v1 == v2 - c with c > 0.
    if (auto subi = mlir::dyn_cast<mlir::arith::SubIOp>(op1))
      if (subi.getLhs().getDefiningOp() == op2 &&
          isPositiveConstant(subi.getRhs()))
        return true;
    return false;
  };

  // Triples are stored flat, three values per dimension; inspect each
  // dimension's triple in turn. All must be provably disjoint or identical.
  for (std::remove_const_t<decltype(size)> i = 0; i < size; i += 3) {
    // If both are loop invariant, skip to the next triple.
    if (mlir::isa_and_nonnull<fir::UndefOp>(ldTriples[i + 1].getDefiningOp()) &&
        mlir::isa_and_nonnull<fir::UndefOp>(stTriples[i + 1].getDefiningOp())) {
      // Unless either is a vector index, then be conservative.
      if (mlir::isa_and_nonnull<fir::UndefOp>(ldTriples[i].getDefiningOp()) ||
          mlir::isa_and_nonnull<fir::UndefOp>(stTriples[i].getDefiningOp()))
        return false;
      continue;
    }
    // If identical, skip to the next triple.
    if (ldTriples[i] == stTriples[i] && ldTriples[i + 1] == stTriples[i + 1] &&
        ldTriples[i + 2] == stTriples[i + 2])
      continue;
    // If ubound and lbound are the same with a constant offset, skip to the
    // next triple.
    if (displacedByConstant(ldTriples[i + 1], stTriples[i]) ||
        displacedByConstant(stTriples[i + 1], ldTriples[i]))
      continue;
    return false;
  }
  LLVM_DEBUG(llvm::dbgs() << "detected non-overlapping slice ranges on " << ld
                          << " and " << st << ", which is not a conflict\n");
  return true;
}
562 
/// Is there a conflict between the array value that was updated and to be
/// stored to `st` and the set of arrays loaded (`reach`) and used to compute
/// the updated value?
/// If `optimize` is true, use the variable attributes to prove that
/// there is no conflict.
static bool conflictOnLoad(llvm::ArrayRef<mlir::Operation *> reach,
                           ArrayMergeStoreOp st, bool optimize) {
  // Tracks the unique load of the stored array seen so far; a second load of
  // the same memref is conservatively treated as a conflict.
  mlir::Value load;
  mlir::Value addr = st.getMemref();
  const bool storeHasPointerType = hasPointerType(addr.getType());
  for (auto *op : reach)
    if (auto ld = mlir::dyn_cast<ArrayLoadOp>(op)) {
      mlir::Type ldTy = ld.getMemref().getType();
      if (ld.getMemref() == addr) {
        // Same memory reference as the store destination.
        if (mutuallyExclusiveSliceRange(ld, st))
          continue;
        if (ld.getResult() != st.getOriginal())
          return true;
        if (load) {
          // TODO: extend this to allow checking if the first `load` and this
          // `ld` are mutually exclusive accesses but not identical.
          return true;
        }
        load = ld;
      } else if (storeHasPointerType) {
        // Distinct memrefs, but the store target is a POINTER: the load may
        // alias it unless attributes prove otherwise.
        if (optimize && !hasPointerType(ldTy) &&
            !valueMayHaveFirAttributes(
                ld.getMemref(),
                {getTargetAttrName(), GlobalOp::getTargetAttrNameStr()}))
          continue;

        return true;
      } else if (hasPointerType(ldTy)) {
        // Symmetric case: the loaded memref is a POINTER that may alias the
        // store target.
        if (optimize && !storeHasPointerType &&
            !valueMayHaveFirAttributes(
                addr, {getTargetAttrName(), GlobalOp::getTargetAttrNameStr()}))
          continue;

        return true;
      }
      // TODO: Check if types can also allow ruling out some cases. For now,
      // the fact that equivalences is using pointer attribute to enforce
      // aliasing is preventing any attempt to do so, and in general, it may
      // be wrong to use this if any of the types is a complex or a derived
      // for which it is possible to create a pointer to a part with a
      // different type than the whole, although this deserve some more
      // investigation because existing compiler behavior seem to diverge
      // here.
    }
  return false;
}
614 
615 /// Is there an access vector conflict on the array being merged into? If the
616 /// access vectors diverge, then assume that there are potentially overlapping
617 /// loop-carried references.
618 static bool conflictOnMerge(llvm::ArrayRef<mlir::Operation *> mentions) {
619   if (mentions.size() < 2)
620     return false;
621   llvm::SmallVector<mlir::Value> indices;
622   LLVM_DEBUG(llvm::dbgs() << "check merge conflict on with " << mentions.size()
623                           << " mentions on the list\n");
624   bool valSeen = false;
625   bool refSeen = false;
626   for (auto *op : mentions) {
627     llvm::SmallVector<mlir::Value> compareVector;
628     if (auto u = mlir::dyn_cast<ArrayUpdateOp>(op)) {
629       valSeen = true;
630       if (indices.empty()) {
631         indices = u.getIndices();
632         continue;
633       }
634       compareVector = u.getIndices();
635     } else if (auto f = mlir::dyn_cast<ArrayModifyOp>(op)) {
636       valSeen = true;
637       if (indices.empty()) {
638         indices = f.getIndices();
639         continue;
640       }
641       compareVector = f.getIndices();
642     } else if (auto f = mlir::dyn_cast<ArrayFetchOp>(op)) {
643       valSeen = true;
644       if (indices.empty()) {
645         indices = f.getIndices();
646         continue;
647       }
648       compareVector = f.getIndices();
649     } else if (auto f = mlir::dyn_cast<ArrayAccessOp>(op)) {
650       refSeen = true;
651       if (indices.empty()) {
652         indices = f.getIndices();
653         continue;
654       }
655       compareVector = f.getIndices();
656     } else if (mlir::isa<ArrayAmendOp>(op)) {
657       refSeen = true;
658       continue;
659     } else {
660       mlir::emitError(op->getLoc(), "unexpected operation in analysis");
661     }
662     if (compareVector.size() != indices.size() ||
663         llvm::any_of(llvm::zip(compareVector, indices), [&](auto pair) {
664           return std::get<0>(pair) != std::get<1>(pair);
665         }))
666       return true;
667     LLVM_DEBUG(llvm::dbgs() << "vectors compare equal\n");
668   }
669   return valSeen && refSeen;
670 }
671 
672 /// With element-by-reference semantics, an amended array with more than once
673 /// access to the same loaded array are conservatively considered a conflict.
674 /// Note: the array copy can still be eliminated in subsequent optimizations.
675 static bool conflictOnReference(llvm::ArrayRef<mlir::Operation *> mentions) {
676   LLVM_DEBUG(llvm::dbgs() << "checking reference semantics " << mentions.size()
677                           << '\n');
678   if (mentions.size() < 3)
679     return false;
680   unsigned amendCount = 0;
681   unsigned accessCount = 0;
682   for (auto *op : mentions) {
683     if (mlir::isa<ArrayAmendOp>(op) && ++amendCount > 1) {
684       LLVM_DEBUG(llvm::dbgs() << "conflict: multiple amends of array value\n");
685       return true;
686     }
687     if (mlir::isa<ArrayAccessOp>(op) && ++accessCount > 1) {
688       LLVM_DEBUG(llvm::dbgs()
689                  << "conflict: multiple accesses of array value\n");
690       return true;
691     }
692     if (mlir::isa<ArrayFetchOp, ArrayUpdateOp, ArrayModifyOp>(op)) {
693       LLVM_DEBUG(llvm::dbgs()
694                  << "conflict: array value has both uses by-value and uses "
695                     "by-reference. conservative assumption.\n");
696       return true;
697     }
698   }
699   return false;
700 }
701 
702 static mlir::Operation *
703 amendingAccess(llvm::ArrayRef<mlir::Operation *> mentions) {
704   for (auto *op : mentions)
705     if (auto amend = mlir::dyn_cast<ArrayAmendOp>(op))
706       return amend.getMemref().getDefiningOp();
707   return {};
708 }
709 
710 // Are any conflicts present? The conflicts detected here are described above.
711 static bool conflictDetected(llvm::ArrayRef<mlir::Operation *> reach,
712                              llvm::ArrayRef<mlir::Operation *> mentions,
713                              ArrayMergeStoreOp st, bool optimize) {
714   return conflictOnLoad(reach, st, optimize) || conflictOnMerge(mentions);
715 }
716 
717 // Assume that any call to a function that uses host-associations will be
718 // modifying the output array.
719 static bool
720 conservativeCallConflict(llvm::ArrayRef<mlir::Operation *> reaches) {
721   return llvm::any_of(reaches, [](mlir::Operation *op) {
722     if (auto call = mlir::dyn_cast<fir::CallOp>(op))
723       if (auto callee =
724               call.getCallableForCallee().dyn_cast<mlir::SymbolRefAttr>()) {
725         auto module = op->getParentOfType<mlir::ModuleOp>();
726         return isInternalPorcedure(
727             module.lookupSymbol<mlir::func::FuncOp>(callee));
728       }
729     return false;
730   });
731 }
732 
/// Constructor of the array copy analysis.
/// This performs the analysis and saves the intermediate results.
void ArrayCopyAnalysisBase::construct(mlir::Operation *topLevelOp) {
  topLevelOp->walk([&](Operation *op) {
    if (auto st = mlir::dyn_cast<fir::ArrayMergeStoreOp>(op)) {
      // Gather every op that can produce the stored array value.
      llvm::SmallVector<mlir::Operation *> values;
      ReachCollector::reachingValues(values, st.getSequence());
      bool callConflict = conservativeCallConflict(values);
      // Gather every array op touching the loaded array value being merged.
      llvm::SmallVector<mlir::Operation *> mentions;
      arrayMentions(mentions,
                    mlir::cast<ArrayLoadOp>(st.getOriginal().getDefiningOp()));
      bool conflict = conflictDetected(values, mentions, st, optimizeConflicts);
      bool refConflict = conflictOnReference(mentions);
      if (callConflict || conflict || refConflict) {
        LLVM_DEBUG(llvm::dbgs()
                   << "CONFLICT: copies required for " << st << '\n'
                   << "   adding conflicts on: " << *op << " and "
                   << st.getOriginal() << '\n');
        // Both the merge store and its originating load are marked so either
        // one can be queried with hasPotentialConflict().
        conflicts.insert(op);
        conflicts.insert(st.getOriginal().getDefiningOp());
        if (auto *access = amendingAccess(mentions))
          amendAccesses.insert(access);
      }
      // Map the load to its merge store (pending modifications to merge).
      auto *ld = st.getOriginal().getDefiningOp();
      LLVM_DEBUG(llvm::dbgs()
                 << "map: adding {" << *ld << " -> " << st << "}\n");
      useMap.insert({ld, op});
    } else if (auto load = mlir::dyn_cast<ArrayLoadOp>(op)) {
      llvm::SmallVector<mlir::Operation *> mentions;
      arrayMentions(mentions, load);
      LLVM_DEBUG(llvm::dbgs() << "process load: " << load
                              << ", mentions: " << mentions.size() << '\n');
      // Map each access/amend/fetch/update/modify back to its source load.
      for (auto *acc : mentions) {
        LLVM_DEBUG(llvm::dbgs() << " mention: " << *acc << '\n');
        if (mlir::isa<ArrayAccessOp, ArrayAmendOp, ArrayFetchOp, ArrayUpdateOp,
                      ArrayModifyOp>(acc)) {
          if (useMap.count(acc)) {
            // An access already mapped to another load means two merge stores
            // target the same access chain, which is unsupported.
            mlir::emitError(
                load.getLoc(),
                "The parallel semantics of multiple array_merge_stores per "
                "array_load are not supported.");
            continue;
          }
          LLVM_DEBUG(llvm::dbgs()
                     << "map: adding {" << *acc << "} -> {" << load << "}\n");
          useMap.insert({acc, op});
        }
      }
    }
  });
}
784 
785 //===----------------------------------------------------------------------===//
786 // Conversions for converting out of array value form.
787 //===----------------------------------------------------------------------===//
788 
789 namespace {
790 class ArrayLoadConversion : public mlir::OpRewritePattern<ArrayLoadOp> {
791 public:
792   using OpRewritePattern::OpRewritePattern;
793 
794   mlir::LogicalResult
795   matchAndRewrite(ArrayLoadOp load,
796                   mlir::PatternRewriter &rewriter) const override {
797     LLVM_DEBUG(llvm::dbgs() << "replace load " << load << " with undef.\n");
798     rewriter.replaceOpWithNewOp<UndefOp>(load, load.getType());
799     return mlir::success();
800   }
801 };
802 
803 class ArrayMergeStoreConversion
804     : public mlir::OpRewritePattern<ArrayMergeStoreOp> {
805 public:
806   using OpRewritePattern::OpRewritePattern;
807 
808   mlir::LogicalResult
809   matchAndRewrite(ArrayMergeStoreOp store,
810                   mlir::PatternRewriter &rewriter) const override {
811     LLVM_DEBUG(llvm::dbgs() << "marking store " << store << " as dead.\n");
812     rewriter.eraseOp(store);
813     return mlir::success();
814   }
815 };
816 } // namespace
817 
818 static mlir::Type getEleTy(mlir::Type ty) {
819   auto eleTy = unwrapSequenceType(unwrapPassByRefType(ty));
820   // FIXME: keep ptr/heap/ref information.
821   return ReferenceType::get(eleTy);
822 }
823 
824 // This is an unsafe way to deduce this (won't be true in internal
825 // procedure or inside select-rank for assumed-size). Only here to satisfy
826 // legacy code until removed.
827 static bool isAssumedSize(llvm::SmallVectorImpl<mlir::Value> &extents) {
828   if (extents.empty())
829     return false;
830   auto cstLen = fir::getIntIfConstant(extents.back());
831   return cstLen.has_value() && *cstLen == -1;
832 }
833 
// Extract extents from the ShapeOp/ShapeShiftOp into the result vector.
// Returns true when the copy must apply the slice itself (assumed-size case
// where the final extent is recovered from the slice triple).
static bool getAdjustedExtents(mlir::Location loc,
                               mlir::PatternRewriter &rewriter,
                               ArrayLoadOp arrLoad,
                               llvm::SmallVectorImpl<mlir::Value> &result,
                               mlir::Value shape) {
  bool copyUsingSlice = false;
  auto *shapeOp = shape.getDefiningOp();
  if (auto s = mlir::dyn_cast_or_null<ShapeOp>(shapeOp)) {
    auto e = s.getExtents();
    result.insert(result.end(), e.begin(), e.end());
  } else if (auto s = mlir::dyn_cast_or_null<ShapeShiftOp>(shapeOp)) {
    auto e = s.getExtents();
    result.insert(result.end(), e.begin(), e.end());
  } else {
    emitFatalError(loc, "not a fir.shape/fir.shape_shift op");
  }
  auto idxTy = rewriter.getIndexType();
  if (isAssumedSize(result)) {
    // Use slice information to compute the extent of the column.
    auto one = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 1);
    mlir::Value size = one;
    if (mlir::Value sliceArg = arrLoad.getSlice()) {
      if (auto sliceOp =
              mlir::dyn_cast_or_null<SliceOp>(sliceArg.getDefiningOp())) {
        // The last (lb, ub, stride) triple describes the final dimension.
        auto triples = sliceOp.getTriples();
        const std::size_t tripleSize = triples.size();
        auto module = arrLoad->getParentOfType<mlir::ModuleOp>();
        FirOpBuilder builder(rewriter, module);
        size = builder.genExtentFromTriplet(loc, triples[tripleSize - 3],
                                            triples[tripleSize - 2],
                                            triples[tripleSize - 1], idxTy);
        copyUsingSlice = true;
      }
    }
    // Replace the -1 placeholder extent with the computed column size.
    result[result.size() - 1] = size;
  }
  return copyUsingSlice;
}
873 
/// Place the extents of the array load, \p arrLoad, into \p result and
/// return a ShapeOp or ShapeShiftOp with the same extents. If \p arrLoad is
/// loading a `!fir.box`, code will be generated to read the extents from the
/// boxed value, and the returned shape Op will be built with the extents read
/// from the box. Otherwise, the extents will be extracted from the ShapeOp (or
/// ShapeShiftOp) argument of \p arrLoad. \p copyUsingSlice will be set to true
/// if slicing of the output array is to be done in the copy-in/copy-out rather
/// than in the elemental computation step.
static mlir::Value getOrReadExtentsAndShapeOp(
    mlir::Location loc, mlir::PatternRewriter &rewriter, ArrayLoadOp arrLoad,
    llvm::SmallVectorImpl<mlir::Value> &result, bool &copyUsingSlice) {
  assert(result.empty());
  if (arrLoad->hasAttr(fir::getOptionalAttrName()))
    fir::emitFatalError(
        loc, "shapes from array load of OPTIONAL arrays must not be used");
  if (auto boxTy = arrLoad.getMemref().getType().dyn_cast<BoxType>()) {
    // Boxed case: read each extent out of the box descriptor.
    auto rank =
        dyn_cast_ptrOrBoxEleTy(boxTy).cast<SequenceType>().getDimension();
    auto idxTy = rewriter.getIndexType();
    for (decltype(rank) dim = 0; dim < rank; ++dim) {
      auto dimVal = rewriter.create<mlir::arith::ConstantIndexOp>(loc, dim);
      auto dimInfo = rewriter.create<BoxDimsOp>(loc, idxTy, idxTy, idxTy,
                                                arrLoad.getMemref(), dimVal);
      // box_dims yields (lower bound, extent, stride); keep the extent.
      result.emplace_back(dimInfo.getResult(1));
    }
    if (!arrLoad.getShape()) {
      auto shapeType = ShapeType::get(rewriter.getContext(), rank);
      return rewriter.create<ShapeOp>(loc, shapeType, result);
    }
    // A shift operand supplies the lower bounds; interleave them with the
    // extents read from the box to build a shape_shift.
    auto shiftOp = arrLoad.getShape().getDefiningOp<ShiftOp>();
    auto shapeShiftType = ShapeShiftType::get(rewriter.getContext(), rank);
    llvm::SmallVector<mlir::Value> shapeShiftOperands;
    for (auto [lb, extent] : llvm::zip(shiftOp.getOrigins(), result)) {
      shapeShiftOperands.push_back(lb);
      shapeShiftOperands.push_back(extent);
    }
    return rewriter.create<ShapeShiftOp>(loc, shapeShiftType,
                                         shapeShiftOperands);
  }
  // Unboxed case: extents come straight from the shape operand.
  copyUsingSlice =
      getAdjustedExtents(loc, rewriter, arrLoad, result, arrLoad.getShape());
  return arrLoad.getShape();
}
917 
918 static mlir::Type toRefType(mlir::Type ty) {
919   if (fir::isa_ref_type(ty))
920     return ty;
921   return fir::ReferenceType::get(ty);
922 }
923 
924 static llvm::SmallVector<mlir::Value>
925 getTypeParamsIfRawData(mlir::Location loc, FirOpBuilder &builder,
926                        ArrayLoadOp arrLoad, mlir::Type ty) {
927   if (ty.isa<BoxType>())
928     return {};
929   return fir::factory::getTypeParams(loc, builder, arrLoad);
930 }
931 
/// Generate an array_coor that addresses element \p indices of \p alloc.
/// When more indices are supplied than the array's rank, the extra indices
/// address into the element and a trailing coordinate_of is generated.
static mlir::Value genCoorOp(mlir::PatternRewriter &rewriter,
                             mlir::Location loc, mlir::Type eleTy,
                             mlir::Type resTy, mlir::Value alloc,
                             mlir::Value shape, mlir::Value slice,
                             mlir::ValueRange indices, ArrayLoadOp load,
                             bool skipOrig = false) {
  llvm::SmallVector<mlir::Value> originated;
  if (skipOrig)
    originated.assign(indices.begin(), indices.end());
  else
    // Rebase the zero-based indices onto the array's origin (lower bounds).
    originated = factory::originateIndices(loc, rewriter, alloc.getType(),
                                           shape, indices);
  auto seqTy = dyn_cast_ptrOrBoxEleTy(alloc.getType());
  assert(seqTy && seqTy.isa<SequenceType>());
  const auto dimension = seqTy.cast<SequenceType>().getDimension();
  auto module = load->getParentOfType<mlir::ModuleOp>();
  FirOpBuilder builder(rewriter, module);
  auto typeparams = getTypeParamsIfRawData(loc, builder, load, alloc.getType());
  // The first `dimension` indices select the array element.
  mlir::Value result = rewriter.create<ArrayCoorOp>(
      loc, eleTy, alloc, shape, slice,
      llvm::ArrayRef<mlir::Value>{originated}.take_front(dimension),
      typeparams);
  // Remaining indices, if any, address a component within the element.
  if (dimension < originated.size())
    result = rewriter.create<fir::CoordinateOp>(
        loc, resTy, result,
        llvm::ArrayRef<mlir::Value>{originated}.drop_front(dimension));
  return result;
}
960 
/// Compute the CHARACTER length of the elements of \p load: from the box
/// descriptor, from the array_load type parameters, or from the static type.
static mlir::Value getCharacterLen(mlir::Location loc, FirOpBuilder &builder,
                                   ArrayLoadOp load, CharacterType charTy) {
  auto charLenTy = builder.getCharacterLengthType();
  if (charTy.hasDynamicLen()) {
    if (load.getMemref().getType().isa<BoxType>()) {
      // The loaded array is an emboxed value. Get the CHARACTER length from
      // the box value.
      auto eleSzInBytes =
          builder.create<BoxEleSizeOp>(loc, charLenTy, load.getMemref());
      auto kindSize =
          builder.getKindMap().getCharacterBitsize(charTy.getFKind());
      auto kindByteSize =
          builder.createIntegerConstant(loc, charLenTy, kindSize / 8);
      // Length in characters = element size in bytes / bytes per character.
      return builder.create<mlir::arith::DivSIOp>(loc, eleSzInBytes,
                                                  kindByteSize);
    }
    // The loaded array is a (set of) unboxed values. If the CHARACTER's
    // length is not a constant, it must be provided as a type parameter to
    // the array_load.
    auto typeparams = load.getTypeparams();
    assert(typeparams.size() > 0 && "expected type parameters on array_load");
    return typeparams.back();
  }
  // The typical case: the length of the CHARACTER is a compile-time
  // constant that is encoded in the type information.
  return builder.createIntegerConstant(loc, charLenTy, charTy.getLen());
}
/// Generate a shallow array copy. This is used for both copy-in and copy-out.
/// \p CopyIn selects the direction: when true the slice (if any) applies to
/// the source array; when false it applies to the destination.
template <bool CopyIn>
void genArrayCopy(mlir::Location loc, mlir::PatternRewriter &rewriter,
                  mlir::Value dst, mlir::Value src, mlir::Value shapeOp,
                  mlir::Value sliceOp, ArrayLoadOp arrLoad) {
  auto insPt = rewriter.saveInsertionPoint();
  llvm::SmallVector<mlir::Value> indices;
  llvm::SmallVector<mlir::Value> extents;
  bool copyUsingSlice =
      getAdjustedExtents(loc, rewriter, arrLoad, extents, shapeOp);
  auto idxTy = rewriter.getIndexType();
  // Build loop nest from column to row.
  for (auto sh : llvm::reverse(extents)) {
    auto ubi = rewriter.create<ConvertOp>(loc, idxTy, sh);
    auto zero = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 0);
    auto one = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 1);
    // Loops iterate [0, extent-1]; lower bounds are applied by
    // originateIndices below.
    auto ub = rewriter.create<mlir::arith::SubIOp>(loc, idxTy, ubi, one);
    auto loop = rewriter.create<DoLoopOp>(loc, zero, ub, one);
    rewriter.setInsertionPointToStart(loop.getBody());
    indices.push_back(loop.getInductionVar());
  }
  // Reverse the indices so they are in column-major order.
  std::reverse(indices.begin(), indices.end());
  auto module = arrLoad->getParentOfType<mlir::ModuleOp>();
  FirOpBuilder builder(rewriter, module);
  auto fromAddr = rewriter.create<ArrayCoorOp>(
      loc, getEleTy(src.getType()), src, shapeOp,
      CopyIn && copyUsingSlice ? sliceOp : mlir::Value{},
      factory::originateIndices(loc, rewriter, src.getType(), shapeOp, indices),
      getTypeParamsIfRawData(loc, builder, arrLoad, src.getType()));
  auto toAddr = rewriter.create<ArrayCoorOp>(
      loc, getEleTy(dst.getType()), dst, shapeOp,
      !CopyIn && copyUsingSlice ? sliceOp : mlir::Value{},
      factory::originateIndices(loc, rewriter, dst.getType(), shapeOp, indices),
      getTypeParamsIfRawData(loc, builder, arrLoad, dst.getType()));
  auto eleTy = unwrapSequenceType(unwrapPassByRefType(dst.getType()));
  // Copy from (to) object to (from) temp copy of same object.
  if (auto charTy = eleTy.dyn_cast<CharacterType>()) {
    auto len = getCharacterLen(loc, builder, arrLoad, charTy);
    CharBoxValue toChar(toAddr, len);
    CharBoxValue fromChar(fromAddr, len);
    factory::genScalarAssignment(builder, loc, toChar, fromChar);
  } else {
    if (hasDynamicSize(eleTy))
      TODO(loc, "copy element of dynamic size");
    factory::genScalarAssignment(builder, loc, toAddr, fromAddr);
  }
  rewriter.restoreInsertionPoint(insPt);
}
1037 
/// The array load may be either a boxed or unboxed value. If the value is
/// boxed, we read the type parameters from the boxed value.
static llvm::SmallVector<mlir::Value>
genArrayLoadTypeParameters(mlir::Location loc, mlir::PatternRewriter &rewriter,
                           ArrayLoadOp load) {
  if (load.getTypeparams().empty()) {
    auto eleTy =
        unwrapSequenceType(unwrapPassByRefType(load.getMemref().getType()));
    if (hasDynamicSize(eleTy)) {
      if (auto charTy = eleTy.dyn_cast<CharacterType>()) {
        // A dynamic-length CHARACTER with no explicit type parameters must
        // come from a box, where the length can be recovered.
        assert(load.getMemref().getType().isa<BoxType>());
        auto module = load->getParentOfType<mlir::ModuleOp>();
        FirOpBuilder builder(rewriter, module);
        return {getCharacterLen(loc, builder, load, charTy)};
      }
      TODO(loc, "unhandled dynamic type parameters");
    }
    // Statically sized elements carry no runtime type parameters.
    return {};
  }
  return load.getTypeparams();
}
1059 
1060 static llvm::SmallVector<mlir::Value>
1061 findNonconstantExtents(mlir::Type memrefTy,
1062                        llvm::ArrayRef<mlir::Value> extents) {
1063   llvm::SmallVector<mlir::Value> nce;
1064   auto arrTy = unwrapPassByRefType(memrefTy);
1065   auto seqTy = arrTy.cast<SequenceType>();
1066   for (auto [s, x] : llvm::zip(seqTy.getShape(), extents))
1067     if (s == SequenceType::getUnknownExtent())
1068       nce.emplace_back(x);
1069   if (extents.size() > seqTy.getShape().size())
1070     for (auto x : extents.drop_front(seqTy.getShape().size()))
1071       nce.emplace_back(x);
1072   return nce;
1073 }
1074 
/// Allocate temporary storage for an ArrayLoadOp \load and initialize any
/// allocatable direct components of the array elements with an unallocated
/// status. Returns the temporary address as well as a callback to generate the
/// temporary clean-up once it has been used. The clean-up will take care of
/// deallocating all the element allocatable components that may have been
/// allocated while using the temporary.
static std::pair<mlir::Value,
                 std::function<void(mlir::PatternRewriter &rewriter)>>
allocateArrayTemp(mlir::Location loc, mlir::PatternRewriter &rewriter,
                  ArrayLoadOp load, llvm::ArrayRef<mlir::Value> extents,
                  mlir::Value shape) {
  mlir::Type baseType = load.getMemref().getType();
  // Only the dynamic extents are operands of the allocation.
  llvm::SmallVector<mlir::Value> nonconstantExtents =
      findNonconstantExtents(baseType, extents);
  llvm::SmallVector<mlir::Value> typeParams =
      genArrayLoadTypeParameters(loc, rewriter, load);
  mlir::Value allocmem = rewriter.create<AllocMemOp>(
      loc, dyn_cast_ptrOrBoxEleTy(baseType), typeParams, nonconstantExtents);
  mlir::Type eleType =
      fir::unwrapSequenceType(fir::unwrapPassByRefType(baseType));
  if (fir::isRecordWithAllocatableMember(eleType)) {
    // The allocatable component descriptors need to be set to a clean
    // deallocated status before anything is done with them.
    mlir::Value box = rewriter.create<fir::EmboxOp>(
        loc, fir::BoxType::get(allocmem.getType()), allocmem, shape,
        /*slice=*/mlir::Value{}, typeParams);
    auto module = load->getParentOfType<mlir::ModuleOp>();
    FirOpBuilder builder(rewriter, module);
    runtime::genDerivedTypeInitialize(builder, loc, box);
    // Any allocatable component that may have been allocated must be
    // deallocated during the clean-up.
    // Note: the lambda captures values (box, allocmem, module) by copy so it
    // remains valid after this function returns.
    auto cleanup = [=](mlir::PatternRewriter &r) {
      FirOpBuilder builder(r, module);
      runtime::genDerivedTypeDestroy(builder, loc, box);
      r.create<FreeMemOp>(loc, allocmem);
    };
    return {allocmem, cleanup};
  }
  // No allocatable components: clean-up is a plain deallocation.
  auto cleanup = [=](mlir::PatternRewriter &r) {
    r.create<FreeMemOp>(loc, allocmem);
  };
  return {allocmem, cleanup};
}
1118 
1119 namespace {
/// Conversion of fir.array_update and fir.array_modify Ops.
/// If there is a conflict for the update, then we need to perform a
/// copy-in/copy-out to preserve the original values of the array. If there is
/// no conflict, then it is safe to eschew making any copies.
template <typename ArrayOp>
class ArrayUpdateConversionBase : public mlir::OpRewritePattern<ArrayOp> {
public:
  // TODO: Implement copy/swap semantics?
  explicit ArrayUpdateConversionBase(mlir::MLIRContext *ctx,
                                     const ArrayCopyAnalysisBase &a,
                                     const OperationUseMapT &m)
      : mlir::OpRewritePattern<ArrayOp>{ctx}, analysis{a}, useMap{m} {}

  /// The array_access, \p access, is to be to a cloned copy due to a potential
  /// conflict. Uses copy-in/copy-out semantics and not copy/swap.
  mlir::Value referenceToClone(mlir::Location loc,
                               mlir::PatternRewriter &rewriter,
                               ArrayOp access) const {
    LLVM_DEBUG(llvm::dbgs()
               << "generating copy-in/copy-out loops for " << access << '\n');
    auto *op = access.getOperation();
    auto *loadOp = useMap.lookup(op);
    auto load = mlir::cast<ArrayLoadOp>(loadOp);
    auto eleTy = access.getType();
    rewriter.setInsertionPoint(loadOp);
    // Copy in.
    llvm::SmallVector<mlir::Value> extents;
    bool copyUsingSlice = false;
    auto shapeOp = getOrReadExtentsAndShapeOp(loc, rewriter, load, extents,
                                              copyUsingSlice);
    auto [allocmem, genTempCleanUp] =
        allocateArrayTemp(loc, rewriter, load, extents, shapeOp);
    genArrayCopy</*copyIn=*/true>(load.getLoc(), rewriter, allocmem,
                                  load.getMemref(), shapeOp, load.getSlice(),
                                  load);
    // Generate the reference for the access.
    rewriter.setInsertionPoint(op);
    auto coor = genCoorOp(
        rewriter, loc, getEleTy(load.getType()), eleTy, allocmem, shapeOp,
        copyUsingSlice ? mlir::Value{} : load.getSlice(), access.getIndices(),
        load, access->hasAttr(factory::attrFortranArrayOffsets()));
    // Locate the merge_store that terminates this array value's lifetime.
    auto *storeOp = useMap.lookup(loadOp);
    auto store = mlir::cast<ArrayMergeStoreOp>(storeOp);
    rewriter.setInsertionPoint(storeOp);
    // Copy out.
    genArrayCopy</*copyIn=*/false>(store.getLoc(), rewriter, store.getMemref(),
                                   allocmem, shapeOp, store.getSlice(), load);
    genTempCleanUp(rewriter);
    return coor;
  }

  /// Copy the RHS element into the LHS and insert copy-in/copy-out between a
  /// temp and the LHS if the analysis found potential overlaps between the RHS
  /// and LHS arrays. The element copy generator must be provided in \p
  /// assignElement. \p update must be the ArrayUpdateOp or the ArrayModifyOp.
  /// Returns the address of the LHS element inside the loop and the LHS
  /// ArrayLoad result.
  std::pair<mlir::Value, mlir::Value>
  materializeAssignment(mlir::Location loc, mlir::PatternRewriter &rewriter,
                        ArrayOp update,
                        const std::function<void(mlir::Value)> &assignElement,
                        mlir::Type lhsEltRefType) const {
    auto *op = update.getOperation();
    auto *loadOp = useMap.lookup(op);
    auto load = mlir::cast<ArrayLoadOp>(loadOp);
    LLVM_DEBUG(llvm::outs() << "does " << load << " have a conflict?\n");
    if (analysis.hasPotentialConflict(loadOp)) {
      // If there is a conflict between the arrays, then we copy the lhs array
      // to a temporary, update the temporary, and copy the temporary back to
      // the lhs array. This yields Fortran's copy-in copy-out array semantics.
      LLVM_DEBUG(llvm::outs() << "Yes, conflict was found\n");
      rewriter.setInsertionPoint(loadOp);
      // Copy in.
      llvm::SmallVector<mlir::Value> extents;
      bool copyUsingSlice = false;
      auto shapeOp = getOrReadExtentsAndShapeOp(loc, rewriter, load, extents,
                                                copyUsingSlice);
      auto [allocmem, genTempCleanUp] =
          allocateArrayTemp(loc, rewriter, load, extents, shapeOp);

      genArrayCopy</*copyIn=*/true>(load.getLoc(), rewriter, allocmem,
                                    load.getMemref(), shapeOp, load.getSlice(),
                                    load);
      // Apply the element assignment to the temporary.
      rewriter.setInsertionPoint(op);
      auto coor = genCoorOp(
          rewriter, loc, getEleTy(load.getType()), lhsEltRefType, allocmem,
          shapeOp, copyUsingSlice ? mlir::Value{} : load.getSlice(),
          update.getIndices(), load,
          update->hasAttr(factory::attrFortranArrayOffsets()));
      assignElement(coor);
      auto *storeOp = useMap.lookup(loadOp);
      auto store = mlir::cast<ArrayMergeStoreOp>(storeOp);
      rewriter.setInsertionPoint(storeOp);
      // Copy out.
      genArrayCopy</*copyIn=*/false>(store.getLoc(), rewriter,
                                     store.getMemref(), allocmem, shapeOp,
                                     store.getSlice(), load);
      genTempCleanUp(rewriter);
      return {coor, load.getResult()};
    }
    // Otherwise, when there is no conflict (a possible loop-carried
    // dependence), the lhs array can be updated in place.
    LLVM_DEBUG(llvm::outs() << "No, conflict wasn't found\n");
    rewriter.setInsertionPoint(op);
    auto coorTy = getEleTy(load.getType());
    auto coor =
        genCoorOp(rewriter, loc, coorTy, lhsEltRefType, load.getMemref(),
                  load.getShape(), load.getSlice(), update.getIndices(), load,
                  update->hasAttr(factory::attrFortranArrayOffsets()));
    assignElement(coor);
    return {coor, load.getResult()};
  }

protected:
  const ArrayCopyAnalysisBase &analysis;
  const OperationUseMapT &useMap;
};
1238 
1239 class ArrayUpdateConversion : public ArrayUpdateConversionBase<ArrayUpdateOp> {
1240 public:
1241   explicit ArrayUpdateConversion(mlir::MLIRContext *ctx,
1242                                  const ArrayCopyAnalysisBase &a,
1243                                  const OperationUseMapT &m)
1244       : ArrayUpdateConversionBase{ctx, a, m} {}
1245 
1246   mlir::LogicalResult
1247   matchAndRewrite(ArrayUpdateOp update,
1248                   mlir::PatternRewriter &rewriter) const override {
1249     auto loc = update.getLoc();
1250     auto assignElement = [&](mlir::Value coor) {
1251       auto input = update.getMerge();
1252       if (auto inEleTy = dyn_cast_ptrEleTy(input.getType())) {
1253         emitFatalError(loc, "array_update on references not supported");
1254       } else {
1255         rewriter.create<fir::StoreOp>(loc, input, coor);
1256       }
1257     };
1258     auto lhsEltRefType = toRefType(update.getMerge().getType());
1259     auto [_, lhsLoadResult] = materializeAssignment(
1260         loc, rewriter, update, assignElement, lhsEltRefType);
1261     update.replaceAllUsesWith(lhsLoadResult);
1262     rewriter.replaceOp(update, lhsLoadResult);
1263     return mlir::success();
1264   }
1265 };
1266 
1267 class ArrayModifyConversion : public ArrayUpdateConversionBase<ArrayModifyOp> {
1268 public:
1269   explicit ArrayModifyConversion(mlir::MLIRContext *ctx,
1270                                  const ArrayCopyAnalysisBase &a,
1271                                  const OperationUseMapT &m)
1272       : ArrayUpdateConversionBase{ctx, a, m} {}
1273 
1274   mlir::LogicalResult
1275   matchAndRewrite(ArrayModifyOp modify,
1276                   mlir::PatternRewriter &rewriter) const override {
1277     auto loc = modify.getLoc();
1278     auto assignElement = [](mlir::Value) {
1279       // Assignment already materialized by lowering using lhs element address.
1280     };
1281     auto lhsEltRefType = modify.getResult(0).getType();
1282     auto [lhsEltCoor, lhsLoadResult] = materializeAssignment(
1283         loc, rewriter, modify, assignElement, lhsEltRefType);
1284     modify.replaceAllUsesWith(mlir::ValueRange{lhsEltCoor, lhsLoadResult});
1285     rewriter.replaceOp(modify, mlir::ValueRange{lhsEltCoor, lhsLoadResult});
1286     return mlir::success();
1287   }
1288 };
1289 
1290 class ArrayFetchConversion : public mlir::OpRewritePattern<ArrayFetchOp> {
1291 public:
1292   explicit ArrayFetchConversion(mlir::MLIRContext *ctx,
1293                                 const OperationUseMapT &m)
1294       : OpRewritePattern{ctx}, useMap{m} {}
1295 
1296   mlir::LogicalResult
1297   matchAndRewrite(ArrayFetchOp fetch,
1298                   mlir::PatternRewriter &rewriter) const override {
1299     auto *op = fetch.getOperation();
1300     rewriter.setInsertionPoint(op);
1301     auto load = mlir::cast<ArrayLoadOp>(useMap.lookup(op));
1302     auto loc = fetch.getLoc();
1303     auto coor = genCoorOp(
1304         rewriter, loc, getEleTy(load.getType()), toRefType(fetch.getType()),
1305         load.getMemref(), load.getShape(), load.getSlice(), fetch.getIndices(),
1306         load, fetch->hasAttr(factory::attrFortranArrayOffsets()));
1307     if (isa_ref_type(fetch.getType()))
1308       rewriter.replaceOp(fetch, coor);
1309     else
1310       rewriter.replaceOpWithNewOp<fir::LoadOp>(fetch, coor);
1311     return mlir::success();
1312   }
1313 
1314 private:
1315   const OperationUseMapT &useMap;
1316 };
1317 
1318 /// As array_access op is like an array_fetch op, except that it does not imply
1319 /// a load op. (It operates in the reference domain.)
1320 class ArrayAccessConversion : public ArrayUpdateConversionBase<ArrayAccessOp> {
1321 public:
1322   explicit ArrayAccessConversion(mlir::MLIRContext *ctx,
1323                                  const ArrayCopyAnalysisBase &a,
1324                                  const OperationUseMapT &m)
1325       : ArrayUpdateConversionBase{ctx, a, m} {}
1326 
1327   mlir::LogicalResult
1328   matchAndRewrite(ArrayAccessOp access,
1329                   mlir::PatternRewriter &rewriter) const override {
1330     auto *op = access.getOperation();
1331     auto loc = access.getLoc();
1332     if (analysis.inAmendAccessSet(op)) {
1333       // This array_access is associated with an array_amend and there is a
1334       // conflict. Make a copy to store into.
1335       auto result = referenceToClone(loc, rewriter, access);
1336       access.replaceAllUsesWith(result);
1337       rewriter.replaceOp(access, result);
1338       return mlir::success();
1339     }
1340     rewriter.setInsertionPoint(op);
1341     auto load = mlir::cast<ArrayLoadOp>(useMap.lookup(op));
1342     auto coor = genCoorOp(
1343         rewriter, loc, getEleTy(load.getType()), toRefType(access.getType()),
1344         load.getMemref(), load.getShape(), load.getSlice(), access.getIndices(),
1345         load, access->hasAttr(factory::attrFortranArrayOffsets()));
1346     rewriter.replaceOp(access, coor);
1347     return mlir::success();
1348   }
1349 };
1350 
1351 /// An array_amend op is a marker to record which array access is being used to
1352 /// update an array value. After this pass runs, an array_amend has no
1353 /// semantics. We rewrite these to undefined values here to remove them while
1354 /// preserving SSA form.
1355 class ArrayAmendConversion : public mlir::OpRewritePattern<ArrayAmendOp> {
1356 public:
1357   explicit ArrayAmendConversion(mlir::MLIRContext *ctx)
1358       : OpRewritePattern{ctx} {}
1359 
1360   mlir::LogicalResult
1361   matchAndRewrite(ArrayAmendOp amend,
1362                   mlir::PatternRewriter &rewriter) const override {
1363     auto *op = amend.getOperation();
1364     rewriter.setInsertionPoint(op);
1365     auto loc = amend.getLoc();
1366     auto undef = rewriter.create<UndefOp>(loc, amend.getType());
1367     rewriter.replaceOp(amend, undef.getResult());
1368     return mlir::success();
1369   }
1370 };
1371 
/// Driver for the array-value-copy pass: runs the conflict analysis, then
/// lowers the array value ops in two conversion phases.
class ArrayValueCopyConverter
    : public fir::impl::ArrayValueCopyBase<ArrayValueCopyConverter> {
public:
  ArrayValueCopyConverter() = default;
  ArrayValueCopyConverter(const fir::ArrayValueCopyOptions &options)
      : Base(options) {}

  void runOnOperation() override {
    auto func = getOperation();
    LLVM_DEBUG(llvm::dbgs() << "\n\narray-value-copy pass on function '"
                            << func.getName() << "'\n");
    auto *context = &getContext();

    // Perform the conflict analysis.
    const ArrayCopyAnalysisBase *analysis;
    if (optimizeConflicts)
      analysis = &getAnalysis<ArrayCopyAnalysisOptimized>();
    else
      analysis = &getAnalysis<ArrayCopyAnalysis>();

    const auto &useMap = analysis->getUseMap();

    // Phase 1: rewrite the element-level ops (fetch/update/modify/access/
    // amend) into memory operations, inserting copy-in/copy-out as needed.
    mlir::RewritePatternSet patterns1(context);
    patterns1.insert<ArrayFetchConversion>(context, useMap);
    patterns1.insert<ArrayUpdateConversion>(context, *analysis, useMap);
    patterns1.insert<ArrayModifyConversion>(context, *analysis, useMap);
    patterns1.insert<ArrayAccessConversion>(context, *analysis, useMap);
    patterns1.insert<ArrayAmendConversion>(context);
    mlir::ConversionTarget target(*context);
    target
        .addLegalDialect<FIROpsDialect, mlir::scf::SCFDialect,
                         mlir::arith::ArithDialect, mlir::func::FuncDialect>();
    target.addIllegalOp<ArrayAccessOp, ArrayAmendOp, ArrayFetchOp,
                        ArrayUpdateOp, ArrayModifyOp>();
    // Rewrite the array fetch and array update ops.
    if (mlir::failed(
            mlir::applyPartialConversion(func, target, std::move(patterns1)))) {
      mlir::emitError(mlir::UnknownLoc::get(context),
                      "failure in array-value-copy pass, phase 1");
      signalPassFailure();
    }

    // Phase 2: the array values are now dead; drop the remaining array_load
    // and array_merge_store ops (the same target is reused, extended with the
    // additional illegal ops).
    mlir::RewritePatternSet patterns2(context);
    patterns2.insert<ArrayLoadConversion>(context);
    patterns2.insert<ArrayMergeStoreConversion>(context);
    target.addIllegalOp<ArrayLoadOp, ArrayMergeStoreOp>();
    if (mlir::failed(
            mlir::applyPartialConversion(func, target, std::move(patterns2)))) {
      mlir::emitError(mlir::UnknownLoc::get(context),
                      "failure in array-value-copy pass, phase 2");
      signalPassFailure();
    }
  }
};
1426 } // namespace
1427 
/// Factory for the array-value-copy pass, forwarding the pass options.
std::unique_ptr<mlir::Pass>
fir::createArrayValueCopyPass(fir::ArrayValueCopyOptions options) {
  return std::make_unique<ArrayValueCopyConverter>(options);
}
1432