xref: /llvm-project/flang/lib/Optimizer/Transforms/ArrayValueCopy.cpp (revision b07ef9e7cd6f5348df0a4f63e70a60491427ff64)
1 //===-- ArrayValueCopy.cpp ------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "flang/Optimizer/Builder/Array.h"
10 #include "flang/Optimizer/Builder/BoxValue.h"
11 #include "flang/Optimizer/Builder/FIRBuilder.h"
12 #include "flang/Optimizer/Builder/Factory.h"
13 #include "flang/Optimizer/Builder/Runtime/Derived.h"
14 #include "flang/Optimizer/Builder/Todo.h"
15 #include "flang/Optimizer/Dialect/FIRDialect.h"
16 #include "flang/Optimizer/Dialect/FIROpsSupport.h"
17 #include "flang/Optimizer/Dialect/Support/FIRContext.h"
18 #include "flang/Optimizer/Transforms/Passes.h"
19 #include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
20 #include "mlir/Dialect/SCF/IR/SCF.h"
21 #include "mlir/Transforms/DialectConversion.h"
22 #include "llvm/Support/Debug.h"
23 
namespace fir {
#define GEN_PASS_DEF_ARRAYVALUECOPY
#include "flang/Optimizer/Transforms/Passes.h.inc"
} // namespace fir

#define DEBUG_TYPE "flang-array-value-copy"

using namespace fir;
using namespace mlir;

// Maps array mention ops (access/amend/fetch/update/modify) back to the
// array_load that is the source of their array value, and array_load ops to
// the array_merge_store that merges their pending modifications (see
// ArrayCopyAnalysisBase::getUseMap below).
using OperationUseMapT = llvm::DenseMap<mlir::Operation *, mlir::Operation *>;
35 
36 namespace {
37 
38 /// Array copy analysis.
39 /// Perform an interference analysis between array values.
40 ///
41 /// Lowering will generate a sequence of the following form.
42 /// ```mlir
43 ///   %a_1 = fir.array_load %array_1(%shape) : ...
44 ///   ...
45 ///   %a_j = fir.array_load %array_j(%shape) : ...
46 ///   ...
47 ///   %a_n = fir.array_load %array_n(%shape) : ...
48 ///     ...
49 ///     %v_i = fir.array_fetch %a_i, ...
50 ///     %a_j1 = fir.array_update %a_j, ...
51 ///     ...
52 ///   fir.array_merge_store %a_j, %a_jn to %array_j : ...
53 /// ```
54 ///
55 /// The analysis is to determine if there are any conflicts. A conflict is when
56 /// one the following cases occurs.
57 ///
58 /// 1. There is an `array_update` to an array value, a_j, such that a_j was
59 /// loaded from the same array memory reference (array_j) but with a different
60 /// shape as the other array values a_i, where i != j. [Possible overlapping
61 /// arrays.]
62 ///
63 /// 2. There is either an array_fetch or array_update of a_j with a different
64 /// set of index values. [Possible loop-carried dependence.]
65 ///
66 /// If none of the array values overlap in storage and the accesses are not
67 /// loop-carried, then the arrays are conflict-free and no copies are required.
class ArrayCopyAnalysisBase {
public:
  using ConflictSetT = llvm::SmallPtrSet<mlir::Operation *, 16>;
  using UseSetT = llvm::SmallPtrSet<mlir::OpOperand *, 8>;
  using LoadMapSetsT = llvm::DenseMap<mlir::Operation *, UseSetT>;
  using AmendAccessSetT = llvm::SmallPtrSet<mlir::Operation *, 4>;

  /// Run the analysis over every array_merge_store nested under \p op.
  /// When \p optimized is true, Fortran variable attributes are consulted to
  /// rule out conflicts and avoid creating temporary array copies.
  ArrayCopyAnalysisBase(mlir::Operation *op, bool optimized)
      : operation{op}, optimizeConflicts(optimized) {
    construct(op);
  }
  virtual ~ArrayCopyAnalysisBase() = default;

  /// Return the operation the analysis was run upon.
  mlir::Operation *getOperation() const { return operation; }

  /// Return true iff the `array_merge_store` has potential conflicts.
  bool hasPotentialConflict(mlir::Operation *op) const {
    LLVM_DEBUG(llvm::dbgs()
               << "looking for a conflict on " << *op
               << " and the set has a total of " << conflicts.size() << '\n');
    return conflicts.contains(op);
  }

  /// Return the use map.
  /// The use map maps array access, amend, fetch and update operations back to
  /// the array load that is the original source of the array value.
  /// It maps an array_load to an array_merge_store, if and only if the loaded
  /// array value has pending modifications to be merged.
  const OperationUseMapT &getUseMap() const { return useMap; }

  /// Return the set of array_access ops directly associated with array_amend
  /// ops.
  bool inAmendAccessSet(mlir::Operation *op) const {
    return amendAccesses.count(op);
  }

  /// For ArrayLoad `load`, return the transitive set of all OpOperands.
  UseSetT getLoadUseSet(mlir::Operation *load) const {
    assert(loadMapSets.count(load) && "analysis missed an array load?");
    return loadMapSets.lookup(load);
  }

  /// Collect into \p mentions all array ops (access/amend/fetch/update/modify)
  /// that use the array value produced by \p load. Results are memoized in
  /// `loadMapSets`.
  void arrayMentions(llvm::SmallVectorImpl<mlir::Operation *> &mentions,
                     ArrayLoadOp load);

private:
  /// Walk \p topLevelOp and populate all the analysis data structures.
  void construct(mlir::Operation *topLevelOp);

  mlir::Operation *operation; // operation that analysis ran upon
  ConflictSetT conflicts;     // set of conflicts (loads and merge stores)
  OperationUseMapT useMap;
  LoadMapSetsT loadMapSets;
  // Set of array_access ops associated with array_amend ops.
  AmendAccessSetT amendAccesses;
  // When true, use variable attributes to prune conflicts (set at >O0).
  bool optimizeConflicts;
};
124 
125 // Optimized array copy analysis that takes into account Fortran
126 // variable attributes to prove that no conflict is possible
127 // and reduce the number of temporary arrays.
class ArrayCopyAnalysisOptimized : public ArrayCopyAnalysisBase {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ArrayCopyAnalysisOptimized)

  // Run the base analysis with attribute-based conflict pruning enabled.
  ArrayCopyAnalysisOptimized(mlir::Operation *op)
      : ArrayCopyAnalysisBase(op, /*optimized=*/true) {}
};
135 
136 // Unoptimized array copy analysis used at O0.
class ArrayCopyAnalysis : public ArrayCopyAnalysisBase {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ArrayCopyAnalysis)

  // Run the base analysis with no attribute-based pruning (conservative).
  ArrayCopyAnalysis(mlir::Operation *op)
      : ArrayCopyAnalysisBase(op, /*optimized=*/false) {}
};
144 } // namespace
145 
146 namespace {
147 /// Helper class to collect all array operations that produced an array value.
/// Helper class to collect all array operations that produced an array value.
class ReachCollector {
public:
  /// \p reach receives the reaching array operations; \p loopRegion is the
  /// region of the loop nest (if any) that produced the stored array value.
  ReachCollector(llvm::SmallVectorImpl<mlir::Operation *> &reach,
                 mlir::Region *loopRegion)
      : reach{reach}, loopRegion{loopRegion} {}

  /// Collect mentions from each value in \p range; if the range is empty,
  /// process \p op itself with a null value.
  void collectArrayMentionFrom(mlir::Operation *op, mlir::ValueRange range) {
    if (range.empty()) {
      collectArrayMentionFrom(op, mlir::Value{});
      return;
    }
    for (mlir::Value v : range)
      collectArrayMentionFrom(v);
  }

  // Collect all the array_access ops in `block`. This recursively looks into
  // blocks in ops with regions.
  // FIXME: This is temporarily relying on the array_amend appearing in a
  // do_loop Region.  This phase ordering assumption can be eliminated by using
  // dominance information to find the array_access ops or by scanning the
  // transitive closure of the amending array_access's users and the defs that
  // reach them.
  void collectAccesses(llvm::SmallVector<ArrayAccessOp> &result,
                       mlir::Block *block) {
    for (auto &op : *block) {
      if (auto access = mlir::dyn_cast<ArrayAccessOp>(op)) {
        LLVM_DEBUG(llvm::dbgs() << "adding access: " << access << '\n');
        result.push_back(access);
        continue;
      }
      // Recurse into nested regions of any non-access op.
      for (auto &region : op.getRegions())
        for (auto &bb : region.getBlocks())
          collectAccesses(result, &bb);
    }
  }

  /// Process the op defining \p val: chase region-carrying ops through their
  /// regions, record array ops in `reach`, and recurse on operands.
  void collectArrayMentionFrom(mlir::Operation *op, mlir::Value val) {
    // `val` is defined by an Op, process the defining Op.
    // If `val` is defined by a region containing Op, we want to drill down
    // and through that Op's region(s).
    LLVM_DEBUG(llvm::dbgs() << "popset: " << *op << '\n');
    // For region ops (do_loop, iterate_while, if), map the result back to the
    // values yielded inside the regions and recurse on those.
    auto popFn = [&](auto rop) {
      assert(val && "op must have a result value");
      auto resNum = val.cast<mlir::OpResult>().getResultNumber();
      llvm::SmallVector<mlir::Value> results;
      rop.resultToSourceOps(results, resNum);
      for (auto u : results)
        collectArrayMentionFrom(u);
    };
    if (auto rop = mlir::dyn_cast<DoLoopOp>(op)) {
      popFn(rop);
      return;
    }
    if (auto rop = mlir::dyn_cast<IterWhileOp>(op)) {
      popFn(rop);
      return;
    }
    if (auto rop = mlir::dyn_cast<fir::IfOp>(op)) {
      popFn(rop);
      return;
    }
    if (auto box = mlir::dyn_cast<EmboxOp>(op)) {
      // A box aliases its memref: follow all other users of that memref.
      for (auto *user : box.getMemref().getUsers())
        if (user != op)
          collectArrayMentionFrom(user, user->getResults());
      return;
    }
    if (auto mergeStore = mlir::dyn_cast<ArrayMergeStoreOp>(op)) {
      // Only merge stores inside the loop nest feed the value being merged.
      if (opIsInsideLoops(mergeStore))
        collectArrayMentionFrom(mergeStore.getSequence());
      return;
    }

    if (mlir::isa<AllocaOp, AllocMemOp>(op)) {
      // Look for any stores inside the loops, and collect an array operation
      // that produced the value being stored to it.
      for (auto *user : op->getUsers())
        if (auto store = mlir::dyn_cast<fir::StoreOp>(user))
          if (opIsInsideLoops(store))
            collectArrayMentionFrom(store.getValue());
      return;
    }

    // Scan the uses of amend's memref. Note: intentionally falls through so
    // the amend's operands are also chased below.
    if (auto amend = mlir::dyn_cast<ArrayAmendOp>(op)) {
      reach.push_back(op);
      llvm::SmallVector<ArrayAccessOp> accesses;
      collectAccesses(accesses, op->getBlock());
      for (auto access : accesses)
        collectArrayMentionFrom(access.getResult());
    }

    // Otherwise, Op does not contain a region so just chase its operands.
    if (mlir::isa<ArrayAccessOp, ArrayLoadOp, ArrayUpdateOp, ArrayModifyOp,
                  ArrayFetchOp>(op)) {
      LLVM_DEBUG(llvm::dbgs() << "add " << *op << " to reachable set\n");
      reach.push_back(op);
    }

    // Include all array_access ops using an array_load.
    if (auto arrLd = mlir::dyn_cast<ArrayLoadOp>(op))
      for (auto *user : arrLd.getResult().getUsers())
        if (mlir::isa<ArrayAccessOp>(user)) {
          LLVM_DEBUG(llvm::dbgs() << "add " << *user << " to reachable set\n");
          reach.push_back(user);
        }

    // Array modify assignment is performed on the result. So the analysis must
    // look at the what is done with the result.
    if (mlir::isa<ArrayModifyOp>(op))
      for (auto *user : op->getResult(0).getUsers())
        followUsers(user);

    // Calls are collected so the analysis can be conservative about them.
    if (mlir::isa<fir::CallOp>(op)) {
      LLVM_DEBUG(llvm::dbgs() << "add " << *op << " to reachable set\n");
      reach.push_back(op);
    }

    for (auto u : op->getOperands())
      collectArrayMentionFrom(u);
  }

  /// Process a block argument: map it back to the value(s) that feed it.
  void collectArrayMentionFrom(mlir::BlockArgument ba) {
    auto *parent = ba.getOwner()->getParentOp();
    // If inside an Op holding a region, the block argument corresponds to an
    // argument passed to the containing Op.
    auto popFn = [&](auto rop) {
      collectArrayMentionFrom(rop.blockArgToSourceOp(ba.getArgNumber()));
    };
    if (auto rop = mlir::dyn_cast<DoLoopOp>(parent)) {
      popFn(rop);
      return;
    }
    if (auto rop = mlir::dyn_cast<IterWhileOp>(parent)) {
      popFn(rop);
      return;
    }
    // Otherwise, a block argument is provided via the pred blocks.
    for (auto *pred : ba.getOwner()->getPredecessors()) {
      auto u = pred->getTerminator()->getOperand(ba.getArgNumber());
      collectArrayMentionFrom(u);
    }
  }

  // Recursively trace operands to find all array operations relating to the
  // values merged.
  void collectArrayMentionFrom(mlir::Value val) {
    // `visited` guards against cycles (e.g. loop-carried values).
    if (!val || visited.contains(val))
      return;
    visited.insert(val);

    // Process a block argument.
    if (auto ba = val.dyn_cast<mlir::BlockArgument>()) {
      collectArrayMentionFrom(ba);
      return;
    }

    // Process an Op.
    if (auto *op = val.getDefiningOp()) {
      collectArrayMentionFrom(op, val);
      return;
    }

    emitFatalError(val.getLoc(), "unhandled value");
  }

  /// Return all ops that produce the array value that is stored into the
  /// `array_merge_store`.
  static void reachingValues(llvm::SmallVectorImpl<mlir::Operation *> &reach,
                             mlir::Value seq) {
    reach.clear();
    mlir::Region *loopRegion = nullptr;
    // If the merged value is produced by a do_loop, remember its region so
    // stores inside the loop nest can be distinguished from those outside.
    if (auto doLoop = mlir::dyn_cast_or_null<DoLoopOp>(seq.getDefiningOp()))
      loopRegion = &doLoop->getRegion(0);
    ReachCollector collector(reach, loopRegion);
    collector.collectArrayMentionFrom(seq);
  }

private:
  /// Is \op inside the loop nest region ?
  /// FIXME: replace this structural dependence with graph properties.
  bool opIsInsideLoops(mlir::Operation *op) const {
    auto *region = op->getParentRegion();
    while (region) {
      if (region == loopRegion)
        return true;
      region = region->getParentRegion();
    }
    return false;
  }

  /// Recursively trace the use of an operation results, calling
  /// collectArrayMentionFrom on the direct and indirect user operands.
  void followUsers(mlir::Operation *op) {
    for (auto userOperand : op->getOperands())
      collectArrayMentionFrom(userOperand);
    // Go through potential converts/coordinate_op.
    for (auto indirectUser : op->getUsers())
      followUsers(indirectUser);
  }

  llvm::SmallVectorImpl<mlir::Operation *> &reach;
  // Values already traced, to terminate the recursive traversal.
  llvm::SmallPtrSet<mlir::Value, 16> visited;
  /// Region of the loops nest that produced the array value.
  mlir::Region *loopRegion;
};
354 } // namespace
355 
356 /// Find all the array operations that access the array value that is loaded by
357 /// the array load operation, `load`.
/// Find all the array operations that access the array value that is loaded by
/// the array load operation, `load`.
void ArrayCopyAnalysisBase::arrayMentions(
    llvm::SmallVectorImpl<mlir::Operation *> &mentions, ArrayLoadOp load) {
  mentions.clear();
  // Results are memoized per load: replay a previously computed use set.
  auto lmIter = loadMapSets.find(load);
  if (lmIter != loadMapSets.end()) {
    for (auto *opnd : lmIter->second) {
      auto *owner = opnd->getOwner();
      if (mlir::isa<ArrayAccessOp, ArrayAmendOp, ArrayFetchOp, ArrayUpdateOp,
                    ArrayModifyOp>(owner))
        mentions.push_back(owner);
    }
    return;
  }

  UseSetT visited;
  llvm::SmallVector<mlir::OpOperand *> queue; // uses of ArrayLoad[orig]

  // Push all not-yet-visited uses of `val` onto the worklist.
  auto appendToQueue = [&](mlir::Value val) {
    for (auto &use : val.getUses())
      if (!visited.count(&use)) {
        visited.insert(&use);
        queue.push_back(&use);
      }
  };

  // Build the set of uses of `original`.
  // let USES = { uses of original fir.load }
  appendToQueue(load);

  // Process the worklist until done.
  while (!queue.empty()) {
    mlir::OpOperand *operand = queue.pop_back_val();
    mlir::Operation *owner = operand->getOwner();
    if (!owner)
      continue;
    // Thread an iter_arg operand through a structured loop: follow both the
    // corresponding block argument (uses inside the loop) and the matching
    // loop result (uses after the loop). The result index is shifted by one
    // when the loop does not also return its final iteration value.
    auto structuredLoop = [&](auto ro) {
      if (auto blockArg = ro.iterArgToBlockArg(operand->get())) {
        int64_t arg = blockArg.getArgNumber();
        mlir::Value output = ro.getResult(ro.getFinalValue() ? arg : arg - 1);
        appendToQueue(output);
        appendToQueue(blockArg);
      }
    };
    // TODO: this need to be updated to use the control-flow interface.
    auto branchOp = [&](mlir::Block *dest, OperandRange operands) {
      if (operands.empty())
        return;

      // Check if this operand is within the range.
      unsigned operandIndex = operand->getOperandNumber();
      unsigned operandsStart = operands.getBeginOperandIndex();
      if (operandIndex < operandsStart ||
          operandIndex >= (operandsStart + operands.size()))
        return;

      // Index the successor.
      unsigned argIndex = operandIndex - operandsStart;
      appendToQueue(dest->getArgument(argIndex));
    };
    // Thread uses into structured loop bodies and return value uses.
    if (auto ro = mlir::dyn_cast<DoLoopOp>(owner)) {
      structuredLoop(ro);
    } else if (auto ro = mlir::dyn_cast<IterWhileOp>(owner)) {
      structuredLoop(ro);
    } else if (auto rs = mlir::dyn_cast<ResultOp>(owner)) {
      // Thread any uses of fir.if that return the marked array value.
      mlir::Operation *parent = rs->getParentRegion()->getParentOp();
      if (auto ifOp = mlir::dyn_cast<fir::IfOp>(parent))
        appendToQueue(ifOp.getResult(operand->getOperandNumber()));
    } else if (mlir::isa<ArrayFetchOp>(owner)) {
      // Keep track of array value fetches.
      LLVM_DEBUG(llvm::dbgs()
                 << "add fetch {" << *owner << "} to array value set\n");
      mentions.push_back(owner);
    } else if (auto update = mlir::dyn_cast<ArrayUpdateOp>(owner)) {
      // Keep track of array value updates and thread the return value uses.
      LLVM_DEBUG(llvm::dbgs()
                 << "add update {" << *owner << "} to array value set\n");
      mentions.push_back(owner);
      appendToQueue(update.getResult());
    } else if (auto update = mlir::dyn_cast<ArrayModifyOp>(owner)) {
      // Keep track of array value modification and thread the return value
      // uses. (Result 0 is the element reference; result 1 is the array.)
      LLVM_DEBUG(llvm::dbgs()
                 << "add modify {" << *owner << "} to array value set\n");
      mentions.push_back(owner);
      appendToQueue(update.getResult(1));
    } else if (auto mention = mlir::dyn_cast<ArrayAccessOp>(owner)) {
      mentions.push_back(owner);
    } else if (auto amend = mlir::dyn_cast<ArrayAmendOp>(owner)) {
      mentions.push_back(owner);
      appendToQueue(amend.getResult());
    } else if (auto br = mlir::dyn_cast<mlir::cf::BranchOp>(owner)) {
      branchOp(br.getDest(), br.getDestOperands());
    } else if (auto br = mlir::dyn_cast<mlir::cf::CondBranchOp>(owner)) {
      branchOp(br.getTrueDest(), br.getTrueOperands());
      branchOp(br.getFalseDest(), br.getFalseOperands());
    } else if (mlir::isa<ArrayMergeStoreOp>(owner)) {
      // do nothing
    } else {
      llvm::report_fatal_error("array value reached unexpected op");
    }
  }
  // Memoize the transitive use set for subsequent queries.
  loadMapSets.insert({load, visited});
}
463 
464 static bool hasPointerType(mlir::Type type) {
465   if (auto boxTy = type.dyn_cast<BoxType>())
466     type = boxTy.getEleTy();
467   return type.isa<fir::PointerType>();
468 }
469 
470 // This is a NF performance hack. It makes a simple test that the slices of the
471 // load, \p ld, and the merge store, \p st, are trivially mutually exclusive.
// This is a NF performance hack. It makes a simple test that the slices of the
// load, \p ld, and the merge store, \p st, are trivially mutually exclusive.
static bool mutuallyExclusiveSliceRange(ArrayLoadOp ld, ArrayMergeStoreOp st) {
  // If the same array_load, then no further testing is warranted.
  if (ld.getResult() == st.getOriginal())
    return false;

  // Return the defining slice op of `val`, or null if there is none.
  auto getSliceOp = [](mlir::Value val) -> SliceOp {
    if (!val)
      return {};
    auto sliceOp = mlir::dyn_cast_or_null<SliceOp>(val.getDefiningOp());
    if (!sliceOp)
      return {};
    return sliceOp;
  };

  auto ldSlice = getSliceOp(ld.getSlice());
  auto stSlice = getSliceOp(st.getSlice());
  if (!ldSlice || !stSlice)
    return false;

  // Resign on subobject slices.
  if (!ldSlice.getFields().empty() || !stSlice.getFields().empty() ||
      !ldSlice.getSubstr().empty() || !stSlice.getSubstr().empty())
    return false;

  // Crudely test that the two slices do not overlap by looking for the
  // following general condition. If the slices look like (i:j) and (j+1:k) then
  // these ranges do not overlap. The addend must be a constant.
  auto ldTriples = ldSlice.getTriples();
  auto stTriples = stSlice.getTriples();
  const auto size = ldTriples.size();
  if (size != stTriples.size())
    return false;

  // Return true iff `v2 == v1 + c` (or `v1 == v2 - c`) for a positive
  // integer constant `c`, looking through fir.convert ops.
  auto displacedByConstant = [](mlir::Value v1, mlir::Value v2) {
    auto removeConvert = [](mlir::Value v) -> mlir::Operation * {
      auto *op = v.getDefiningOp();
      while (auto conv = mlir::dyn_cast_or_null<ConvertOp>(op))
        op = conv.getValue().getDefiningOp();
      return op;
    };

    auto isPositiveConstant = [](mlir::Value v) -> bool {
      if (auto conOp =
              mlir::dyn_cast<mlir::arith::ConstantOp>(v.getDefiningOp()))
        if (auto iattr = conOp.getValue().dyn_cast<mlir::IntegerAttr>())
          return iattr.getInt() > 0;
      return false;
    };

    auto *op1 = removeConvert(v1);
    auto *op2 = removeConvert(v2);
    if (!op1 || !op2)
      return false;
    if (auto addi = mlir::dyn_cast<mlir::arith::AddIOp>(op2))
      if ((addi.getLhs().getDefiningOp() == op1 &&
           isPositiveConstant(addi.getRhs())) ||
          (addi.getRhs().getDefiningOp() == op1 &&
           isPositiveConstant(addi.getLhs())))
        return true;
    if (auto subi = mlir::dyn_cast<mlir::arith::SubIOp>(op1))
      if (subi.getLhs().getDefiningOp() == op2 &&
          isPositiveConstant(subi.getRhs()))
        return true;
    return false;
  };

  // Triples come in groups of three values: (lower, upper, stride).
  for (std::remove_const_t<decltype(size)> i = 0; i < size; i += 3) {
    // If both are loop invariant, skip to the next triple.
    if (mlir::isa_and_nonnull<fir::UndefOp>(ldTriples[i + 1].getDefiningOp()) &&
        mlir::isa_and_nonnull<fir::UndefOp>(stTriples[i + 1].getDefiningOp())) {
      // Unless either is a vector index, then be conservative.
      if (mlir::isa_and_nonnull<fir::UndefOp>(ldTriples[i].getDefiningOp()) ||
          mlir::isa_and_nonnull<fir::UndefOp>(stTriples[i].getDefiningOp()))
        return false;
      continue;
    }
    // If identical, skip to the next triple.
    if (ldTriples[i] == stTriples[i] && ldTriples[i + 1] == stTriples[i + 1] &&
        ldTriples[i + 2] == stTriples[i + 2])
      continue;
    // If ubound and lbound are the same with a constant offset, skip to the
    // next triple.
    if (displacedByConstant(ldTriples[i + 1], stTriples[i]) ||
        displacedByConstant(stTriples[i + 1], ldTriples[i]))
      continue;
    return false;
  }
  LLVM_DEBUG(llvm::dbgs() << "detected non-overlapping slice ranges on " << ld
                          << " and " << st << ", which is not a conflict\n");
  return true;
}
563 
564 /// Is there a conflict between the array value that was updated and to be
565 /// stored to `st` and the set of arrays loaded (`reach`) and used to compute
566 /// the updated value?
567 /// If `optimize` is true, use the variable attributes to prove that
568 /// there is no conflict.
/// Is there a conflict between the array value that was updated and to be
/// stored to `st` and the set of arrays loaded (`reach`) and used to compute
/// the updated value?
/// If `optimize` is true, use the variable attributes to prove that
/// there is no conflict.
static bool conflictOnLoad(llvm::ArrayRef<mlir::Operation *> reach,
                           ArrayMergeStoreOp st, bool optimize) {
  mlir::Value load;
  mlir::Value addr = st.getMemref();
  const bool storeHasPointerType = hasPointerType(addr.getType());
  for (auto *op : reach)
    if (auto ld = mlir::dyn_cast<ArrayLoadOp>(op)) {
      mlir::Type ldTy = ld.getMemref().getType();
      if (ld.getMemref() == addr) {
        // Same memory reference as the store destination.
        if (mutuallyExclusiveSliceRange(ld, st))
          continue;
        // A different load of the stored array (e.g. different shape/slice)
        // is a potential overlap.
        if (ld.getResult() != st.getOriginal())
          return true;
        if (load) {
          // TODO: extend this to allow checking if the first `load` and this
          // `ld` are mutually exclusive accesses but not identical.
          return true;
        }
        load = ld;
      } else if (storeHasPointerType) {
        // The store destination is a POINTER: any other load may alias it,
        // unless attributes prove the load's memref cannot be a pointer
        // target.
        if (optimize && !hasPointerType(ldTy) &&
            !valueMayHaveFirAttributes(
                ld.getMemref(),
                {getTargetAttrName(), GlobalOp::getTargetAttrNameStr()}))
          continue;

        return true;
      } else if (hasPointerType(ldTy)) {
        // Symmetric case: the load is through a POINTER that may alias the
        // store destination, unless attributes prove otherwise.
        if (optimize && !storeHasPointerType &&
            !valueMayHaveFirAttributes(
                addr, {getTargetAttrName(), GlobalOp::getTargetAttrNameStr()}))
          continue;

        return true;
      }
      // TODO: Check if types can also allow ruling out some cases. For now,
      // the fact that equivalences is using pointer attribute to enforce
      // aliasing is preventing any attempt to do so, and in general, it may
      // be wrong to use this if any of the types is a complex or a derived
      // for which it is possible to create a pointer to a part with a
      // different type than the whole, although this deserve some more
      // investigation because existing compiler behavior seem to diverge
      // here.
    }
  return false;
}
615 
616 /// Is there an access vector conflict on the array being merged into? If the
617 /// access vectors diverge, then assume that there are potentially overlapping
618 /// loop-carried references.
619 static bool conflictOnMerge(llvm::ArrayRef<mlir::Operation *> mentions) {
620   if (mentions.size() < 2)
621     return false;
622   llvm::SmallVector<mlir::Value> indices;
623   LLVM_DEBUG(llvm::dbgs() << "check merge conflict on with " << mentions.size()
624                           << " mentions on the list\n");
625   bool valSeen = false;
626   bool refSeen = false;
627   for (auto *op : mentions) {
628     llvm::SmallVector<mlir::Value> compareVector;
629     if (auto u = mlir::dyn_cast<ArrayUpdateOp>(op)) {
630       valSeen = true;
631       if (indices.empty()) {
632         indices = u.getIndices();
633         continue;
634       }
635       compareVector = u.getIndices();
636     } else if (auto f = mlir::dyn_cast<ArrayModifyOp>(op)) {
637       valSeen = true;
638       if (indices.empty()) {
639         indices = f.getIndices();
640         continue;
641       }
642       compareVector = f.getIndices();
643     } else if (auto f = mlir::dyn_cast<ArrayFetchOp>(op)) {
644       valSeen = true;
645       if (indices.empty()) {
646         indices = f.getIndices();
647         continue;
648       }
649       compareVector = f.getIndices();
650     } else if (auto f = mlir::dyn_cast<ArrayAccessOp>(op)) {
651       refSeen = true;
652       if (indices.empty()) {
653         indices = f.getIndices();
654         continue;
655       }
656       compareVector = f.getIndices();
657     } else if (mlir::isa<ArrayAmendOp>(op)) {
658       refSeen = true;
659       continue;
660     } else {
661       mlir::emitError(op->getLoc(), "unexpected operation in analysis");
662     }
663     if (compareVector.size() != indices.size() ||
664         llvm::any_of(llvm::zip(compareVector, indices), [&](auto pair) {
665           return std::get<0>(pair) != std::get<1>(pair);
666         }))
667       return true;
668     LLVM_DEBUG(llvm::dbgs() << "vectors compare equal\n");
669   }
670   return valSeen && refSeen;
671 }
672 
673 /// With element-by-reference semantics, an amended array with more than once
674 /// access to the same loaded array are conservatively considered a conflict.
675 /// Note: the array copy can still be eliminated in subsequent optimizations.
676 static bool conflictOnReference(llvm::ArrayRef<mlir::Operation *> mentions) {
677   LLVM_DEBUG(llvm::dbgs() << "checking reference semantics " << mentions.size()
678                           << '\n');
679   if (mentions.size() < 3)
680     return false;
681   unsigned amendCount = 0;
682   unsigned accessCount = 0;
683   for (auto *op : mentions) {
684     if (mlir::isa<ArrayAmendOp>(op) && ++amendCount > 1) {
685       LLVM_DEBUG(llvm::dbgs() << "conflict: multiple amends of array value\n");
686       return true;
687     }
688     if (mlir::isa<ArrayAccessOp>(op) && ++accessCount > 1) {
689       LLVM_DEBUG(llvm::dbgs()
690                  << "conflict: multiple accesses of array value\n");
691       return true;
692     }
693     if (mlir::isa<ArrayFetchOp, ArrayUpdateOp, ArrayModifyOp>(op)) {
694       LLVM_DEBUG(llvm::dbgs()
695                  << "conflict: array value has both uses by-value and uses "
696                     "by-reference. conservative assumption.\n");
697       return true;
698     }
699   }
700   return false;
701 }
702 
703 static mlir::Operation *
704 amendingAccess(llvm::ArrayRef<mlir::Operation *> mentions) {
705   for (auto *op : mentions)
706     if (auto amend = mlir::dyn_cast<ArrayAmendOp>(op))
707       return amend.getMemref().getDefiningOp();
708   return {};
709 }
710 
711 // Are any conflicts present? The conflicts detected here are described above.
712 static bool conflictDetected(llvm::ArrayRef<mlir::Operation *> reach,
713                              llvm::ArrayRef<mlir::Operation *> mentions,
714                              ArrayMergeStoreOp st, bool optimize) {
715   return conflictOnLoad(reach, st, optimize) || conflictOnMerge(mentions);
716 }
717 
718 // Assume that any call to a function that uses host-associations will be
719 // modifying the output array.
720 static bool
721 conservativeCallConflict(llvm::ArrayRef<mlir::Operation *> reaches) {
722   return llvm::any_of(reaches, [](mlir::Operation *op) {
723     if (auto call = mlir::dyn_cast<fir::CallOp>(op))
724       if (auto callee =
725               call.getCallableForCallee().dyn_cast<mlir::SymbolRefAttr>()) {
726         auto module = op->getParentOfType<mlir::ModuleOp>();
727         return isInternalPorcedure(
728             module.lookupSymbol<mlir::func::FuncOp>(callee));
729       }
730     return false;
731   });
732 }
733 
734 /// Constructor of the array copy analysis.
735 /// This performs the analysis and saves the intermediate results.
/// Constructor of the array copy analysis.
/// This performs the analysis and saves the intermediate results.
void ArrayCopyAnalysisBase::construct(mlir::Operation *topLevelOp) {
  topLevelOp->walk([&](Operation *op) {
    if (auto st = mlir::dyn_cast<fir::ArrayMergeStoreOp>(op)) {
      // Collect every op that may define the array value being merged.
      llvm::SmallVector<mlir::Operation *> values;
      ReachCollector::reachingValues(values, st.getSequence());
      bool callConflict = conservativeCallConflict(values);
      // Collect every mention (fetch/update/access/amend/modify) of the
      // loaded array value being stored.
      llvm::SmallVector<mlir::Operation *> mentions;
      arrayMentions(mentions,
                    mlir::cast<ArrayLoadOp>(st.getOriginal().getDefiningOp()));
      bool conflict = conflictDetected(values, mentions, st, optimizeConflicts);
      bool refConflict = conflictOnReference(mentions);
      if (callConflict || conflict || refConflict) {
        LLVM_DEBUG(llvm::dbgs()
                   << "CONFLICT: copies required for " << st << '\n'
                   << "   adding conflicts on: " << *op << " and "
                   << st.getOriginal() << '\n');
        // Record both ends of the conflict: the merge store and its load.
        conflicts.insert(op);
        conflicts.insert(st.getOriginal().getDefiningOp());
        // Remember the array_access feeding an array_amend so the rewrite can
        // treat it specially.
        if (auto *access = amendingAccess(mentions))
          amendAccesses.insert(access);
      }
      // Map the load to this merge store (pending modifications to merge).
      auto *ld = st.getOriginal().getDefiningOp();
      LLVM_DEBUG(llvm::dbgs()
                 << "map: adding {" << *ld << " -> " << st << "}\n");
      useMap.insert({ld, op});
    } else if (auto load = mlir::dyn_cast<ArrayLoadOp>(op)) {
      llvm::SmallVector<mlir::Operation *> mentions;
      arrayMentions(mentions, load);
      LLVM_DEBUG(llvm::dbgs() << "process load: " << load
                              << ", mentions: " << mentions.size() << '\n');
      for (auto *acc : mentions) {
        LLVM_DEBUG(llvm::dbgs() << " mention: " << *acc << '\n');
        if (mlir::isa<ArrayAccessOp, ArrayAmendOp, ArrayFetchOp, ArrayUpdateOp,
                      ArrayModifyOp>(acc)) {
          // Each mention may be tied to at most one array_load.
          if (useMap.count(acc)) {
            mlir::emitError(
                load.getLoc(),
                "The parallel semantics of multiple array_merge_stores per "
                "array_load are not supported.");
            continue;
          }
          LLVM_DEBUG(llvm::dbgs()
                     << "map: adding {" << *acc << "} -> {" << load << "}\n");
          useMap.insert({acc, op});
        }
      }
    }
  });
}
785 
786 //===----------------------------------------------------------------------===//
787 // Conversions for converting out of array value form.
788 //===----------------------------------------------------------------------===//
789 
790 namespace {
791 class ArrayLoadConversion : public mlir::OpRewritePattern<ArrayLoadOp> {
792 public:
793   using OpRewritePattern::OpRewritePattern;
794 
795   mlir::LogicalResult
796   matchAndRewrite(ArrayLoadOp load,
797                   mlir::PatternRewriter &rewriter) const override {
798     LLVM_DEBUG(llvm::dbgs() << "replace load " << load << " with undef.\n");
799     rewriter.replaceOpWithNewOp<UndefOp>(load, load.getType());
800     return mlir::success();
801   }
802 };
803 
804 class ArrayMergeStoreConversion
805     : public mlir::OpRewritePattern<ArrayMergeStoreOp> {
806 public:
807   using OpRewritePattern::OpRewritePattern;
808 
809   mlir::LogicalResult
810   matchAndRewrite(ArrayMergeStoreOp store,
811                   mlir::PatternRewriter &rewriter) const override {
812     LLVM_DEBUG(llvm::dbgs() << "marking store " << store << " as dead.\n");
813     rewriter.eraseOp(store);
814     return mlir::success();
815   }
816 };
817 } // namespace
818 
819 static mlir::Type getEleTy(mlir::Type ty) {
820   auto eleTy = unwrapSequenceType(unwrapPassByRefType(ty));
821   // FIXME: keep ptr/heap/ref information.
822   return ReferenceType::get(eleTy);
823 }
824 
// Extract extents from the ShapeOp/ShapeShiftOp into the result vector.
// Returns true iff the copy loops must apply the slice themselves, i.e. the
// assumed-size case where the last extent is recomputed from the slice
// triple.
static bool getAdjustedExtents(mlir::Location loc,
                               mlir::PatternRewriter &rewriter,
                               ArrayLoadOp arrLoad,
                               llvm::SmallVectorImpl<mlir::Value> &result,
                               mlir::Value shape) {
  bool copyUsingSlice = false;
  auto *shapeOp = shape.getDefiningOp();
  if (auto s = mlir::dyn_cast_or_null<ShapeOp>(shapeOp)) {
    auto e = s.getExtents();
    result.insert(result.end(), e.begin(), e.end());
  } else if (auto s = mlir::dyn_cast_or_null<ShapeShiftOp>(shapeOp)) {
    auto e = s.getExtents();
    result.insert(result.end(), e.begin(), e.end());
  } else {
    emitFatalError(loc, "not a fir.shape/fir.shape_shift op");
  }
  auto idxTy = rewriter.getIndexType();
  if (factory::isAssumedSize(result)) {
    // Use slice information to compute the extent of the column.
    auto one = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 1);
    mlir::Value size = one;
    if (mlir::Value sliceArg = arrLoad.getSlice()) {
      if (auto sliceOp =
              mlir::dyn_cast_or_null<SliceOp>(sliceArg.getDefiningOp())) {
        auto triples = sliceOp.getTriples();
        const std::size_t tripleSize = triples.size();
        auto module = arrLoad->getParentOfType<mlir::ModuleOp>();
        fir::KindMapping kindMap = getKindMapping(module);
        FirOpBuilder builder(rewriter, kindMap);
        // The final (lb, ub, stride) triple describes the last dimension.
        size = builder.genExtentFromTriplet(loc, triples[tripleSize - 3],
                                            triples[tripleSize - 2],
                                            triples[tripleSize - 1], idxTy);
        copyUsingSlice = true;
      }
    }
    // Replace the unknown extent of the last dimension with the computed
    // size (or 1 when no usable slice is present).
    result[result.size() - 1] = size;
  }
  return copyUsingSlice;
}
865 
/// Place the extents of the array load, \p arrLoad, into \p result and
/// return a ShapeOp or ShapeShiftOp with the same extents. If \p arrLoad is
/// loading a `!fir.box`, code will be generated to read the extents from the
/// boxed value, and the returned shape Op will be built with the extents read
/// from the box. Otherwise, the extents will be extracted from the ShapeOp (or
/// ShapeShiftOp) argument of \p arrLoad. \p copyUsingSlice will be set to true
/// if slicing of the output array is to be done in the copy-in/copy-out rather
/// than in the elemental computation step.
static mlir::Value getOrReadExtentsAndShapeOp(
    mlir::Location loc, mlir::PatternRewriter &rewriter, ArrayLoadOp arrLoad,
    llvm::SmallVectorImpl<mlir::Value> &result, bool &copyUsingSlice) {
  assert(result.empty());
  if (arrLoad->hasAttr(fir::getOptionalAttrName()))
    fir::emitFatalError(
        loc, "shapes from array load of OPTIONAL arrays must not be used");
  if (auto boxTy = arrLoad.getMemref().getType().dyn_cast<BoxType>()) {
    // Boxed case: read each extent from the box descriptor.
    auto rank =
        dyn_cast_ptrOrBoxEleTy(boxTy).cast<SequenceType>().getDimension();
    auto idxTy = rewriter.getIndexType();
    for (decltype(rank) dim = 0; dim < rank; ++dim) {
      auto dimVal = rewriter.create<mlir::arith::ConstantIndexOp>(loc, dim);
      auto dimInfo = rewriter.create<BoxDimsOp>(loc, idxTy, idxTy, idxTy,
                                                arrLoad.getMemref(), dimVal);
      // Result 1 of fir.box_dims is the extent of this dimension.
      result.emplace_back(dimInfo.getResult(1));
    }
    if (!arrLoad.getShape()) {
      // No explicit lower bounds: build a plain fir.shape.
      auto shapeType = ShapeType::get(rewriter.getContext(), rank);
      return rewriter.create<ShapeOp>(loc, shapeType, result);
    }
    // A fir.shift supplies the lower bounds; interleave them with the
    // extents read from the box to build a fir.shape_shift.
    auto shiftOp = arrLoad.getShape().getDefiningOp<ShiftOp>();
    auto shapeShiftType = ShapeShiftType::get(rewriter.getContext(), rank);
    llvm::SmallVector<mlir::Value> shapeShiftOperands;
    for (auto [lb, extent] : llvm::zip(shiftOp.getOrigins(), result)) {
      shapeShiftOperands.push_back(lb);
      shapeShiftOperands.push_back(extent);
    }
    return rewriter.create<ShapeShiftOp>(loc, shapeShiftType,
                                         shapeShiftOperands);
  }
  // Unboxed case: the extents are available on the shape operand itself.
  copyUsingSlice =
      getAdjustedExtents(loc, rewriter, arrLoad, result, arrLoad.getShape());
  return arrLoad.getShape();
}
909 
910 static mlir::Type toRefType(mlir::Type ty) {
911   if (fir::isa_ref_type(ty))
912     return ty;
913   return fir::ReferenceType::get(ty);
914 }
915 
916 static llvm::SmallVector<mlir::Value>
917 getTypeParamsIfRawData(mlir::Location loc, FirOpBuilder &builder,
918                        ArrayLoadOp arrLoad, mlir::Type ty) {
919   if (ty.isa<BoxType>())
920     return {};
921   return fir::factory::getTypeParams(loc, builder, arrLoad);
922 }
923 
/// Generate a fir.array_coor addressing element \p indices of \p alloc,
/// followed by a fir.coordinate_of for any trailing component indices. When
/// \p skipOrig is set, the indices are used as-is instead of being
/// origin-adjusted against \p shape.
static mlir::Value genCoorOp(mlir::PatternRewriter &rewriter,
                             mlir::Location loc, mlir::Type eleTy,
                             mlir::Type resTy, mlir::Value alloc,
                             mlir::Value shape, mlir::Value slice,
                             mlir::ValueRange indices, ArrayLoadOp load,
                             bool skipOrig = false) {
  llvm::SmallVector<mlir::Value> originated;
  if (skipOrig)
    originated.assign(indices.begin(), indices.end());
  else
    originated = factory::originateIndices(loc, rewriter, alloc.getType(),
                                           shape, indices);
  auto seqTy = dyn_cast_ptrOrBoxEleTy(alloc.getType());
  assert(seqTy && seqTy.isa<SequenceType>());
  const auto dimension = seqTy.cast<SequenceType>().getDimension();
  auto module = load->getParentOfType<mlir::ModuleOp>();
  fir::KindMapping kindMap = getKindMapping(module);
  FirOpBuilder builder(rewriter, kindMap);
  auto typeparams = getTypeParamsIfRawData(loc, builder, load, alloc.getType());
  // The first `dimension` indices address the array element itself.
  mlir::Value result = rewriter.create<ArrayCoorOp>(
      loc, eleTy, alloc, shape, slice,
      llvm::ArrayRef<mlir::Value>{originated}.take_front(dimension),
      typeparams);
  // Any remaining indices address a part within the element.
  if (dimension < originated.size())
    result = rewriter.create<fir::CoordinateOp>(
        loc, resTy, result,
        llvm::ArrayRef<mlir::Value>{originated}.drop_front(dimension));
  return result;
}
953 
/// Compute the CHARACTER length of the elements of \p load as a value of
/// the builder's character-length type.
static mlir::Value getCharacterLen(mlir::Location loc, FirOpBuilder &builder,
                                   ArrayLoadOp load, CharacterType charTy) {
  auto charLenTy = builder.getCharacterLengthType();
  if (charTy.hasDynamicLen()) {
    if (load.getMemref().getType().isa<BoxType>()) {
      // The loaded array is an emboxed value. Get the CHARACTER length from
      // the box value.
      auto eleSzInBytes =
          builder.create<BoxEleSizeOp>(loc, charLenTy, load.getMemref());
      auto kindSize =
          builder.getKindMap().getCharacterBitsize(charTy.getFKind());
      auto kindByteSize =
          builder.createIntegerConstant(loc, charLenTy, kindSize / 8);
      // Length in characters = element size in bytes / bytes per character.
      return builder.create<mlir::arith::DivSIOp>(loc, eleSzInBytes,
                                                  kindByteSize);
    }
    // The loaded array is a (set of) unboxed values. If the CHARACTER's
    // length is not a constant, it must be provided as a type parameter to
    // the array_load.
    auto typeparams = load.getTypeparams();
    assert(typeparams.size() > 0 && "expected type parameters on array_load");
    return typeparams.back();
  }
  // The typical case: the length of the CHARACTER is a compile-time
  // constant that is encoded in the type information.
  return builder.createIntegerConstant(loc, charLenTy, charTy.getLen());
}
/// Generate a shallow array copy. This is used for both copy-in and copy-out.
/// Builds a loop nest over the adjusted extents and assigns each element
/// between \p src and \p dst using fir.array_coor addressing. The rewriter's
/// insertion point is restored on exit.
template <bool CopyIn>
void genArrayCopy(mlir::Location loc, mlir::PatternRewriter &rewriter,
                  mlir::Value dst, mlir::Value src, mlir::Value shapeOp,
                  mlir::Value sliceOp, ArrayLoadOp arrLoad) {
  auto insPt = rewriter.saveInsertionPoint();
  llvm::SmallVector<mlir::Value> indices;
  llvm::SmallVector<mlir::Value> extents;
  bool copyUsingSlice =
      getAdjustedExtents(loc, rewriter, arrLoad, extents, shapeOp);
  auto idxTy = rewriter.getIndexType();
  // Build loop nest from column to row.
  for (auto sh : llvm::reverse(extents)) {
    auto ubi = rewriter.create<ConvertOp>(loc, idxTy, sh);
    auto zero = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 0);
    auto one = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 1);
    // Loops iterate [0, extent-1]; indices are origin-adjusted below.
    auto ub = rewriter.create<mlir::arith::SubIOp>(loc, idxTy, ubi, one);
    auto loop = rewriter.create<DoLoopOp>(loc, zero, ub, one);
    rewriter.setInsertionPointToStart(loop.getBody());
    indices.push_back(loop.getInductionVar());
  }
  // Reverse the indices so they are in column-major order.
  std::reverse(indices.begin(), indices.end());
  auto module = arrLoad->getParentOfType<mlir::ModuleOp>();
  fir::KindMapping kindMap = getKindMapping(module);
  FirOpBuilder builder(rewriter, kindMap);
  // When the copy must apply the slice itself, the slice is attached only to
  // the original array side: the source on copy-in, the destination on
  // copy-out.
  auto fromAddr = rewriter.create<ArrayCoorOp>(
      loc, getEleTy(src.getType()), src, shapeOp,
      CopyIn && copyUsingSlice ? sliceOp : mlir::Value{},
      factory::originateIndices(loc, rewriter, src.getType(), shapeOp, indices),
      getTypeParamsIfRawData(loc, builder, arrLoad, src.getType()));
  auto toAddr = rewriter.create<ArrayCoorOp>(
      loc, getEleTy(dst.getType()), dst, shapeOp,
      !CopyIn && copyUsingSlice ? sliceOp : mlir::Value{},
      factory::originateIndices(loc, rewriter, dst.getType(), shapeOp, indices),
      getTypeParamsIfRawData(loc, builder, arrLoad, dst.getType()));
  auto eleTy = unwrapSequenceType(unwrapPassByRefType(dst.getType()));
  // Copy from (to) object to (from) temp copy of same object.
  if (auto charTy = eleTy.dyn_cast<CharacterType>()) {
    // CHARACTER elements: carry the length through the assignment.
    auto len = getCharacterLen(loc, builder, arrLoad, charTy);
    CharBoxValue toChar(toAddr, len);
    CharBoxValue fromChar(fromAddr, len);
    factory::genScalarAssignment(builder, loc, toChar, fromChar);
  } else {
    if (hasDynamicSize(eleTy))
      TODO(loc, "copy element of dynamic size");
    factory::genScalarAssignment(builder, loc, toAddr, fromAddr);
  }
  rewriter.restoreInsertionPoint(insPt);
}
1031 
/// The array load may be either a boxed or unboxed value. If the value is
/// boxed, we read the type parameters from the boxed value.
static llvm::SmallVector<mlir::Value>
genArrayLoadTypeParameters(mlir::Location loc, mlir::PatternRewriter &rewriter,
                           ArrayLoadOp load) {
  if (load.getTypeparams().empty()) {
    auto eleTy =
        unwrapSequenceType(unwrapPassByRefType(load.getMemref().getType()));
    if (hasDynamicSize(eleTy)) {
      if (auto charTy = eleTy.dyn_cast<CharacterType>()) {
        // Dynamic-length CHARACTER with no explicit type parameters: the
        // length must come from the box descriptor.
        assert(load.getMemref().getType().isa<BoxType>());
        auto module = load->getParentOfType<mlir::ModuleOp>();
        fir::KindMapping kindMap = getKindMapping(module);
        FirOpBuilder builder(rewriter, kindMap);
        return {getCharacterLen(loc, builder, load, charTy)};
      }
      TODO(loc, "unhandled dynamic type parameters");
    }
    // Statically sized element type: no type parameters are needed.
    return {};
  }
  return load.getTypeparams();
}
1054 
1055 static llvm::SmallVector<mlir::Value>
1056 findNonconstantExtents(mlir::Type memrefTy,
1057                        llvm::ArrayRef<mlir::Value> extents) {
1058   llvm::SmallVector<mlir::Value> nce;
1059   auto arrTy = unwrapPassByRefType(memrefTy);
1060   auto seqTy = arrTy.cast<SequenceType>();
1061   for (auto [s, x] : llvm::zip(seqTy.getShape(), extents))
1062     if (s == SequenceType::getUnknownExtent())
1063       nce.emplace_back(x);
1064   if (extents.size() > seqTy.getShape().size())
1065     for (auto x : extents.drop_front(seqTy.getShape().size()))
1066       nce.emplace_back(x);
1067   return nce;
1068 }
1069 
/// Allocate temporary storage for an ArrayLoadOp \load and initialize any
/// allocatable direct components of the array elements with an unallocated
/// status. Returns the temporary address as well as a callback to generate the
/// temporary clean-up once it has been used. The clean-up will take care of
/// deallocating all the element allocatable components that may have been
/// allocated while using the temporary.
static std::pair<mlir::Value,
                 std::function<void(mlir::PatternRewriter &rewriter)>>
allocateArrayTemp(mlir::Location loc, mlir::PatternRewriter &rewriter,
                  ArrayLoadOp load, llvm::ArrayRef<mlir::Value> extents,
                  mlir::Value shape) {
  mlir::Type baseType = load.getMemref().getType();
  // Only the extents that are not compile-time constants become operands of
  // the allocation.
  llvm::SmallVector<mlir::Value> nonconstantExtents =
      findNonconstantExtents(baseType, extents);
  llvm::SmallVector<mlir::Value> typeParams =
      genArrayLoadTypeParameters(loc, rewriter, load);
  mlir::Value allocmem = rewriter.create<AllocMemOp>(
      loc, dyn_cast_ptrOrBoxEleTy(baseType), typeParams, nonconstantExtents);
  mlir::Type eleType =
      fir::unwrapSequenceType(fir::unwrapPassByRefType(baseType));
  if (fir::isRecordWithAllocatableMember(eleType)) {
    // The allocatable component descriptors need to be set to a clean
    // deallocated status before anything is done with them.
    mlir::Value box = rewriter.create<fir::EmboxOp>(
        loc, fir::BoxType::get(allocmem.getType()), allocmem, shape,
        /*slice=*/mlir::Value{}, typeParams);
    auto module = load->getParentOfType<mlir::ModuleOp>();
    fir::KindMapping kindMap = getKindMapping(module);
    FirOpBuilder builder(rewriter, kindMap);
    runtime::genDerivedTypeInitialize(builder, loc, box);
    // Any allocatable component that may have been allocated must be
    // deallocated during the clean-up.
    // The lambda captures by copy so it can run later, at a different
    // insertion point, with a possibly different rewriter.
    auto cleanup = [=](mlir::PatternRewriter &r) {
      fir::KindMapping kindMap = getKindMapping(module);
      FirOpBuilder builder(r, kindMap);
      runtime::genDerivedTypeDestroy(builder, loc, box);
      r.create<FreeMemOp>(loc, allocmem);
    };
    return {allocmem, cleanup};
  }
  // No allocatable components: clean-up is a plain deallocation.
  auto cleanup = [=](mlir::PatternRewriter &r) {
    r.create<FreeMemOp>(loc, allocmem);
  };
  return {allocmem, cleanup};
}
1115 
namespace {
/// Conversion of fir.array_update and fir.array_modify Ops.
/// If there is a conflict for the update, then we need to perform a
/// copy-in/copy-out to preserve the original values of the array. If there is
/// no conflict, then it is safe to eschew making any copies.
template <typename ArrayOp>
class ArrayUpdateConversionBase : public mlir::OpRewritePattern<ArrayOp> {
public:
  // TODO: Implement copy/swap semantics?
  explicit ArrayUpdateConversionBase(mlir::MLIRContext *ctx,
                                     const ArrayCopyAnalysisBase &a,
                                     const OperationUseMapT &m)
      : mlir::OpRewritePattern<ArrayOp>{ctx}, analysis{a}, useMap{m} {}

  /// The array_access, \p access, is to be to a cloned copy due to a potential
  /// conflict. Uses copy-in/copy-out semantics and not copy/swap.
  mlir::Value referenceToClone(mlir::Location loc,
                               mlir::PatternRewriter &rewriter,
                               ArrayOp access) const {
    LLVM_DEBUG(llvm::dbgs()
               << "generating copy-in/copy-out loops for " << access << '\n');
    auto *op = access.getOperation();
    // Find the array_load feeding this access via the use map.
    auto *loadOp = useMap.lookup(op);
    auto load = mlir::cast<ArrayLoadOp>(loadOp);
    auto eleTy = access.getType();
    rewriter.setInsertionPoint(loadOp);
    // Copy in.
    llvm::SmallVector<mlir::Value> extents;
    bool copyUsingSlice = false;
    auto shapeOp = getOrReadExtentsAndShapeOp(loc, rewriter, load, extents,
                                              copyUsingSlice);
    auto [allocmem, genTempCleanUp] =
        allocateArrayTemp(loc, rewriter, load, extents, shapeOp);
    genArrayCopy</*copyIn=*/true>(load.getLoc(), rewriter, allocmem,
                                  load.getMemref(), shapeOp, load.getSlice(),
                                  load);
    // Generate the reference for the access.
    rewriter.setInsertionPoint(op);
    auto coor = genCoorOp(
        rewriter, loc, getEleTy(load.getType()), eleTy, allocmem, shapeOp,
        copyUsingSlice ? mlir::Value{} : load.getSlice(), access.getIndices(),
        load, access->hasAttr(factory::attrFortranArrayOffsets()));
    // Locate the merge store so the copy-out is emitted just before it.
    auto *storeOp = useMap.lookup(loadOp);
    auto store = mlir::cast<ArrayMergeStoreOp>(storeOp);
    rewriter.setInsertionPoint(storeOp);
    // Copy out.
    genArrayCopy</*copyIn=*/false>(store.getLoc(), rewriter, store.getMemref(),
                                   allocmem, shapeOp, store.getSlice(), load);
    genTempCleanUp(rewriter);
    return coor;
  }

  /// Copy the RHS element into the LHS and insert copy-in/copy-out between a
  /// temp and the LHS if the analysis found potential overlaps between the RHS
  /// and LHS arrays. The element copy generator must be provided in \p
  /// assignElement. \p update must be the ArrayUpdateOp or the ArrayModifyOp.
  /// Returns the address of the LHS element inside the loop and the LHS
  /// ArrayLoad result.
  std::pair<mlir::Value, mlir::Value>
  materializeAssignment(mlir::Location loc, mlir::PatternRewriter &rewriter,
                        ArrayOp update,
                        const std::function<void(mlir::Value)> &assignElement,
                        mlir::Type lhsEltRefType) const {
    auto *op = update.getOperation();
    auto *loadOp = useMap.lookup(op);
    auto load = mlir::cast<ArrayLoadOp>(loadOp);
    LLVM_DEBUG(llvm::outs() << "does " << load << " have a conflict?\n");
    if (analysis.hasPotentialConflict(loadOp)) {
      // If there is a conflict between the arrays, then we copy the lhs array
      // to a temporary, update the temporary, and copy the temporary back to
      // the lhs array. This yields Fortran's copy-in copy-out array semantics.
      LLVM_DEBUG(llvm::outs() << "Yes, conflict was found\n");
      rewriter.setInsertionPoint(loadOp);
      // Copy in.
      llvm::SmallVector<mlir::Value> extents;
      bool copyUsingSlice = false;
      auto shapeOp = getOrReadExtentsAndShapeOp(loc, rewriter, load, extents,
                                                copyUsingSlice);
      auto [allocmem, genTempCleanUp] =
          allocateArrayTemp(loc, rewriter, load, extents, shapeOp);

      genArrayCopy</*copyIn=*/true>(load.getLoc(), rewriter, allocmem,
                                    load.getMemref(), shapeOp, load.getSlice(),
                                    load);
      // Address the element in the temporary; the caller performs the actual
      // element assignment through the returned address.
      rewriter.setInsertionPoint(op);
      auto coor = genCoorOp(
          rewriter, loc, getEleTy(load.getType()), lhsEltRefType, allocmem,
          shapeOp, copyUsingSlice ? mlir::Value{} : load.getSlice(),
          update.getIndices(), load,
          update->hasAttr(factory::attrFortranArrayOffsets()));
      assignElement(coor);
      auto *storeOp = useMap.lookup(loadOp);
      auto store = mlir::cast<ArrayMergeStoreOp>(storeOp);
      rewriter.setInsertionPoint(storeOp);
      // Copy out.
      genArrayCopy</*copyIn=*/false>(store.getLoc(), rewriter,
                                     store.getMemref(), allocmem, shapeOp,
                                     store.getSlice(), load);
      genTempCleanUp(rewriter);
      return {coor, load.getResult()};
    }
    // Otherwise, when there is no conflict (a possible loop-carried
    // dependence), the lhs array can be updated in place.
    LLVM_DEBUG(llvm::outs() << "No, conflict wasn't found\n");
    rewriter.setInsertionPoint(op);
    auto coorTy = getEleTy(load.getType());
    auto coor =
        genCoorOp(rewriter, loc, coorTy, lhsEltRefType, load.getMemref(),
                  load.getShape(), load.getSlice(), update.getIndices(), load,
                  update->hasAttr(factory::attrFortranArrayOffsets()));
    assignElement(coor);
    return {coor, load.getResult()};
  }

protected:
  /// Result of the conflict analysis for the enclosing function.
  const ArrayCopyAnalysisBase &analysis;
  /// Maps array mentions to their array_load, and array_loads to their
  /// array_merge_store (see ArrayCopyAnalysisBase::construct).
  const OperationUseMapT &useMap;
};
1235 
/// Convert fir.array_update to a store through a computed element address,
/// inserting copy-in/copy-out when the analysis found a potential conflict.
class ArrayUpdateConversion : public ArrayUpdateConversionBase<ArrayUpdateOp> {
public:
  explicit ArrayUpdateConversion(mlir::MLIRContext *ctx,
                                 const ArrayCopyAnalysisBase &a,
                                 const OperationUseMapT &m)
      : ArrayUpdateConversionBase{ctx, a, m} {}

  mlir::LogicalResult
  matchAndRewrite(ArrayUpdateOp update,
                  mlir::PatternRewriter &rewriter) const override {
    auto loc = update.getLoc();
    // Store the merged element value through the LHS element address.
    auto assignElement = [&](mlir::Value coor) {
      auto input = update.getMerge();
      if (auto inEleTy = dyn_cast_ptrEleTy(input.getType())) {
        emitFatalError(loc, "array_update on references not supported");
      } else {
        rewriter.create<fir::StoreOp>(loc, input, coor);
      }
    };
    auto lhsEltRefType = toRefType(update.getMerge().getType());
    auto [_, lhsLoadResult] = materializeAssignment(
        loc, rewriter, update, assignElement, lhsEltRefType);
    // The op's array-value result is replaced by the array_load result.
    update.replaceAllUsesWith(lhsLoadResult);
    rewriter.replaceOp(update, lhsLoadResult);
    return mlir::success();
  }
};
1263 
1264 class ArrayModifyConversion : public ArrayUpdateConversionBase<ArrayModifyOp> {
1265 public:
1266   explicit ArrayModifyConversion(mlir::MLIRContext *ctx,
1267                                  const ArrayCopyAnalysisBase &a,
1268                                  const OperationUseMapT &m)
1269       : ArrayUpdateConversionBase{ctx, a, m} {}
1270 
1271   mlir::LogicalResult
1272   matchAndRewrite(ArrayModifyOp modify,
1273                   mlir::PatternRewriter &rewriter) const override {
1274     auto loc = modify.getLoc();
1275     auto assignElement = [](mlir::Value) {
1276       // Assignment already materialized by lowering using lhs element address.
1277     };
1278     auto lhsEltRefType = modify.getResult(0).getType();
1279     auto [lhsEltCoor, lhsLoadResult] = materializeAssignment(
1280         loc, rewriter, modify, assignElement, lhsEltRefType);
1281     modify.replaceAllUsesWith(mlir::ValueRange{lhsEltCoor, lhsLoadResult});
1282     rewriter.replaceOp(modify, mlir::ValueRange{lhsEltCoor, lhsLoadResult});
1283     return mlir::success();
1284   }
1285 };
1286 
/// Convert fir.array_fetch to a fir.array_coor address computation plus, for
/// by-value element types, a fir.load of that address.
class ArrayFetchConversion : public mlir::OpRewritePattern<ArrayFetchOp> {
public:
  explicit ArrayFetchConversion(mlir::MLIRContext *ctx,
                                const OperationUseMapT &m)
      : OpRewritePattern{ctx}, useMap{m} {}

  mlir::LogicalResult
  matchAndRewrite(ArrayFetchOp fetch,
                  mlir::PatternRewriter &rewriter) const override {
    auto *op = fetch.getOperation();
    rewriter.setInsertionPoint(op);
    auto load = mlir::cast<ArrayLoadOp>(useMap.lookup(op));
    auto loc = fetch.getLoc();
    auto coor = genCoorOp(
        rewriter, loc, getEleTy(load.getType()), toRefType(fetch.getType()),
        load.getMemref(), load.getShape(), load.getSlice(), fetch.getIndices(),
        load, fetch->hasAttr(factory::attrFortranArrayOffsets()));
    // A reference-typed fetch yields the address; otherwise load the value.
    if (isa_ref_type(fetch.getType()))
      rewriter.replaceOp(fetch, coor);
    else
      rewriter.replaceOpWithNewOp<fir::LoadOp>(fetch, coor);
    return mlir::success();
  }

private:
  /// Maps each array_fetch to its defining array_load.
  const OperationUseMapT &useMap;
};
1314 
/// As array_access op is like an array_fetch op, except that it does not imply
/// a load op. (It operates in the reference domain.)
class ArrayAccessConversion : public ArrayUpdateConversionBase<ArrayAccessOp> {
public:
  explicit ArrayAccessConversion(mlir::MLIRContext *ctx,
                                 const ArrayCopyAnalysisBase &a,
                                 const OperationUseMapT &m)
      : ArrayUpdateConversionBase{ctx, a, m} {}

  mlir::LogicalResult
  matchAndRewrite(ArrayAccessOp access,
                  mlir::PatternRewriter &rewriter) const override {
    auto *op = access.getOperation();
    auto loc = access.getLoc();
    if (analysis.inAmendAccessSet(op)) {
      // This array_access is associated with an array_amend and there is a
      // conflict. Make a copy to store into.
      auto result = referenceToClone(loc, rewriter, access);
      access.replaceAllUsesWith(result);
      rewriter.replaceOp(access, result);
      return mlir::success();
    }
    // No conflict: address the original array directly.
    rewriter.setInsertionPoint(op);
    auto load = mlir::cast<ArrayLoadOp>(useMap.lookup(op));
    auto coor = genCoorOp(
        rewriter, loc, getEleTy(load.getType()), toRefType(access.getType()),
        load.getMemref(), load.getShape(), load.getSlice(), access.getIndices(),
        load, access->hasAttr(factory::attrFortranArrayOffsets()));
    rewriter.replaceOp(access, coor);
    return mlir::success();
  }
};
1347 
1348 /// An array_amend op is a marker to record which array access is being used to
1349 /// update an array value. After this pass runs, an array_amend has no
1350 /// semantics. We rewrite these to undefined values here to remove them while
1351 /// preserving SSA form.
1352 class ArrayAmendConversion : public mlir::OpRewritePattern<ArrayAmendOp> {
1353 public:
1354   explicit ArrayAmendConversion(mlir::MLIRContext *ctx)
1355       : OpRewritePattern{ctx} {}
1356 
1357   mlir::LogicalResult
1358   matchAndRewrite(ArrayAmendOp amend,
1359                   mlir::PatternRewriter &rewriter) const override {
1360     auto *op = amend.getOperation();
1361     rewriter.setInsertionPoint(op);
1362     auto loc = amend.getLoc();
1363     auto undef = rewriter.create<UndefOp>(loc, amend.getType());
1364     rewriter.replaceOp(amend, undef.getResult());
1365     return mlir::success();
1366   }
1367 };
1368 
/// Driver for the array-value-copy pass: runs the conflict analysis, then
/// rewrites the array ops in two phases. Phase 1 lowers the element-wise
/// ops (fetch/update/modify/access/amend); phase 2 removes the remaining
/// array_load/array_merge_store pairs.
class ArrayValueCopyConverter
    : public fir::impl::ArrayValueCopyBase<ArrayValueCopyConverter> {
public:
  ArrayValueCopyConverter() = default;
  ArrayValueCopyConverter(const fir::ArrayValueCopyOptions &options)
      : Base(options) {}

  void runOnOperation() override {
    auto func = getOperation();
    LLVM_DEBUG(llvm::dbgs() << "\n\narray-value-copy pass on function '"
                            << func.getName() << "'\n");
    auto *context = &getContext();

    // Perform the conflict analysis.
    const ArrayCopyAnalysisBase *analysis;
    if (optimizeConflicts)
      analysis = &getAnalysis<ArrayCopyAnalysisOptimized>();
    else
      analysis = &getAnalysis<ArrayCopyAnalysis>();

    const auto &useMap = analysis->getUseMap();

    // Phase 1: rewrite the element-wise array ops.
    mlir::RewritePatternSet patterns1(context);
    patterns1.insert<ArrayFetchConversion>(context, useMap);
    patterns1.insert<ArrayUpdateConversion>(context, *analysis, useMap);
    patterns1.insert<ArrayModifyConversion>(context, *analysis, useMap);
    patterns1.insert<ArrayAccessConversion>(context, *analysis, useMap);
    patterns1.insert<ArrayAmendConversion>(context);
    mlir::ConversionTarget target(*context);
    target
        .addLegalDialect<FIROpsDialect, mlir::scf::SCFDialect,
                         mlir::arith::ArithDialect, mlir::func::FuncDialect>();
    target.addIllegalOp<ArrayAccessOp, ArrayAmendOp, ArrayFetchOp,
                        ArrayUpdateOp, ArrayModifyOp>();
    // Rewrite the array fetch and array update ops.
    if (mlir::failed(
            mlir::applyPartialConversion(func, target, std::move(patterns1)))) {
      mlir::emitError(mlir::UnknownLoc::get(context),
                      "failure in array-value-copy pass, phase 1");
      signalPassFailure();
    }

    // Phase 2: erase the now-meaningless array_load/array_merge_store ops.
    mlir::RewritePatternSet patterns2(context);
    patterns2.insert<ArrayLoadConversion>(context);
    patterns2.insert<ArrayMergeStoreConversion>(context);
    target.addIllegalOp<ArrayLoadOp, ArrayMergeStoreOp>();
    if (mlir::failed(
            mlir::applyPartialConversion(func, target, std::move(patterns2)))) {
      mlir::emitError(mlir::UnknownLoc::get(context),
                      "failure in array-value-copy pass, phase 2");
      signalPassFailure();
    }
  }
};
1423 } // namespace
1424 
1425 std::unique_ptr<mlir::Pass>
1426 fir::createArrayValueCopyPass(fir::ArrayValueCopyOptions options) {
1427   return std::make_unique<ArrayValueCopyConverter>(options);
1428 }
1429