//===-- ArrayValueCopy.cpp ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Builder/BoxValue.h"
#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/Factory.h"
#include "flang/Optimizer/Builder/Runtime/Derived.h"
#include "flang/Optimizer/Builder/Todo.h"
#include "flang/Optimizer/Dialect/FIRDialect.h"
#include "flang/Optimizer/Dialect/FIROpsSupport.h"
#include "flang/Optimizer/Dialect/Support/FIRContext.h"
#include "flang/Optimizer/Transforms/Passes.h"
#include "mlir/Dialect/ControlFlow/IR/ControlFlowOps.h"
#include "mlir/Dialect/SCF/IR/SCF.h"
#include "mlir/Transforms/DialectConversion.h"
#include "llvm/Support/Debug.h"

namespace fir {
#define GEN_PASS_DEF_ARRAYVALUECOPY
#include "flang/Optimizer/Transforms/Passes.h.inc"
} // namespace fir

#define DEBUG_TYPE "flang-array-value-copy"

using namespace fir;
using namespace mlir;

using OperationUseMapT = llvm::DenseMap<mlir::Operation *, mlir::Operation *>;

namespace {

/// Array copy analysis.
/// Perform an interference analysis between array values.
///
/// Lowering will generate a sequence of the following form.
/// ```mlir
/// %a_1 = fir.array_load %array_1(%shape) : ...
/// ...
/// %a_j = fir.array_load %array_j(%shape) : ...
/// ...
/// %a_n = fir.array_load %array_n(%shape) : ...
/// ...
/// %v_i = fir.array_fetch %a_i, ...
/// %a_j1 = fir.array_update %a_j, ...
/// ...
/// fir.array_merge_store %a_j, %a_jn to %array_j : ...
/// ```
///
/// The analysis is to determine if there are any conflicts. A conflict is when
/// one of the following cases occurs.
///
/// 1. There is an `array_update` to an array value, a_j, such that a_j was
/// loaded from the same array memory reference (array_j) but with a different
/// shape than the other array values a_i, where i != j. [Possible overlapping
/// arrays.]
///
/// 2. There is either an array_fetch or array_update of a_j with a different
/// set of index values. [Possible loop-carried dependence.]
///
/// If none of the array values overlap in storage and the accesses are not
/// loop-carried, then the arrays are conflict-free and no copies are required.
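///
/// For intuition, consider case 2 with a hypothetical self-shifting
/// assignment such as `a(2:n) = a(1:n-1)`: it lowers to a fetch and an
/// update of the same loaded array value with diverging index vectors
/// (a simplified sketch; the exact lowering differs in detail):
/// ```mlir
/// %a = fir.array_load %array(%shape) : ...
/// %v = fir.array_fetch %a, %i : ...
/// %a1 = fir.array_update %a, %v, %j : ...
/// fir.array_merge_store %a, %a1 to %array : ...
/// ```
/// Since the fetch and update indices differ, a temporary copy is required to
/// preserve Fortran's rule that the RHS is fully evaluated before the
/// assignment takes effect.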
class ArrayCopyAnalysisBase {
public:
  using ConflictSetT = llvm::SmallPtrSet<mlir::Operation *, 16>;
  using UseSetT = llvm::SmallPtrSet<mlir::OpOperand *, 8>;
  using LoadMapSetsT = llvm::DenseMap<mlir::Operation *, UseSetT>;
  using AmendAccessSetT = llvm::SmallPtrSet<mlir::Operation *, 4>;

  ArrayCopyAnalysisBase(mlir::Operation *op, bool optimized)
      : operation{op}, optimizeConflicts(optimized) {
    construct(op);
  }
  virtual ~ArrayCopyAnalysisBase() = default;

  mlir::Operation *getOperation() const { return operation; }

  /// Return true iff the `array_merge_store` has potential conflicts.
  bool hasPotentialConflict(mlir::Operation *op) const {
    LLVM_DEBUG(llvm::dbgs()
               << "looking for a conflict on " << *op
               << " and the set has a total of " << conflicts.size() << '\n');
    return conflicts.contains(op);
  }

  /// Return the use map.
  /// The use map maps array access, amend, fetch and update operations back to
  /// the array load that is the original source of the array value.
  /// It maps an array_load to an array_merge_store, if and only if the loaded
  /// array value has pending modifications to be merged.
  const OperationUseMapT &getUseMap() const { return useMap; }

  /// Return true iff `op` is an array_access op that is directly associated
  /// with an array_amend op.
  bool inAmendAccessSet(mlir::Operation *op) const {
    return amendAccesses.count(op);
  }

  /// For ArrayLoad `load`, return the transitive set of all OpOperands.
  UseSetT getLoadUseSet(mlir::Operation *load) const {
    assert(loadMapSets.count(load) && "analysis missed an array load?");
    return loadMapSets.lookup(load);
  }

  void arrayMentions(llvm::SmallVectorImpl<mlir::Operation *> &mentions,
                     ArrayLoadOp load);

private:
  void construct(mlir::Operation *topLevelOp);

  mlir::Operation *operation; // operation that analysis ran upon
  ConflictSetT conflicts;     // set of conflicts (loads and merge stores)
  OperationUseMapT useMap;
  LoadMapSetsT loadMapSets;
  // Set of array_access ops associated with array_amend ops.
  AmendAccessSetT amendAccesses;
  bool optimizeConflicts;
};

// Optimized array copy analysis that takes into account Fortran
// variable attributes to prove that no conflict is possible
// and reduce the number of temporary arrays.
class ArrayCopyAnalysisOptimized : public ArrayCopyAnalysisBase {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ArrayCopyAnalysisOptimized)

  ArrayCopyAnalysisOptimized(mlir::Operation *op)
      : ArrayCopyAnalysisBase(op, /*optimized=*/true) {}
};

// Unoptimized array copy analysis used at O0.
class ArrayCopyAnalysis : public ArrayCopyAnalysisBase {
public:
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(ArrayCopyAnalysis)

  ArrayCopyAnalysis(mlir::Operation *op)
      : ArrayCopyAnalysisBase(op, /*optimized=*/false) {}
};
} // namespace

namespace {
/// Helper class to collect all array operations that produced an array value.
class ReachCollector {
public:
  ReachCollector(llvm::SmallVectorImpl<mlir::Operation *> &reach,
                 mlir::Region *loopRegion)
      : reach{reach}, loopRegion{loopRegion} {}

  void collectArrayMentionFrom(mlir::Operation *op, mlir::ValueRange range) {
    if (range.empty()) {
      collectArrayMentionFrom(op, mlir::Value{});
      return;
    }
    for (mlir::Value v : range)
      collectArrayMentionFrom(v);
  }

  // Collect all the array_access ops in `block`. This recursively looks into
  // blocks in ops with regions.
  // FIXME: This is temporarily relying on the array_amend appearing in a
  // do_loop Region. This phase ordering assumption can be eliminated by using
  // dominance information to find the array_access ops or by scanning the
  // transitive closure of the amending array_access's users and the defs that
  // reach them.
  void collectAccesses(llvm::SmallVector<ArrayAccessOp> &result,
                       mlir::Block *block) {
    for (auto &op : *block) {
      if (auto access = mlir::dyn_cast<ArrayAccessOp>(op)) {
        LLVM_DEBUG(llvm::dbgs() << "adding access: " << access << '\n');
        result.push_back(access);
        continue;
      }
      for (auto &region : op.getRegions())
        for (auto &bb : region.getBlocks())
          collectAccesses(result, &bb);
    }
  }

  void collectArrayMentionFrom(mlir::Operation *op, mlir::Value val) {
    // `val` is defined by an Op, process the defining Op.
    // If `val` is defined by a region containing Op, we want to drill down
    // and through that Op's region(s).
    LLVM_DEBUG(llvm::dbgs() << "popset: " << *op << '\n');
    auto popFn = [&](auto rop) {
      assert(val && "op must have a result value");
      auto resNum = val.cast<mlir::OpResult>().getResultNumber();
      llvm::SmallVector<mlir::Value> results;
      rop.resultToSourceOps(results, resNum);
      for (auto u : results)
        collectArrayMentionFrom(u);
    };
    if (auto rop = mlir::dyn_cast<DoLoopOp>(op)) {
      popFn(rop);
      return;
    }
    if (auto rop = mlir::dyn_cast<IterWhileOp>(op)) {
      popFn(rop);
      return;
    }
    if (auto rop = mlir::dyn_cast<fir::IfOp>(op)) {
      popFn(rop);
      return;
    }
    if (auto box = mlir::dyn_cast<EmboxOp>(op)) {
      for (auto *user : box.getMemref().getUsers())
        if (user != op)
          collectArrayMentionFrom(user, user->getResults());
      return;
    }
    if (auto mergeStore = mlir::dyn_cast<ArrayMergeStoreOp>(op)) {
      if (opIsInsideLoops(mergeStore))
        collectArrayMentionFrom(mergeStore.getSequence());
      return;
    }

    if (mlir::isa<AllocaOp, AllocMemOp>(op)) {
      // Look for any stores inside the loops, and collect an array operation
      // that produced the value being stored to it.
      for (auto *user : op->getUsers())
        if (auto store = mlir::dyn_cast<fir::StoreOp>(user))
          if (opIsInsideLoops(store))
            collectArrayMentionFrom(store.getValue());
      return;
    }

    // Scan the uses of amend's memref.
    if (auto amend = mlir::dyn_cast<ArrayAmendOp>(op)) {
      reach.push_back(op);
      llvm::SmallVector<ArrayAccessOp> accesses;
      collectAccesses(accesses, op->getBlock());
      for (auto access : accesses)
        collectArrayMentionFrom(access.getResult());
    }

    // Otherwise, Op does not contain a region so just chase its operands.
    if (mlir::isa<ArrayAccessOp, ArrayLoadOp, ArrayUpdateOp, ArrayModifyOp,
                  ArrayFetchOp>(op)) {
      LLVM_DEBUG(llvm::dbgs() << "add " << *op << " to reachable set\n");
      reach.push_back(op);
    }

    // Include all array_access ops using an array_load.
    if (auto arrLd = mlir::dyn_cast<ArrayLoadOp>(op))
      for (auto *user : arrLd.getResult().getUsers())
        if (mlir::isa<ArrayAccessOp>(user)) {
          LLVM_DEBUG(llvm::dbgs() << "add " << *user << " to reachable set\n");
          reach.push_back(user);
        }

    // Array modify assignment is performed on the result. So the analysis
    // must look at what is done with the result.
    if (mlir::isa<ArrayModifyOp>(op))
      for (auto *user : op->getResult(0).getUsers())
        followUsers(user);

    if (mlir::isa<fir::CallOp>(op)) {
      LLVM_DEBUG(llvm::dbgs() << "add " << *op << " to reachable set\n");
      reach.push_back(op);
    }

    for (auto u : op->getOperands())
      collectArrayMentionFrom(u);
  }

  void collectArrayMentionFrom(mlir::BlockArgument ba) {
    auto *parent = ba.getOwner()->getParentOp();
    // If inside an Op holding a region, the block argument corresponds to an
    // argument passed to the containing Op.
    auto popFn = [&](auto rop) {
      collectArrayMentionFrom(rop.blockArgToSourceOp(ba.getArgNumber()));
    };
    if (auto rop = mlir::dyn_cast<DoLoopOp>(parent)) {
      popFn(rop);
      return;
    }
    if (auto rop = mlir::dyn_cast<IterWhileOp>(parent)) {
      popFn(rop);
      return;
    }
    // Otherwise, a block argument is provided via the pred blocks.
    for (auto *pred : ba.getOwner()->getPredecessors()) {
      auto u = pred->getTerminator()->getOperand(ba.getArgNumber());
      collectArrayMentionFrom(u);
    }
  }

  // Recursively trace operands to find all array operations relating to the
  // values merged.
  void collectArrayMentionFrom(mlir::Value val) {
    if (!val || visited.contains(val))
      return;
    visited.insert(val);

    // Process a block argument.
    if (auto ba = val.dyn_cast<mlir::BlockArgument>()) {
      collectArrayMentionFrom(ba);
      return;
    }

    // Process an Op.
    if (auto *op = val.getDefiningOp()) {
      collectArrayMentionFrom(op, val);
      return;
    }

    emitFatalError(val.getLoc(), "unhandled value");
  }

  /// Return all ops that produce the array value that is stored into the
  /// `array_merge_store`.
  static void reachingValues(llvm::SmallVectorImpl<mlir::Operation *> &reach,
                             mlir::Value seq) {
    reach.clear();
    mlir::Region *loopRegion = nullptr;
    if (auto doLoop = mlir::dyn_cast_or_null<DoLoopOp>(seq.getDefiningOp()))
      loopRegion = &doLoop->getRegion(0);
    ReachCollector collector(reach, loopRegion);
    collector.collectArrayMentionFrom(seq);
  }

private:
  /// Is \p op inside the loop nest region?
  /// FIXME: replace this structural dependence with graph properties.
  bool opIsInsideLoops(mlir::Operation *op) const {
    auto *region = op->getParentRegion();
    while (region) {
      if (region == loopRegion)
        return true;
      region = region->getParentRegion();
    }
    return false;
  }

  /// Recursively trace the uses of an operation's results, calling
  /// collectArrayMentionFrom on the direct and indirect user operands.
  void followUsers(mlir::Operation *op) {
    for (auto userOperand : op->getOperands())
      collectArrayMentionFrom(userOperand);
    // Go through potential converts/coordinate_op.
    for (auto indirectUser : op->getUsers())
      followUsers(indirectUser);
  }

  llvm::SmallVectorImpl<mlir::Operation *> &reach;
  llvm::SmallPtrSet<mlir::Value, 16> visited;
  /// Region of the loop nest that produced the array value.
  mlir::Region *loopRegion;
};
} // namespace

/// Find all the array operations that access the array value that is loaded by
/// the array load operation, `load`.
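/// For example, in a simplified sketch like the following (types and names
/// hypothetical), the mentions of %a are the array_fetch and array_update,
/// found by threading the load's uses through the loop's iteration argument
/// and result:
/// ```mlir
/// %a = fir.array_load %arr(%shape) : ...
/// %r = fir.do_loop %i = %lb to %ub step %c1 iter_args(%arg = %a) -> ... {
///   %v = fir.array_fetch %arg, %i : ...
///   %a1 = fir.array_update %arg, %v, %i : ...
///   fir.result %a1 : ...
/// }
/// ```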
void ArrayCopyAnalysisBase::arrayMentions(
    llvm::SmallVectorImpl<mlir::Operation *> &mentions, ArrayLoadOp load) {
  mentions.clear();
  auto lmIter = loadMapSets.find(load);
  if (lmIter != loadMapSets.end()) {
    for (auto *opnd : lmIter->second) {
      auto *owner = opnd->getOwner();
      if (mlir::isa<ArrayAccessOp, ArrayAmendOp, ArrayFetchOp, ArrayUpdateOp,
                    ArrayModifyOp>(owner))
        mentions.push_back(owner);
    }
    return;
  }

  UseSetT visited;
  llvm::SmallVector<mlir::OpOperand *> queue; // uses of ArrayLoad[orig]

  auto appendToQueue = [&](mlir::Value val) {
    for (auto &use : val.getUses())
      if (!visited.count(&use)) {
        visited.insert(&use);
        queue.push_back(&use);
      }
  };

  // Build the set of uses of `original`.
  // let USES = { uses of original fir.load }
  appendToQueue(load);

  // Process the worklist until done.
  while (!queue.empty()) {
    mlir::OpOperand *operand = queue.pop_back_val();
    mlir::Operation *owner = operand->getOwner();
    if (!owner)
      continue;
    auto structuredLoop = [&](auto ro) {
      if (auto blockArg = ro.iterArgToBlockArg(operand->get())) {
        int64_t arg = blockArg.getArgNumber();
        mlir::Value output = ro.getResult(ro.getFinalValue() ? arg : arg - 1);
        appendToQueue(output);
        appendToQueue(blockArg);
      }
    };
    // TODO: this needs to be updated to use the control-flow interface.
    auto branchOp = [&](mlir::Block *dest, OperandRange operands) {
      if (operands.empty())
        return;

      // Check if this operand is within the range.
      unsigned operandIndex = operand->getOperandNumber();
      unsigned operandsStart = operands.getBeginOperandIndex();
      if (operandIndex < operandsStart ||
          operandIndex >= (operandsStart + operands.size()))
        return;

      // Index the successor.
      unsigned argIndex = operandIndex - operandsStart;
      appendToQueue(dest->getArgument(argIndex));
    };
    // Thread uses into structured loop bodies and return value uses.
    if (auto ro = mlir::dyn_cast<DoLoopOp>(owner)) {
      structuredLoop(ro);
    } else if (auto ro = mlir::dyn_cast<IterWhileOp>(owner)) {
      structuredLoop(ro);
    } else if (auto rs = mlir::dyn_cast<ResultOp>(owner)) {
      // Thread any uses of fir.if that return the marked array value.
      mlir::Operation *parent = rs->getParentRegion()->getParentOp();
      if (auto ifOp = mlir::dyn_cast<fir::IfOp>(parent))
        appendToQueue(ifOp.getResult(operand->getOperandNumber()));
    } else if (mlir::isa<ArrayFetchOp>(owner)) {
      // Keep track of array value fetches.
      LLVM_DEBUG(llvm::dbgs()
                 << "add fetch {" << *owner << "} to array value set\n");
      mentions.push_back(owner);
    } else if (auto update = mlir::dyn_cast<ArrayUpdateOp>(owner)) {
      // Keep track of array value updates and thread the return value uses.
      LLVM_DEBUG(llvm::dbgs()
                 << "add update {" << *owner << "} to array value set\n");
      mentions.push_back(owner);
      appendToQueue(update.getResult());
    } else if (auto update = mlir::dyn_cast<ArrayModifyOp>(owner)) {
      // Keep track of array value modification and thread the return value
      // uses.
      LLVM_DEBUG(llvm::dbgs()
                 << "add modify {" << *owner << "} to array value set\n");
      mentions.push_back(owner);
      appendToQueue(update.getResult(1));
    } else if (auto mention = mlir::dyn_cast<ArrayAccessOp>(owner)) {
      mentions.push_back(owner);
    } else if (auto amend = mlir::dyn_cast<ArrayAmendOp>(owner)) {
      mentions.push_back(owner);
      appendToQueue(amend.getResult());
    } else if (auto br = mlir::dyn_cast<mlir::cf::BranchOp>(owner)) {
      branchOp(br.getDest(), br.getDestOperands());
    } else if (auto br = mlir::dyn_cast<mlir::cf::CondBranchOp>(owner)) {
      branchOp(br.getTrueDest(), br.getTrueOperands());
      branchOp(br.getFalseDest(), br.getFalseOperands());
    } else if (mlir::isa<ArrayMergeStoreOp>(owner)) {
      // do nothing
    } else {
      llvm::report_fatal_error("array value reached unexpected op");
    }
  }
  loadMapSets.insert({load, visited});
}

static bool hasPointerType(mlir::Type type) {
  if (auto boxTy = type.dyn_cast<BoxType>())
    type = boxTy.getEleTy();
  return type.isa<fir::PointerType>();
}

// This is a NF performance hack. It makes a simple test that the slices of the
// load, \p ld, and the merge store, \p st, are trivially mutually exclusive.
static bool mutuallyExclusiveSliceRange(ArrayLoadOp ld, ArrayMergeStoreOp st) {
  // If the same array_load, then no further testing is warranted.
  if (ld.getResult() == st.getOriginal())
    return false;

  auto getSliceOp = [](mlir::Value val) -> SliceOp {
    if (!val)
      return {};
    auto sliceOp = mlir::dyn_cast_or_null<SliceOp>(val.getDefiningOp());
    if (!sliceOp)
      return {};
    return sliceOp;
  };

  auto ldSlice = getSliceOp(ld.getSlice());
  auto stSlice = getSliceOp(st.getSlice());
  if (!ldSlice || !stSlice)
    return false;

  // Resign on subobject slices.
  if (!ldSlice.getFields().empty() || !stSlice.getFields().empty() ||
      !ldSlice.getSubstr().empty() || !stSlice.getSubstr().empty())
    return false;

  // Crudely test that the two slices do not overlap by looking for the
  // following general condition. If the slices look like (i:j) and (j+1:k)
  // then these ranges do not overlap. The addend must be a constant.
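  // For instance, in a hypothetical Fortran assignment such as
  // `a(1:n) = a(n+1:2*n)`, the lower bound of one slice is the other slice's
  // upper bound displaced by the constant 1, so the two ranges cannot
  // overlap. A displacement by a variable amount cannot be proven disjoint
  // and is rejected below.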
  auto ldTriples = ldSlice.getTriples();
  auto stTriples = stSlice.getTriples();
  const auto size = ldTriples.size();
  if (size != stTriples.size())
    return false;

  auto displacedByConstant = [](mlir::Value v1, mlir::Value v2) {
    auto removeConvert = [](mlir::Value v) -> mlir::Operation * {
      auto *op = v.getDefiningOp();
      while (auto conv = mlir::dyn_cast_or_null<ConvertOp>(op))
        op = conv.getValue().getDefiningOp();
      return op;
    };

    auto isPositiveConstant = [](mlir::Value v) -> bool {
      if (auto conOp =
              mlir::dyn_cast<mlir::arith::ConstantOp>(v.getDefiningOp()))
        if (auto iattr = conOp.getValue().dyn_cast<mlir::IntegerAttr>())
          return iattr.getInt() > 0;
      return false;
    };

    auto *op1 = removeConvert(v1);
    auto *op2 = removeConvert(v2);
    if (!op1 || !op2)
      return false;
    if (auto addi = mlir::dyn_cast<mlir::arith::AddIOp>(op2))
      if ((addi.getLhs().getDefiningOp() == op1 &&
           isPositiveConstant(addi.getRhs())) ||
          (addi.getRhs().getDefiningOp() == op1 &&
           isPositiveConstant(addi.getLhs())))
        return true;
    if (auto subi = mlir::dyn_cast<mlir::arith::SubIOp>(op1))
      if (subi.getLhs().getDefiningOp() == op2 &&
          isPositiveConstant(subi.getRhs()))
        return true;
    return false;
  };

  for (std::remove_const_t<decltype(size)> i = 0; i < size; i += 3) {
    // If both are loop invariant, skip to the next triple.
    if (mlir::isa_and_nonnull<fir::UndefOp>(ldTriples[i + 1].getDefiningOp()) &&
        mlir::isa_and_nonnull<fir::UndefOp>(stTriples[i + 1].getDefiningOp())) {
      // Unless either is a vector index, then be conservative.
      if (mlir::isa_and_nonnull<fir::UndefOp>(ldTriples[i].getDefiningOp()) ||
          mlir::isa_and_nonnull<fir::UndefOp>(stTriples[i].getDefiningOp()))
        return false;
      continue;
    }
    // If identical, skip to the next triple.
    if (ldTriples[i] == stTriples[i] && ldTriples[i + 1] == stTriples[i + 1] &&
        ldTriples[i + 2] == stTriples[i + 2])
      continue;
    // If ubound and lbound are the same with a constant offset, skip to the
    // next triple.
    if (displacedByConstant(ldTriples[i + 1], stTriples[i]) ||
        displacedByConstant(stTriples[i + 1], ldTriples[i]))
      continue;
    return false;
  }
  LLVM_DEBUG(llvm::dbgs() << "detected non-overlapping slice ranges on " << ld
                          << " and " << st << ", which is not a conflict\n");
  return true;
}

/// Is there a conflict between the array value that was updated and is to be
/// stored to `st`, and the set of arrays loaded (`reach`) and used to compute
/// the updated value?
/// If `optimize` is true, use the variable attributes to prove that
/// there is no conflict.
static bool conflictOnLoad(llvm::ArrayRef<mlir::Operation *> reach,
                           ArrayMergeStoreOp st, bool optimize) {
  mlir::Value load;
  mlir::Value addr = st.getMemref();
  const bool storeHasPointerType = hasPointerType(addr.getType());
  for (auto *op : reach)
    if (auto ld = mlir::dyn_cast<ArrayLoadOp>(op)) {
      mlir::Type ldTy = ld.getMemref().getType();
      auto globalOpName = mlir::OperationName(fir::GlobalOp::getOperationName(),
                                              ld.getContext());
      if (ld.getMemref() == addr) {
        if (mutuallyExclusiveSliceRange(ld, st))
          continue;
        if (ld.getResult() != st.getOriginal())
          return true;
        if (load) {
          // TODO: extend this to allow checking if the first `load` and this
          // `ld` are mutually exclusive accesses but not identical.
          return true;
        }
        load = ld;
      } else if (storeHasPointerType) {
        if (optimize && !hasPointerType(ldTy) &&
            !valueMayHaveFirAttributes(
                ld.getMemref(),
                {getTargetAttrName(),
                 fir::GlobalOp::getTargetAttrName(globalOpName).strref()}))
          continue;

        return true;
      } else if (hasPointerType(ldTy)) {
        if (optimize && !storeHasPointerType &&
            !valueMayHaveFirAttributes(
                addr,
                {getTargetAttrName(),
                 fir::GlobalOp::getTargetAttrName(globalOpName).strref()}))
          continue;

        return true;
      }
      // TODO: Check if types can also allow ruling out some cases. For now,
      // the fact that equivalences are using the pointer attribute to enforce
      // aliasing is preventing any attempt to do so, and in general, it may
      // be wrong to use this if any of the types is a complex or a derived
      // type for which it is possible to create a pointer to a part with a
      // different type than the whole, although this deserves some more
      // investigation because existing compiler behaviors seem to diverge
      // here.
    }
  return false;
}

/// Is there an access vector conflict on the array being merged into? If the
/// access vectors diverge, then assume that there are potentially overlapping
/// loop-carried references.
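/// For example, a fetch and an update of the same loaded array with
/// different index values (a hypothetical sketch):
/// ```mlir
/// %v = fir.array_fetch %a, %i : ...
/// %a1 = fir.array_update %a, %v, %j : ...
/// ```
/// has diverging access vectors (%i versus %j) and is treated as a potential
/// loop-carried dependence, while a fetch and an update that both index with
/// %i compare equal and raise no conflict here.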
static bool conflictOnMerge(llvm::ArrayRef<mlir::Operation *> mentions) {
  if (mentions.size() < 2)
    return false;
  llvm::SmallVector<mlir::Value> indices;
  LLVM_DEBUG(llvm::dbgs() << "check merge conflict with " << mentions.size()
                          << " mentions on the list\n");
  bool valSeen = false;
  bool refSeen = false;
  for (auto *op : mentions) {
    llvm::SmallVector<mlir::Value> compareVector;
    if (auto u = mlir::dyn_cast<ArrayUpdateOp>(op)) {
      valSeen = true;
      if (indices.empty()) {
        indices = u.getIndices();
        continue;
      }
      compareVector = u.getIndices();
    } else if (auto f = mlir::dyn_cast<ArrayModifyOp>(op)) {
      valSeen = true;
      if (indices.empty()) {
        indices = f.getIndices();
        continue;
      }
      compareVector = f.getIndices();
    } else if (auto f = mlir::dyn_cast<ArrayFetchOp>(op)) {
      valSeen = true;
      if (indices.empty()) {
        indices = f.getIndices();
        continue;
      }
      compareVector = f.getIndices();
    } else if (auto f = mlir::dyn_cast<ArrayAccessOp>(op)) {
      refSeen = true;
      if (indices.empty()) {
        indices = f.getIndices();
        continue;
      }
      compareVector = f.getIndices();
    } else if (mlir::isa<ArrayAmendOp>(op)) {
      refSeen = true;
      continue;
    } else {
      mlir::emitError(op->getLoc(), "unexpected operation in analysis");
    }
    if (compareVector.size() != indices.size() ||
        llvm::any_of(llvm::zip(compareVector, indices), [&](auto pair) {
          return std::get<0>(pair) != std::get<1>(pair);
        }))
      return true;
    LLVM_DEBUG(llvm::dbgs() << "vectors compare equal\n");
  }
  return valSeen && refSeen;
}

/// With element-by-reference semantics, an amended array with more than one
/// access to the same loaded array is conservatively considered a conflict.
/// Note: the array copy can still be eliminated in subsequent optimizations.
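/// For example, two array_access ops on the same loaded array value, or a
/// mix of by-value mentions (array_fetch, array_update, array_modify) with
/// by-reference mentions (array_access, array_amend), are both answered
/// conservatively as conflicts below.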
static bool conflictOnReference(llvm::ArrayRef<mlir::Operation *> mentions) {
  LLVM_DEBUG(llvm::dbgs() << "checking reference semantics " << mentions.size()
                          << '\n');
  if (mentions.size() < 3)
    return false;
  unsigned amendCount = 0;
  unsigned accessCount = 0;
  for (auto *op : mentions) {
    if (mlir::isa<ArrayAmendOp>(op) && ++amendCount > 1) {
      LLVM_DEBUG(llvm::dbgs() << "conflict: multiple amends of array value\n");
      return true;
    }
    if (mlir::isa<ArrayAccessOp>(op) && ++accessCount > 1) {
      LLVM_DEBUG(llvm::dbgs()
                 << "conflict: multiple accesses of array value\n");
      return true;
    }
    if (mlir::isa<ArrayFetchOp, ArrayUpdateOp, ArrayModifyOp>(op)) {
      LLVM_DEBUG(llvm::dbgs()
                 << "conflict: array value has both uses by-value and uses "
                    "by-reference. conservative assumption.\n");
      return true;
    }
  }
  return false;
}

static mlir::Operation *
amendingAccess(llvm::ArrayRef<mlir::Operation *> mentions) {
  for (auto *op : mentions)
    if (auto amend = mlir::dyn_cast<ArrayAmendOp>(op))
      return amend.getMemref().getDefiningOp();
  return {};
}

// Are any conflicts present? The conflicts detected here are described above.
static bool conflictDetected(llvm::ArrayRef<mlir::Operation *> reach,
                             llvm::ArrayRef<mlir::Operation *> mentions,
                             ArrayMergeStoreOp st, bool optimize) {
  return conflictOnLoad(reach, st, optimize) || conflictOnMerge(mentions);
}

// Assume that any call to a function that uses host-associations will be
// modifying the output array.
static bool
conservativeCallConflict(llvm::ArrayRef<mlir::Operation *> reaches) {
  return llvm::any_of(reaches, [](mlir::Operation *op) {
    if (auto call = mlir::dyn_cast<fir::CallOp>(op))
      if (auto callee =
              call.getCallableForCallee().dyn_cast<mlir::SymbolRefAttr>()) {
        auto module = op->getParentOfType<mlir::ModuleOp>();
        return isInternalPorcedure(
            module.lookupSymbol<mlir::func::FuncOp>(callee));
      }
    return false;
  });
}

/// Constructor of the array copy analysis.
/// This performs the analysis and saves the intermediate results.
void ArrayCopyAnalysisBase::construct(mlir::Operation *topLevelOp) {
  topLevelOp->walk([&](Operation *op) {
    if (auto st = mlir::dyn_cast<fir::ArrayMergeStoreOp>(op)) {
      llvm::SmallVector<mlir::Operation *> values;
      ReachCollector::reachingValues(values, st.getSequence());
      bool callConflict = conservativeCallConflict(values);
      llvm::SmallVector<mlir::Operation *> mentions;
      arrayMentions(mentions,
                    mlir::cast<ArrayLoadOp>(st.getOriginal().getDefiningOp()));
      bool conflict = conflictDetected(values, mentions, st, optimizeConflicts);
      bool refConflict = conflictOnReference(mentions);
      if (callConflict || conflict || refConflict) {
        LLVM_DEBUG(llvm::dbgs()
                   << "CONFLICT: copies required for " << st << '\n'
                   << " adding conflicts on: " << *op << " and "
                   << st.getOriginal() << '\n');
        conflicts.insert(op);
        conflicts.insert(st.getOriginal().getDefiningOp());
        if (auto *access = amendingAccess(mentions))
          amendAccesses.insert(access);
      }
      auto *ld = st.getOriginal().getDefiningOp();
      LLVM_DEBUG(llvm::dbgs()
                 << "map: adding {" << *ld << " -> " << st << "}\n");
      useMap.insert({ld, op});
    } else if (auto load = mlir::dyn_cast<ArrayLoadOp>(op)) {
      llvm::SmallVector<mlir::Operation *> mentions;
      arrayMentions(mentions, load);
      LLVM_DEBUG(llvm::dbgs() << "process load: " << load
                              << ", mentions: " << mentions.size() << '\n');
      for (auto *acc : mentions) {
        LLVM_DEBUG(llvm::dbgs() << " mention: " << *acc << '\n');
        if (mlir::isa<ArrayAccessOp, ArrayAmendOp, ArrayFetchOp, ArrayUpdateOp,
                      ArrayModifyOp>(acc)) {
          if (useMap.count(acc)) {
            mlir::emitError(
                load.getLoc(),
                "The parallel semantics of multiple array_merge_stores per "
                "array_load are not supported.");
            continue;
          }
          LLVM_DEBUG(llvm::dbgs()
                     << "map: adding {" << *acc << "} -> {" << load << "}\n");
          useMap.insert({acc, op});
        }
      }
    }
  });
}

//===----------------------------------------------------------------------===//
// Conversions for converting out of array value form.
//===----------------------------------------------------------------------===//

namespace {
class ArrayLoadConversion : public mlir::OpRewritePattern<ArrayLoadOp> {
public:
  using OpRewritePattern::OpRewritePattern;

  mlir::LogicalResult
  matchAndRewrite(ArrayLoadOp load,
                  mlir::PatternRewriter &rewriter) const override {
    LLVM_DEBUG(llvm::dbgs() << "replace load " << load << " with undef.\n");
    rewriter.replaceOpWithNewOp<UndefOp>(load, load.getType());
    return mlir::success();
  }
};

class ArrayMergeStoreConversion
    : public mlir::OpRewritePattern<ArrayMergeStoreOp> {
public:
  using OpRewritePattern::OpRewritePattern;

  mlir::LogicalResult
  matchAndRewrite(ArrayMergeStoreOp store,
                  mlir::PatternRewriter &rewriter) const override {
    LLVM_DEBUG(llvm::dbgs() << "marking store " << store << " as dead.\n");
    rewriter.eraseOp(store);
    return mlir::success();
  }
};
} // namespace

static mlir::Type getEleTy(mlir::Type ty) {
  auto eleTy = unwrapSequenceType(unwrapPassByRefType(ty));
  // FIXME: keep ptr/heap/ref information.
  return ReferenceType::get(eleTy);
}

// This is an unsafe way to deduce this (won't be true in internal procedures
// or inside select-rank for assumed-size). Only here to satisfy legacy code
// until removed.
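// For example, an assumed-size dummy argument declared as the hypothetical
// `real :: a(2, *)` is lowered with a trailing extent of -1, which is the
// marker this predicate tests for.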
static bool isAssumedSize(llvm::SmallVectorImpl<mlir::Value> &extents) {
  if (extents.empty())
    return false;
  auto cstLen = fir::getIntIfConstant(extents.back());
  return cstLen.has_value() && *cstLen == -1;
}

// Extract extents from the ShapeOp/ShapeShiftOp into the result vector.
static bool getAdjustedExtents(mlir::Location loc,
                               mlir::PatternRewriter &rewriter,
                               ArrayLoadOp arrLoad,
                               llvm::SmallVectorImpl<mlir::Value> &result,
                               mlir::Value shape) {
  bool copyUsingSlice = false;
  auto *shapeOp = shape.getDefiningOp();
  if (auto s = mlir::dyn_cast_or_null<ShapeOp>(shapeOp)) {
    auto e = s.getExtents();
    result.insert(result.end(), e.begin(), e.end());
  } else if (auto s = mlir::dyn_cast_or_null<ShapeShiftOp>(shapeOp)) {
    auto e = s.getExtents();
    result.insert(result.end(), e.begin(), e.end());
  } else {
    emitFatalError(loc, "not a fir.shape/fir.shape_shift op");
  }
  auto idxTy = rewriter.getIndexType();
  if (isAssumedSize(result)) {
    // Use slice information to compute the extent of the column.
    auto one = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 1);
    mlir::Value size = one;
    if (mlir::Value sliceArg = arrLoad.getSlice()) {
      if (auto sliceOp =
              mlir::dyn_cast_or_null<SliceOp>(sliceArg.getDefiningOp())) {
        auto triples = sliceOp.getTriples();
        const std::size_t tripleSize = triples.size();
        auto module = arrLoad->getParentOfType<mlir::ModuleOp>();
        FirOpBuilder builder(rewriter, module);
        size = builder.genExtentFromTriplet(loc, triples[tripleSize - 3],
                                            triples[tripleSize - 2],
                                            triples[tripleSize - 1], idxTy);
        copyUsingSlice = true;
      }
    }
    result[result.size() - 1] = size;
  }
  return copyUsingSlice;
}

/// Place the extents of the array load, \p arrLoad, into \p result and
/// return a ShapeOp or ShapeShiftOp with the same extents. If \p arrLoad is
/// loading a `!fir.box`, code will be generated to read the extents from the
/// boxed value, and the returned shape Op will be built with the extents read
/// from the box. Otherwise, the extents will be extracted from the ShapeOp (or
/// ShapeShiftOp) argument of \p arrLoad. \p copyUsingSlice will be set to true
/// if slicing of the output array is to be done in the copy-in/copy-out rather
/// than in the elemental computation step.
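/// In the boxed case, each extent is read with a fir.box_dims op, roughly as
/// in this sketch (types abridged, one dimension shown):
/// ```mlir
/// %c0 = arith.constant 0 : index
/// %dims:3 = fir.box_dims %box, %c0 : (!fir.box<!fir.array<?xf32>>, index) -> (index, index, index)
/// %shape = fir.shape %dims#1 : (index) -> !fir.shape<1>
/// ```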
static mlir::Value getOrReadExtentsAndShapeOp(
    mlir::Location loc, mlir::PatternRewriter &rewriter, ArrayLoadOp arrLoad,
    llvm::SmallVectorImpl<mlir::Value> &result, bool &copyUsingSlice) {
  assert(result.empty());
  if (arrLoad->hasAttr(fir::getOptionalAttrName()))
    fir::emitFatalError(
        loc, "shapes from array load of OPTIONAL arrays must not be used");
  if (auto boxTy = arrLoad.getMemref().getType().dyn_cast<BoxType>()) {
    auto rank =
        dyn_cast_ptrOrBoxEleTy(boxTy).cast<SequenceType>().getDimension();
    auto idxTy = rewriter.getIndexType();
    for (decltype(rank) dim = 0; dim < rank; ++dim) {
      auto dimVal = rewriter.create<mlir::arith::ConstantIndexOp>(loc, dim);
      auto dimInfo = rewriter.create<BoxDimsOp>(loc, idxTy, idxTy, idxTy,
                                                arrLoad.getMemref(), dimVal);
      result.emplace_back(dimInfo.getResult(1));
    }
    if (!arrLoad.getShape()) {
      auto shapeType = ShapeType::get(rewriter.getContext(), rank);
      return rewriter.create<ShapeOp>(loc, shapeType, result);
    }
    auto shiftOp = arrLoad.getShape().getDefiningOp<ShiftOp>();
    auto shapeShiftType = ShapeShiftType::get(rewriter.getContext(), rank);
    llvm::SmallVector<mlir::Value> shapeShiftOperands;
    for (auto [lb, extent] : llvm::zip(shiftOp.getOrigins(), result)) {
      shapeShiftOperands.push_back(lb);
      shapeShiftOperands.push_back(extent);
    }
    return rewriter.create<ShapeShiftOp>(loc, shapeShiftType,
                                         shapeShiftOperands);
  }
  copyUsingSlice =
      getAdjustedExtents(loc, rewriter, arrLoad, result, arrLoad.getShape());
  return arrLoad.getShape();
}

static mlir::Type toRefType(mlir::Type ty) {
  if (fir::isa_ref_type(ty))
    return ty;
  return fir::ReferenceType::get(ty);
}

static llvm::SmallVector<mlir::Value>
getTypeParamsIfRawData(mlir::Location loc, FirOpBuilder &builder,
                       ArrayLoadOp arrLoad, mlir::Type ty) {
  if (ty.isa<BoxType>())
    return {};
  return fir::factory::getTypeParams(loc, builder, arrLoad);
}

static mlir::Value genCoorOp(mlir::PatternRewriter &rewriter,
                             mlir::Location loc, mlir::Type eleTy,
                             mlir::Type resTy, mlir::Value alloc,
                             mlir::Value shape, mlir::Value slice,
                             mlir::ValueRange indices, ArrayLoadOp load,
                             bool skipOrig = false) {
  llvm::SmallVector<mlir::Value> originated;
  if (skipOrig)
    originated.assign(indices.begin(), indices.end());
  else
    originated = factory::originateIndices(loc, rewriter, alloc.getType(),
                                           shape, indices);
  auto seqTy = dyn_cast_ptrOrBoxEleTy(alloc.getType());
  assert(seqTy && seqTy.isa<SequenceType>());
  const auto dimension = seqTy.cast<SequenceType>().getDimension();
  auto module = load->getParentOfType<mlir::ModuleOp>();
  FirOpBuilder builder(rewriter, module);
  auto typeparams = getTypeParamsIfRawData(loc, builder, load, alloc.getType());
  mlir::Value result = rewriter.create<ArrayCoorOp>(
      loc, eleTy, alloc, shape, slice,
      llvm::ArrayRef<mlir::Value>{originated}.take_front(dimension),
      typeparams);
  if (dimension < originated.size())
    result = rewriter.create<fir::CoordinateOp>(
        loc, resTy, result,
        llvm::ArrayRef<mlir::Value>{originated}.drop_front(dimension));
  return result;
}

static mlir::Value getCharacterLen(mlir::Location loc, FirOpBuilder &builder,
                                   ArrayLoadOp load, CharacterType charTy) {
  auto charLenTy = builder.getCharacterLengthType();
  if (charTy.hasDynamicLen()) {
    if (load.getMemref().getType().isa<BoxType>()) {
      // The loaded array is an emboxed value. Get the CHARACTER length from
      // the box value.
      auto eleSzInBytes =
          builder.create<BoxEleSizeOp>(loc, charLenTy, load.getMemref());
      auto kindSize =
          builder.getKindMap().getCharacterBitsize(charTy.getFKind());
      auto kindByteSize =
          builder.createIntegerConstant(loc, charLenTy, kindSize / 8);
      return builder.create<mlir::arith::DivSIOp>(loc, eleSzInBytes,
                                                  kindByteSize);
    }
    // The loaded array is a (set of) unboxed values. If the CHARACTER's
    // length is not a constant, it must be provided as a type parameter to
    // the array_load.
    auto typeparams = load.getTypeparams();
    assert(typeparams.size() > 0 && "expected type parameters on array_load");
    return typeparams.back();
  }
  // The typical case: the length of the CHARACTER is a compile-time
  // constant that is encoded in the type information.
  return builder.createIntegerConstant(loc, charLenTy, charTy.getLen());
}

/// Generate a shallow array copy. This is used for both copy-in and copy-out.
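/// The copy is emitted as a loop nest over the array's extents that addresses
/// matching elements in the source and destination and assigns them one at a
/// time, roughly (a simplified one-dimensional sketch; names hypothetical):
/// ```mlir
/// fir.do_loop %i = %c0 to %ub step %c1 {
///   %from = fir.array_coor %src(%shape) %idx : ...
///   %to = fir.array_coor %dst(%shape) %idx : ...
///   // scalar assignment of the element at %from into %to
/// }
/// ```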
template <bool CopyIn>
void genArrayCopy(mlir::Location loc, mlir::PatternRewriter &rewriter,
                  mlir::Value dst, mlir::Value src, mlir::Value shapeOp,
                  mlir::Value sliceOp, ArrayLoadOp arrLoad) {
  auto insPt = rewriter.saveInsertionPoint();
  llvm::SmallVector<mlir::Value> indices;
  llvm::SmallVector<mlir::Value> extents;
  bool copyUsingSlice =
      getAdjustedExtents(loc, rewriter, arrLoad, extents, shapeOp);
  auto idxTy = rewriter.getIndexType();
  // Build loop nest from column to row.
  for (auto sh : llvm::reverse(extents)) {
    auto ubi = rewriter.create<ConvertOp>(loc, idxTy, sh);
    auto zero = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 0);
    auto one = rewriter.create<mlir::arith::ConstantIndexOp>(loc, 1);
    auto ub = rewriter.create<mlir::arith::SubIOp>(loc, idxTy, ubi, one);
    auto loop = rewriter.create<DoLoopOp>(loc, zero, ub, one);
    rewriter.setInsertionPointToStart(loop.getBody());
    indices.push_back(loop.getInductionVar());
  }
  // Reverse the indices so they are in column-major order.
  std::reverse(indices.begin(), indices.end());
  auto module = arrLoad->getParentOfType<mlir::ModuleOp>();
  FirOpBuilder builder(rewriter, module);
  auto fromAddr = rewriter.create<ArrayCoorOp>(
      loc, getEleTy(src.getType()), src, shapeOp,
      CopyIn && copyUsingSlice ? sliceOp : mlir::Value{},
      factory::originateIndices(loc, rewriter, src.getType(), shapeOp, indices),
      getTypeParamsIfRawData(loc, builder, arrLoad, src.getType()));
  auto toAddr = rewriter.create<ArrayCoorOp>(
      loc, getEleTy(dst.getType()), dst, shapeOp,
      !CopyIn && copyUsingSlice ? sliceOp : mlir::Value{},
      factory::originateIndices(loc, rewriter, dst.getType(), shapeOp, indices),
      getTypeParamsIfRawData(loc, builder, arrLoad, dst.getType()));
  auto eleTy = unwrapSequenceType(unwrapPassByRefType(dst.getType()));
  // Copy from (to) object to (from) temp copy of same object.
  if (auto charTy = eleTy.dyn_cast<CharacterType>()) {
    auto len = getCharacterLen(loc, builder, arrLoad, charTy);
    CharBoxValue toChar(toAddr, len);
    CharBoxValue fromChar(fromAddr, len);
    factory::genScalarAssignment(builder, loc, toChar, fromChar);
  } else {
    if (hasDynamicSize(eleTy))
      TODO(loc, "copy element of dynamic size");
    factory::genScalarAssignment(builder, loc, toAddr, fromAddr);
  }
  rewriter.restoreInsertionPoint(insPt);
}

/// The array load may be either a boxed or unboxed value. If the value is
/// boxed, we read the type parameters from the boxed value.
static llvm::SmallVector<mlir::Value>
genArrayLoadTypeParameters(mlir::Location loc, mlir::PatternRewriter &rewriter,
                           ArrayLoadOp load) {
  if (load.getTypeparams().empty()) {
    auto eleTy =
        unwrapSequenceType(unwrapPassByRefType(load.getMemref().getType()));
    if (hasDynamicSize(eleTy)) {
      if (auto charTy = eleTy.dyn_cast<CharacterType>()) {
        assert(load.getMemref().getType().isa<BoxType>());
        auto module = load->getParentOfType<mlir::ModuleOp>();
        FirOpBuilder builder(rewriter, module);
        return {getCharacterLen(loc, builder, load, charTy)};
      }
      TODO(loc, "unhandled dynamic type parameters");
    }
    return {};
  }
  return load.getTypeparams();
}

static llvm::SmallVector<mlir::Value>
findNonconstantExtents(mlir::Type memrefTy,
                       llvm::ArrayRef<mlir::Value> extents) {
  llvm::SmallVector<mlir::Value> nce;
  auto arrTy = unwrapPassByRefType(memrefTy);
  auto seqTy = arrTy.cast<SequenceType>();
  for (auto [s, x] : llvm::zip(seqTy.getShape(), extents))
    if (s == SequenceType::getUnknownExtent())
      nce.emplace_back(x);
  if (extents.size() > seqTy.getShape().size())
    for (auto x : extents.drop_front(seqTy.getShape().size()))
      nce.emplace_back(x);
  return nce;
}

/// Allocate temporary storage for an ArrayLoadOp \p load and initialize any
/// allocatable direct components of the array elements with an unallocated
/// status. Returns the temporary address as well as a callback to generate the
/// temporary clean-up once it has been used. The clean-up will take care of
/// deallocating all the element allocatable components that may have been
/// allocated while using the temporary.
static std::pair<mlir::Value,
                 std::function<void(mlir::PatternRewriter &rewriter)>>
allocateArrayTemp(mlir::Location loc, mlir::PatternRewriter &rewriter,
                  ArrayLoadOp load, llvm::ArrayRef<mlir::Value> extents,
                  mlir::Value shape) {
  mlir::Type baseType = load.getMemref().getType();
  llvm::SmallVector<mlir::Value> nonconstantExtents =
      findNonconstantExtents(baseType, extents);
  llvm::SmallVector<mlir::Value> typeParams =
      genArrayLoadTypeParameters(loc, rewriter, load);
  mlir::Value allocmem = rewriter.create<AllocMemOp>(
      loc, dyn_cast_ptrOrBoxEleTy(baseType), typeParams, nonconstantExtents);
  mlir::Type eleType =
      fir::unwrapSequenceType(fir::unwrapPassByRefType(baseType));
  if (fir::isRecordWithAllocatableMember(eleType)) {
    // The allocatable component descriptors need to be set to a clean
    // deallocated status before anything is done with them.
    mlir::Value box = rewriter.create<fir::EmboxOp>(
        loc, fir::BoxType::get(allocmem.getType()), allocmem, shape,
        /*slice=*/mlir::Value{}, typeParams);
    auto module = load->getParentOfType<mlir::ModuleOp>();
    FirOpBuilder builder(rewriter, module);
    runtime::genDerivedTypeInitialize(builder, loc, box);
    // Any allocatable component that may have been allocated must be
    // deallocated during the clean-up.
    auto cleanup = [=](mlir::PatternRewriter &r) {
      FirOpBuilder builder(r, module);
      runtime::genDerivedTypeDestroy(builder, loc, box);
      r.create<FreeMemOp>(loc, allocmem);
    };
    return {allocmem, cleanup};
  }
  auto cleanup = [=](mlir::PatternRewriter &r) {
    r.create<FreeMemOp>(loc, allocmem);
  };
  return {allocmem, cleanup};
}

namespace {
/// Conversion of fir.array_update and fir.array_modify Ops.
/// If there is a conflict for the update, then we need to perform a
/// copy-in/copy-out to preserve the original values of the array. If there is
/// no conflict, then it is safe to eschew making any copies.
template <typename ArrayOp>
class ArrayUpdateConversionBase : public mlir::OpRewritePattern<ArrayOp> {
public:
  // TODO: Implement copy/swap semantics?
  explicit ArrayUpdateConversionBase(mlir::MLIRContext *ctx,
                                     const ArrayCopyAnalysisBase &a,
                                     const OperationUseMapT &m)
      : mlir::OpRewritePattern<ArrayOp>{ctx}, analysis{a}, useMap{m} {}

  /// The array_access, \p access, is to be made to a cloned copy due to a
  /// potential conflict. Uses copy-in/copy-out semantics and not copy/swap.
  mlir::Value referenceToClone(mlir::Location loc,
                               mlir::PatternRewriter &rewriter,
                               ArrayOp access) const {
    LLVM_DEBUG(llvm::dbgs()
               << "generating copy-in/copy-out loops for " << access << '\n');
    auto *op = access.getOperation();
    auto *loadOp = useMap.lookup(op);
    auto load = mlir::cast<ArrayLoadOp>(loadOp);
    auto eleTy = access.getType();
    rewriter.setInsertionPoint(loadOp);
    // Copy in.
    llvm::SmallVector<mlir::Value> extents;
    bool copyUsingSlice = false;
    auto shapeOp = getOrReadExtentsAndShapeOp(loc, rewriter, load, extents,
                                              copyUsingSlice);
    auto [allocmem, genTempCleanUp] =
        allocateArrayTemp(loc, rewriter, load, extents, shapeOp);
    genArrayCopy</*copyIn=*/true>(load.getLoc(), rewriter, allocmem,
                                  load.getMemref(), shapeOp, load.getSlice(),
                                  load);
    // Generate the reference for the access.
    rewriter.setInsertionPoint(op);
    auto coor = genCoorOp(
        rewriter, loc, getEleTy(load.getType()), eleTy, allocmem, shapeOp,
        copyUsingSlice ? mlir::Value{} : load.getSlice(), access.getIndices(),
        load, access->hasAttr(factory::attrFortranArrayOffsets()));
    auto *storeOp = useMap.lookup(loadOp);
    auto store = mlir::cast<ArrayMergeStoreOp>(storeOp);
    rewriter.setInsertionPoint(storeOp);
    // Copy out.
    genArrayCopy</*copyIn=*/false>(store.getLoc(), rewriter, store.getMemref(),
                                   allocmem, shapeOp, store.getSlice(), load);
    genTempCleanUp(rewriter);
    return coor;
  }

  /// Copy the RHS element into the LHS and insert copy-in/copy-out between a
  /// temp and the LHS if the analysis found potential overlaps between the RHS
  /// and LHS arrays. The element copy generator must be provided in \p
  /// assignElement. \p update must be the ArrayUpdateOp or the ArrayModifyOp.
  /// Returns the address of the LHS element inside the loop and the LHS
  /// ArrayLoad result.
  std::pair<mlir::Value, mlir::Value>
  materializeAssignment(mlir::Location loc, mlir::PatternRewriter &rewriter,
                        ArrayOp update,
                        const std::function<void(mlir::Value)> &assignElement,
                        mlir::Type lhsEltRefType) const {
    auto *op = update.getOperation();
    auto *loadOp = useMap.lookup(op);
    auto load = mlir::cast<ArrayLoadOp>(loadOp);
    LLVM_DEBUG(llvm::outs() << "does " << load << " have a conflict?\n");
    if (analysis.hasPotentialConflict(loadOp)) {
      // If there is a conflict between the arrays, then we copy the lhs array
      // to a temporary, update the temporary, and copy the temporary back to
      // the lhs array. This yields Fortran's copy-in copy-out array semantics.
      LLVM_DEBUG(llvm::outs() << "Yes, conflict was found\n");
      rewriter.setInsertionPoint(loadOp);
      // Copy in.
      llvm::SmallVector<mlir::Value> extents;
      bool copyUsingSlice = false;
      auto shapeOp = getOrReadExtentsAndShapeOp(loc, rewriter, load, extents,
                                                copyUsingSlice);
      auto [allocmem, genTempCleanUp] =
          allocateArrayTemp(loc, rewriter, load, extents, shapeOp);

      genArrayCopy</*copyIn=*/true>(load.getLoc(), rewriter, allocmem,
                                    load.getMemref(), shapeOp, load.getSlice(),
                                    load);
      rewriter.setInsertionPoint(op);
      auto coor = genCoorOp(
          rewriter, loc, getEleTy(load.getType()), lhsEltRefType, allocmem,
          shapeOp, copyUsingSlice ? mlir::Value{} : load.getSlice(),
          update.getIndices(), load,
          update->hasAttr(factory::attrFortranArrayOffsets()));
      assignElement(coor);
      auto *storeOp = useMap.lookup(loadOp);
      auto store = mlir::cast<ArrayMergeStoreOp>(storeOp);
      rewriter.setInsertionPoint(storeOp);
      // Copy out.
      genArrayCopy</*copyIn=*/false>(store.getLoc(), rewriter,
                                     store.getMemref(), allocmem, shapeOp,
                                     store.getSlice(), load);
      genTempCleanUp(rewriter);
      return {coor, load.getResult()};
    }
    // Otherwise, when there is no conflict (no overlap and no loop-carried
    // dependence), the lhs array can be updated in place.
    LLVM_DEBUG(llvm::outs() << "No, conflict wasn't found\n");
    rewriter.setInsertionPoint(op);
    auto coorTy = getEleTy(load.getType());
    auto coor =
        genCoorOp(rewriter, loc, coorTy, lhsEltRefType, load.getMemref(),
                  load.getShape(), load.getSlice(), update.getIndices(), load,
                  update->hasAttr(factory::attrFortranArrayOffsets()));
    assignElement(coor);
    return {coor, load.getResult()};
  }

protected:
  const ArrayCopyAnalysisBase &analysis;
  const OperationUseMapT &useMap;
};

class ArrayUpdateConversion : public ArrayUpdateConversionBase<ArrayUpdateOp> {
public:
  explicit ArrayUpdateConversion(mlir::MLIRContext *ctx,
                                 const ArrayCopyAnalysisBase &a,
                                 const OperationUseMapT &m)
      : ArrayUpdateConversionBase{ctx, a, m} {}

  mlir::LogicalResult
  matchAndRewrite(ArrayUpdateOp update,
                  mlir::PatternRewriter &rewriter) const override {
    auto loc = update.getLoc();
    auto assignElement = [&](mlir::Value coor) {
      auto input = update.getMerge();
      if (auto inEleTy = dyn_cast_ptrEleTy(input.getType())) {
        emitFatalError(loc, "array_update on references not supported");
      } else {
        rewriter.create<fir::StoreOp>(loc, input, coor);
      }
    };
    auto lhsEltRefType = toRefType(update.getMerge().getType());
    auto [_, lhsLoadResult] = materializeAssignment(
        loc, rewriter, update, assignElement, lhsEltRefType);
    update.replaceAllUsesWith(lhsLoadResult);
    rewriter.replaceOp(update, lhsLoadResult);
    return mlir::success();
  }
};

class ArrayModifyConversion : public ArrayUpdateConversionBase<ArrayModifyOp> {
public:
  explicit ArrayModifyConversion(mlir::MLIRContext *ctx,
                                 const ArrayCopyAnalysisBase &a,
                                 const OperationUseMapT &m)
      : ArrayUpdateConversionBase{ctx, a, m} {}

  mlir::LogicalResult
  matchAndRewrite(ArrayModifyOp modify,
                  mlir::PatternRewriter &rewriter) const override {
    auto loc = modify.getLoc();
    auto assignElement = [](mlir::Value) {
      // Assignment already materialized by lowering using lhs element address.
    };
    auto lhsEltRefType = modify.getResult(0).getType();
    auto [lhsEltCoor, lhsLoadResult] = materializeAssignment(
        loc, rewriter, modify, assignElement, lhsEltRefType);
    modify.replaceAllUsesWith(mlir::ValueRange{lhsEltCoor, lhsLoadResult});
    rewriter.replaceOp(modify, mlir::ValueRange{lhsEltCoor, lhsLoadResult});
    return mlir::success();
  }
};

class ArrayFetchConversion : public mlir::OpRewritePattern<ArrayFetchOp> {
public:
  explicit ArrayFetchConversion(mlir::MLIRContext *ctx,
                                const OperationUseMapT &m)
      : OpRewritePattern{ctx}, useMap{m} {}

  mlir::LogicalResult
  matchAndRewrite(ArrayFetchOp fetch,
                  mlir::PatternRewriter &rewriter) const override {
    auto *op = fetch.getOperation();
    rewriter.setInsertionPoint(op);
    auto load = mlir::cast<ArrayLoadOp>(useMap.lookup(op));
    auto loc = fetch.getLoc();
    auto coor = genCoorOp(
        rewriter, loc, getEleTy(load.getType()), toRefType(fetch.getType()),
        load.getMemref(), load.getShape(), load.getSlice(), fetch.getIndices(),
        load, fetch->hasAttr(factory::attrFortranArrayOffsets()));
    if (isa_ref_type(fetch.getType()))
      rewriter.replaceOp(fetch, coor);
    else
      rewriter.replaceOpWithNewOp<fir::LoadOp>(fetch, coor);
    return mlir::success();
  }

private:
  const OperationUseMapT &useMap;
};

/// An array_access op is like an array_fetch op, except that it does not imply
/// a load op. (It operates in the reference domain.)
class ArrayAccessConversion : public ArrayUpdateConversionBase<ArrayAccessOp> {
public:
  explicit ArrayAccessConversion(mlir::MLIRContext *ctx,
                                 const ArrayCopyAnalysisBase &a,
                                 const OperationUseMapT &m)
      : ArrayUpdateConversionBase{ctx, a, m} {}

  mlir::LogicalResult
  matchAndRewrite(ArrayAccessOp access,
                  mlir::PatternRewriter &rewriter) const override {
    auto *op = access.getOperation();
    auto loc = access.getLoc();
    if (analysis.inAmendAccessSet(op)) {
      // This array_access is associated with an array_amend and there is a
      // conflict. Make a copy to store into.
      auto result = referenceToClone(loc, rewriter, access);
      access.replaceAllUsesWith(result);
      rewriter.replaceOp(access, result);
      return mlir::success();
    }
    rewriter.setInsertionPoint(op);
    auto load = mlir::cast<ArrayLoadOp>(useMap.lookup(op));
    auto coor = genCoorOp(
        rewriter, loc, getEleTy(load.getType()), toRefType(access.getType()),
        load.getMemref(), load.getShape(), load.getSlice(), access.getIndices(),
        load, access->hasAttr(factory::attrFortranArrayOffsets()));
    rewriter.replaceOp(access, coor);
    return mlir::success();
  }
};

/// An array_amend op is a marker to record which array access is being used to
/// update an array value. After this pass runs, an array_amend has no
/// semantics. We rewrite these to undefined values here to remove them while
/// preserving SSA form.
class ArrayAmendConversion : public mlir::OpRewritePattern<ArrayAmendOp> {
public:
  explicit ArrayAmendConversion(mlir::MLIRContext *ctx)
      : OpRewritePattern{ctx} {}

  mlir::LogicalResult
  matchAndRewrite(ArrayAmendOp amend,
                  mlir::PatternRewriter &rewriter) const override {
    auto *op = amend.getOperation();
    rewriter.setInsertionPoint(op);
    auto loc = amend.getLoc();
    auto undef = rewriter.create<UndefOp>(loc, amend.getType());
    rewriter.replaceOp(amend, undef.getResult());
    return mlir::success();
  }
};

class ArrayValueCopyConverter
    : public fir::impl::ArrayValueCopyBase<ArrayValueCopyConverter> {
public:
  ArrayValueCopyConverter() = default;
  ArrayValueCopyConverter(const fir::ArrayValueCopyOptions &options)
      : Base(options) {}

  void runOnOperation() override {
    auto func = getOperation();
    LLVM_DEBUG(llvm::dbgs() << "\n\narray-value-copy pass on function '"
                            << func.getName() << "'\n");
    auto *context = &getContext();

    // Perform the conflict analysis.
    const ArrayCopyAnalysisBase *analysis;
    if (optimizeConflicts)
      analysis = &getAnalysis<ArrayCopyAnalysisOptimized>();
    else
      analysis = &getAnalysis<ArrayCopyAnalysis>();

    const auto &useMap = analysis->getUseMap();

    mlir::RewritePatternSet patterns1(context);
    patterns1.insert<ArrayFetchConversion>(context, useMap);
    patterns1.insert<ArrayUpdateConversion>(context, *analysis, useMap);
    patterns1.insert<ArrayModifyConversion>(context, *analysis, useMap);
    patterns1.insert<ArrayAccessConversion>(context, *analysis, useMap);
    patterns1.insert<ArrayAmendConversion>(context);
    mlir::ConversionTarget target(*context);
    target.addLegalDialect<FIROpsDialect, mlir::scf::SCFDialect,
                           mlir::arith::ArithDialect, mlir::func::FuncDialect>();
    target.addIllegalOp<ArrayAccessOp, ArrayAmendOp, ArrayFetchOp,
                        ArrayUpdateOp, ArrayModifyOp>();
    // Rewrite the array fetch and array update ops.
    if (mlir::failed(
            mlir::applyPartialConversion(func, target, std::move(patterns1)))) {
      mlir::emitError(mlir::UnknownLoc::get(context),
                      "failure in array-value-copy pass, phase 1");
      signalPassFailure();
    }

    mlir::RewritePatternSet patterns2(context);
    patterns2.insert<ArrayLoadConversion>(context);
    patterns2.insert<ArrayMergeStoreConversion>(context);
    target.addIllegalOp<ArrayLoadOp, ArrayMergeStoreOp>();
    if (mlir::failed(
            mlir::applyPartialConversion(func, target, std::move(patterns2)))) {
      mlir::emitError(mlir::UnknownLoc::get(context),
                      "failure in array-value-copy pass, phase 2");
      signalPassFailure();
    }
  }
};
} // namespace

std::unique_ptr<mlir::Pass>
fir::createArrayValueCopyPass(fir::ArrayValueCopyOptions options) {
  return std::make_unique<ArrayValueCopyConverter>(options);
}