1 //===- LoopVersioning.cpp -------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 //===----------------------------------------------------------------------===// 10 /// \file 11 /// This pass looks for loops iterating over assumed-shape arrays, that can 12 /// be optimized by "guessing" that the stride is element-sized. 13 /// 14 /// This is done by creating two versions of the same loop: one which assumes 15 /// that the elements are contiguous (stride == size of element), and one that 16 /// is the original generic loop. 17 /// 18 /// As a side-effect of the assumed element size stride, the array is also 19 /// flattened to make it a 1D array - this is because the internal array 20 /// structure must be either 1D or have known sizes in all dimensions - and at 21 /// least one of the dimensions here is already unknown. 22 /// 23 /// There are two distinct benefits here: 24 /// 1. The loop that iterates over the elements is somewhat simplified by the 25 /// constant stride calculation. 26 /// 2. Since the compiler can understand the size of the stride, it can use 27 /// vector instructions, where an unknown (at compile time) stride does often 28 /// prevent vector operations from being used. 29 /// 30 /// A known drawback is that the code-size is increased, in some cases that can 31 /// be quite substantial - 3-4x is quite plausible (this includes that the loop 32 /// gets vectorized, which in itself often more than doubles the size of the 33 /// code, because unless the loop size is known, there will be a modulo 34 /// vector-size remainder to deal with. 35 /// 36 /// TODO: Do we need some size limit where loops no longer get duplicated? 37 // Maybe some sort of cost analysis. 38 /// TODO: Should some loop content - for example calls to functions and 39 /// subroutines inhibit the versioning of the loops. Plausibly, this 40 /// could be part of the cost analysis above. 41 //===----------------------------------------------------------------------===// 42 43 #include "flang/ISO_Fortran_binding_wrapper.h" 44 #include "flang/Optimizer/Builder/BoxValue.h" 45 #include "flang/Optimizer/Builder/FIRBuilder.h" 46 #include "flang/Optimizer/Builder/Runtime/Inquiry.h" 47 #include "flang/Optimizer/Dialect/FIRDialect.h" 48 #include "flang/Optimizer/Dialect/FIROps.h" 49 #include "flang/Optimizer/Dialect/FIRType.h" 50 #include "flang/Optimizer/Dialect/Support/FIRContext.h" 51 #include "flang/Optimizer/Dialect/Support/KindMapping.h" 52 #include "flang/Optimizer/Support/DataLayout.h" 53 #include "flang/Optimizer/Transforms/Passes.h" 54 #include "mlir/Dialect/LLVMIR/LLVMDialect.h" 55 #include "mlir/IR/Dominance.h" 56 #include "mlir/IR/Matchers.h" 57 #include "mlir/IR/TypeUtilities.h" 58 #include "mlir/Pass/Pass.h" 59 #include "mlir/Transforms/DialectConversion.h" 60 #include "mlir/Transforms/GreedyPatternRewriteDriver.h" 61 #include "mlir/Transforms/RegionUtils.h" 62 #include "llvm/Support/Debug.h" 63 #include "llvm/Support/raw_ostream.h" 64 65 #include <algorithm> 66 67 namespace fir { 68 #define GEN_PASS_DEF_LOOPVERSIONING 69 #include "flang/Optimizer/Transforms/Passes.h.inc" 70 } // namespace fir 71 72 #define DEBUG_TYPE "flang-loop-versioning" 73 74 namespace { 75 76 class LoopVersioningPass 77 : public fir::impl::LoopVersioningBase<LoopVersioningPass> { 78 public: 79 void runOnOperation() override; 80 }; 81 82 /// @struct ArgInfo 83 /// A structure to hold an argument, the size of the argument and dimension 84 /// information. 85 struct ArgInfo { 86 mlir::Value arg; 87 size_t size; 88 unsigned rank; 89 fir::BoxDimsOp dims[CFI_MAX_RANK]; 90 }; 91 92 /// @struct ArgsUsageInLoop 93 /// A structure providing information about the function arguments 94 /// usage by the instructions immediately nested in a loop. 95 struct ArgsUsageInLoop { 96 /// Mapping between the memref operand of an array indexing 97 /// operation (e.g. fir.coordinate_of) and the argument information. 98 llvm::DenseMap<mlir::Value, ArgInfo> usageInfo; 99 /// Some array indexing operations inside a loop cannot be transformed. 100 /// This vector holds the memref operands of such operations. 101 /// The vector is used to make sure that we do not try to transform 102 /// any outer loop, since this will imply the operation rewrite 103 /// in this loop. 104 llvm::SetVector<mlir::Value> cannotTransform; 105 106 // Debug dump of the structure members assuming that 107 // the information has been collected for the given loop. 108 void dump(fir::DoLoopOp loop) const { 109 LLVM_DEBUG({ 110 mlir::OpPrintingFlags printFlags; 111 printFlags.skipRegions(); 112 llvm::dbgs() << "Arguments usage info for loop:\n"; 113 loop.print(llvm::dbgs(), printFlags); 114 llvm::dbgs() << "\nUsed args:\n"; 115 for (auto &use : usageInfo) { 116 mlir::Value v = use.first; 117 v.print(llvm::dbgs(), printFlags); 118 llvm::dbgs() << "\n"; 119 } 120 llvm::dbgs() << "\nCannot transform args:\n"; 121 for (mlir::Value arg : cannotTransform) { 122 arg.print(llvm::dbgs(), printFlags); 123 llvm::dbgs() << "\n"; 124 } 125 llvm::dbgs() << "====\n"; 126 }); 127 } 128 129 // Erase usageInfo and cannotTransform entries for a set 130 // of given arguments. 131 void eraseUsage(const llvm::SetVector<mlir::Value> &args) { 132 for (auto &arg : args) 133 usageInfo.erase(arg); 134 cannotTransform.set_subtract(args); 135 } 136 137 // Erase usageInfo and cannotTransform entries for a set 138 // of given arguments provided in the form of usageInfo map. 139 void eraseUsage(const llvm::DenseMap<mlir::Value, ArgInfo> &args) { 140 for (auto &arg : args) { 141 usageInfo.erase(arg.first); 142 cannotTransform.remove(arg.first); 143 } 144 } 145 }; 146 } // namespace 147 148 static fir::SequenceType getAsSequenceType(mlir::Value v) { 149 mlir::Type argTy = fir::unwrapPassByRefType(fir::unwrapRefType(v.getType())); 150 return mlir::dyn_cast<fir::SequenceType>(argTy); 151 } 152 153 /// Return the rank and the element size (in bytes) of the given 154 /// value \p v. If it is not an array or the element type is not 155 /// supported, then return <0, 0>. Only trivial data types 156 /// are currently supported. 157 /// When \p isArgument is true, \p v is assumed to be a function 158 /// argument. If \p v's type does not look like a type of an assumed 159 /// shape array, then the function returns <0, 0>. 160 /// When \p isArgument is false, array types with known innermost 161 /// dimension are allowed to proceed. 162 static std::pair<unsigned, size_t> 163 getRankAndElementSize(const fir::KindMapping &kindMap, 164 const mlir::DataLayout &dl, mlir::Value v, 165 bool isArgument = false) { 166 if (auto seqTy = getAsSequenceType(v)) { 167 unsigned rank = seqTy.getDimension(); 168 if (rank > 0 && 169 (!isArgument || 170 seqTy.getShape()[0] == fir::SequenceType::getUnknownExtent())) { 171 size_t typeSize = 0; 172 mlir::Type elementType = fir::unwrapSeqOrBoxedSeqType(v.getType()); 173 if (fir::isa_trivial(elementType)) { 174 auto [eleSize, eleAlign] = fir::getTypeSizeAndAlignmentOrCrash( 175 v.getLoc(), elementType, dl, kindMap); 176 typeSize = llvm::alignTo(eleSize, eleAlign); 177 } 178 if (typeSize) 179 return {rank, typeSize}; 180 } 181 } 182 183 LLVM_DEBUG(llvm::dbgs() << "Unsupported rank/type: " << v << '\n'); 184 return {0, 0}; 185 } 186 187 /// if a value comes from a fir.declare, follow it to the original source, 188 /// otherwise return the value 189 static mlir::Value unwrapFirDeclare(mlir::Value val) { 190 // fir.declare is for source code variables. We don't have declares of 191 // declares 192 if (fir::DeclareOp declare = val.getDefiningOp<fir::DeclareOp>()) 193 return declare.getMemref(); 194 return val; 195 } 196 197 /// Return true, if \p rebox operation keeps the input array 198 /// continuous in the innermost dimension, if it is initially continuous 199 /// in the innermost dimension. 200 static bool reboxPreservesContinuity(fir::ReboxOp rebox) { 201 // If slicing is not involved, then the rebox does not affect 202 // the continuity of the array. 203 auto sliceArg = rebox.getSlice(); 204 if (!sliceArg) 205 return true; 206 207 // A slice with step=1 in the innermost dimension preserves 208 // the continuity of the array in the innermost dimension. 209 if (auto sliceOp = 210 mlir::dyn_cast_or_null<fir::SliceOp>(sliceArg.getDefiningOp())) { 211 if (sliceOp.getFields().empty() && sliceOp.getSubstr().empty()) { 212 auto triples = sliceOp.getTriples(); 213 if (triples.size() > 2) 214 if (auto innermostStep = fir::getIntIfConstant(triples[2])) 215 if (*innermostStep == 1) 216 return true; 217 } 218 219 LLVM_DEBUG(llvm::dbgs() 220 << "REBOX with slicing may produce non-contiguous array: " 221 << sliceOp << '\n' 222 << rebox << '\n'); 223 return false; 224 } 225 226 LLVM_DEBUG(llvm::dbgs() << "REBOX with unknown slice" << sliceArg << '\n' 227 << rebox << '\n'); 228 return false; 229 } 230 231 /// if a value comes from a fir.rebox, follow the rebox to the original source, 232 /// of the value, otherwise return the value 233 static mlir::Value unwrapReboxOp(mlir::Value val) { 234 while (fir::ReboxOp rebox = val.getDefiningOp<fir::ReboxOp>()) { 235 if (!reboxPreservesContinuity(rebox)) 236 break; 237 val = rebox.getBox(); 238 } 239 return val; 240 } 241 242 /// normalize a value (removing fir.declare and fir.rebox) so that we can 243 /// more conveniently spot values which came from function arguments 244 static mlir::Value normaliseVal(mlir::Value val) { 245 return unwrapFirDeclare(unwrapReboxOp(val)); 246 } 247 248 /// some FIR operations accept a fir.shape, a fir.shift or a fir.shapeshift. 249 /// fir.shift and fir.shapeshift allow us to extract lower bounds 250 /// if lowerbounds cannot be found, return nullptr 251 static mlir::Value tryGetLowerBoundsFromShapeLike(mlir::Value shapeLike, 252 unsigned dim) { 253 mlir::Value lowerBound{nullptr}; 254 if (auto shift = shapeLike.getDefiningOp<fir::ShiftOp>()) 255 lowerBound = shift.getOrigins()[dim]; 256 if (auto shapeShift = shapeLike.getDefiningOp<fir::ShapeShiftOp>()) 257 lowerBound = shapeShift.getOrigins()[dim]; 258 return lowerBound; 259 } 260 261 /// attempt to get the array lower bounds of dimension dim of the memref 262 /// argument to a fir.array_coor op 263 /// 0 <= dim < rank 264 /// May return nullptr if no lower bounds can be determined 265 static mlir::Value getLowerBound(fir::ArrayCoorOp coop, unsigned dim) { 266 // 1) try to get from the shape argument to fir.array_coor 267 if (mlir::Value shapeLike = coop.getShape()) 268 if (mlir::Value lb = tryGetLowerBoundsFromShapeLike(shapeLike, dim)) 269 return lb; 270 271 // It is important not to try to read the lower bound from the box, because 272 // in the FIR lowering, boxes will sometimes contain incorrect lower bound 273 // information 274 275 // out of ideas 276 return {}; 277 } 278 279 /// gets the i'th index from array coordinate operation op 280 /// dim should range between 0 and rank - 1 281 static mlir::Value getIndex(fir::FirOpBuilder &builder, mlir::Operation *op, 282 unsigned dim) { 283 if (fir::CoordinateOp coop = mlir::dyn_cast<fir::CoordinateOp>(op)) 284 return coop.getCoor()[dim]; 285 286 fir::ArrayCoorOp coop = mlir::dyn_cast<fir::ArrayCoorOp>(op); 287 assert(coop && 288 "operation must be either fir.coordiante_of or fir.array_coor"); 289 290 // fir.coordinate_of indices start at 0: adjust these indices to match by 291 // subtracting the lower bound 292 mlir::Value index = coop.getIndices()[dim]; 293 mlir::Value lb = getLowerBound(coop, dim); 294 if (!lb) 295 // assume a default lower bound of one 296 lb = builder.createIntegerConstant(coop.getLoc(), index.getType(), 1); 297 298 // index_0 = index - lb; 299 if (lb.getType() != index.getType()) 300 lb = builder.createConvert(coop.getLoc(), index.getType(), lb); 301 return builder.create<mlir::arith::SubIOp>(coop.getLoc(), index, lb); 302 } 303 304 void LoopVersioningPass::runOnOperation() { 305 LLVM_DEBUG(llvm::dbgs() << "=== Begin " DEBUG_TYPE " ===\n"); 306 mlir::func::FuncOp func = getOperation(); 307 308 // First look for arguments with assumed shape = unknown extent in the lowest 309 // dimension. 310 LLVM_DEBUG(llvm::dbgs() << "Func-name:" << func.getSymName() << "\n"); 311 mlir::Block::BlockArgListType args = func.getArguments(); 312 mlir::ModuleOp module = func->getParentOfType<mlir::ModuleOp>(); 313 fir::KindMapping kindMap = fir::getKindMapping(module); 314 mlir::SmallVector<ArgInfo, 4> argsOfInterest; 315 std::optional<mlir::DataLayout> dl = 316 fir::support::getOrSetDataLayout(module, /*allowDefaultLayout=*/false); 317 if (!dl) 318 mlir::emitError(module.getLoc(), 319 "data layout attribute is required to perform " DEBUG_TYPE 320 "pass"); 321 for (auto &arg : args) { 322 // Optional arguments must be checked for IsPresent before 323 // looking for the bounds. They are unsupported for the time being. 324 if (func.getArgAttrOfType<mlir::UnitAttr>(arg.getArgNumber(), 325 fir::getOptionalAttrName())) { 326 LLVM_DEBUG(llvm::dbgs() << "OPTIONAL is not supported\n"); 327 continue; 328 } 329 330 auto [rank, typeSize] = 331 getRankAndElementSize(kindMap, *dl, arg, /*isArgument=*/true); 332 if (rank != 0 && typeSize != 0) 333 argsOfInterest.push_back({arg, typeSize, rank, {}}); 334 } 335 336 if (argsOfInterest.empty()) { 337 LLVM_DEBUG(llvm::dbgs() 338 << "No suitable arguments.\n=== End " DEBUG_TYPE " ===\n"); 339 return; 340 } 341 342 // A list of all loops in the function in post-order. 343 mlir::SmallVector<fir::DoLoopOp> originalLoops; 344 // Information about the arguments usage by the instructions 345 // immediately nested in a loop. 346 llvm::DenseMap<fir::DoLoopOp, ArgsUsageInLoop> argsInLoops; 347 348 auto &domInfo = getAnalysis<mlir::DominanceInfo>(); 349 350 // Traverse the loops in post-order and see 351 // if those arguments are used inside any loop. 352 func.walk([&](fir::DoLoopOp loop) { 353 mlir::Block &body = *loop.getBody(); 354 auto &argsInLoop = argsInLoops[loop]; 355 originalLoops.push_back(loop); 356 body.walk([&](mlir::Operation *op) { 357 // Support either fir.array_coor or fir.coordinate_of. 358 if (!mlir::isa<fir::ArrayCoorOp, fir::CoordinateOp>(op)) 359 return; 360 // Process only operations immediately nested in the current loop. 361 if (op->getParentOfType<fir::DoLoopOp>() != loop) 362 return; 363 mlir::Value operand = op->getOperand(0); 364 for (auto a : argsOfInterest) { 365 if (a.arg == normaliseVal(operand)) { 366 // Use the reboxed value, not the block arg when re-creating the loop. 367 a.arg = operand; 368 369 // Check that the operand dominates the loop? 370 // If this is the case, record such operands in argsInLoop.cannot- 371 // Transform, so that they disable the transformation for the parent 372 /// loops as well. 373 if (!domInfo.dominates(a.arg, loop)) 374 argsInLoop.cannotTransform.insert(a.arg); 375 376 // No support currently for sliced arrays. 377 // This means that we cannot transform properly 378 // instructions referencing a.arg in the whole loop 379 // nest this loop is located in. 380 if (auto arrayCoor = mlir::dyn_cast<fir::ArrayCoorOp>(op)) 381 if (arrayCoor.getSlice()) 382 argsInLoop.cannotTransform.insert(a.arg); 383 384 // We need to compute the rank and element size 385 // based on the operand, not the original argument, 386 // because array slicing may affect it. 387 std::tie(a.rank, a.size) = getRankAndElementSize(kindMap, *dl, a.arg); 388 if (a.rank == 0 || a.size == 0) 389 argsInLoop.cannotTransform.insert(a.arg); 390 391 if (argsInLoop.cannotTransform.contains(a.arg)) { 392 // Remove any previously recorded usage, if any. 393 argsInLoop.usageInfo.erase(a.arg); 394 break; 395 } 396 397 // Record the a.arg usage, if not recorded yet. 398 argsInLoop.usageInfo.try_emplace(a.arg, a); 399 break; 400 } 401 } 402 }); 403 }); 404 405 // Dump loops info after initial collection. 406 LLVM_DEBUG({ 407 llvm::dbgs() << "Initial usage info:\n"; 408 for (fir::DoLoopOp loop : originalLoops) { 409 auto &argsInLoop = argsInLoops[loop]; 410 argsInLoop.dump(loop); 411 } 412 }); 413 414 // Clear argument usage for parent loops if an inner loop 415 // contains a non-transformable usage. 416 for (fir::DoLoopOp loop : originalLoops) { 417 auto &argsInLoop = argsInLoops[loop]; 418 if (argsInLoop.cannotTransform.empty()) 419 continue; 420 421 fir::DoLoopOp parent = loop; 422 while ((parent = parent->getParentOfType<fir::DoLoopOp>())) 423 argsInLoops[parent].eraseUsage(argsInLoop.cannotTransform); 424 } 425 426 // If an argument access can be optimized in a loop and 427 // its descendant loop, then it does not make sense to 428 // generate the contiguity check for the descendant loop. 429 // The check will be produced as part of the ancestor 430 // loop's transformation. So we can clear the argument 431 // usage for all descendant loops. 432 for (fir::DoLoopOp loop : originalLoops) { 433 auto &argsInLoop = argsInLoops[loop]; 434 if (argsInLoop.usageInfo.empty()) 435 continue; 436 437 loop.getBody()->walk([&](fir::DoLoopOp dloop) { 438 argsInLoops[dloop].eraseUsage(argsInLoop.usageInfo); 439 }); 440 } 441 442 LLVM_DEBUG({ 443 llvm::dbgs() << "Final usage info:\n"; 444 for (fir::DoLoopOp loop : originalLoops) { 445 auto &argsInLoop = argsInLoops[loop]; 446 argsInLoop.dump(loop); 447 } 448 }); 449 450 // Reduce the collected information to a list of loops 451 // with attached arguments usage information. 452 // The list must hold the loops in post order, so that 453 // the inner loops are transformed before the outer loops. 454 struct OpsWithArgs { 455 mlir::Operation *op; 456 mlir::SmallVector<ArgInfo, 4> argsAndDims; 457 }; 458 mlir::SmallVector<OpsWithArgs, 4> loopsOfInterest; 459 for (fir::DoLoopOp loop : originalLoops) { 460 auto &argsInLoop = argsInLoops[loop]; 461 if (argsInLoop.usageInfo.empty()) 462 continue; 463 OpsWithArgs info; 464 info.op = loop; 465 for (auto &arg : argsInLoop.usageInfo) 466 info.argsAndDims.push_back(arg.second); 467 loopsOfInterest.emplace_back(std::move(info)); 468 } 469 470 if (loopsOfInterest.empty()) { 471 LLVM_DEBUG(llvm::dbgs() 472 << "No loops to transform.\n=== End " DEBUG_TYPE " ===\n"); 473 return; 474 } 475 476 // If we get here, there are loops to process. 477 fir::FirOpBuilder builder{module, std::move(kindMap)}; 478 mlir::Location loc = builder.getUnknownLoc(); 479 mlir::IndexType idxTy = builder.getIndexType(); 480 481 LLVM_DEBUG(llvm::dbgs() << "Func Before transformation:\n"); 482 LLVM_DEBUG(func->dump()); 483 484 LLVM_DEBUG(llvm::dbgs() << "loopsOfInterest: " << loopsOfInterest.size() 485 << "\n"); 486 for (auto op : loopsOfInterest) { 487 LLVM_DEBUG(op.op->dump()); 488 builder.setInsertionPoint(op.op); 489 490 mlir::Value allCompares = nullptr; 491 // Ensure all of the arrays are unit-stride. 492 for (auto &arg : op.argsAndDims) { 493 // Fetch all the dimensions of the array, except the last dimension. 494 // Always fetch the first dimension, however, so set ndims = 1 if 495 // we have one dim 496 unsigned ndims = arg.rank; 497 for (unsigned i = 0; i < ndims; i++) { 498 mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i); 499 arg.dims[i] = builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, 500 arg.arg, dimIdx); 501 } 502 // We only care about lowest order dimension, here. 503 mlir::Value elemSize = 504 builder.createIntegerConstant(loc, idxTy, arg.size); 505 mlir::Value cmp = builder.create<mlir::arith::CmpIOp>( 506 loc, mlir::arith::CmpIPredicate::eq, arg.dims[0].getResult(2), 507 elemSize); 508 if (!allCompares) { 509 allCompares = cmp; 510 } else { 511 allCompares = 512 builder.create<mlir::arith::AndIOp>(loc, cmp, allCompares); 513 } 514 } 515 516 auto ifOp = 517 builder.create<fir::IfOp>(loc, op.op->getResultTypes(), allCompares, 518 /*withElse=*/true); 519 builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); 520 521 LLVM_DEBUG(llvm::dbgs() << "Creating cloned loop\n"); 522 mlir::Operation *clonedLoop = op.op->clone(); 523 bool changed = false; 524 for (auto &arg : op.argsAndDims) { 525 fir::SequenceType::Shape newShape; 526 newShape.push_back(fir::SequenceType::getUnknownExtent()); 527 auto elementType = fir::unwrapSeqOrBoxedSeqType(arg.arg.getType()); 528 mlir::Type arrTy = fir::SequenceType::get(newShape, elementType); 529 mlir::Type boxArrTy = fir::BoxType::get(arrTy); 530 mlir::Type refArrTy = builder.getRefType(arrTy); 531 auto carg = builder.create<fir::ConvertOp>(loc, boxArrTy, arg.arg); 532 auto caddr = builder.create<fir::BoxAddrOp>(loc, refArrTy, carg); 533 auto insPt = builder.saveInsertionPoint(); 534 // Use caddr instead of arg. 535 clonedLoop->walk([&](mlir::Operation *coop) { 536 if (!mlir::isa<fir::CoordinateOp, fir::ArrayCoorOp>(coop)) 537 return; 538 // Reduce the multi-dimensioned index to a single index. 539 // This is required becase fir arrays do not support multiple dimensions 540 // with unknown dimensions at compile time. 541 // We then calculate the multidimensional array like this: 542 // arr(x, y, z) bedcomes arr(z * stride(2) + y * stride(1) + x) 543 // where stride is the distance between elements in the dimensions 544 // 0, 1 and 2 or x, y and z. 545 if (coop->getOperand(0) == arg.arg && coop->getOperands().size() >= 2) { 546 builder.setInsertionPoint(coop); 547 mlir::Value totalIndex; 548 for (unsigned i = arg.rank - 1; i > 0; i--) { 549 mlir::Value curIndex = 550 builder.createConvert(loc, idxTy, getIndex(builder, coop, i)); 551 // Multiply by the stride of this array. Later we'll divide by the 552 // element size. 553 mlir::Value scale = 554 builder.createConvert(loc, idxTy, arg.dims[i].getResult(2)); 555 curIndex = 556 builder.create<mlir::arith::MulIOp>(loc, scale, curIndex); 557 totalIndex = (totalIndex) ? builder.create<mlir::arith::AddIOp>( 558 loc, curIndex, totalIndex) 559 : curIndex; 560 } 561 // This is the lowest dimension - which doesn't need scaling 562 mlir::Value finalIndex = 563 builder.createConvert(loc, idxTy, getIndex(builder, coop, 0)); 564 if (totalIndex) { 565 assert(llvm::isPowerOf2_32(arg.size) && 566 "Expected power of two here"); 567 unsigned bits = llvm::Log2_32(arg.size); 568 mlir::Value elemShift = 569 builder.createIntegerConstant(loc, idxTy, bits); 570 totalIndex = builder.create<mlir::arith::AddIOp>( 571 loc, 572 builder.create<mlir::arith::ShRSIOp>(loc, totalIndex, 573 elemShift), 574 finalIndex); 575 } else { 576 totalIndex = finalIndex; 577 } 578 auto newOp = builder.create<fir::CoordinateOp>( 579 loc, builder.getRefType(elementType), caddr, 580 mlir::ValueRange{totalIndex}); 581 LLVM_DEBUG(newOp->dump()); 582 coop->getResult(0).replaceAllUsesWith(newOp->getResult(0)); 583 coop->erase(); 584 changed = true; 585 } 586 }); 587 588 builder.restoreInsertionPoint(insPt); 589 } 590 assert(changed && "Expected operations to have changed"); 591 592 builder.insert(clonedLoop); 593 // Forward the result(s), if any, from the loop operation to the 594 // 595 mlir::ResultRange results = clonedLoop->getResults(); 596 bool hasResults = (results.size() > 0); 597 if (hasResults) 598 builder.create<fir::ResultOp>(loc, results); 599 600 // Add the original loop in the else-side of the if operation. 601 builder.setInsertionPointToStart(&ifOp.getElseRegion().front()); 602 op.op->replaceAllUsesWith(ifOp); 603 op.op->remove(); 604 builder.insert(op.op); 605 // Rely on "cloned loop has results, so original loop also has results". 606 if (hasResults) { 607 builder.create<fir::ResultOp>(loc, op.op->getResults()); 608 } else { 609 // Use an assert to check this. 610 assert(op.op->getResults().size() == 0 && 611 "Weird, the cloned loop doesn't have results, but the original " 612 "does?"); 613 } 614 } 615 616 LLVM_DEBUG(llvm::dbgs() << "Func After transform:\n"); 617 LLVM_DEBUG(func->dump()); 618 619 LLVM_DEBUG(llvm::dbgs() << "=== End " DEBUG_TYPE " ===\n"); 620 } 621