1 //===-- ReductionProcessor.cpp ----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ReductionProcessor.h" 14 15 #include "flang/Lower/AbstractConverter.h" 16 #include "flang/Lower/ConvertType.h" 17 #include "flang/Lower/SymbolMap.h" 18 #include "flang/Optimizer/Builder/Complex.h" 19 #include "flang/Optimizer/Builder/HLFIRTools.h" 20 #include "flang/Optimizer/Builder/Todo.h" 21 #include "flang/Optimizer/Dialect/FIRType.h" 22 #include "flang/Optimizer/HLFIR/HLFIROps.h" 23 #include "flang/Optimizer/Support/FatalError.h" 24 #include "flang/Parser/tools.h" 25 #include "mlir/Dialect/OpenMP/OpenMPDialect.h" 26 #include "llvm/Support/CommandLine.h" 27 28 static llvm::cl::opt<bool> forceByrefReduction( 29 "force-byref-reduction", 30 llvm::cl::desc("Pass all reduction arguments by reference"), 31 llvm::cl::Hidden); 32 33 namespace Fortran { 34 namespace lower { 35 namespace omp { 36 37 ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType( 38 const omp::clause::ProcedureDesignator &pd) { 39 auto redType = llvm::StringSwitch<std::optional<ReductionIdentifier>>( 40 getRealName(pd.v.sym()).ToString()) 41 .Case("max", ReductionIdentifier::MAX) 42 .Case("min", ReductionIdentifier::MIN) 43 .Case("iand", ReductionIdentifier::IAND) 44 .Case("ior", ReductionIdentifier::IOR) 45 .Case("ieor", ReductionIdentifier::IEOR) 46 .Default(std::nullopt); 47 assert(redType && "Invalid Reduction"); 48 return *redType; 49 } 50 51 ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType( 52 omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp) { 53 switch (intrinsicOp) { 54 case omp::clause::DefinedOperator::IntrinsicOperator::Add: 55 return ReductionIdentifier::ADD; 56 case omp::clause::DefinedOperator::IntrinsicOperator::Subtract: 57 return ReductionIdentifier::SUBTRACT; 58 case omp::clause::DefinedOperator::IntrinsicOperator::Multiply: 59 return ReductionIdentifier::MULTIPLY; 60 case omp::clause::DefinedOperator::IntrinsicOperator::AND: 61 return ReductionIdentifier::AND; 62 case omp::clause::DefinedOperator::IntrinsicOperator::EQV: 63 return ReductionIdentifier::EQV; 64 case omp::clause::DefinedOperator::IntrinsicOperator::OR: 65 return ReductionIdentifier::OR; 66 case omp::clause::DefinedOperator::IntrinsicOperator::NEQV: 67 return ReductionIdentifier::NEQV; 68 default: 69 llvm_unreachable("unexpected intrinsic operator in reduction"); 70 } 71 } 72 73 bool ReductionProcessor::supportedIntrinsicProcReduction( 74 const omp::clause::ProcedureDesignator &pd) { 75 semantics::Symbol *sym = pd.v.sym(); 76 if (!sym->GetUltimate().attrs().test(semantics::Attr::INTRINSIC)) 77 return false; 78 auto redType = llvm::StringSwitch<bool>(getRealName(sym).ToString()) 79 .Case("max", true) 80 .Case("min", true) 81 .Case("iand", true) 82 .Case("ior", true) 83 .Case("ieor", true) 84 .Default(false); 85 return redType; 86 } 87 88 std::string 89 ReductionProcessor::getReductionName(llvm::StringRef name, 90 const fir::KindMapping &kindMap, 91 mlir::Type ty, bool isByRef) { 92 ty = fir::unwrapRefType(ty); 93 94 // extra string to distinguish reduction functions for variables passed by 95 // reference 96 llvm::StringRef byrefAddition{""}; 97 if (isByRef) 98 byrefAddition = "_byref"; 99 100 return fir::getTypeAsString(ty, kindMap, (name + byrefAddition).str()); 101 } 102 103 std::string ReductionProcessor::getReductionName( 104 omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp, 105 const fir::KindMapping &kindMap, mlir::Type ty, bool isByRef) { 106 std::string reductionName; 107 108 switch (intrinsicOp) { 109 case omp::clause::DefinedOperator::IntrinsicOperator::Add: 110 reductionName = "add_reduction"; 111 break; 112 case omp::clause::DefinedOperator::IntrinsicOperator::Multiply: 113 reductionName = "multiply_reduction"; 114 break; 115 case omp::clause::DefinedOperator::IntrinsicOperator::AND: 116 return "and_reduction"; 117 case omp::clause::DefinedOperator::IntrinsicOperator::EQV: 118 return "eqv_reduction"; 119 case omp::clause::DefinedOperator::IntrinsicOperator::OR: 120 return "or_reduction"; 121 case omp::clause::DefinedOperator::IntrinsicOperator::NEQV: 122 return "neqv_reduction"; 123 default: 124 reductionName = "other_reduction"; 125 break; 126 } 127 128 return getReductionName(reductionName, kindMap, ty, isByRef); 129 } 130 131 mlir::Value 132 ReductionProcessor::getReductionInitValue(mlir::Location loc, mlir::Type type, 133 ReductionIdentifier redId, 134 fir::FirOpBuilder &builder) { 135 type = fir::unwrapRefType(type); 136 if (!fir::isa_integer(type) && !fir::isa_real(type) && 137 !fir::isa_complex(type) && !mlir::isa<fir::LogicalType>(type)) 138 TODO(loc, "Reduction of some types is not supported"); 139 switch (redId) { 140 case ReductionIdentifier::MAX: { 141 if (auto ty = mlir::dyn_cast<mlir::FloatType>(type)) { 142 const llvm::fltSemantics &sem = ty.getFloatSemantics(); 143 return builder.createRealConstant( 144 loc, type, llvm::APFloat::getLargest(sem, /*Negative=*/true)); 145 } 146 unsigned bits = type.getIntOrFloatBitWidth(); 147 int64_t minInt = llvm::APInt::getSignedMinValue(bits).getSExtValue(); 148 return builder.createIntegerConstant(loc, type, minInt); 149 } 150 case ReductionIdentifier::MIN: { 151 if (auto ty = mlir::dyn_cast<mlir::FloatType>(type)) { 152 const llvm::fltSemantics &sem = ty.getFloatSemantics(); 153 return builder.createRealConstant( 154 loc, type, llvm::APFloat::getLargest(sem, /*Negative=*/false)); 155 } 156 unsigned bits = type.getIntOrFloatBitWidth(); 157 int64_t maxInt = llvm::APInt::getSignedMaxValue(bits).getSExtValue(); 158 return builder.createIntegerConstant(loc, type, maxInt); 159 } 160 case ReductionIdentifier::IOR: { 161 unsigned bits = type.getIntOrFloatBitWidth(); 162 int64_t zeroInt = llvm::APInt::getZero(bits).getSExtValue(); 163 return builder.createIntegerConstant(loc, type, zeroInt); 164 } 165 case ReductionIdentifier::IEOR: { 166 unsigned bits = type.getIntOrFloatBitWidth(); 167 int64_t zeroInt = llvm::APInt::getZero(bits).getSExtValue(); 168 return builder.createIntegerConstant(loc, type, zeroInt); 169 } 170 case ReductionIdentifier::IAND: { 171 unsigned bits = type.getIntOrFloatBitWidth(); 172 int64_t allOnInt = llvm::APInt::getAllOnes(bits).getSExtValue(); 173 return builder.createIntegerConstant(loc, type, allOnInt); 174 } 175 case ReductionIdentifier::ADD: 176 case ReductionIdentifier::MULTIPLY: 177 case ReductionIdentifier::AND: 178 case ReductionIdentifier::OR: 179 case ReductionIdentifier::EQV: 180 case ReductionIdentifier::NEQV: 181 if (auto cplxTy = mlir::dyn_cast<fir::ComplexType>(type)) { 182 mlir::Type realTy = 183 lower::convertReal(builder.getContext(), cplxTy.getFKind()); 184 mlir::Value initRe = builder.createRealConstant( 185 loc, realTy, getOperationIdentity(redId, loc)); 186 mlir::Value initIm = builder.createRealConstant(loc, realTy, 0); 187 188 return fir::factory::Complex{builder, loc}.createComplex(type, initRe, 189 initIm); 190 } 191 if (mlir::isa<mlir::FloatType>(type)) 192 return builder.create<mlir::arith::ConstantOp>( 193 loc, type, 194 builder.getFloatAttr(type, (double)getOperationIdentity(redId, loc))); 195 196 if (mlir::isa<fir::LogicalType>(type)) { 197 mlir::Value intConst = builder.create<mlir::arith::ConstantOp>( 198 loc, builder.getI1Type(), 199 builder.getIntegerAttr(builder.getI1Type(), 200 getOperationIdentity(redId, loc))); 201 return builder.createConvert(loc, type, intConst); 202 } 203 204 return builder.create<mlir::arith::ConstantOp>( 205 loc, type, 206 builder.getIntegerAttr(type, getOperationIdentity(redId, loc))); 207 case ReductionIdentifier::ID: 208 case ReductionIdentifier::USER_DEF_OP: 209 case ReductionIdentifier::SUBTRACT: 210 TODO(loc, "Reduction of some identifier types is not supported"); 211 } 212 llvm_unreachable("Unhandled Reduction identifier : getReductionInitValue"); 213 } 214 215 mlir::Value ReductionProcessor::createScalarCombiner( 216 fir::FirOpBuilder &builder, mlir::Location loc, ReductionIdentifier redId, 217 mlir::Type type, mlir::Value op1, mlir::Value op2) { 218 mlir::Value reductionOp; 219 type = fir::unwrapRefType(type); 220 switch (redId) { 221 case ReductionIdentifier::MAX: 222 reductionOp = 223 getReductionOperation<mlir::arith::MaxNumFOp, mlir::arith::MaxSIOp>( 224 builder, type, loc, op1, op2); 225 break; 226 case ReductionIdentifier::MIN: 227 reductionOp = 228 getReductionOperation<mlir::arith::MinNumFOp, mlir::arith::MinSIOp>( 229 builder, type, loc, op1, op2); 230 break; 231 case ReductionIdentifier::IOR: 232 assert((type.isIntOrIndex()) && "only integer is expected"); 233 reductionOp = builder.create<mlir::arith::OrIOp>(loc, op1, op2); 234 break; 235 case ReductionIdentifier::IEOR: 236 assert((type.isIntOrIndex()) && "only integer is expected"); 237 reductionOp = builder.create<mlir::arith::XOrIOp>(loc, op1, op2); 238 break; 239 case ReductionIdentifier::IAND: 240 assert((type.isIntOrIndex()) && "only integer is expected"); 241 reductionOp = builder.create<mlir::arith::AndIOp>(loc, op1, op2); 242 break; 243 case ReductionIdentifier::ADD: 244 reductionOp = 245 getReductionOperation<mlir::arith::AddFOp, mlir::arith::AddIOp, 246 fir::AddcOp>(builder, type, loc, op1, op2); 247 break; 248 case ReductionIdentifier::MULTIPLY: 249 reductionOp = 250 getReductionOperation<mlir::arith::MulFOp, mlir::arith::MulIOp, 251 fir::MulcOp>(builder, type, loc, op1, op2); 252 break; 253 case ReductionIdentifier::AND: { 254 mlir::Value op1I1 = builder.createConvert(loc, builder.getI1Type(), op1); 255 mlir::Value op2I1 = builder.createConvert(loc, builder.getI1Type(), op2); 256 257 mlir::Value andiOp = builder.create<mlir::arith::AndIOp>(loc, op1I1, op2I1); 258 259 reductionOp = builder.createConvert(loc, type, andiOp); 260 break; 261 } 262 case ReductionIdentifier::OR: { 263 mlir::Value op1I1 = builder.createConvert(loc, builder.getI1Type(), op1); 264 mlir::Value op2I1 = builder.createConvert(loc, builder.getI1Type(), op2); 265 266 mlir::Value oriOp = builder.create<mlir::arith::OrIOp>(loc, op1I1, op2I1); 267 268 reductionOp = builder.createConvert(loc, type, oriOp); 269 break; 270 } 271 case ReductionIdentifier::EQV: { 272 mlir::Value op1I1 = builder.createConvert(loc, builder.getI1Type(), op1); 273 mlir::Value op2I1 = builder.createConvert(loc, builder.getI1Type(), op2); 274 275 mlir::Value cmpiOp = builder.create<mlir::arith::CmpIOp>( 276 loc, mlir::arith::CmpIPredicate::eq, op1I1, op2I1); 277 278 reductionOp = builder.createConvert(loc, type, cmpiOp); 279 break; 280 } 281 case ReductionIdentifier::NEQV: { 282 mlir::Value op1I1 = builder.createConvert(loc, builder.getI1Type(), op1); 283 mlir::Value op2I1 = builder.createConvert(loc, builder.getI1Type(), op2); 284 285 mlir::Value cmpiOp = builder.create<mlir::arith::CmpIOp>( 286 loc, mlir::arith::CmpIPredicate::ne, op1I1, op2I1); 287 288 reductionOp = builder.createConvert(loc, type, cmpiOp); 289 break; 290 } 291 default: 292 TODO(loc, "Reduction of some intrinsic operators is not supported"); 293 } 294 295 return reductionOp; 296 } 297 298 /// Generate a fir::ShapeShift op describing the provided boxed array. 299 static fir::ShapeShiftOp getShapeShift(fir::FirOpBuilder &builder, 300 mlir::Location loc, mlir::Value box) { 301 fir::SequenceType sequenceType = mlir::cast<fir::SequenceType>( 302 hlfir::getFortranElementOrSequenceType(box.getType())); 303 const unsigned rank = sequenceType.getDimension(); 304 llvm::SmallVector<mlir::Value> lbAndExtents; 305 lbAndExtents.reserve(rank * 2); 306 307 mlir::Type idxTy = builder.getIndexType(); 308 for (unsigned i = 0; i < rank; ++i) { 309 // TODO: ideally we want to hoist box reads out of the critical section. 310 // We could do this by having box dimensions in block arguments like 311 // OpenACC does 312 mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); 313 auto dimInfo = 314 builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, box, dim); 315 lbAndExtents.push_back(dimInfo.getLowerBound()); 316 lbAndExtents.push_back(dimInfo.getExtent()); 317 } 318 319 auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank); 320 auto shapeShift = 321 builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents); 322 return shapeShift; 323 } 324 325 /// Create reduction combiner region for reduction variables which are boxed 326 /// arrays 327 static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, 328 ReductionProcessor::ReductionIdentifier redId, 329 fir::BaseBoxType boxTy, mlir::Value lhs, 330 mlir::Value rhs) { 331 fir::SequenceType seqTy = mlir::dyn_cast_or_null<fir::SequenceType>( 332 fir::unwrapRefType(boxTy.getEleTy())); 333 fir::HeapType heapTy = 334 mlir::dyn_cast_or_null<fir::HeapType>(boxTy.getEleTy()); 335 fir::PointerType ptrTy = 336 mlir::dyn_cast_or_null<fir::PointerType>(boxTy.getEleTy()); 337 if ((!seqTy || seqTy.hasUnknownShape()) && !heapTy && !ptrTy) 338 TODO(loc, "Unsupported boxed type in OpenMP reduction"); 339 340 // load fir.ref<fir.box<...>> 341 mlir::Value lhsAddr = lhs; 342 lhs = builder.create<fir::LoadOp>(loc, lhs); 343 rhs = builder.create<fir::LoadOp>(loc, rhs); 344 345 if ((heapTy || ptrTy) && !seqTy) { 346 // get box contents (heap pointers) 347 lhs = builder.create<fir::BoxAddrOp>(loc, lhs); 348 rhs = builder.create<fir::BoxAddrOp>(loc, rhs); 349 mlir::Value lhsValAddr = lhs; 350 351 // load heap pointers 352 lhs = builder.create<fir::LoadOp>(loc, lhs); 353 rhs = builder.create<fir::LoadOp>(loc, rhs); 354 355 mlir::Type eleTy = heapTy ? heapTy.getEleTy() : ptrTy.getEleTy(); 356 357 mlir::Value result = ReductionProcessor::createScalarCombiner( 358 builder, loc, redId, eleTy, lhs, rhs); 359 builder.create<fir::StoreOp>(loc, result, lhsValAddr); 360 builder.create<mlir::omp::YieldOp>(loc, lhsAddr); 361 return; 362 } 363 364 fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, lhs); 365 366 // Iterate over array elements, applying the equivalent scalar reduction: 367 368 // F2018 5.4.10.2: Unallocated allocatable variables may not be referenced 369 // and so no null check is needed here before indexing into the (possibly 370 // allocatable) arrays. 371 372 // A hlfir::elemental here gets inlined with a temporary so create the 373 // loop nest directly. 374 // This function already controls all of the code in this region so we 375 // know this won't miss any opportuinties for clever elemental inlining 376 hlfir::LoopNest nest = hlfir::genLoopNest( 377 loc, builder, shapeShift.getExtents(), /*isUnordered=*/true); 378 builder.setInsertionPointToStart(nest.innerLoop.getBody()); 379 mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); 380 auto lhsEleAddr = builder.create<fir::ArrayCoorOp>( 381 loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{}, 382 nest.oneBasedIndices, /*typeparms=*/mlir::ValueRange{}); 383 auto rhsEleAddr = builder.create<fir::ArrayCoorOp>( 384 loc, refTy, rhs, shapeShift, /*slice=*/mlir::Value{}, 385 nest.oneBasedIndices, /*typeparms=*/mlir::ValueRange{}); 386 auto lhsEle = builder.create<fir::LoadOp>(loc, lhsEleAddr); 387 auto rhsEle = builder.create<fir::LoadOp>(loc, rhsEleAddr); 388 mlir::Value scalarReduction = ReductionProcessor::createScalarCombiner( 389 builder, loc, redId, refTy, lhsEle, rhsEle); 390 builder.create<fir::StoreOp>(loc, scalarReduction, lhsEleAddr); 391 392 builder.setInsertionPointAfter(nest.outerLoop); 393 builder.create<mlir::omp::YieldOp>(loc, lhsAddr); 394 } 395 396 // generate combiner region for reduction operations 397 static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, 398 ReductionProcessor::ReductionIdentifier redId, 399 mlir::Type ty, mlir::Value lhs, mlir::Value rhs, 400 bool isByRef) { 401 ty = fir::unwrapRefType(ty); 402 403 if (fir::isa_trivial(ty)) { 404 mlir::Value lhsLoaded = builder.loadIfRef(loc, lhs); 405 mlir::Value rhsLoaded = builder.loadIfRef(loc, rhs); 406 407 mlir::Value result = ReductionProcessor::createScalarCombiner( 408 builder, loc, redId, ty, lhsLoaded, rhsLoaded); 409 if (isByRef) { 410 builder.create<fir::StoreOp>(loc, result, lhs); 411 builder.create<mlir::omp::YieldOp>(loc, lhs); 412 } else { 413 builder.create<mlir::omp::YieldOp>(loc, result); 414 } 415 return; 416 } 417 // all arrays should have been boxed 418 if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(ty)) { 419 genBoxCombiner(builder, loc, redId, boxTy, lhs, rhs); 420 return; 421 } 422 423 TODO(loc, "OpenMP genCombiner for unsupported reduction variable type"); 424 } 425 426 static void 427 createReductionCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc, 428 mlir::omp::DeclareReductionOp &reductionDecl) { 429 mlir::Type redTy = reductionDecl.getType(); 430 431 mlir::Region &cleanupRegion = reductionDecl.getCleanupRegion(); 432 assert(cleanupRegion.empty()); 433 mlir::Block *block = 434 builder.createBlock(&cleanupRegion, cleanupRegion.end(), {redTy}, {loc}); 435 builder.setInsertionPointToEnd(block); 436 437 auto typeError = [loc]() { 438 fir::emitFatalError(loc, 439 "Attempt to create an omp reduction cleanup region " 440 "for a type that wasn't allocated", 441 /*genCrashDiag=*/true); 442 }; 443 444 mlir::Type valTy = fir::unwrapRefType(redTy); 445 if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) { 446 if (!mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy())) { 447 mlir::Type innerTy = fir::extractSequenceType(boxTy); 448 if (!mlir::isa<fir::SequenceType>(innerTy)) 449 typeError(); 450 } 451 452 mlir::Value arg = block->getArgument(0); 453 arg = builder.loadIfRef(loc, arg); 454 assert(mlir::isa<fir::BaseBoxType>(arg.getType())); 455 456 // Deallocate box 457 // The FIR type system doesn't nesecarrily know that this is a mutable box 458 // if we allocated the thread local array on the heap to avoid looped stack 459 // allocations. 460 mlir::Value addr = 461 hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg}); 462 mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr); 463 fir::IfOp ifOp = 464 builder.create<fir::IfOp>(loc, isAllocated, /*withElseRegion=*/false); 465 builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); 466 467 mlir::Value cast = builder.createConvert( 468 loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr); 469 builder.create<fir::FreeMemOp>(loc, cast); 470 471 builder.setInsertionPointAfter(ifOp); 472 builder.create<mlir::omp::YieldOp>(loc); 473 return; 474 } 475 476 typeError(); 477 } 478 479 // like fir::unwrapSeqOrBoxedSeqType except it also works for non-sequence boxes 480 static mlir::Type unwrapSeqOrBoxedType(mlir::Type ty) { 481 if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(ty)) 482 return seqTy.getEleTy(); 483 if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(ty)) { 484 auto eleTy = fir::unwrapRefType(boxTy.getEleTy()); 485 if (auto seqTy = mlir::dyn_cast<fir::SequenceType>(eleTy)) 486 return seqTy.getEleTy(); 487 return eleTy; 488 } 489 return ty; 490 } 491 492 static void createReductionAllocAndInitRegions( 493 fir::FirOpBuilder &builder, mlir::Location loc, 494 mlir::omp::DeclareReductionOp &reductionDecl, 495 const ReductionProcessor::ReductionIdentifier redId, mlir::Type type, 496 bool isByRef) { 497 auto yield = [&](mlir::Value ret) { 498 builder.create<mlir::omp::YieldOp>(loc, ret); 499 }; 500 501 mlir::Block *allocBlock = nullptr; 502 mlir::Block *initBlock = nullptr; 503 if (isByRef) { 504 allocBlock = 505 builder.createBlock(&reductionDecl.getAllocRegion(), 506 reductionDecl.getAllocRegion().end(), {}, {}); 507 initBlock = builder.createBlock(&reductionDecl.getInitializerRegion(), 508 reductionDecl.getInitializerRegion().end(), 509 {type, type}, {loc, loc}); 510 } else { 511 initBlock = builder.createBlock(&reductionDecl.getInitializerRegion(), 512 reductionDecl.getInitializerRegion().end(), 513 {type}, {loc}); 514 } 515 516 mlir::Type ty = fir::unwrapRefType(type); 517 builder.setInsertionPointToEnd(initBlock); 518 mlir::Value initValue = ReductionProcessor::getReductionInitValue( 519 loc, unwrapSeqOrBoxedType(ty), redId, builder); 520 521 if (fir::isa_trivial(ty)) { 522 if (isByRef) { 523 // alloc region 524 { 525 builder.setInsertionPointToEnd(allocBlock); 526 mlir::Value alloca = builder.create<fir::AllocaOp>(loc, ty); 527 yield(alloca); 528 } 529 530 // init region 531 { 532 builder.setInsertionPointToEnd(initBlock); 533 // block arg is mapped to the alloca yielded from the alloc region 534 mlir::Value alloc = reductionDecl.getInitializerAllocArg(); 535 builder.createStoreWithConvert(loc, initValue, alloc); 536 yield(alloc); 537 } 538 return; 539 } 540 // by val 541 yield(initValue); 542 return; 543 } 544 545 // check if an allocatable box is unallocated. If so, initialize the boxAlloca 546 // to be unallocated e.g. 547 // %box_alloca = fir.alloca !fir.box<!fir.heap<...>> 548 // %addr = fir.box_addr %box 549 // if (%addr == 0) { 550 // %nullbox = fir.embox %addr 551 // fir.store %nullbox to %box_alloca 552 // } else { 553 // // ... 554 // fir.store %something to %box_alloca 555 // } 556 // omp.yield %box_alloca 557 mlir::Value moldArg = 558 builder.loadIfRef(loc, reductionDecl.getInitializerMoldArg()); 559 auto handleNullAllocatable = [&](mlir::Value boxAlloca) -> fir::IfOp { 560 mlir::Value addr = builder.create<fir::BoxAddrOp>(loc, moldArg); 561 mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr); 562 fir::IfOp ifOp = builder.create<fir::IfOp>(loc, isNotAllocated, 563 /*withElseRegion=*/true); 564 builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); 565 // just embox the null address and return 566 mlir::Value nullBox = builder.create<fir::EmboxOp>(loc, ty, addr); 567 builder.create<fir::StoreOp>(loc, nullBox, boxAlloca); 568 return ifOp; 569 }; 570 571 // all arrays are boxed 572 if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) { 573 assert(isByRef && "passing boxes by value is unsupported"); 574 bool isAllocatableOrPointer = 575 mlir::isa<fir::HeapType, fir::PointerType>(boxTy.getEleTy()); 576 577 // alloc region 578 { 579 builder.setInsertionPointToEnd(allocBlock); 580 mlir::Value boxAlloca = builder.create<fir::AllocaOp>(loc, ty); 581 yield(boxAlloca); 582 } 583 584 // init region 585 builder.setInsertionPointToEnd(initBlock); 586 mlir::Value boxAlloca = reductionDecl.getInitializerAllocArg(); 587 mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy()); 588 if (fir::isa_trivial(innerTy)) { 589 // boxed non-sequence value e.g. !fir.box<!fir.heap<i32>> 590 if (!isAllocatableOrPointer) 591 TODO(loc, "Reduction of non-allocatable trivial typed box"); 592 593 fir::IfOp ifUnallocated = handleNullAllocatable(boxAlloca); 594 595 builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); 596 mlir::Value valAlloc = builder.create<fir::AllocMemOp>(loc, innerTy); 597 builder.createStoreWithConvert(loc, initValue, valAlloc); 598 mlir::Value box = builder.create<fir::EmboxOp>(loc, ty, valAlloc); 599 builder.create<fir::StoreOp>(loc, box, boxAlloca); 600 601 auto insPt = builder.saveInsertionPoint(); 602 createReductionCleanupRegion(builder, loc, reductionDecl); 603 builder.restoreInsertionPoint(insPt); 604 builder.setInsertionPointAfter(ifUnallocated); 605 yield(boxAlloca); 606 return; 607 } 608 innerTy = fir::extractSequenceType(boxTy); 609 if (!mlir::isa<fir::SequenceType>(innerTy)) 610 TODO(loc, "Unsupported boxed type for reduction"); 611 612 fir::IfOp ifUnallocated{nullptr}; 613 if (isAllocatableOrPointer) { 614 ifUnallocated = handleNullAllocatable(boxAlloca); 615 builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); 616 } 617 618 // Create the private copy from the initial fir.box: 619 mlir::Value loadedBox = builder.loadIfRef(loc, moldArg); 620 hlfir::Entity source = hlfir::Entity{loadedBox}; 621 622 // Allocating on the heap in case the whole reduction is nested inside of a 623 // loop 624 // TODO: compare performance here to using allocas - this could be made to 625 // work by inserting stacksave/stackrestore around the reduction in 626 // openmpirbuilder 627 auto [temp, needsDealloc] = createTempFromMold(loc, builder, source); 628 // if needsDealloc isn't statically false, add cleanup region. Always 629 // do this for allocatable boxes because they might have been re-allocated 630 // in the body of the loop/parallel region 631 632 std::optional<int64_t> cstNeedsDealloc = 633 fir::getIntIfConstant(needsDealloc); 634 assert(cstNeedsDealloc.has_value() && 635 "createTempFromMold decides this statically"); 636 if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) { 637 mlir::OpBuilder::InsertionGuard guard(builder); 638 createReductionCleanupRegion(builder, loc, reductionDecl); 639 } else { 640 assert(!isAllocatableOrPointer && 641 "Pointer-like arrays must be heap allocated"); 642 } 643 644 // Put the temporary inside of a box: 645 // hlfir::genVariableBox doesn't handle non-default lower bounds 646 mlir::Value box; 647 fir::ShapeShiftOp shapeShift = getShapeShift(builder, loc, loadedBox); 648 mlir::Type boxType = loadedBox.getType(); 649 if (mlir::isa<fir::BaseBoxType>(temp.getType())) 650 // the box created by the declare form createTempFromMold is missing lower 651 // bounds info 652 box = builder.create<fir::ReboxOp>(loc, boxType, temp, shapeShift, 653 /*shift=*/mlir::Value{}); 654 else 655 box = builder.create<fir::EmboxOp>( 656 loc, boxType, temp, shapeShift, 657 /*slice=*/mlir::Value{}, 658 /*typeParams=*/llvm::ArrayRef<mlir::Value>{}); 659 660 builder.create<hlfir::AssignOp>(loc, initValue, box); 661 builder.create<fir::StoreOp>(loc, box, boxAlloca); 662 if (ifUnallocated) 663 builder.setInsertionPointAfter(ifUnallocated); 664 yield(boxAlloca); 665 return; 666 } 667 668 TODO(loc, "createReductionInitRegion for unsupported type"); 669 } 670 671 mlir::omp::DeclareReductionOp ReductionProcessor::createDeclareReduction( 672 fir::FirOpBuilder &builder, llvm::StringRef reductionOpName, 673 const ReductionIdentifier redId, mlir::Type type, mlir::Location loc, 674 bool isByRef) { 675 mlir::OpBuilder::InsertionGuard guard(builder); 676 mlir::ModuleOp module = builder.getModule(); 677 678 assert(!reductionOpName.empty()); 679 680 auto decl = 681 module.lookupSymbol<mlir::omp::DeclareReductionOp>(reductionOpName); 682 if (decl) 683 return decl; 684 685 mlir::OpBuilder modBuilder(module.getBodyRegion()); 686 mlir::Type valTy = fir::unwrapRefType(type); 687 if (!isByRef) 688 type = valTy; 689 690 decl = modBuilder.create<mlir::omp::DeclareReductionOp>(loc, reductionOpName, 691 type); 692 createReductionAllocAndInitRegions(builder, loc, decl, redId, type, isByRef); 693 694 builder.createBlock(&decl.getReductionRegion(), 695 decl.getReductionRegion().end(), {type, type}, 696 {loc, loc}); 697 698 builder.setInsertionPointToEnd(&decl.getReductionRegion().back()); 699 mlir::Value op1 = decl.getReductionRegion().front().getArgument(0); 700 mlir::Value op2 = decl.getReductionRegion().front().getArgument(1); 701 genCombiner(builder, loc, redId, type, op1, op2, isByRef); 702 703 return decl; 704 } 705 706 static bool doReductionByRef(mlir::Value reductionVar) { 707 if (forceByrefReduction) 708 return true; 709 710 if (auto declare = 711 mlir::dyn_cast<hlfir::DeclareOp>(reductionVar.getDefiningOp())) 712 reductionVar = declare.getMemref(); 713 714 if (!fir::isa_trivial(fir::unwrapRefType(reductionVar.getType()))) 715 return true; 716 717 return false; 718 } 719 720 void ReductionProcessor::addDeclareReduction( 721 mlir::Location currentLocation, lower::AbstractConverter &converter, 722 const omp::clause::Reduction &reduction, 723 llvm::SmallVectorImpl<mlir::Value> &reductionVars, 724 llvm::SmallVectorImpl<bool> &reduceVarByRef, 725 llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols, 726 llvm::SmallVectorImpl<const semantics::Symbol *> *reductionSymbols) { 727 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 728 729 if (std::get<std::optional<omp::clause::Reduction::ReductionModifier>>( 730 reduction.t)) 731 TODO(currentLocation, "Reduction modifiers are not supported"); 732 733 mlir::omp::DeclareReductionOp decl; 734 const auto &redOperatorList{ 735 std::get<omp::clause::Reduction::ReductionIdentifiers>(reduction.t)}; 736 assert(redOperatorList.size() == 1 && "Expecting single operator"); 737 const auto &redOperator = redOperatorList.front(); 738 const auto &objectList{std::get<omp::ObjectList>(reduction.t)}; 739 740 if (!std::holds_alternative<omp::clause::DefinedOperator>(redOperator.u)) { 741 if (const auto *reductionIntrinsic = 742 std::get_if<omp::clause::ProcedureDesignator>(&redOperator.u)) { 743 if (!ReductionProcessor::supportedIntrinsicProcReduction( 744 *reductionIntrinsic)) { 745 return; 746 } 747 } else { 748 return; 749 } 750 } 751 752 // Reduction variable processing common to both intrinsic operators and 753 // procedure designators 754 fir::FirOpBuilder &builder = converter.getFirOpBuilder(); 755 for (const Object &object : objectList) { 756 const semantics::Symbol *symbol = object.sym(); 757 if (reductionSymbols) 758 reductionSymbols->push_back(symbol); 759 mlir::Value symVal = converter.getSymbolAddress(*symbol); 760 mlir::Type eleType; 761 auto refType = mlir::dyn_cast_or_null<fir::ReferenceType>(symVal.getType()); 762 if (refType) 763 eleType = refType.getEleTy(); 764 else 765 eleType = symVal.getType(); 766 767 // all arrays must be boxed so that we have convenient access to all the 768 // information needed to iterate over the array 769 if (mlir::isa<fir::SequenceType>(eleType)) { 770 // For Host associated symbols, use `SymbolBox` instead 771 lower::SymbolBox symBox = converter.lookupOneLevelUpSymbol(*symbol); 772 hlfir::Entity entity{symBox.getAddr()}; 773 entity = genVariableBox(currentLocation, builder, entity); 774 mlir::Value box = entity.getBase(); 775 776 // Always pass the box by reference so that the OpenMP dialect 777 // verifiers don't need to know anything about fir.box 778 auto alloca = 779 builder.create<fir::AllocaOp>(currentLocation, box.getType()); 780 builder.create<fir::StoreOp>(currentLocation, box, alloca); 781 782 symVal = alloca; 783 } else if (mlir::isa<fir::BaseBoxType>(symVal.getType())) { 784 // boxed arrays are passed as values not by reference. Unfortunately, 785 // we can't pass a box by value to omp.redution_declare, so turn it 786 // into a reference 787 788 auto alloca = 789 builder.create<fir::AllocaOp>(currentLocation, symVal.getType()); 790 builder.create<fir::StoreOp>(currentLocation, symVal, alloca); 791 symVal = alloca; 792 } else if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>()) { 793 symVal = declOp.getBase(); 794 } 795 796 // this isn't the same as the by-val and by-ref passing later in the 797 // pipeline. Both styles assume that the variable is a reference at 798 // this point 799 assert(mlir::isa<fir::ReferenceType>(symVal.getType()) && 800 "reduction input var is a reference"); 801 802 reductionVars.push_back(symVal); 803 reduceVarByRef.push_back(doReductionByRef(symVal)); 804 } 805 806 for (auto [symVal, isByRef] : llvm::zip(reductionVars, reduceVarByRef)) { 807 auto redType = mlir::cast<fir::ReferenceType>(symVal.getType()); 808 const auto &kindMap = firOpBuilder.getKindMap(); 809 std::string reductionName; 810 ReductionIdentifier redId; 811 mlir::Type redNameTy = redType; 812 if (mlir::isa<fir::LogicalType>(redType.getEleTy())) 813 redNameTy = builder.getI1Type(); 814 815 if (const auto &redDefinedOp = 816 std::get_if<omp::clause::DefinedOperator>(&redOperator.u)) { 817 const auto &intrinsicOp{ 818 std::get<omp::clause::DefinedOperator::IntrinsicOperator>( 819 redDefinedOp->u)}; 820 redId = getReductionType(intrinsicOp); 821 switch (redId) { 822 case ReductionIdentifier::ADD: 823 case ReductionIdentifier::MULTIPLY: 824 case ReductionIdentifier::AND: 825 case ReductionIdentifier::EQV: 826 case ReductionIdentifier::OR: 827 case ReductionIdentifier::NEQV: 828 break; 829 default: 830 TODO(currentLocation, 831 "Reduction of some intrinsic operators is not supported"); 832 break; 833 } 834 835 reductionName = 836 getReductionName(intrinsicOp, kindMap, redNameTy, isByRef); 837 } else if (const auto *reductionIntrinsic = 838 std::get_if<omp::clause::ProcedureDesignator>( 839 &redOperator.u)) { 840 if (!ReductionProcessor::supportedIntrinsicProcReduction( 841 *reductionIntrinsic)) { 842 TODO(currentLocation, "Unsupported intrinsic proc reduction"); 843 } 844 redId = getReductionType(*reductionIntrinsic); 845 reductionName = 846 getReductionName(getRealName(*reductionIntrinsic).ToString(), kindMap, 847 redNameTy, isByRef); 848 } else { 849 TODO(currentLocation, "Unexpected reduction type"); 850 } 851 852 decl = createDeclareReduction(firOpBuilder, reductionName, redId, redType, 853 currentLocation, isByRef); 854 reductionDeclSymbols.push_back( 855 mlir::SymbolRefAttr::get(firOpBuilder.getContext(), decl.getSymName())); 856 } 857 } 858 859 const semantics::SourceName 860 ReductionProcessor::getRealName(const semantics::Symbol *symbol) { 861 return symbol->GetUltimate().name(); 862 } 863 864 const semantics::SourceName 865 ReductionProcessor::getRealName(const omp::clause::ProcedureDesignator &pd) { 866 return getRealName(pd.v.sym()); 867 } 868 869 int ReductionProcessor::getOperationIdentity(ReductionIdentifier redId, 870 mlir::Location loc) { 871 switch (redId) { 872 case ReductionIdentifier::ADD: 873 case ReductionIdentifier::OR: 874 case ReductionIdentifier::NEQV: 875 return 0; 876 case ReductionIdentifier::MULTIPLY: 877 case ReductionIdentifier::AND: 878 case ReductionIdentifier::EQV: 879 return 1; 880 default: 881 TODO(loc, "Reduction of some intrinsic operators is not supported"); 882 } 883 } 884 885 } // namespace omp 886 } // namespace lower 887 } // namespace Fortran 888