1 //===-- ReductionProcessor.cpp ----------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Coding style: https://mlir.llvm.org/getting_started/DeveloperGuide/ 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ReductionProcessor.h" 14 15 #include "flang/Lower/AbstractConverter.h" 16 #include "flang/Lower/ConvertType.h" 17 #include "flang/Lower/SymbolMap.h" 18 #include "flang/Optimizer/Builder/Complex.h" 19 #include "flang/Optimizer/Builder/HLFIRTools.h" 20 #include "flang/Optimizer/Builder/Todo.h" 21 #include "flang/Optimizer/Dialect/FIRType.h" 22 #include "flang/Optimizer/HLFIR/HLFIROps.h" 23 #include "flang/Optimizer/Support/FatalError.h" 24 #include "flang/Parser/tools.h" 25 #include "mlir/Dialect/OpenMP/OpenMPDialect.h" 26 #include "llvm/Support/CommandLine.h" 27 28 static llvm::cl::opt<bool> forceByrefReduction( 29 "force-byref-reduction", 30 llvm::cl::desc("Pass all reduction arguments by reference"), 31 llvm::cl::Hidden); 32 33 namespace Fortran { 34 namespace lower { 35 namespace omp { 36 37 ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType( 38 const omp::clause::ProcedureDesignator &pd) { 39 auto redType = llvm::StringSwitch<std::optional<ReductionIdentifier>>( 40 getRealName(pd.v.id()).ToString()) 41 .Case("max", ReductionIdentifier::MAX) 42 .Case("min", ReductionIdentifier::MIN) 43 .Case("iand", ReductionIdentifier::IAND) 44 .Case("ior", ReductionIdentifier::IOR) 45 .Case("ieor", ReductionIdentifier::IEOR) 46 .Default(std::nullopt); 47 assert(redType && "Invalid Reduction"); 48 return *redType; 49 } 50 51 ReductionProcessor::ReductionIdentifier ReductionProcessor::getReductionType( 52 omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp) { 53 switch (intrinsicOp) { 54 case omp::clause::DefinedOperator::IntrinsicOperator::Add: 55 return ReductionIdentifier::ADD; 56 case omp::clause::DefinedOperator::IntrinsicOperator::Subtract: 57 return ReductionIdentifier::SUBTRACT; 58 case omp::clause::DefinedOperator::IntrinsicOperator::Multiply: 59 return ReductionIdentifier::MULTIPLY; 60 case omp::clause::DefinedOperator::IntrinsicOperator::AND: 61 return ReductionIdentifier::AND; 62 case omp::clause::DefinedOperator::IntrinsicOperator::EQV: 63 return ReductionIdentifier::EQV; 64 case omp::clause::DefinedOperator::IntrinsicOperator::OR: 65 return ReductionIdentifier::OR; 66 case omp::clause::DefinedOperator::IntrinsicOperator::NEQV: 67 return ReductionIdentifier::NEQV; 68 default: 69 llvm_unreachable("unexpected intrinsic operator in reduction"); 70 } 71 } 72 73 bool ReductionProcessor::supportedIntrinsicProcReduction( 74 const omp::clause::ProcedureDesignator &pd) { 75 Fortran::semantics::Symbol *sym = pd.v.id(); 76 if (!sym->GetUltimate().attrs().test(Fortran::semantics::Attr::INTRINSIC)) 77 return false; 78 auto redType = llvm::StringSwitch<bool>(getRealName(sym).ToString()) 79 .Case("max", true) 80 .Case("min", true) 81 .Case("iand", true) 82 .Case("ior", true) 83 .Case("ieor", true) 84 .Default(false); 85 return redType; 86 } 87 88 std::string 89 ReductionProcessor::getReductionName(llvm::StringRef name, 90 const fir::KindMapping &kindMap, 91 mlir::Type ty, bool isByRef) { 92 ty = fir::unwrapRefType(ty); 93 94 // extra string to distinguish reduction functions for variables passed by 95 // reference 96 llvm::StringRef byrefAddition{""}; 97 if (isByRef) 98 byrefAddition = "_byref"; 99 100 return fir::getTypeAsString(ty, kindMap, (name + byrefAddition).str()); 101 } 102 103 std::string ReductionProcessor::getReductionName( 104 omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp, 105 const fir::KindMapping &kindMap, mlir::Type ty, bool isByRef) { 106 std::string reductionName; 107 108 switch (intrinsicOp) { 109 case omp::clause::DefinedOperator::IntrinsicOperator::Add: 110 reductionName = "add_reduction"; 111 break; 112 case omp::clause::DefinedOperator::IntrinsicOperator::Multiply: 113 reductionName = "multiply_reduction"; 114 break; 115 case omp::clause::DefinedOperator::IntrinsicOperator::AND: 116 return "and_reduction"; 117 case omp::clause::DefinedOperator::IntrinsicOperator::EQV: 118 return "eqv_reduction"; 119 case omp::clause::DefinedOperator::IntrinsicOperator::OR: 120 return "or_reduction"; 121 case omp::clause::DefinedOperator::IntrinsicOperator::NEQV: 122 return "neqv_reduction"; 123 default: 124 reductionName = "other_reduction"; 125 break; 126 } 127 128 return getReductionName(reductionName, kindMap, ty, isByRef); 129 } 130 131 mlir::Value 132 ReductionProcessor::getReductionInitValue(mlir::Location loc, mlir::Type type, 133 ReductionIdentifier redId, 134 fir::FirOpBuilder &builder) { 135 type = fir::unwrapRefType(type); 136 if (!fir::isa_integer(type) && !fir::isa_real(type) && 137 !fir::isa_complex(type) && !mlir::isa<fir::LogicalType>(type)) 138 TODO(loc, "Reduction of some types is not supported"); 139 switch (redId) { 140 case ReductionIdentifier::MAX: { 141 if (auto ty = type.dyn_cast<mlir::FloatType>()) { 142 const llvm::fltSemantics &sem = ty.getFloatSemantics(); 143 return builder.createRealConstant( 144 loc, type, llvm::APFloat::getLargest(sem, /*Negative=*/true)); 145 } 146 unsigned bits = type.getIntOrFloatBitWidth(); 147 int64_t minInt = llvm::APInt::getSignedMinValue(bits).getSExtValue(); 148 return builder.createIntegerConstant(loc, type, minInt); 149 } 150 case ReductionIdentifier::MIN: { 151 if (auto ty = type.dyn_cast<mlir::FloatType>()) { 152 const llvm::fltSemantics &sem = ty.getFloatSemantics(); 153 return builder.createRealConstant( 154 loc, type, llvm::APFloat::getLargest(sem, /*Negative=*/false)); 155 } 156 unsigned bits = type.getIntOrFloatBitWidth(); 157 int64_t maxInt = llvm::APInt::getSignedMaxValue(bits).getSExtValue(); 158 return builder.createIntegerConstant(loc, type, maxInt); 159 } 160 case ReductionIdentifier::IOR: { 161 unsigned bits = type.getIntOrFloatBitWidth(); 162 int64_t zeroInt = llvm::APInt::getZero(bits).getSExtValue(); 163 return builder.createIntegerConstant(loc, type, zeroInt); 164 } 165 case ReductionIdentifier::IEOR: { 166 unsigned bits = type.getIntOrFloatBitWidth(); 167 int64_t zeroInt = llvm::APInt::getZero(bits).getSExtValue(); 168 return builder.createIntegerConstant(loc, type, zeroInt); 169 } 170 case ReductionIdentifier::IAND: { 171 unsigned bits = type.getIntOrFloatBitWidth(); 172 int64_t allOnInt = llvm::APInt::getAllOnes(bits).getSExtValue(); 173 return builder.createIntegerConstant(loc, type, allOnInt); 174 } 175 case ReductionIdentifier::ADD: 176 case ReductionIdentifier::MULTIPLY: 177 case ReductionIdentifier::AND: 178 case ReductionIdentifier::OR: 179 case ReductionIdentifier::EQV: 180 case ReductionIdentifier::NEQV: 181 if (auto cplxTy = mlir::dyn_cast<fir::ComplexType>(type)) { 182 mlir::Type realTy = 183 Fortran::lower::convertReal(builder.getContext(), cplxTy.getFKind()); 184 mlir::Value initRe = builder.createRealConstant( 185 loc, realTy, getOperationIdentity(redId, loc)); 186 mlir::Value initIm = builder.createRealConstant(loc, realTy, 0); 187 188 return fir::factory::Complex{builder, loc}.createComplex(type, initRe, 189 initIm); 190 } 191 if (type.isa<mlir::FloatType>()) 192 return builder.create<mlir::arith::ConstantOp>( 193 loc, type, 194 builder.getFloatAttr(type, (double)getOperationIdentity(redId, loc))); 195 196 if (type.isa<fir::LogicalType>()) { 197 mlir::Value intConst = builder.create<mlir::arith::ConstantOp>( 198 loc, builder.getI1Type(), 199 builder.getIntegerAttr(builder.getI1Type(), 200 getOperationIdentity(redId, loc))); 201 return builder.createConvert(loc, type, intConst); 202 } 203 204 return builder.create<mlir::arith::ConstantOp>( 205 loc, type, 206 builder.getIntegerAttr(type, getOperationIdentity(redId, loc))); 207 case ReductionIdentifier::ID: 208 case ReductionIdentifier::USER_DEF_OP: 209 case ReductionIdentifier::SUBTRACT: 210 TODO(loc, "Reduction of some identifier types is not supported"); 211 } 212 llvm_unreachable("Unhandled Reduction identifier : getReductionInitValue"); 213 } 214 215 mlir::Value ReductionProcessor::createScalarCombiner( 216 fir::FirOpBuilder &builder, mlir::Location loc, ReductionIdentifier redId, 217 mlir::Type type, mlir::Value op1, mlir::Value op2) { 218 mlir::Value reductionOp; 219 type = fir::unwrapRefType(type); 220 switch (redId) { 221 case ReductionIdentifier::MAX: 222 reductionOp = 223 getReductionOperation<mlir::arith::MaxNumFOp, mlir::arith::MaxSIOp>( 224 builder, type, loc, op1, op2); 225 break; 226 case ReductionIdentifier::MIN: 227 reductionOp = 228 getReductionOperation<mlir::arith::MinNumFOp, mlir::arith::MinSIOp>( 229 builder, type, loc, op1, op2); 230 break; 231 case ReductionIdentifier::IOR: 232 assert((type.isIntOrIndex()) && "only integer is expected"); 233 reductionOp = builder.create<mlir::arith::OrIOp>(loc, op1, op2); 234 break; 235 case ReductionIdentifier::IEOR: 236 assert((type.isIntOrIndex()) && "only integer is expected"); 237 reductionOp = builder.create<mlir::arith::XOrIOp>(loc, op1, op2); 238 break; 239 case ReductionIdentifier::IAND: 240 assert((type.isIntOrIndex()) && "only integer is expected"); 241 reductionOp = builder.create<mlir::arith::AndIOp>(loc, op1, op2); 242 break; 243 case ReductionIdentifier::ADD: 244 reductionOp = 245 getReductionOperation<mlir::arith::AddFOp, mlir::arith::AddIOp, 246 fir::AddcOp>(builder, type, loc, op1, op2); 247 break; 248 case ReductionIdentifier::MULTIPLY: 249 reductionOp = 250 getReductionOperation<mlir::arith::MulFOp, mlir::arith::MulIOp, 251 fir::MulcOp>(builder, type, loc, op1, op2); 252 break; 253 case ReductionIdentifier::AND: { 254 mlir::Value op1I1 = builder.createConvert(loc, builder.getI1Type(), op1); 255 mlir::Value op2I1 = builder.createConvert(loc, builder.getI1Type(), op2); 256 257 mlir::Value andiOp = builder.create<mlir::arith::AndIOp>(loc, op1I1, op2I1); 258 259 reductionOp = builder.createConvert(loc, type, andiOp); 260 break; 261 } 262 case ReductionIdentifier::OR: { 263 mlir::Value op1I1 = builder.createConvert(loc, builder.getI1Type(), op1); 264 mlir::Value op2I1 = builder.createConvert(loc, builder.getI1Type(), op2); 265 266 mlir::Value oriOp = builder.create<mlir::arith::OrIOp>(loc, op1I1, op2I1); 267 268 reductionOp = builder.createConvert(loc, type, oriOp); 269 break; 270 } 271 case ReductionIdentifier::EQV: { 272 mlir::Value op1I1 = builder.createConvert(loc, builder.getI1Type(), op1); 273 mlir::Value op2I1 = builder.createConvert(loc, builder.getI1Type(), op2); 274 275 mlir::Value cmpiOp = builder.create<mlir::arith::CmpIOp>( 276 loc, mlir::arith::CmpIPredicate::eq, op1I1, op2I1); 277 278 reductionOp = builder.createConvert(loc, type, cmpiOp); 279 break; 280 } 281 case ReductionIdentifier::NEQV: { 282 mlir::Value op1I1 = builder.createConvert(loc, builder.getI1Type(), op1); 283 mlir::Value op2I1 = builder.createConvert(loc, builder.getI1Type(), op2); 284 285 mlir::Value cmpiOp = builder.create<mlir::arith::CmpIOp>( 286 loc, mlir::arith::CmpIPredicate::ne, op1I1, op2I1); 287 288 reductionOp = builder.createConvert(loc, type, cmpiOp); 289 break; 290 } 291 default: 292 TODO(loc, "Reduction of some intrinsic operators is not supported"); 293 } 294 295 return reductionOp; 296 } 297 298 /// Create reduction combiner region for reduction variables which are boxed 299 /// arrays 300 static void genBoxCombiner(fir::FirOpBuilder &builder, mlir::Location loc, 301 ReductionProcessor::ReductionIdentifier redId, 302 fir::BaseBoxType boxTy, mlir::Value lhs, 303 mlir::Value rhs) { 304 fir::SequenceType seqTy = mlir::dyn_cast_or_null<fir::SequenceType>( 305 fir::unwrapRefType(boxTy.getEleTy())); 306 fir::HeapType heapTy = 307 mlir::dyn_cast_or_null<fir::HeapType>(boxTy.getEleTy()); 308 if ((!seqTy || seqTy.hasUnknownShape()) && !heapTy) 309 TODO(loc, "Unsupported boxed type in OpenMP reduction"); 310 311 // load fir.ref<fir.box<...>> 312 mlir::Value lhsAddr = lhs; 313 lhs = builder.create<fir::LoadOp>(loc, lhs); 314 rhs = builder.create<fir::LoadOp>(loc, rhs); 315 316 if (heapTy && !seqTy) { 317 // get box contents (heap pointers) 318 lhs = builder.create<fir::BoxAddrOp>(loc, lhs); 319 rhs = builder.create<fir::BoxAddrOp>(loc, rhs); 320 mlir::Value lhsValAddr = lhs; 321 322 // load heap pointers 323 lhs = builder.create<fir::LoadOp>(loc, lhs); 324 rhs = builder.create<fir::LoadOp>(loc, rhs); 325 326 mlir::Value result = ReductionProcessor::createScalarCombiner( 327 builder, loc, redId, heapTy.getEleTy(), lhs, rhs); 328 builder.create<fir::StoreOp>(loc, result, lhsValAddr); 329 builder.create<mlir::omp::YieldOp>(loc, lhsAddr); 330 return; 331 } 332 333 const unsigned rank = seqTy.getDimension(); 334 llvm::SmallVector<mlir::Value> extents; 335 extents.reserve(rank); 336 llvm::SmallVector<mlir::Value> lbAndExtents; 337 lbAndExtents.reserve(rank * 2); 338 339 // Get box lowerbounds and extents: 340 mlir::Type idxTy = builder.getIndexType(); 341 for (unsigned i = 0; i < rank; ++i) { 342 // TODO: ideally we want to hoist box reads out of the critical section. 343 // We could do this by having box dimensions in block arguments like 344 // OpenACC does 345 mlir::Value dim = builder.createIntegerConstant(loc, idxTy, i); 346 auto dimInfo = 347 builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, lhs, dim); 348 extents.push_back(dimInfo.getExtent()); 349 lbAndExtents.push_back(dimInfo.getLowerBound()); 350 lbAndExtents.push_back(dimInfo.getExtent()); 351 } 352 353 auto shapeShiftTy = fir::ShapeShiftType::get(builder.getContext(), rank); 354 auto shapeShift = 355 builder.create<fir::ShapeShiftOp>(loc, shapeShiftTy, lbAndExtents); 356 357 // Iterate over array elements, applying the equivalent scalar reduction: 358 359 // F2018 5.4.10.2: Unallocated allocatable variables may not be referenced 360 // and so no null check is needed here before indexing into the (possibly 361 // allocatable) arrays. 362 363 // A hlfir::elemental here gets inlined with a temporary so create the 364 // loop nest directly. 365 // This function already controls all of the code in this region so we 366 // know this won't miss any opportuinties for clever elemental inlining 367 hlfir::LoopNest nest = 368 hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true); 369 builder.setInsertionPointToStart(nest.innerLoop.getBody()); 370 mlir::Type refTy = fir::ReferenceType::get(seqTy.getEleTy()); 371 auto lhsEleAddr = builder.create<fir::ArrayCoorOp>( 372 loc, refTy, lhs, shapeShift, /*slice=*/mlir::Value{}, 373 nest.oneBasedIndices, /*typeparms=*/mlir::ValueRange{}); 374 auto rhsEleAddr = builder.create<fir::ArrayCoorOp>( 375 loc, refTy, rhs, shapeShift, /*slice=*/mlir::Value{}, 376 nest.oneBasedIndices, /*typeparms=*/mlir::ValueRange{}); 377 auto lhsEle = builder.create<fir::LoadOp>(loc, lhsEleAddr); 378 auto rhsEle = builder.create<fir::LoadOp>(loc, rhsEleAddr); 379 mlir::Value scalarReduction = ReductionProcessor::createScalarCombiner( 380 builder, loc, redId, refTy, lhsEle, rhsEle); 381 builder.create<fir::StoreOp>(loc, scalarReduction, lhsEleAddr); 382 383 builder.setInsertionPointAfter(nest.outerLoop); 384 builder.create<mlir::omp::YieldOp>(loc, lhsAddr); 385 } 386 387 // generate combiner region for reduction operations 388 static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, 389 ReductionProcessor::ReductionIdentifier redId, 390 mlir::Type ty, mlir::Value lhs, mlir::Value rhs, 391 bool isByRef) { 392 ty = fir::unwrapRefType(ty); 393 394 if (fir::isa_trivial(ty)) { 395 mlir::Value lhsLoaded = builder.loadIfRef(loc, lhs); 396 mlir::Value rhsLoaded = builder.loadIfRef(loc, rhs); 397 398 mlir::Value result = ReductionProcessor::createScalarCombiner( 399 builder, loc, redId, ty, lhsLoaded, rhsLoaded); 400 if (isByRef) { 401 builder.create<fir::StoreOp>(loc, result, lhs); 402 builder.create<mlir::omp::YieldOp>(loc, lhs); 403 } else { 404 builder.create<mlir::omp::YieldOp>(loc, result); 405 } 406 return; 407 } 408 // all arrays should have been boxed 409 if (auto boxTy = mlir::dyn_cast<fir::BaseBoxType>(ty)) { 410 genBoxCombiner(builder, loc, redId, boxTy, lhs, rhs); 411 return; 412 } 413 414 TODO(loc, "OpenMP genCombiner for unsupported reduction variable type"); 415 } 416 417 static void 418 createReductionCleanupRegion(fir::FirOpBuilder &builder, mlir::Location loc, 419 mlir::omp::DeclareReductionOp &reductionDecl) { 420 mlir::Type redTy = reductionDecl.getType(); 421 422 mlir::Region &cleanupRegion = reductionDecl.getCleanupRegion(); 423 assert(cleanupRegion.empty()); 424 mlir::Block *block = 425 builder.createBlock(&cleanupRegion, cleanupRegion.end(), {redTy}, {loc}); 426 builder.setInsertionPointToEnd(block); 427 428 auto typeError = [loc]() { 429 fir::emitFatalError(loc, 430 "Attempt to create an omp reduction cleanup region " 431 "for a type that wasn't allocated", 432 /*genCrashDiag=*/true); 433 }; 434 435 mlir::Type valTy = fir::unwrapRefType(redTy); 436 if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(valTy)) { 437 if (!mlir::isa<fir::HeapType>(boxTy.getEleTy())) { 438 mlir::Type innerTy = fir::extractSequenceType(boxTy); 439 if (!mlir::isa<fir::SequenceType>(innerTy)) 440 typeError(); 441 } 442 443 mlir::Value arg = block->getArgument(0); 444 arg = builder.loadIfRef(loc, arg); 445 assert(mlir::isa<fir::BaseBoxType>(arg.getType())); 446 447 // Deallocate box 448 // The FIR type system doesn't nesecarrily know that this is a mutable box 449 // if we allocated the thread local array on the heap to avoid looped stack 450 // allocations. 451 mlir::Value addr = 452 hlfir::genVariableRawAddress(loc, builder, hlfir::Entity{arg}); 453 mlir::Value isAllocated = builder.genIsNotNullAddr(loc, addr); 454 fir::IfOp ifOp = 455 builder.create<fir::IfOp>(loc, isAllocated, /*withElseRegion=*/false); 456 builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); 457 458 mlir::Value cast = builder.createConvert( 459 loc, fir::HeapType::get(fir::dyn_cast_ptrEleTy(addr.getType())), addr); 460 builder.create<fir::FreeMemOp>(loc, cast); 461 462 builder.setInsertionPointAfter(ifOp); 463 builder.create<mlir::omp::YieldOp>(loc); 464 return; 465 } 466 467 typeError(); 468 } 469 470 // like fir::unwrapSeqOrBoxedSeqType except it also works for non-sequence boxes 471 static mlir::Type unwrapSeqOrBoxedType(mlir::Type ty) { 472 if (auto seqTy = ty.dyn_cast<fir::SequenceType>()) 473 return seqTy.getEleTy(); 474 if (auto boxTy = ty.dyn_cast<fir::BaseBoxType>()) { 475 auto eleTy = fir::unwrapRefType(boxTy.getEleTy()); 476 if (auto seqTy = eleTy.dyn_cast<fir::SequenceType>()) 477 return seqTy.getEleTy(); 478 return eleTy; 479 } 480 return ty; 481 } 482 483 static mlir::Value 484 createReductionInitRegion(fir::FirOpBuilder &builder, mlir::Location loc, 485 mlir::omp::DeclareReductionOp &reductionDecl, 486 const ReductionProcessor::ReductionIdentifier redId, 487 mlir::Type type, bool isByRef) { 488 mlir::Type ty = fir::unwrapRefType(type); 489 mlir::Value initValue = ReductionProcessor::getReductionInitValue( 490 loc, unwrapSeqOrBoxedType(ty), redId, builder); 491 492 if (fir::isa_trivial(ty)) { 493 if (isByRef) { 494 mlir::Value alloca = builder.create<fir::AllocaOp>(loc, ty); 495 builder.createStoreWithConvert(loc, initValue, alloca); 496 return alloca; 497 } 498 // by val 499 return initValue; 500 } 501 502 // check if an allocatable box is unallocated. If so, initialize the boxAlloca 503 // to be unallocated e.g. 504 // %box_alloca = fir.alloca !fir.box<!fir.heap<...>> 505 // %addr = fir.box_addr %box 506 // if (%addr == 0) { 507 // %nullbox = fir.embox %addr 508 // fir.store %nullbox to %box_alloca 509 // } else { 510 // // ... 511 // fir.store %something to %box_alloca 512 // } 513 // omp.yield %box_alloca 514 mlir::Value blockArg = 515 builder.loadIfRef(loc, builder.getBlock()->getArgument(0)); 516 auto handleNullAllocatable = [&](mlir::Value boxAlloca) -> fir::IfOp { 517 mlir::Value addr = builder.create<fir::BoxAddrOp>(loc, blockArg); 518 mlir::Value isNotAllocated = builder.genIsNullAddr(loc, addr); 519 fir::IfOp ifOp = builder.create<fir::IfOp>(loc, isNotAllocated, 520 /*withElseRegion=*/true); 521 builder.setInsertionPointToStart(&ifOp.getThenRegion().front()); 522 // just embox the null address and return 523 mlir::Value nullBox = builder.create<fir::EmboxOp>(loc, ty, addr); 524 builder.create<fir::StoreOp>(loc, nullBox, boxAlloca); 525 return ifOp; 526 }; 527 528 // all arrays are boxed 529 if (auto boxTy = mlir::dyn_cast_or_null<fir::BaseBoxType>(ty)) { 530 assert(isByRef && "passing boxes by value is unsupported"); 531 bool isAllocatable = mlir::isa<fir::HeapType>(boxTy.getEleTy()); 532 mlir::Value boxAlloca = builder.create<fir::AllocaOp>(loc, ty); 533 mlir::Type innerTy = fir::unwrapRefType(boxTy.getEleTy()); 534 if (fir::isa_trivial(innerTy)) { 535 // boxed non-sequence value e.g. !fir.box<!fir.heap<i32>> 536 if (!isAllocatable) 537 TODO(loc, "Reduction of non-allocatable trivial typed box"); 538 539 fir::IfOp ifUnallocated = handleNullAllocatable(boxAlloca); 540 541 builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); 542 mlir::Value valAlloc = builder.create<fir::AllocMemOp>(loc, innerTy); 543 builder.createStoreWithConvert(loc, initValue, valAlloc); 544 mlir::Value box = builder.create<fir::EmboxOp>(loc, ty, valAlloc); 545 builder.create<fir::StoreOp>(loc, box, boxAlloca); 546 547 auto insPt = builder.saveInsertionPoint(); 548 createReductionCleanupRegion(builder, loc, reductionDecl); 549 builder.restoreInsertionPoint(insPt); 550 builder.setInsertionPointAfter(ifUnallocated); 551 return boxAlloca; 552 } 553 innerTy = fir::extractSequenceType(boxTy); 554 if (!mlir::isa<fir::SequenceType>(innerTy)) 555 TODO(loc, "Unsupported boxed type for reduction"); 556 557 fir::IfOp ifUnallocated{nullptr}; 558 if (isAllocatable) { 559 ifUnallocated = handleNullAllocatable(boxAlloca); 560 builder.setInsertionPointToStart(&ifUnallocated.getElseRegion().front()); 561 } 562 563 // Create the private copy from the initial fir.box: 564 hlfir::Entity source = hlfir::Entity{blockArg}; 565 566 // Allocating on the heap in case the whole reduction is nested inside of a 567 // loop 568 // TODO: compare performance here to using allocas - this could be made to 569 // work by inserting stacksave/stackrestore around the reduction in 570 // openmpirbuilder 571 auto [temp, needsDealloc] = createTempFromMold(loc, builder, source); 572 // if needsDealloc isn't statically false, add cleanup region. Always 573 // do this for allocatable boxes because they might have been re-allocated 574 // in the body of the loop/parallel region 575 576 std::optional<int64_t> cstNeedsDealloc = 577 fir::getIntIfConstant(needsDealloc); 578 assert(cstNeedsDealloc.has_value() && 579 "createTempFromMold decides this statically"); 580 if (cstNeedsDealloc.has_value() && *cstNeedsDealloc != false) { 581 mlir::OpBuilder::InsertionGuard guard(builder); 582 createReductionCleanupRegion(builder, loc, reductionDecl); 583 } else { 584 assert(!isAllocatable && "Allocatable arrays must be heap allocated"); 585 } 586 587 // Put the temporary inside of a box: 588 hlfir::Entity box = hlfir::genVariableBox(loc, builder, temp); 589 // hlfir::genVariableBox removes fir.heap<> around the element type 590 mlir::Value convertedBox = builder.createConvert(loc, ty, box.getBase()); 591 builder.create<hlfir::AssignOp>(loc, initValue, convertedBox); 592 builder.create<fir::StoreOp>(loc, convertedBox, boxAlloca); 593 if (ifUnallocated) 594 builder.setInsertionPointAfter(ifUnallocated); 595 return boxAlloca; 596 } 597 598 TODO(loc, "createReductionInitRegion for unsupported type"); 599 } 600 601 mlir::omp::DeclareReductionOp ReductionProcessor::createDeclareReduction( 602 fir::FirOpBuilder &builder, llvm::StringRef reductionOpName, 603 const ReductionIdentifier redId, mlir::Type type, mlir::Location loc, 604 bool isByRef) { 605 mlir::OpBuilder::InsertionGuard guard(builder); 606 mlir::ModuleOp module = builder.getModule(); 607 608 assert(!reductionOpName.empty()); 609 610 auto decl = 611 module.lookupSymbol<mlir::omp::DeclareReductionOp>(reductionOpName); 612 if (decl) 613 return decl; 614 615 mlir::OpBuilder modBuilder(module.getBodyRegion()); 616 mlir::Type valTy = fir::unwrapRefType(type); 617 if (!isByRef) 618 type = valTy; 619 620 decl = modBuilder.create<mlir::omp::DeclareReductionOp>(loc, reductionOpName, 621 type); 622 builder.createBlock(&decl.getInitializerRegion(), 623 decl.getInitializerRegion().end(), {type}, {loc}); 624 builder.setInsertionPointToEnd(&decl.getInitializerRegion().back()); 625 626 mlir::Value init = 627 createReductionInitRegion(builder, loc, decl, redId, type, isByRef); 628 builder.create<mlir::omp::YieldOp>(loc, init); 629 630 builder.createBlock(&decl.getReductionRegion(), 631 decl.getReductionRegion().end(), {type, type}, 632 {loc, loc}); 633 634 builder.setInsertionPointToEnd(&decl.getReductionRegion().back()); 635 mlir::Value op1 = decl.getReductionRegion().front().getArgument(0); 636 mlir::Value op2 = decl.getReductionRegion().front().getArgument(1); 637 genCombiner(builder, loc, redId, type, op1, op2, isByRef); 638 639 return decl; 640 } 641 642 // TODO: By-ref vs by-val reductions are currently toggled for the whole 643 // operation (possibly effecting multiple reduction variables). 644 // This could cause a problem with openmp target reductions because 645 // by-ref trivial types may not be supported. 646 bool ReductionProcessor::doReductionByRef( 647 const llvm::SmallVectorImpl<mlir::Value> &reductionVars) { 648 if (reductionVars.empty()) 649 return false; 650 if (forceByrefReduction) 651 return true; 652 653 for (mlir::Value reductionVar : reductionVars) { 654 if (auto declare = 655 mlir::dyn_cast<hlfir::DeclareOp>(reductionVar.getDefiningOp())) 656 reductionVar = declare.getMemref(); 657 658 if (!fir::isa_trivial(fir::unwrapRefType(reductionVar.getType()))) 659 return true; 660 } 661 return false; 662 } 663 664 void ReductionProcessor::addDeclareReduction( 665 mlir::Location currentLocation, 666 Fortran::lower::AbstractConverter &converter, 667 const omp::clause::Reduction &reduction, 668 llvm::SmallVectorImpl<mlir::Value> &reductionVars, 669 llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols, 670 llvm::SmallVectorImpl<const Fortran::semantics::Symbol *> 671 *reductionSymbols) { 672 fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); 673 674 if (std::get<std::optional<omp::clause::Reduction::ReductionModifier>>( 675 reduction.t)) 676 TODO(currentLocation, "Reduction modifiers are not supported"); 677 678 mlir::omp::DeclareReductionOp decl; 679 const auto &redOperatorList{ 680 std::get<omp::clause::Reduction::ReductionIdentifiers>(reduction.t)}; 681 assert(redOperatorList.size() == 1 && "Expecting single operator"); 682 const auto &redOperator = redOperatorList.front(); 683 const auto &objectList{std::get<omp::ObjectList>(reduction.t)}; 684 685 if (!std::holds_alternative<omp::clause::DefinedOperator>(redOperator.u)) { 686 if (const auto *reductionIntrinsic = 687 std::get_if<omp::clause::ProcedureDesignator>(&redOperator.u)) { 688 if (!ReductionProcessor::supportedIntrinsicProcReduction( 689 *reductionIntrinsic)) { 690 return; 691 } 692 } else { 693 return; 694 } 695 } 696 697 // initial pass to collect all reduction vars so we can figure out if this 698 // should happen byref 699 fir::FirOpBuilder &builder = converter.getFirOpBuilder(); 700 for (const Object &object : objectList) { 701 const Fortran::semantics::Symbol *symbol = object.id(); 702 if (reductionSymbols) 703 reductionSymbols->push_back(symbol); 704 mlir::Value symVal = converter.getSymbolAddress(*symbol); 705 mlir::Type eleType; 706 auto refType = mlir::dyn_cast_or_null<fir::ReferenceType>(symVal.getType()); 707 if (refType) 708 eleType = refType.getEleTy(); 709 else 710 eleType = symVal.getType(); 711 712 // all arrays must be boxed so that we have convenient access to all the 713 // information needed to iterate over the array 714 if (mlir::isa<fir::SequenceType>(eleType)) { 715 // For Host associated symbols, use `SymbolBox` instead 716 Fortran::lower::SymbolBox symBox = 717 converter.lookupOneLevelUpSymbol(*symbol); 718 hlfir::Entity entity{symBox.getAddr()}; 719 entity = genVariableBox(currentLocation, builder, entity); 720 mlir::Value box = entity.getBase(); 721 722 // Always pass the box by reference so that the OpenMP dialect 723 // verifiers don't need to know anything about fir.box 724 auto alloca = 725 builder.create<fir::AllocaOp>(currentLocation, box.getType()); 726 builder.create<fir::StoreOp>(currentLocation, box, alloca); 727 728 symVal = alloca; 729 } else if (mlir::isa<fir::BaseBoxType>(symVal.getType())) { 730 // boxed arrays are passed as values not by reference. Unfortunately, 731 // we can't pass a box by value to omp.redution_declare, so turn it 732 // into a reference 733 734 auto alloca = 735 builder.create<fir::AllocaOp>(currentLocation, symVal.getType()); 736 builder.create<fir::StoreOp>(currentLocation, symVal, alloca); 737 symVal = alloca; 738 } else if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>()) { 739 symVal = declOp.getBase(); 740 } 741 742 // this isn't the same as the by-val and by-ref passing later in the 743 // pipeline. Both styles assume that the variable is a reference at 744 // this point 745 assert(mlir::isa<fir::ReferenceType>(symVal.getType()) && 746 "reduction input var is a reference"); 747 748 reductionVars.push_back(symVal); 749 } 750 const bool isByRef = doReductionByRef(reductionVars); 751 752 if (const auto &redDefinedOp = 753 std::get_if<omp::clause::DefinedOperator>(&redOperator.u)) { 754 const auto &intrinsicOp{ 755 std::get<omp::clause::DefinedOperator::IntrinsicOperator>( 756 redDefinedOp->u)}; 757 ReductionIdentifier redId = getReductionType(intrinsicOp); 758 switch (redId) { 759 case ReductionIdentifier::ADD: 760 case ReductionIdentifier::MULTIPLY: 761 case ReductionIdentifier::AND: 762 case ReductionIdentifier::EQV: 763 case ReductionIdentifier::OR: 764 case ReductionIdentifier::NEQV: 765 break; 766 default: 767 TODO(currentLocation, 768 "Reduction of some intrinsic operators is not supported"); 769 break; 770 } 771 772 for (mlir::Value symVal : reductionVars) { 773 auto redType = mlir::cast<fir::ReferenceType>(symVal.getType()); 774 const auto &kindMap = firOpBuilder.getKindMap(); 775 if (redType.getEleTy().isa<fir::LogicalType>()) 776 decl = createDeclareReduction(firOpBuilder, 777 getReductionName(intrinsicOp, kindMap, 778 firOpBuilder.getI1Type(), 779 isByRef), 780 redId, redType, currentLocation, isByRef); 781 else 782 decl = createDeclareReduction( 783 firOpBuilder, 784 getReductionName(intrinsicOp, kindMap, redType, isByRef), redId, 785 redType, currentLocation, isByRef); 786 reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get( 787 firOpBuilder.getContext(), decl.getSymName())); 788 } 789 } else if (const auto *reductionIntrinsic = 790 std::get_if<omp::clause::ProcedureDesignator>( 791 &redOperator.u)) { 792 if (ReductionProcessor::supportedIntrinsicProcReduction( 793 *reductionIntrinsic)) { 794 ReductionProcessor::ReductionIdentifier redId = 795 ReductionProcessor::getReductionType(*reductionIntrinsic); 796 for (const Object &object : objectList) { 797 const Fortran::semantics::Symbol *symbol = object.id(); 798 mlir::Value symVal = converter.getSymbolAddress(*symbol); 799 if (auto declOp = symVal.getDefiningOp<hlfir::DeclareOp>()) 800 symVal = declOp.getBase(); 801 auto redType = symVal.getType().cast<fir::ReferenceType>(); 802 if (!redType.getEleTy().isIntOrIndexOrFloat()) 803 TODO(currentLocation, "User Defined Reduction on non-trivial type"); 804 decl = createDeclareReduction( 805 firOpBuilder, 806 getReductionName(getRealName(*reductionIntrinsic).ToString(), 807 firOpBuilder.getKindMap(), redType, isByRef), 808 redId, redType, currentLocation, isByRef); 809 reductionDeclSymbols.push_back(mlir::SymbolRefAttr::get( 810 firOpBuilder.getContext(), decl.getSymName())); 811 } 812 } 813 } 814 } 815 816 const Fortran::semantics::SourceName 817 ReductionProcessor::getRealName(const Fortran::semantics::Symbol *symbol) { 818 return symbol->GetUltimate().name(); 819 } 820 821 const Fortran::semantics::SourceName 822 ReductionProcessor::getRealName(const omp::clause::ProcedureDesignator &pd) { 823 return getRealName(pd.v.id()); 824 } 825 826 int ReductionProcessor::getOperationIdentity(ReductionIdentifier redId, 827 mlir::Location loc) { 828 switch (redId) { 829 case ReductionIdentifier::ADD: 830 case ReductionIdentifier::OR: 831 case ReductionIdentifier::NEQV: 832 return 0; 833 case ReductionIdentifier::MULTIPLY: 834 case ReductionIdentifier::AND: 835 case ReductionIdentifier::EQV: 836 return 1; 837 default: 838 TODO(loc, "Reduction of some intrinsic operators is not supported"); 839 } 840 } 841 842 } // namespace omp 843 } // namespace lower 844 } // namespace Fortran 845