1 //===-- runtime/reduction-templates.h ---------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 // Generic function templates used by various reduction transformation 10 // intrinsic functions (SUM, PRODUCT, &c.) 11 // 12 // * Partial reductions (i.e., those with DIM= arguments that are not 13 // required to be 1 by the rank of the argument) return arrays that 14 // are dynamically allocated in a caller-supplied descriptor. 15 // * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC 16 // return integer vectors of some kind, not scalars; a caller-supplied 17 // descriptor is used 18 // * Character-valued reductions (MAXVAL & MINVAL) return arbitrary 19 // length results, dynamically allocated in a caller-supplied descriptor 20 21 #ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_ 22 #define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_ 23 24 #include "numeric-templates.h" 25 #include "terminator.h" 26 #include "tools.h" 27 #include "flang/Runtime/cpp-type.h" 28 #include "flang/Runtime/descriptor.h" 29 #include <algorithm> 30 31 namespace Fortran::runtime { 32 33 // Reductions are implemented with *accumulators*, which are instances of 34 // classes that incrementally build up the result (or an element thereof) during 35 // a traversal of the unmasked elements of an array. Each accumulator class 36 // supports a constructor (which captures a reference to the array), an 37 // AccumulateAt() member function that applies supplied subscripts to the 38 // array and does something with a scalar element, and a GetResult() 39 // member function that copies a final result into its destination. 40 41 // Total reduction of the array argument to a scalar (or to a vector in the 42 // cases of FINDLOC, MAXLOC, & MINLOC). These are the cases without DIM= or 43 // cases where the argument has rank 1 and DIM=, if present, must be 1. 44 template <typename TYPE, typename ACCUMULATOR> 45 inline RT_API_ATTRS void DoTotalReduction(const Descriptor &x, int dim, 46 const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic, 47 Terminator &terminator) { 48 if (dim < 0 || dim > 1) { 49 terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d", 50 intrinsic, dim, x.rank()); 51 } 52 SubscriptValue xAt[maxRank]; 53 x.GetLowerBounds(xAt); 54 if (mask) { 55 CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK"); 56 if (mask->rank() > 0) { 57 SubscriptValue maskAt[maxRank]; 58 mask->GetLowerBounds(maskAt); 59 for (auto elements{x.Elements()}; elements--; 60 x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) { 61 if (IsLogicalElementTrue(*mask, maskAt)) { 62 if (!accumulator.template AccumulateAt<TYPE>(xAt)) { 63 break; 64 } 65 } 66 } 67 return; 68 } else if (!IsLogicalScalarTrue(*mask)) { 69 // scalar MASK=.FALSE.: return identity value 70 return; 71 } 72 } 73 // No MASK=, or scalar MASK=.TRUE. 74 for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) { 75 if (!accumulator.template AccumulateAt<TYPE>(xAt)) { 76 break; // cut short, result is known 77 } 78 } 79 } 80 81 template <TypeCategory CAT, int KIND, typename ACCUMULATOR> 82 inline RT_API_ATTRS CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x, 83 const char *source, int line, int dim, const Descriptor *mask, 84 ACCUMULATOR &&accumulator, const char *intrinsic, 85 bool allowUnsignedForInteger = false) { 86 Terminator terminator{source, line}; 87 RUNTIME_CHECK(terminator, 88 TypeCode(CAT, KIND) == x.type() || 89 (CAT == TypeCategory::Integer && allowUnsignedForInteger && 90 TypeCode(TypeCategory::Unsigned, KIND) == x.type())); 91 using CppType = CppTypeFor<CAT, KIND>; 92 DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator); 93 if constexpr (std::is_void_v<CppType>) { 94 // Result is returned from accumulator, as in REDUCE() for derived type 95 #ifdef _MSC_VER // work around MSVC spurious error 96 accumulator.GetResult(); 97 #else 98 accumulator.template GetResult<CppType>(); 99 #endif 100 } else { 101 CppType result; 102 #ifdef _MSC_VER // work around MSVC spurious error 103 accumulator.GetResult(&result); 104 #else 105 accumulator.template GetResult<CppType>(&result); 106 #endif 107 return result; 108 } 109 } 110 111 // For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape 112 // of the array is [2,3,5], the shape of the result is [2,5] and 113 // result(j,k) = SUM(array(j,:,k)), possibly modified if the array has 114 // lower bounds other than one. This utility subroutine creates an 115 // array of subscripts [j,_,k] for result subscripts [j,k] so that the 116 // elements of array(j,:,k) can be reduced. 117 inline RT_API_ATTRS void GetExpandedSubscripts(SubscriptValue at[], 118 const Descriptor &descriptor, int zeroBasedDim, 119 const SubscriptValue from[]) { 120 descriptor.GetLowerBounds(at); 121 int rank{descriptor.rank()}; 122 int j{0}; 123 for (; j < zeroBasedDim; ++j) { 124 at[j] += from[j] - 1 /*lower bound*/; 125 } 126 for (++j; j < rank; ++j) { 127 at[j] += from[j - 1] - 1; 128 } 129 } 130 131 template <typename TYPE, typename ACCUMULATOR> 132 inline RT_API_ATTRS void ReduceDimToScalar(const Descriptor &x, 133 int zeroBasedDim, SubscriptValue subscripts[], TYPE *result, 134 ACCUMULATOR &accumulator) { 135 SubscriptValue xAt[maxRank]; 136 GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts); 137 const auto &dim{x.GetDimension(zeroBasedDim)}; 138 SubscriptValue at{dim.LowerBound()}; 139 for (auto n{dim.Extent()}; n-- > 0; ++at) { 140 xAt[zeroBasedDim] = at; 141 if (!accumulator.template AccumulateAt<TYPE>(xAt)) { 142 break; 143 } 144 } 145 #ifdef _MSC_VER // work around MSVC spurious error 146 accumulator.GetResult(result, zeroBasedDim); 147 #else 148 accumulator.template GetResult<TYPE>(result, zeroBasedDim); 149 #endif 150 } 151 152 template <typename TYPE, typename ACCUMULATOR> 153 inline RT_API_ATTRS void ReduceDimMaskToScalar(const Descriptor &x, 154 int zeroBasedDim, SubscriptValue subscripts[], const Descriptor &mask, 155 TYPE *result, ACCUMULATOR &accumulator) { 156 SubscriptValue xAt[maxRank], maskAt[maxRank]; 157 GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts); 158 GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts); 159 const auto &xDim{x.GetDimension(zeroBasedDim)}; 160 SubscriptValue xPos{xDim.LowerBound()}; 161 const auto &maskDim{mask.GetDimension(zeroBasedDim)}; 162 SubscriptValue maskPos{maskDim.LowerBound()}; 163 for (auto n{x.GetDimension(zeroBasedDim).Extent()}; n-- > 0; 164 ++xPos, ++maskPos) { 165 maskAt[zeroBasedDim] = maskPos; 166 if (IsLogicalElementTrue(mask, maskAt)) { 167 xAt[zeroBasedDim] = xPos; 168 if (!accumulator.template AccumulateAt<TYPE>(xAt)) { 169 break; 170 } 171 } 172 } 173 #ifdef _MSC_VER // work around MSVC spurious error 174 accumulator.GetResult(result, zeroBasedDim); 175 #else 176 accumulator.template GetResult<TYPE>(result, zeroBasedDim); 177 #endif 178 } 179 180 // Partial reductions with DIM= 181 182 template <typename ACCUMULATOR, TypeCategory CAT, int KIND> 183 inline RT_API_ATTRS void PartialReduction(Descriptor &result, 184 const Descriptor &x, std::size_t resultElementSize, int dim, 185 const Descriptor *mask, Terminator &terminator, const char *intrinsic, 186 ACCUMULATOR &accumulator) { 187 CreatePartialReductionResult(result, x, resultElementSize, dim, terminator, 188 intrinsic, TypeCode{CAT, KIND}); 189 SubscriptValue at[maxRank]; 190 result.GetLowerBounds(at); 191 INTERNAL_CHECK(result.rank() == 0 || at[0] == 1); 192 using CppType = CppTypeFor<CAT, KIND>; 193 if (mask) { 194 CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK"); 195 if (mask->rank() > 0) { 196 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) { 197 accumulator.Reinitialize(); 198 ReduceDimMaskToScalar<CppType, ACCUMULATOR>( 199 x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator); 200 } 201 return; 202 } else if (!IsLogicalScalarTrue(*mask)) { 203 // scalar MASK=.FALSE. 204 accumulator.Reinitialize(); 205 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) { 206 accumulator.GetResult(result.Element<CppType>(at)); 207 } 208 return; 209 } 210 } 211 // No MASK= or scalar MASK=.TRUE. 212 for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) { 213 accumulator.Reinitialize(); 214 ReduceDimToScalar<CppType, ACCUMULATOR>( 215 x, dim - 1, at, result.Element<CppType>(at), accumulator); 216 } 217 } 218 219 template <template <typename> class ACCUM> 220 struct PartialIntegerReductionHelper { 221 template <int KIND> struct Functor { 222 static constexpr int Intermediate{ 223 std::max(KIND, 4)}; // use at least "int" for intermediate results 224 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, 225 int dim, const Descriptor *mask, Terminator &terminator, 226 const char *intrinsic) const { 227 using Accumulator = 228 ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>; 229 Accumulator accumulator{x}; 230 // Element size of the destination descriptor is the same 231 // as the element size of the source. 232 PartialReduction<Accumulator, TypeCategory::Integer, KIND>(result, x, 233 x.ElementBytes(), dim, mask, terminator, intrinsic, accumulator); 234 } 235 }; 236 }; 237 238 template <template <typename> class INTEGER_ACCUM> 239 inline RT_API_ATTRS void PartialIntegerReduction(Descriptor &result, 240 const Descriptor &x, int dim, int kind, const Descriptor *mask, 241 const char *intrinsic, Terminator &terminator) { 242 ApplyIntegerKind< 243 PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>( 244 kind, terminator, result, x, dim, mask, terminator, intrinsic); 245 } 246 247 template <TypeCategory CAT, template <typename> class ACCUM, int MIN_KIND> 248 struct PartialFloatingReductionHelper { 249 template <int KIND> struct Functor { 250 static constexpr int Intermediate{std::max(KIND, MIN_KIND)}; 251 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, 252 int dim, const Descriptor *mask, Terminator &terminator, 253 const char *intrinsic) const { 254 using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>; 255 Accumulator accumulator{x}; 256 // Element size of the destination descriptor is the same 257 // as the element size of the source. 258 PartialReduction<Accumulator, CAT, KIND>(result, x, x.ElementBytes(), dim, 259 mask, terminator, intrinsic, accumulator); 260 } 261 }; 262 }; 263 264 template <template <typename> class INTEGER_ACCUM, 265 template <typename> class REAL_ACCUM, 266 template <typename> class COMPLEX_ACCUM, int MIN_REAL_KIND> 267 inline RT_API_ATTRS void TypedPartialNumericReduction(Descriptor &result, 268 const Descriptor &x, int dim, const char *source, int line, 269 const Descriptor *mask, const char *intrinsic) { 270 Terminator terminator{source, line}; 271 auto catKind{x.type().GetCategoryAndKind()}; 272 RUNTIME_CHECK(terminator, catKind.has_value()); 273 switch (catKind->first) { 274 case TypeCategory::Integer: 275 PartialIntegerReduction<INTEGER_ACCUM>( 276 result, x, dim, catKind->second, mask, intrinsic, terminator); 277 break; 278 case TypeCategory::Real: 279 ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real, 280 REAL_ACCUM, MIN_REAL_KIND>::template Functor, 281 void>(catKind->second, terminator, result, x, dim, mask, terminator, 282 intrinsic); 283 break; 284 case TypeCategory::Complex: 285 ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex, 286 COMPLEX_ACCUM, MIN_REAL_KIND>::template Functor, 287 void>(catKind->second, terminator, result, x, dim, mask, terminator, 288 intrinsic); 289 break; 290 default: 291 terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw()); 292 } 293 } 294 295 template <typename ACCUMULATOR> struct LocationResultHelper { 296 template <int KIND> struct Functor { 297 RT_API_ATTRS void operator()( 298 ACCUMULATOR &accumulator, const Descriptor &result) const { 299 accumulator.GetResult( 300 result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>()); 301 } 302 }; 303 }; 304 305 template <typename ACCUMULATOR> struct PartialLocationHelper { 306 template <int KIND> struct Functor { 307 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, 308 int dim, const Descriptor *mask, Terminator &terminator, 309 const char *intrinsic, ACCUMULATOR &accumulator) const { 310 // Element size of the destination descriptor is the size 311 // of {TypeCategory::Integer, KIND}. 312 PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(result, x, 313 Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, mask, 314 terminator, intrinsic, accumulator); 315 } 316 }; 317 }; 318 319 // NORM2 templates 320 321 RT_VAR_GROUP_BEGIN 322 323 // Use at least double precision for accumulators. 324 // Don't use __float128, it doesn't work with abs() or sqrt() yet. 325 static constexpr RT_CONST_VAR_ATTRS int Norm2LargestLDKind{ 326 #if HAS_LDBL128 || HAS_FLOAT128 327 16 328 #elif HAS_FLOAT80 329 10 330 #else 331 8 332 #endif 333 }; 334 335 RT_VAR_GROUP_END 336 337 template <TypeCategory CAT, int KIND, typename ACCUMULATOR> 338 inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x, 339 int dim, const Descriptor *mask, const char *intrinsic, 340 Terminator &terminator) { 341 using Type = CppTypeFor<CAT, KIND>; 342 ACCUMULATOR accumulator{x}; 343 if (dim == 0 || x.rank() == 1) { 344 // Total reduction 345 346 // Element size of the destination descriptor is the same 347 // as the element size of the source. 348 result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr, 349 CFI_attribute_allocatable); 350 if (int stat{result.Allocate()}) { 351 terminator.Crash( 352 "%s: could not allocate memory for result; STAT=%d", intrinsic, stat); 353 } 354 DoTotalReduction<Type>(x, dim, mask, accumulator, intrinsic, terminator); 355 accumulator.GetResult(result.OffsetElement<Type>()); 356 } else { 357 // Partial reduction 358 359 // Element size of the destination descriptor is the same 360 // as the element size of the source. 361 PartialReduction<ACCUMULATOR, CAT, KIND>(result, x, x.ElementBytes(), dim, 362 mask, terminator, intrinsic, accumulator); 363 } 364 } 365 366 // The data type used by Norm2Accumulator. 367 template <int KIND> 368 using Norm2AccumType = 369 CppTypeFor<TypeCategory::Real, std::clamp(KIND, 8, Norm2LargestLDKind)>; 370 371 template <int KIND> class Norm2Accumulator { 372 public: 373 using Type = CppTypeFor<TypeCategory::Real, KIND>; 374 using AccumType = Norm2AccumType<KIND>; 375 explicit RT_API_ATTRS Norm2Accumulator(const Descriptor &array) 376 : array_{array} {} 377 RT_API_ATTRS void Reinitialize() { max_ = sum_ = 0; } 378 template <typename A> 379 RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const { 380 // m * sqrt(1 + sum((others(:)/m)**2)) 381 *p = static_cast<Type>(max_ * SQRTTy<AccumType>::compute(1 + sum_)); 382 } 383 RT_API_ATTRS bool Accumulate(Type x) { 384 auto absX{ABSTy<AccumType>::compute(static_cast<AccumType>(x))}; 385 if (!max_) { 386 max_ = absX; 387 } else if (absX > max_) { 388 auto t{max_ / absX}; // < 1.0 389 auto tsq{t * t}; 390 sum_ *= tsq; // scale sum to reflect change to the max 391 sum_ += tsq; // include a term for the previous max 392 max_ = absX; 393 } else { // absX <= max_ 394 auto t{absX / max_}; 395 sum_ += t * t; 396 } 397 return true; 398 } 399 template <typename A> 400 RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) { 401 return Accumulate(*array_.Element<A>(at)); 402 } 403 404 private: 405 const Descriptor &array_; 406 AccumType max_{0}; // value (m) with largest magnitude 407 AccumType sum_{0}; // sum((others(:)/m)**2) 408 }; 409 410 template <int KIND> struct Norm2Helper { 411 RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, int dim, 412 const Descriptor *mask, Terminator &terminator) const { 413 DoMaxMinNorm2<TypeCategory::Real, KIND, Norm2Accumulator<KIND>>( 414 result, x, dim, mask, "NORM2", terminator); 415 } 416 }; 417 418 } // namespace Fortran::runtime 419 #endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_ 420