//===-- runtime/reduction-templates.h ---------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// Generic function templates used by various reduction transformation
// intrinsic functions (SUM, PRODUCT, &c.)
//
// * Partial reductions (i.e., those with DIM= arguments that are not
//   required to be 1 by the rank of the argument) return arrays that
//   are dynamically allocated in a caller-supplied descriptor.
// * Total reductions (i.e., no DIM= argument) with FINDLOC, MAXLOC, & MINLOC
//   return integer vectors of some kind, not scalars; a caller-supplied
//   descriptor is used.
// * Character-valued reductions (MAXVAL & MINVAL) return arbitrary
//   length results, dynamically allocated in a caller-supplied descriptor.

#ifndef FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_
#define FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_

#include "numeric-templates.h"
#include "terminator.h"
#include "tools.h"
#include "flang/Runtime/cpp-type.h"
#include "flang/Runtime/descriptor.h"
#include <algorithm>

namespace Fortran::runtime {

// Reductions are implemented with *accumulators*, which are instances of
// classes that incrementally build up the result (or an element thereof) during
// a traversal of the unmasked elements of an array.  Each accumulator class
// supports a constructor (which captures a reference to the array), an
// AccumulateAt() member function that applies supplied subscripts to the
// array and does something with a scalar element, and a GetResult()
// member function that copies a final result into its destination.
// Total reduction of the array argument to a scalar (or to a vector in the
// cases of FINDLOC, MAXLOC, & MINLOC).  These are the cases without DIM= or
// cases where the argument has rank 1 and DIM=, if present, must be 1.
template <typename TYPE, typename ACCUMULATOR>
inline RT_API_ATTRS void DoTotalReduction(const Descriptor &x, int dim,
    const Descriptor *mask, ACCUMULATOR &accumulator, const char *intrinsic,
    Terminator &terminator) {
  if (dim < 0 || dim > 1) {
    terminator.Crash("%s: bad DIM=%d for ARRAY argument with rank %d",
        intrinsic, dim, x.rank());
  }
  SubscriptValue xAt[maxRank];
  x.GetLowerBounds(xAt);
  if (mask) {
    CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
    if (mask->rank() > 0) {
      SubscriptValue maskAt[maxRank];
      mask->GetLowerBounds(maskAt);
      for (auto elements{x.Elements()}; elements--;
           x.IncrementSubscripts(xAt), mask->IncrementSubscripts(maskAt)) {
        if (IsLogicalElementTrue(*mask, maskAt)) {
          if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
            break;
          }
        }
      }
      return;
    } else if (!IsLogicalScalarTrue(*mask)) {
      // scalar MASK=.FALSE.: return identity value
      return;
    }
  }
  // No MASK=, or scalar MASK=.TRUE.
  for (auto elements{x.Elements()}; elements--; x.IncrementSubscripts(xAt)) {
    if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
      break; // cut short, result is known
    }
  }
}

template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
inline RT_API_ATTRS CppTypeFor<CAT, KIND> GetTotalReduction(const Descriptor &x,
    const char *source, int line, int dim, const Descriptor *mask,
    ACCUMULATOR &&accumulator, const char *intrinsic,
    bool allowUnsignedForInteger = false) {
  Terminator terminator{source, line};
  RUNTIME_CHECK(terminator,
      TypeCode(CAT, KIND) == x.type() ||
          (CAT == TypeCategory::Integer && allowUnsignedForInteger &&
              TypeCode(TypeCategory::Unsigned, KIND) == x.type()));
  using CppType = CppTypeFor<CAT, KIND>;
  DoTotalReduction<CppType>(x, dim, mask, accumulator, intrinsic, terminator);
  if constexpr (std::is_void_v<CppType>) {
    // Result is returned from accumulator, as in REDUCE() for derived type
#ifdef _MSC_VER // work around MSVC spurious error
    accumulator.GetResult();
#else
    accumulator.template GetResult<CppType>();
#endif
  } else {
    CppType result;
#ifdef _MSC_VER // work around MSVC spurious error
    accumulator.GetResult(&result);
#else
    accumulator.template GetResult<CppType>(&result);
#endif
    return result;
  }
}

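// Illustrative (hypothetical) use of GetTotalReduction; the real external
// entry points, and the accumulator types that they pass, are defined in the
// individual reduction implementation files:
//
//   std::int32_t TotalSum4(const Descriptor &x, const char *source, int line,
//       int dim, const Descriptor *mask, SomeSumAccumulator &&accumulator) {
//     return GetTotalReduction<TypeCategory::Integer, 4>(
//         x, source, line, dim, mask, std::move(accumulator), "SUM");
//   }
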
// For reductions on a dimension, e.g. SUM(array,DIM=2) where the shape
// of the array is [2,3,5], the shape of the result is [2,5] and
// result(j,k) = SUM(array(j,:,k)), possibly modified if the array has
// lower bounds other than one.  This utility subroutine creates an
// array of subscripts [j,_,k] for result subscripts [j,k] so that the
// elements of array(j,:,k) can be reduced.
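//
// Worked example (for illustration only, assuming all lower bounds are 1):
// with zeroBasedDim==1 (i.e., DIM=2) and result subscripts from[] = {2, 4},
// the expanded subscripts are at[] = {2, _, 4}, and the caller then sweeps
// at[1] across the extent of the array's second dimension so that the
// elements of array(2,:,4) are visited.  Concretely, for the rank-2 case
// SUM(RESHAPE([1,2,3,4,5,6],[2,3]), DIM=2), the result is [9, 12], since
// array(1,:) = [1,3,5] and array(2,:) = [2,4,6].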
inline RT_API_ATTRS void GetExpandedSubscripts(SubscriptValue at[],
    const Descriptor &descriptor, int zeroBasedDim,
    const SubscriptValue from[]) {
  descriptor.GetLowerBounds(at);
  int rank{descriptor.rank()};
  int j{0};
  for (; j < zeroBasedDim; ++j) {
    at[j] += from[j] - 1 /*lower bound*/;
  }
  for (++j; j < rank; ++j) {
    at[j] += from[j - 1] - 1;
  }
}

template <typename TYPE, typename ACCUMULATOR>
inline RT_API_ATTRS void ReduceDimToScalar(const Descriptor &x,
    int zeroBasedDim, SubscriptValue subscripts[], TYPE *result,
    ACCUMULATOR &accumulator) {
  SubscriptValue xAt[maxRank];
  GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
  const auto &dim{x.GetDimension(zeroBasedDim)};
  SubscriptValue at{dim.LowerBound()};
  for (auto n{dim.Extent()}; n-- > 0; ++at) {
    xAt[zeroBasedDim] = at;
    if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
      break;
    }
  }
#ifdef _MSC_VER // work around MSVC spurious error
  accumulator.GetResult(result, zeroBasedDim);
#else
  accumulator.template GetResult<TYPE>(result, zeroBasedDim);
#endif
}

template <typename TYPE, typename ACCUMULATOR>
inline RT_API_ATTRS void ReduceDimMaskToScalar(const Descriptor &x,
    int zeroBasedDim, SubscriptValue subscripts[], const Descriptor &mask,
    TYPE *result, ACCUMULATOR &accumulator) {
  SubscriptValue xAt[maxRank], maskAt[maxRank];
  GetExpandedSubscripts(xAt, x, zeroBasedDim, subscripts);
  GetExpandedSubscripts(maskAt, mask, zeroBasedDim, subscripts);
  const auto &xDim{x.GetDimension(zeroBasedDim)};
  SubscriptValue xPos{xDim.LowerBound()};
  const auto &maskDim{mask.GetDimension(zeroBasedDim)};
  SubscriptValue maskPos{maskDim.LowerBound()};
  for (auto n{xDim.Extent()}; n-- > 0; ++xPos, ++maskPos) {
    maskAt[zeroBasedDim] = maskPos;
    if (IsLogicalElementTrue(mask, maskAt)) {
      xAt[zeroBasedDim] = xPos;
      if (!accumulator.template AccumulateAt<TYPE>(xAt)) {
        break;
      }
    }
  }
#ifdef _MSC_VER // work around MSVC spurious error
  accumulator.GetResult(result, zeroBasedDim);
#else
  accumulator.template GetResult<TYPE>(result, zeroBasedDim);
#endif
}

// Partial reductions with DIM=

template <typename ACCUMULATOR, TypeCategory CAT, int KIND>
inline RT_API_ATTRS void PartialReduction(Descriptor &result,
    const Descriptor &x, std::size_t resultElementSize, int dim,
    const Descriptor *mask, Terminator &terminator, const char *intrinsic,
    ACCUMULATOR &accumulator) {
  CreatePartialReductionResult(result, x, resultElementSize, dim, terminator,
      intrinsic, TypeCode{CAT, KIND});
  SubscriptValue at[maxRank];
  result.GetLowerBounds(at);
  INTERNAL_CHECK(result.rank() == 0 || at[0] == 1);
  using CppType = CppTypeFor<CAT, KIND>;
  if (mask) {
    CheckConformability(x, *mask, terminator, intrinsic, "ARRAY", "MASK");
    if (mask->rank() > 0) {
      for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
        accumulator.Reinitialize();
        ReduceDimMaskToScalar<CppType, ACCUMULATOR>(
            x, dim - 1, at, *mask, result.Element<CppType>(at), accumulator);
      }
      return;
    } else if (!IsLogicalScalarTrue(*mask)) {
      // scalar MASK=.FALSE.
      accumulator.Reinitialize();
      for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
        accumulator.GetResult(result.Element<CppType>(at));
      }
      return;
    }
  }
  // No MASK= or scalar MASK=.TRUE.
  for (auto n{result.Elements()}; n-- > 0; result.IncrementSubscripts(at)) {
    accumulator.Reinitialize();
    ReduceDimToScalar<CppType, ACCUMULATOR>(
        x, dim - 1, at, result.Element<CppType>(at), accumulator);
  }
}

template <template <typename> class ACCUM>
struct PartialIntegerReductionHelper {
  template <int KIND> struct Functor {
    static constexpr int Intermediate{
        std::max(KIND, 4)}; // use at least "int" for intermediate results
    RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
        int dim, const Descriptor *mask, Terminator &terminator,
        const char *intrinsic) const {
      using Accumulator =
          ACCUM<CppTypeFor<TypeCategory::Integer, Intermediate>>;
      Accumulator accumulator{x};
      // Element size of the destination descriptor is the same
      // as the element size of the source.
      PartialReduction<Accumulator, TypeCategory::Integer, KIND>(result, x,
          x.ElementBytes(), dim, mask, terminator, intrinsic, accumulator);
    }
  };
};

template <template <typename> class INTEGER_ACCUM>
inline RT_API_ATTRS void PartialIntegerReduction(Descriptor &result,
    const Descriptor &x, int dim, int kind, const Descriptor *mask,
    const char *intrinsic, Terminator &terminator) {
  ApplyIntegerKind<
      PartialIntegerReductionHelper<INTEGER_ACCUM>::template Functor, void>(
      kind, terminator, result, x, dim, mask, terminator, intrinsic);
}

template <TypeCategory CAT, template <typename> class ACCUM, int MIN_KIND>
struct PartialFloatingReductionHelper {
  template <int KIND> struct Functor {
    static constexpr int Intermediate{std::max(KIND, MIN_KIND)};
    RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
        int dim, const Descriptor *mask, Terminator &terminator,
        const char *intrinsic) const {
      using Accumulator = ACCUM<CppTypeFor<TypeCategory::Real, Intermediate>>;
      Accumulator accumulator{x};
      // Element size of the destination descriptor is the same
      // as the element size of the source.
      PartialReduction<Accumulator, CAT, KIND>(result, x, x.ElementBytes(), dim,
          mask, terminator, intrinsic, accumulator);
    }
  };
};

template <template <typename> class INTEGER_ACCUM,
    template <typename> class REAL_ACCUM,
    template <typename> class COMPLEX_ACCUM, int MIN_REAL_KIND>
inline RT_API_ATTRS void TypedPartialNumericReduction(Descriptor &result,
    const Descriptor &x, int dim, const char *source, int line,
    const Descriptor *mask, const char *intrinsic) {
  Terminator terminator{source, line};
  auto catKind{x.type().GetCategoryAndKind()};
  RUNTIME_CHECK(terminator, catKind.has_value());
  switch (catKind->first) {
  case TypeCategory::Integer:
    PartialIntegerReduction<INTEGER_ACCUM>(
        result, x, dim, catKind->second, mask, intrinsic, terminator);
    break;
  case TypeCategory::Real:
    ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Real,
                               REAL_ACCUM, MIN_REAL_KIND>::template Functor,
        void>(catKind->second, terminator, result, x, dim, mask, terminator,
        intrinsic);
    break;
  case TypeCategory::Complex:
    ApplyFloatingPointKind<PartialFloatingReductionHelper<TypeCategory::Complex,
                               COMPLEX_ACCUM, MIN_REAL_KIND>::template Functor,
        void>(catKind->second, terminator, result, x, dim, mask, terminator,
        intrinsic);
    break;
  default:
    terminator.Crash("%s: bad type code %d", intrinsic, x.type().raw());
  }
}

template <typename ACCUMULATOR> struct LocationResultHelper {
  template <int KIND> struct Functor {
    RT_API_ATTRS void operator()(
        ACCUMULATOR &accumulator, const Descriptor &result) const {
      accumulator.GetResult(
          result.OffsetElement<CppTypeFor<TypeCategory::Integer, KIND>>());
    }
  };
};

template <typename ACCUMULATOR> struct PartialLocationHelper {
  template <int KIND> struct Functor {
    RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x,
        int dim, const Descriptor *mask, Terminator &terminator,
        const char *intrinsic, ACCUMULATOR &accumulator) const {
      // Element size of the destination descriptor is the size
      // of {TypeCategory::Integer, KIND}.
      PartialReduction<ACCUMULATOR, TypeCategory::Integer, KIND>(result, x,
          Descriptor::BytesFor(TypeCategory::Integer, KIND), dim, mask,
          terminator, intrinsic, accumulator);
    }
  };
};
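
// These helpers are kind-dispatch functors; the FINDLOC, MAXLOC, & MINLOC
// implementations are expected to instantiate them via ApplyIntegerKind<>
// keyed on the KIND= of the integer result, roughly (illustrative only,
// with ACC standing in for some accumulator type):
//
//   ApplyIntegerKind<LocationResultHelper<ACC>::template Functor, void>(
//       kind, terminator, accumulator, result);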

// NORM2 templates

RT_VAR_GROUP_BEGIN

// Use at least double precision for accumulators.
// Don't use __float128; it doesn't work with abs() or sqrt() yet.
static constexpr RT_CONST_VAR_ATTRS int Norm2LargestLDKind{
#if HAS_LDBL128 || HAS_FLOAT128
    16
#elif HAS_FLOAT80
    10
#else
    8
#endif
};

RT_VAR_GROUP_END

template <TypeCategory CAT, int KIND, typename ACCUMULATOR>
inline RT_API_ATTRS void DoMaxMinNorm2(Descriptor &result, const Descriptor &x,
    int dim, const Descriptor *mask, const char *intrinsic,
    Terminator &terminator) {
  using Type = CppTypeFor<CAT, KIND>;
  ACCUMULATOR accumulator{x};
  if (dim == 0 || x.rank() == 1) {
    // Total reduction

    // Element size of the destination descriptor is the same
    // as the element size of the source.
    result.Establish(x.type(), x.ElementBytes(), nullptr, 0, nullptr,
        CFI_attribute_allocatable);
    if (int stat{result.Allocate()}) {
      terminator.Crash(
          "%s: could not allocate memory for result; STAT=%d", intrinsic, stat);
    }
    DoTotalReduction<Type>(x, dim, mask, accumulator, intrinsic, terminator);
    accumulator.GetResult(result.OffsetElement<Type>());
  } else {
    // Partial reduction

    // Element size of the destination descriptor is the same
    // as the element size of the source.
    PartialReduction<ACCUMULATOR, CAT, KIND>(result, x, x.ElementBytes(), dim,
        mask, terminator, intrinsic, accumulator);
  }
}

// The data type used by Norm2Accumulator.
template <int KIND>
using Norm2AccumType =
    CppTypeFor<TypeCategory::Real, std::clamp(KIND, 8, Norm2LargestLDKind)>;

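// Norm2Accumulator computes the Euclidean norm without overflowing on the
// intermediate squares: it tracks the largest magnitude seen so far (m) and
// the sum of (x/m)**2 over the other elements, so that the result
// m * sqrt(1 + sum) equals sqrt(sum of all x**2) while every individual
// squared term stays at most 1.  For example, accumulating [3, 4] leaves
// m == 4 and sum == (3/4)**2 == 0.5625, and 4 * sqrt(1.5625) == 5.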
template <int KIND> class Norm2Accumulator {
public:
  using Type = CppTypeFor<TypeCategory::Real, KIND>;
  using AccumType = Norm2AccumType<KIND>;
  explicit RT_API_ATTRS Norm2Accumulator(const Descriptor &array)
      : array_{array} {}
  RT_API_ATTRS void Reinitialize() { max_ = sum_ = 0; }
  template <typename A>
  RT_API_ATTRS void GetResult(A *p, int /*zeroBasedDim*/ = -1) const {
    // m * sqrt(1 + sum((others(:)/m)**2))
    *p = static_cast<Type>(max_ * SQRTTy<AccumType>::compute(1 + sum_));
  }
  RT_API_ATTRS bool Accumulate(Type x) {
    auto absX{ABSTy<AccumType>::compute(static_cast<AccumType>(x))};
    if (!max_) {
      max_ = absX;
    } else if (absX > max_) {
      auto t{max_ / absX}; // < 1.0
      auto tsq{t * t};
      sum_ *= tsq; // scale sum to reflect change to the max
      sum_ += tsq; // include a term for the previous max
      max_ = absX;
    } else { // absX <= max_
      auto t{absX / max_};
      sum_ += t * t;
    }
    return true;
  }
  template <typename A>
  RT_API_ATTRS bool AccumulateAt(const SubscriptValue at[]) {
    return Accumulate(*array_.Element<A>(at));
  }

private:
  const Descriptor &array_;
  AccumType max_{0}; // value (m) with largest magnitude
  AccumType sum_{0}; // sum((others(:)/m)**2)
};

template <int KIND> struct Norm2Helper {
  RT_API_ATTRS void operator()(Descriptor &result, const Descriptor &x, int dim,
      const Descriptor *mask, Terminator &terminator) const {
    DoMaxMinNorm2<TypeCategory::Real, KIND, Norm2Accumulator<KIND>>(
        result, x, dim, mask, "NORM2", terminator);
  }
};
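
// The NORM2 entry points are expected to dispatch on the real KIND of the
// argument via ApplyFloatingPointKind<>, e.g. (illustrative only):
//
//   ApplyFloatingPointKind<Norm2Helper, void>(
//       kind, terminator, result, x, dim, mask, terminator);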

} // namespace Fortran::runtime
#endif // FORTRAN_RUNTIME_REDUCTION_TEMPLATES_H_