1627f7eb2Smrg /* Implementation of the NORM2 intrinsic
2*4c3eb207Smrg Copyright (C) 2010-2020 Free Software Foundation, Inc.
3627f7eb2Smrg Contributed by Tobias Burnus <burnus@net-b.de>
4627f7eb2Smrg
5627f7eb2Smrg This file is part of the GNU Fortran runtime library (libgfortran).
6627f7eb2Smrg
7627f7eb2Smrg Libgfortran is free software; you can redistribute it and/or
8627f7eb2Smrg modify it under the terms of the GNU General Public
9627f7eb2Smrg License as published by the Free Software Foundation; either
10627f7eb2Smrg version 3 of the License, or (at your option) any later version.
11627f7eb2Smrg
12627f7eb2Smrg Libgfortran is distributed in the hope that it will be useful,
13627f7eb2Smrg but WITHOUT ANY WARRANTY; without even the implied warranty of
14627f7eb2Smrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15627f7eb2Smrg GNU General Public License for more details.
16627f7eb2Smrg
17627f7eb2Smrg Under Section 7 of GPL version 3, you are granted additional
18627f7eb2Smrg permissions described in the GCC Runtime Library Exception, version
19627f7eb2Smrg 3.1, as published by the Free Software Foundation.
20627f7eb2Smrg
21627f7eb2Smrg You should have received a copy of the GNU General Public License and
22627f7eb2Smrg a copy of the GCC Runtime Library Exception along with this program;
23627f7eb2Smrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24627f7eb2Smrg <http://www.gnu.org/licenses/>. */
25627f7eb2Smrg
26627f7eb2Smrg #include "libgfortran.h"
27627f7eb2Smrg
28627f7eb2Smrg
29627f7eb2Smrg
30627f7eb2Smrg #if defined (HAVE_GFC_REAL_16) && defined (HAVE_GFC_REAL_16) && (defined(GFC_REAL_16_IS_FLOAT128) || defined(HAVE_SQRTL)) && (defined(GFC_REAL_16_IS_FLOAT128) || defined(HAVE_FABSL))
31627f7eb2Smrg
32627f7eb2Smrg #if defined(GFC_REAL_16_IS_FLOAT128)
33627f7eb2Smrg #define MATHFUNC(funcname) funcname ## q
34627f7eb2Smrg #else
35627f7eb2Smrg #define MATHFUNC(funcname) funcname ## l
36627f7eb2Smrg #endif
37627f7eb2Smrg
38627f7eb2Smrg
39627f7eb2Smrg extern void norm2_r16 (gfc_array_r16 * const restrict,
40627f7eb2Smrg gfc_array_r16 * const restrict, const index_type * const restrict);
41627f7eb2Smrg export_proto(norm2_r16);
42627f7eb2Smrg
43627f7eb2Smrg void
norm2_r16(gfc_array_r16 * const restrict retarray,gfc_array_r16 * const restrict array,const index_type * const restrict pdim)44627f7eb2Smrg norm2_r16 (gfc_array_r16 * const restrict retarray,
45627f7eb2Smrg gfc_array_r16 * const restrict array,
46627f7eb2Smrg const index_type * const restrict pdim)
47627f7eb2Smrg {
48627f7eb2Smrg index_type count[GFC_MAX_DIMENSIONS];
49627f7eb2Smrg index_type extent[GFC_MAX_DIMENSIONS];
50627f7eb2Smrg index_type sstride[GFC_MAX_DIMENSIONS];
51627f7eb2Smrg index_type dstride[GFC_MAX_DIMENSIONS];
52627f7eb2Smrg const GFC_REAL_16 * restrict base;
53627f7eb2Smrg GFC_REAL_16 * restrict dest;
54627f7eb2Smrg index_type rank;
55627f7eb2Smrg index_type n;
56627f7eb2Smrg index_type len;
57627f7eb2Smrg index_type delta;
58627f7eb2Smrg index_type dim;
59627f7eb2Smrg int continue_loop;
60627f7eb2Smrg
61627f7eb2Smrg /* Make dim zero based to avoid confusion. */
62627f7eb2Smrg rank = GFC_DESCRIPTOR_RANK (array) - 1;
63627f7eb2Smrg dim = (*pdim) - 1;
64627f7eb2Smrg
65627f7eb2Smrg if (unlikely (dim < 0 || dim > rank))
66627f7eb2Smrg {
67627f7eb2Smrg runtime_error ("Dim argument incorrect in NORM intrinsic: "
68627f7eb2Smrg "is %ld, should be between 1 and %ld",
69627f7eb2Smrg (long int) dim + 1, (long int) rank + 1);
70627f7eb2Smrg }
71627f7eb2Smrg
72627f7eb2Smrg len = GFC_DESCRIPTOR_EXTENT(array,dim);
73627f7eb2Smrg if (len < 0)
74627f7eb2Smrg len = 0;
75627f7eb2Smrg delta = GFC_DESCRIPTOR_STRIDE(array,dim);
76627f7eb2Smrg
77627f7eb2Smrg for (n = 0; n < dim; n++)
78627f7eb2Smrg {
79627f7eb2Smrg sstride[n] = GFC_DESCRIPTOR_STRIDE(array,n);
80627f7eb2Smrg extent[n] = GFC_DESCRIPTOR_EXTENT(array,n);
81627f7eb2Smrg
82627f7eb2Smrg if (extent[n] < 0)
83627f7eb2Smrg extent[n] = 0;
84627f7eb2Smrg }
85627f7eb2Smrg for (n = dim; n < rank; n++)
86627f7eb2Smrg {
87627f7eb2Smrg sstride[n] = GFC_DESCRIPTOR_STRIDE(array, n + 1);
88627f7eb2Smrg extent[n] = GFC_DESCRIPTOR_EXTENT(array, n + 1);
89627f7eb2Smrg
90627f7eb2Smrg if (extent[n] < 0)
91627f7eb2Smrg extent[n] = 0;
92627f7eb2Smrg }
93627f7eb2Smrg
94627f7eb2Smrg if (retarray->base_addr == NULL)
95627f7eb2Smrg {
96627f7eb2Smrg size_t alloc_size, str;
97627f7eb2Smrg
98627f7eb2Smrg for (n = 0; n < rank; n++)
99627f7eb2Smrg {
100627f7eb2Smrg if (n == 0)
101627f7eb2Smrg str = 1;
102627f7eb2Smrg else
103627f7eb2Smrg str = GFC_DESCRIPTOR_STRIDE(retarray,n-1) * extent[n-1];
104627f7eb2Smrg
105627f7eb2Smrg GFC_DIMENSION_SET(retarray->dim[n], 0, extent[n] - 1, str);
106627f7eb2Smrg
107627f7eb2Smrg }
108627f7eb2Smrg
109627f7eb2Smrg retarray->offset = 0;
110627f7eb2Smrg retarray->dtype.rank = rank;
111627f7eb2Smrg
112627f7eb2Smrg alloc_size = GFC_DESCRIPTOR_STRIDE(retarray,rank-1) * extent[rank-1];
113627f7eb2Smrg
114627f7eb2Smrg retarray->base_addr = xmallocarray (alloc_size, sizeof (GFC_REAL_16));
115627f7eb2Smrg if (alloc_size == 0)
116627f7eb2Smrg {
117627f7eb2Smrg /* Make sure we have a zero-sized array. */
118627f7eb2Smrg GFC_DIMENSION_SET(retarray->dim[0], 0, -1, 1);
119627f7eb2Smrg return;
120627f7eb2Smrg
121627f7eb2Smrg }
122627f7eb2Smrg }
123627f7eb2Smrg else
124627f7eb2Smrg {
125627f7eb2Smrg if (rank != GFC_DESCRIPTOR_RANK (retarray))
126627f7eb2Smrg runtime_error ("rank of return array incorrect in"
127627f7eb2Smrg " NORM intrinsic: is %ld, should be %ld",
128627f7eb2Smrg (long int) (GFC_DESCRIPTOR_RANK (retarray)),
129627f7eb2Smrg (long int) rank);
130627f7eb2Smrg
131627f7eb2Smrg if (unlikely (compile_options.bounds_check))
132627f7eb2Smrg bounds_ifunction_return ((array_t *) retarray, extent,
133627f7eb2Smrg "return value", "NORM");
134627f7eb2Smrg }
135627f7eb2Smrg
136627f7eb2Smrg for (n = 0; n < rank; n++)
137627f7eb2Smrg {
138627f7eb2Smrg count[n] = 0;
139627f7eb2Smrg dstride[n] = GFC_DESCRIPTOR_STRIDE(retarray,n);
140627f7eb2Smrg if (extent[n] <= 0)
141627f7eb2Smrg return;
142627f7eb2Smrg }
143627f7eb2Smrg
144627f7eb2Smrg base = array->base_addr;
145627f7eb2Smrg dest = retarray->base_addr;
146627f7eb2Smrg
147627f7eb2Smrg continue_loop = 1;
148627f7eb2Smrg while (continue_loop)
149627f7eb2Smrg {
150627f7eb2Smrg const GFC_REAL_16 * restrict src;
151627f7eb2Smrg GFC_REAL_16 result;
152627f7eb2Smrg src = base;
153627f7eb2Smrg {
154627f7eb2Smrg
155627f7eb2Smrg GFC_REAL_16 scale;
156627f7eb2Smrg result = 0;
157627f7eb2Smrg scale = 1;
158627f7eb2Smrg if (len <= 0)
159627f7eb2Smrg *dest = 0;
160627f7eb2Smrg else
161627f7eb2Smrg {
162627f7eb2Smrg #if ! defined HAVE_BACK_ARG
163627f7eb2Smrg for (n = 0; n < len; n++, src += delta)
164627f7eb2Smrg {
165627f7eb2Smrg #endif
166627f7eb2Smrg
167627f7eb2Smrg if (*src != 0)
168627f7eb2Smrg {
169627f7eb2Smrg GFC_REAL_16 absX, val;
170627f7eb2Smrg absX = MATHFUNC(fabs) (*src);
171627f7eb2Smrg if (scale < absX)
172627f7eb2Smrg {
173627f7eb2Smrg val = scale / absX;
174627f7eb2Smrg result = 1 + result * val * val;
175627f7eb2Smrg scale = absX;
176627f7eb2Smrg }
177627f7eb2Smrg else
178627f7eb2Smrg {
179627f7eb2Smrg val = absX / scale;
180627f7eb2Smrg result += val * val;
181627f7eb2Smrg }
182627f7eb2Smrg }
183627f7eb2Smrg }
184627f7eb2Smrg result = scale * MATHFUNC(sqrt) (result);
185627f7eb2Smrg *dest = result;
186627f7eb2Smrg }
187627f7eb2Smrg }
188627f7eb2Smrg /* Advance to the next element. */
189627f7eb2Smrg count[0]++;
190627f7eb2Smrg base += sstride[0];
191627f7eb2Smrg dest += dstride[0];
192627f7eb2Smrg n = 0;
193627f7eb2Smrg while (count[n] == extent[n])
194627f7eb2Smrg {
195627f7eb2Smrg /* When we get to the end of a dimension, reset it and increment
196627f7eb2Smrg the next dimension. */
197627f7eb2Smrg count[n] = 0;
198627f7eb2Smrg /* We could precalculate these products, but this is a less
199627f7eb2Smrg frequently used path so probably not worth it. */
200627f7eb2Smrg base -= sstride[n] * extent[n];
201627f7eb2Smrg dest -= dstride[n] * extent[n];
202627f7eb2Smrg n++;
203627f7eb2Smrg if (n >= rank)
204627f7eb2Smrg {
205627f7eb2Smrg /* Break out of the loop. */
206627f7eb2Smrg continue_loop = 0;
207627f7eb2Smrg break;
208627f7eb2Smrg }
209627f7eb2Smrg else
210627f7eb2Smrg {
211627f7eb2Smrg count[n]++;
212627f7eb2Smrg base += sstride[n];
213627f7eb2Smrg dest += dstride[n];
214627f7eb2Smrg }
215627f7eb2Smrg }
216627f7eb2Smrg }
217627f7eb2Smrg }
218627f7eb2Smrg
219627f7eb2Smrg #endif
220