1627f7eb2Smrg /* Implementation of the MATMUL intrinsic
2*4c3eb207Smrg Copyright (C) 2002-2020 Free Software Foundation, Inc.
3627f7eb2Smrg Contributed by Paul Brook <paul@nowt.org>
4627f7eb2Smrg
5627f7eb2Smrg This file is part of the GNU Fortran runtime library (libgfortran).
6627f7eb2Smrg
7627f7eb2Smrg Libgfortran is free software; you can redistribute it and/or
8627f7eb2Smrg modify it under the terms of the GNU General Public
9627f7eb2Smrg License as published by the Free Software Foundation; either
10627f7eb2Smrg version 3 of the License, or (at your option) any later version.
11627f7eb2Smrg
12627f7eb2Smrg Libgfortran is distributed in the hope that it will be useful,
13627f7eb2Smrg but WITHOUT ANY WARRANTY; without even the implied warranty of
14627f7eb2Smrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15627f7eb2Smrg GNU General Public License for more details.
16627f7eb2Smrg
17627f7eb2Smrg Under Section 7 of GPL version 3, you are granted additional
18627f7eb2Smrg permissions described in the GCC Runtime Library Exception, version
19627f7eb2Smrg 3.1, as published by the Free Software Foundation.
20627f7eb2Smrg
21627f7eb2Smrg You should have received a copy of the GNU General Public License and
22627f7eb2Smrg a copy of the GCC Runtime Library Exception along with this program;
23627f7eb2Smrg see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
24627f7eb2Smrg <http://www.gnu.org/licenses/>. */
25627f7eb2Smrg
26627f7eb2Smrg #include "libgfortran.h"
27627f7eb2Smrg #include <assert.h>
28627f7eb2Smrg
29627f7eb2Smrg
30627f7eb2Smrg #if defined (HAVE_GFC_LOGICAL_8)
31627f7eb2Smrg
32627f7eb2Smrg /* Dimensions: retarray(x,y) a(x, count) b(count,y).
33627f7eb2Smrg Either a or b can be rank 1. In this case x or y is 1. */
34627f7eb2Smrg
35627f7eb2Smrg extern void matmul_l8 (gfc_array_l8 * const restrict,
36627f7eb2Smrg gfc_array_l1 * const restrict, gfc_array_l1 * const restrict);
37627f7eb2Smrg export_proto(matmul_l8);
38627f7eb2Smrg
39627f7eb2Smrg void
matmul_l8(gfc_array_l8 * const restrict retarray,gfc_array_l1 * const restrict a,gfc_array_l1 * const restrict b)40627f7eb2Smrg matmul_l8 (gfc_array_l8 * const restrict retarray,
41627f7eb2Smrg gfc_array_l1 * const restrict a, gfc_array_l1 * const restrict b)
42627f7eb2Smrg {
43627f7eb2Smrg const GFC_LOGICAL_1 * restrict abase;
44627f7eb2Smrg const GFC_LOGICAL_1 * restrict bbase;
45627f7eb2Smrg GFC_LOGICAL_8 * restrict dest;
46627f7eb2Smrg index_type rxstride;
47627f7eb2Smrg index_type rystride;
48627f7eb2Smrg index_type xcount;
49627f7eb2Smrg index_type ycount;
50627f7eb2Smrg index_type xstride;
51627f7eb2Smrg index_type ystride;
52627f7eb2Smrg index_type x;
53627f7eb2Smrg index_type y;
54627f7eb2Smrg int a_kind;
55627f7eb2Smrg int b_kind;
56627f7eb2Smrg
57627f7eb2Smrg const GFC_LOGICAL_1 * restrict pa;
58627f7eb2Smrg const GFC_LOGICAL_1 * restrict pb;
59627f7eb2Smrg index_type astride;
60627f7eb2Smrg index_type bstride;
61627f7eb2Smrg index_type count;
62627f7eb2Smrg index_type n;
63627f7eb2Smrg
64627f7eb2Smrg assert (GFC_DESCRIPTOR_RANK (a) == 2
65627f7eb2Smrg || GFC_DESCRIPTOR_RANK (b) == 2);
66627f7eb2Smrg
67627f7eb2Smrg if (retarray->base_addr == NULL)
68627f7eb2Smrg {
69627f7eb2Smrg if (GFC_DESCRIPTOR_RANK (a) == 1)
70627f7eb2Smrg {
71627f7eb2Smrg GFC_DIMENSION_SET(retarray->dim[0], 0,
72627f7eb2Smrg GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
73627f7eb2Smrg }
74627f7eb2Smrg else if (GFC_DESCRIPTOR_RANK (b) == 1)
75627f7eb2Smrg {
76627f7eb2Smrg GFC_DIMENSION_SET(retarray->dim[0], 0,
77627f7eb2Smrg GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
78627f7eb2Smrg }
79627f7eb2Smrg else
80627f7eb2Smrg {
81627f7eb2Smrg GFC_DIMENSION_SET(retarray->dim[0], 0,
82627f7eb2Smrg GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
83627f7eb2Smrg
84627f7eb2Smrg GFC_DIMENSION_SET(retarray->dim[1], 0,
85627f7eb2Smrg GFC_DESCRIPTOR_EXTENT(b,1) - 1,
86627f7eb2Smrg GFC_DESCRIPTOR_EXTENT(retarray,0));
87627f7eb2Smrg }
88627f7eb2Smrg
89627f7eb2Smrg retarray->base_addr
90627f7eb2Smrg = xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_LOGICAL_8));
91627f7eb2Smrg retarray->offset = 0;
92627f7eb2Smrg }
93627f7eb2Smrg else if (unlikely (compile_options.bounds_check))
94627f7eb2Smrg {
95627f7eb2Smrg index_type ret_extent, arg_extent;
96627f7eb2Smrg
97627f7eb2Smrg if (GFC_DESCRIPTOR_RANK (a) == 1)
98627f7eb2Smrg {
99627f7eb2Smrg arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
100627f7eb2Smrg ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
101627f7eb2Smrg if (arg_extent != ret_extent)
102627f7eb2Smrg runtime_error ("Incorrect extent in return array in"
103627f7eb2Smrg " MATMUL intrinsic: is %ld, should be %ld",
104627f7eb2Smrg (long int) ret_extent, (long int) arg_extent);
105627f7eb2Smrg }
106627f7eb2Smrg else if (GFC_DESCRIPTOR_RANK (b) == 1)
107627f7eb2Smrg {
108627f7eb2Smrg arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
109627f7eb2Smrg ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
110627f7eb2Smrg if (arg_extent != ret_extent)
111627f7eb2Smrg runtime_error ("Incorrect extent in return array in"
112627f7eb2Smrg " MATMUL intrinsic: is %ld, should be %ld",
113627f7eb2Smrg (long int) ret_extent, (long int) arg_extent);
114627f7eb2Smrg }
115627f7eb2Smrg else
116627f7eb2Smrg {
117627f7eb2Smrg arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
118627f7eb2Smrg ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
119627f7eb2Smrg if (arg_extent != ret_extent)
120627f7eb2Smrg runtime_error ("Incorrect extent in return array in"
121627f7eb2Smrg " MATMUL intrinsic for dimension 1:"
122627f7eb2Smrg " is %ld, should be %ld",
123627f7eb2Smrg (long int) ret_extent, (long int) arg_extent);
124627f7eb2Smrg
125627f7eb2Smrg arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
126627f7eb2Smrg ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
127627f7eb2Smrg if (arg_extent != ret_extent)
128627f7eb2Smrg runtime_error ("Incorrect extent in return array in"
129627f7eb2Smrg " MATMUL intrinsic for dimension 2:"
130627f7eb2Smrg " is %ld, should be %ld",
131627f7eb2Smrg (long int) ret_extent, (long int) arg_extent);
132627f7eb2Smrg }
133627f7eb2Smrg }
134627f7eb2Smrg
135627f7eb2Smrg abase = a->base_addr;
136627f7eb2Smrg a_kind = GFC_DESCRIPTOR_SIZE (a);
137627f7eb2Smrg
138627f7eb2Smrg if (a_kind == 1 || a_kind == 2 || a_kind == 4 || a_kind == 8
139627f7eb2Smrg #ifdef HAVE_GFC_LOGICAL_16
140627f7eb2Smrg || a_kind == 16
141627f7eb2Smrg #endif
142627f7eb2Smrg )
143627f7eb2Smrg abase = GFOR_POINTER_TO_L1 (abase, a_kind);
144627f7eb2Smrg else
145627f7eb2Smrg internal_error (NULL, "Funny sized logical array");
146627f7eb2Smrg
147627f7eb2Smrg bbase = b->base_addr;
148627f7eb2Smrg b_kind = GFC_DESCRIPTOR_SIZE (b);
149627f7eb2Smrg
150627f7eb2Smrg if (b_kind == 1 || b_kind == 2 || b_kind == 4 || b_kind == 8
151627f7eb2Smrg #ifdef HAVE_GFC_LOGICAL_16
152627f7eb2Smrg || b_kind == 16
153627f7eb2Smrg #endif
154627f7eb2Smrg )
155627f7eb2Smrg bbase = GFOR_POINTER_TO_L1 (bbase, b_kind);
156627f7eb2Smrg else
157627f7eb2Smrg internal_error (NULL, "Funny sized logical array");
158627f7eb2Smrg
159627f7eb2Smrg dest = retarray->base_addr;
160627f7eb2Smrg
161627f7eb2Smrg
162627f7eb2Smrg if (GFC_DESCRIPTOR_RANK (retarray) == 1)
163627f7eb2Smrg {
164627f7eb2Smrg rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
165627f7eb2Smrg rystride = rxstride;
166627f7eb2Smrg }
167627f7eb2Smrg else
168627f7eb2Smrg {
169627f7eb2Smrg rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
170627f7eb2Smrg rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
171627f7eb2Smrg }
172627f7eb2Smrg
173627f7eb2Smrg /* If we have rank 1 parameters, zero the absent stride, and set the size to
174627f7eb2Smrg one. */
175627f7eb2Smrg if (GFC_DESCRIPTOR_RANK (a) == 1)
176627f7eb2Smrg {
177627f7eb2Smrg astride = GFC_DESCRIPTOR_STRIDE_BYTES(a,0);
178627f7eb2Smrg count = GFC_DESCRIPTOR_EXTENT(a,0);
179627f7eb2Smrg xstride = 0;
180627f7eb2Smrg rxstride = 0;
181627f7eb2Smrg xcount = 1;
182627f7eb2Smrg }
183627f7eb2Smrg else
184627f7eb2Smrg {
185627f7eb2Smrg astride = GFC_DESCRIPTOR_STRIDE_BYTES(a,1);
186627f7eb2Smrg count = GFC_DESCRIPTOR_EXTENT(a,1);
187627f7eb2Smrg xstride = GFC_DESCRIPTOR_STRIDE_BYTES(a,0);
188627f7eb2Smrg xcount = GFC_DESCRIPTOR_EXTENT(a,0);
189627f7eb2Smrg }
190627f7eb2Smrg if (GFC_DESCRIPTOR_RANK (b) == 1)
191627f7eb2Smrg {
192627f7eb2Smrg bstride = GFC_DESCRIPTOR_STRIDE_BYTES(b,0);
193627f7eb2Smrg assert(count == GFC_DESCRIPTOR_EXTENT(b,0));
194627f7eb2Smrg ystride = 0;
195627f7eb2Smrg rystride = 0;
196627f7eb2Smrg ycount = 1;
197627f7eb2Smrg }
198627f7eb2Smrg else
199627f7eb2Smrg {
200627f7eb2Smrg bstride = GFC_DESCRIPTOR_STRIDE_BYTES(b,0);
201627f7eb2Smrg assert(count == GFC_DESCRIPTOR_EXTENT(b,0));
202627f7eb2Smrg ystride = GFC_DESCRIPTOR_STRIDE_BYTES(b,1);
203627f7eb2Smrg ycount = GFC_DESCRIPTOR_EXTENT(b,1);
204627f7eb2Smrg }
205627f7eb2Smrg
206627f7eb2Smrg for (y = 0; y < ycount; y++)
207627f7eb2Smrg {
208627f7eb2Smrg for (x = 0; x < xcount; x++)
209627f7eb2Smrg {
210627f7eb2Smrg /* Do the summation for this element. For real and integer types
211627f7eb2Smrg this is the same as DOT_PRODUCT. For complex types we use do
212627f7eb2Smrg a*b, not conjg(a)*b. */
213627f7eb2Smrg pa = abase;
214627f7eb2Smrg pb = bbase;
215627f7eb2Smrg *dest = 0;
216627f7eb2Smrg
217627f7eb2Smrg for (n = 0; n < count; n++)
218627f7eb2Smrg {
219627f7eb2Smrg if (*pa && *pb)
220627f7eb2Smrg {
221627f7eb2Smrg *dest = 1;
222627f7eb2Smrg break;
223627f7eb2Smrg }
224627f7eb2Smrg pa += astride;
225627f7eb2Smrg pb += bstride;
226627f7eb2Smrg }
227627f7eb2Smrg
228627f7eb2Smrg dest += rxstride;
229627f7eb2Smrg abase += xstride;
230627f7eb2Smrg }
231627f7eb2Smrg abase -= xstride * xcount;
232627f7eb2Smrg bbase += ystride;
233627f7eb2Smrg dest += rystride - (rxstride * xcount);
234627f7eb2Smrg }
235627f7eb2Smrg }
236627f7eb2Smrg
237627f7eb2Smrg #endif
238627f7eb2Smrg
239