xref: /netbsd-src/external/gpl3/gcc.old/dist/libgfortran/generated/matmul_l8.c (revision 4c3eb207d36f67d31994830c0a694161fc1ca39b)
1627f7eb2Smrg /* Implementation of the MATMUL intrinsic
2*4c3eb207Smrg    Copyright (C) 2002-2020 Free Software Foundation, Inc.
3627f7eb2Smrg    Contributed by Paul Brook <paul@nowt.org>
4627f7eb2Smrg 
5627f7eb2Smrg This file is part of the GNU Fortran runtime library (libgfortran).
6627f7eb2Smrg 
7627f7eb2Smrg Libgfortran is free software; you can redistribute it and/or
8627f7eb2Smrg modify it under the terms of the GNU General Public
9627f7eb2Smrg License as published by the Free Software Foundation; either
10627f7eb2Smrg version 3 of the License, or (at your option) any later version.
11627f7eb2Smrg 
12627f7eb2Smrg Libgfortran is distributed in the hope that it will be useful,
13627f7eb2Smrg but WITHOUT ANY WARRANTY; without even the implied warranty of
14627f7eb2Smrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15627f7eb2Smrg GNU General Public License for more details.
16627f7eb2Smrg 
17627f7eb2Smrg Under Section 7 of GPL version 3, you are granted additional
18627f7eb2Smrg permissions described in the GCC Runtime Library Exception, version
19627f7eb2Smrg 3.1, as published by the Free Software Foundation.
20627f7eb2Smrg 
21627f7eb2Smrg You should have received a copy of the GNU General Public License and
22627f7eb2Smrg a copy of the GCC Runtime Library Exception along with this program;
23627f7eb2Smrg see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
24627f7eb2Smrg <http://www.gnu.org/licenses/>.  */
25627f7eb2Smrg 
26627f7eb2Smrg #include "libgfortran.h"
27627f7eb2Smrg #include <assert.h>
28627f7eb2Smrg 
29627f7eb2Smrg 
30627f7eb2Smrg #if defined (HAVE_GFC_LOGICAL_8)
31627f7eb2Smrg 
32627f7eb2Smrg /* Dimensions: retarray(x,y) a(x, count) b(count,y).
33627f7eb2Smrg    Either a or b can be rank 1.  In this case x or y is 1.  */
34627f7eb2Smrg 
35627f7eb2Smrg extern void matmul_l8 (gfc_array_l8 * const restrict,
36627f7eb2Smrg 	gfc_array_l1 * const restrict, gfc_array_l1 * const restrict);
37627f7eb2Smrg export_proto(matmul_l8);
38627f7eb2Smrg 
39627f7eb2Smrg void
matmul_l8(gfc_array_l8 * const restrict retarray,gfc_array_l1 * const restrict a,gfc_array_l1 * const restrict b)40627f7eb2Smrg matmul_l8 (gfc_array_l8 * const restrict retarray,
41627f7eb2Smrg 	gfc_array_l1 * const restrict a, gfc_array_l1 * const restrict b)
42627f7eb2Smrg {
43627f7eb2Smrg   const GFC_LOGICAL_1 * restrict abase;
44627f7eb2Smrg   const GFC_LOGICAL_1 * restrict bbase;
45627f7eb2Smrg   GFC_LOGICAL_8 * restrict dest;
46627f7eb2Smrg   index_type rxstride;
47627f7eb2Smrg   index_type rystride;
48627f7eb2Smrg   index_type xcount;
49627f7eb2Smrg   index_type ycount;
50627f7eb2Smrg   index_type xstride;
51627f7eb2Smrg   index_type ystride;
52627f7eb2Smrg   index_type x;
53627f7eb2Smrg   index_type y;
54627f7eb2Smrg   int a_kind;
55627f7eb2Smrg   int b_kind;
56627f7eb2Smrg 
57627f7eb2Smrg   const GFC_LOGICAL_1 * restrict pa;
58627f7eb2Smrg   const GFC_LOGICAL_1 * restrict pb;
59627f7eb2Smrg   index_type astride;
60627f7eb2Smrg   index_type bstride;
61627f7eb2Smrg   index_type count;
62627f7eb2Smrg   index_type n;
63627f7eb2Smrg 
64627f7eb2Smrg   assert (GFC_DESCRIPTOR_RANK (a) == 2
65627f7eb2Smrg           || GFC_DESCRIPTOR_RANK (b) == 2);
66627f7eb2Smrg 
67627f7eb2Smrg   if (retarray->base_addr == NULL)
68627f7eb2Smrg     {
69627f7eb2Smrg       if (GFC_DESCRIPTOR_RANK (a) == 1)
70627f7eb2Smrg         {
71627f7eb2Smrg 	  GFC_DIMENSION_SET(retarray->dim[0], 0,
72627f7eb2Smrg 	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1, 1);
73627f7eb2Smrg         }
74627f7eb2Smrg       else if (GFC_DESCRIPTOR_RANK (b) == 1)
75627f7eb2Smrg         {
76627f7eb2Smrg 	  GFC_DIMENSION_SET(retarray->dim[0], 0,
77627f7eb2Smrg 	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
78627f7eb2Smrg         }
79627f7eb2Smrg       else
80627f7eb2Smrg         {
81627f7eb2Smrg 	  GFC_DIMENSION_SET(retarray->dim[0], 0,
82627f7eb2Smrg 	                    GFC_DESCRIPTOR_EXTENT(a,0) - 1, 1);
83627f7eb2Smrg 
84627f7eb2Smrg           GFC_DIMENSION_SET(retarray->dim[1], 0,
85627f7eb2Smrg 	                    GFC_DESCRIPTOR_EXTENT(b,1) - 1,
86627f7eb2Smrg 			    GFC_DESCRIPTOR_EXTENT(retarray,0));
87627f7eb2Smrg         }
88627f7eb2Smrg 
89627f7eb2Smrg       retarray->base_addr
90627f7eb2Smrg 	= xmallocarray (size0 ((array_t *) retarray), sizeof (GFC_LOGICAL_8));
91627f7eb2Smrg       retarray->offset = 0;
92627f7eb2Smrg     }
93627f7eb2Smrg     else if (unlikely (compile_options.bounds_check))
94627f7eb2Smrg       {
95627f7eb2Smrg 	index_type ret_extent, arg_extent;
96627f7eb2Smrg 
97627f7eb2Smrg 	if (GFC_DESCRIPTOR_RANK (a) == 1)
98627f7eb2Smrg 	  {
99627f7eb2Smrg 	    arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
100627f7eb2Smrg 	    ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
101627f7eb2Smrg 	    if (arg_extent != ret_extent)
102627f7eb2Smrg 	      runtime_error ("Incorrect extent in return array in"
103627f7eb2Smrg 			     " MATMUL intrinsic: is %ld, should be %ld",
104627f7eb2Smrg 			     (long int) ret_extent, (long int) arg_extent);
105627f7eb2Smrg 	  }
106627f7eb2Smrg 	else if (GFC_DESCRIPTOR_RANK (b) == 1)
107627f7eb2Smrg 	  {
108627f7eb2Smrg 	    arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
109627f7eb2Smrg 	    ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
110627f7eb2Smrg 	    if (arg_extent != ret_extent)
111627f7eb2Smrg 	      runtime_error ("Incorrect extent in return array in"
112627f7eb2Smrg 			     " MATMUL intrinsic: is %ld, should be %ld",
113627f7eb2Smrg 			     (long int) ret_extent, (long int) arg_extent);
114627f7eb2Smrg 	  }
115627f7eb2Smrg 	else
116627f7eb2Smrg 	  {
117627f7eb2Smrg 	    arg_extent = GFC_DESCRIPTOR_EXTENT(a,0);
118627f7eb2Smrg 	    ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,0);
119627f7eb2Smrg 	    if (arg_extent != ret_extent)
120627f7eb2Smrg 	      runtime_error ("Incorrect extent in return array in"
121627f7eb2Smrg 			     " MATMUL intrinsic for dimension 1:"
122627f7eb2Smrg 			     " is %ld, should be %ld",
123627f7eb2Smrg 			     (long int) ret_extent, (long int) arg_extent);
124627f7eb2Smrg 
125627f7eb2Smrg 	    arg_extent = GFC_DESCRIPTOR_EXTENT(b,1);
126627f7eb2Smrg 	    ret_extent = GFC_DESCRIPTOR_EXTENT(retarray,1);
127627f7eb2Smrg 	    if (arg_extent != ret_extent)
128627f7eb2Smrg 	      runtime_error ("Incorrect extent in return array in"
129627f7eb2Smrg 			     " MATMUL intrinsic for dimension 2:"
130627f7eb2Smrg 			     " is %ld, should be %ld",
131627f7eb2Smrg 			     (long int) ret_extent, (long int) arg_extent);
132627f7eb2Smrg 	  }
133627f7eb2Smrg       }
134627f7eb2Smrg 
135627f7eb2Smrg   abase = a->base_addr;
136627f7eb2Smrg   a_kind = GFC_DESCRIPTOR_SIZE (a);
137627f7eb2Smrg 
138627f7eb2Smrg   if (a_kind == 1 || a_kind == 2 || a_kind == 4 || a_kind == 8
139627f7eb2Smrg #ifdef HAVE_GFC_LOGICAL_16
140627f7eb2Smrg      || a_kind == 16
141627f7eb2Smrg #endif
142627f7eb2Smrg      )
143627f7eb2Smrg     abase = GFOR_POINTER_TO_L1 (abase, a_kind);
144627f7eb2Smrg   else
145627f7eb2Smrg     internal_error (NULL, "Funny sized logical array");
146627f7eb2Smrg 
147627f7eb2Smrg   bbase = b->base_addr;
148627f7eb2Smrg   b_kind = GFC_DESCRIPTOR_SIZE (b);
149627f7eb2Smrg 
150627f7eb2Smrg   if (b_kind == 1 || b_kind == 2 || b_kind == 4 || b_kind == 8
151627f7eb2Smrg #ifdef HAVE_GFC_LOGICAL_16
152627f7eb2Smrg      || b_kind == 16
153627f7eb2Smrg #endif
154627f7eb2Smrg      )
155627f7eb2Smrg     bbase = GFOR_POINTER_TO_L1 (bbase, b_kind);
156627f7eb2Smrg   else
157627f7eb2Smrg     internal_error (NULL, "Funny sized logical array");
158627f7eb2Smrg 
159627f7eb2Smrg   dest = retarray->base_addr;
160627f7eb2Smrg 
161627f7eb2Smrg 
162627f7eb2Smrg   if (GFC_DESCRIPTOR_RANK (retarray) == 1)
163627f7eb2Smrg     {
164627f7eb2Smrg       rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
165627f7eb2Smrg       rystride = rxstride;
166627f7eb2Smrg     }
167627f7eb2Smrg   else
168627f7eb2Smrg     {
169627f7eb2Smrg       rxstride = GFC_DESCRIPTOR_STRIDE(retarray,0);
170627f7eb2Smrg       rystride = GFC_DESCRIPTOR_STRIDE(retarray,1);
171627f7eb2Smrg     }
172627f7eb2Smrg 
173627f7eb2Smrg   /* If we have rank 1 parameters, zero the absent stride, and set the size to
174627f7eb2Smrg      one.  */
175627f7eb2Smrg   if (GFC_DESCRIPTOR_RANK (a) == 1)
176627f7eb2Smrg     {
177627f7eb2Smrg       astride = GFC_DESCRIPTOR_STRIDE_BYTES(a,0);
178627f7eb2Smrg       count = GFC_DESCRIPTOR_EXTENT(a,0);
179627f7eb2Smrg       xstride = 0;
180627f7eb2Smrg       rxstride = 0;
181627f7eb2Smrg       xcount = 1;
182627f7eb2Smrg     }
183627f7eb2Smrg   else
184627f7eb2Smrg     {
185627f7eb2Smrg       astride = GFC_DESCRIPTOR_STRIDE_BYTES(a,1);
186627f7eb2Smrg       count = GFC_DESCRIPTOR_EXTENT(a,1);
187627f7eb2Smrg       xstride = GFC_DESCRIPTOR_STRIDE_BYTES(a,0);
188627f7eb2Smrg       xcount = GFC_DESCRIPTOR_EXTENT(a,0);
189627f7eb2Smrg     }
190627f7eb2Smrg   if (GFC_DESCRIPTOR_RANK (b) == 1)
191627f7eb2Smrg     {
192627f7eb2Smrg       bstride = GFC_DESCRIPTOR_STRIDE_BYTES(b,0);
193627f7eb2Smrg       assert(count == GFC_DESCRIPTOR_EXTENT(b,0));
194627f7eb2Smrg       ystride = 0;
195627f7eb2Smrg       rystride = 0;
196627f7eb2Smrg       ycount = 1;
197627f7eb2Smrg     }
198627f7eb2Smrg   else
199627f7eb2Smrg     {
200627f7eb2Smrg       bstride = GFC_DESCRIPTOR_STRIDE_BYTES(b,0);
201627f7eb2Smrg       assert(count == GFC_DESCRIPTOR_EXTENT(b,0));
202627f7eb2Smrg       ystride = GFC_DESCRIPTOR_STRIDE_BYTES(b,1);
203627f7eb2Smrg       ycount = GFC_DESCRIPTOR_EXTENT(b,1);
204627f7eb2Smrg     }
205627f7eb2Smrg 
206627f7eb2Smrg   for (y = 0; y < ycount; y++)
207627f7eb2Smrg     {
208627f7eb2Smrg       for (x = 0; x < xcount; x++)
209627f7eb2Smrg         {
210627f7eb2Smrg           /* Do the summation for this element.  For real and integer types
211627f7eb2Smrg              this is the same as DOT_PRODUCT.  For complex types we use do
212627f7eb2Smrg              a*b, not conjg(a)*b.  */
213627f7eb2Smrg           pa = abase;
214627f7eb2Smrg           pb = bbase;
215627f7eb2Smrg           *dest = 0;
216627f7eb2Smrg 
217627f7eb2Smrg           for (n = 0; n < count; n++)
218627f7eb2Smrg             {
219627f7eb2Smrg               if (*pa && *pb)
220627f7eb2Smrg                 {
221627f7eb2Smrg                   *dest = 1;
222627f7eb2Smrg                   break;
223627f7eb2Smrg                 }
224627f7eb2Smrg               pa += astride;
225627f7eb2Smrg               pb += bstride;
226627f7eb2Smrg             }
227627f7eb2Smrg 
228627f7eb2Smrg           dest += rxstride;
229627f7eb2Smrg           abase += xstride;
230627f7eb2Smrg         }
231627f7eb2Smrg       abase -= xstride * xcount;
232627f7eb2Smrg       bbase += ystride;
233627f7eb2Smrg       dest += rystride - (rxstride * xcount);
234627f7eb2Smrg     }
235627f7eb2Smrg }
236627f7eb2Smrg 
237627f7eb2Smrg #endif
238627f7eb2Smrg 
239