1*37da2899SCharles.Forsyth #include "lib9.h"
2*37da2899SCharles.Forsyth #include "mathi.h"
3*37da2899SCharles.Forsyth void
gemm(int transa,int transb,int m,int n,int k,double alpha,double * a,int lda,double * b,int ldb,double beta,double * c,int ldc)4*37da2899SCharles.Forsyth gemm(int transa, int transb, int m, int n, int k, double alpha,
5*37da2899SCharles.Forsyth double *a, int lda,
6*37da2899SCharles.Forsyth double *b, int ldb, double beta,
7*37da2899SCharles.Forsyth double *c, int ldc)
8*37da2899SCharles.Forsyth {
9*37da2899SCharles.Forsyth int i1, i2, i3, nota, notb, i, j, jb, jc, l, la;
10*37da2899SCharles.Forsyth double temp;
11*37da2899SCharles.Forsyth
12*37da2899SCharles.Forsyth nota = transa=='N';
13*37da2899SCharles.Forsyth notb = transb=='N';
14*37da2899SCharles.Forsyth
15*37da2899SCharles.Forsyth if(m == 0 || n == 0 || (alpha == 0. || k == 0) && beta == 1.){
16*37da2899SCharles.Forsyth return;
17*37da2899SCharles.Forsyth }
18*37da2899SCharles.Forsyth if(alpha == 0.){
19*37da2899SCharles.Forsyth if(beta == 0.){
20*37da2899SCharles.Forsyth i1 = n;
21*37da2899SCharles.Forsyth for(j = 0; j < i1; ++j){
22*37da2899SCharles.Forsyth jc = j*ldc;
23*37da2899SCharles.Forsyth i2 = m;
24*37da2899SCharles.Forsyth for(i = 0; i < i2; ++i){
25*37da2899SCharles.Forsyth c[i + jc] = 0.;
26*37da2899SCharles.Forsyth }
27*37da2899SCharles.Forsyth }
28*37da2899SCharles.Forsyth }else{
29*37da2899SCharles.Forsyth i1 = n;
30*37da2899SCharles.Forsyth for(j = 0; j < i1; ++j){
31*37da2899SCharles.Forsyth jc = j*ldc;
32*37da2899SCharles.Forsyth i2 = m;
33*37da2899SCharles.Forsyth for(i = 0; i < i2; ++i){
34*37da2899SCharles.Forsyth c[i + jc] = beta * c[i + jc];
35*37da2899SCharles.Forsyth }
36*37da2899SCharles.Forsyth }
37*37da2899SCharles.Forsyth }
38*37da2899SCharles.Forsyth return;
39*37da2899SCharles.Forsyth }
40*37da2899SCharles.Forsyth
41*37da2899SCharles.Forsyth if(!a){
42*37da2899SCharles.Forsyth if(notb){ /* C := alpha*B + beta*C. */
43*37da2899SCharles.Forsyth i1 = n;
44*37da2899SCharles.Forsyth for(j = 0; j < i1; ++j){
45*37da2899SCharles.Forsyth jb = j*ldb;
46*37da2899SCharles.Forsyth jc = j*ldc;
47*37da2899SCharles.Forsyth i2 = m;
48*37da2899SCharles.Forsyth for(i = 0; i < i2; ++i){
49*37da2899SCharles.Forsyth c[i + jc] = alpha*b[i+jb] + beta*c[i+jc];
50*37da2899SCharles.Forsyth }
51*37da2899SCharles.Forsyth }
52*37da2899SCharles.Forsyth }else{ /* C := alpha*B' + beta*C. */
53*37da2899SCharles.Forsyth i1 = n;
54*37da2899SCharles.Forsyth for(j = 0; j < i1; ++j){
55*37da2899SCharles.Forsyth jc = j*ldc;
56*37da2899SCharles.Forsyth i2 = m;
57*37da2899SCharles.Forsyth for(i = 0; i < i2; ++i){
58*37da2899SCharles.Forsyth c[i + jc] = alpha*b[j+i*ldb] + beta*c[i+jc];
59*37da2899SCharles.Forsyth }
60*37da2899SCharles.Forsyth }
61*37da2899SCharles.Forsyth }
62*37da2899SCharles.Forsyth return;
63*37da2899SCharles.Forsyth }
64*37da2899SCharles.Forsyth
65*37da2899SCharles.Forsyth if(notb){
66*37da2899SCharles.Forsyth if(nota){
67*37da2899SCharles.Forsyth
68*37da2899SCharles.Forsyth /* Form C := alpha*A*B + beta*C. */
69*37da2899SCharles.Forsyth i1 = n;
70*37da2899SCharles.Forsyth for(j = 0; j < i1; ++j){
71*37da2899SCharles.Forsyth jc = j*ldc;
72*37da2899SCharles.Forsyth if(beta == 0.){
73*37da2899SCharles.Forsyth i2 = m;
74*37da2899SCharles.Forsyth for(i = 0; i < i2; ++i){
75*37da2899SCharles.Forsyth c[i + jc] = 0.;
76*37da2899SCharles.Forsyth }
77*37da2899SCharles.Forsyth }else if(beta != 1.){
78*37da2899SCharles.Forsyth i2 = m;
79*37da2899SCharles.Forsyth for(i = 0; i < i2; ++i){
80*37da2899SCharles.Forsyth c[i + jc] = beta * c[i + jc];
81*37da2899SCharles.Forsyth }
82*37da2899SCharles.Forsyth }
83*37da2899SCharles.Forsyth i2 = k;
84*37da2899SCharles.Forsyth for(l = 0; l < i2; ++l){
85*37da2899SCharles.Forsyth la = l*lda;
86*37da2899SCharles.Forsyth if(b[l + j*ldb] != 0.){
87*37da2899SCharles.Forsyth temp = alpha * b[l + j*ldb];
88*37da2899SCharles.Forsyth i3 = m;
89*37da2899SCharles.Forsyth for(i = 0; i < i3; ++i){
90*37da2899SCharles.Forsyth c[i + jc] += temp * a[i + la];
91*37da2899SCharles.Forsyth }
92*37da2899SCharles.Forsyth }
93*37da2899SCharles.Forsyth }
94*37da2899SCharles.Forsyth }
95*37da2899SCharles.Forsyth }else{
96*37da2899SCharles.Forsyth
97*37da2899SCharles.Forsyth /* Form C := alpha*A'*B + beta*C */
98*37da2899SCharles.Forsyth i1 = n;
99*37da2899SCharles.Forsyth for(j = 0; j < i1; ++j){
100*37da2899SCharles.Forsyth jc = j*ldc;
101*37da2899SCharles.Forsyth i2 = m;
102*37da2899SCharles.Forsyth for(i = 0; i < i2; ++i){
103*37da2899SCharles.Forsyth temp = 0.;
104*37da2899SCharles.Forsyth i3 = k;
105*37da2899SCharles.Forsyth for(l = 0; l < i3; ++l){
106*37da2899SCharles.Forsyth temp += a[l + i*lda] * b[l + j*ldb];
107*37da2899SCharles.Forsyth }
108*37da2899SCharles.Forsyth if(beta == 0.){
109*37da2899SCharles.Forsyth c[i + jc] = alpha * temp;
110*37da2899SCharles.Forsyth }else{
111*37da2899SCharles.Forsyth c[i + jc] = alpha * temp + beta * c[i + jc];
112*37da2899SCharles.Forsyth }
113*37da2899SCharles.Forsyth }
114*37da2899SCharles.Forsyth }
115*37da2899SCharles.Forsyth }
116*37da2899SCharles.Forsyth }else{
117*37da2899SCharles.Forsyth if(nota){
118*37da2899SCharles.Forsyth
119*37da2899SCharles.Forsyth /* Form C := alpha*A*B' + beta*C */
120*37da2899SCharles.Forsyth i1 = n;
121*37da2899SCharles.Forsyth for(j = 0; j < i1; ++j){
122*37da2899SCharles.Forsyth jc = j*ldc;
123*37da2899SCharles.Forsyth if(beta == 0.){
124*37da2899SCharles.Forsyth i2 = m;
125*37da2899SCharles.Forsyth for(i = 0; i < i2; ++i){
126*37da2899SCharles.Forsyth c[i + jc] = 0.;
127*37da2899SCharles.Forsyth }
128*37da2899SCharles.Forsyth }else if(beta != 1.){
129*37da2899SCharles.Forsyth i2 = m;
130*37da2899SCharles.Forsyth for(i = 0; i < i2; ++i){
131*37da2899SCharles.Forsyth c[i + jc] = beta * c[i + jc];
132*37da2899SCharles.Forsyth }
133*37da2899SCharles.Forsyth }
134*37da2899SCharles.Forsyth i2 = k;
135*37da2899SCharles.Forsyth for(l = 0; l < i2; ++l){
136*37da2899SCharles.Forsyth if(b[j + l*ldb] != 0.){
137*37da2899SCharles.Forsyth temp = alpha * b[j + l*ldb];
138*37da2899SCharles.Forsyth i3 = m;
139*37da2899SCharles.Forsyth for(i = 0; i < i3; ++i){
140*37da2899SCharles.Forsyth c[i + jc] += temp * a[i + l*lda];
141*37da2899SCharles.Forsyth }
142*37da2899SCharles.Forsyth }
143*37da2899SCharles.Forsyth }
144*37da2899SCharles.Forsyth }
145*37da2899SCharles.Forsyth }else{
146*37da2899SCharles.Forsyth
147*37da2899SCharles.Forsyth /* Form C := alpha*A'*B' + beta*C */
148*37da2899SCharles.Forsyth i1 = n;
149*37da2899SCharles.Forsyth for(j = 0; j < i1; ++j){
150*37da2899SCharles.Forsyth jc = j*ldc;
151*37da2899SCharles.Forsyth i2 = m;
152*37da2899SCharles.Forsyth for(i = 0; i < i2; ++i){
153*37da2899SCharles.Forsyth temp = 0.;
154*37da2899SCharles.Forsyth i3 = k;
155*37da2899SCharles.Forsyth for(l = 0; l < i3; ++l){
156*37da2899SCharles.Forsyth temp += a[l + i*lda] * b[j + l*ldb];
157*37da2899SCharles.Forsyth }
158*37da2899SCharles.Forsyth if(beta == 0.){
159*37da2899SCharles.Forsyth c[i + jc] = alpha * temp;
160*37da2899SCharles.Forsyth }else{
161*37da2899SCharles.Forsyth c[i + jc] = alpha * temp + beta * c[i + jc];
162*37da2899SCharles.Forsyth }
163*37da2899SCharles.Forsyth }
164*37da2899SCharles.Forsyth }
165*37da2899SCharles.Forsyth }
166*37da2899SCharles.Forsyth }
167*37da2899SCharles.Forsyth }
168