xref: /inferno-os/libmath/gemm.c (revision 37da2899f40661e3e9631e497da8dc59b971cbd0)
1*37da2899SCharles.Forsyth #include "lib9.h"
2*37da2899SCharles.Forsyth #include "mathi.h"
3*37da2899SCharles.Forsyth void
gemm(int transa,int transb,int m,int n,int k,double alpha,double * a,int lda,double * b,int ldb,double beta,double * c,int ldc)4*37da2899SCharles.Forsyth gemm(int transa, int transb, int m, int n, int k, double alpha,
5*37da2899SCharles.Forsyth 	double *a, int lda,
6*37da2899SCharles.Forsyth 	double *b, int ldb, double beta,
7*37da2899SCharles.Forsyth 	double *c, int ldc)
8*37da2899SCharles.Forsyth {
9*37da2899SCharles.Forsyth     int i1, i2, i3, nota, notb, i, j, jb, jc, l, la;
10*37da2899SCharles.Forsyth     double temp;
11*37da2899SCharles.Forsyth 
12*37da2899SCharles.Forsyth     nota = transa=='N';
13*37da2899SCharles.Forsyth     notb = transb=='N';
14*37da2899SCharles.Forsyth 
15*37da2899SCharles.Forsyth     if(m == 0 || n == 0 || (alpha == 0. || k == 0) && beta == 1.){
16*37da2899SCharles.Forsyth 	return;
17*37da2899SCharles.Forsyth     }
18*37da2899SCharles.Forsyth     if(alpha == 0.){
19*37da2899SCharles.Forsyth 	if(beta == 0.){
20*37da2899SCharles.Forsyth 	    i1 = n;
21*37da2899SCharles.Forsyth 	    for(j = 0; j < i1; ++j){
22*37da2899SCharles.Forsyth 		jc = j*ldc;
23*37da2899SCharles.Forsyth 		i2 = m;
24*37da2899SCharles.Forsyth 		for(i = 0; i < i2; ++i){
25*37da2899SCharles.Forsyth 		    c[i + jc] = 0.;
26*37da2899SCharles.Forsyth 		}
27*37da2899SCharles.Forsyth 	    }
28*37da2899SCharles.Forsyth 	}else{
29*37da2899SCharles.Forsyth 	    i1 = n;
30*37da2899SCharles.Forsyth 	    for(j = 0; j < i1; ++j){
31*37da2899SCharles.Forsyth 		jc = j*ldc;
32*37da2899SCharles.Forsyth 		i2 = m;
33*37da2899SCharles.Forsyth 		for(i = 0; i < i2; ++i){
34*37da2899SCharles.Forsyth 		    c[i + jc] = beta * c[i + jc];
35*37da2899SCharles.Forsyth 		}
36*37da2899SCharles.Forsyth 	    }
37*37da2899SCharles.Forsyth 	}
38*37da2899SCharles.Forsyth 	return;
39*37da2899SCharles.Forsyth     }
40*37da2899SCharles.Forsyth 
41*37da2899SCharles.Forsyth     if(!a){
42*37da2899SCharles.Forsyth 	if(notb){   /* C := alpha*B + beta*C. */
43*37da2899SCharles.Forsyth 	    i1 = n;
44*37da2899SCharles.Forsyth 	    for(j = 0; j < i1; ++j){
45*37da2899SCharles.Forsyth 		jb = j*ldb;
46*37da2899SCharles.Forsyth 		jc = j*ldc;
47*37da2899SCharles.Forsyth 		i2 = m;
48*37da2899SCharles.Forsyth 		for(i = 0; i < i2; ++i){
49*37da2899SCharles.Forsyth 		    c[i + jc] = alpha*b[i+jb] + beta*c[i+jc];
50*37da2899SCharles.Forsyth 		}
51*37da2899SCharles.Forsyth 	    }
52*37da2899SCharles.Forsyth 	}else{   /* C := alpha*B' + beta*C. */
53*37da2899SCharles.Forsyth 	    i1 = n;
54*37da2899SCharles.Forsyth 	    for(j = 0; j < i1; ++j){
55*37da2899SCharles.Forsyth 		jc = j*ldc;
56*37da2899SCharles.Forsyth 		i2 = m;
57*37da2899SCharles.Forsyth 		for(i = 0; i < i2; ++i){
58*37da2899SCharles.Forsyth 		    c[i + jc] = alpha*b[j+i*ldb] + beta*c[i+jc];
59*37da2899SCharles.Forsyth 		}
60*37da2899SCharles.Forsyth 	    }
61*37da2899SCharles.Forsyth 	}
62*37da2899SCharles.Forsyth 	return;
63*37da2899SCharles.Forsyth     }
64*37da2899SCharles.Forsyth 
65*37da2899SCharles.Forsyth     if(notb){
66*37da2899SCharles.Forsyth 	if(nota){
67*37da2899SCharles.Forsyth 
68*37da2899SCharles.Forsyth /*          Form  C := alpha*A*B + beta*C. */
69*37da2899SCharles.Forsyth 	    i1 = n;
70*37da2899SCharles.Forsyth 	    for(j = 0; j < i1; ++j){
71*37da2899SCharles.Forsyth 		jc = j*ldc;
72*37da2899SCharles.Forsyth 		if(beta == 0.){
73*37da2899SCharles.Forsyth 		    i2 = m;
74*37da2899SCharles.Forsyth 		    for(i = 0; i < i2; ++i){
75*37da2899SCharles.Forsyth 			c[i + jc] = 0.;
76*37da2899SCharles.Forsyth 		    }
77*37da2899SCharles.Forsyth 		}else if(beta != 1.){
78*37da2899SCharles.Forsyth 		    i2 = m;
79*37da2899SCharles.Forsyth 		    for(i = 0; i < i2; ++i){
80*37da2899SCharles.Forsyth 			c[i + jc] = beta * c[i + jc];
81*37da2899SCharles.Forsyth 		    }
82*37da2899SCharles.Forsyth 		}
83*37da2899SCharles.Forsyth 		i2 = k;
84*37da2899SCharles.Forsyth 		for(l = 0; l < i2; ++l){
85*37da2899SCharles.Forsyth 		    la = l*lda;
86*37da2899SCharles.Forsyth 		    if(b[l + j*ldb] != 0.){
87*37da2899SCharles.Forsyth 			temp = alpha * b[l + j*ldb];
88*37da2899SCharles.Forsyth 			i3 = m;
89*37da2899SCharles.Forsyth 			for(i = 0; i < i3; ++i){
90*37da2899SCharles.Forsyth 			    c[i + jc] += temp * a[i + la];
91*37da2899SCharles.Forsyth 			}
92*37da2899SCharles.Forsyth 		    }
93*37da2899SCharles.Forsyth 		}
94*37da2899SCharles.Forsyth 	    }
95*37da2899SCharles.Forsyth 	}else{
96*37da2899SCharles.Forsyth 
97*37da2899SCharles.Forsyth /*          Form  C := alpha*A'*B + beta*C */
98*37da2899SCharles.Forsyth 	    i1 = n;
99*37da2899SCharles.Forsyth 	    for(j = 0; j < i1; ++j){
100*37da2899SCharles.Forsyth 		jc = j*ldc;
101*37da2899SCharles.Forsyth 		i2 = m;
102*37da2899SCharles.Forsyth 		for(i = 0; i < i2; ++i){
103*37da2899SCharles.Forsyth 		    temp = 0.;
104*37da2899SCharles.Forsyth 		    i3 = k;
105*37da2899SCharles.Forsyth 		    for(l = 0; l < i3; ++l){
106*37da2899SCharles.Forsyth 			temp += a[l + i*lda] * b[l + j*ldb];
107*37da2899SCharles.Forsyth 		    }
108*37da2899SCharles.Forsyth 		    if(beta == 0.){
109*37da2899SCharles.Forsyth 			c[i + jc] = alpha * temp;
110*37da2899SCharles.Forsyth 		    }else{
111*37da2899SCharles.Forsyth 			c[i + jc] = alpha * temp + beta * c[i + jc];
112*37da2899SCharles.Forsyth 		    }
113*37da2899SCharles.Forsyth 		}
114*37da2899SCharles.Forsyth 	    }
115*37da2899SCharles.Forsyth 	}
116*37da2899SCharles.Forsyth     }else{
117*37da2899SCharles.Forsyth 	if(nota){
118*37da2899SCharles.Forsyth 
119*37da2899SCharles.Forsyth /*          Form  C := alpha*A*B' + beta*C */
120*37da2899SCharles.Forsyth 	    i1 = n;
121*37da2899SCharles.Forsyth 	    for(j = 0; j < i1; ++j){
122*37da2899SCharles.Forsyth 		jc = j*ldc;
123*37da2899SCharles.Forsyth 		if(beta == 0.){
124*37da2899SCharles.Forsyth 		    i2 = m;
125*37da2899SCharles.Forsyth 		    for(i = 0; i < i2; ++i){
126*37da2899SCharles.Forsyth 			c[i + jc] = 0.;
127*37da2899SCharles.Forsyth 		    }
128*37da2899SCharles.Forsyth 		}else if(beta != 1.){
129*37da2899SCharles.Forsyth 		    i2 = m;
130*37da2899SCharles.Forsyth 		    for(i = 0; i < i2; ++i){
131*37da2899SCharles.Forsyth 			c[i + jc] = beta * c[i + jc];
132*37da2899SCharles.Forsyth 		    }
133*37da2899SCharles.Forsyth 		}
134*37da2899SCharles.Forsyth 		i2 = k;
135*37da2899SCharles.Forsyth 		for(l = 0; l < i2; ++l){
136*37da2899SCharles.Forsyth 		    if(b[j + l*ldb] != 0.){
137*37da2899SCharles.Forsyth 			temp = alpha * b[j + l*ldb];
138*37da2899SCharles.Forsyth 			i3 = m;
139*37da2899SCharles.Forsyth 			for(i = 0; i < i3; ++i){
140*37da2899SCharles.Forsyth 			    c[i + jc] += temp * a[i + l*lda];
141*37da2899SCharles.Forsyth 			}
142*37da2899SCharles.Forsyth 		    }
143*37da2899SCharles.Forsyth 		}
144*37da2899SCharles.Forsyth 	    }
145*37da2899SCharles.Forsyth 	}else{
146*37da2899SCharles.Forsyth 
147*37da2899SCharles.Forsyth /*          Form  C := alpha*A'*B' + beta*C */
148*37da2899SCharles.Forsyth 	    i1 = n;
149*37da2899SCharles.Forsyth 	    for(j = 0; j < i1; ++j){
150*37da2899SCharles.Forsyth 		jc = j*ldc;
151*37da2899SCharles.Forsyth 		i2 = m;
152*37da2899SCharles.Forsyth 		for(i = 0; i < i2; ++i){
153*37da2899SCharles.Forsyth 		    temp = 0.;
154*37da2899SCharles.Forsyth 		    i3 = k;
155*37da2899SCharles.Forsyth 		    for(l = 0; l < i3; ++l){
156*37da2899SCharles.Forsyth 			temp += a[l + i*lda] * b[j + l*ldb];
157*37da2899SCharles.Forsyth 		    }
158*37da2899SCharles.Forsyth 		    if(beta == 0.){
159*37da2899SCharles.Forsyth 			c[i + jc] = alpha * temp;
160*37da2899SCharles.Forsyth 		    }else{
161*37da2899SCharles.Forsyth 			c[i + jc] = alpha * temp + beta * c[i + jc];
162*37da2899SCharles.Forsyth 		    }
163*37da2899SCharles.Forsyth 		}
164*37da2899SCharles.Forsyth 	    }
165*37da2899SCharles.Forsyth 	}
166*37da2899SCharles.Forsyth     }
167*37da2899SCharles.Forsyth }
168