xref: /onnv-gate/usr/src/cmd/fps/fptest/linpack.c (revision 7186:e728311aafb0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <errno.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <sys/time.h>
33 #include <unistd.h>
34 #include <externs.h>
35 #include <fp.h>
36 #include <fps_ereport.h>
37 #include <fpstestmsg.h>
38 #include <linpack.h>
39 #include <sunperf.h>
40 
41 double fabs(double x);
42 
43 extern void	___pl_dss_set_chip_cache_(int *cache_size);
44 static double dran(int iseed[4]);
45 static int LINSUB(REAL * residn, REAL * resid,
46     REAL * eps, REAL * x11, REAL * xn1, int fps_verbose_msg);
47 static int MATGEN(REAL a[], int lda, int n, REAL b[], REAL * norma);
48 static REAL EPSLON(REAL x);
49 static void MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]);
50 
51 extern int errno;
52 static int MAT_SIZE;
53 #ifndef __lint
54 static int LAPACK_ECACHE_SIZE = 8 * 1024 * 1024;
55 #endif
56 
57 /*
58  * LINPACK(int Stress, int unit, struct fps_test_ereport *report,
59  * int fps_verbose_msg)
60  * performs the single and double precision lapack test. If an
61  * error is found, relevant data is collected and stored in report.
62  */
63 int
LINPACK(int Stress,int unit,struct fps_test_ereport * report,int fps_verbose_msg)64 LINPACK(int Stress, int unit, struct fps_test_ereport *report,
65     int fps_verbose_msg)
66 {
67 	char err_data[MAX_INFO_SIZE];
68 	char l_buf[64];
69 	int c_index;
70 	int ret;
71 	REAL eps;
72 	REAL resid;
73 	REAL residn;
74 	REAL x11;
75 	REAL xn1;
76 	REAL EPS;
77 	REAL RESID;
78 	REAL RESIDN;
79 	REAL X11;
80 	REAL XN1;
81 	uint64_t expected[5];
82 	uint64_t observed[5];
83 
84 #ifdef  FPS_LAPA_UNK
85 #ifndef DP
86 	if (Stress > 1000)
87 			return (0);
88 #endif /* DP */
89 #endif /* FPS_LAPA_UNK */
90 
91 	if (Stress > 10000)
92 		return (0);
93 
94 	/*
95 	 * make sure is no dependency on the E$ size Without this call the
96 	 * computed results will depend on the size of the E$ (
97 	 * sos10/libsunperf ) IIIi computed results != IV+/IV/III+/III ...
98 	 */
99 #ifndef __lint
100 	___pl_dss_set_chip_cache_(&LAPACK_ECACHE_SIZE);
101 #endif
102 
103 	c_index = Stress;
104 
105 	if (2000 == c_index)
106 		c_index = 1001;
107 	if (3000 == c_index)
108 		c_index = 1002;
109 	if (4016 == c_index)
110 		c_index = 1003;
111 	if (5000 == c_index)
112 		c_index = 1004;
113 	if (6000 == c_index)
114 		c_index = 1005;
115 	if (7016 == c_index)
116 		c_index = 1006;
117 	if (8034 == c_index)
118 		c_index = 1007;
119 	if (9000 == c_index)
120 		c_index = 1008;
121 	if (10000 == c_index)
122 		c_index = 1009;
123 
124 	(void) snprintf(l_buf, 63, "%s(%d,cpu=%d)", PREC, Stress, unit);
125 	fps_msg(fps_verbose_msg, gettext(FPSM_02), l_buf, unit);
126 
127 	MAT_SIZE = Stress;
128 	ret = LINSUB(&residn, &resid, &eps, &x11, &xn1, fps_verbose_msg);
129 
130 	if (2 == ret) {
131 		if (errno == EAGAIN || errno == ENOMEM)
132 			_exit(FPU_SYSCALL_TRYAGAIN);
133 		else
134 			_exit(FPU_SYSCALL_FAIL);
135 	}
136 
137 #ifdef  FPS_LAPA_UNK
138 	RESIDN  = RESID   = X11 = XN1 = 0.0000000000000000e+00;
139 
140 #ifdef DP
141 	EPS = 2.2204460492503131e-16;
142 #else /* DP */
143 	EPS = 1.1920928955078125e-07;
144 #endif /* DP */
145 
146 #else /* FPS_LAPA_UNK */
147 
148 	RESIDN = LinpValsA[c_index].residn;
149 	RESID = LinpValsA[c_index].resid;
150 	EPS = LinpValsA[c_index].eps;
151 	X11 = LinpValsA[c_index].x11;
152 	XN1 = LinpValsA[c_index].xn1;
153 
154 #endif /* FPS_LAPA_UNK */
155 
156 	if ((residn == RESIDN) && (resid == RESID) && (eps == EPS) &&
157 	    (x11 == X11) && (xn1 == XN1)) {
158 
159 		return (0);
160 	} else {
161 		(void) snprintf(err_data, sizeof (err_data),
162 		    "\nExpected: %.16e, %.16e, %.16e, %.16e, %.16e"
163 		    "\nObserved: %.16e, %.16e, %.16e, %.16e, %.16e",
164 		    RESIDN, RESID, EPS, X11, XN1, residn, resid, eps, x11, xn1);
165 
166 
167 #ifdef	DP
168 		observed[0] = *(uint64_t *)&residn;
169 		observed[1] = *(uint64_t *)&resid;
170 		observed[2] = *(uint64_t *)&eps;
171 		observed[3] = *(uint64_t *)&x11;
172 		observed[4] = *(uint64_t *)&xn1;
173 		expected[0] = *(uint64_t *)&RESIDN;
174 		expected[1] = *(uint64_t *)&RESID;
175 		expected[2] = *(uint64_t *)&EPS;
176 		expected[3] = *(uint64_t *)&X11;
177 		expected[4] = *(uint64_t *)&XN1;
178 
179 		setup_fps_test_struct(IS_EREPORT_INFO, report,
180 		    6317, &observed, &expected, 5, 5, err_data);
181 #else
182 		observed[0] = (uint64_t)(*(uint32_t *)&residn);
183 		observed[1] = (uint64_t)(*(uint32_t *)&resid);
184 		observed[2] = (uint64_t)(*(uint32_t *)&eps);
185 		observed[3] = (uint64_t)(*(uint32_t *)&x11);
186 		observed[4] = (uint64_t)(*(uint32_t *)&xn1);
187 		expected[0] = (uint64_t)(*(uint32_t *)&RESIDN);
188 		expected[1] = (uint64_t)(*(uint32_t *)&RESID);
189 		expected[2] = (uint64_t)(*(uint32_t *)&EPS);
190 		expected[3] = (uint64_t)(*(uint32_t *)&X11);
191 		expected[4] = (uint64_t)(*(uint32_t *)&XN1);
192 
193 		setup_fps_test_struct(IS_EREPORT_INFO, report,
194 		    6316, &observed, &expected, 5, 5, err_data);
195 #endif
196 
197 		return (-1);
198 	}
199 }
200 
201 /*
202  * LINSUB(REAL *residn, REAL *resid, REAL *eps,
203  * REAL *x11, REAL *xn1, int fps_verbose_msg)begins
204  * the lapack calculation calls.
205  */
206 static int
LINSUB(REAL * residn,REAL * resid,REAL * eps,REAL * x11,REAL * xn1,int fps_verbose_msg)207 LINSUB(REAL *residn, REAL *resid,
208 	REAL *eps, REAL *x11, REAL *xn1,
209 	int fps_verbose_msg)
210 {
211 	int i;
212 	int lda;
213 	int n;
214 	int nr_malloc;
215 	REAL *a;
216 	REAL abs;
217 	REAL *b;
218 	REAL norma;
219 	REAL normx;
220 	REAL *x;
221 	struct timeval timeout;
222 	long *ipvt;
223 #ifndef __lint
224 	long info;
225 #endif
226 
227 	timeout.tv_sec = 0;
228 	timeout.tv_usec = 10000; /* microseconds, 10ms */
229 	nr_malloc = 0;
230 
231 mallocAgain:
232 
233 	a = (REAL *) malloc((MAT_SIZE + 8) * (MAT_SIZE + 1) *
234 	    (size_t)sizeof (REAL));
235 	b = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL));
236 	x = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL));
237 
238 	ipvt = (long *)malloc(MAT_SIZE * (size_t)sizeof (long));
239 
240 	if ((NULL == a) || (NULL == b) ||
241 	    (NULL == x) || (NULL == ipvt)) {
242 		if (NULL != a)
243 			free(a);
244 		if (NULL != b)
245 			free(b);
246 		if (NULL != x)
247 			free(x);
248 		if (NULL != ipvt)
249 			free(ipvt);
250 
251 		/* sleep 10 ms. wait for 100 ms */
252 		if (nr_malloc++ < 11) {
253 			(void) select(1, NULL, NULL, NULL, &timeout);
254 			goto mallocAgain;
255 		}
256 		fps_msg(fps_verbose_msg,
257 		    "Malloc failed in lapack, matrix size %d",
258 		    MAT_SIZE);
259 
260 		return (2);
261 	}
262 	lda = MAT_SIZE + 8;
263 	n = MAT_SIZE;
264 
265 	(void) MATGEN(a, lda, n, b, &norma);
266 #ifndef __lint
267 	GEFA(n, n, a, lda, ipvt, &info);
268 	GESL('N', n, 1, a, lda, ipvt, b, n, &info);
269 #endif
270 	free(ipvt);
271 
272 	for (i = 0; i < n; i++) {
273 		x[i] = b[i];
274 	}
275 
276 	(void) MATGEN((REAL *) a, lda, n, b, &norma);
277 
278 	for (i = 0; i < n; i++) {
279 		b[i] = -b[i];
280 	}
281 
282 	MXPY(n, b, n, lda, x, (REAL *) a);
283 	free(a);
284 
285 	*resid = 0.0;
286 	normx = 0.0;
287 
288 	for (i = 0; i < n; i++) {
289 		abs = (REAL)fabs((double)b[i]);
290 		*resid = (*resid > abs) ? *resid : abs;
291 		abs = (REAL)fabs((double)x[i]);
292 		normx = (normx > abs) ? normx : abs;
293 	}
294 
295 	free(b);
296 
297 	*eps = EPSLON((REAL) LP_ONE);
298 
299 	*residn = *resid / (n * norma * normx * (*eps));
300 
301 	*x11 = x[0] - 1;
302 	*xn1 = x[n - 1] - 1;
303 
304 	free(x);
305 
306 	return (0);
307 }
308 
309 /*
310  * dran(int iseed[4]) returns a random real number from a
311  * uniform (0,1) distribution.
312  */
313 static double
dran(int iseed[4])314 dran(int iseed[4])
315 {
316 	double r;
317 	double value;
318 	int ipw2;
319 	int it1;
320 	int it2;
321 	int it3;
322 	int it4;
323 	int m1;
324 	int m2;
325 	int m3;
326 	int m4;
327 
328 	/* Set constants */
329 	m1 = 494;
330 	m2 = 322;
331 	m3 = 2508;
332 	m4 = 2549;
333 	ipw2 = 4096;
334 	r = 1.0 / ipw2;
335 
336 	/* multiply the seed by the multiplier modulo 2**48 */
337 	it4 = iseed[3] * m4;
338 	it3 = it4 / ipw2;
339 	it4 = it4 - ipw2 * it3;
340 	it3 = it3 + iseed[2] * m4 + iseed[3] * m3;
341 	it2 = it3 / ipw2;
342 	it3 = it3 - ipw2 * it2;
343 	it2 = it2 + iseed[1] * m4 + iseed[2] * m3 + iseed[3] * m2;
344 	it1 = it2 / ipw2;
345 	it2 = it2 - ipw2 * it1;
346 	it1 = it1 + iseed[0] * m4 + iseed[1] * m3 + iseed[2] * m2 +
347 	    iseed[3] * m1;
348 	it1 = it1 % ipw2;
349 
350 	/* return updated seed */
351 	iseed[0] = it1;
352 	iseed[1] = it2;
353 	iseed[2] = it3;
354 	iseed[3] = it4;
355 
356 	/* convert 48-bit integer to a real number in the interval (0,1) */
357 	value = r * ((double)it1 + r * ((double)it2 + r * ((double)it3 +
358 	    r * ((double)it4))));
359 
360 	return (value);
361 }
362 
363 /*
364  * MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma)
365  * generates matrix a and b.
366  */
367 
368 #define	ALPHA 1.68750
369 static int
MATGEN(REAL a[],int lda,int n,REAL b[],REAL * norma)370 MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma)
371 {
372 	int		i;
373 	int		init[4];
374 	int		j;
375 	REAL	value;
376 
377 	init[0] = 1;
378 	init[1] = 2;
379 	init[2] = 3;
380 	init[3] = 1325;
381 	*norma = LP_ZERO;
382 	for (j = 0; j < n; j++) {
383 		for (i = 0; i < n; i++) {
384 #ifdef FPS_LAPA_UNK
385 			a[lda*j+i] =
386 			    (i < j) ? (double)(i+1) : (double)(j+ALPHA);
387 			if (fabs(a[lda*j+i]) > *norma)
388 				*norma = fabs(a[lda*j+i]);
389 			} /* i */
390 #else
391 			value = (REAL) dran(init) - 0.5;
392 			a[lda * j + i] = value;
393 			value = fabs(value);
394 			if (value > *norma) {
395 				*norma = value;
396 			}
397 		} /* i */
398 #endif /* FPS_LAPA_UNK */
399 	} /* j */
400 
401 
402 	for (i = 0; i < n; i++) {
403 		b[i] = LP_ZERO;
404 	}
405 	for (j = 0; j < n; j++) {
406 		for (i = 0; i < n; i++) {
407 			b[i] = b[i] + a[lda * j + i];
408 		}
409 	}
410 
411 	return (0);
412 }
413 
414 /*
415  * EPSLON(REAL x) estimates unit roundoff in
416  * quantities of size x.
417  */
418 static REAL
EPSLON(REAL x)419 EPSLON(REAL x)
420 {
421 	REAL a;
422 	REAL abs;
423 	REAL b;
424 	REAL c;
425 	REAL eps;
426 
427 	a = 4.0e0 / 3.0e0;
428 	eps = LP_ZERO;
429 
430 	while (eps == LP_ZERO) {
431 		b = a - LP_ONE;
432 		c = b + b + b;
433 		eps = (REAL)fabs((double)(c - LP_ONE));
434 	}
435 
436 	abs = (REAL)fabs((double)x);
437 
438 	return (eps * abs);
439 }
440 
441 /*
442  * MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[])
443  * multiplies matrix m times vector x and add the result to
444  * vector y.
445  */
446 static void
MXPY(int n1,REAL y[],int n2,int ldm,REAL x[],REAL m[])447 MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[])
448 {
449 	int i;
450 	int j;
451 	int jmin;
452 
453 	/* cleanup odd vector */
454 	j = n2 % 2;
455 	if (j >= 1) {
456 		j = j - 1;
457 		for (i = 0; i < n1; i++)
458 			y[i] = (y[i]) + x[j] * m[ldm * j + i];
459 	}
460 
461 	/* cleanup odd group of two vectors */
462 	j = n2 % 4;
463 	if (j >= 2) {
464 		j = j - 1;
465 		for (i = 0; i < n1; i++)
466 			y[i] = ((y[i])
467 			    + x[j - 1] * m[ldm * (j - 1) + i])
468 			    + x[j] * m[ldm * j + i];
469 	}
470 
471 	/* cleanup odd group of four vectors */
472 	j = n2 % 8;
473 	if (j >= 4) {
474 		j = j - 1;
475 		for (i = 0; i < n1; i++)
476 			y[i] = ((((y[i])
477 			    + x[j - 3] * m[ldm * (j - 3) + i])
478 			    + x[j - 2] * m[ldm * (j - 2) + i])
479 			    + x[j - 1] * m[ldm * (j - 1) + i])
480 			    + x[j] * m[ldm * j + i];
481 	}
482 
483 	/* cleanup odd group of eight vectors */
484 	j = n2 % 16;
485 	if (j >= 8) {
486 		j = j - 1;
487 		for (i = 0; i < n1; i++)
488 			y[i] = ((((((((y[i])
489 			    + x[j - 7] * m[ldm * (j - 7) + i])
490 			    + x[j - 6] * m[ldm * (j - 6) + i])
491 			    + x[j - 5] * m[ldm * (j - 5) + i])
492 			    + x[j - 4] * m[ldm * (j - 4) + i])
493 			    + x[j - 3] * m[ldm * (j - 3) + i])
494 			    + x[j - 2] * m[ldm * (j - 2) + i])
495 			    + x[j - 1] * m[ldm * (j - 1) + i])
496 			    + x[j] * m[ldm * j + i];
497 	}
498 
499 	/* main loop - groups of sixteen vectors */
500 	jmin = (n2 % 16) + 16;
501 	for (j = jmin - 1; j < n2; j = j + 16) {
502 		for (i = 0; i < n1; i++)
503 			y[i] = ((((((((((((((((y[i])
504 			    + x[j - 15] * m[ldm * (j - 15) + i])
505 			    + x[j - 14] * m[ldm * (j - 14) + i])
506 			    + x[j - 13] * m[ldm * (j - 13) + i])
507 			    + x[j - 12] * m[ldm * (j - 12) + i])
508 			    + x[j - 11] * m[ldm * (j - 11) + i])
509 			    + x[j - 10] * m[ldm * (j - 10) + i])
510 			    + x[j - 9] * m[ldm * (j - 9) + i])
511 			    + x[j - 8] * m[ldm * (j - 8) + i])
512 			    + x[j - 7] * m[ldm * (j - 7) + i])
513 			    + x[j - 6] * m[ldm * (j - 6) + i])
514 			    + x[j - 5] * m[ldm * (j - 5) + i])
515 			    + x[j - 4] * m[ldm * (j - 4) + i])
516 			    + x[j - 3] * m[ldm * (j - 3) + i])
517 			    + x[j - 2] * m[ldm * (j - 2) + i])
518 			    + x[j - 1] * m[ldm * (j - 1) + i])
519 			    + x[j] * m[ldm * j + i];
520 	}
521 }
522