1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include <errno.h>
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <sys/time.h>
33 #include <unistd.h>
34 #include <externs.h>
35 #include <fp.h>
36 #include <fps_ereport.h>
37 #include <fpstestmsg.h>
38 #include <linpack.h>
39 #include <sunperf.h>
40
41 double fabs(double x);
42
43 extern void ___pl_dss_set_chip_cache_(int *cache_size);
44 static double dran(int iseed[4]);
45 static int LINSUB(REAL * residn, REAL * resid,
46 REAL * eps, REAL * x11, REAL * xn1, int fps_verbose_msg);
47 static int MATGEN(REAL a[], int lda, int n, REAL b[], REAL * norma);
48 static REAL EPSLON(REAL x);
49 static void MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]);
50
51 extern int errno;
52 static int MAT_SIZE;
53 #ifndef __lint
54 static int LAPACK_ECACHE_SIZE = 8 * 1024 * 1024;
55 #endif
56
57 /*
58 * LINPACK(int Stress, int unit, struct fps_test_ereport *report,
59 * int fps_verbose_msg)
60 * performs the single and double precision lapack test. If an
61 * error is found, relevant data is collected and stored in report.
62 */
63 int
LINPACK(int Stress,int unit,struct fps_test_ereport * report,int fps_verbose_msg)64 LINPACK(int Stress, int unit, struct fps_test_ereport *report,
65 int fps_verbose_msg)
66 {
67 char err_data[MAX_INFO_SIZE];
68 char l_buf[64];
69 int c_index;
70 int ret;
71 REAL eps;
72 REAL resid;
73 REAL residn;
74 REAL x11;
75 REAL xn1;
76 REAL EPS;
77 REAL RESID;
78 REAL RESIDN;
79 REAL X11;
80 REAL XN1;
81 uint64_t expected[5];
82 uint64_t observed[5];
83
84 #ifdef FPS_LAPA_UNK
85 #ifndef DP
86 if (Stress > 1000)
87 return (0);
88 #endif /* DP */
89 #endif /* FPS_LAPA_UNK */
90
91 if (Stress > 10000)
92 return (0);
93
94 /*
95 * make sure is no dependency on the E$ size Without this call the
96 * computed results will depend on the size of the E$ (
97 * sos10/libsunperf ) IIIi computed results != IV+/IV/III+/III ...
98 */
99 #ifndef __lint
100 ___pl_dss_set_chip_cache_(&LAPACK_ECACHE_SIZE);
101 #endif
102
103 c_index = Stress;
104
105 if (2000 == c_index)
106 c_index = 1001;
107 if (3000 == c_index)
108 c_index = 1002;
109 if (4016 == c_index)
110 c_index = 1003;
111 if (5000 == c_index)
112 c_index = 1004;
113 if (6000 == c_index)
114 c_index = 1005;
115 if (7016 == c_index)
116 c_index = 1006;
117 if (8034 == c_index)
118 c_index = 1007;
119 if (9000 == c_index)
120 c_index = 1008;
121 if (10000 == c_index)
122 c_index = 1009;
123
124 (void) snprintf(l_buf, 63, "%s(%d,cpu=%d)", PREC, Stress, unit);
125 fps_msg(fps_verbose_msg, gettext(FPSM_02), l_buf, unit);
126
127 MAT_SIZE = Stress;
128 ret = LINSUB(&residn, &resid, &eps, &x11, &xn1, fps_verbose_msg);
129
130 if (2 == ret) {
131 if (errno == EAGAIN || errno == ENOMEM)
132 _exit(FPU_SYSCALL_TRYAGAIN);
133 else
134 _exit(FPU_SYSCALL_FAIL);
135 }
136
137 #ifdef FPS_LAPA_UNK
138 RESIDN = RESID = X11 = XN1 = 0.0000000000000000e+00;
139
140 #ifdef DP
141 EPS = 2.2204460492503131e-16;
142 #else /* DP */
143 EPS = 1.1920928955078125e-07;
144 #endif /* DP */
145
146 #else /* FPS_LAPA_UNK */
147
148 RESIDN = LinpValsA[c_index].residn;
149 RESID = LinpValsA[c_index].resid;
150 EPS = LinpValsA[c_index].eps;
151 X11 = LinpValsA[c_index].x11;
152 XN1 = LinpValsA[c_index].xn1;
153
154 #endif /* FPS_LAPA_UNK */
155
156 if ((residn == RESIDN) && (resid == RESID) && (eps == EPS) &&
157 (x11 == X11) && (xn1 == XN1)) {
158
159 return (0);
160 } else {
161 (void) snprintf(err_data, sizeof (err_data),
162 "\nExpected: %.16e, %.16e, %.16e, %.16e, %.16e"
163 "\nObserved: %.16e, %.16e, %.16e, %.16e, %.16e",
164 RESIDN, RESID, EPS, X11, XN1, residn, resid, eps, x11, xn1);
165
166
167 #ifdef DP
168 observed[0] = *(uint64_t *)&residn;
169 observed[1] = *(uint64_t *)&resid;
170 observed[2] = *(uint64_t *)&eps;
171 observed[3] = *(uint64_t *)&x11;
172 observed[4] = *(uint64_t *)&xn1;
173 expected[0] = *(uint64_t *)&RESIDN;
174 expected[1] = *(uint64_t *)&RESID;
175 expected[2] = *(uint64_t *)&EPS;
176 expected[3] = *(uint64_t *)&X11;
177 expected[4] = *(uint64_t *)&XN1;
178
179 setup_fps_test_struct(IS_EREPORT_INFO, report,
180 6317, &observed, &expected, 5, 5, err_data);
181 #else
182 observed[0] = (uint64_t)(*(uint32_t *)&residn);
183 observed[1] = (uint64_t)(*(uint32_t *)&resid);
184 observed[2] = (uint64_t)(*(uint32_t *)&eps);
185 observed[3] = (uint64_t)(*(uint32_t *)&x11);
186 observed[4] = (uint64_t)(*(uint32_t *)&xn1);
187 expected[0] = (uint64_t)(*(uint32_t *)&RESIDN);
188 expected[1] = (uint64_t)(*(uint32_t *)&RESID);
189 expected[2] = (uint64_t)(*(uint32_t *)&EPS);
190 expected[3] = (uint64_t)(*(uint32_t *)&X11);
191 expected[4] = (uint64_t)(*(uint32_t *)&XN1);
192
193 setup_fps_test_struct(IS_EREPORT_INFO, report,
194 6316, &observed, &expected, 5, 5, err_data);
195 #endif
196
197 return (-1);
198 }
199 }
200
201 /*
202 * LINSUB(REAL *residn, REAL *resid, REAL *eps,
203 * REAL *x11, REAL *xn1, int fps_verbose_msg)begins
204 * the lapack calculation calls.
205 */
206 static int
LINSUB(REAL * residn,REAL * resid,REAL * eps,REAL * x11,REAL * xn1,int fps_verbose_msg)207 LINSUB(REAL *residn, REAL *resid,
208 REAL *eps, REAL *x11, REAL *xn1,
209 int fps_verbose_msg)
210 {
211 int i;
212 int lda;
213 int n;
214 int nr_malloc;
215 REAL *a;
216 REAL abs;
217 REAL *b;
218 REAL norma;
219 REAL normx;
220 REAL *x;
221 struct timeval timeout;
222 long *ipvt;
223 #ifndef __lint
224 long info;
225 #endif
226
227 timeout.tv_sec = 0;
228 timeout.tv_usec = 10000; /* microseconds, 10ms */
229 nr_malloc = 0;
230
231 mallocAgain:
232
233 a = (REAL *) malloc((MAT_SIZE + 8) * (MAT_SIZE + 1) *
234 (size_t)sizeof (REAL));
235 b = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL));
236 x = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL));
237
238 ipvt = (long *)malloc(MAT_SIZE * (size_t)sizeof (long));
239
240 if ((NULL == a) || (NULL == b) ||
241 (NULL == x) || (NULL == ipvt)) {
242 if (NULL != a)
243 free(a);
244 if (NULL != b)
245 free(b);
246 if (NULL != x)
247 free(x);
248 if (NULL != ipvt)
249 free(ipvt);
250
251 /* sleep 10 ms. wait for 100 ms */
252 if (nr_malloc++ < 11) {
253 (void) select(1, NULL, NULL, NULL, &timeout);
254 goto mallocAgain;
255 }
256 fps_msg(fps_verbose_msg,
257 "Malloc failed in lapack, matrix size %d",
258 MAT_SIZE);
259
260 return (2);
261 }
262 lda = MAT_SIZE + 8;
263 n = MAT_SIZE;
264
265 (void) MATGEN(a, lda, n, b, &norma);
266 #ifndef __lint
267 GEFA(n, n, a, lda, ipvt, &info);
268 GESL('N', n, 1, a, lda, ipvt, b, n, &info);
269 #endif
270 free(ipvt);
271
272 for (i = 0; i < n; i++) {
273 x[i] = b[i];
274 }
275
276 (void) MATGEN((REAL *) a, lda, n, b, &norma);
277
278 for (i = 0; i < n; i++) {
279 b[i] = -b[i];
280 }
281
282 MXPY(n, b, n, lda, x, (REAL *) a);
283 free(a);
284
285 *resid = 0.0;
286 normx = 0.0;
287
288 for (i = 0; i < n; i++) {
289 abs = (REAL)fabs((double)b[i]);
290 *resid = (*resid > abs) ? *resid : abs;
291 abs = (REAL)fabs((double)x[i]);
292 normx = (normx > abs) ? normx : abs;
293 }
294
295 free(b);
296
297 *eps = EPSLON((REAL) LP_ONE);
298
299 *residn = *resid / (n * norma * normx * (*eps));
300
301 *x11 = x[0] - 1;
302 *xn1 = x[n - 1] - 1;
303
304 free(x);
305
306 return (0);
307 }
308
309 /*
310 * dran(int iseed[4]) returns a random real number from a
311 * uniform (0,1) distribution.
312 */
313 static double
dran(int iseed[4])314 dran(int iseed[4])
315 {
316 double r;
317 double value;
318 int ipw2;
319 int it1;
320 int it2;
321 int it3;
322 int it4;
323 int m1;
324 int m2;
325 int m3;
326 int m4;
327
328 /* Set constants */
329 m1 = 494;
330 m2 = 322;
331 m3 = 2508;
332 m4 = 2549;
333 ipw2 = 4096;
334 r = 1.0 / ipw2;
335
336 /* multiply the seed by the multiplier modulo 2**48 */
337 it4 = iseed[3] * m4;
338 it3 = it4 / ipw2;
339 it4 = it4 - ipw2 * it3;
340 it3 = it3 + iseed[2] * m4 + iseed[3] * m3;
341 it2 = it3 / ipw2;
342 it3 = it3 - ipw2 * it2;
343 it2 = it2 + iseed[1] * m4 + iseed[2] * m3 + iseed[3] * m2;
344 it1 = it2 / ipw2;
345 it2 = it2 - ipw2 * it1;
346 it1 = it1 + iseed[0] * m4 + iseed[1] * m3 + iseed[2] * m2 +
347 iseed[3] * m1;
348 it1 = it1 % ipw2;
349
350 /* return updated seed */
351 iseed[0] = it1;
352 iseed[1] = it2;
353 iseed[2] = it3;
354 iseed[3] = it4;
355
356 /* convert 48-bit integer to a real number in the interval (0,1) */
357 value = r * ((double)it1 + r * ((double)it2 + r * ((double)it3 +
358 r * ((double)it4))));
359
360 return (value);
361 }
362
363 /*
364 * MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma)
365 * generates matrix a and b.
366 */
367
368 #define ALPHA 1.68750
369 static int
MATGEN(REAL a[],int lda,int n,REAL b[],REAL * norma)370 MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma)
371 {
372 int i;
373 int init[4];
374 int j;
375 REAL value;
376
377 init[0] = 1;
378 init[1] = 2;
379 init[2] = 3;
380 init[3] = 1325;
381 *norma = LP_ZERO;
382 for (j = 0; j < n; j++) {
383 for (i = 0; i < n; i++) {
384 #ifdef FPS_LAPA_UNK
385 a[lda*j+i] =
386 (i < j) ? (double)(i+1) : (double)(j+ALPHA);
387 if (fabs(a[lda*j+i]) > *norma)
388 *norma = fabs(a[lda*j+i]);
389 } /* i */
390 #else
391 value = (REAL) dran(init) - 0.5;
392 a[lda * j + i] = value;
393 value = fabs(value);
394 if (value > *norma) {
395 *norma = value;
396 }
397 } /* i */
398 #endif /* FPS_LAPA_UNK */
399 } /* j */
400
401
402 for (i = 0; i < n; i++) {
403 b[i] = LP_ZERO;
404 }
405 for (j = 0; j < n; j++) {
406 for (i = 0; i < n; i++) {
407 b[i] = b[i] + a[lda * j + i];
408 }
409 }
410
411 return (0);
412 }
413
414 /*
415 * EPSLON(REAL x) estimates unit roundoff in
416 * quantities of size x.
417 */
418 static REAL
EPSLON(REAL x)419 EPSLON(REAL x)
420 {
421 REAL a;
422 REAL abs;
423 REAL b;
424 REAL c;
425 REAL eps;
426
427 a = 4.0e0 / 3.0e0;
428 eps = LP_ZERO;
429
430 while (eps == LP_ZERO) {
431 b = a - LP_ONE;
432 c = b + b + b;
433 eps = (REAL)fabs((double)(c - LP_ONE));
434 }
435
436 abs = (REAL)fabs((double)x);
437
438 return (eps * abs);
439 }
440
441 /*
442 * MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[])
443 * multiplies matrix m times vector x and add the result to
444 * vector y.
445 */
446 static void
MXPY(int n1,REAL y[],int n2,int ldm,REAL x[],REAL m[])447 MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[])
448 {
449 int i;
450 int j;
451 int jmin;
452
453 /* cleanup odd vector */
454 j = n2 % 2;
455 if (j >= 1) {
456 j = j - 1;
457 for (i = 0; i < n1; i++)
458 y[i] = (y[i]) + x[j] * m[ldm * j + i];
459 }
460
461 /* cleanup odd group of two vectors */
462 j = n2 % 4;
463 if (j >= 2) {
464 j = j - 1;
465 for (i = 0; i < n1; i++)
466 y[i] = ((y[i])
467 + x[j - 1] * m[ldm * (j - 1) + i])
468 + x[j] * m[ldm * j + i];
469 }
470
471 /* cleanup odd group of four vectors */
472 j = n2 % 8;
473 if (j >= 4) {
474 j = j - 1;
475 for (i = 0; i < n1; i++)
476 y[i] = ((((y[i])
477 + x[j - 3] * m[ldm * (j - 3) + i])
478 + x[j - 2] * m[ldm * (j - 2) + i])
479 + x[j - 1] * m[ldm * (j - 1) + i])
480 + x[j] * m[ldm * j + i];
481 }
482
483 /* cleanup odd group of eight vectors */
484 j = n2 % 16;
485 if (j >= 8) {
486 j = j - 1;
487 for (i = 0; i < n1; i++)
488 y[i] = ((((((((y[i])
489 + x[j - 7] * m[ldm * (j - 7) + i])
490 + x[j - 6] * m[ldm * (j - 6) + i])
491 + x[j - 5] * m[ldm * (j - 5) + i])
492 + x[j - 4] * m[ldm * (j - 4) + i])
493 + x[j - 3] * m[ldm * (j - 3) + i])
494 + x[j - 2] * m[ldm * (j - 2) + i])
495 + x[j - 1] * m[ldm * (j - 1) + i])
496 + x[j] * m[ldm * j + i];
497 }
498
499 /* main loop - groups of sixteen vectors */
500 jmin = (n2 % 16) + 16;
501 for (j = jmin - 1; j < n2; j = j + 16) {
502 for (i = 0; i < n1; i++)
503 y[i] = ((((((((((((((((y[i])
504 + x[j - 15] * m[ldm * (j - 15) + i])
505 + x[j - 14] * m[ldm * (j - 14) + i])
506 + x[j - 13] * m[ldm * (j - 13) + i])
507 + x[j - 12] * m[ldm * (j - 12) + i])
508 + x[j - 11] * m[ldm * (j - 11) + i])
509 + x[j - 10] * m[ldm * (j - 10) + i])
510 + x[j - 9] * m[ldm * (j - 9) + i])
511 + x[j - 8] * m[ldm * (j - 8) + i])
512 + x[j - 7] * m[ldm * (j - 7) + i])
513 + x[j - 6] * m[ldm * (j - 6) + i])
514 + x[j - 5] * m[ldm * (j - 5) + i])
515 + x[j - 4] * m[ldm * (j - 4) + i])
516 + x[j - 3] * m[ldm * (j - 3) + i])
517 + x[j - 2] * m[ldm * (j - 2) + i])
518 + x[j - 1] * m[ldm * (j - 1) + i])
519 + x[j] * m[ldm * j + i];
520 }
521 }
522