1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <errno.h> 30 #include <stdio.h> 31 #include <stdlib.h> 32 #include <sys/time.h> 33 #include <unistd.h> 34 #include <externs.h> 35 #include <fp.h> 36 #include <fps_ereport.h> 37 #include <fpstestmsg.h> 38 #include <linpack.h> 39 #include <sunperf.h> 40 41 double fabs(double x); 42 43 extern void ___pl_dss_set_chip_cache_(int *cache_size); 44 static double dran(int iseed[4]); 45 static int LINSUB(REAL * residn, REAL * resid, 46 REAL * eps, REAL * x11, REAL * xn1, int fps_verbose_msg); 47 static int MATGEN(REAL a[], int lda, int n, REAL b[], REAL * norma); 48 static REAL EPSLON(REAL x); 49 static void MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]); 50 51 extern int errno; 52 static int MAT_SIZE; 53 #ifndef __lint 54 static int LAPACK_ECACHE_SIZE = 8 * 1024 * 1024; 55 #endif 56 57 /* 58 * LINPACK(int Stress, int unit, struct fps_test_ereport *report, 59 * int fps_verbose_msg) 60 * performs the single and double precision lapack test. If an 61 * error is found, relevant data is collected and stored in report. 62 */ 63 int 64 LINPACK(int Stress, int unit, struct fps_test_ereport *report, 65 int fps_verbose_msg) 66 { 67 char err_data[MAX_INFO_SIZE]; 68 char l_buf[64]; 69 int c_index; 70 int ret; 71 REAL eps; 72 REAL resid; 73 REAL residn; 74 REAL x11; 75 REAL xn1; 76 REAL EPS; 77 REAL RESID; 78 REAL RESIDN; 79 REAL X11; 80 REAL XN1; 81 uint64_t expected[5]; 82 uint64_t observed[5]; 83 84 #ifdef FPS_LAPA_UNK 85 #ifndef DP 86 if (Stress > 1000) 87 return (0); 88 #endif /* DP */ 89 #endif /* FPS_LAPA_UNK */ 90 91 if (Stress > 10000) 92 return (0); 93 94 /* 95 * make sure is no dependency on the E$ size Without this call the 96 * computed results will depend on the size of the E$ ( 97 * sos10/libsunperf ) IIIi computed results != IV+/IV/III+/III ... 98 */ 99 #ifndef __lint 100 ___pl_dss_set_chip_cache_(&LAPACK_ECACHE_SIZE); 101 #endif 102 103 c_index = Stress; 104 105 if (2000 == c_index) 106 c_index = 1001; 107 if (3000 == c_index) 108 c_index = 1002; 109 if (4016 == c_index) 110 c_index = 1003; 111 if (5000 == c_index) 112 c_index = 1004; 113 if (6000 == c_index) 114 c_index = 1005; 115 if (7016 == c_index) 116 c_index = 1006; 117 if (8034 == c_index) 118 c_index = 1007; 119 if (9000 == c_index) 120 c_index = 1008; 121 if (10000 == c_index) 122 c_index = 1009; 123 124 (void) snprintf(l_buf, 63, "%s(%d,cpu=%d)", PREC, Stress, unit); 125 fps_msg(fps_verbose_msg, gettext(FPSM_02), l_buf, unit); 126 127 MAT_SIZE = Stress; 128 ret = LINSUB(&residn, &resid, &eps, &x11, &xn1, fps_verbose_msg); 129 130 if (2 == ret) { 131 if (errno == EAGAIN || errno == ENOMEM) 132 _exit(FPU_SYSCALL_TRYAGAIN); 133 else 134 _exit(FPU_SYSCALL_FAIL); 135 } 136 137 #ifdef FPS_LAPA_UNK 138 RESIDN = RESID = X11 = XN1 = 0.0000000000000000e+00; 139 140 #ifdef DP 141 EPS = 2.2204460492503131e-16; 142 #else /* DP */ 143 EPS = 1.1920928955078125e-07; 144 #endif /* DP */ 145 146 #else /* FPS_LAPA_UNK */ 147 148 RESIDN = LinpValsA[c_index].residn; 149 RESID = LinpValsA[c_index].resid; 150 EPS = LinpValsA[c_index].eps; 151 X11 = LinpValsA[c_index].x11; 152 XN1 = LinpValsA[c_index].xn1; 153 154 #endif /* FPS_LAPA_UNK */ 155 156 if ((residn == RESIDN) && (resid == RESID) && (eps == EPS) && 157 (x11 == X11) && (xn1 == XN1)) { 158 159 return (0); 160 } else { 161 (void) snprintf(err_data, sizeof (err_data), 162 "\nExpected: %.16e, %.16e, %.16e, %.16e, %.16e" 163 "\nObserved: %.16e, %.16e, %.16e, %.16e, %.16e", 164 RESIDN, RESID, EPS, X11, XN1, residn, resid, eps, x11, xn1); 165 166 167 #ifdef DP 168 observed[0] = *(uint64_t *)&residn; 169 observed[1] = *(uint64_t *)&resid; 170 observed[2] = *(uint64_t *)&eps; 171 observed[3] = *(uint64_t *)&x11; 172 observed[4] = *(uint64_t *)&xn1; 173 expected[0] = *(uint64_t *)&RESIDN; 174 expected[1] = *(uint64_t *)&RESID; 175 expected[2] = *(uint64_t *)&EPS; 176 expected[3] = *(uint64_t *)&X11; 177 expected[4] = *(uint64_t *)&XN1; 178 179 setup_fps_test_struct(IS_EREPORT_INFO, report, 180 6317, &observed, &expected, 5, 5, err_data); 181 #else 182 observed[0] = (uint64_t)(*(uint32_t *)&residn); 183 observed[1] = (uint64_t)(*(uint32_t *)&resid); 184 observed[2] = (uint64_t)(*(uint32_t *)&eps); 185 observed[3] = (uint64_t)(*(uint32_t *)&x11); 186 observed[4] = (uint64_t)(*(uint32_t *)&xn1); 187 expected[0] = (uint64_t)(*(uint32_t *)&RESIDN); 188 expected[1] = (uint64_t)(*(uint32_t *)&RESID); 189 expected[2] = (uint64_t)(*(uint32_t *)&EPS); 190 expected[3] = (uint64_t)(*(uint32_t *)&X11); 191 expected[4] = (uint64_t)(*(uint32_t *)&XN1); 192 193 setup_fps_test_struct(IS_EREPORT_INFO, report, 194 6316, &observed, &expected, 5, 5, err_data); 195 #endif 196 197 return (-1); 198 } 199 } 200 201 /* 202 * LINSUB(REAL *residn, REAL *resid, REAL *eps, 203 * REAL *x11, REAL *xn1, int fps_verbose_msg)begins 204 * the lapack calculation calls. 205 */ 206 static int 207 LINSUB(REAL *residn, REAL *resid, 208 REAL *eps, REAL *x11, REAL *xn1, 209 int fps_verbose_msg) 210 { 211 int i; 212 int lda; 213 int n; 214 int nr_malloc; 215 REAL *a; 216 REAL abs; 217 REAL *b; 218 REAL norma; 219 REAL normx; 220 REAL *x; 221 struct timeval timeout; 222 long *ipvt; 223 #ifndef __lint 224 long info; 225 #endif 226 227 timeout.tv_sec = 0; 228 timeout.tv_usec = 10000; /* microseconds, 10ms */ 229 nr_malloc = 0; 230 231 mallocAgain: 232 233 a = (REAL *) malloc((MAT_SIZE + 8) * (MAT_SIZE + 1) * 234 (size_t)sizeof (REAL)); 235 b = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL)); 236 x = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL)); 237 238 ipvt = (long *)malloc(MAT_SIZE * (size_t)sizeof (long)); 239 240 if ((NULL == a) || (NULL == b) || 241 (NULL == x) || (NULL == ipvt)) { 242 if (NULL != a) 243 free(a); 244 if (NULL != b) 245 free(b); 246 if (NULL != x) 247 free(x); 248 if (NULL != ipvt) 249 free(ipvt); 250 251 /* sleep 10 ms. wait for 100 ms */ 252 if (nr_malloc++ < 11) { 253 (void) select(1, NULL, NULL, NULL, &timeout); 254 goto mallocAgain; 255 } 256 fps_msg(fps_verbose_msg, 257 "Malloc failed in lapack, matrix size %d", 258 MAT_SIZE); 259 260 return (2); 261 } 262 lda = MAT_SIZE + 8; 263 n = MAT_SIZE; 264 265 (void) MATGEN(a, lda, n, b, &norma); 266 #ifndef __lint 267 GEFA(n, n, a, lda, ipvt, &info); 268 GESL('N', n, 1, a, lda, ipvt, b, n, &info); 269 #endif 270 free(ipvt); 271 272 for (i = 0; i < n; i++) { 273 x[i] = b[i]; 274 } 275 276 (void) MATGEN((REAL *) a, lda, n, b, &norma); 277 278 for (i = 0; i < n; i++) { 279 b[i] = -b[i]; 280 } 281 282 MXPY(n, b, n, lda, x, (REAL *) a); 283 free(a); 284 285 *resid = 0.0; 286 normx = 0.0; 287 288 for (i = 0; i < n; i++) { 289 abs = (REAL)fabs((double)b[i]); 290 *resid = (*resid > abs) ? *resid : abs; 291 abs = (REAL)fabs((double)x[i]); 292 normx = (normx > abs) ? normx : abs; 293 } 294 295 free(b); 296 297 *eps = EPSLON((REAL) LP_ONE); 298 299 *residn = *resid / (n * norma * normx * (*eps)); 300 301 *x11 = x[0] - 1; 302 *xn1 = x[n - 1] - 1; 303 304 free(x); 305 306 return (0); 307 } 308 309 /* 310 * dran(int iseed[4]) returns a random real number from a 311 * uniform (0,1) distribution. 312 */ 313 static double 314 dran(int iseed[4]) 315 { 316 double r; 317 double value; 318 int ipw2; 319 int it1; 320 int it2; 321 int it3; 322 int it4; 323 int m1; 324 int m2; 325 int m3; 326 int m4; 327 328 /* Set constants */ 329 m1 = 494; 330 m2 = 322; 331 m3 = 2508; 332 m4 = 2549; 333 ipw2 = 4096; 334 r = 1.0 / ipw2; 335 336 /* multiply the seed by the multiplier modulo 2**48 */ 337 it4 = iseed[3] * m4; 338 it3 = it4 / ipw2; 339 it4 = it4 - ipw2 * it3; 340 it3 = it3 + iseed[2] * m4 + iseed[3] * m3; 341 it2 = it3 / ipw2; 342 it3 = it3 - ipw2 * it2; 343 it2 = it2 + iseed[1] * m4 + iseed[2] * m3 + iseed[3] * m2; 344 it1 = it2 / ipw2; 345 it2 = it2 - ipw2 * it1; 346 it1 = it1 + iseed[0] * m4 + iseed[1] * m3 + iseed[2] * m2 + 347 iseed[3] * m1; 348 it1 = it1 % ipw2; 349 350 /* return updated seed */ 351 iseed[0] = it1; 352 iseed[1] = it2; 353 iseed[2] = it3; 354 iseed[3] = it4; 355 356 /* convert 48-bit integer to a real number in the interval (0,1) */ 357 value = r * ((double)it1 + r * ((double)it2 + r * ((double)it3 + 358 r * ((double)it4)))); 359 360 return (value); 361 } 362 363 /* 364 * MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma) 365 * generates matrix a and b. 366 */ 367 368 #define ALPHA 1.68750 369 static int 370 MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma) 371 { 372 int i; 373 int init[4]; 374 int j; 375 REAL value; 376 377 init[0] = 1; 378 init[1] = 2; 379 init[2] = 3; 380 init[3] = 1325; 381 *norma = LP_ZERO; 382 for (j = 0; j < n; j++) { 383 for (i = 0; i < n; i++) { 384 #ifdef FPS_LAPA_UNK 385 a[lda*j+i] = 386 (i < j) ? (double)(i+1) : (double)(j+ALPHA); 387 if (fabs(a[lda*j+i]) > *norma) 388 *norma = fabs(a[lda*j+i]); 389 } /* i */ 390 #else 391 value = (REAL) dran(init) - 0.5; 392 a[lda * j + i] = value; 393 value = fabs(value); 394 if (value > *norma) { 395 *norma = value; 396 } 397 } /* i */ 398 #endif /* FPS_LAPA_UNK */ 399 } /* j */ 400 401 402 for (i = 0; i < n; i++) { 403 b[i] = LP_ZERO; 404 } 405 for (j = 0; j < n; j++) { 406 for (i = 0; i < n; i++) { 407 b[i] = b[i] + a[lda * j + i]; 408 } 409 } 410 411 return (0); 412 } 413 414 /* 415 * EPSLON(REAL x) estimates unit roundoff in 416 * quantities of size x. 417 */ 418 static REAL 419 EPSLON(REAL x) 420 { 421 REAL a; 422 REAL abs; 423 REAL b; 424 REAL c; 425 REAL eps; 426 427 a = 4.0e0 / 3.0e0; 428 eps = LP_ZERO; 429 430 while (eps == LP_ZERO) { 431 b = a - LP_ONE; 432 c = b + b + b; 433 eps = (REAL)fabs((double)(c - LP_ONE)); 434 } 435 436 abs = (REAL)fabs((double)x); 437 438 return (eps * abs); 439 } 440 441 /* 442 * MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]) 443 * multiplies matrix m times vector x and add the result to 444 * vector y. 445 */ 446 static void 447 MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]) 448 { 449 int i; 450 int j; 451 int jmin; 452 453 /* cleanup odd vector */ 454 j = n2 % 2; 455 if (j >= 1) { 456 j = j - 1; 457 for (i = 0; i < n1; i++) 458 y[i] = (y[i]) + x[j] * m[ldm * j + i]; 459 } 460 461 /* cleanup odd group of two vectors */ 462 j = n2 % 4; 463 if (j >= 2) { 464 j = j - 1; 465 for (i = 0; i < n1; i++) 466 y[i] = ((y[i]) 467 + x[j - 1] * m[ldm * (j - 1) + i]) 468 + x[j] * m[ldm * j + i]; 469 } 470 471 /* cleanup odd group of four vectors */ 472 j = n2 % 8; 473 if (j >= 4) { 474 j = j - 1; 475 for (i = 0; i < n1; i++) 476 y[i] = ((((y[i]) 477 + x[j - 3] * m[ldm * (j - 3) + i]) 478 + x[j - 2] * m[ldm * (j - 2) + i]) 479 + x[j - 1] * m[ldm * (j - 1) + i]) 480 + x[j] * m[ldm * j + i]; 481 } 482 483 /* cleanup odd group of eight vectors */ 484 j = n2 % 16; 485 if (j >= 8) { 486 j = j - 1; 487 for (i = 0; i < n1; i++) 488 y[i] = ((((((((y[i]) 489 + x[j - 7] * m[ldm * (j - 7) + i]) 490 + x[j - 6] * m[ldm * (j - 6) + i]) 491 + x[j - 5] * m[ldm * (j - 5) + i]) 492 + x[j - 4] * m[ldm * (j - 4) + i]) 493 + x[j - 3] * m[ldm * (j - 3) + i]) 494 + x[j - 2] * m[ldm * (j - 2) + i]) 495 + x[j - 1] * m[ldm * (j - 1) + i]) 496 + x[j] * m[ldm * j + i]; 497 } 498 499 /* main loop - groups of sixteen vectors */ 500 jmin = (n2 % 16) + 16; 501 for (j = jmin - 1; j < n2; j = j + 16) { 502 for (i = 0; i < n1; i++) 503 y[i] = ((((((((((((((((y[i]) 504 + x[j - 15] * m[ldm * (j - 15) + i]) 505 + x[j - 14] * m[ldm * (j - 14) + i]) 506 + x[j - 13] * m[ldm * (j - 13) + i]) 507 + x[j - 12] * m[ldm * (j - 12) + i]) 508 + x[j - 11] * m[ldm * (j - 11) + i]) 509 + x[j - 10] * m[ldm * (j - 10) + i]) 510 + x[j - 9] * m[ldm * (j - 9) + i]) 511 + x[j - 8] * m[ldm * (j - 8) + i]) 512 + x[j - 7] * m[ldm * (j - 7) + i]) 513 + x[j - 6] * m[ldm * (j - 6) + i]) 514 + x[j - 5] * m[ldm * (j - 5) + i]) 515 + x[j - 4] * m[ldm * (j - 4) + i]) 516 + x[j - 3] * m[ldm * (j - 3) + i]) 517 + x[j - 2] * m[ldm * (j - 2) + i]) 518 + x[j - 1] * m[ldm * (j - 1) + i]) 519 + x[j] * m[ldm * j + i]; 520 } 521 } 522