Home | History | Annotate | Download | only in fptest
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     28 
     29 #include <errno.h>
     30 #include <stdio.h>
     31 #include <stdlib.h>
     32 #include <sys/time.h>
     33 #include <unistd.h>
     34 #include <externs.h>
     35 #include <fp.h>
     36 #include <fps_ereport.h>
     37 #include <fpstestmsg.h>
     38 #include <linpack.h>
     39 #include <sunperf.h>
     40 
     41 double fabs(double x);
     42 
     43 extern void	___pl_dss_set_chip_cache_(int *cache_size);
     44 static double dran(int iseed[4]);
     45 static int LINSUB(REAL * residn, REAL * resid,
     46     REAL * eps, REAL * x11, REAL * xn1, int fps_verbose_msg);
     47 static int MATGEN(REAL a[], int lda, int n, REAL b[], REAL * norma);
     48 static REAL EPSLON(REAL x);
     49 static void MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[]);
     50 
     51 extern int errno;
     52 static int MAT_SIZE;
     53 #ifndef __lint
     54 static int LAPACK_ECACHE_SIZE = 8 * 1024 * 1024;
     55 #endif
     56 
     57 /*
     58  * LINPACK(int Stress, int unit, struct fps_test_ereport *report,
     59  * int fps_verbose_msg)
     60  * performs the single and double precision lapack test. If an
     61  * error is found, relevant data is collected and stored in report.
     62  */
     63 int
     64 LINPACK(int Stress, int unit, struct fps_test_ereport *report,
     65     int fps_verbose_msg)
     66 {
     67 	char err_data[MAX_INFO_SIZE];
     68 	char l_buf[64];
     69 	int c_index;
     70 	int ret;
     71 	REAL eps;
     72 	REAL resid;
     73 	REAL residn;
     74 	REAL x11;
     75 	REAL xn1;
     76 	REAL EPS;
     77 	REAL RESID;
     78 	REAL RESIDN;
     79 	REAL X11;
     80 	REAL XN1;
     81 	uint64_t expected[5];
     82 	uint64_t observed[5];
     83 
     84 #ifdef  FPS_LAPA_UNK
     85 #ifndef DP
     86 	if (Stress > 1000)
     87 			return (0);
     88 #endif /* DP */
     89 #endif /* FPS_LAPA_UNK */
     90 
     91 	if (Stress > 10000)
     92 		return (0);
     93 
     94 	/*
     95 	 * make sure is no dependency on the E$ size Without this call the
     96 	 * computed results will depend on the size of the E$ (
     97 	 * sos10/libsunperf ) IIIi computed results != IV+/IV/III+/III ...
     98 	 */
     99 #ifndef __lint
    100 	___pl_dss_set_chip_cache_(&LAPACK_ECACHE_SIZE);
    101 #endif
    102 
    103 	c_index = Stress;
    104 
    105 	if (2000 == c_index)
    106 		c_index = 1001;
    107 	if (3000 == c_index)
    108 		c_index = 1002;
    109 	if (4016 == c_index)
    110 		c_index = 1003;
    111 	if (5000 == c_index)
    112 		c_index = 1004;
    113 	if (6000 == c_index)
    114 		c_index = 1005;
    115 	if (7016 == c_index)
    116 		c_index = 1006;
    117 	if (8034 == c_index)
    118 		c_index = 1007;
    119 	if (9000 == c_index)
    120 		c_index = 1008;
    121 	if (10000 == c_index)
    122 		c_index = 1009;
    123 
    124 	(void) snprintf(l_buf, 63, "%s(%d,cpu=%d)", PREC, Stress, unit);
    125 	fps_msg(fps_verbose_msg, gettext(FPSM_02), l_buf, unit);
    126 
    127 	MAT_SIZE = Stress;
    128 	ret = LINSUB(&residn, &resid, &eps, &x11, &xn1, fps_verbose_msg);
    129 
    130 	if (2 == ret) {
    131 		if (errno == EAGAIN || errno == ENOMEM)
    132 			_exit(FPU_SYSCALL_TRYAGAIN);
    133 		else
    134 			_exit(FPU_SYSCALL_FAIL);
    135 	}
    136 
    137 #ifdef  FPS_LAPA_UNK
    138 	RESIDN  = RESID   = X11 = XN1 = 0.0000000000000000e+00;
    139 
    140 #ifdef DP
    141 	EPS = 2.2204460492503131e-16;
    142 #else /* DP */
    143 	EPS = 1.1920928955078125e-07;
    144 #endif /* DP */
    145 
    146 #else /* FPS_LAPA_UNK */
    147 
    148 	RESIDN = LinpValsA[c_index].residn;
    149 	RESID = LinpValsA[c_index].resid;
    150 	EPS = LinpValsA[c_index].eps;
    151 	X11 = LinpValsA[c_index].x11;
    152 	XN1 = LinpValsA[c_index].xn1;
    153 
    154 #endif /* FPS_LAPA_UNK */
    155 
    156 	if ((residn == RESIDN) && (resid == RESID) && (eps == EPS) &&
    157 	    (x11 == X11) && (xn1 == XN1)) {
    158 
    159 		return (0);
    160 	} else {
    161 		(void) snprintf(err_data, sizeof (err_data),
    162 		    "\nExpected: %.16e, %.16e, %.16e, %.16e, %.16e"
    163 		    "\nObserved: %.16e, %.16e, %.16e, %.16e, %.16e",
    164 		    RESIDN, RESID, EPS, X11, XN1, residn, resid, eps, x11, xn1);
    165 
    166 
    167 #ifdef	DP
    168 		observed[0] = *(uint64_t *)&residn;
    169 		observed[1] = *(uint64_t *)&resid;
    170 		observed[2] = *(uint64_t *)&eps;
    171 		observed[3] = *(uint64_t *)&x11;
    172 		observed[4] = *(uint64_t *)&xn1;
    173 		expected[0] = *(uint64_t *)&RESIDN;
    174 		expected[1] = *(uint64_t *)&RESID;
    175 		expected[2] = *(uint64_t *)&EPS;
    176 		expected[3] = *(uint64_t *)&X11;
    177 		expected[4] = *(uint64_t *)&XN1;
    178 
    179 		setup_fps_test_struct(IS_EREPORT_INFO, report,
    180 		    6317, &observed, &expected, 5, 5, err_data);
    181 #else
    182 		observed[0] = (uint64_t)(*(uint32_t *)&residn);
    183 		observed[1] = (uint64_t)(*(uint32_t *)&resid);
    184 		observed[2] = (uint64_t)(*(uint32_t *)&eps);
    185 		observed[3] = (uint64_t)(*(uint32_t *)&x11);
    186 		observed[4] = (uint64_t)(*(uint32_t *)&xn1);
    187 		expected[0] = (uint64_t)(*(uint32_t *)&RESIDN);
    188 		expected[1] = (uint64_t)(*(uint32_t *)&RESID);
    189 		expected[2] = (uint64_t)(*(uint32_t *)&EPS);
    190 		expected[3] = (uint64_t)(*(uint32_t *)&X11);
    191 		expected[4] = (uint64_t)(*(uint32_t *)&XN1);
    192 
    193 		setup_fps_test_struct(IS_EREPORT_INFO, report,
    194 		    6316, &observed, &expected, 5, 5, err_data);
    195 #endif
    196 
    197 		return (-1);
    198 	}
    199 }
    200 
    201 /*
    202  * LINSUB(REAL *residn, REAL *resid, REAL *eps,
    203  * REAL *x11, REAL *xn1, int fps_verbose_msg)begins
    204  * the lapack calculation calls.
    205  */
    206 static int
    207 LINSUB(REAL *residn, REAL *resid,
    208 	REAL *eps, REAL *x11, REAL *xn1,
    209 	int fps_verbose_msg)
    210 {
    211 	int i;
    212 	int lda;
    213 	int n;
    214 	int nr_malloc;
    215 	REAL *a;
    216 	REAL abs;
    217 	REAL *b;
    218 	REAL norma;
    219 	REAL normx;
    220 	REAL *x;
    221 	struct timeval timeout;
    222 	long *ipvt;
    223 #ifndef __lint
    224 	long info;
    225 #endif
    226 
    227 	timeout.tv_sec = 0;
    228 	timeout.tv_usec = 10000; /* microseconds, 10ms */
    229 	nr_malloc = 0;
    230 
    231 mallocAgain:
    232 
    233 	a = (REAL *) malloc((MAT_SIZE + 8) * (MAT_SIZE + 1) *
    234 	    (size_t)sizeof (REAL));
    235 	b = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL));
    236 	x = (REAL *) malloc(MAT_SIZE * (size_t)sizeof (REAL));
    237 
    238 	ipvt = (long *)malloc(MAT_SIZE * (size_t)sizeof (long));
    239 
    240 	if ((NULL == a) || (NULL == b) ||
    241 	    (NULL == x) || (NULL == ipvt)) {
    242 		if (NULL != a)
    243 			free(a);
    244 		if (NULL != b)
    245 			free(b);
    246 		if (NULL != x)
    247 			free(x);
    248 		if (NULL != ipvt)
    249 			free(ipvt);
    250 
    251 		/* sleep 10 ms. wait for 100 ms */
    252 		if (nr_malloc++ < 11) {
    253 			(void) select(1, NULL, NULL, NULL, &timeout);
    254 			goto mallocAgain;
    255 		}
    256 		fps_msg(fps_verbose_msg,
    257 		    "Malloc failed in lapack, matrix size %d",
    258 		    MAT_SIZE);
    259 
    260 		return (2);
    261 	}
    262 	lda = MAT_SIZE + 8;
    263 	n = MAT_SIZE;
    264 
    265 	(void) MATGEN(a, lda, n, b, &norma);
    266 #ifndef __lint
    267 	GEFA(n, n, a, lda, ipvt, &info);
    268 	GESL('N', n, 1, a, lda, ipvt, b, n, &info);
    269 #endif
    270 	free(ipvt);
    271 
    272 	for (i = 0; i < n; i++) {
    273 		x[i] = b[i];
    274 	}
    275 
    276 	(void) MATGEN((REAL *) a, lda, n, b, &norma);
    277 
    278 	for (i = 0; i < n; i++) {
    279 		b[i] = -b[i];
    280 	}
    281 
    282 	MXPY(n, b, n, lda, x, (REAL *) a);
    283 	free(a);
    284 
    285 	*resid = 0.0;
    286 	normx = 0.0;
    287 
    288 	for (i = 0; i < n; i++) {
    289 		abs = (REAL)fabs((double)b[i]);
    290 		*resid = (*resid > abs) ? *resid : abs;
    291 		abs = (REAL)fabs((double)x[i]);
    292 		normx = (normx > abs) ? normx : abs;
    293 	}
    294 
    295 	free(b);
    296 
    297 	*eps = EPSLON((REAL) LP_ONE);
    298 
    299 	*residn = *resid / (n * norma * normx * (*eps));
    300 
    301 	*x11 = x[0] - 1;
    302 	*xn1 = x[n - 1] - 1;
    303 
    304 	free(x);
    305 
    306 	return (0);
    307 }
    308 
    309 /*
    310  * dran(int iseed[4]) returns a random real number from a
    311  * uniform (0,1) distribution.
    312  */
    313 static double
    314 dran(int iseed[4])
    315 {
    316 	double r;
    317 	double value;
    318 	int ipw2;
    319 	int it1;
    320 	int it2;
    321 	int it3;
    322 	int it4;
    323 	int m1;
    324 	int m2;
    325 	int m3;
    326 	int m4;
    327 
    328 	/* Set constants */
    329 	m1 = 494;
    330 	m2 = 322;
    331 	m3 = 2508;
    332 	m4 = 2549;
    333 	ipw2 = 4096;
    334 	r = 1.0 / ipw2;
    335 
    336 	/* multiply the seed by the multiplier modulo 2**48 */
    337 	it4 = iseed[3] * m4;
    338 	it3 = it4 / ipw2;
    339 	it4 = it4 - ipw2 * it3;
    340 	it3 = it3 + iseed[2] * m4 + iseed[3] * m3;
    341 	it2 = it3 / ipw2;
    342 	it3 = it3 - ipw2 * it2;
    343 	it2 = it2 + iseed[1] * m4 + iseed[2] * m3 + iseed[3] * m2;
    344 	it1 = it2 / ipw2;
    345 	it2 = it2 - ipw2 * it1;
    346 	it1 = it1 + iseed[0] * m4 + iseed[1] * m3 + iseed[2] * m2 +
    347 	    iseed[3] * m1;
    348 	it1 = it1 % ipw2;
    349 
    350 	/* return updated seed */
    351 	iseed[0] = it1;
    352 	iseed[1] = it2;
    353 	iseed[2] = it3;
    354 	iseed[3] = it4;
    355 
    356 	/* convert 48-bit integer to a real number in the interval (0,1) */
    357 	value = r * ((double)it1 + r * ((double)it2 + r * ((double)it3 +
    358 	    r * ((double)it4))));
    359 
    360 	return (value);
    361 }
    362 
    363 /*
    364  * MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma)
    365  * generates matrix a and b.
    366  */
    367 
    368 #define	ALPHA 1.68750
    369 static int
    370 MATGEN(REAL a[], int lda, int n, REAL b[], REAL *norma)
    371 {
    372 	int		i;
    373 	int		init[4];
    374 	int		j;
    375 	REAL	value;
    376 
    377 	init[0] = 1;
    378 	init[1] = 2;
    379 	init[2] = 3;
    380 	init[3] = 1325;
    381 	*norma = LP_ZERO;
    382 	for (j = 0; j < n; j++) {
    383 		for (i = 0; i < n; i++) {
    384 #ifdef FPS_LAPA_UNK
    385 			a[lda*j+i] =
    386 			    (i < j) ? (double)(i+1) : (double)(j+ALPHA);
    387 			if (fabs(a[lda*j+i]) > *norma)
    388 				*norma = fabs(a[lda*j+i]);
    389 			} /* i */
    390 #else
    391 			value = (REAL) dran(init) - 0.5;
    392 			a[lda * j + i] = value;
    393 			value = fabs(value);
    394 			if (value > *norma) {
    395 				*norma = value;
    396 			}
    397 		} /* i */
    398 #endif /* FPS_LAPA_UNK */
    399 	} /* j */
    400 
    401 
    402 	for (i = 0; i < n; i++) {
    403 		b[i] = LP_ZERO;
    404 	}
    405 	for (j = 0; j < n; j++) {
    406 		for (i = 0; i < n; i++) {
    407 			b[i] = b[i] + a[lda * j + i];
    408 		}
    409 	}
    410 
    411 	return (0);
    412 }
    413 
    414 /*
    415  * EPSLON(REAL x) estimates unit roundoff in
    416  * quantities of size x.
    417  */
    418 static REAL
    419 EPSLON(REAL x)
    420 {
    421 	REAL a;
    422 	REAL abs;
    423 	REAL b;
    424 	REAL c;
    425 	REAL eps;
    426 
    427 	a = 4.0e0 / 3.0e0;
    428 	eps = LP_ZERO;
    429 
    430 	while (eps == LP_ZERO) {
    431 		b = a - LP_ONE;
    432 		c = b + b + b;
    433 		eps = (REAL)fabs((double)(c - LP_ONE));
    434 	}
    435 
    436 	abs = (REAL)fabs((double)x);
    437 
    438 	return (eps * abs);
    439 }
    440 
    441 /*
    442  * MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[])
    443  * multiplies matrix m times vector x and add the result to
    444  * vector y.
    445  */
    446 static void
    447 MXPY(int n1, REAL y[], int n2, int ldm, REAL x[], REAL m[])
    448 {
    449 	int i;
    450 	int j;
    451 	int jmin;
    452 
    453 	/* cleanup odd vector */
    454 	j = n2 % 2;
    455 	if (j >= 1) {
    456 		j = j - 1;
    457 		for (i = 0; i < n1; i++)
    458 			y[i] = (y[i]) + x[j] * m[ldm * j + i];
    459 	}
    460 
    461 	/* cleanup odd group of two vectors */
    462 	j = n2 % 4;
    463 	if (j >= 2) {
    464 		j = j - 1;
    465 		for (i = 0; i < n1; i++)
    466 			y[i] = ((y[i])
    467 			    + x[j - 1] * m[ldm * (j - 1) + i])
    468 			    + x[j] * m[ldm * j + i];
    469 	}
    470 
    471 	/* cleanup odd group of four vectors */
    472 	j = n2 % 8;
    473 	if (j >= 4) {
    474 		j = j - 1;
    475 		for (i = 0; i < n1; i++)
    476 			y[i] = ((((y[i])
    477 			    + x[j - 3] * m[ldm * (j - 3) + i])
    478 			    + x[j - 2] * m[ldm * (j - 2) + i])
    479 			    + x[j - 1] * m[ldm * (j - 1) + i])
    480 			    + x[j] * m[ldm * j + i];
    481 	}
    482 
    483 	/* cleanup odd group of eight vectors */
    484 	j = n2 % 16;
    485 	if (j >= 8) {
    486 		j = j - 1;
    487 		for (i = 0; i < n1; i++)
    488 			y[i] = ((((((((y[i])
    489 			    + x[j - 7] * m[ldm * (j - 7) + i])
    490 			    + x[j - 6] * m[ldm * (j - 6) + i])
    491 			    + x[j - 5] * m[ldm * (j - 5) + i])
    492 			    + x[j - 4] * m[ldm * (j - 4) + i])
    493 			    + x[j - 3] * m[ldm * (j - 3) + i])
    494 			    + x[j - 2] * m[ldm * (j - 2) + i])
    495 			    + x[j - 1] * m[ldm * (j - 1) + i])
    496 			    + x[j] * m[ldm * j + i];
    497 	}
    498 
    499 	/* main loop - groups of sixteen vectors */
    500 	jmin = (n2 % 16) + 16;
    501 	for (j = jmin - 1; j < n2; j = j + 16) {
    502 		for (i = 0; i < n1; i++)
    503 			y[i] = ((((((((((((((((y[i])
    504 			    + x[j - 15] * m[ldm * (j - 15) + i])
    505 			    + x[j - 14] * m[ldm * (j - 14) + i])
    506 			    + x[j - 13] * m[ldm * (j - 13) + i])
    507 			    + x[j - 12] * m[ldm * (j - 12) + i])
    508 			    + x[j - 11] * m[ldm * (j - 11) + i])
    509 			    + x[j - 10] * m[ldm * (j - 10) + i])
    510 			    + x[j - 9] * m[ldm * (j - 9) + i])
    511 			    + x[j - 8] * m[ldm * (j - 8) + i])
    512 			    + x[j - 7] * m[ldm * (j - 7) + i])
    513 			    + x[j - 6] * m[ldm * (j - 6) + i])
    514 			    + x[j - 5] * m[ldm * (j - 5) + i])
    515 			    + x[j - 4] * m[ldm * (j - 4) + i])
    516 			    + x[j - 3] * m[ldm * (j - 3) + i])
    517 			    + x[j - 2] * m[ldm * (j - 2) + i])
    518 			    + x[j - 1] * m[ldm * (j - 1) + i])
    519 			    + x[j] * m[ldm * j + i];
    520 	}
    521 }
    522