Home | History | Annotate | Download | only in sun4vpi
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <strings.h>
     28 #include <string.h>
     29 #include <libnvpair.h>
     30 #include <sys/fm/ldom.h>
     31 #include <fm/libtopo.h>
     32 #include <fm/topo_mod.h>
     33 #include <fm/fmd_fmri.h>
     34 #include <fm/fmd_agent.h>
     35 #include <sys/fm/ldom.h>
     36 
     37 struct cpu_walk_data {
     38 	tnode_t		*parent;	/* walk start node */
     39 	ldom_hdl_t	*lhp;		/* ldom handle */
     40 	int		(*func)(ldom_hdl_t *, nvlist_t *); /* callback func */
     41 	int		err;		/* walk errors count */
     42 	int		online;		/* online cpus count */
     43 	int		offline;	/* offline cpus count */
     44 	int		fail;		/* callback fails */
     45 };
     46 
     47 static topo_method_f
     48 	cpu_retire, cpu_unretire, cpu_service_state,
     49 	cpu_unusable, mem_asru_compute, dimm_page_unusable,
     50 	dimm_page_service_state, dimm_page_retire, dimm_page_unretire;
     51 
     52 const topo_method_t pi_cpu_methods[] = {
     53 	{ TOPO_METH_RETIRE, TOPO_METH_RETIRE_DESC,
     54 	    TOPO_METH_RETIRE_VERSION, TOPO_STABILITY_INTERNAL,
     55 	    cpu_retire },
     56 	{ TOPO_METH_UNRETIRE, TOPO_METH_UNRETIRE_DESC,
     57 	    TOPO_METH_UNRETIRE_VERSION, TOPO_STABILITY_INTERNAL,
     58 	    cpu_unretire },
     59 	{ TOPO_METH_SERVICE_STATE, TOPO_METH_SERVICE_STATE_DESC,
     60 	    TOPO_METH_SERVICE_STATE_VERSION, TOPO_STABILITY_INTERNAL,
     61 	    cpu_service_state },
     62 	{ TOPO_METH_UNUSABLE, TOPO_METH_UNUSABLE_DESC,
     63 	    TOPO_METH_UNUSABLE_VERSION, TOPO_STABILITY_INTERNAL,
     64 	    cpu_unusable },
     65 	{ NULL }
     66 };
     67 
     68 const topo_method_t pi_mem_methods[] = {
     69 	{ TOPO_METH_ASRU_COMPUTE, TOPO_METH_ASRU_COMPUTE_DESC,
     70 	    TOPO_METH_ASRU_COMPUTE_VERSION, TOPO_STABILITY_INTERNAL,
     71 	    mem_asru_compute },
     72 	{ TOPO_METH_SERVICE_STATE, TOPO_METH_SERVICE_STATE_DESC,
     73 	    TOPO_METH_SERVICE_STATE_VERSION, TOPO_STABILITY_INTERNAL,
     74 	    dimm_page_service_state },
     75 	{ TOPO_METH_UNUSABLE, TOPO_METH_UNUSABLE_DESC,
     76 	    TOPO_METH_UNUSABLE_VERSION, TOPO_STABILITY_INTERNAL,
     77 	    dimm_page_unusable },
     78 	{ TOPO_METH_RETIRE, TOPO_METH_RETIRE_DESC,
     79 	    TOPO_METH_RETIRE_VERSION, TOPO_STABILITY_INTERNAL,
     80 	    dimm_page_retire },
     81 	{ TOPO_METH_UNRETIRE, TOPO_METH_UNRETIRE_DESC,
     82 	    TOPO_METH_UNRETIRE_VERSION, TOPO_STABILITY_INTERNAL,
     83 	    dimm_page_unretire },
     84 	{ NULL }
     85 };
     86 
     87 static ldom_hdl_t *pi_lhp = NULL;
     88 
     89 #pragma init(pi_ldom_init)
     90 static void
     91 pi_ldom_init(void)
     92 {
     93 	pi_lhp = ldom_init(NULL, NULL);
     94 }
     95 
     96 #pragma fini(pi_ldom_fini)
     97 static void
     98 pi_ldom_fini(void)
     99 {
    100 	if (pi_lhp != NULL)
    101 		ldom_fini(pi_lhp);
    102 }
    103 
    104 static int
    105 set_retnvl(topo_mod_t *mod, nvlist_t **out, const char *retname, uint32_t ret)
    106 {
    107 	nvlist_t *nvl;
    108 
    109 	topo_mod_dprintf(mod, "topo method set \"%s\" = %u\n", retname, ret);
    110 
    111 	if (topo_mod_nvalloc(mod, &nvl, NV_UNIQUE_NAME) < 0)
    112 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
    113 
    114 	if (nvlist_add_uint32(nvl, retname, ret) != 0) {
    115 		nvlist_free(nvl);
    116 		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
    117 	}
    118 
    119 	*out = nvl;
    120 	return (0);
    121 }
    122 
    123 /*
    124  * For each visited cpu node, call the callback function with its ASRU.
    125  */
    126 static int
    127 cpu_walker(topo_mod_t *mod, tnode_t *node, void *pdata)
    128 {
    129 	struct cpu_walk_data *swdp = pdata;
    130 	nvlist_t *asru;
    131 	int err, rc;
    132 
    133 	/*
    134 	 * Terminate the walk if we reach start-node's sibling
    135 	 */
    136 	if (node != swdp->parent &&
    137 	    topo_node_parent(node) == topo_node_parent(swdp->parent))
    138 		return (TOPO_WALK_TERMINATE);
    139 
    140 	if (strcmp(topo_node_name(node), CPU) != 0 &&
    141 	    strcmp(topo_node_name(node), STRAND) != 0)
    142 		return (TOPO_WALK_NEXT);
    143 
    144 	if (topo_node_asru(node, &asru, NULL, &err) != 0) {
    145 		swdp->fail++;
    146 		return (TOPO_WALK_NEXT);
    147 	}
    148 
    149 	rc = swdp->func(swdp->lhp, asru);
    150 
    151 	/*
    152 	 * The "offline" and "online" counter are only useful for the "status"
    153 	 * callback.
    154 	 */
    155 	if (rc == P_OFFLINE || rc == P_FAULTED) {
    156 		swdp->offline++;
    157 		err = 0;
    158 	} else if (rc == P_ONLINE) {
    159 		swdp->online++;
    160 		err = 0;
    161 	} else {
    162 		swdp->fail++;
    163 		err = errno;
    164 	}
    165 
    166 	/* dump out status info if debug is turned on. */
    167 	if (getenv("TOPOCHIPDBG") != NULL ||
    168 	    getenv("TOPOSUN4VPIDBG") != NULL) {
    169 		const char *op;
    170 		char *fmristr = NULL;
    171 
    172 		if (swdp->func == ldom_fmri_retire)
    173 			op = "retire";
    174 		else if (swdp->func == ldom_fmri_unretire)
    175 			op = "unretire";
    176 		else if (swdp->func == ldom_fmri_status)
    177 			op = "check status";
    178 		else
    179 			op = "unknown op";
    180 
    181 		(void) topo_mod_nvl2str(mod, asru, &fmristr);
    182 		topo_mod_dprintf(mod, "%s cpu (%s): rc = %d, err = %s\n",
    183 		    op, fmristr == NULL ? "unknown fmri" : fmristr,
    184 		    rc, strerror(err));
    185 		if (fmristr != NULL)
    186 			topo_mod_strfree(mod, fmristr);
    187 	}
    188 
    189 	nvlist_free(asru);
    190 	return (TOPO_WALK_NEXT);
    191 }
    192 
    193 static int
    194 walk_cpus(topo_mod_t *mod, struct cpu_walk_data *swdp, tnode_t *parent,
    195     int (*func)(ldom_hdl_t *, nvlist_t *))
    196 {
    197 	topo_walk_t *twp;
    198 	int err;
    199 
    200 	swdp->lhp = pi_lhp;
    201 	swdp->parent = parent;
    202 	swdp->func = func;
    203 	swdp->err = swdp->offline = swdp->online = swdp->fail = 0;
    204 
    205 	/*
    206 	 * Return failure if ldom service is not initialized.
    207 	 */
    208 	if (pi_lhp == NULL) {
    209 		swdp->fail++;
    210 		return (0);
    211 	}
    212 
    213 	twp = topo_mod_walk_init(mod, parent, cpu_walker, swdp, &err);
    214 	if (twp == NULL)
    215 		return (-1);
    216 
    217 	err = topo_walk_step(twp, TOPO_WALK_CHILD);
    218 	topo_walk_fini(twp);
    219 
    220 	if (err == TOPO_WALK_ERR || swdp->err > 0)
    221 		return (-1);
    222 
    223 	return (0);
    224 }
    225 
    226 /* ARGSUSED */
    227 int
    228 cpu_retire(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    229     nvlist_t *in, nvlist_t **out)
    230 {
    231 	struct cpu_walk_data swd;
    232 	uint32_t rc;
    233 
    234 	if (version > TOPO_METH_RETIRE_VERSION)
    235 		return (topo_mod_seterrno(mod, EMOD_VER_NEW));
    236 
    237 	if (walk_cpus(mod, &swd, node, ldom_fmri_retire) == -1)
    238 		return (-1);
    239 
    240 	rc = swd.fail > 0 ? FMD_AGENT_RETIRE_FAIL : FMD_AGENT_RETIRE_DONE;
    241 
    242 	return (set_retnvl(mod, out, TOPO_METH_RETIRE_RET, rc));
    243 }
    244 
    245 /* ARGSUSED */
    246 int
    247 cpu_unretire(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    248     nvlist_t *in, nvlist_t **out)
    249 {
    250 	struct cpu_walk_data swd;
    251 	uint32_t rc;
    252 
    253 	if (version > TOPO_METH_UNRETIRE_VERSION)
    254 		return (topo_mod_seterrno(mod, EMOD_VER_NEW));
    255 
    256 	if (walk_cpus(mod, &swd, node, ldom_fmri_unretire) == -1)
    257 		return (-1);
    258 
    259 	rc = swd.fail > 0 ? FMD_AGENT_RETIRE_FAIL : FMD_AGENT_RETIRE_DONE;
    260 
    261 	return (set_retnvl(mod, out, TOPO_METH_UNRETIRE_RET, rc));
    262 }
    263 
    264 /* ARGSUSED */
    265 int
    266 cpu_service_state(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    267     nvlist_t *in, nvlist_t **out)
    268 {
    269 	struct cpu_walk_data swd;
    270 	uint32_t rc;
    271 
    272 	if (version > TOPO_METH_SERVICE_STATE_VERSION)
    273 		return (topo_mod_seterrno(mod, EMOD_VER_NEW));
    274 
    275 	if (walk_cpus(mod, &swd, node, ldom_fmri_status) == -1)
    276 		return (-1);
    277 
    278 	if (swd.fail > 0)
    279 		rc = FMD_SERVICE_STATE_UNKNOWN;
    280 	else if (swd.offline > 0)
    281 		rc = swd.online > 0 ? FMD_SERVICE_STATE_DEGRADED :
    282 		    FMD_SERVICE_STATE_UNUSABLE;
    283 	else
    284 		rc = FMD_SERVICE_STATE_OK;
    285 
    286 	return (set_retnvl(mod, out, TOPO_METH_SERVICE_STATE_RET, rc));
    287 }
    288 
    289 /* ARGSUSED */
    290 int
    291 cpu_unusable(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    292     nvlist_t *in, nvlist_t **out)
    293 {
    294 	struct cpu_walk_data swd;
    295 	uint32_t rc;
    296 
    297 	if (version > TOPO_METH_UNUSABLE_VERSION)
    298 		return (topo_mod_seterrno(mod, EMOD_VER_NEW));
    299 
    300 	if (walk_cpus(mod, &swd, node, ldom_fmri_status) == -1)
    301 		return (-1);
    302 
    303 	rc = (swd.offline > 0 && swd.fail + swd.online == 0) ? 1 : 0;
    304 
    305 	return (set_retnvl(mod, out, TOPO_METH_UNUSABLE_RET, rc));
    306 }
    307 
    308 static nvlist_t *
    309 mem_fmri_create(topo_mod_t *mod, char *serial, char *label)
    310 {
    311 	int err;
    312 	nvlist_t *fmri;
    313 
    314 	if (topo_mod_nvalloc(mod, &fmri, NV_UNIQUE_NAME) != 0)
    315 		return (NULL);
    316 	err = nvlist_add_uint8(fmri, FM_VERSION, FM_MEM_SCHEME_VERSION);
    317 	err |= nvlist_add_string(fmri, FM_FMRI_SCHEME, FM_FMRI_SCHEME_MEM);
    318 	if (serial != NULL)
    319 		err |= nvlist_add_string_array(fmri, FM_FMRI_MEM_SERIAL_ID,
    320 		    &serial, 1);
    321 	if (label != NULL)
    322 		err |= nvlist_add_string(fmri, FM_FMRI_MEM_UNUM, label);
    323 	if (err != 0) {
    324 		nvlist_free(fmri);
    325 		(void) topo_mod_seterrno(mod, EMOD_FMRI_NVL);
    326 		return (NULL);
    327 	}
    328 
    329 	return (fmri);
    330 }
    331 
    332 /* Topo Methods */
    333 static int
    334 mem_asru_compute(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    335     nvlist_t *in, nvlist_t **out)
    336 {
    337 	nvlist_t *asru, *pargs, *args, *hcsp;
    338 	int err;
    339 	char *serial = NULL, *label = NULL;
    340 	uint64_t pa, offset;
    341 
    342 	if (version > TOPO_METH_ASRU_COMPUTE_VERSION)
    343 		return (topo_mod_seterrno(mod, EMOD_VER_NEW));
    344 
    345 	if (strcmp(topo_node_name(node), DIMM) != 0)
    346 		return (topo_mod_seterrno(mod, EMOD_METHOD_INVAL));
    347 
    348 	pargs = NULL;
    349 
    350 	if (nvlist_lookup_nvlist(in, TOPO_PROP_PARGS, &pargs) == 0)
    351 		(void) nvlist_lookup_string(pargs, FM_FMRI_HC_SERIAL_ID,
    352 		    &serial);
    353 	if (serial == NULL &&
    354 	    nvlist_lookup_nvlist(in, TOPO_PROP_ARGS, &args) == 0)
    355 		(void) nvlist_lookup_string(args, FM_FMRI_HC_SERIAL_ID,
    356 		    &serial);
    357 
    358 	(void) topo_node_label(node, &label, &err);
    359 
    360 	asru = mem_fmri_create(mod, serial, label);
    361 
    362 	if (label != NULL)
    363 		topo_mod_strfree(mod, label);
    364 
    365 	if (asru == NULL)
    366 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
    367 
    368 	err = 0;
    369 
    370 	/*
    371 	 * For a memory page, 'in' includes an hc-specific member which
    372 	 * specifies physaddr and/or offset. Set them in asru as well.
    373 	 */
    374 	if (pargs && nvlist_lookup_nvlist(pargs,
    375 	    FM_FMRI_HC_SPECIFIC, &hcsp) == 0) {
    376 		if (nvlist_lookup_uint64(hcsp,
    377 		    FM_FMRI_HC_SPECIFIC_PHYSADDR, &pa) == 0)
    378 			err += nvlist_add_uint64(asru, FM_FMRI_MEM_PHYSADDR,
    379 			    pa);
    380 		if (nvlist_lookup_uint64(hcsp,
    381 		    FM_FMRI_HC_SPECIFIC_OFFSET, &offset) == 0)
    382 			err += nvlist_add_uint64(asru, FM_FMRI_MEM_OFFSET,
    383 			    offset);
    384 	}
    385 
    386 
    387 	if (err != 0 || topo_mod_nvalloc(mod, out, NV_UNIQUE_NAME) < 0) {
    388 		nvlist_free(asru);
    389 		return (topo_mod_seterrno(mod, EMOD_NOMEM));
    390 	}
    391 
    392 	err = nvlist_add_string(*out, TOPO_PROP_VAL_NAME, TOPO_PROP_ASRU);
    393 	err |= nvlist_add_uint32(*out, TOPO_PROP_VAL_TYPE, TOPO_TYPE_FMRI);
    394 	err |= nvlist_add_nvlist(*out, TOPO_PROP_VAL_VAL, asru);
    395 	nvlist_free(asru);
    396 
    397 	if (err != 0) {
    398 		nvlist_free(*out);
    399 		*out = NULL;
    400 		return (topo_mod_seterrno(mod, EMOD_NVL_INVAL));
    401 	}
    402 
    403 	return (0);
    404 }
    405 
    406 static boolean_t
    407 is_page_fmri(nvlist_t *nvl)
    408 {
    409 	nvlist_t *hcsp;
    410 	uint64_t val;
    411 
    412 	if (nvlist_lookup_nvlist(nvl, FM_FMRI_HC_SPECIFIC, &hcsp) == 0 &&
    413 	    (nvlist_lookup_uint64(hcsp, FM_FMRI_HC_SPECIFIC_OFFSET,
    414 	    &val) == 0 ||
    415 	    nvlist_lookup_uint64(hcsp, "asru-" FM_FMRI_HC_SPECIFIC_OFFSET,
    416 	    &val) == 0 ||
    417 	    nvlist_lookup_uint64(hcsp, FM_FMRI_HC_SPECIFIC_PHYSADDR,
    418 	    &val) == 0 ||
    419 	    nvlist_lookup_uint64(hcsp, "asru-" FM_FMRI_HC_SPECIFIC_PHYSADDR,
    420 	    &val) == 0))
    421 		return (B_TRUE);
    422 
    423 	return (B_FALSE);
    424 }
    425 
    426 static int
    427 dimm_page_service_state(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    428     nvlist_t *in, nvlist_t **out)
    429 {
    430 	uint32_t rc = FMD_SERVICE_STATE_OK;
    431 	nvlist_t *asru;
    432 	int err;
    433 
    434 	if (version > TOPO_METH_SERVICE_STATE_VERSION)
    435 		return (topo_mod_seterrno(mod, EMOD_VER_NEW));
    436 
    437 	if (pi_lhp != NULL && is_page_fmri(in) &&
    438 	    topo_node_asru(node, &asru, in, &err) == 0) {
    439 		err = ldom_fmri_status(pi_lhp, asru);
    440 
    441 		if (err == 0 || err == EINVAL)
    442 			rc = FMD_SERVICE_STATE_UNUSABLE;
    443 		else if (err == EAGAIN)
    444 			rc = FMD_SERVICE_STATE_ISOLATE_PENDING;
    445 		nvlist_free(asru);
    446 	}
    447 
    448 	return (set_retnvl(mod, out, TOPO_METH_SERVICE_STATE_RET, rc));
    449 }
    450 
    451 static int
    452 dimm_page_unusable(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    453     nvlist_t *in, nvlist_t **out)
    454 {
    455 	uint32_t rc = 0;
    456 	nvlist_t *asru;
    457 	int err;
    458 
    459 	if (version > TOPO_METH_UNUSABLE_VERSION)
    460 		return (topo_mod_seterrno(mod, EMOD_VER_NEW));
    461 
    462 	if (pi_lhp != NULL && is_page_fmri(in) &&
    463 	    topo_node_asru(node, &asru, in, &err) == 0) {
    464 		err = ldom_fmri_status(pi_lhp, asru);
    465 
    466 		if (err == 0 || err == EINVAL)
    467 			rc = 1;
    468 		nvlist_free(asru);
    469 	}
    470 
    471 	return (set_retnvl(mod, out, TOPO_METH_UNUSABLE_RET, rc));
    472 }
    473 
    474 static int
    475 dimm_page_retire(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    476     nvlist_t *in, nvlist_t **out)
    477 {
    478 	uint32_t rc = FMD_AGENT_RETIRE_FAIL;
    479 	nvlist_t *asru;
    480 	int err;
    481 
    482 	if (version > TOPO_METH_RETIRE_VERSION)
    483 		return (topo_mod_seterrno(mod, EMOD_VER_NEW));
    484 
    485 	if (pi_lhp != NULL && is_page_fmri(in) &&
    486 	    topo_node_asru(node, &asru, in, &err) == 0) {
    487 		err = ldom_fmri_retire(pi_lhp, asru);
    488 
    489 		if (err == 0 || err == EIO || err == EINVAL)
    490 			rc = FMD_AGENT_RETIRE_DONE;
    491 		else if (err == EAGAIN)
    492 			rc = FMD_AGENT_RETIRE_ASYNC;
    493 		nvlist_free(asru);
    494 	}
    495 
    496 	return (set_retnvl(mod, out, TOPO_METH_RETIRE_RET, rc));
    497 }
    498 
    499 static int
    500 dimm_page_unretire(topo_mod_t *mod, tnode_t *node, topo_version_t version,
    501     nvlist_t *in, nvlist_t **out)
    502 {
    503 	uint32_t rc = FMD_AGENT_RETIRE_FAIL;
    504 	nvlist_t *asru;
    505 	int err;
    506 
    507 	if (version > TOPO_METH_UNRETIRE_VERSION)
    508 		return (topo_mod_seterrno(mod, EMOD_VER_NEW));
    509 
    510 	if (pi_lhp != NULL && is_page_fmri(in) &&
    511 	    topo_node_asru(node, &asru, in, &err) == 0) {
    512 		err = ldom_fmri_unretire(pi_lhp, asru);
    513 
    514 		if (err == 0 || err == EIO)
    515 			rc = FMD_AGENT_RETIRE_DONE;
    516 		nvlist_free(asru);
    517 	}
    518 
    519 	return (set_retnvl(mod, out, TOPO_METH_UNRETIRE_RET, rc));
    520 }
    521