Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * sun4v CPU DR Module
     29  */
     30 
     31 #include <sys/modctl.h>
     32 #include <sys/processor.h>
     33 #include <sys/cpuvar.h>
     34 #include <sys/cpupart.h>
     35 #include <sys/sunddi.h>
     36 #include <sys/sunndi.h>
     37 #include <sys/note.h>
     38 #include <sys/sysevent/dr.h>
     39 #include <sys/hypervisor_api.h>
     40 #include <sys/mach_descrip.h>
     41 #include <sys/mdesc.h>
     42 #include <sys/ds.h>
     43 #include <sys/drctl.h>
     44 #include <sys/dr_util.h>
     45 #include <sys/dr_cpu.h>
     46 #include <sys/promif.h>
     47 #include <sys/machsystm.h>
     48 
     49 
     50 static struct modlmisc modlmisc = {
     51 	&mod_miscops,
     52 	"sun4v CPU DR"
     53 };
     54 
     55 static struct modlinkage modlinkage = {
     56 	MODREV_1,
     57 	(void *)&modlmisc,
     58 	NULL
     59 };
     60 
     61 typedef int (*fn_t)(processorid_t, int *, boolean_t);
     62 
     63 /*
     64  * Global DS Handle
     65  */
     66 static ds_svc_hdl_t ds_handle;
     67 
     68 /*
     69  * Supported DS Capability Versions
     70  */
     71 static ds_ver_t		dr_cpu_vers[] = { { 1, 1 }, { 1, 0 } };
     72 #define	DR_CPU_NVERS	(sizeof (dr_cpu_vers) / sizeof (dr_cpu_vers[0]))
     73 
     74 static ds_ver_t		version;
     75 
     76 /*
     77  * DS Capability Description
     78  */
     79 static ds_capability_t dr_cpu_cap = {
     80 	DR_CPU_DS_ID,		/* svc_id */
     81 	dr_cpu_vers,		/* vers */
     82 	DR_CPU_NVERS		/* nvers */
     83 };
     84 
     85 #define	DRCPU_VERS_EQ(_maj, _min) \
     86 	((version.major == (_maj)) && (version.minor == (_min)))
     87 
     88 #define	DRCPU_VERS_GTEQ(_maj, _min) \
     89 	((version.major > (_maj)) ||					\
     90 	((version.major == (_maj)) && (version.minor >= (_min))))
     91 
     92 /*
     93  * DS Callbacks
     94  */
     95 static void dr_cpu_reg_handler(ds_cb_arg_t, ds_ver_t *, ds_svc_hdl_t);
     96 static void dr_cpu_unreg_handler(ds_cb_arg_t arg);
     97 static void dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);
     98 
     99 /*
    100  * DS Client Ops Vector
    101  */
    102 static ds_clnt_ops_t dr_cpu_ops = {
    103 	dr_cpu_reg_handler,	/* ds_reg_cb */
    104 	dr_cpu_unreg_handler,	/* ds_unreg_cb */
    105 	dr_cpu_data_handler,	/* ds_data_cb */
    106 	NULL			/* cb_arg */
    107 };
    108 
    109 /*
    110  * Operation Results
    111  *
    112  * Used internally to gather results while an operation on a
    113  * list of CPUs is in progress. In particular, it is used to
    114  * keep track of which CPUs have already failed so that they are
    115  * not processed further, and the manner in which they failed.
    116  */
    117 typedef struct {
    118 	uint32_t	cpuid;
    119 	uint32_t	result;
    120 	uint32_t	status;
    121 	char		*string;
    122 } dr_cpu_res_t;
    123 
    124 #define	DR_CPU_MAX_ERR_LEN	64	/* maximum error string length */
    125 
    126 /*
    127  * Internal Functions
    128  */
    129 static int dr_cpu_init(void);
    130 static int dr_cpu_fini(void);
    131 
    132 static int dr_cpu_list_wrk(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
    133 static int dr_cpu_list_status(dr_cpu_hdr_t *, dr_cpu_hdr_t **, int *);
    134 
    135 static int dr_cpu_unconfigure(processorid_t, int *status, boolean_t force);
    136 static int dr_cpu_configure(processorid_t, int *status, boolean_t force);
    137 static int dr_cpu_status(processorid_t, int *status);
    138 
    139 static void dr_cpu_check_cpus(dr_cpu_hdr_t *req, dr_cpu_res_t *res);
    140 static void dr_cpu_check_psrset(uint32_t *cpuids, dr_cpu_res_t *res, int nres);
    141 static int dr_cpu_check_bound_thr(cpu_t *cp, dr_cpu_res_t *res);
    142 
    143 static dr_cpu_res_t *dr_cpu_res_array_init(dr_cpu_hdr_t *, drctl_rsrc_t *, int);
    144 static void dr_cpu_res_array_fini(dr_cpu_res_t *res, int nres);
    145 static size_t dr_cpu_pack_response(dr_cpu_hdr_t *req, dr_cpu_res_t *res,
    146     dr_cpu_hdr_t **respp);
    147 
    148 static int dr_cpu_probe(processorid_t newcpuid);
    149 static int dr_cpu_deprobe(processorid_t cpuid);
    150 
    151 static dev_info_t *dr_cpu_find_node(processorid_t cpuid);
    152 static mde_cookie_t dr_cpu_find_node_md(processorid_t, md_t *, mde_cookie_t *);
    153 
    154 int
    155 _init(void)
    156 {
    157 	int	status;
    158 
    159 	/* check that CPU DR is enabled */
    160 	if (dr_is_disabled(DR_TYPE_CPU)) {
    161 		cmn_err(CE_CONT, "!CPU DR is disabled\n");
    162 		return (-1);
    163 	}
    164 
    165 	if ((status = dr_cpu_init()) != 0) {
    166 		cmn_err(CE_NOTE, "CPU DR initialization failed");
    167 		return (status);
    168 	}
    169 
    170 	if ((status = mod_install(&modlinkage)) != 0) {
    171 		(void) dr_cpu_fini();
    172 	}
    173 
    174 	return (status);
    175 }
    176 
    177 int
    178 _info(struct modinfo *modinfop)
    179 {
    180 	return (mod_info(&modlinkage, modinfop));
    181 }
    182 
    183 int dr_cpu_allow_unload;
    184 
    185 int
    186 _fini(void)
    187 {
    188 	int	status;
    189 
    190 	if (dr_cpu_allow_unload == 0)
    191 		return (EBUSY);
    192 
    193 	if ((status = mod_remove(&modlinkage)) == 0) {
    194 		(void) dr_cpu_fini();
    195 	}
    196 
    197 	return (status);
    198 }
    199 
    200 static int
    201 dr_cpu_init(void)
    202 {
    203 	int	rv;
    204 
    205 	if ((rv = ds_cap_init(&dr_cpu_cap, &dr_cpu_ops)) != 0) {
    206 		cmn_err(CE_NOTE, "ds_cap_init failed: %d", rv);
    207 		return (-1);
    208 	}
    209 
    210 	return (0);
    211 }
    212 
    213 static int
    214 dr_cpu_fini(void)
    215 {
    216 	int	rv;
    217 
    218 	if ((rv = ds_cap_fini(&dr_cpu_cap)) != 0) {
    219 		cmn_err(CE_NOTE, "ds_cap_fini failed: %d", rv);
    220 		return (-1);
    221 	}
    222 
    223 	return (0);
    224 }
    225 
    226 static void
    227 dr_cpu_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
    228 {
    229 	DR_DBG_CPU("reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n", arg,
    230 	    ver->major, ver->minor, hdl);
    231 
    232 	version.major = ver->major;
    233 	version.minor = ver->minor;
    234 	ds_handle = hdl;
    235 }
    236 
    237 static void
    238 dr_cpu_unreg_handler(ds_cb_arg_t arg)
    239 {
    240 	DR_DBG_CPU("unreg_handler: arg=0x%p\n", arg);
    241 
    242 	ds_handle = DS_INVALID_HDL;
    243 }
    244 
    245 static void
    246 dr_cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
    247 {
    248 	_NOTE(ARGUNUSED(arg))
    249 
    250 	dr_cpu_hdr_t	*req = buf;
    251 	dr_cpu_hdr_t	err_resp;
    252 	dr_cpu_hdr_t	*resp = &err_resp;
    253 	int		resp_len = 0;
    254 	int		rv;
    255 
    256 	/*
    257 	 * Sanity check the message
    258 	 */
    259 	if (buflen < sizeof (dr_cpu_hdr_t)) {
    260 		DR_DBG_CPU("incoming message short: expected at least %ld "
    261 		    "bytes, received %ld\n", sizeof (dr_cpu_hdr_t), buflen);
    262 		goto done;
    263 	}
    264 
    265 	if (req == NULL) {
    266 		DR_DBG_CPU("empty message: expected at least %ld bytes\n",
    267 		    sizeof (dr_cpu_hdr_t));
    268 		goto done;
    269 	}
    270 
    271 	DR_DBG_CPU("incoming request:\n");
    272 	DR_DBG_DUMP_MSG(buf, buflen);
    273 
    274 	if (req->num_records > NCPU) {
    275 		DR_DBG_CPU("CPU list too long: %d when %d is the maximum\n",
    276 		    req->num_records, NCPU);
    277 		goto done;
    278 	}
    279 
    280 	if (req->num_records == 0) {
    281 		DR_DBG_CPU("No CPU specified for operation\n");
    282 		goto done;
    283 	}
    284 
    285 	/*
    286 	 * Process the command
    287 	 */
    288 	switch (req->msg_type) {
    289 	case DR_CPU_CONFIGURE:
    290 	case DR_CPU_UNCONFIGURE:
    291 	case DR_CPU_FORCE_UNCONFIG:
    292 		if ((rv = dr_cpu_list_wrk(req, &resp, &resp_len)) != 0) {
    293 			DR_DBG_CPU("%s%s failed (%d)\n",
    294 			    (req->msg_type == DR_CPU_CONFIGURE) ?
    295 			    "CPU configure" : "CPU unconfigure",
    296 			    (req->msg_type == DR_CPU_FORCE_UNCONFIG) ?
    297 			    " (forced)" : "", rv);
    298 		}
    299 		break;
    300 
    301 	case DR_CPU_STATUS:
    302 		if ((rv = dr_cpu_list_status(req, &resp, &resp_len)) != 0)
    303 			DR_DBG_CPU("CPU status failed (%d)\n", rv);
    304 		break;
    305 
    306 	default:
    307 		cmn_err(CE_NOTE, "unsupported DR operation (%d)",
    308 		    req->msg_type);
    309 		break;
    310 	}
    311 
    312 done:
    313 	/* check if an error occurred */
    314 	if (resp == &err_resp) {
    315 		resp->req_num = (req) ? req->req_num : 0;
    316 		resp->msg_type = DR_CPU_ERROR;
    317 		resp->num_records = 0;
    318 		resp_len = sizeof (dr_cpu_hdr_t);
    319 	}
    320 
    321 	DR_DBG_CPU("outgoing response:\n");
    322 	DR_DBG_DUMP_MSG(resp, resp_len);
    323 
    324 	/* send back the response */
    325 	if (ds_cap_send(ds_handle, resp, resp_len) != 0) {
    326 		DR_DBG_CPU("ds_send failed\n");
    327 	}
    328 
    329 	/* free any allocated memory */
    330 	if (DRCPU_VERS_GTEQ(1, 1) || (resp != &err_resp)) {
    331 		DR_DBG_KMEM("%s: free addr %p size %d\n",
    332 		    __func__, (void *)resp, resp_len);
    333 		kmem_free(resp, resp_len);
    334 	}
    335 }
    336 
    337 /*
    338  * Create a response message which consists of a header followed
    339  * by the error string passed in.
    340  */
    341 static size_t
    342 dr_cpu_err_resp(dr_cpu_hdr_t *req, dr_cpu_hdr_t **respp, char *msg)
    343 {
    344 	size_t size;
    345 	dr_cpu_hdr_t *resp;
    346 
    347 	ASSERT((msg != NULL) && (strlen(msg) > 0));
    348 
    349 	size = sizeof (*req) + strlen(msg) + 1;
    350 	resp = kmem_alloc(size, KM_SLEEP);
    351 	DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
    352 	    __func__, (void *)resp, size);
    353 
    354 	resp->req_num = req->req_num;
    355 	resp->msg_type = DR_CPU_ERROR;
    356 	resp->num_records = 0;
    357 
    358 	(void) strcpy((char *)(resp) + sizeof (*resp), msg);
    359 
    360 	*respp = resp;
    361 
    362 	return (size);
    363 }
    364 
    365 /*
    366  * Common routine to config or unconfig multiple cpus.  The unconfig
    367  * case checks with the OS to see if the removal of cpus will be
    368  * permitted, but can be overridden by the "force" version of the
    369  * command.  Otherwise, the logic for both cases is identical.
    370  *
    371  * Note: Do not modify result buffer or length on error.
    372  */
    373 static int
    374 dr_cpu_list_wrk(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
    375 {
    376 	int		rv;
    377 	int		idx;
    378 	int		count;
    379 	fn_t		dr_fn;
    380 	int		se_hint;
    381 	boolean_t	force = B_FALSE;
    382 	uint32_t	*req_cpus;
    383 	dr_cpu_res_t	*res;
    384 	int		drctl_cmd;
    385 	int		drctl_flags = 0;
    386 	drctl_rsrc_t	*drctl_req;
    387 	size_t		drctl_req_len;
    388 	drctl_resp_t	*drctl_resp;
    389 	drctl_rsrc_t	*drctl_rsrc;
    390 	size_t		drctl_resp_len = 0;
    391 	drctl_cookie_t	drctl_res_ck;
    392 
    393 	ASSERT((req != NULL) && (req->num_records != 0));
    394 
    395 	count = req->num_records;
    396 
    397 	/*
    398 	 * Extract all information that is specific
    399 	 * to the various types of operations.
    400 	 */
    401 	switch (req->msg_type) {
    402 	case DR_CPU_CONFIGURE:
    403 		dr_fn = dr_cpu_configure;
    404 		drctl_cmd = DRCTL_CPU_CONFIG_REQUEST;
    405 		se_hint = SE_HINT_INSERT;
    406 		break;
    407 	case DR_CPU_FORCE_UNCONFIG:
    408 		drctl_flags = DRCTL_FLAG_FORCE;
    409 		force = B_TRUE;
    410 		_NOTE(FALLTHROUGH)
    411 	case DR_CPU_UNCONFIGURE:
    412 		dr_fn = dr_cpu_unconfigure;
    413 		drctl_cmd = DRCTL_CPU_UNCONFIG_REQUEST;
    414 		se_hint = SE_HINT_REMOVE;
    415 		break;
    416 	default:
    417 		/* Programming error if we reach this. */
    418 		cmn_err(CE_NOTE,
    419 		    "%s: bad msg_type %d\n", __func__, req->msg_type);
    420 		ASSERT(0);
    421 		return (-1);
    422 	}
    423 
    424 	/* the incoming array of cpuids to operate on */
    425 	req_cpus = DR_CPU_CMD_CPUIDS(req);
    426 
    427 	/* allocate drctl request msg based on incoming resource count */
    428 	drctl_req_len = sizeof (drctl_rsrc_t) * count;
    429 	drctl_req = kmem_zalloc(drctl_req_len, KM_SLEEP);
    430 	DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
    431 	    __func__, (void *)drctl_req, drctl_req_len);
    432 
    433 	/* copy the cpuids for the drctl call from the incoming request msg */
    434 	for (idx = 0; idx < count; idx++)
    435 		drctl_req[idx].res_cpu_id = req_cpus[idx];
    436 
    437 	rv = drctl_config_init(drctl_cmd, drctl_flags, drctl_req,
    438 	    count, &drctl_resp, &drctl_resp_len, &drctl_res_ck);
    439 
    440 	ASSERT((drctl_resp != NULL) && (drctl_resp_len != 0));
    441 
    442 	if (rv != 0) {
    443 		DR_DBG_CPU("%s: drctl_config_init "
    444 		    "returned: %d\n", __func__, rv);
    445 
    446 		if (DRCPU_VERS_EQ(1, 0)) {
    447 			rv = -1;
    448 		} else {
    449 			ASSERT(DRCPU_VERS_GTEQ(1, 1));
    450 			ASSERT(drctl_resp->resp_type == DRCTL_RESP_ERR);
    451 
    452 			*resp_len = dr_cpu_err_resp(req,
    453 			    resp, drctl_resp->resp_err_msg);
    454 		}
    455 
    456 		DR_DBG_KMEM("%s: free addr %p size %ld\n",
    457 		    __func__, (void *)drctl_resp, drctl_resp_len);
    458 		kmem_free(drctl_resp, drctl_resp_len);
    459 		DR_DBG_KMEM("%s: free addr %p size %ld\n",
    460 		    __func__, (void *)drctl_req, drctl_req_len);
    461 		kmem_free(drctl_req, drctl_req_len);
    462 
    463 		return (rv);
    464 	}
    465 
    466 	ASSERT(drctl_resp->resp_type == DRCTL_RESP_OK);
    467 
    468 	drctl_rsrc = drctl_resp->resp_resources;
    469 
    470 	/* create the result scratch array */
    471 	res = dr_cpu_res_array_init(req, drctl_rsrc, count);
    472 
    473 	/*
    474 	 * For unconfigure, check if there are any conditions
    475 	 * that will cause the operation to fail. These are
    476 	 * performed before the actual unconfigure attempt so
    477 	 * that a meaningful error message can be generated.
    478 	 */
    479 	if (req->msg_type != DR_CPU_CONFIGURE)
    480 		dr_cpu_check_cpus(req, res);
    481 
    482 	/* perform the specified operation on each of the CPUs */
    483 	for (idx = 0; idx < count; idx++) {
    484 		int result;
    485 		int status;
    486 
    487 		/*
    488 		 * If no action will be taken against the current
    489 		 * CPU, update the drctl resource information to
    490 		 * ensure that it gets recovered properly during
    491 		 * the drctl fini() call.
    492 		 */
    493 		if (res[idx].result != DR_CPU_RES_OK) {
    494 			drctl_req[idx].status = DRCTL_STATUS_CONFIG_FAILURE;
    495 			continue;
    496 		}
    497 
    498 		/* call the function to perform the actual operation */
    499 		result = (*dr_fn)(req_cpus[idx], &status, force);
    500 
    501 		/* save off results of the operation */
    502 		res[idx].result = result;
    503 		res[idx].status = status;
    504 
    505 		/* save result for drctl fini() reusing init() msg memory */
    506 		drctl_req[idx].status = (result != DR_CPU_RES_OK) ?
    507 		    DRCTL_STATUS_CONFIG_FAILURE : DRCTL_STATUS_CONFIG_SUCCESS;
    508 
    509 		DR_DBG_CPU("%s: cpuid %d status %d result %d off '%s'\n",
    510 		    __func__, req_cpus[idx], drctl_req[idx].status, result,
    511 		    (res[idx].string) ? res[idx].string : "");
    512 	}
    513 
    514 	if ((rv = drctl_config_fini(&drctl_res_ck, drctl_req, count)) != 0)
    515 		DR_DBG_CPU("%s: drctl_config_fini "
    516 		    "returned: %d\n", __func__, rv);
    517 
    518 	/*
    519 	 * Operation completed without any fatal errors.
    520 	 * Pack the response for transmission.
    521 	 */
    522 	*resp_len = dr_cpu_pack_response(req, res, resp);
    523 
    524 	/* notify interested parties about the operation */
    525 	dr_generate_event(DR_TYPE_CPU, se_hint);
    526 
    527 	/*
    528 	 * Deallocate any scratch memory.
    529 	 */
    530 	DR_DBG_KMEM("%s: free addr %p size %ld\n",
    531 	    __func__, (void *)drctl_resp, drctl_resp_len);
    532 	kmem_free(drctl_resp, drctl_resp_len);
    533 	DR_DBG_KMEM("%s: free addr %p size %ld\n",
    534 	    __func__, (void *)drctl_req, drctl_req_len);
    535 	kmem_free(drctl_req, drctl_req_len);
    536 
    537 	dr_cpu_res_array_fini(res, count);
    538 
    539 	return (0);
    540 }
    541 
    542 /*
    543  * Allocate and initialize a result array based on the initial
    544  * drctl operation. A valid result array is always returned.
    545  */
    546 static dr_cpu_res_t *
    547 dr_cpu_res_array_init(dr_cpu_hdr_t *req, drctl_rsrc_t *rsrc, int nrsrc)
    548 {
    549 	int		idx;
    550 	dr_cpu_res_t	*res;
    551 	char		*err_str;
    552 	size_t		err_len;
    553 
    554 	/* allocate zero filled buffer to initialize fields */
    555 	res = kmem_zalloc(nrsrc * sizeof (dr_cpu_res_t), KM_SLEEP);
    556 	DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
    557 	    __func__, (void *)res, nrsrc * sizeof (dr_cpu_res_t));
    558 
    559 	/*
    560 	 * Fill in the result information for each resource.
    561 	 */
    562 	for (idx = 0; idx < nrsrc; idx++) {
    563 		res[idx].cpuid = rsrc[idx].res_cpu_id;
    564 		res[idx].result = DR_CPU_RES_OK;
    565 
    566 		if (rsrc[idx].status == DRCTL_STATUS_ALLOW)
    567 			continue;
    568 
    569 		/*
    570 		 * Update the state information for this CPU.
    571 		 */
    572 		res[idx].result = DR_CPU_RES_BLOCKED;
    573 		res[idx].status = (req->msg_type == DR_CPU_CONFIGURE) ?
    574 		    DR_CPU_STAT_UNCONFIGURED : DR_CPU_STAT_CONFIGURED;
    575 
    576 		/*
    577 		 * If an error string exists, copy it out of the
    578 		 * message buffer. This eliminates any dependency
    579 		 * on the memory allocated for the message buffer
    580 		 * itself.
    581 		 */
    582 		if (rsrc[idx].offset != NULL) {
    583 			err_str = (char *)rsrc + rsrc[idx].offset;
    584 			err_len = strlen(err_str) + 1;
    585 
    586 			res[idx].string = kmem_alloc(err_len, KM_SLEEP);
    587 			DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
    588 			    __func__, (void *)(res[idx].string), err_len);
    589 			bcopy(err_str, res[idx].string, err_len);
    590 		}
    591 	}
    592 
    593 	return (res);
    594 }
    595 
    596 static void
    597 dr_cpu_res_array_fini(dr_cpu_res_t *res, int nres)
    598 {
    599 	int	idx;
    600 	size_t	str_len;
    601 
    602 	for (idx = 0; idx < nres; idx++) {
    603 		/* deallocate the error string if present */
    604 		if (res[idx].string) {
    605 			str_len = strlen(res[idx].string) + 1;
    606 			DR_DBG_KMEM("%s: free addr %p size %ld\n",
    607 			    __func__, (void *)(res[idx].string), str_len);
    608 			kmem_free(res[idx].string, str_len);
    609 		}
    610 	}
    611 
    612 	/* deallocate the result array itself */
    613 	DR_DBG_KMEM("%s: free addr %p size %ld\n",
    614 	    __func__, (void *)res, sizeof (dr_cpu_res_t) * nres);
    615 	kmem_free(res, sizeof (dr_cpu_res_t) * nres);
    616 }
    617 
    618 /*
    619  * Allocate and pack a response message for transmission based
    620  * on the specified result array. A valid response message and
    621  * valid size information is always returned.
    622  */
    623 static size_t
    624 dr_cpu_pack_response(dr_cpu_hdr_t *req, dr_cpu_res_t *res, dr_cpu_hdr_t **respp)
    625 {
    626 	int		idx;
    627 	dr_cpu_hdr_t	*resp;
    628 	dr_cpu_stat_t	*resp_stat;
    629 	size_t		resp_len;
    630 	uint32_t	curr_off;
    631 	caddr_t		curr_str;
    632 	size_t		str_len;
    633 	size_t		stat_len;
    634 	int		nstat = req->num_records;
    635 
    636 	/*
    637 	 * Calculate the size of the response message
    638 	 * and allocate an appropriately sized buffer.
    639 	 */
    640 	resp_len = 0;
    641 
    642 	/* add the header size */
    643 	resp_len += sizeof (dr_cpu_hdr_t);
    644 
    645 	/* add the stat array size */
    646 	stat_len = sizeof (dr_cpu_stat_t) * nstat;
    647 	resp_len += stat_len;
    648 
    649 	/* add the size of any error strings */
    650 	for (idx = 0; idx < nstat; idx++) {
    651 		if (res[idx].string != NULL) {
    652 			resp_len += strlen(res[idx].string) + 1;
    653 		}
    654 	}
    655 
    656 	/* allocate the message buffer */
    657 	resp = kmem_zalloc(resp_len, KM_SLEEP);
    658 	DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
    659 	    __func__, (void *)resp, resp_len);
    660 
    661 	/*
    662 	 * Fill in the header information.
    663 	 */
    664 	resp->req_num = req->req_num;
    665 	resp->msg_type = DR_CPU_OK;
    666 	resp->num_records = nstat;
    667 
    668 	/*
    669 	 * Fill in the stat information.
    670 	 */
    671 	resp_stat = DR_CPU_RESP_STATS(resp);
    672 
    673 	/* string offsets start immediately after stat array */
    674 	curr_off = sizeof (dr_cpu_hdr_t) + stat_len;
    675 	curr_str = (char *)resp_stat + stat_len;
    676 
    677 	for (idx = 0; idx < nstat; idx++) {
    678 		resp_stat[idx].cpuid = res[idx].cpuid;
    679 		resp_stat[idx].result = res[idx].result;
    680 		resp_stat[idx].status = res[idx].status;
    681 
    682 		if (res[idx].string != NULL) {
    683 			/* copy over the error string */
    684 			str_len = strlen(res[idx].string) + 1;
    685 			bcopy(res[idx].string, curr_str, str_len);
    686 			resp_stat[idx].string_off = curr_off;
    687 
    688 			curr_off += str_len;
    689 			curr_str += str_len;
    690 		}
    691 	}
    692 
    693 	/* buffer should be exactly filled */
    694 	ASSERT(curr_off == resp_len);
    695 
    696 	*respp = resp;
    697 	return (resp_len);
    698 }
    699 
    700 /*
    701  * Check for conditions that will prevent a CPU from being offlined.
    702  * This provides the opportunity to generate useful information to
    703  * help diagnose the failure rather than letting the offline attempt
    704  * fail in a more generic way.
    705  */
    706 static void
    707 dr_cpu_check_cpus(dr_cpu_hdr_t *req, dr_cpu_res_t *res)
    708 {
    709 	int		idx;
    710 	cpu_t		*cp;
    711 	uint32_t	*cpuids;
    712 
    713 	ASSERT((req->msg_type == DR_CPU_UNCONFIGURE) ||
    714 	    (req->msg_type == DR_CPU_FORCE_UNCONFIG));
    715 
    716 	DR_DBG_CPU("dr_cpu_check_cpus...\n");
    717 
    718 	/* array of cpuids start just after the header */
    719 	cpuids = DR_CPU_CMD_CPUIDS(req);
    720 
    721 	mutex_enter(&cpu_lock);
    722 
    723 	/*
    724 	 * Always check processor set membership first. The
    725 	 * last CPU in a processor set will fail to offline
    726 	 * even if the operation if forced, so any failures
    727 	 * should always be reported.
    728 	 */
    729 	dr_cpu_check_psrset(cpuids, res, req->num_records);
    730 
    731 	/* process each cpu that is part of the request */
    732 	for (idx = 0; idx < req->num_records; idx++) {
    733 
    734 		/* nothing to check if the CPU has already failed */
    735 		if (res[idx].result != DR_CPU_RES_OK)
    736 			continue;
    737 
    738 		if ((cp = cpu_get(cpuids[idx])) == NULL)
    739 			continue;
    740 
    741 		/*
    742 		 * Only check if there are bound threads if the
    743 		 * operation is not a forced unconfigure. In a
    744 		 * forced request, threads are automatically
    745 		 * unbound before they are offlined.
    746 		 */
    747 		if (req->msg_type == DR_CPU_UNCONFIGURE) {
    748 			/*
    749 			 * The return value is only interesting if other
    750 			 * checks are added to this loop and a decision
    751 			 * is needed on whether to continue checking.
    752 			 */
    753 			(void) dr_cpu_check_bound_thr(cp, &res[idx]);
    754 		}
    755 	}
    756 
    757 	mutex_exit(&cpu_lock);
    758 }
    759 
    760 /*
    761  * Examine the processor set configuration for the specified
    762  * CPUs and see if the unconfigure operation would result in
    763  * trying to remove the last CPU in any processor set.
    764  */
    765 static void
    766 dr_cpu_check_psrset(uint32_t *cpuids, dr_cpu_res_t *res, int nres)
    767 {
    768 	int		cpu_idx;
    769 	int		set_idx;
    770 	cpu_t		*cp;
    771 	cpupart_t	*cpp;
    772 	char		err_str[DR_CPU_MAX_ERR_LEN];
    773 	size_t		err_len;
    774 	struct {
    775 		cpupart_t	*cpp;
    776 		int		ncpus;
    777 	} *psrset;
    778 
    779 	ASSERT(MUTEX_HELD(&cpu_lock));
    780 
    781 	/*
    782 	 * Allocate a scratch array to count the CPUs in
    783 	 * the various processor sets. A CPU always belongs
    784 	 * to exactly one processor set, so by definition,
    785 	 * the scratch array never needs to be larger than
    786 	 * the number of CPUs.
    787 	 */
    788 	psrset = kmem_zalloc(sizeof (*psrset) * nres, KM_SLEEP);
    789 	DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
    790 	    __func__, (void *)psrset, sizeof (*psrset) * nres);
    791 
    792 	for (cpu_idx = 0; cpu_idx < nres; cpu_idx++) {
    793 
    794 		/* skip any CPUs that have already failed */
    795 		if (res[cpu_idx].result != DR_CPU_RES_OK)
    796 			continue;
    797 
    798 		if ((cp = cpu_get(cpuids[cpu_idx])) == NULL)
    799 			continue;
    800 
    801 		cpp = cp->cpu_part;
    802 
    803 		/* lookup the set this CPU belongs to */
    804 		for (set_idx = 0; set_idx < nres; set_idx++) {
    805 
    806 			/* matching set found */
    807 			if (cpp == psrset[set_idx].cpp)
    808 				break;
    809 
    810 			/* set not found, start a new entry */
    811 			if (psrset[set_idx].cpp == NULL) {
    812 				psrset[set_idx].cpp = cpp;
    813 				psrset[set_idx].ncpus = cpp->cp_ncpus;
    814 				break;
    815 			}
    816 		}
    817 
    818 		ASSERT(set_idx != nres);
    819 
    820 		/*
    821 		 * Remove the current CPU from the set total but only
    822 		 * generate an error for the last CPU. The correct CPU
    823 		 * will get the error because the unconfigure attempts
    824 		 * will occur in the same order in which the CPUs are
    825 		 * examined in this loop.  The cp_ncpus field of a
    826 		 * cpupart_t counts only online cpus, so it is safe
    827 		 * to remove an offline cpu without testing ncpus.
    828 		 */
    829 		if (cpu_is_offline(cp))
    830 			continue;
    831 
    832 		if (--psrset[set_idx].ncpus == 0) {
    833 			/*
    834 			 * Fill in the various pieces of information
    835 			 * to report that the operation will fail.
    836 			 */
    837 			res[cpu_idx].result = DR_CPU_RES_BLOCKED;
    838 			res[cpu_idx].status = DR_CPU_STAT_CONFIGURED;
    839 
    840 			(void) snprintf(err_str, DR_CPU_MAX_ERR_LEN,
    841 			    "last online cpu in processor set %d", cpp->cp_id);
    842 
    843 			err_len = strlen(err_str) + 1;
    844 
    845 			res[cpu_idx].string = kmem_alloc(err_len, KM_SLEEP);
    846 			DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
    847 			    __func__, (void *)(res[cpu_idx].string), err_len);
    848 			bcopy(err_str, res[cpu_idx].string, err_len);
    849 
    850 			DR_DBG_CPU("cpu %d: %s\n", cpuids[cpu_idx], err_str);
    851 		}
    852 	}
    853 
    854 	DR_DBG_KMEM("%s: free addr %p size %ld\n",
    855 	    __func__, (void *)psrset, sizeof (*psrset) * nres);
    856 	kmem_free(psrset, sizeof (*psrset) * nres);
    857 }
    858 
    859 /*
    860  * Check if any threads are bound to the specified CPU. If the
    861  * condition is true, DR_CPU_RES_BLOCKED is returned and an error
    862  * string is generated and placed in the specified result structure.
    863  * Otherwise, DR_CPU_RES_OK is returned.
    864  */
    865 static int
    866 dr_cpu_check_bound_thr(cpu_t *cp, dr_cpu_res_t *res)
    867 {
    868 	int		nbound;
    869 	proc_t		*pp;
    870 	kthread_t	*tp;
    871 	char		err_str[DR_CPU_MAX_ERR_LEN];
    872 	size_t		err_len;
    873 
    874 	/*
    875 	 * Error string allocation makes an assumption
    876 	 * that no blocking condition has been identified.
    877 	 */
    878 	ASSERT(res->result == DR_CPU_RES_OK);
    879 	ASSERT(res->string == NULL);
    880 
    881 	ASSERT(MUTEX_HELD(&cpu_lock));
    882 
    883 	mutex_enter(&pidlock);
    884 
    885 	nbound = 0;
    886 
    887 	/*
    888 	 * Walk the active processes, checking if each
    889 	 * thread belonging to the process is bound.
    890 	 */
    891 	for (pp = practive; (pp != NULL) && (nbound <= 1); pp = pp->p_next) {
    892 		mutex_enter(&pp->p_lock);
    893 
    894 		tp = pp->p_tlist;
    895 
    896 		if ((tp == NULL) || (pp->p_flag & SSYS)) {
    897 			mutex_exit(&pp->p_lock);
    898 			continue;
    899 		}
    900 
    901 		do {
    902 			if (tp->t_bind_cpu != cp->cpu_id)
    903 				continue;
    904 
    905 			/*
    906 			 * Update the running total of bound
    907 			 * threads. Continue the search until
    908 			 * it can be determined if more than
    909 			 * one thread is bound to the CPU.
    910 			 */
    911 			if (++nbound > 1)
    912 				break;
    913 
    914 		} while ((tp = tp->t_forw) != pp->p_tlist);
    915 
    916 		mutex_exit(&pp->p_lock);
    917 	}
    918 
    919 	mutex_exit(&pidlock);
    920 
    921 	if (nbound) {
    922 		/*
    923 		 * Threads are bound to the CPU. Fill in
    924 		 * various pieces of information to report
    925 		 * that the operation will fail.
    926 		 */
    927 		res->result = DR_CPU_RES_BLOCKED;
    928 		res->status = DR_CPU_STAT_CONFIGURED;
    929 
    930 		(void) snprintf(err_str, DR_CPU_MAX_ERR_LEN, "cpu has bound "
    931 		    "thread%s", (nbound > 1) ? "s" : "");
    932 
    933 		err_len = strlen(err_str) + 1;
    934 
    935 		res->string = kmem_alloc(err_len, KM_SLEEP);
    936 		DR_DBG_KMEM("%s: alloc addr %p size %ld\n",
    937 		    __func__, (void *)(res->string), err_len);
    938 		bcopy(err_str, res->string, err_len);
    939 
    940 		DR_DBG_CPU("cpu %d: %s\n", cp->cpu_id, err_str);
    941 	}
    942 
    943 	return (res->result);
    944 }
    945 
    946 /*
    947  * Do not modify result buffer or length on error.
    948  */
    949 static int
    950 dr_cpu_list_status(dr_cpu_hdr_t *req, dr_cpu_hdr_t **resp, int *resp_len)
    951 {
    952 	int		idx;
    953 	int		result;
    954 	int		status;
    955 	int		rlen;
    956 	uint32_t	*cpuids;
    957 	dr_cpu_hdr_t	*rp;
    958 	dr_cpu_stat_t	*stat;
    959 	md_t		*mdp = NULL;
    960 	int		num_nodes;
    961 	int		listsz;
    962 	mde_cookie_t	*listp = NULL;
    963 	mde_cookie_t	cpunode;
    964 	boolean_t	walk_md = B_FALSE;
    965 
    966 	/* the incoming array of cpuids to configure */
    967 	cpuids = DR_CPU_CMD_CPUIDS(req);
    968 
    969 	/* allocate a response message */
    970 	rlen = sizeof (dr_cpu_hdr_t);
    971 	rlen += req->num_records * sizeof (dr_cpu_stat_t);
    972 	rp = kmem_zalloc(rlen, KM_SLEEP);
    973 	DR_DBG_KMEM("%s: alloc addr %p size %d\n", __func__, (void *)rp, rlen);
    974 
    975 	/* fill in the known data */
    976 	rp->req_num = req->req_num;
    977 	rp->msg_type = DR_CPU_STATUS;
    978 	rp->num_records = req->num_records;
    979 
    980 	/* stat array for the response */
    981 	stat = DR_CPU_RESP_STATS(rp);
    982 
    983 	/* get the status for each of the CPUs */
    984 	for (idx = 0; idx < req->num_records; idx++) {
    985 
    986 		result = dr_cpu_status(cpuids[idx], &status);
    987 
    988 		if (result == DR_CPU_RES_FAILURE)
    989 			walk_md = B_TRUE;
    990 
    991 		/* save off results of the status */
    992 		stat[idx].cpuid = cpuids[idx];
    993 		stat[idx].result = result;
    994 		stat[idx].status = status;
    995 	}
    996 
    997 	if (walk_md == B_FALSE)
    998 		goto done;
    999 
   1000 	/*
   1001 	 * At least one of the cpus did not have a CPU
   1002 	 * structure. So, consult the MD to determine if
   1003 	 * they are present.
   1004 	 */
   1005 
   1006 	if ((mdp = md_get_handle()) == NULL) {
   1007 		DR_DBG_CPU("unable to initialize MD\n");
   1008 		goto done;
   1009 	}
   1010 
   1011 	num_nodes = md_node_count(mdp);
   1012 	ASSERT(num_nodes > 0);
   1013 
   1014 	listsz = num_nodes * sizeof (mde_cookie_t);
   1015 	listp = kmem_zalloc(listsz, KM_SLEEP);
   1016 	DR_DBG_KMEM("%s: alloc addr %p size %d\n",
   1017 	    __func__, (void *)listp, listsz);
   1018 
   1019 	for (idx = 0; idx < req->num_records; idx++) {
   1020 
   1021 		if (stat[idx].result != DR_CPU_RES_FAILURE)
   1022 			continue;
   1023 
   1024 		/* check the MD for the current cpuid */
   1025 		cpunode = dr_cpu_find_node_md(stat[idx].cpuid, mdp, listp);
   1026 
   1027 		stat[idx].result = DR_CPU_RES_OK;
   1028 
   1029 		if (cpunode == MDE_INVAL_ELEM_COOKIE) {
   1030 			stat[idx].status = DR_CPU_STAT_NOT_PRESENT;
   1031 		} else {
   1032 			stat[idx].status = DR_CPU_STAT_UNCONFIGURED;
   1033 		}
   1034 	}
   1035 
   1036 	DR_DBG_KMEM("%s: free addr %p size %d\n",
   1037 	    __func__, (void *)listp, listsz);
   1038 	kmem_free(listp, listsz);
   1039 
   1040 	(void) md_fini_handle(mdp);
   1041 
   1042 done:
   1043 	*resp = rp;
   1044 	*resp_len = rlen;
   1045 
   1046 	return (0);
   1047 }
   1048 
   1049 static int
   1050 dr_cpu_configure(processorid_t cpuid, int *status, boolean_t force)
   1051 {
   1052 	 _NOTE(ARGUNUSED(force))
   1053 	struct cpu	*cp;
   1054 	int		rv = 0;
   1055 
   1056 	DR_DBG_CPU("dr_cpu_configure...\n");
   1057 
   1058 	/*
   1059 	 * Build device tree node for the CPU
   1060 	 */
   1061 	if ((rv = dr_cpu_probe(cpuid)) != 0) {
   1062 		DR_DBG_CPU("failed to probe CPU %d (%d)\n", cpuid, rv);
   1063 		if (rv == EINVAL) {
   1064 			*status = DR_CPU_STAT_NOT_PRESENT;
   1065 			return (DR_CPU_RES_NOT_IN_MD);
   1066 		}
   1067 		*status = DR_CPU_STAT_UNCONFIGURED;
   1068 		return (DR_CPU_RES_FAILURE);
   1069 	}
   1070 
   1071 	mutex_enter(&cpu_lock);
   1072 
   1073 	/*
   1074 	 * Configure the CPU
   1075 	 */
   1076 	if ((cp = cpu_get(cpuid)) == NULL) {
   1077 
   1078 		if ((rv = cpu_configure(cpuid)) != 0) {
   1079 			DR_DBG_CPU("failed to configure CPU %d (%d)\n",
   1080 			    cpuid, rv);
   1081 			rv = DR_CPU_RES_FAILURE;
   1082 			*status = DR_CPU_STAT_UNCONFIGURED;
   1083 			goto done;
   1084 		}
   1085 
   1086 		DR_DBG_CPU("CPU %d configured\n", cpuid);
   1087 
   1088 		/* CPU struct should exist now */
   1089 		cp = cpu_get(cpuid);
   1090 	}
   1091 
   1092 	ASSERT(cp);
   1093 
   1094 	/*
   1095 	 * Power on the CPU. In sun4v, this brings the stopped
   1096 	 * CPU into the guest from the Hypervisor.
   1097 	 */
   1098 	if (cpu_is_poweredoff(cp)) {
   1099 
   1100 		if ((rv = cpu_poweron(cp)) != 0) {
   1101 			DR_DBG_CPU("failed to power on CPU %d (%d)\n",
   1102 			    cpuid, rv);
   1103 			rv = DR_CPU_RES_FAILURE;
   1104 			*status = DR_CPU_STAT_UNCONFIGURED;
   1105 			goto done;
   1106 		}
   1107 
   1108 		DR_DBG_CPU("CPU %d powered on\n", cpuid);
   1109 	}
   1110 
   1111 	/*
   1112 	 * Online the CPU
   1113 	 */
   1114 	if (cpu_is_offline(cp)) {
   1115 
   1116 		if ((rv = cpu_online(cp)) != 0) {
   1117 			DR_DBG_CPU("failed to online CPU %d (%d)\n",
   1118 			    cpuid, rv);
   1119 			rv = DR_CPU_RES_FAILURE;
   1120 			/* offline is still configured */
   1121 			*status = DR_CPU_STAT_CONFIGURED;
   1122 			goto done;
   1123 		}
   1124 
   1125 		DR_DBG_CPU("CPU %d online\n", cpuid);
   1126 	}
   1127 
   1128 	rv = DR_CPU_RES_OK;
   1129 	*status = DR_CPU_STAT_CONFIGURED;
   1130 
   1131 done:
   1132 	mutex_exit(&cpu_lock);
   1133 
   1134 	return (rv);
   1135 }
   1136 
   1137 static int
   1138 dr_cpu_unconfigure(processorid_t cpuid, int *status, boolean_t force)
   1139 {
   1140 	struct cpu	*cp;
   1141 	int		rv = 0;
   1142 	int		cpu_flags;
   1143 
   1144 	DR_DBG_CPU("dr_cpu_unconfigure%s...\n", (force) ? " (force)" : "");
   1145 
   1146 	mutex_enter(&cpu_lock);
   1147 
   1148 	cp = cpu_get(cpuid);
   1149 
   1150 	if (cp == NULL) {
   1151 		/*
   1152 		 * As OS CPU structures are already torn down proceed
   1153 		 * to deprobe device tree to make sure the device tree
   1154 		 * is up do date.
   1155 		 */
   1156 		goto deprobe;
   1157 	}
   1158 
   1159 	ASSERT(cp->cpu_id == cpuid);
   1160 
   1161 	/*
   1162 	 * Offline the CPU
   1163 	 */
   1164 	if (cpu_is_active(cp)) {
   1165 
   1166 		/* set the force flag correctly */
   1167 		cpu_flags = (force) ? CPU_FORCED : 0;
   1168 
   1169 		/*
   1170 		 * Before we take the CPU offline, we first enable interrupts.
   1171 		 * Otherwise, cpu_offline() might reject the request.  Note:
   1172 		 * if the offline subsequently fails, the target cpu will be
   1173 		 * left with interrupts enabled.  This is consistent with the
   1174 		 * behavior of psradm(1M) and p_online(2).
   1175 		 */
   1176 		cpu_intr_enable(cp);
   1177 
   1178 		if ((rv = cpu_offline(cp, cpu_flags)) != 0) {
   1179 			DR_DBG_CPU("failed to offline CPU %d (%d)\n",
   1180 			    cpuid, rv);
   1181 
   1182 			rv = DR_CPU_RES_FAILURE;
   1183 			*status = DR_CPU_STAT_CONFIGURED;
   1184 			mutex_exit(&cpu_lock);
   1185 			return (rv);
   1186 		}
   1187 
   1188 		DR_DBG_CPU("CPU %d offline\n", cpuid);
   1189 	}
   1190 
   1191 	/*
   1192 	 * Power off the CPU. In sun4v, this puts the running
   1193 	 * CPU into the stopped state in the Hypervisor.
   1194 	 */
   1195 	if (!cpu_is_poweredoff(cp)) {
   1196 
   1197 		if ((rv = cpu_poweroff(cp)) != 0) {
   1198 			DR_DBG_CPU("failed to power off CPU %d (%d)\n",
   1199 			    cpuid, rv);
   1200 			rv = DR_CPU_RES_FAILURE;
   1201 			*status = DR_CPU_STAT_CONFIGURED;
   1202 			mutex_exit(&cpu_lock);
   1203 			return (rv);
   1204 		}
   1205 
   1206 		DR_DBG_CPU("CPU %d powered off\n", cpuid);
   1207 	}
   1208 
   1209 	/*
   1210 	 * Unconfigure the CPU
   1211 	 */
   1212 	if ((rv = cpu_unconfigure(cpuid)) != 0) {
   1213 		DR_DBG_CPU("failed to unconfigure CPU %d (%d)\n", cpuid, rv);
   1214 		rv = DR_CPU_RES_FAILURE;
   1215 		*status = DR_CPU_STAT_UNCONFIGURED;
   1216 		mutex_exit(&cpu_lock);
   1217 		return (rv);
   1218 	}
   1219 
   1220 	DR_DBG_CPU("CPU %d unconfigured\n", cpuid);
   1221 
   1222 deprobe:
   1223 	mutex_exit(&cpu_lock);
   1224 	/*
   1225 	 * Tear down device tree.
   1226 	 */
   1227 	if ((rv = dr_cpu_deprobe(cpuid)) != 0) {
   1228 		DR_DBG_CPU("failed to deprobe CPU %d (%d)\n", cpuid, rv);
   1229 		rv = DR_CPU_RES_FAILURE;
   1230 		*status = DR_CPU_STAT_UNCONFIGURED;
   1231 		return (rv);
   1232 	}
   1233 
   1234 	rv = DR_CPU_RES_OK;
   1235 	*status = DR_CPU_STAT_UNCONFIGURED;
   1236 
   1237 	return (rv);
   1238 }
   1239 
   1240 /*
   1241  * Determine the state of a CPU. If the CPU structure is not present,
   1242  * it does not attempt to determine whether or not the CPU is in the
   1243  * MD. It is more efficient to do this at the higher level for all
   1244  * CPUs since it may not even be necessary to search the MD if all
   1245  * the CPUs are accounted for. Returns DR_CPU_RES_OK if the CPU
   1246  * structure is present, and DR_CPU_RES_FAILURE otherwise as a signal
   1247  * that an MD walk is necessary.
   1248  */
   1249 static int
   1250 dr_cpu_status(processorid_t cpuid, int *status)
   1251 {
   1252 	int		rv;
   1253 	struct cpu	*cp;
   1254 
   1255 	DR_DBG_CPU("dr_cpu_status...\n");
   1256 
   1257 	mutex_enter(&cpu_lock);
   1258 
   1259 	if ((cp = cpu_get(cpuid)) == NULL) {
   1260 		/* need to check if cpu is in the MD */
   1261 		rv = DR_CPU_RES_FAILURE;
   1262 		goto done;
   1263 	}
   1264 
   1265 	if (cpu_is_poweredoff(cp)) {
   1266 		/*
   1267 		 * The CPU is powered off, so it is considered
   1268 		 * unconfigured from the service entity point of
   1269 		 * view. The CPU is not available to the system
   1270 		 * and intervention by the service entity would
   1271 		 * be required to change that.
   1272 		 */
   1273 		*status = DR_CPU_STAT_UNCONFIGURED;
   1274 	} else {
   1275 		/*
   1276 		 * The CPU is powered on, so it is considered
   1277 		 * configured from the service entity point of
   1278 		 * view. It is available for use by the system
   1279 		 * and service entities are not concerned about
   1280 		 * the operational status (offline, online, etc.)
   1281 		 * of the CPU in terms of DR.
   1282 		 */
   1283 		*status = DR_CPU_STAT_CONFIGURED;
   1284 	}
   1285 
   1286 	rv = DR_CPU_RES_OK;
   1287 
   1288 done:
   1289 	mutex_exit(&cpu_lock);
   1290 
   1291 	return (rv);
   1292 }
   1293 
/*
 * Argument block passed to the new_cpu_node() branch-create callback.
 * dr_cpu_probe() fills in mdp and cpunode before creating the branch;
 * new_cpu_node() stores the node it populated in dip.
 */
typedef struct {
	md_t		*mdp;		/* MD handle the properties are read from */
	mde_cookie_t	cpunode;	/* MD node describing the CPU */
	dev_info_t	*dip;		/* out: node set up by new_cpu_node() */
} cb_arg_t;

/* capacity of the 'compatible' string array built by new_cpu_node() */
#define	STR_ARR_LEN	5
   1301 
   1302 static int
   1303 new_cpu_node(dev_info_t *new_node, void *arg, uint_t flags)
   1304 {
   1305 	_NOTE(ARGUNUSED(flags))
   1306 
   1307 	char		*compat;
   1308 	uint64_t	freq;
   1309 	uint64_t	cpuid = 0;
   1310 	int		regbuf[4];
   1311 	int		len = 0;
   1312 	cb_arg_t	*cba;
   1313 	char		*str_arr[STR_ARR_LEN];
   1314 	char		*curr;
   1315 	int		idx = 0;
   1316 
   1317 	DR_DBG_CPU("new_cpu_node...\n");
   1318 
   1319 	cba = (cb_arg_t *)arg;
   1320 
   1321 	/*
   1322 	 * Add 'name' property
   1323 	 */
   1324 	if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
   1325 	    "name", "cpu") != DDI_SUCCESS) {
   1326 		DR_DBG_CPU("new_cpu_node: failed to create 'name' property\n");
   1327 		return (DDI_WALK_ERROR);
   1328 	}
   1329 
   1330 	/*
   1331 	 * Add 'compatible' property
   1332 	 */
   1333 	if (md_get_prop_data(cba->mdp, cba->cpunode, "compatible",
   1334 	    (uint8_t **)(&compat), &len)) {
   1335 		DR_DBG_CPU("new_cpu_node: failed to read 'compatible' property "
   1336 		    "from MD\n");
   1337 		return (DDI_WALK_ERROR);
   1338 	}
   1339 
   1340 	DR_DBG_CPU("'compatible' len is %d\n", len);
   1341 
   1342 	/* parse the MD string array */
   1343 	curr = compat;
   1344 	while (curr < (compat + len)) {
   1345 
   1346 		DR_DBG_CPU("adding '%s' to 'compatible' property\n", curr);
   1347 
   1348 		str_arr[idx++] = curr;
   1349 		curr += strlen(curr) + 1;
   1350 
   1351 		if (idx == STR_ARR_LEN) {
   1352 			DR_DBG_CPU("exceeded str_arr len (%d)\n", STR_ARR_LEN);
   1353 			break;
   1354 		}
   1355 	}
   1356 
   1357 	if (ndi_prop_update_string_array(DDI_DEV_T_NONE, new_node,
   1358 	    "compatible", str_arr, idx) != DDI_SUCCESS) {
   1359 		DR_DBG_CPU("new_cpu_node: failed to create 'compatible' "
   1360 		    "property\n");
   1361 		return (DDI_WALK_ERROR);
   1362 	}
   1363 
   1364 	/*
   1365 	 * Add 'device_type' property
   1366 	 */
   1367 	if (ndi_prop_update_string(DDI_DEV_T_NONE, new_node,
   1368 	    "device_type", "cpu") != DDI_SUCCESS) {
   1369 		DR_DBG_CPU("new_cpu_node: failed to create 'device_type' "
   1370 		    "property\n");
   1371 		return (DDI_WALK_ERROR);
   1372 	}
   1373 
   1374 	/*
   1375 	 * Add 'clock-frequency' property
   1376 	 */
   1377 	if (md_get_prop_val(cba->mdp, cba->cpunode, "clock-frequency", &freq)) {
   1378 		DR_DBG_CPU("new_cpu_node: failed to read 'clock-frequency' "
   1379 		    "property from MD\n");
   1380 		return (DDI_WALK_ERROR);
   1381 	}
   1382 
   1383 	if (ndi_prop_update_int(DDI_DEV_T_NONE, new_node,
   1384 	    "clock-frequency", freq) != DDI_SUCCESS) {
   1385 		DR_DBG_CPU("new_cpu_node: failed to create 'clock-frequency' "
   1386 		    "property\n");
   1387 		return (DDI_WALK_ERROR);
   1388 	}
   1389 
   1390 	/*
   1391 	 * Add 'reg' (cpuid) property
   1392 	 */
   1393 	if (md_get_prop_val(cba->mdp, cba->cpunode, "id", &cpuid)) {
   1394 		DR_DBG_CPU("new_cpu_node: failed to read 'id' property "
   1395 		    "from MD\n");
   1396 		return (DDI_WALK_ERROR);
   1397 	}
   1398 
   1399 	DR_DBG_CPU("new cpuid=0x%lx\n", cpuid);
   1400 
   1401 	bzero(regbuf, 4 * sizeof (int));
   1402 	regbuf[0] = 0xc0000000 | cpuid;
   1403 
   1404 	if (ndi_prop_update_int_array(DDI_DEV_T_NONE, new_node,
   1405 	    "reg", regbuf, 4) != DDI_SUCCESS) {
   1406 		DR_DBG_CPU("new_cpu_node: failed to create 'reg' property\n");
   1407 		return (DDI_WALK_ERROR);
   1408 	}
   1409 
   1410 	cba->dip = new_node;
   1411 
   1412 	return (DDI_WALK_TERMINATE);
   1413 }
   1414 
   1415 static int
   1416 dr_cpu_probe(processorid_t cpuid)
   1417 {
   1418 	dev_info_t	*pdip;
   1419 	dev_info_t	*dip;
   1420 	devi_branch_t	br;
   1421 	md_t		*mdp = NULL;
   1422 	int		num_nodes;
   1423 	int		rv = 0;
   1424 	int		listsz;
   1425 	mde_cookie_t	*listp = NULL;
   1426 	cb_arg_t	cba;
   1427 	mde_cookie_t	cpunode;
   1428 
   1429 	if ((dip = dr_cpu_find_node(cpuid)) != NULL) {
   1430 		/* nothing to do */
   1431 		e_ddi_branch_rele(dip);
   1432 		return (0);
   1433 	}
   1434 
   1435 	if ((mdp = md_get_handle()) == NULL) {
   1436 		DR_DBG_CPU("unable to initialize machine description\n");
   1437 		return (-1);
   1438 	}
   1439 
   1440 	num_nodes = md_node_count(mdp);
   1441 	ASSERT(num_nodes > 0);
   1442 
   1443 	listsz = num_nodes * sizeof (mde_cookie_t);
   1444 	listp = kmem_zalloc(listsz, KM_SLEEP);
   1445 	DR_DBG_KMEM("%s: alloc addr %p size %d\n",
   1446 	    __func__, (void *)listp, listsz);
   1447 
   1448 	cpunode = dr_cpu_find_node_md(cpuid, mdp, listp);
   1449 
   1450 	if (cpunode == MDE_INVAL_ELEM_COOKIE) {
   1451 		rv = EINVAL;
   1452 		goto done;
   1453 	}
   1454 
   1455 	/* pass in MD cookie for CPU */
   1456 	cba.mdp = mdp;
   1457 	cba.cpunode = cpunode;
   1458 
   1459 	br.arg = (void *)&cba;
   1460 	br.type = DEVI_BRANCH_SID;
   1461 	br.create.sid_branch_create = new_cpu_node;
   1462 	br.devi_branch_callback = NULL;
   1463 	pdip = ddi_root_node();
   1464 
   1465 	if ((rv = e_ddi_branch_create(pdip, &br, NULL, 0))) {
   1466 		DR_DBG_CPU("e_ddi_branch_create failed: %d\n", rv);
   1467 		rv = -1;
   1468 		goto done;
   1469 	}
   1470 
   1471 	DR_DBG_CPU("CPU %d probed\n", cpuid);
   1472 
   1473 	rv = 0;
   1474 
   1475 done:
   1476 	if (listp) {
   1477 		DR_DBG_KMEM("%s: free addr %p size %d\n",
   1478 		    __func__, (void *)listp, listsz);
   1479 		kmem_free(listp, listsz);
   1480 	}
   1481 
   1482 	if (mdp)
   1483 		(void) md_fini_handle(mdp);
   1484 
   1485 	return (rv);
   1486 }
   1487 
   1488 static int
   1489 dr_cpu_deprobe(processorid_t cpuid)
   1490 {
   1491 	dev_info_t	*fdip = NULL;
   1492 	dev_info_t	*dip;
   1493 
   1494 	if ((dip = dr_cpu_find_node(cpuid)) == NULL) {
   1495 		DR_DBG_CPU("cpuid %d already deprobed\n", cpuid);
   1496 		return (0);
   1497 	}
   1498 
   1499 	ASSERT(e_ddi_branch_held(dip));
   1500 
   1501 	if (e_ddi_branch_destroy(dip, &fdip, 0)) {
   1502 		char *path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
   1503 
   1504 		DR_DBG_KMEM("%s: alloc addr %p size %d\n",
   1505 		    __func__, (void *)path, MAXPATHLEN);
   1506 		/*
   1507 		 * If non-NULL, fdip is held and must be released.
   1508 		 */
   1509 		if (fdip != NULL) {
   1510 			(void) ddi_pathname(fdip, path);
   1511 			ddi_release_devi(fdip);
   1512 		} else {
   1513 			(void) ddi_pathname(dip, path);
   1514 		}
   1515 		cmn_err(CE_NOTE, "node removal failed: %s (%p)",
   1516 		    path, (fdip) ? (void *)fdip : (void *)dip);
   1517 
   1518 		DR_DBG_KMEM("%s: free addr %p size %d\n",
   1519 		    __func__, (void *)path, MAXPATHLEN);
   1520 		kmem_free(path, MAXPATHLEN);
   1521 
   1522 		return (-1);
   1523 	}
   1524 
   1525 	DR_DBG_CPU("CPU %d deprobed\n", cpuid);
   1526 
   1527 	return (0);
   1528 }
   1529 
/*
 * Search state shared between dr_cpu_find_node() and its device tree
 * walk callback, dr_cpu_check_node().
 */
typedef struct {
	processorid_t	cpuid;	/* in: cpuid being searched for */
	dev_info_t	*dip;	/* out: matching node, NULL if none found */
} dr_search_arg_t;
   1534 
   1535 static int
   1536 dr_cpu_check_node(dev_info_t *dip, void *arg)
   1537 {
   1538 	char 		*name;
   1539 	processorid_t	cpuid;
   1540 	dr_search_arg_t	*sarg = (dr_search_arg_t *)arg;
   1541 
   1542 	if (dip == ddi_root_node()) {
   1543 		return (DDI_WALK_CONTINUE);
   1544 	}
   1545 
   1546 	name = ddi_node_name(dip);
   1547 
   1548 	if (strcmp(name, "cpu") != 0) {
   1549 		return (DDI_WALK_PRUNECHILD);
   1550 	}
   1551 
   1552 	cpuid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
   1553 	    "reg", -1);
   1554 
   1555 	cpuid = PROM_CFGHDL_TO_CPUID(cpuid);
   1556 
   1557 	DR_DBG_CPU("found cpuid=0x%x, looking for 0x%x\n", cpuid, sarg->cpuid);
   1558 
   1559 	if (cpuid == sarg->cpuid) {
   1560 		DR_DBG_CPU("matching node\n");
   1561 
   1562 		/* matching node must be returned held */
   1563 		if (!e_ddi_branch_held(dip))
   1564 			e_ddi_branch_hold(dip);
   1565 
   1566 		sarg->dip = dip;
   1567 		return (DDI_WALK_TERMINATE);
   1568 	}
   1569 
   1570 	return (DDI_WALK_CONTINUE);
   1571 }
   1572 
   1573 /*
   1574  * Walk the device tree to find the dip corresponding to the cpuid
   1575  * passed in. If present, the dip is returned held. The caller must
   1576  * release the hold on the dip once it is no longer required. If no
   1577  * matching node if found, NULL is returned.
   1578  */
   1579 static dev_info_t *
   1580 dr_cpu_find_node(processorid_t cpuid)
   1581 {
   1582 	dr_search_arg_t	arg;
   1583 
   1584 	DR_DBG_CPU("dr_cpu_find_node...\n");
   1585 
   1586 	arg.cpuid = cpuid;
   1587 	arg.dip = NULL;
   1588 
   1589 	ddi_walk_devs(ddi_root_node(), dr_cpu_check_node, &arg);
   1590 
   1591 	ASSERT((arg.dip == NULL) || (e_ddi_branch_held(arg.dip)));
   1592 
   1593 	return ((arg.dip) ? arg.dip : NULL);
   1594 }
   1595 
   1596 /*
   1597  * Look up a particular cpuid in the MD. Returns the mde_cookie_t
   1598  * representing that CPU if present, and MDE_INVAL_ELEM_COOKIE
   1599  * otherwise. It is assumed the scratch array has already been
   1600  * allocated so that it can accommodate the worst case scenario,
   1601  * every node in the MD.
   1602  */
   1603 static mde_cookie_t
   1604 dr_cpu_find_node_md(processorid_t cpuid, md_t *mdp, mde_cookie_t *listp)
   1605 {
   1606 	int		idx;
   1607 	int		nnodes;
   1608 	mde_cookie_t	rootnode;
   1609 	uint64_t	cpuid_prop;
   1610 	mde_cookie_t	result = MDE_INVAL_ELEM_COOKIE;
   1611 
   1612 	rootnode = md_root_node(mdp);
   1613 	ASSERT(rootnode != MDE_INVAL_ELEM_COOKIE);
   1614 
   1615 	/*
   1616 	 * Scan the DAG for all the CPU nodes
   1617 	 */
   1618 	nnodes = md_scan_dag(mdp, rootnode, md_find_name(mdp, "cpu"),
   1619 	    md_find_name(mdp, "fwd"), listp);
   1620 
   1621 	if (nnodes < 0) {
   1622 		DR_DBG_CPU("Scan for CPUs failed\n");
   1623 		return (result);
   1624 	}
   1625 
   1626 	DR_DBG_CPU("dr_cpu_find_node_md: found %d CPUs in the MD\n", nnodes);
   1627 
   1628 	/*
   1629 	 * Find the CPU of interest
   1630 	 */
   1631 	for (idx = 0; idx < nnodes; idx++) {
   1632 
   1633 		if (md_get_prop_val(mdp, listp[idx], "id", &cpuid_prop)) {
   1634 			DR_DBG_CPU("Missing 'id' property for CPU node %d\n",
   1635 			    idx);
   1636 			break;
   1637 		}
   1638 
   1639 		if (cpuid_prop == cpuid) {
   1640 			/* found a match */
   1641 			DR_DBG_CPU("dr_cpu_find_node_md: found CPU %d "
   1642 			    "in MD\n", cpuid);
   1643 			result = listp[idx];
   1644 			break;
   1645 		}
   1646 	}
   1647 
   1648 	if (result == MDE_INVAL_ELEM_COOKIE) {
   1649 		DR_DBG_CPU("CPU %d not in MD\n", cpuid);
   1650 	}
   1651 
   1652 	return (result);
   1653 }
   1654