Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * sun4v Fault Isolation Services Module
     29  */
     30 
     31 #include <sys/modctl.h>
     32 #include <sys/cmn_err.h>
     33 #include <sys/machsystm.h>
     34 #include <sys/processor.h>
     35 #include <sys/mem.h>
     36 #include <vm/page.h>
     37 #include <sys/note.h>
     38 #include <sys/ds.h>
     39 #include <sys/fault_iso.h>
     40 
     41 /*
     42  * Debugging routines
     43  */
     44 #ifdef DEBUG
     45 uint_t fi_debug = 0x0;
     46 #define	FI_DBG	if (fi_debug) cmn_err
     47 #else /* DEBUG */
     48 #define	FI_DBG	_NOTE(CONSTCOND) if (0) cmn_err
     49 #endif /* DEBUG */
     50 
     51 /*
     52  * Domains Services interaction
     53  */
     54 static ds_svc_hdl_t	cpu_handle;
     55 static ds_svc_hdl_t	mem_handle;
     56 
     57 static ds_ver_t		fi_vers[] = { { 1, 0 } };
     58 #define	FI_NVERS	(sizeof (fi_vers) / sizeof (fi_vers[0]))
     59 
     60 static ds_capability_t cpu_cap = {
     61 	"fma-cpu-service",	/* svc_id */
     62 	fi_vers,		/* vers */
     63 	FI_NVERS		/* nvers */
     64 };
     65 
     66 static ds_capability_t mem_cap = {
     67 	"fma-mem-service",	/* svc_id */
     68 	fi_vers,		/* vers */
     69 	FI_NVERS		/* nvers */
     70 };
     71 
     72 static void fi_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl);
     73 static void fi_unreg_handler(ds_cb_arg_t arg);
     74 
     75 static void cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);
     76 static void mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen);
     77 
     78 static ds_clnt_ops_t cpu_ops = {
     79 	fi_reg_handler,		/* ds_reg_cb */
     80 	fi_unreg_handler,	/* ds_unreg_cb */
     81 	cpu_data_handler,	/* ds_data_cb */
     82 	&cpu_handle		/* cb_arg */
     83 };
     84 
     85 static ds_clnt_ops_t mem_ops = {
     86 	fi_reg_handler,		/* ds_reg_cb */
     87 	fi_unreg_handler,	/* ds_unreg_cb */
     88 	mem_data_handler,	/* ds_data_cb */
     89 	&mem_handle		/* cb_arg */
     90 };
     91 
     92 static int fi_init(void);
     93 static void fi_fini(void);
     94 
     95 static struct modlmisc modlmisc = {
     96 	&mod_miscops,
     97 	"sun4v Fault Isolation Services"
     98 };
     99 
    100 static struct modlinkage modlinkage = {
    101 	MODREV_1,
    102 	(void *)&modlmisc,
    103 	NULL
    104 };
    105 
    106 int
    107 _init(void)
    108 {
    109 	int	rv;
    110 
    111 	if ((rv = fi_init()) != 0)
    112 		return (rv);
    113 
    114 	if ((rv = mod_install(&modlinkage)) != 0)
    115 		fi_fini();
    116 
    117 	return (rv);
    118 }
    119 
    120 int
    121 _info(struct modinfo *modinfop)
    122 {
    123 	return (mod_info(&modlinkage, modinfop));
    124 }
    125 
    126 int fi_allow_unload;
    127 
    128 int
    129 _fini(void)
    130 {
    131 	int	status;
    132 
    133 	if (fi_allow_unload == 0)
    134 		return (EBUSY);
    135 
    136 	if ((status = mod_remove(&modlinkage)) == 0)
    137 		fi_fini();
    138 
    139 	return (status);
    140 }
    141 
    142 static int
    143 fi_init(void)
    144 {
    145 	int	rv;
    146 
    147 	/* register CPU service with domain services framework */
    148 	rv = ds_cap_init(&cpu_cap, &cpu_ops);
    149 	if (rv != 0) {
    150 		FI_DBG(CE_CONT, "ds_cap_init failed: %d", rv);
    151 		return (rv);
    152 	}
    153 
    154 	/* register MEM servicewith domain services framework */
    155 	rv = ds_cap_init(&mem_cap, &mem_ops);
    156 	if (rv != 0) {
    157 		FI_DBG(CE_CONT, "ds_cap_init failed: %d", rv);
    158 		(void) ds_cap_fini(&cpu_cap);
    159 		return (rv);
    160 	}
    161 
    162 	return (rv);
    163 }
    164 
    165 static void
    166 fi_fini(void)
    167 {
    168 	/*
    169 	 * Stop incoming requests from Zeus
    170 	 */
    171 	(void) ds_cap_fini(&cpu_cap);
    172 	(void) ds_cap_fini(&mem_cap);
    173 }
    174 
    175 static void
    176 cpu_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
    177 {
    178 	_NOTE(ARGUNUSED(arg))
    179 
    180 	fma_cpu_service_req_t	*msg = buf;
    181 	fma_cpu_resp_t		resp_msg;
    182 	int			rv = 0;
    183 	int			cpu_status;
    184 	int			resp_back = 0;
    185 
    186 	/*
    187 	 * If the buffer is the wrong size for CPU calls or is NULL then
    188 	 * do not return any message. The call from the ldom mgr. will time out
    189 	 * and the response will be NULL.
    190 	 */
    191 	if (msg == NULL || buflen != sizeof (fma_cpu_service_req_t)) {
    192 		return;
    193 	}
    194 
    195 	FI_DBG(CE_CONT, "req_num = %ld, msg_type = %d, cpu_id = %d\n",
    196 	    msg->req_num, msg->msg_type, msg->cpu_id);
    197 
    198 	resp_msg.req_num = msg->req_num;
    199 
    200 	switch (msg->msg_type) {
    201 	case FMA_CPU_REQ_STATUS:
    202 		rv = p_online_internal(msg->cpu_id, P_STATUS,
    203 		    &cpu_status);
    204 		if (rv == EINVAL) {
    205 			FI_DBG(CE_CONT, "Failed p_online call failed."
    206 			    "Invalid CPU\n");
    207 			resp_msg.result = FMA_CPU_RESP_FAILURE;
    208 			resp_msg.status = FMA_CPU_STAT_ILLEGAL;
    209 			resp_back = 1;
    210 		}
    211 		break;
    212 	case FMA_CPU_REQ_OFFLINE:
    213 		rv = p_online_internal(msg->cpu_id, P_FAULTED,
    214 		    &cpu_status);
    215 		if (rv == EINVAL) {
    216 			FI_DBG(CE_CONT, "Failed p_online call failed."
    217 			    "Invalid CPU\n");
    218 			resp_msg.result = FMA_CPU_RESP_FAILURE;
    219 			resp_msg.status = FMA_CPU_STAT_ILLEGAL;
    220 			resp_back = 1;
    221 		} else if (rv == EBUSY) {
    222 			FI_DBG(CE_CONT, "Failed p_online call failed."
    223 			    "Tried to offline while busy\n");
    224 			resp_msg.result = FMA_CPU_RESP_FAILURE;
    225 			resp_msg.status = FMA_CPU_STAT_ONLINE;
    226 			resp_back = 1;
    227 		}
    228 		break;
    229 	case FMA_CPU_REQ_ONLINE:
    230 		rv = p_online_internal(msg->cpu_id, P_ONLINE,
    231 		    &cpu_status);
    232 		if (rv == EINVAL) {
    233 			FI_DBG(CE_CONT, "Failed p_online call failed."
    234 			    "Invalid CPU\n");
    235 			resp_msg.result = FMA_CPU_RESP_FAILURE;
    236 			resp_msg.status = FMA_CPU_STAT_ILLEGAL;
    237 			resp_back = 1;
    238 		} else if (rv == ENOTSUP) {
    239 			FI_DBG(CE_CONT, "Failed p_online call failed."
    240 			    "Online not supported for single CPU\n");
    241 			resp_msg.result = FMA_CPU_RESP_FAILURE;
    242 			resp_msg.status = FMA_CPU_STAT_OFFLINE;
    243 			resp_back = 1;
    244 		}
    245 		break;
    246 	default:
    247 		/*
    248 		 * If the msg_type was of unknown type simply return and
    249 		 * have the ldom mgr. time out with a NULL response.
    250 		 */
    251 		return;
    252 	}
    253 
    254 	if (rv != 0) {
    255 		if (resp_back) {
    256 			if ((rv = ds_cap_send(cpu_handle, &resp_msg,
    257 			    sizeof (resp_msg))) != 0) {
    258 				FI_DBG(CE_CONT, "ds_cap_send failed (%d)\n",
    259 				    rv);
    260 			}
    261 			return;
    262 		}
    263 		ASSERT((rv == EINVAL) || ((rv == EBUSY) &&
    264 		    (msg->msg_type == FMA_CPU_REQ_OFFLINE)) ||
    265 		    ((rv == ENOTSUP) && (msg->msg_type == FMA_CPU_REQ_ONLINE)));
    266 
    267 		cmn_err(CE_WARN, "p_online_internal error not handled "
    268 		    "rv = %d\n", rv);
    269 	}
    270 
    271 	resp_msg.req_num = msg->req_num;
    272 	resp_msg.result = FMA_CPU_RESP_OK;
    273 
    274 	switch (cpu_status) {
    275 	case P_OFFLINE:
    276 	case P_FAULTED:
    277 	case P_POWEROFF:
    278 	case P_SPARE:
    279 		resp_msg.status = FMA_CPU_STAT_OFFLINE;
    280 		break;
    281 	case P_ONLINE:
    282 	case P_NOINTR:
    283 		resp_msg.status = FMA_CPU_STAT_ONLINE;
    284 		break;
    285 	default:
    286 		resp_msg.status = FMA_CPU_STAT_ILLEGAL;
    287 	}
    288 
    289 	if ((rv = ds_cap_send(cpu_handle, &resp_msg,
    290 	    sizeof (resp_msg))) != 0) {
    291 		FI_DBG(CE_CONT, "ds_cap_send failed (%d)\n", rv);
    292 	}
    293 }
    294 
    295 static void
    296 mem_data_handler(ds_cb_arg_t arg, void *buf, size_t buflen)
    297 {
    298 	_NOTE(ARGUNUSED(arg))
    299 
    300 	fma_mem_service_req_t	*msg = buf;
    301 	fma_mem_resp_t		resp_msg;
    302 	int			rv = 0;
    303 
    304 	/*
    305 	 * If the buffer is the wrong size for Mem calls or is NULL then
    306 	 * do not return any message. The call from the ldom mgr. will time out
    307 	 * and the response will be NULL.
    308 	 */
    309 	if (msg == NULL || buflen != sizeof (fma_mem_service_req_t)) {
    310 		return;
    311 	}
    312 
    313 	FI_DBG(CE_CONT, "req_num = %ld, msg_type = %d, memory addr = 0x%lx"
    314 	"memory length = 0x%lx\n", msg->req_num, msg->msg_type,
    315 	    msg->real_addr, msg->length);
    316 
    317 	resp_msg.req_num = msg->req_num;
    318 	resp_msg.res_addr = msg->real_addr;
    319 	resp_msg.res_length = msg->length;
    320 
    321 	/*
    322 	 * Information about return values for page calls can be referenced
    323 	 * in usr/src/uts/common/vm/page_retire.c
    324 	 */
    325 	switch (msg->msg_type) {
    326 	case FMA_MEM_REQ_STATUS:
    327 		rv = page_retire_check(msg->real_addr, NULL);
    328 		switch (rv) {
    329 		/* Page is retired */
    330 		case 0:
    331 			resp_msg.result = FMA_MEM_RESP_OK;
    332 			resp_msg.status = FMA_MEM_STAT_RETIRED;
    333 			break;
    334 		/* Page is pending. Send back failure and not retired */
    335 		case EAGAIN:
    336 			resp_msg.result = FMA_MEM_RESP_FAILURE;
    337 			resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
    338 			break;
    339 		/* Page is not retired. */
    340 		case EIO:
    341 			resp_msg.result = FMA_MEM_RESP_OK;
    342 			resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
    343 			break;
    344 		/* PA is not valid */
    345 		case EINVAL:
    346 			resp_msg.result = FMA_MEM_RESP_FAILURE;
    347 			resp_msg.status = FMA_MEM_STAT_ILLEGAL;
    348 			break;
    349 		default:
    350 			ASSERT((rv == 0) || (rv == EAGAIN) || (rv == EIO) ||
    351 			    (rv ==  EINVAL));
    352 			cmn_err(CE_WARN, "fault_iso: return value from "
    353 			    "page_retire_check invalid: %d\n", rv);
    354 		}
    355 		break;
    356 	case FMA_MEM_REQ_RETIRE:
    357 		rv = page_retire(msg->real_addr, PR_FMA);
    358 		switch (rv) {
    359 		/* Page retired successfully */
    360 		case 0:
    361 			resp_msg.result = FMA_MEM_RESP_OK;
    362 			resp_msg.status = FMA_MEM_STAT_RETIRED;
    363 			break;
    364 		/* Tried to retire and now Pending retirement */
    365 		case EAGAIN:
    366 			resp_msg.result = FMA_MEM_RESP_FAILURE;
    367 			resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
    368 			break;
    369 		/* Did not try to retire. Page already retired */
    370 		case EIO:
    371 			resp_msg.result = FMA_MEM_RESP_FAILURE;
    372 			resp_msg.status = FMA_MEM_STAT_RETIRED;
    373 			break;
    374 		/* PA is not valid */
    375 		case EINVAL:
    376 			resp_msg.result = FMA_MEM_RESP_FAILURE;
    377 			resp_msg.status = FMA_MEM_STAT_ILLEGAL;
    378 			break;
    379 		default:
    380 			ASSERT((rv == 0) || (rv == EAGAIN) || (rv == EIO) ||
    381 			    (rv ==  EINVAL));
    382 			cmn_err(CE_WARN, "fault_iso: return value from "
    383 			    "page_retire invalid: %d\n", rv);
    384 		}
    385 		break;
    386 	case FMA_MEM_REQ_RESURRECT:
    387 		rv = page_unretire(msg->real_addr);
    388 		switch (rv) {
    389 		/* Page succesfullly unretired */
    390 		case 0:
    391 			resp_msg.result = FMA_MEM_RESP_OK;
    392 			resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
    393 			break;
    394 		/* Page could not be locked. Still retired */
    395 		case EAGAIN:
    396 			resp_msg.result = FMA_MEM_RESP_FAILURE;
    397 			resp_msg.status = FMA_MEM_STAT_RETIRED;
    398 			break;
    399 		/* Page was not retired already */
    400 		case EIO:
    401 			resp_msg.result = FMA_MEM_RESP_FAILURE;
    402 			resp_msg.status = FMA_MEM_STAT_NOTRETIRED;
    403 			break;
    404 		/* PA is not valid */
    405 		case EINVAL:
    406 			resp_msg.result = FMA_MEM_RESP_FAILURE;
    407 			resp_msg.status = FMA_MEM_STAT_ILLEGAL;
    408 			break;
    409 		default:
    410 			ASSERT((rv == 0) || (rv == EAGAIN) || (rv == EIO) ||
    411 			    (rv ==  EINVAL));
    412 			cmn_err(CE_WARN, "fault_iso: return value from "
    413 			    "page_unretire invalid: %d\n", rv);
    414 		}
    415 		break;
    416 	default:
    417 		/*
    418 		 * If the msg_type was of unknown type simply return and
    419 		 * have the ldom mgr. time out with a NULL response.
    420 		 */
    421 		return;
    422 	}
    423 
    424 	if ((rv = ds_cap_send(mem_handle, &resp_msg, sizeof (resp_msg))) != 0) {
    425 		FI_DBG(CE_CONT, "ds_cap_send failed (%d)\n", rv);
    426 	}
    427 }
    428 
    429 static void
    430 fi_reg_handler(ds_cb_arg_t arg, ds_ver_t *ver, ds_svc_hdl_t hdl)
    431 {
    432 	FI_DBG(CE_CONT, "fi_reg_handler: arg=0x%p, ver=%d.%d, hdl=0x%lx\n",
    433 	    arg, ver->major, ver->minor, hdl);
    434 
    435 	if ((ds_svc_hdl_t *)arg == &cpu_handle)
    436 		cpu_handle = hdl;
    437 	if ((ds_svc_hdl_t *)arg == &mem_handle)
    438 		mem_handle = hdl;
    439 }
    440 
    441 static void
    442 fi_unreg_handler(ds_cb_arg_t arg)
    443 {
    444 	FI_DBG(CE_CONT, "fi_unreg_handler: arg=0x%p\n", arg);
    445 
    446 	if ((ds_svc_hdl_t *)arg == &cpu_handle)
    447 		cpu_handle = DS_INVALID_HDL;
    448 	if ((ds_svc_hdl_t *)arg == &mem_handle)
    449 		mem_handle = DS_INVALID_HDL;
    450 }
    451