1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/mutex.h> 29 #include <sys/debug.h> 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/kmem.h> 33 #include <sys/thread.h> 34 #include <sys/id_space.h> 35 #include <sys/avl.h> 36 #include <sys/list.h> 37 #include <sys/sysmacros.h> 38 #include <sys/proc.h> 39 #include <sys/contract.h> 40 #include <sys/contract_impl.h> 41 #include <sys/contract/device.h> 42 #include <sys/contract/device_impl.h> 43 #include <sys/cmn_err.h> 44 #include <sys/nvpair.h> 45 #include <sys/policy.h> 46 #include <sys/ddi_impldefs.h> 47 #include <sys/ddi_implfuncs.h> 48 #include <sys/systm.h> 49 #include <sys/stat.h> 50 #include <sys/sunddi.h> 51 #include <sys/esunddi.h> 52 #include <sys/ddi.h> 53 #include <sys/fs/dv_node.h> 54 #include <sys/sunndi.h> 55 #undef ct_lock /* needed because clnt.h defines ct_lock as a macro */ 56 57 /* 58 * Device Contracts 59 * ----------------- 60 * This file contains the core code for the device contracts framework. 
61 * A device contract is an agreement or a contract between a process and 62 * the kernel regarding the state of the device. A device contract may be 63 * created when a relationship is formed between a device and a process 64 * i.e. at open(2) time, or it may be created at some point after the device 65 * has been opened. A device contract once formed may be broken by either party. 66 * A device contract can be broken by the process by an explicit abandon of the 67 * contract or by an implicit abandon when the process exits. A device contract 68 * can be broken by the kernel either asynchronously (without negotiation) or 69 * synchronously (with negotiation). Exactly which happens depends on the device 70 * state transition. The following state diagram shows the transitions between 71 * device states. Only device state transitions currently supported by device 72 * contracts is shown. 73 * 74 * <-- A --> 75 * /-----------------> DEGRADED 76 * | | 77 * | | 78 * | | S 79 * | | | 80 * | | v 81 * v S --> v 82 * ONLINE ------------> OFFLINE 83 * 84 * 85 * In the figure above, the arrows indicate the direction of transition. The 86 * letter S refers to transitions which are inherently synchronous i.e. 87 * require negotiation and the letter A indicates transitions which are 88 * asynchronous i.e. are done without contract negotiations. A good example 89 * of a synchronous transition is the ONLINE -> OFFLINE transition. This 90 * transition cannot happen as long as there are consumers which have the 91 * device open. Thus some form of negotiation needs to happen between the 92 * consumers and the kernel to ensure that consumers either close devices 93 * or disallow the move to OFFLINE. Certain other transitions such as 94 * ONLINE --> DEGRADED for example, are inherently asynchronous i.e. 95 * non-negotiable. 
A device that suffers a fault that degrades its 96 * capabilities will become degraded irrespective of what consumers it has, 97 * so a negotiation in this case is pointless. 98 * 99 * The following device states are currently defined for device contracts: 100 * 101 * CT_DEV_EV_ONLINE 102 * The device is online and functioning normally 103 * CT_DEV_EV_DEGRADED 104 * The device is online but is functioning in a degraded capacity 105 * CT_DEV_EV_OFFLINE 106 * The device is offline and is no longer configured 107 * 108 * A typical consumer of device contracts starts out with a contract 109 * template and adds terms to that template. These include the 110 * "acceptable set" (A-set) term, which is a bitset of device states which 111 * are guaranteed by the contract. If the device moves out of a state in 112 * the A-set, the contract is broken. The breaking of the contract can 113 * be asynchronous in which case a critical contract event is sent to the 114 * contract holder but no negotiations take place. If the breaking of the 115 * contract is synchronous, negotiations are opened between the affected 116 * consumer and the kernel. The kernel does this by sending a critical 117 * event to the consumer with the CTE_NEG flag set indicating that this 118 * is a negotiation event. The consumer can accept this change by sending 119 * an ACK message to the kernel. Alternatively, if it has the necessary 120 * privileges, it can send a NACK message to the kernel which will block 121 * the device state change. To NACK a negotiable event, a process must 122 * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set. 123 * 124 * Other terms include the "minor path" term, specified explicitly if the 125 * contract is not being created at open(2) time or specified implicitly 126 * if the contract is being created at open time via an activated template. 
127 * 128 * A contract event is sent on any state change to which the contract 129 * owner has subscribed via the informative or critical event sets. Only 130 * critical events are guaranteed to be delivered. Since all device state 131 * changes are controlled by the kernel and cannot be arbitrarily generated 132 * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not 133 * need to be asserted in a process's effective set to designate an event as 134 * critical. To ensure privacy, a process must either have the same effective 135 * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege 136 * asserted in its effective set in order to observe device contract events 137 * off the device contract type specific endpoint. 138 * 139 * Yet another term available with device contracts is the "non-negotiable" 140 * term. This term is used to pre-specify a NACK to any contract negotiation. 141 * This term is ignored for asynchronous state changes. For example, a 142 * process may have the A-set {ONLINE|DEGRADED} and make the contract 143 * non-negotiable. In this case, the device contract framework assumes a 144 * NACK for any transition to OFFLINE and blocks the offline. If the A-set 145 * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE 146 * are NACKed but transitions to DEGRADE succeed. 147 * 148 * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract) 149 * happens just before the I/O framework attempts to offline a device 150 * (i.e. detach a device and set the offline flag so that it cannot be 151 * reattached). A device contract holder is expected to either NACK the offline 152 * (if privileged) or release the device and allow the offline to proceed. 153 * 154 * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract) 155 * is generated just before the I/O framework transitions the device state 156 * to "degraded" (i.e. 
DEVI_DEVICE_DEGRADED in I/O framework terminology). 157 * 158 * The contract holder is expected to ACK or NACK a negotiation event 159 * within a certain period of time. If the ACK/NACK is not received 160 * within the timeout period, the device contract framework will behave 161 * as if the contract does not exist and will proceed with the event. 162 * 163 * Unlike a process contract a device contract does not need to exist 164 * once it is abandoned, since it does not define a fault boundary. It 165 * merely represents an agreement between a process and the kernel 166 * regarding the state of the device. Once the process has abandoned 167 * the contract (either implicitly via a process exit or explicitly) 168 * the kernel has no reason to retain the contract. As a result 169 * device contracts are neither inheritable nor need to exist in an 170 * orphan state. 171 * 172 * A device unlike a process may exist in multiple contracts and has 173 * a "life" outside a device contract. A device unlike a process 174 * may exist without an associated contract. Unlike a process contract 175 * a device contract may be formed after a binding relationship is 176 * formed between a process and a device. 177 * 178 * IMPLEMENTATION NOTES 179 * ==================== 180 * DATA STRUCTURES 181 * ---------------- 182 * The heart of the device contracts implementation is the device contract 183 * private cont_device_t (or ctd for short) data structure. It encapsulates 184 * the generic contract_t data structure and has a number of private 185 * fields. 186 * These include: 187 * cond_minor: The minor device that is the subject of the contract 188 * cond_aset: The bitset of states which are guaranteed by the 189 * contract 190 * cond_noneg: If set, indicates that the result of negotiation has 191 * been predefined to be a NACK 192 * In addition, there are other device identifiers such the devinfo node, 193 * dev_t and spec_type of the minor node. 
There are also a few fields that 194 * are used during negotiation to maintain state. See 195 * uts/common/sys/contract/device_impl.h 196 * for details. 197 * The ctd structure represents the device private part of a contract of 198 * type "device" 199 * 200 * Another data structure used by device contracts is ctmpl_device. It is 201 * the device contracts private part of the contract template structure. It 202 * encapsulates the generic template structure "ct_template_t" and includes 203 * the following device contract specific fields 204 * ctd_aset: The bitset of states that should be guaranteed by a 205 * contract 206 * ctd_noneg: If set, indicates that contract should NACK a 207 * negotiation 208 * ctd_minor: The devfs_path (without the /devices prefix) of the 209 * minor node that is the subject of the contract. 210 * 211 * ALGORITHMS 212 * --------- 213 * There are three sets of routines in this file 214 * Template related routines 215 * ------------------------- 216 * These routines provide support for template related operations initiated 217 * via the generic template operations. These include routines that dup 218 * a template, free it, and set various terms in the template 219 * (such as the minor node path, the acceptable state set (or A-set) 220 * and the non-negotiable term) as well as a routine to query the 221 * device specific portion of the template for the abovementioned terms. 222 * There is also a routine to create (ctmpl_device_create) that is used to 223 * create a contract from a template. This routine calls (after initial 224 * setup) the common function used to create a device contract 225 * (contract_device_create). 226 * 227 * core device contract implementation 228 * ---------------------------------- 229 * These routines support the generic contract framework to provide 230 * functionality that allows contracts to be created, managed and 231 * destroyed. 
The contract_device_create() routine is a routine used 232 * to create a contract from a template (either via an explicit create 233 * operation on a template or implicitly via an open with an 234 * activated template.). The contract_device_free() routine assists 235 * in freeing the device contract specific parts. There are routines 236 * used to abandon (contract_device_abandon) a device contract as well 237 * as a routine to destroy (which despite its name does not destroy, 238 * it only moves a contract to a dead state) a contract. 239 * There is also a routine to return status information about a 240 * contract - the level of detail depends on what is requested by the 241 * user. A value of CTD_FIXED only returns fixed length fields such 242 * as the A-set, state of device and value of the "noneg" term. If 243 * CTD_ALL is specified, the minor node path is returned as well. 244 * 245 * In addition there are interfaces (contract_device_ack/nack) which 246 * are used to support negotiation between userland processes and 247 * device contracts. These interfaces record the acknowledgement 248 * or lack thereof for negotiation events and help determine if the 249 * negotiated event should occur. 250 * 251 * "backend routines" 252 * ----------------- 253 * The backend routines form the interface between the I/O framework 254 * and the device contract subsystem. These routines, allow the I/O 255 * framework to call into the device contract subsystem to notify it of 256 * impending changes to a device state as well as to inform of the 257 * final disposition of such attempted state changes. Routines in this 258 * class include contract_device_offline() that indicates an attempt to 259 * offline a device, contract_device_degrade() that indicates that 260 * a device is moving to the degraded state and contract_device_negend() 261 * that is used by the I/O framework to inform the contracts subsystem of 262 * the final disposition of an attempted operation. 
263 * 264 * SUMMARY 265 * ------- 266 * A contract starts its life as a template. A process allocates a device 267 * contract template and sets various terms: 268 * The A-set 269 * The device minor node 270 * Critical and informative events 271 * The noneg i.e. no negotiation term 272 * Setting of these terms in the template is done via the 273 * ctmpl_device_set() entry point in this file. A process can query a 274 * template to determine the terms already set in the template - this is 275 * facilitated by the ctmpl_device_get() routine. 276 * 277 * Once all the appropriate terms are set, the contract is instantiated via 278 * one of two methods 279 * - via an explicit create operation - this is facilitated by the 280 * ctmpl_device_create() entry point 281 * - synchronously with the open(2) system call - this is achieved via the 282 * contract_device_open() routine. 283 * The core work for both these above functions is done by 284 * contract_device_create() 285 * 286 * A contract once created can be queried for its status. Support for 287 * status info is provided by both the common contracts framework and by 288 * the "device" contract type. If the level of detail requested is 289 * CTD_COMMON, only the common contract framework data is used. Higher 290 * levels of detail result in calls to contract_device_status() to supply 291 * device contract type specific status information. 292 * 293 * A contract once created may be abandoned either explicitly or implicitly. 294 * In either case, the contract_device_abandon() function is invoked. This 295 * function merely calls contract_destroy() which moves the contract to 296 * the DEAD state. The device contract portion of destroy processing is 297 * provided by contract_device_destroy() which merely disassociates the 298 * contract from its device devinfo node. A contract in the DEAD state is 299 * not freed. It hangs around until all references to the contract are 300 * gone. 
When that happens, the contract is finally deallocated. The 301 * device contract specific portion of the free is done by 302 * contract_device_free() which finally frees the device contract specific 303 * data structure (cont_device_t). 304 * 305 * When a device undergoes a state change, the I/O framework calls the 306 * corresponding device contract entry point. For example, when a device 307 * is about to go OFFLINE, the routine contract_device_offline() is 308 * invoked. Similarly if a device moves to DEGRADED state, the routine 309 * contract_device_degrade() function is called. These functions call the 310 * core routine contract_device_publish(). This function determines via 311 * the function is_sync_neg() whether an event is a synchronous (i.e. 312 * negotiable) event or not. In the former case contract_device_publish() 313 * publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs 314 * and/or NACKs from contract holders. In the latter case, it simply 315 * publishes the event and does not wait. In the negotiation case, ACKs or 316 * NACKs from userland consumers results in contract_device_ack_nack() 317 * being called where the result of the negotiation is recorded in the 318 * contract data structure. Once all outstanding contract owners have 319 * responded, the device contract code in wait_for_acks() determines the 320 * final result of the negotiation. A single NACK overrides all other ACKs 321 * If there is no NACK, then a single ACK will result in an overall ACK 322 * result. If there are no ACKs or NACKs, then the result CT_NONE is 323 * returned back to the I/O framework. Once the event is permitted or 324 * blocked, the I/O framework proceeds or aborts the state change. The 325 * I/O framework then calls contract_device_negend() with a result code 326 * indicating final disposition of the event. 
This call releases the 327 * barrier and other state associated with the previous negotiation, 328 * which permits the next event (if any) to come into the device contract 329 * framework. 330 * 331 * Finally, a device that has outstanding contracts may be removed from 332 * the system which results in its devinfo node being freed. The devinfo 333 * free routine in the I/O framework, calls into the device contract 334 * function - contract_device_remove_dip(). This routine, disassociates 335 * the dip from all contracts associated with the devinfo node being freed, 336 * allowing the devinfo node to be freed. 337 * 338 * LOCKING 339 * --------- 340 * There are four sets of data that need to be protected by locks 341 * 342 * i) device contract specific portion of the contract template - This data 343 * is protected by the template lock ctmpl_lock. 344 * 345 * ii) device contract specific portion of the contract - This data is 346 * protected by the contract lock ct_lock 347 * 348 * iii) The linked list of contracts hanging off a devinfo node - This 349 * list is protected by the per-devinfo node lock devi_ct_lock 350 * 351 * iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv 352 * and devi_ct_count that controls state changes to a dip 353 * 354 * The template lock is independent in that none of the other locks in this 355 * file may be taken while holding the template lock (and vice versa). 
356 * 357 * The remaining three locks have the following lock order 358 * 359 * devi_ct_lock -> ct_count barrier -> ct_lock 360 * 361 */ 362 363 static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, 364 int spec_type, proc_t *owner, int *errorp); 365 366 /* barrier routines */ 367 static void ct_barrier_acquire(dev_info_t *dip); 368 static void ct_barrier_release(dev_info_t *dip); 369 static int ct_barrier_held(dev_info_t *dip); 370 static int ct_barrier_empty(dev_info_t *dip); 371 static void ct_barrier_wait_for_release(dev_info_t *dip); 372 static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs); 373 static void ct_barrier_decr(dev_info_t *dip); 374 static void ct_barrier_incr(dev_info_t *dip); 375 376 ct_type_t *device_type; 377 378 /* 379 * Macro predicates for determining when events should be sent and how. 380 */ 381 #define EVSENDP(ctd, flag) \ 382 ((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag) 383 384 #define EVINFOP(ctd, flag) \ 385 ((ctd->cond_contract.ct_ev_crit & flag) == 0) 386 387 /* 388 * State transition table showing which transitions are synchronous and which 389 * are not. 390 */ 391 struct ct_dev_negtable { 392 uint_t st_old; 393 uint_t st_new; 394 uint_t st_neg; 395 } ct_dev_negtable[] = { 396 {CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE, 1}, 397 {CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED, 0}, 398 {CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE, 0}, 399 {CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE, 1}, 400 {0} 401 }; 402 403 /* 404 * Device contract template implementation 405 */ 406 407 /* 408 * ctmpl_device_dup 409 * 410 * The device contract template dup entry point. 411 * This simply copies all the fields (generic as well as device contract 412 * specific) fields of the original. 
413 */ 414 static struct ct_template * 415 ctmpl_device_dup(struct ct_template *template) 416 { 417 ctmpl_device_t *new; 418 ctmpl_device_t *old = template->ctmpl_data; 419 char *buf; 420 char *minor; 421 422 new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 423 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 424 425 /* 426 * copy generic fields. 427 * ctmpl_copy returns with old template lock held 428 */ 429 ctmpl_copy(&new->ctd_ctmpl, template); 430 431 new->ctd_ctmpl.ctmpl_data = new; 432 new->ctd_aset = old->ctd_aset; 433 new->ctd_minor = NULL; 434 new->ctd_noneg = old->ctd_noneg; 435 436 if (old->ctd_minor) { 437 ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN); 438 bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1); 439 } else { 440 kmem_free(buf, MAXPATHLEN); 441 buf = NULL; 442 } 443 444 mutex_exit(&template->ctmpl_lock); 445 if (buf) { 446 minor = i_ddi_strdup(buf, KM_SLEEP); 447 kmem_free(buf, MAXPATHLEN); 448 buf = NULL; 449 } else { 450 minor = NULL; 451 } 452 mutex_enter(&template->ctmpl_lock); 453 454 if (minor) { 455 new->ctd_minor = minor; 456 } 457 458 ASSERT(buf == NULL); 459 return (&new->ctd_ctmpl); 460 } 461 462 /* 463 * ctmpl_device_free 464 * 465 * The device contract template free entry point. Just 466 * frees the template. 467 */ 468 static void 469 ctmpl_device_free(struct ct_template *template) 470 { 471 ctmpl_device_t *dtmpl = template->ctmpl_data; 472 473 if (dtmpl->ctd_minor) 474 kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1); 475 476 kmem_free(dtmpl, sizeof (ctmpl_device_t)); 477 } 478 479 /* 480 * SAFE_EV is the set of events which a non-privileged process is 481 * allowed to make critical. An unprivileged device contract owner has 482 * no control over when a device changes state, so all device events 483 * can be in the critical set. 484 * 485 * EXCESS tells us if "value", a critical event set, requires 486 * additional privilege. For device contracts EXCESS currently 487 * evaluates to 0. 
488 */ 489 #define SAFE_EV (CT_DEV_ALLEVENT) 490 #define EXCESS(value) ((value) & ~SAFE_EV) 491 492 493 /* 494 * ctmpl_device_set 495 * 496 * The device contract template set entry point. Sets various terms in the 497 * template. The non-negotiable term can only be set if the process has 498 * the {PRIV_SYS_DEVICES} privilege asserted in its effective set. 499 */ 500 static int 501 ctmpl_device_set(struct ct_template *tmpl, ct_param_t *param, const cred_t *cr) 502 { 503 ctmpl_device_t *dtmpl = tmpl->ctmpl_data; 504 int error; 505 dev_info_t *dip; 506 int spec_type; 507 uint64_t param_value; 508 char *str_value; 509 510 ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock)); 511 512 if (param->ctpm_id == CTDP_MINOR) { 513 str_value = (char *)param->ctpm_value; 514 str_value[param->ctpm_size - 1] = '\0'; 515 } else { 516 if (param->ctpm_size < sizeof (uint64_t)) 517 return (EINVAL); 518 param_value = *(uint64_t *)param->ctpm_value; 519 } 520 521 switch (param->ctpm_id) { 522 case CTDP_ACCEPT: 523 if (param_value & ~CT_DEV_ALLEVENT) 524 return (EINVAL); 525 if (param_value == 0) 526 return (EINVAL); 527 if (param_value == CT_DEV_ALLEVENT) 528 return (EINVAL); 529 530 dtmpl->ctd_aset = param_value; 531 break; 532 case CTDP_NONEG: 533 if (param_value != CTDP_NONEG_SET && 534 param_value != CTDP_NONEG_CLEAR) 535 return (EINVAL); 536 537 /* 538 * only privileged processes can designate a contract 539 * non-negotiatble. 
540 */ 541 if (param_value == CTDP_NONEG_SET && 542 (error = secpolicy_sys_devices(cr)) != 0) { 543 return (error); 544 } 545 546 dtmpl->ctd_noneg = param_value; 547 break; 548 549 case CTDP_MINOR: 550 if (*str_value != '/' || 551 strncmp(str_value, "/devices/", 552 strlen("/devices/")) == 0 || 553 strstr(str_value, "../devices/") != NULL || 554 strchr(str_value, ':') == NULL) { 555 return (EINVAL); 556 } 557 558 spec_type = 0; 559 dip = NULL; 560 if (resolve_pathname(str_value, &dip, NULL, &spec_type) != 0) { 561 return (ERANGE); 562 } 563 ddi_release_devi(dip); 564 565 if (spec_type != S_IFCHR && spec_type != S_IFBLK) { 566 return (EINVAL); 567 } 568 569 if (dtmpl->ctd_minor != NULL) { 570 kmem_free(dtmpl->ctd_minor, 571 strlen(dtmpl->ctd_minor) + 1); 572 } 573 dtmpl->ctd_minor = i_ddi_strdup(str_value, KM_SLEEP); 574 break; 575 case CTP_EV_CRITICAL: 576 /* 577 * Currently for device contracts, any event 578 * may be added to the critical set. We retain the 579 * following code however for future enhancements. 580 */ 581 if (EXCESS(param_value) && 582 (error = secpolicy_contract_event(cr)) != 0) 583 return (error); 584 tmpl->ctmpl_ev_crit = param_value; 585 break; 586 default: 587 return (EINVAL); 588 } 589 590 return (0); 591 } 592 593 /* 594 * ctmpl_device_get 595 * 596 * The device contract template get entry point. Simply fetches and 597 * returns the value of the requested term. 
598 */ 599 static int 600 ctmpl_device_get(struct ct_template *template, ct_param_t *param) 601 { 602 ctmpl_device_t *dtmpl = template->ctmpl_data; 603 uint64_t *param_value = param->ctpm_value; 604 605 ASSERT(MUTEX_HELD(&template->ctmpl_lock)); 606 607 if (param->ctpm_id == CTDP_ACCEPT || 608 param->ctpm_id == CTDP_NONEG) { 609 if (param->ctpm_size < sizeof (uint64_t)) 610 return (EINVAL); 611 param->ctpm_size = sizeof (uint64_t); 612 } 613 614 switch (param->ctpm_id) { 615 case CTDP_ACCEPT: 616 *param_value = dtmpl->ctd_aset; 617 break; 618 case CTDP_NONEG: 619 *param_value = dtmpl->ctd_noneg; 620 break; 621 case CTDP_MINOR: 622 if (dtmpl->ctd_minor) { 623 param->ctpm_size = strlcpy((char *)param->ctpm_value, 624 dtmpl->ctd_minor, param->ctpm_size); 625 param->ctpm_size++; 626 } else { 627 return (ENOENT); 628 } 629 break; 630 default: 631 return (EINVAL); 632 } 633 634 return (0); 635 } 636 637 /* 638 * Device contract type specific portion of creating a contract using 639 * a specified template 640 */ 641 /*ARGSUSED*/ 642 int 643 ctmpl_device_create(ct_template_t *template, ctid_t *ctidp) 644 { 645 ctmpl_device_t *dtmpl; 646 char *buf; 647 dev_t dev; 648 int spec_type; 649 int error; 650 cont_device_t *ctd; 651 652 if (ctidp == NULL) 653 return (EINVAL); 654 655 buf = kmem_alloc(MAXPATHLEN, KM_SLEEP); 656 657 dtmpl = template->ctmpl_data; 658 659 mutex_enter(&template->ctmpl_lock); 660 if (dtmpl->ctd_minor == NULL) { 661 /* incomplete template */ 662 mutex_exit(&template->ctmpl_lock); 663 kmem_free(buf, MAXPATHLEN); 664 return (EINVAL); 665 } else { 666 ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN); 667 bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1); 668 } 669 mutex_exit(&template->ctmpl_lock); 670 671 spec_type = 0; 672 dev = NODEV; 673 if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 || 674 dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE || 675 (spec_type != S_IFCHR && spec_type != S_IFBLK)) { 676 CT_DEBUG((CE_WARN, 677 
"tmpl_create: failed to find device: %s", buf)); 678 kmem_free(buf, MAXPATHLEN); 679 return (ERANGE); 680 } 681 kmem_free(buf, MAXPATHLEN); 682 683 ctd = contract_device_create(template->ctmpl_data, 684 dev, spec_type, curproc, &error); 685 686 if (ctd == NULL) { 687 CT_DEBUG((CE_WARN, "Failed to create device contract for " 688 "process (%d) with device (devt = %lu, spec_type = %s)", 689 curproc->p_pid, dev, 690 spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK")); 691 return (error); 692 } 693 694 mutex_enter(&ctd->cond_contract.ct_lock); 695 *ctidp = ctd->cond_contract.ct_id; 696 mutex_exit(&ctd->cond_contract.ct_lock); 697 698 return (0); 699 } 700 701 /* 702 * Device contract specific template entry points 703 */ 704 static ctmplops_t ctmpl_device_ops = { 705 ctmpl_device_dup, /* ctop_dup */ 706 ctmpl_device_free, /* ctop_free */ 707 ctmpl_device_set, /* ctop_set */ 708 ctmpl_device_get, /* ctop_get */ 709 ctmpl_device_create, /* ctop_create */ 710 CT_DEV_ALLEVENT /* all device events bitmask */ 711 }; 712 713 714 /* 715 * Device contract implementation 716 */ 717 718 /* 719 * contract_device_default 720 * 721 * The device contract default template entry point. Creates a 722 * device contract template with a default A-set and no "noneg" , 723 * with informative degrade events and critical offline events. 724 * There is no default minor path. 725 */ 726 static ct_template_t * 727 contract_device_default(void) 728 { 729 ctmpl_device_t *new; 730 731 new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP); 732 ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new); 733 734 new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED; 735 new->ctd_noneg = 0; 736 new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED; 737 new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE; 738 739 return (&new->ctd_ctmpl); 740 } 741 742 /* 743 * contract_device_free 744 * 745 * Destroys the device contract specific portion of a contract and 746 * frees the contract. 
747 */ 748 static void 749 contract_device_free(contract_t *ct) 750 { 751 cont_device_t *ctd = ct->ct_data; 752 753 ASSERT(ctd->cond_minor); 754 ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN); 755 kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1); 756 757 ASSERT(ctd->cond_devt != DDI_DEV_T_ANY && 758 ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV); 759 760 ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR); 761 762 ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT)); 763 ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1); 764 765 ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT)); 766 ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK))); 767 768 ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0)); 769 ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0)); 770 771 ASSERT(!list_link_active(&ctd->cond_next)); 772 773 kmem_free(ctd, sizeof (cont_device_t)); 774 } 775 776 /* 777 * contract_device_abandon 778 * 779 * The device contract abandon entry point. 780 */ 781 static void 782 contract_device_abandon(contract_t *ct) 783 { 784 ASSERT(MUTEX_HELD(&ct->ct_lock)); 785 786 /* 787 * device contracts cannot be inherited or orphaned. 788 * Move the contract to the DEAD_STATE. It will be freed 789 * once all references to it are gone. 790 */ 791 contract_destroy(ct); 792 } 793 794 /* 795 * contract_device_destroy 796 * 797 * The device contract destroy entry point. 798 * Called from contract_destroy() to do any type specific destroy. Note 799 * that destroy is a misnomer - this does not free the contract, it only 800 * moves it to the dead state. 
A contract is actually freed via 801 * contract_rele() -> contract_dtor(), contop_free() 802 */ 803 static void 804 contract_device_destroy(contract_t *ct) 805 { 806 cont_device_t *ctd = ct->ct_data; 807 dev_info_t *dip = ctd->cond_dip; 808 809 ASSERT(MUTEX_HELD(&ct->ct_lock)); 810 811 if (dip == NULL) { 812 /* 813 * The dip has been removed, this is a dangling contract 814 * Check that dip linkages are NULL 815 */ 816 ASSERT(!list_link_active(&ctd->cond_next)); 817 CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no " 818 "devinfo node. contract ctid : %d", ct->ct_id)); 819 return; 820 } 821 822 /* 823 * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock 824 */ 825 mutex_exit(&ct->ct_lock); 826 827 /* 828 * Waiting for the barrier to be released is strictly speaking not 829 * necessary. But it simplifies the implementation of 830 * contract_device_publish() by establishing the invariant that 831 * device contracts cannot go away during negotiation. 832 */ 833 mutex_enter(&(DEVI(dip)->devi_ct_lock)); 834 ct_barrier_wait_for_release(dip); 835 mutex_enter(&ct->ct_lock); 836 837 list_remove(&(DEVI(dip)->devi_ct), ctd); 838 ctd->cond_dip = NULL; /* no longer linked to dip */ 839 contract_rele(ct); /* remove hold for dip linkage */ 840 841 mutex_exit(&ct->ct_lock); 842 mutex_exit(&(DEVI(dip)->devi_ct_lock)); 843 mutex_enter(&ct->ct_lock); 844 } 845 846 /* 847 * contract_device_status 848 * 849 * The device contract status entry point. Called when level of "detail" 850 * is either CTD_FIXED or CTD_ALL 851 * 852 */ 853 static void 854 contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl, 855 void *status, model_t model) 856 { 857 cont_device_t *ctd = ct->ct_data; 858 859 ASSERT(detail == CTD_FIXED || detail == CTD_ALL); 860 861 mutex_enter(&ct->ct_lock); 862 contract_status_common(ct, zone, status, model); 863 864 /* 865 * There's no need to hold the contract lock while accessing static 866 * data like aset or noneg. 
But since we need the lock to access other 867 * data like state, we hold it anyway. 868 */ 869 VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0); 870 VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0); 871 VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0); 872 873 if (detail == CTD_FIXED) { 874 mutex_exit(&ct->ct_lock); 875 return; 876 } 877 878 ASSERT(ctd->cond_minor); 879 VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0); 880 881 mutex_exit(&ct->ct_lock); 882 } 883 884 /* 885 * Converts a result integer into the corresponding string. Used for printing 886 * messages 887 */ 888 static char * 889 result_str(uint_t result) 890 { 891 switch (result) { 892 case CT_ACK: 893 return ("CT_ACK"); 894 case CT_NACK: 895 return ("CT_NACK"); 896 case CT_NONE: 897 return ("CT_NONE"); 898 default: 899 return ("UNKNOWN"); 900 } 901 } 902 903 /* 904 * Converts a device state integer constant into the corresponding string. 905 * Used to print messages. 906 */ 907 static char * 908 state_str(uint_t state) 909 { 910 switch (state) { 911 case CT_DEV_EV_ONLINE: 912 return ("ONLINE"); 913 case CT_DEV_EV_DEGRADED: 914 return ("DEGRADED"); 915 case CT_DEV_EV_OFFLINE: 916 return ("OFFLINE"); 917 default: 918 return ("UNKNOWN"); 919 } 920 } 921 922 /* 923 * Routine that determines if a particular CT_DEV_EV_? event corresponds to a 924 * synchronous state change or not. 
925 */ 926 static int 927 is_sync_neg(uint_t old, uint_t new) 928 { 929 int i; 930 931 ASSERT(old & CT_DEV_ALLEVENT); 932 ASSERT(new & CT_DEV_ALLEVENT); 933 934 if (old == new) { 935 CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s", 936 state_str(new))); 937 return (-2); 938 } 939 940 for (i = 0; ct_dev_negtable[i].st_new != 0; i++) { 941 if (old == ct_dev_negtable[i].st_old && 942 new == ct_dev_negtable[i].st_new) { 943 return (ct_dev_negtable[i].st_neg); 944 } 945 } 946 947 CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: " 948 "old = %s -> new = %s", state_str(old), state_str(new))); 949 950 return (-1); 951 } 952 953 /* 954 * Used to cleanup cached dv_nodes so that when a device is released by 955 * a contract holder, its devinfo node can be successfully detached. 956 */ 957 static int 958 contract_device_dvclean(dev_info_t *dip) 959 { 960 char *devnm; 961 dev_info_t *pdip; 962 int error; 963 964 ASSERT(dip); 965 966 /* pdip can be NULL if we have contracts against the root dip */ 967 pdip = ddi_get_parent(dip); 968 969 if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) { 970 char *path; 971 972 path = kmem_alloc(MAXPATHLEN, KM_SLEEP); 973 (void) ddi_pathname(dip, path); 974 CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, " 975 "device=%s", path)); 976 kmem_free(path, MAXPATHLEN); 977 return (EDEADLOCK); 978 } 979 980 if (pdip) { 981 devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP); 982 (void) ddi_deviname(dip, devnm); 983 error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE); 984 kmem_free(devnm, MAXNAMELEN + 1); 985 } else { 986 error = devfs_clean(dip, NULL, DV_CLEAN_FORCE); 987 } 988 989 return (error); 990 } 991 992 /* 993 * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland. 994 * Results in the ACK or NACK being recorded on the dip for one particular 995 * contract. 
The device contracts framework evaluates the ACK/NACKs for all
 * contracts against a device to determine if a particular device state change
 * should be allowed.
 *
 * Arguments:
 *	ct	- the device contract being ACKed/NACKed
 *	evtype	- the CT_DEV_EV_* state the negotiation is about
 *	evid	- id of the event being answered
 *	cmd	- CT_ACK or CT_NACK
 *
 * Returns:
 *	0	  - the answer was recorded, or an ACK arrived for an event
 *		    that is no longer current / whose device is gone
 *	ETIMEDOUT - a NACK arrived for an event that is no longer current,
 *		    or for a contract whose device is gone
 *	ESRCH	  - the device is gone and the contract is in CTS_DEAD state
 */
static int
contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid,
    uint_t cmd)
{
	cont_device_t *ctd = ct->ct_data;
	dev_info_t *dip;
	ctid_t ctid;
	int error;

	ctid = ct->ct_id;

	CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid));

	mutex_enter(&ct->ct_lock);
	CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid));

	dip = ctd->cond_dip;

	ASSERT(ctd->cond_minor);
	ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);

	/*
	 * Negotiation only if new state is not in A-set
	 */
	ASSERT(!(ctd->cond_aset & evtype));

	/*
	 * Negotiation only if transition is synchronous
	 */
	ASSERT(is_sync_neg(ctd->cond_state, evtype));

	/*
	 * We shouldn't be negotiating if the "noneg" flag is set
	 */
	ASSERT(!ctd->cond_noneg);

	/*
	 * Take a hold on the dip for the window in which ct_lock is dropped.
	 * This single hold is released exactly once, after ct_lock has been
	 * re-acquired below.
	 */
	if (dip)
		ndi_hold_devi(dip);

	mutex_exit(&ct->ct_lock);

	/*
	 * dv_clean only if !NACK and offline state change
	 */
	if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) {
		CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid));
		error = contract_device_dvclean(dip);
		if (error != 0) {
			/*
			 * The hold taken above must not be released here:
			 * it is dropped unconditionally once ct_lock is
			 * held again. Releasing it in this branch as well
			 * would drop a reference we never took.
			 */
			CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d",
			    ctid));
		}
	}

	mutex_enter(&ct->ct_lock);

	if (dip)
		ddi_release_devi(dip);

	if (dip == NULL) {
		/*
		 * The contract was not linked to a devinfo node when we
		 * looked. There is nothing to record the answer on.
		 */
		if (ctd->cond_currev_id != evid) {
			CT_DEBUG((CE_WARN, "%sACK for non-current event "
			    "(type=%s, id=%llu) on removed device",
			    cmd == CT_NACK ? "N" : "",
			    state_str(evtype), (unsigned long long)evid));
			CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d",
			    ctid));
		} else {
			ASSERT(ctd->cond_currev_type == evtype);
			CT_DEBUG((CE_WARN, "contract_ack: no such device: "
			    "ctid: %d", ctid));
		}
		error = (ct->ct_state == CTS_DEAD) ? ESRCH :
		    ((cmd == CT_NACK) ? ETIMEDOUT : 0);
		mutex_exit(&ct->ct_lock);
		return (error);
	}

	/*
	 * Must follow lock order: devi_ct_lock -> ct_count barrier -> ct_lock
	 */
	mutex_exit(&ct->ct_lock);

	mutex_enter(&DEVI(dip)->devi_ct_lock);
	mutex_enter(&ct->ct_lock);
	if (ctd->cond_currev_id != evid) {
		char *buf;

		/*
		 * The answer is for an event that is not the contract's
		 * current one: drop the locks, log it and report
		 * ETIMEDOUT for a NACK or success for an ACK.
		 */
		mutex_exit(&ct->ct_lock);
		mutex_exit(&DEVI(dip)->devi_ct_lock);
		ndi_hold_devi(dip);
		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
		(void) ddi_pathname(dip, buf);
		ddi_release_devi(dip);
		CT_DEBUG((CE_WARN, "%sACK for non-current event"
		    "(type=%s, id=%llu) on device %s",
		    cmd == CT_NACK ? "N" : "",
		    state_str(evtype), (unsigned long long)evid, buf));
		kmem_free(buf, MAXPATHLEN);
		CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d",
		    cmd == CT_NACK ? ETIMEDOUT : 0, ctid));
		return (cmd == CT_ACK ? 0 : ETIMEDOUT);
	}

	ASSERT(ctd->cond_currev_type == evtype);
	ASSERT(cmd == CT_ACK || cmd == CT_NACK);

	CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d",
	    cmd == CT_NACK ? "N" : "", ctid));

	/* Record the answer on the contract for the current event */
	ctd->cond_currev_ack = cmd;
	mutex_exit(&ct->ct_lock);

	/* Decrement the dip's ct barrier while devi_ct_lock is still held */
	ct_barrier_decr(dip);
	mutex_exit(&DEVI(dip)->devi_ct_lock);

	CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid));

	return (0);
}

/*
 * Invoked when a userland contract holder approves (i.e.
ACKs) a state change 1121 */ 1122 static int 1123 contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid) 1124 { 1125 return (contract_device_ack_nack(ct, evtype, evid, CT_ACK)); 1126 } 1127 1128 /* 1129 * Invoked when a userland contract holder blocks (i.e. NACKs) a state change 1130 */ 1131 static int 1132 contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid) 1133 { 1134 return (contract_device_ack_nack(ct, evtype, evid, CT_NACK)); 1135 } 1136 1137 /* 1138 * Creates a new contract synchronously with the breaking of an existing 1139 * contract. Currently not supported. 1140 */ 1141 /*ARGSUSED*/ 1142 static int 1143 contract_device_newct(contract_t *ct) 1144 { 1145 return (ENOTSUP); 1146 } 1147 1148 /* 1149 * Core device contract implementation entry points 1150 */ 1151 static contops_t contract_device_ops = { 1152 contract_device_free, /* contop_free */ 1153 contract_device_abandon, /* contop_abandon */ 1154 contract_device_destroy, /* contop_destroy */ 1155 contract_device_status, /* contop_status */ 1156 contract_device_ack, /* contop_ack */ 1157 contract_device_nack, /* contop_nack */ 1158 contract_qack_notsup, /* contop_qack */ 1159 contract_device_newct /* contop_newct */ 1160 }; 1161 1162 /* 1163 * contract_device_init 1164 * 1165 * Initializes the device contract type. 1166 */ 1167 void 1168 contract_device_init(void) 1169 { 1170 device_type = contract_type_init(CTT_DEVICE, "device", 1171 &contract_device_ops,