Home | History | Annotate | Download | only in contract
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/mutex.h>
     27 #include <sys/debug.h>
     28 #include <sys/types.h>
     29 #include <sys/param.h>
     30 #include <sys/kmem.h>
     31 #include <sys/thread.h>
     32 #include <sys/id_space.h>
     33 #include <sys/avl.h>
     34 #include <sys/list.h>
     35 #include <sys/sysmacros.h>
     36 #include <sys/proc.h>
     37 #include <sys/contract.h>
     38 #include <sys/contract_impl.h>
     39 #include <sys/contract/device.h>
     40 #include <sys/contract/device_impl.h>
     41 #include <sys/cmn_err.h>
     42 #include <sys/nvpair.h>
     43 #include <sys/policy.h>
     44 #include <sys/ddi_impldefs.h>
     45 #include <sys/ddi_implfuncs.h>
     46 #include <sys/systm.h>
     47 #include <sys/stat.h>
     48 #include <sys/sunddi.h>
     49 #include <sys/esunddi.h>
     50 #include <sys/ddi.h>
     51 #include <sys/fs/dv_node.h>
     52 #include <sys/sunndi.h>
     53 #undef ct_lock	/* needed because clnt.h defines ct_lock as a macro */
     54 
     55 /*
     56  * Device Contracts
     57  * -----------------
     58  * This file contains the core code for the device contracts framework.
     59  * A device contract is an agreement or a contract between a process and
     60  * the kernel regarding the state of the device. A device contract may be
     61  * created when a relationship is formed between a device and a process
     62  * i.e. at open(2) time, or it may be created at some point after the device
     63  * has been opened. A device contract once formed may be broken by either party.
     64  * A device contract can be broken by the process by an explicit abandon of the
     65  * contract or by an implicit abandon when the process exits. A device contract
     66  * can be broken by the kernel either asynchronously (without negotiation) or
     67  * synchronously (with negotiation). Exactly which happens depends on the device
     68  * state transition. The following state diagram shows the transitions between
     69  * device states. Only device state transitions currently supported by device
     70  * contracts are shown.
     71  *
     72  *                              <-- A -->
     73  *                       /-----------------> DEGRADED
     74  *                       |                      |
     75  *                       |                      |
     76  *                       |                      | S
     77  *                       |                      | |
     78  *                       |                      | v
     79  *                       v       S -->          v
     80  *                      ONLINE ------------> OFFLINE
     81  *
     82  *
     83  * In the figure above, the arrows indicate the direction of transition. The
     84  * letter S refers to transitions which are inherently synchronous i.e.
     85  * require negotiation and the letter A indicates transitions which are
     86  * asynchronous i.e. are done without contract negotiations. A good example
     87  * of a synchronous transition is the ONLINE -> OFFLINE transition. This
     88  * transition cannot happen as long as there are consumers which have the
     89  * device open. Thus some form of negotiation needs to happen between the
     90  * consumers and the kernel to ensure that consumers either close devices
     91  * or disallow the move to OFFLINE. Certain other transitions such as
     92  * ONLINE --> DEGRADED for example, are inherently asynchronous i.e.
     93  * non-negotiable. A device that suffers a fault that degrades its
     94  * capabilities will become degraded irrespective of what consumers it has,
     95  * so a negotiation in this case is pointless.
     96  *
     97  * The following device states are currently defined for device contracts:
     98  *
     99  *      CT_DEV_EV_ONLINE
    100  *              The device is online and functioning normally
    101  *      CT_DEV_EV_DEGRADED
    102  *              The device is online but is functioning in a degraded capacity
    103  *      CT_DEV_EV_OFFLINE
    104  *              The device is offline and is no longer configured
    105  *
    106  * A typical consumer of device contracts starts out with a contract
    107  * template and adds terms to that template. These include the
    108  * "acceptable set" (A-set) term, which is a bitset of device states which
    109  * are guaranteed by the contract. If the device moves out of a state in
    110  * the A-set, the contract is broken. The breaking of the contract can
    111  * be asynchronous in which case a critical contract event is sent to the
    112  * contract holder but no negotiations take place. If the breaking of the
    113  * contract is synchronous, negotiations are opened between the affected
    114  * consumer and the kernel. The kernel does this by sending a critical
    115  * event to the consumer with the CTE_NEG flag set indicating that this
    116  * is a negotiation event. The consumer can accept this change by sending
    117  * an ACK message to the kernel. Alternatively, if it has the necessary
    118  * privileges, it can send a NACK message to the kernel which will block
    119  * the device state change. To NACK a negotiable event, a process must
    120  * have the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
    121  *
    122  * Other terms include the "minor path" term, specified explicitly if the
    123  * contract is not being created at open(2) time or specified implicitly
    124  * if the contract is being created at open time via an activated template.
    125  *
    126  * A contract event is sent on any state change to which the contract
    127  * owner has subscribed via the informative or critical event sets. Only
    128  * critical events are guaranteed to be delivered. Since all device state
    129  * changes are controlled by the kernel and cannot be arbitrarily generated
    130  * by a non-privileged user, the {PRIV_CONTRACT_EVENT} privilege does not
    131  * need to be asserted in a process's effective set to designate an event as
    132  * critical. To ensure privacy, a process must either have the same effective
    133  * userid as the contract holder or have the {PRIV_CONTRACT_OBSERVER} privilege
    134  * asserted in its effective set in order to observe device contract events
    135  * off the device contract type specific endpoint.
    136  *
    137  * Yet another term available with device contracts is the "non-negotiable"
    138  * term. This term is used to pre-specify a NACK to any contract negotiation.
    139  * This term is ignored for asynchronous state changes. For example, a
    140  * process may have the A-set {ONLINE|DEGRADED} and make the contract
    141  * non-negotiable. In this case, the device contract framework assumes a
    142  * NACK for any transition to OFFLINE and blocks the offline. If the A-set
    143  * is {ONLINE} and the non-negotiable term is set, transitions to OFFLINE
    144  * are NACKed but transitions to DEGRADE succeed.
    145  *
    146  * The OFFLINE negotiation (if OFFLINE state is not in the A-set for a contract)
    147  * happens just before the I/O framework attempts to offline a device
    148  * (i.e. detach a device and set the offline flag so that it cannot be
    149  * reattached). A device contract holder is expected to either NACK the offline
    150  * (if privileged) or release the device and allow the offline to proceed.
    151  *
    152  * The DEGRADE contract event (if DEGRADE is not in the A-set for a contract)
    153  * is generated just before the I/O framework transitions the device state
    154  * to "degraded" (i.e. DEVI_DEVICE_DEGRADED in I/O framework terminology).
    155  *
    156  * The contract holder is expected to ACK or NACK a negotiation event
    157  * within a certain period of time. If the ACK/NACK is not received
    158  * within the timeout period, the device contract framework will behave
    159  * as if the contract does not exist and will proceed with the event.
    160  *
    161  * Unlike a process contract a device contract does not need to exist
    162  * once it is abandoned, since it does not define a fault boundary. It
    163  * merely represents an agreement between a process and the kernel
    164  * regarding the state of the device. Once the process has abandoned
    165  * the contract (either implicitly via a process exit or explicitly)
    166  * the kernel has no reason to retain the contract. As a result
    167  * device contracts are neither inheritable nor need to exist in an
    168  * orphan state.
    169  *
    170  * A device unlike a process may exist in multiple contracts and has
    171  * a "life" outside a device contract. A device unlike a process
    172  * may exist without an associated contract. Unlike a process contract
    173  * a device contract may be formed after a binding relationship is
    174  * formed between a process and a device.
    175  *
    176  *	IMPLEMENTATION NOTES
    177  *	====================
    178  * DATA STRUCTURES
    179  * ----------------
    180  * 	The heart of the device contracts implementation is the device contract
    181  * 	private cont_device_t (or ctd for short) data structure. It encapsulates
    182  * 	the generic contract_t data structure and has a number of private
    183  *	fields.
    184  * 	These include:
    185  *		cond_minor: The minor device that is the subject of the contract
    186  *		cond_aset:  The bitset of states which are guaranteed by the
    187  *			   contract
    188  *		cond_noneg: If set, indicates that the result of negotiation has
    189  *			    been predefined to be a NACK
    190  * 	In addition, there are other device identifiers such as the devinfo node,
    191  * 	dev_t and spec_type of the minor node. There are also a few fields that
    192  * 	are used during negotiation to maintain state. See
    193  *		uts/common/sys/contract/device_impl.h
    194  * 	for details.
    195  * 	The ctd structure represents the device private part of a contract of
    196  * 	type "device"
    197  *
    198  * 	Another data structure used by device contracts is ctmpl_device. It is
    199  * 	the device contracts private part of the contract template structure. It
    200  *	encapsulates the generic template structure "ct_template_t" and includes
    201  *	the following device contract specific fields
    202  *		ctd_aset:   The bitset of states that should be guaranteed by a
    203  *			    contract
    204  *		ctd_noneg:  If set, indicates that contract should NACK a
    205  *			    negotiation
    206  *		ctd_minor:  The devfs_path (without the /devices prefix) of the
    207  *			    minor node that is the subject of the contract.
    208  *
    209  * ALGORITHMS
    210  * ---------
    211  * There are three sets of routines in this file
    212  * 	Template related routines
    213  * 	-------------------------
    214  *	These routines provide support for template related operations initiated
    215  *	via the generic template operations. These include routines that dup
    216  *	a template, free it, and set various terms in the template
    217  *	(such as the minor node path, the acceptable state set (or A-set)
    218  *	and the non-negotiable term) as well as a routine to query the
    219  *	device specific portion of the template for the abovementioned terms.
    220  *	There is also a routine to create (ctmpl_device_create) that is used to
    221  *	create a contract from a template. This routine calls (after initial
    222  *	setup) the common function used to create a device contract
    223  *	(contract_device_create).
    224  *
    225  *	core device contract implementation
    226  *	----------------------------------
    227  *	These routines support the generic contract framework to provide
    228  *	functionality that allows contracts to be created, managed and
    229  *	destroyed. The contract_device_create() routine is a routine used
    230  *	to create a contract from a template (either via an explicit create
    231  *	operation on a template or implicitly via an open with an
    232  *	activated template.). The contract_device_free() routine assists
    233  *	in freeing the device contract specific parts. There are routines
    234  *	used to abandon (contract_device_abandon) a device contract as well
    235  *	as a routine to destroy (which despite its name does not destroy,
    236  *	it only moves a contract to a dead state) a contract.
    237  *	There is also a routine to return status information about a
    238  *	contract - the level of detail depends on what is requested by the
    239  *	user. A value of CTD_FIXED only returns fixed length fields such
    240  *	as the A-set, state of device and value of the "noneg" term. If
    241  *	CTD_ALL is specified, the minor node path is returned as well.
    242  *
    243  *	In addition there are interfaces (contract_device_ack/nack) which
    244  *	are used to support negotiation between userland processes and
    245  *	device contracts. These interfaces record the acknowledgement
    246  *	or lack thereof for negotiation events and help determine if the
    247  *	negotiated event should occur.
    248  *
    249  *	"backend routines"
    250  *	-----------------
    251  *	The backend routines form the interface between the I/O framework
    252  *	and the device contract subsystem. These routines, allow the I/O
    253  *	framework to call into the device contract subsystem to notify it of
    254  *	impending changes to a device state as well as to inform of the
    255  *	final disposition of such attempted state changes. Routines in this
    256  *	class include contract_device_offline() that indicates an attempt to
    257  *	offline a device, contract_device_degrade() that indicates that
    258  *	a device is moving to the degraded state and contract_device_negend()
    259  *	that is used by the I/O framework to inform the contracts subsystem of
    260  *	the final disposition of an attempted operation.
    261  *
    262  *	SUMMARY
    263  *	-------
    264  *      A contract starts its life as a template. A process allocates a device
    265  *	contract template and sets various terms:
    266  *		The A-set
    267  *		The device minor node
    268  *		Critical and informative events
    269  *		The noneg i.e. no negotiation term
    270  *	Setting of these terms in the template is done via the
    271  *	ctmpl_device_set() entry point in this file. A process can query a
    272  *	template to determine the terms already set in the template - this is
    273  *	facilitated by the ctmpl_device_get() routine.
    274  *
    275  *	Once all the appropriate terms are set, the contract is instantiated via
    276  *	one of two methods
    277  *	- via an explicit create operation - this is facilitated by the
    278  *	  ctmpl_device_create() entry point
    279  *	- synchronously with the open(2) system call - this is achieved via the
    280  *	  contract_device_open() routine.
    281  *	The core work for both these above functions is done by
    282  *	contract_device_create()
    283  *
    284  *	A contract once created can be queried for its status. Support for
    285  *	status info is provided by both the common contracts framework and by
    286  *	the "device" contract type. If the level of detail requested is
    287  *	CTD_COMMON, only the common contract framework data is used. Higher
    288  *	levels of detail result in calls to contract_device_status() to supply
    289  *	device contract type specific status information.
    290  *
    291  *	A contract once created may be abandoned either explicitly or implicitly.
    292  *	In either case, the contract_device_abandon() function is invoked. This
    293  * 	function merely calls contract_destroy() which moves the contract to
    294  *	the DEAD state. The device contract portion of destroy processing is
    295  *	provided by contract_device_destroy() which merely disassociates the
    296  *	contract from its device devinfo node. A contract in the DEAD state is
    297  *	not freed. It hangs around until all references to the contract are
    298  *	gone. When that happens, the contract is finally deallocated. The
    299  *	device contract specific portion of the free is done by
    300  *	contract_device_free() which finally frees the device contract specific
    301  *	data structure (cont_device_t).
    302  *
    303  *	When a device undergoes a state change, the I/O framework calls the
    304  *	corresponding device contract entry point. For example, when a device
    305  *	is about to go OFFLINE, the routine contract_device_offline() is
    306  *	invoked. Similarly if a device moves to DEGRADED state, the routine
    307  *	contract_device_degrade() function is called. These functions call the
    308  *	core routine contract_device_publish(). This function determines via
    309  *	the function is_sync_neg() whether an event is a synchronous (i.e.
    310  *	negotiable) event or not. In the former case contract_device_publish()
    311  *	publishes a CTE_NEG event and then waits in wait_for_acks() for ACKs
    312  *	and/or NACKs from contract holders. In the latter case, it simply
    313  *	publishes the event and does not wait. In the negotiation case, ACKs or
    314  *	NACKs from userland consumers results in contract_device_ack_nack()
    315  *	being called where the result of the negotiation is recorded in the
    316  *	contract data structure. Once all outstanding contract owners have
    317  *	responded, the device contract code in wait_for_acks() determines the
    318  *	final result of the negotiation. A single NACK overrides all other ACKs.
    319  *	If there is no NACK, then a single ACK will result in an overall ACK
    320  *	result. If there are no ACKs or NACKs, then the result CT_NONE is
    321  *	returned back to the I/O framework. Once the event is permitted or
    322  *	blocked, the I/O framework proceeds or aborts the state change. The
    323  *	I/O framework then calls contract_device_negend() with a result code
    324  *	indicating final disposition of the event. This call releases the
    325  *	barrier and other state associated with the previous negotiation,
    326  *	which permits the next event (if any) to come into the device contract
    327  *	framework.
    328  *
    329  *	Finally, a device that has outstanding contracts may be removed from
    330  *	the system which results in its devinfo node being freed. The devinfo
    331  *	free routine in the I/O framework, calls into the device contract
    332  *	function - contract_device_remove_dip(). This routine, disassociates
    333  *	the dip from all contracts associated with the devinfo node being freed,
    334  *	allowing the devinfo node to be freed.
    335  *
    336  * LOCKING
    337  * ---------
    338  * 	There are four sets of data that need to be protected by locks
    339  *
    340  *	i) device contract specific portion of the contract template - This data
    341  *	is protected by the template lock ctmpl_lock.
    342  *
    343  *	ii) device contract specific portion of the contract - This data is
    344  *	protected by the contract lock ct_lock
    345  *
    346  *	iii) The linked list of contracts hanging off a devinfo node - This
    347  *	list is protected by the per-devinfo node lock devi_ct_lock
    348  *
    349  *	iv) Finally there is a barrier, controlled by devi_ct_lock, devi_ct_cv
    350  *	and devi_ct_count that controls state changes to a dip
    351  *
    352  *	The template lock is independent in that none of the other locks in this
    353  *	file may be taken while holding the template lock (and vice versa).
    354  *
    355  *	The remaining three locks have the following lock order
    356  *
    357  *	devi_ct_lock  -> ct_count barrier ->  ct_lock
    358  *
    359  */
    360 
    361 static cont_device_t *contract_device_create(ctmpl_device_t *dtmpl, dev_t dev,
    362     int spec_type, proc_t *owner, int *errorp);
    363 
    364 /* barrier routines */
    365 static void ct_barrier_acquire(dev_info_t *dip);
    366 static void ct_barrier_release(dev_info_t *dip);
    367 static int ct_barrier_held(dev_info_t *dip);
    368 static int ct_barrier_empty(dev_info_t *dip);
    369 static void ct_barrier_wait_for_release(dev_info_t *dip);
    370 static int ct_barrier_wait_for_empty(dev_info_t *dip, int secs);
    371 static void ct_barrier_decr(dev_info_t *dip);
    372 static void ct_barrier_incr(dev_info_t *dip);
    373 
    374 ct_type_t *device_type;
    375 
    376 /*
    377  * Macro predicates for determining when events should be sent and how.
    378  */
    379 #define	EVSENDP(ctd, flag) \
    380 	((ctd->cond_contract.ct_ev_info | ctd->cond_contract.ct_ev_crit) & flag)
    381 
    382 #define	EVINFOP(ctd, flag) \
    383 	((ctd->cond_contract.ct_ev_crit & flag) == 0)
    384 
    385 /*
    386  * State transition table showing which transitions are synchronous and which
    387  * are not.
    388  */
    389 struct ct_dev_negtable {
    390 	uint_t	st_old;
    391 	uint_t	st_new;
    392 	uint_t	st_neg;
    393 } ct_dev_negtable[] = {
    394 	{CT_DEV_EV_ONLINE, CT_DEV_EV_OFFLINE,	1},
    395 	{CT_DEV_EV_ONLINE, CT_DEV_EV_DEGRADED,	0},
    396 	{CT_DEV_EV_DEGRADED, CT_DEV_EV_ONLINE,	0},
    397 	{CT_DEV_EV_DEGRADED, CT_DEV_EV_OFFLINE,	1},
    398 	{0}
    399 };
    400 
    401 /*
    402  * Device contract template implementation
    403  */
    404 
    405 /*
    406  * ctmpl_device_dup
    407  *
    408  * The device contract template dup entry point.
    409  * This simply copies all the fields (generic as well as device contract
    410  * specific) fields of the original.
    411  */
    412 static struct ct_template *
    413 ctmpl_device_dup(struct ct_template *template)
    414 {
    415 	ctmpl_device_t *new;
    416 	ctmpl_device_t *old = template->ctmpl_data;
    417 	char *buf;
    418 	char *minor;
    419 
    420 	new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
    421 	buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    422 
    423 	/*
    424 	 * copy generic fields.
    425 	 * ctmpl_copy returns with old template lock held
    426 	 */
    427 	ctmpl_copy(&new->ctd_ctmpl, template);
    428 
    429 	new->ctd_ctmpl.ctmpl_data = new;
    430 	new->ctd_aset = old->ctd_aset;
    431 	new->ctd_minor = NULL;
    432 	new->ctd_noneg = old->ctd_noneg;
    433 
    434 	if (old->ctd_minor) {
    435 		ASSERT(strlen(old->ctd_minor) + 1 <= MAXPATHLEN);
    436 		bcopy(old->ctd_minor, buf, strlen(old->ctd_minor) + 1);
    437 	} else {
    438 		kmem_free(buf, MAXPATHLEN);
    439 		buf = NULL;
    440 	}
    441 
    442 	mutex_exit(&template->ctmpl_lock);
    443 	if (buf) {
    444 		minor = i_ddi_strdup(buf, KM_SLEEP);
    445 		kmem_free(buf, MAXPATHLEN);
    446 		buf = NULL;
    447 	} else {
    448 		minor = NULL;
    449 	}
    450 	mutex_enter(&template->ctmpl_lock);
    451 
    452 	if (minor) {
    453 		new->ctd_minor = minor;
    454 	}
    455 
    456 	ASSERT(buf == NULL);
    457 	return (&new->ctd_ctmpl);
    458 }
    459 
    460 /*
    461  * ctmpl_device_free
    462  *
    463  * The device contract template free entry point.  Just
    464  * frees the template.
    465  */
    466 static void
    467 ctmpl_device_free(struct ct_template *template)
    468 {
    469 	ctmpl_device_t *dtmpl = template->ctmpl_data;
    470 
    471 	if (dtmpl->ctd_minor)
    472 		kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
    473 
    474 	kmem_free(dtmpl, sizeof (ctmpl_device_t));
    475 }
    476 
/*
 * SAFE_EV is the set of events which a non-privileged process is
 * allowed to make critical. An unprivileged device contract owner has
 * no control over when a device changes state, so all device events
 * (CT_DEV_ALLEVENT) can be in the critical set.
 *
 * EXCESS tells us if "value", a critical event set, requires
 * additional privilege: it yields the bits of "value" that lie outside
 * SAFE_EV. Since SAFE_EV is every device event, EXCESS currently
 * evaluates to 0 for any set made up solely of device events.
 */
#define	SAFE_EV		(CT_DEV_ALLEVENT)
#define	EXCESS(value)	((value) & ~SAFE_EV)
    489 
    490 
    491 /*
    492  * ctmpl_device_set
    493  *
    494  * The device contract template set entry point. Sets various terms in the
    495  * template. The non-negotiable  term can only be set if the process has
    496  * the {PRIV_SYS_DEVICES} privilege asserted in its effective set.
    497  */
    498 static int
    499 ctmpl_device_set(struct ct_template *tmpl, ct_kparam_t *kparam,
    500     const cred_t *cr)
    501 {
    502 	ctmpl_device_t *dtmpl = tmpl->ctmpl_data;
    503 	ct_param_t *param = &kparam->param;
    504 	int error;
    505 	dev_info_t *dip;
    506 	int spec_type;
    507 	uint64_t param_value;
    508 	char *str_value;
    509 
    510 	ASSERT(MUTEX_HELD(&tmpl->ctmpl_lock));
    511 
    512 	if (param->ctpm_id == CTDP_MINOR) {
    513 		str_value = (char *)kparam->ctpm_kbuf;
    514 		str_value[param->ctpm_size - 1] = '\0';
    515 	} else {
    516 		if (param->ctpm_size < sizeof (uint64_t))
    517 			return (EINVAL);
    518 		param_value = *(uint64_t *)kparam->ctpm_kbuf;
    519 	}
    520 
    521 	switch (param->ctpm_id) {
    522 	case CTDP_ACCEPT:
    523 		if (param_value & ~CT_DEV_ALLEVENT)
    524 			return (EINVAL);
    525 		if (param_value == 0)
    526 			return (EINVAL);
    527 		if (param_value == CT_DEV_ALLEVENT)
    528 			return (EINVAL);
    529 
    530 		dtmpl->ctd_aset = param_value;
    531 		break;
    532 	case CTDP_NONEG:
    533 		if (param_value != CTDP_NONEG_SET &&
    534 		    param_value != CTDP_NONEG_CLEAR)
    535 			return (EINVAL);
    536 
    537 		/*
    538 		 * only privileged processes can designate a contract
    539 		 * non-negotiatble.
    540 		 */
    541 		if (param_value == CTDP_NONEG_SET &&
    542 		    (error = secpolicy_sys_devices(cr)) != 0) {
    543 			return (error);
    544 		}
    545 
    546 		dtmpl->ctd_noneg = param_value;
    547 		break;
    548 
    549 	case CTDP_MINOR:
    550 		if (*str_value != '/' ||
    551 		    strncmp(str_value, "/devices/",
    552 		    strlen("/devices/")) == 0 ||
    553 		    strstr(str_value, "../devices/") != NULL ||
    554 		    strchr(str_value, ':') == NULL) {
    555 			return (EINVAL);
    556 		}
    557 
    558 		spec_type = 0;
    559 		dip = NULL;
    560 		if (resolve_pathname(str_value, &dip, NULL, &spec_type) != 0) {
    561 			return (ERANGE);
    562 		}
    563 		ddi_release_devi(dip);
    564 
    565 		if (spec_type != S_IFCHR && spec_type != S_IFBLK) {
    566 			return (EINVAL);
    567 		}
    568 
    569 		if (dtmpl->ctd_minor != NULL) {
    570 			kmem_free(dtmpl->ctd_minor,
    571 			    strlen(dtmpl->ctd_minor) + 1);
    572 		}
    573 		dtmpl->ctd_minor = i_ddi_strdup(str_value, KM_SLEEP);
    574 		break;
    575 	case CTP_EV_CRITICAL:
    576 		/*
    577 		 * Currently for device contracts, any event
    578 		 * may be added to the critical set. We retain the
    579 		 * following code however for future enhancements.
    580 		 */
    581 		if (EXCESS(param_value) &&
    582 		    (error = secpolicy_contract_event(cr)) != 0)
    583 			return (error);
    584 		tmpl->ctmpl_ev_crit = param_value;
    585 		break;
    586 	default:
    587 		return (EINVAL);
    588 	}
    589 
    590 	return (0);
    591 }
    592 
    593 /*
    594  * ctmpl_device_get
    595  *
    596  * The device contract template get entry point.  Simply fetches and
    597  * returns the value of the requested term.
    598  */
    599 static int
    600 ctmpl_device_get(struct ct_template *template, ct_kparam_t *kparam)
    601 {
    602 	ctmpl_device_t *dtmpl = template->ctmpl_data;
    603 	ct_param_t *param = &kparam->param;
    604 	uint64_t *param_value = kparam->ctpm_kbuf;
    605 
    606 	ASSERT(MUTEX_HELD(&template->ctmpl_lock));
    607 
    608 	if (param->ctpm_id == CTDP_ACCEPT ||
    609 	    param->ctpm_id == CTDP_NONEG) {
    610 		if (param->ctpm_size < sizeof (uint64_t))
    611 			return (EINVAL);
    612 		kparam->ret_size = sizeof (uint64_t);
    613 	}
    614 
    615 	switch (param->ctpm_id) {
    616 	case CTDP_ACCEPT:
    617 		*param_value = dtmpl->ctd_aset;
    618 		break;
    619 	case CTDP_NONEG:
    620 		*param_value = dtmpl->ctd_noneg;
    621 		break;
    622 	case CTDP_MINOR:
    623 		if (dtmpl->ctd_minor) {
    624 			kparam->ret_size = strlcpy((char *)kparam->ctpm_kbuf,
    625 			    dtmpl->ctd_minor, param->ctpm_size);
    626 			kparam->ret_size++;
    627 		} else {
    628 			return (ENOENT);
    629 		}
    630 		break;
    631 	default:
    632 		return (EINVAL);
    633 	}
    634 
    635 	return (0);
    636 }
    637 
    638 /*
    639  * Device contract type specific portion of creating a contract using
    640  * a specified template
    641  */
    642 /*ARGSUSED*/
    643 int
    644 ctmpl_device_create(ct_template_t *template, ctid_t *ctidp)
    645 {
    646 	ctmpl_device_t *dtmpl;
    647 	char *buf;
    648 	dev_t dev;
    649 	int spec_type;
    650 	int error;
    651 	cont_device_t *ctd;
    652 
    653 	if (ctidp == NULL)
    654 		return (EINVAL);
    655 
    656 	buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    657 
    658 	dtmpl = template->ctmpl_data;
    659 
    660 	mutex_enter(&template->ctmpl_lock);
    661 	if (dtmpl->ctd_minor == NULL) {
    662 		/* incomplete template */
    663 		mutex_exit(&template->ctmpl_lock);
    664 		kmem_free(buf, MAXPATHLEN);
    665 		return (EINVAL);
    666 	} else {
    667 		ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
    668 		bcopy(dtmpl->ctd_minor, buf, strlen(dtmpl->ctd_minor) + 1);
    669 	}
    670 	mutex_exit(&template->ctmpl_lock);
    671 
    672 	spec_type = 0;
    673 	dev = NODEV;
    674 	if (resolve_pathname(buf, NULL, &dev, &spec_type) != 0 ||
    675 	    dev == NODEV || dev == DDI_DEV_T_ANY || dev == DDI_DEV_T_NONE ||
    676 	    (spec_type != S_IFCHR && spec_type != S_IFBLK)) {
    677 		CT_DEBUG((CE_WARN,
    678 		    "tmpl_create: failed to find device: %s", buf));
    679 		kmem_free(buf, MAXPATHLEN);
    680 		return (ERANGE);
    681 	}
    682 	kmem_free(buf, MAXPATHLEN);
    683 
    684 	ctd = contract_device_create(template->ctmpl_data,
    685 	    dev, spec_type, curproc, &error);
    686 
    687 	if (ctd == NULL) {
    688 		CT_DEBUG((CE_WARN, "Failed to create device contract for "
    689 		    "process (%d) with device (devt = %lu, spec_type = %s)",
    690 		    curproc->p_pid, dev,
    691 		    spec_type == S_IFCHR ? "S_IFCHR" : "S_IFBLK"));
    692 		return (error);
    693 	}
    694 
    695 	mutex_enter(&ctd->cond_contract.ct_lock);
    696 	*ctidp = ctd->cond_contract.ct_id;
    697 	mutex_exit(&ctd->cond_contract.ct_lock);
    698 
    699 	return (0);
    700 }
    701 
/*
 * Device contract specific template entry points.
 *
 * This operations vector is handed to ctmpl_init() by
 * contract_device_default() when a device contract template is set up.
 * The initializers are positional; the trailing comment on each line
 * names the slot being filled.
 */
static ctmplops_t ctmpl_device_ops = {
	ctmpl_device_dup,		/* ctop_dup */
	ctmpl_device_free,		/* ctop_free */
	ctmpl_device_set,		/* ctop_set */
	ctmpl_device_get,		/* ctop_get */
	ctmpl_device_create,		/* ctop_create */
	CT_DEV_ALLEVENT			/* all device events bitmask */
};
    713 
    714 
    715 /*
    716  * Device contract implementation
    717  */
    718 
    719 /*
    720  * contract_device_default
    721  *
    722  * The device contract default template entry point.  Creates a
    723  * device contract template with a default A-set and no "noneg" ,
    724  * with informative degrade events and critical offline events.
    725  * There is no default minor path.
    726  */
    727 static ct_template_t *
    728 contract_device_default(void)
    729 {
    730 	ctmpl_device_t *new;
    731 
    732 	new = kmem_zalloc(sizeof (ctmpl_device_t), KM_SLEEP);
    733 	ctmpl_init(&new->ctd_ctmpl, &ctmpl_device_ops, device_type, new);
    734 
    735 	new->ctd_aset = CT_DEV_EV_ONLINE | CT_DEV_EV_DEGRADED;
    736 	new->ctd_noneg = 0;
    737 	new->ctd_ctmpl.ctmpl_ev_info = CT_DEV_EV_DEGRADED;
    738 	new->ctd_ctmpl.ctmpl_ev_crit = CT_DEV_EV_OFFLINE;
    739 
    740 	return (&new->ctd_ctmpl);
    741 }
    742 
    743 /*
    744  * contract_device_free
    745  *
    746  * Destroys the device contract specific portion of a contract and
    747  * frees the contract.
    748  */
    749 static void
    750 contract_device_free(contract_t *ct)
    751 {
    752 	cont_device_t *ctd = ct->ct_data;
    753 
    754 	ASSERT(ctd->cond_minor);
    755 	ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
    756 	kmem_free(ctd->cond_minor, strlen(ctd->cond_minor) + 1);
    757 
    758 	ASSERT(ctd->cond_devt != DDI_DEV_T_ANY &&
    759 	    ctd->cond_devt != DDI_DEV_T_NONE && ctd->cond_devt != NODEV);
    760 
    761 	ASSERT(ctd->cond_spec == S_IFBLK || ctd->cond_spec == S_IFCHR);
    762 
    763 	ASSERT(!(ctd->cond_aset & ~CT_DEV_ALLEVENT));
    764 	ASSERT(ctd->cond_noneg == 0 || ctd->cond_noneg == 1);
    765 
    766 	ASSERT(!(ctd->cond_currev_type & ~CT_DEV_ALLEVENT));
    767 	ASSERT(!(ctd->cond_currev_ack & ~(CT_ACK | CT_NACK)));
    768 
    769 	ASSERT((ctd->cond_currev_id > 0) ^ (ctd->cond_currev_type == 0));
    770 	ASSERT((ctd->cond_currev_id > 0) || (ctd->cond_currev_ack == 0));
    771 
    772 	ASSERT(!list_link_active(&ctd->cond_next));
    773 
    774 	kmem_free(ctd, sizeof (cont_device_t));
    775 }
    776 
/*
 * contract_device_abandon
 *
 * The device contract abandon entry point.
 *
 * The caller must hold ct->ct_lock (asserted below).  The contract is
 * passed straight to contract_destroy().
 */
static void
contract_device_abandon(contract_t *ct)
{
	ASSERT(MUTEX_HELD(&ct->ct_lock));

	/*
	 * device contracts cannot be inherited or orphaned.
	 * Move the contract to the DEAD_STATE. It will be freed
	 * once all references to it are gone.
	 */
	contract_destroy(ct);
}
    794 
    795 /*
    796  * contract_device_destroy
    797  *
    798  * The device contract destroy entry point.
    799  * Called from contract_destroy() to do any type specific destroy. Note
    800  * that destroy is a misnomer - this does not free the contract, it only
    801  * moves it to the dead state. A contract is actually freed via
    802  * 	contract_rele() -> contract_dtor(), contop_free()
    803  */
    804 static void
    805 contract_device_destroy(contract_t *ct)
    806 {
    807 	cont_device_t	*ctd = ct->ct_data;
    808 	dev_info_t	*dip = ctd->cond_dip;
    809 
    810 	ASSERT(MUTEX_HELD(&ct->ct_lock));
    811 
    812 	if (dip == NULL) {
    813 		/*
    814 		 * The dip has been removed, this is a dangling contract
    815 		 * Check that dip linkages are NULL
    816 		 */
    817 		ASSERT(!list_link_active(&ctd->cond_next));
    818 		CT_DEBUG((CE_NOTE, "contract_device_destroy: contract has no "
    819 		    "devinfo node. contract ctid : %d", ct->ct_id));
    820 		return;
    821 	}
    822 
    823 	/*
    824 	 * Need to have lock order: devi_ct_lock -> ct_count barrier -> ct_lock
    825 	 */
    826 	mutex_exit(&ct->ct_lock);
    827 
    828 	/*
    829 	 * Waiting for the barrier to be released is strictly speaking not
    830 	 * necessary. But it simplifies the implementation of
    831 	 * contract_device_publish() by establishing the invariant that
    832 	 * device contracts cannot go away during negotiation.
    833 	 */
    834 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
    835 	ct_barrier_wait_for_release(dip);
    836 	mutex_enter(&ct->ct_lock);
    837 
    838 	list_remove(&(DEVI(dip)->devi_ct), ctd);
    839 	ctd->cond_dip = NULL; /* no longer linked to dip */
    840 	contract_rele(ct);	/* remove hold for dip linkage */
    841 
    842 	mutex_exit(&ct->ct_lock);
    843 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
    844 	mutex_enter(&ct->ct_lock);
    845 }
    846 
    847 /*
    848  * contract_device_status
    849  *
    850  * The device contract status entry point. Called when level of "detail"
    851  * is either CTD_FIXED or CTD_ALL
    852  *
    853  */
    854 static void
    855 contract_device_status(contract_t *ct, zone_t *zone, int detail, nvlist_t *nvl,
    856     void *status, model_t model)
    857 {
    858 	cont_device_t *ctd = ct->ct_data;
    859 
    860 	ASSERT(detail == CTD_FIXED || detail == CTD_ALL);
    861 
    862 	mutex_enter(&ct->ct_lock);
    863 	contract_status_common(ct, zone, status, model);
    864 
    865 	/*
    866 	 * There's no need to hold the contract lock while accessing static
    867 	 * data like aset or noneg. But since we need the lock to access other
    868 	 * data like state, we hold it anyway.
    869 	 */
    870 	VERIFY(nvlist_add_uint32(nvl, CTDS_STATE, ctd->cond_state) == 0);
    871 	VERIFY(nvlist_add_uint32(nvl, CTDS_ASET, ctd->cond_aset) == 0);
    872 	VERIFY(nvlist_add_uint32(nvl, CTDS_NONEG, ctd->cond_noneg) == 0);
    873 
    874 	if (detail == CTD_FIXED) {
    875 		mutex_exit(&ct->ct_lock);
    876 		return;
    877 	}
    878 
    879 	ASSERT(ctd->cond_minor);
    880 	VERIFY(nvlist_add_string(nvl, CTDS_MINOR, ctd->cond_minor) == 0);
    881 
    882 	mutex_exit(&ct->ct_lock);
    883 }
    884 
    885 /*
    886  * Converts a result integer into the corresponding string. Used for printing
    887  * messages
    888  */
    889 static char *
    890 result_str(uint_t result)
    891 {
    892 	switch (result) {
    893 	case CT_ACK:
    894 		return ("CT_ACK");
    895 	case CT_NACK:
    896 		return ("CT_NACK");
    897 	case CT_NONE:
    898 		return ("CT_NONE");
    899 	default:
    900 		return ("UNKNOWN");
    901 	}
    902 }
    903 
    904 /*
    905  * Converts a device state integer constant into the corresponding string.
    906  * Used to print messages.
    907  */
    908 static char *
    909 state_str(uint_t state)
    910 {
    911 	switch (state) {
    912 	case CT_DEV_EV_ONLINE:
    913 		return ("ONLINE");
    914 	case CT_DEV_EV_DEGRADED:
    915 		return ("DEGRADED");
    916 	case CT_DEV_EV_OFFLINE:
    917 		return ("OFFLINE");
    918 	default:
    919 		return ("UNKNOWN");
    920 	}
    921 }
    922 
    923 /*
    924  * Routine that determines if a particular CT_DEV_EV_? event corresponds to a
    925  * synchronous state change or not.
    926  */
    927 static int
    928 is_sync_neg(uint_t old, uint_t new)
    929 {
    930 	int	i;
    931 
    932 	ASSERT(old & CT_DEV_ALLEVENT);
    933 	ASSERT(new & CT_DEV_ALLEVENT);
    934 
    935 	if (old == new) {
    936 		CT_DEBUG((CE_WARN, "is_sync_neg: transition to same state: %s",
    937 		    state_str(new)));
    938 		return (-2);
    939 	}
    940 
    941 	for (i = 0; ct_dev_negtable[i].st_new != 0; i++) {
    942 		if (old == ct_dev_negtable[i].st_old &&
    943 		    new == ct_dev_negtable[i].st_new) {
    944 			return (ct_dev_negtable[i].st_neg);
    945 		}
    946 	}
    947 
    948 	CT_DEBUG((CE_WARN, "is_sync_neg: Unsupported state transition: "
    949 	    "old = %s -> new = %s", state_str(old), state_str(new)));
    950 
    951 	return (-1);
    952 }
    953 
    954 /*
    955  * Used to cleanup cached dv_nodes so that when a device is released by
    956  * a contract holder, its devinfo node can be successfully detached.
    957  */
    958 static int
    959 contract_device_dvclean(dev_info_t *dip)
    960 {
    961 	char		*devnm;
    962 	dev_info_t	*pdip;
    963 	int		error;
    964 
    965 	ASSERT(dip);
    966 
    967 	/* pdip can be NULL if we have contracts against the root dip */
    968 	pdip = ddi_get_parent(dip);
    969 
    970 	if (pdip && DEVI_BUSY_OWNED(pdip) || !pdip && DEVI_BUSY_OWNED(dip)) {
    971 		char		*path;
    972 
    973 		path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    974 		(void) ddi_pathname(dip, path);
    975 		CT_DEBUG((CE_WARN, "ct_dv_clean: Parent node is busy owned, "
    976 		    "device=%s", path));
    977 		kmem_free(path, MAXPATHLEN);
    978 		return (EDEADLOCK);
    979 	}
    980 
    981 	if (pdip) {
    982 		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
    983 		(void) ddi_deviname(dip, devnm);
    984 		error = devfs_clean(pdip, devnm + 1, DV_CLEAN_FORCE);
    985 		kmem_free(devnm, MAXNAMELEN + 1);
    986 	} else {
    987 		error = devfs_clean(dip, NULL, DV_CLEAN_FORCE);
    988 	}
    989 
    990 	return (error);
    991 }
    992 
    993 /*
    994  * Endpoint of a ct_ctl_ack() or ct_ctl_nack() call from userland.
    995  * Results in the ACK or NACK being recorded on the dip for one particular
    996  * contract. The device contracts framework evaluates the ACK/NACKs for all
    997  * contracts against a device to determine if a particular device state change
    998  * should be allowed.
    999  */
   1000 static int
   1001 contract_device_ack_nack(contract_t *ct, uint_t evtype, uint64_t evid,
   1002     uint_t cmd)
   1003 {
   1004 	cont_device_t *ctd = ct->ct_data;
   1005 	dev_info_t *dip;
   1006 	ctid_t	ctid;
   1007 	int error;
   1008 
   1009 	ctid = ct->ct_id;
   1010 
   1011 	CT_DEBUG((CE_NOTE, "ack_nack: entered: ctid %d", ctid));
   1012 
   1013 	mutex_enter(&ct->ct_lock);
   1014 	CT_DEBUG((CE_NOTE, "ack_nack: contract lock acquired: %d", ctid));
   1015 
   1016 	dip = ctd->cond_dip;
   1017 
   1018 	ASSERT(ctd->cond_minor);
   1019 	ASSERT(strlen(ctd->cond_minor) < MAXPATHLEN);
   1020 
   1021 	/*
   1022 	 * Negotiation only if new state is not in A-set
   1023 	 */
   1024 	ASSERT(!(ctd->cond_aset & evtype));
   1025 
   1026 	/*
   1027 	 * Negotiation only if transition is synchronous
   1028 	 */
   1029 	ASSERT(is_sync_neg(ctd->cond_state, evtype));
   1030 
   1031 	/*
   1032 	 * We shouldn't be negotiating if the "noneg" flag is set
   1033 	 */
   1034 	ASSERT(!ctd->cond_noneg);
   1035 
   1036 	if (dip)
   1037 		ndi_hold_devi(dip);
   1038 
   1039 	mutex_exit(&ct->ct_lock);
   1040 
   1041 	/*
   1042 	 * dv_clean only if !NACK and offline state change
   1043 	 */
   1044 	if (cmd != CT_NACK && evtype == CT_DEV_EV_OFFLINE && dip) {
   1045 		CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: %d", ctid));
   1046 		error = contract_device_dvclean(dip);
   1047 		if (error != 0) {
   1048 			CT_DEBUG((CE_NOTE, "ack_nack: dv_clean: failed: %d",
   1049 			    ctid));
   1050 			ddi_release_devi(dip);
   1051 		}
   1052 	}
   1053 
   1054 	mutex_enter(&ct->ct_lock);
   1055 
   1056 	if (dip)
   1057 		ddi_release_devi(dip);
   1058 
   1059 	if (dip == NULL) {
   1060 		if (ctd->cond_currev_id != evid) {
   1061 			CT_DEBUG((CE_WARN, "%sACK for non-current event "
   1062 			    "(type=%s, id=%llu) on removed device",
   1063 			    cmd == CT_NACK ? "N" : "",
   1064 			    state_str(evtype), (unsigned long long)evid));
   1065 			CT_DEBUG((CE_NOTE, "ack_nack: error: ESRCH, ctid: %d",
   1066 			    ctid));
   1067 		} else {
   1068 			ASSERT(ctd->cond_currev_type == evtype);
   1069 			CT_DEBUG((CE_WARN, "contract_ack: no such device: "
   1070 			    "ctid: %d", ctid));
   1071 		}
   1072 		error = (ct->ct_state == CTS_DEAD) ? ESRCH :
   1073 		    ((cmd == CT_NACK) ? ETIMEDOUT : 0);
   1074 		mutex_exit(&ct->ct_lock);
   1075 		return (error);
   1076 	}
   1077 
   1078 	/*
   1079 	 * Must follow lock order: devi_ct_lock -> ct_count barrier - >ct_lock
   1080 	 */
   1081 	mutex_exit(&ct->ct_lock);
   1082 
   1083 	mutex_enter(&DEVI(dip)->devi_ct_lock);
   1084 	mutex_enter(&ct->ct_lock);
   1085 	if (ctd->cond_currev_id != evid) {
   1086 		char *buf;
   1087 		mutex_exit(&ct->ct_lock);
   1088 		mutex_exit(&DEVI(dip)->devi_ct_lock);
   1089 		ndi_hold_devi(dip);
   1090 		buf = kmem_alloc(MAXPATHLEN, KM_SLEEP);
   1091 		(void) ddi_pathname(dip, buf);
   1092 		ddi_release_devi(dip);
   1093 		CT_DEBUG((CE_WARN, "%sACK for non-current event"
   1094 		    "(type=%s, id=%llu) on device %s",
   1095 		    cmd == CT_NACK ? "N" : "",
   1096 		    state_str(evtype), (unsigned long long)evid, buf));
   1097 		kmem_free(buf, MAXPATHLEN);
   1098 		CT_DEBUG((CE_NOTE, "ack_nack: error: %d, ctid: %d",
   1099 		    cmd == CT_NACK ? ETIMEDOUT : 0, ctid));
   1100 		return (cmd == CT_ACK ? 0 : ETIMEDOUT);
   1101 	}
   1102 
   1103 	ASSERT(ctd->cond_currev_type == evtype);
   1104 	ASSERT(cmd == CT_ACK || cmd == CT_NACK);
   1105 
   1106 	CT_DEBUG((CE_NOTE, "ack_nack: setting %sACK for ctid: %d",
   1107 	    cmd == CT_NACK ? "N" : "", ctid));
   1108 
   1109 	ctd->cond_currev_ack = cmd;
   1110 	mutex_exit(&ct->ct_lock);
   1111 
   1112 	ct_barrier_decr(dip);
   1113 	mutex_exit(&DEVI(dip)->devi_ct_lock);
   1114 
   1115 	CT_DEBUG((CE_NOTE, "ack_nack: normal exit: ctid: %d", ctid));
   1116 
   1117 	return (0);
   1118 }
   1119 
/*
 * Invoked when a userland contract holder approves (i.e. ACKs) a state change
 *
 * Thin wrapper: forwards to contract_device_ack_nack() with CT_ACK and
 * returns its result unchanged.
 */
static int
contract_device_ack(contract_t *ct, uint_t evtype, uint64_t evid)
{
	return (contract_device_ack_nack(ct, evtype, evid, CT_ACK));
}
   1128 
/*
 * Invoked when a userland contract holder blocks (i.e. NACKs) a state change
 *
 * Thin wrapper: forwards to contract_device_ack_nack() with CT_NACK and
 * returns its result unchanged.
 */
static int
contract_device_nack(contract_t *ct, uint_t evtype, uint64_t evid)
{
	return (contract_device_ack_nack(ct, evtype, evid, CT_NACK));
}
   1137 
/*
 * Creates a new contract synchronously with the breaking of an existing
 * contract. Currently not supported.
 *
 * The argument is ignored; ENOTSUP is always returned.
 */
/*ARGSUSED*/
static int
contract_device_newct(contract_t *ct)
{
	return (ENOTSUP);
}
   1148 
/*
 * Core device contract implementation entry points
 *
 * This operations vector is registered for the device contract type by
 * contract_device_init() through contract_type_init().  The initializers
 * are positional; the trailing comment on each line names the slot being
 * filled.
 */
static contops_t contract_device_ops = {
	contract_device_free,		/* contop_free */
	contract_device_abandon,	/* contop_abandon */
	contract_device_destroy,	/* contop_destroy */
	contract_device_status,		/* contop_status */
	contract_device_ack,		/* contop_ack */
	contract_device_nack,		/* contop_nack */
	contract_qack_notsup,		/* contop_qack */
	contract_device_newct		/* contop_newct */
};
   1162 
/*
 * contract_device_init
 *
 * Initializes the device contract type.
 *
 * Registers the type under the name "device" with its operations vector
 * and default-template constructor, and stores the value returned by
 * contract_type_init() in device_type.
 */
void
contract_device_init(void)
{
	device_type = contract_type_init(CTT_DEVICE, "device",
	    &contract_device_ops, contract_device_default);
}
   1174 
   1175 /*
   1176  * contract_device_create
   1177  *
   1178  * create a device contract given template "tmpl" and the "owner" process.
   1179  * May fail and return NULL if project.max-contracts would have been exceeded.
   1180  *
   1181  * Common device contract creation routine called for both open-time and
   1182  * non-open time device contract creation
   1183  */
   1184 static cont_device_t *
   1185 contract_device_create(ctmpl_device_t *dtmpl, dev_t dev, int spec_type,
   1186     proc_t *owner, int *errorp)
   1187 {
   1188 	cont_device_t *ctd;
   1189 	char *minor;
   1190 	char *path;
   1191 	dev_info_t *dip;
   1192 
   1193 	ASSERT(dtmpl != NULL);
   1194 	ASSERT(dev != NODEV && dev != DDI_DEV_T_ANY && dev != DDI_DEV_T_NONE);
   1195 	ASSERT(spec_type == S_IFCHR || spec_type == S_IFBLK);
   1196 	ASSERT(errorp);
   1197 
   1198 	*errorp = 0;
   1199 
   1200 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
   1201 
   1202 	mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
   1203 	ASSERT(strlen(dtmpl->ctd_minor) < MAXPATHLEN);
   1204 	bcopy(dtmpl->ctd_minor, path, strlen(dtmpl->ctd_minor) + 1);
   1205 	mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
   1206 
   1207 	dip = e_ddi_hold_devi_by_path(path, 0);
   1208 	if (dip == NULL) {
   1209 		cmn_err(CE_WARN, "contract_create: Cannot find devinfo node "
   1210 		    "for device path (%s)", path);
   1211 		kmem_free(path, MAXPATHLEN);
   1212 		*errorp = ERANGE;
   1213 		return (NULL);
   1214 	}
   1215 
   1216 	/*
   1217 	 * Lock out any parallel contract negotiations
   1218 	 */
   1219 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
   1220 	ct_barrier_acquire(dip);
   1221 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
   1222 
   1223 	minor = i_ddi_strdup(path, KM_SLEEP);
   1224 	kmem_free(path, MAXPATHLEN);
   1225 
   1226 	(void) contract_type_pbundle(device_type, owner);
   1227 
   1228 	ctd = kmem_zalloc(sizeof (cont_device_t), KM_SLEEP);
   1229 
   1230 	/*
   1231 	 * Only we hold a refernce to this contract. Safe to access
   1232 	 * the fields without a ct_lock
   1233 	 */
   1234 	ctd->cond_minor = minor;
   1235 	/*
   1236 	 * It is safe to set the dip pointer in the contract
   1237 	 * as the contract will always be destroyed before the dip
   1238 	 * is released
   1239 	 */
   1240 	ctd->cond_dip = dip;
   1241 	ctd->cond_devt = dev;
   1242 	ctd->cond_spec = spec_type;
   1243 
   1244 	/*
   1245 	 * Since we are able to lookup the device, it is either
   1246 	 * online or degraded
   1247 	 */
   1248 	ctd->cond_state = DEVI_IS_DEVICE_DEGRADED(dip) ?
   1249 	    CT_DEV_EV_DEGRADED : CT_DEV_EV_ONLINE;
   1250 
   1251 	mutex_enter(&dtmpl->ctd_ctmpl.ctmpl_lock);
   1252 	ctd->cond_aset = dtmpl->ctd_aset;
   1253 	ctd->cond_noneg = dtmpl->ctd_noneg;
   1254 
   1255 	/*
   1256 	 * contract_ctor() initailizes the common portion of a contract
   1257 	 * contract_dtor() destroys the common portion of a contract
   1258 	 */
   1259 	if (contract_ctor(&ctd->cond_contract, device_type, &dtmpl->ctd_ctmpl,
   1260 	    ctd, 0, owner, B_TRUE)) {
   1261 		mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
   1262 		/*
   1263 		 * contract_device_free() destroys the type specific
   1264 		 * portion of a contract and frees the contract.
   1265 		 * The "minor" path and "cred" is a part of the type specific
   1266 		 * portion of the contract and will be freed by
   1267 		 * contract_device_free()
   1268 		 */
   1269 		contract_device_free(&ctd->cond_contract);
   1270 
   1271 		/* release barrier */
   1272 		mutex_enter(&(DEVI(dip)->devi_ct_lock));
   1273 		ct_barrier_release(dip);
   1274 		mutex_exit(&(DEVI(dip)->devi_ct_lock));
   1275 
   1276 		ddi_release_devi(dip);
   1277 		*errorp = EAGAIN;
   1278 		return (NULL);
   1279 	}
   1280 	mutex_exit(&dtmpl->ctd_ctmpl.ctmpl_lock);
   1281 
   1282 	mutex_enter(&ctd->cond_contract.ct_lock);
   1283 	ctd->cond_contract.ct_ntime.ctm_total = CT_DEV_ACKTIME;
   1284 	ctd->cond_contract.ct_qtime.ctm_total = CT_DEV_ACKTIME;
   1285 	ctd->cond_contract.ct_ntime.ctm_start = -1;
   1286 	ctd->cond_contract.ct_qtime.ctm_start = -1;
   1287 	mutex_exit(&ctd->cond_contract.ct_lock);
   1288 
   1289 	/*
   1290 	 * Insert device contract into list hanging off the dip
   1291 	 * Bump up the ref-count on the contract to reflect this
   1292 	 */
   1293 	contract_hold(&ctd->cond_contract);
   1294 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
   1295 	list_insert_tail(&(DEVI(dip)->devi_ct), ctd);
   1296 
   1297 	/* release barrier */
   1298 	ct_barrier_release(dip);
   1299 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
   1300 
   1301 	ddi_release_devi(dip);
   1302 
   1303 	return (ctd);
   1304 }
   1305 
   1306 /*
   1307  * Called when a device is successfully opened to create an open-time contract
   1308  * i.e. synchronously with a device open.
   1309  */
   1310 int
   1311 contract_device_open(dev_t dev, int spec_type, contract_t **ctpp)
   1312 {
   1313 	ctmpl_device_t *dtmpl;
   1314 	ct_template_t  *tmpl;
   1315 	cont_device_t *ctd;
   1316 	char *path;
   1317 	klwp_t *lwp;
   1318 	int error;
   1319 
   1320 	if (ctpp)
   1321 		*ctpp = NULL;
   1322 
   1323 	/*
   1324 	 * Check if we are in user-context i.e. if we have an lwp
   1325 	 */
   1326 	lwp = ttolwp(curthread);
   1327 	if (lwp == NULL) {
   1328 		CT_DEBUG((CE_NOTE, "contract_open: Not user-context"));
   1329 		return (0);
   1330 	}
   1331 
   1332 	tmpl = ctmpl_dup(lwp->lwp_ct_active[device_type->ct_type_index]);
   1333 	if (tmpl == NULL) {
   1334 		return (0);
   1335 	}
   1336 	dtmpl = tmpl->ctmpl_data;
   1337 
   1338 	/*
   1339 	 * If the user set a minor path in the template before an open,
   1340 	 * ignore it. We use the minor path of the actual minor opened.
   1341 	 */
   1342 	mutex_enter(&tmpl->ctmpl_lock);
   1343 	if (dtmpl->ctd_minor != NULL) {
   1344 		CT_DEBUG((CE_NOTE, "contract_device_open(): Process %d: "
   1345 		    "ignoring device minor path in active template: %s",
   1346 		    curproc->p_pid, dtmpl->ctd_minor));
   1347 		/*
   1348 		 * This is a copy of the actual activated template.
   1349 		 * Safe to make changes such as freeing the minor
   1350 		 * path in the template.
   1351 		 */
   1352 		kmem_free(dtmpl->ctd_minor, strlen(dtmpl->ctd_minor) + 1);
   1353 		dtmpl->ctd_minor = NULL;
   1354 	}
   1355 	mutex_exit(&tmpl->ctmpl_lock);
   1356 
   1357 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
   1358 
   1359 	if (ddi_dev_pathname(dev, spec_type, path) != DDI_SUCCESS) {
   1360 		CT_DEBUG((CE_NOTE, "contract_device_open(): Failed to derive "
   1361 		    "minor path from dev_t,spec {%lu, %d} for process (%d)",
   1362 		    dev, spec_type, curproc->p_pid));
   1363 		ctmpl_free(tmpl);
   1364 		kmem_free(path, MAXPATHLEN);
   1365 		return (1);
   1366 	}
   1367 
   1368 	mutex_enter(&tmpl->ctmpl_lock);
   1369 	ASSERT(dtmpl->ctd_minor == NULL);
   1370 	dtmpl->ctd_minor = path;
   1371 	mutex_exit(&tmpl->ctmpl_lock);
   1372 
   1373 	ctd = contract_device_create(dtmpl, dev, spec_type, curproc, &error);
   1374 
   1375 	mutex_enter(&tmpl->ctmpl_lock);
   1376 	ASSERT(dtmpl->ctd_minor);
   1377 	dtmpl->ctd_minor = NULL;
   1378 	mutex_exit(&tmpl->ctmpl_lock);
   1379 	ctmpl_free(tmpl);
   1380 	kmem_free(path, MAXPATHLEN);
   1381 
   1382 	if (ctd == NULL) {
   1383 		cmn_err(CE_NOTE, "contract_device_open(): Failed to "
   1384 		    "create device contract for process (%d) holding "
   1385 		    "device (devt = %lu, spec_type = %d)",
   1386 		    curproc->p_pid, dev, spec_type);
   1387 		return (1);
   1388 	}
   1389 
   1390 	if (ctpp) {
   1391 		mutex_enter(&ctd->cond_contract.ct_lock);
   1392 		*ctpp = &ctd->cond_contract;
   1393 		mutex_exit(&ctd->cond_contract.ct_lock);
   1394 	}
   1395 	return (0);
   1396 }
   1397 
   1398 /*
   1399  * Called during contract negotiation by the device contract framework to wait
   1400  * for ACKs or NACKs from contract holders. If all responses are not received
   1401  * before a specified timeout, this routine times out.
   1402  */
   1403 static uint_t
   1404 wait_for_acks(dev_info_t *dip, dev_t dev, int spec_type, uint_t evtype)
   1405 {
   1406 	cont_device_t *ctd;
   1407 	int timed_out = 0;
   1408 	int result = CT_NONE;
   1409 	int ack;
   1410 	char *f = "wait_for_acks";
   1411 
   1412 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
   1413 	ASSERT(dip);
   1414 	ASSERT(evtype & CT_DEV_ALLEVENT);
   1415 	ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
   1416 	ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
   1417 	    (spec_type == S_IFBLK || spec_type == S_IFCHR));
   1418 
   1419 	CT_DEBUG((CE_NOTE, "%s: entered: dip: %p", f, (void *)dip));
   1420 
   1421 	if (ct_barrier_wait_for_empty(dip, CT_DEV_ACKTIME) == -1) {
   1422 		/*
   1423 		 * some contract owner(s) didn't respond in time
   1424 		 */
   1425 		CT_DEBUG((CE_NOTE, "%s: timed out: %p", f, (void *)dip));
   1426 		timed_out = 1;
   1427 	}
   1428 
   1429 	ack = 0;
   1430 	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
   1431 	    ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
   1432 
   1433 		mutex_enter(&ctd->cond_contract.ct_lock);
   1434 
   1435 		ASSERT(ctd->cond_dip == dip);
   1436 
   1437 		if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
   1438 			mutex_exit(&ctd->cond_contract.ct_lock);
   1439 			continue;
   1440 		}
   1441 		if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
   1442 			mutex_exit(&ctd->cond_contract.ct_lock);
   1443 			continue;
   1444 		}
   1445 
   1446 		/* skip if non-negotiable contract */
   1447 		if (ctd->cond_noneg) {
   1448 			mutex_exit(&ctd->cond_contract.ct_lock);
   1449 			continue;
   1450 		}
   1451 
   1452 		ASSERT(ctd->cond_currev_type == evtype);
   1453 		if (ctd->cond_currev_ack == CT_NACK) {
   1454 			CT_DEBUG((CE_NOTE, "%s: found a NACK,result = NACK: %p",
   1455 			    f, (void *)dip));
   1456 			mutex_exit(&ctd->cond_contract.ct_lock);
   1457 			return (CT_NACK);
   1458 		} else if (ctd->cond_currev_ack == CT_ACK) {
   1459 			ack = 1;
   1460 			CT_DEBUG((CE_NOTE, "%s: found a ACK: %p",
   1461 			    f, (void *)dip));
   1462 		}
   1463 		mutex_exit(&ctd->cond_contract.ct_lock);
   1464 	}
   1465 
   1466 	if (ack) {
   1467 		result = CT_ACK;
   1468 		CT_DEBUG((CE_NOTE, "%s: result = ACK, dip=%p", f, (void *)dip));
   1469 	} else if (timed_out) {
   1470 		result = CT_NONE;
   1471 		CT_DEBUG((CE_NOTE, "%s: result = NONE (timed-out), dip=%p",
   1472 		    f, (void *)dip));
   1473 	} else {
   1474 		CT_DEBUG((CE_NOTE, "%s: result = NONE, dip=%p",
   1475 		    f, (void *)dip));
   1476 	}
   1477 
   1478 
   1479 	return (result);
   1480 }
   1481 
   1482 /*
   1483  * Determines the current state of a device (i.e a devinfo node
   1484  */
   1485 static int
   1486 get_state(dev_info_t *dip)
   1487 {
   1488 	if (DEVI_IS_DEVICE_OFFLINE(dip) || DEVI_IS_DEVICE_DOWN(dip))
   1489 		return (CT_DEV_EV_OFFLINE);
   1490 	else if (DEVI_IS_DEVICE_DEGRADED(dip))
   1491 		return (CT_DEV_EV_DEGRADED);
   1492 	else
   1493 		return (CT_DEV_EV_ONLINE);
   1494 }
   1495 
   1496 /*
   1497  * Sets the current state of a device in a device contract
   1498  */
   1499 static void
   1500 set_cond_state(dev_info_t *dip)
   1501 {
   1502 	uint_t state = get_state(dip);
   1503 	cont_device_t *ctd;
   1504 
   1505 	/* verify that barrier is held */
   1506 	ASSERT(ct_barrier_held(dip));
   1507 
   1508 	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
   1509 	    ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
   1510 		mutex_enter(&ctd->cond_contract.ct_lock);
   1511 		ASSERT(ctd->cond_dip == dip);
   1512 		ctd->cond_state = state;
   1513 		mutex_exit(&ctd->cond_contract.ct_lock);
   1514 	}
   1515 }
   1516 
   1517 /*
   1518  * Core routine called by event-specific routines when an event occurs.
 * Determines if an event should be published, and if it is to be
   1520  * published, whether a negotiation should take place. Also implements
   1521  * NEGEND events which publish the final disposition of an event after
   1522  * negotiations are complete.
   1523  *
   1524  * When an event occurs on a minor node, this routine walks the list of
   1525  * contracts hanging off a devinfo node and for each contract on the affected
   1526  * dip, evaluates the following cases
   1527  *
   1528  *	a. an event that is synchronous, breaks the contract and NONEG not set
   1529  *		- bumps up the outstanding negotiation counts on the dip
   1530  *		- marks the dip as undergoing negotiation (devi_ct_neg)
   1531  *		- event of type CTE_NEG is published
   1532  *	b. an event that is synchronous, breaks the contract and NONEG is set
   1533  *		- sets the final result to CT_NACK, event is blocked
   1534  *		- does not publish an event
   1535  *	c. event is asynchronous and breaks the contract
 *		- publishes a critical event irrespective of whether the NONEG
   1537  *		  flag is set, since the contract will be broken and contract
   1538  *		  owner needs to be informed.
   1539  *	d. No contract breakage but the owner has subscribed to the event
   1540  *		- publishes the event irrespective of the NONEG event as the
   1541  *		  owner has explicitly subscribed to the event.
   1542  *	e. NEGEND event
 *		- publishes a critical event. Should only be doing this
 *		  if NONEG is not set.
   1545  *	f. all other events
   1546  *		- Since a contract is not broken and this event has not been
 *		  subscribed to, this event does not need to be published
 *		  for this contract.
   1549  *
   1550  *	Once an event is published, what happens next depends on the type of
   1551  *	event:
   1552  *
   1553  *	a. NEGEND event
   1554  *		- cleanup all state associated with the preceding negotiation
   1555  *		  and return CT_ACK to the caller of contract_device_publish()
   1556  *	b. NACKed event
   1557  *		- One or more contracts had the NONEG term, so the event was
   1558  *		  blocked. Return CT_NACK to the caller.
   1559  *	c. Negotiated event
   1560  *		- Call wait_for_acks() to wait for responses from contract
 *		holders. The end result is either CT_ACK (event is permitted),
 *		CT_NACK (event is blocked) or CT_NONE (no contract owner
 *		responded). This result is returned back to the caller.
   1564  *	d. All other events
   1565  *		- If the event was asynchronous (i.e. not negotiated) or
   1566  *		a contract was not broken return CT_ACK to the caller.
   1567  */
   1568 static uint_t
   1569 contract_device_publish(dev_info_t *dip, dev_t dev, int spec_type,
   1570     uint_t evtype, nvlist_t *tnvl)
   1571 {
   1572 	cont_device_t *ctd;
   1573 	uint_t result = CT_NONE;
   1574 	uint64_t evid = 0;
   1575 	uint64_t nevid = 0;
   1576 	char *path = NULL;
   1577 	int negend;
   1578 	int match;
   1579 	int sync = 0;
   1580 	contract_t *ct;
   1581 	ct_kevent_t *event;
   1582 	nvlist_t *nvl;
   1583 	int broken = 0;
   1584 
   1585 	ASSERT(dip);
   1586 	ASSERT(dev != NODEV && dev != DDI_DEV_T_NONE);
   1587 	ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
   1588 	    (spec_type == S_IFBLK || spec_type == S_IFCHR));
   1589 	ASSERT(evtype == 0 || (evtype & CT_DEV_ALLEVENT));
   1590 
   1591 	/* Is this a synchronous state change ? */
   1592 	if (evtype != CT_EV_NEGEND) {
   1593 		sync = is_sync_neg(get_state(dip), evtype);
   1594 		/* NOP if unsupported transition */
   1595 		if (sync == -2 || sync == -1) {
   1596 			DEVI(dip)->devi_flags |= DEVI_CT_NOP;
   1597 			result = (sync == -2) ? CT_ACK : CT_NONE;
   1598 			goto out;
   1599 		}
   1600 		CT_DEBUG((CE_NOTE, "publish: is%s sync state change",
   1601 		    sync ? "" : " not"));
   1602 	} else if (DEVI(dip)->devi_flags & DEVI_CT_NOP) {
   1603 		DEVI(dip)->devi_flags &= ~DEVI_CT_NOP;
   1604 		result = CT_ACK;
   1605 		goto out;
   1606 	}
   1607 
   1608 	path = kmem_alloc(MAXPATHLEN, KM_SLEEP);
   1609 	(void) ddi_pathname(dip, path);
   1610 
   1611 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
   1612 
   1613 	/*
   1614 	 * Negotiation end - set the state of the device in the contract
   1615 	 */
   1616 	if (evtype == CT_EV_NEGEND) {
   1617 		CT_DEBUG((CE_NOTE, "publish: negend: setting cond state"));
   1618 		set_cond_state(dip);
   1619 	}
   1620 
   1621 	/*
   1622 	 * If this device didn't go through negotiation, don't publish
   1623 	 * a NEGEND event - simply release the barrier to allow other
   1624 	 * device events in.
   1625 	 */
   1626 	negend = 0;
   1627 	if (evtype == CT_EV_NEGEND && !DEVI(dip)->devi_ct_neg) {
   1628 		CT_DEBUG((CE_NOTE, "publish: no negend reqd. release barrier"));
   1629 		ct_barrier_release(dip);
   1630 		mutex_exit(&(DEVI(dip)->devi_ct_lock));
   1631 		result = CT_ACK;
   1632 		goto out;
   1633 	} else if (evtype == CT_EV_NEGEND) {
   1634 		/*
   1635 		 * There are negotiated contract breakages that
   1636 		 * need a NEGEND event
   1637 		 */
   1638 		ASSERT(ct_barrier_held(dip));
   1639 		negend = 1;
   1640 		CT_DEBUG((CE_NOTE, "publish: setting negend flag"));
   1641 	} else {
   1642 		/*
   1643 		 * This is a new event, not a NEGEND event. Wait for previous
   1644 		 * contract events to complete.
   1645 		 */
   1646 		ct_barrier_acquire(dip);
   1647 	}
   1648 
   1649 
   1650 	match = 0;
   1651 	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL;
   1652 	    ctd = list_next(&(DEVI(dip)->devi_ct), ctd)) {
   1653 
   1654 		ctid_t ctid;
   1655 		size_t len = strlen(path);
   1656 
   1657 		mutex_enter(&ctd->cond_contract.ct_lock);
   1658 
   1659 		ASSERT(ctd->cond_dip == dip);
   1660 		ASSERT(ctd->cond_minor);
   1661 		ASSERT(strncmp(ctd->cond_minor, path, len) == 0 &&
   1662 		    ctd->cond_minor[len] == ':');
   1663 
   1664 		if (dev != DDI_DEV_T_ANY && dev != ctd->cond_devt) {
   1665 			mutex_exit(&ctd->cond_contract.ct_lock);
   1666 			continue;
   1667 		}
   1668 		if (dev != DDI_DEV_T_ANY && spec_type != ctd->cond_spec) {
   1669 			mutex_exit(&ctd->cond_contract.ct_lock);
   1670 			continue;
   1671 		}
   1672 
   1673 		/* We have a matching contract */
   1674 		match = 1;
   1675 		ctid = ctd->cond_contract.ct_id;
   1676 		CT_DEBUG((CE_NOTE, "publish: found matching contract: %d",
   1677 		    ctid));
   1678 
   1679 		/*
   1680 		 * There are 4 possible cases
   1681 		 * 1. A contract is broken (dev not in acceptable state) and
   1682 		 *    the state change is synchronous - start negotiation
   1683 		 *    by sending a CTE_NEG critical event.
   1684 		 * 2. A contract is broken and the state change is
   1685 		 *    asynchronous - just send a critical event and
   1686 		 *    break the contract.
   1687 		 * 3. Contract is not broken, but consumer has subscribed
   1688 		 *    to the event as a critical or informative event
   1689 		 *    - just send the appropriate event
   1690 		 * 4. contract waiting for negend event - just send the critical
   1691 		 *    NEGEND event.
   1692 		 */
   1693 		broken = 0;
   1694 		if (!negend && !(evtype & ctd->cond_aset)) {
   1695 			broken = 1;
   1696 			CT_DEBUG((CE_NOTE, "publish: Contract broken: %d",
   1697 			    ctid));
   1698 		}
   1699 
   1700 		/*
   1701 		 * Don't send event if
   1702 		 *	- contract is not broken AND
   1703 		 *	- contract holder has not subscribed to this event AND
   1704 		 *	- contract not waiting for a NEGEND event
   1705 		 */
   1706 		if (!broken && !EVSENDP(ctd, evtype) &&
   1707 		    !ctd->cond_neg) {
   1708 			CT_DEBUG((CE_NOTE, "contract_device_publish(): "
   1709 			    "contract (%d): no publish reqd: event %d",
   1710 			    ctd->cond_contract.ct_id, evtype));
   1711 			mutex_exit(&ctd->cond_contract.ct_lock);
   1712 			continue;
   1713 		}
   1714 
   1715 		/*
   1716 		 * Note: need to kmem_zalloc() the event so mutexes are
   1717 		 * initialized automatically
   1718 		 */
   1719 		ct = &ctd->cond_contract;
   1720 		event = kmem_zalloc(sizeof (ct_kevent_t), KM_SLEEP);
   1721 		event->cte_type = evtype;
   1722 
   1723 		if (broken && sync) {
   1724 			CT_DEBUG((CE_NOTE, "publish: broken + sync: "
   1725 			    "ctid: %d", ctid));
   1726 			ASSERT(!negend);
   1727 			ASSERT(ctd->cond_currev_id == 0);
   1728 			ASSERT(ctd->cond_currev_type == 0);
   1729 			ASSERT(ctd->cond_currev_ack == 0);
   1730 			ASSERT(ctd->cond_neg == 0);
   1731 			if (ctd->cond_noneg) {
   1732 				/* Nothing to publish. Event has been blocked */
   1733 				CT_DEBUG((CE_NOTE, "publish: sync and noneg:"
   1734 				    "not publishing blocked ev: ctid: %d",
   1735 				    ctid));
   1736 				result = CT_NACK;
   1737 				kmem_free(event, sizeof (ct_kevent_t));
   1738 				mutex_exit(&ctd->cond_contract.ct_lock);
   1739 				continue;
   1740 			}
   1741 			event->cte_flags = CTE_NEG; /* critical neg. event */
   1742 			ctd->cond_currev_type = event->cte_type;
   1743 			ct_barrier_incr(dip);
   1744 			DEVI(dip)->devi_ct_neg = 1; /* waiting for negend */
   1745 			ctd->cond_neg = 1;
   1746 		} else if (broken && !sync) {
   1747 			CT_DEBUG((CE_NOTE, "publish: broken + async: ctid: %d",
   1748 			    ctid));
   1749 			ASSERT(!negend);
   1750 			ASSERT(ctd->cond_currev_id == 0);
   1751 			ASSERT(ctd->cond_currev_type == 0);
   1752 			ASSERT(ctd->cond_currev_ack == 0);
   1753 			ASSERT(ctd->cond_neg == 0);
   1754 			event->cte_flags = 0; /* critical event */
   1755 		} else if (EVSENDP(ctd, event->cte_type)) {
   1756 			CT_DEBUG((CE_NOTE, "publish: event suscrib: ctid: %d",
   1757 			    ctid));
   1758 			ASSERT(!negend);
   1759 			ASSERT(ctd->cond_currev_id == 0);
   1760 			ASSERT(ctd->cond_currev_type == 0);
   1761 			ASSERT(ctd->cond_currev_ack == 0);
   1762 			ASSERT(ctd->cond_neg == 0);
   1763 			event->cte_flags = EVINFOP(ctd, event->cte_type) ?
   1764 			    CTE_INFO : 0;
   1765 		} else if (ctd->cond_neg) {
   1766 			CT_DEBUG((CE_NOTE, "publish: NEGEND: ctid: %d", ctid));
   1767 			ASSERT(negend);
   1768 			ASSERT(ctd->cond_noneg == 0);
   1769 			nevid = ctd->cond_contract.ct_nevent ?
   1770 			    ctd->cond_contract.ct_nevent->cte_id : 0;
   1771 			ASSERT(ctd->cond_currev_id == nevid);
   1772 			event->cte_flags = 0;	/* NEGEND is always critical */
   1773 			ctd->cond_currev_id = 0;
   1774 			ctd->cond_currev_type = 0;
   1775 			ctd->cond_currev_ack = 0;
   1776 			ctd->cond_neg = 0;
   1777 		} else {
   1778 			CT_DEBUG((CE_NOTE, "publish: not publishing event for "
   1779 			    "ctid: %d, evtype: %d",
   1780 			    ctd->cond_contract.ct_id, event->cte_type));
   1781 			ASSERT(!negend);
   1782 			ASSERT(ctd->cond_currev_id == 0);
   1783 			ASSERT(ctd->cond_currev_type == 0);
   1784 			ASSERT(ctd->cond_currev_ack == 0);
   1785 			ASSERT(ctd->cond_neg == 0);
   1786 			kmem_free(event, sizeof (ct_kevent_t));
   1787 			mutex_exit(&ctd->cond_contract.ct_lock);
   1788 			continue;
   1789 		}
   1790 
   1791 		nvl = NULL;
   1792 		if (tnvl) {
   1793 			VERIFY(nvlist_dup(tnvl, &nvl, 0) == 0);
   1794 			if (negend) {
   1795 				int32_t newct = 0;
   1796 				ASSERT(ctd->cond_noneg == 0);
   1797 				VERIFY(nvlist_add_uint64(nvl, CTS_NEVID, nevid)
   1798 				    == 0);
   1799 				VERIFY(nvlist_lookup_int32(nvl, CTS_NEWCT,
   1800 				    &newct) == 0);
   1801 				VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
   1802 				    newct == 1 ? 0 :
   1803 				    ctd->cond_contract.ct_id) == 0);
   1804 				CT_DEBUG((CE_NOTE, "publish: negend: ctid: %d "
   1805 				    "CTS_NEVID: %llu, CTS_NEWCT: %s",
   1806 				    ctid, (unsigned long long)nevid,
   1807 				    newct ? "success" : "failure"));
   1808 
   1809 			}
   1810 		}
   1811 
   1812 		if (ctd->cond_neg) {
   1813 			ASSERT(ctd->cond_contract.ct_ntime.ctm_start == -1);
   1814 			ASSERT(ctd->cond_contract.ct_qtime.ctm_start == -1);
   1815 			ctd->cond_contract.ct_ntime.ctm_start = ddi_get_lbolt();
   1816 			ctd->cond_contract.ct_qtime.ctm_start =
   1817 			    ctd->cond_contract.ct_ntime.ctm_start;
   1818 		}
   1819 
   1820 		/*
   1821 		 * by holding the dip's devi_ct_lock we ensure that
   1822 		 * all ACK/NACKs are held up until we have finished
   1823 		 * publishing to all contracts.
   1824 		 */
   1825 		mutex_exit(&ctd->cond_contract.ct_lock);
   1826 		evid = cte_publish_all(ct, event, nvl, NULL);
   1827 		mutex_enter(&ctd->cond_contract.ct_lock);
   1828 
   1829 		if (ctd->cond_neg) {
   1830 			ASSERT(!negend);
   1831 			ASSERT(broken);
   1832 			ASSERT(sync);
   1833 			ASSERT(!ctd->cond_noneg);
   1834 			CT_DEBUG((CE_NOTE, "publish: sync break, setting evid"
   1835 			    ": %d", ctid));
   1836 			ctd->cond_currev_id = evid;
   1837 		} else if (negend) {
   1838 			ctd->cond_contract.ct_ntime.ctm_start = -1;
   1839 			ctd->cond_contract.ct_qtime.ctm_start = -1;
   1840 		}
   1841 		mutex_exit(&ctd->cond_contract.ct_lock);
   1842 	}
   1843 
   1844 	/*
   1845 	 * If "negend" set counter back to initial state (-1) so that
   1846 	 * other events can be published. Also clear the negotiation flag
   1847 	 * on dip.
   1848 	 *
   1849 	 * 0 .. n are used for counting.
   1850 	 * -1 indicates counter is available for use.
   1851 	 */
   1852 	if (negend) {
   1853 		/*
   1854 		 * devi_ct_count not necessarily 0. We may have
   1855 		 * timed out in which case, count will be non-zero.
   1856 		 */
   1857 		ct_barrier_release(dip);
   1858 		DEVI(dip)->devi_ct_neg = 0;
   1859 		CT_DEBUG((CE_NOTE, "publish: negend: reset dip state: dip=%p",
   1860 		    (void *)dip));
   1861 	} else if (DEVI(dip)->devi_ct_neg) {
   1862 		ASSERT(match);
   1863 		ASSERT(!ct_barrier_empty(dip));
   1864 		CT_DEBUG((CE_NOTE, "publish: sync count=%d, dip=%p",
   1865 		    DEVI(dip)->devi_ct_count, (void *)dip));
   1866 	} else {
   1867 		/*
   1868 		 * for non-negotiated events or subscribed events or no
   1869 		 * matching contracts
   1870 		 */
   1871 		ASSERT(ct_barrier_empty(dip));
   1872 		ASSERT(DEVI(dip)->devi_ct_neg == 0);
   1873 		CT_DEBUG((CE_NOTE, "publish: async/non-nego/subscrib/no-match: "
   1874 		    "dip=%p", (void *)dip));
   1875 
   1876 		/*
   1877 		 * only this function when called from contract_device_negend()
   1878 		 * can reset the counter to READY state i.e. -1. This function
   1879 		 * is so called for every event whether a NEGEND event is needed
   1880 		 * or not, but the negend event is only published if the event
   1881 		 * whose end they signal is a negotiated event for the contract.
   1882 		 */
   1883 	}
   1884 
   1885 	if (!match) {
   1886 		/* No matching contracts */
   1887 		CT_DEBUG((CE_NOTE, "publish: No matching contract"));
   1888 		result = CT_NONE;
   1889 	} else if (result == CT_NACK) {
   1890 		/* a non-negotiable contract exists and this is a neg. event */
   1891 		CT_DEBUG((CE_NOTE, "publish: found 1 or more NONEG contract"));
   1892 		(void) wait_for_acks(dip, dev, spec_type, evtype);
   1893 	} else if (DEVI(dip)->devi_ct_neg) {
   1894 		/* one or more contracts going through negotations  */
   1895 		CT_DEBUG((CE_NOTE, "publish: sync contract: waiting"));
   1896 		result = wait_for_acks(dip, dev, spec_type, evtype);
   1897 	} else {
   1898 		/* no negotiated contracts or no broken contracts or NEGEND */
   1899 		CT_DEBUG((CE_NOTE, "publish: async/no-break/negend"));
   1900 		result = CT_ACK;
   1901 	}
   1902 
   1903 	/*
   1904 	 * Release the lock only now so that the only point where we
   1905 	 * drop the lock is in wait_for_acks(). This is so that we don't
   1906 	 * miss cv_signal/cv_broadcast from contract holders
   1907 	 */
   1908 	CT_DEBUG((CE_NOTE, "publish: dropping devi_ct_lock"));
   1909 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
   1910 
   1911 out:
   1912 	if (tnvl)
   1913 		nvlist_free(tnvl);
   1914 	if (path)
   1915 		kmem_free(path, MAXPATHLEN);
   1916 
   1917 
   1918 	CT_DEBUG((CE_NOTE, "publish: result = %s", result_str(result)));
   1919 	return (result);
   1920 }
   1921 
   1922 
   1923 /*
   1924  * contract_device_offline
   1925  *
   1926  * Event publishing routine called by I/O framework when a device is offlined.
   1927  */
   1928 ct_ack_t
   1929 contract_device_offline(dev_info_t *dip, dev_t dev, int spec_type)
   1930 {
   1931 	nvlist_t *nvl;
   1932 	uint_t result;
   1933 	uint_t evtype;
   1934 
   1935 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
   1936 
   1937 	evtype = CT_DEV_EV_OFFLINE;
   1938 	result = contract_device_publish(dip, dev, spec_type, evtype, nvl);
   1939 
   1940 	/*
   1941 	 * If a contract offline is NACKED, the framework expects us to call
   1942 	 * NEGEND ourselves, since we know the final result
   1943 	 */
   1944 	if (result == CT_NACK) {
   1945 		contract_device_negend(dip, dev, spec_type, CT_EV_FAILURE);
   1946 	}
   1947 
   1948 	return (result);
   1949 }
   1950 
   1951 /*
   1952  * contract_device_degrade
   1953  *
   1954  * Event publishing routine called by I/O framework when a device
   1955  * moves to degrade state.
   1956  */
   1957 /*ARGSUSED*/
   1958 void
   1959 contract_device_degrade(dev_info_t *dip, dev_t dev, int spec_type)
   1960 {
   1961 	nvlist_t *nvl;
   1962 	uint_t evtype;
   1963 
   1964 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
   1965 
   1966 	evtype = CT_DEV_EV_DEGRADED;
   1967 	(void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
   1968 }
   1969 
   1970 /*
   1971  * contract_device_undegrade
   1972  *
   1973  * Event publishing routine called by I/O framework when a device
   1974  * moves from degraded state to online state.
   1975  */
   1976 /*ARGSUSED*/
   1977 void
   1978 contract_device_undegrade(dev_info_t *dip, dev_t dev, int spec_type)
   1979 {
   1980 	nvlist_t *nvl;
   1981 	uint_t evtype;
   1982 
   1983 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
   1984 
   1985 	evtype = CT_DEV_EV_ONLINE;
   1986 	(void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
   1987 }
   1988 
   1989 /*
   1990  * For all contracts which have undergone a negotiation (because the device
   1991  * moved out of the acceptable state for that contract and the state
   1992  * change is synchronous i.e. requires negotiation) this routine publishes
   1993  * a CT_EV_NEGEND event with the final disposition of the event.
   1994  *
   1995  * This event is always a critical event.
   1996  */
   1997 void
   1998 contract_device_negend(dev_info_t *dip, dev_t dev, int spec_type, int result)
   1999 {
   2000 	nvlist_t *nvl;
   2001 	uint_t evtype;
   2002 
   2003 	ASSERT(result == CT_EV_SUCCESS || result == CT_EV_FAILURE);
   2004 
   2005 	CT_DEBUG((CE_NOTE, "contract_device_negend(): entered: result: %d, "
   2006 	    "dip: %p", result, (void *)dip));
   2007 
   2008 	VERIFY(nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP) == 0);
   2009 	VERIFY(nvlist_add_int32(nvl, CTS_NEWCT,
   2010 	    result == CT_EV_SUCCESS ? 1 : 0) == 0);
   2011 
   2012 	evtype = CT_EV_NEGEND;
   2013 	(void) contract_device_publish(dip, dev, spec_type, evtype, nvl);
   2014 
   2015 	CT_DEBUG((CE_NOTE, "contract_device_negend(): exit dip: %p",
   2016 	    (void *)dip));
   2017 }
   2018 
   2019 /*
   2020  * Wrapper routine called by other subsystems (such as LDI) to start
   2021  * negotiations when a synchronous device state change occurs.
   2022  * Returns CT_ACK or CT_NACK.
   2023  */
   2024 ct_ack_t
   2025 contract_device_negotiate(dev_info_t *dip, dev_t dev, int spec_type,
   2026     uint_t evtype)
   2027 {
   2028 	int	result;
   2029 
   2030 	ASSERT(dip);
   2031 	ASSERT(dev != NODEV);
   2032 	ASSERT(dev != DDI_DEV_T_ANY);
   2033 	ASSERT(dev != DDI_DEV_T_NONE);
   2034 	ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
   2035 
   2036 	switch (evtype) {
   2037 	case CT_DEV_EV_OFFLINE:
   2038 		result = contract_device_offline(dip, dev, spec_type);
   2039 		break;
   2040 	default:
   2041 		cmn_err(CE_PANIC, "contract_device_negotiate(): Negotiation "
   2042 		    "not supported: event (%d) for dev_t (%lu) and spec (%d), "
   2043 		    "dip (%p)", evtype, dev, spec_type, (void *)dip);
   2044 		result = CT_NACK;
   2045 		break;
   2046 	}
   2047 
   2048 	return (result);
   2049 }
   2050 
   2051 /*
   2052  * A wrapper routine called by other subsystems (such as the LDI) to
   2053  * finalize event processing for a state change event. For synchronous
   2054  * state changes, this publishes NEGEND events. For asynchronous i.e.
   2055  * non-negotiable events this publishes the event.
   2056  */
   2057 void
   2058 contract_device_finalize(dev_info_t *dip, dev_t dev, int spec_type,
   2059     uint_t evtype, int ct_result)
   2060 {
   2061 	ASSERT(dip);
   2062 	ASSERT(dev != NODEV);
   2063 	ASSERT(dev != DDI_DEV_T_ANY);
   2064 	ASSERT(dev != DDI_DEV_T_NONE);
   2065 	ASSERT(spec_type == S_IFBLK || spec_type == S_IFCHR);
   2066 
   2067 	switch (evtype) {
   2068 	case CT_DEV_EV_OFFLINE:
   2069 		contract_device_negend(dip, dev, spec_type, ct_result);
   2070 		break;
   2071 	case CT_DEV_EV_DEGRADED:
   2072 		contract_device_degrade(dip, dev, spec_type);
   2073 		contract_device_negend(dip, dev, spec_type, ct_result);
   2074 		break;
   2075 	case CT_DEV_EV_ONLINE:
   2076 		contract_device_undegrade(dip, dev, spec_type);
   2077 		contract_device_negend(dip, dev, spec_type, ct_result);
   2078 		break;
   2079 	default:
   2080 		cmn_err(CE_PANIC, "contract_device_finalize(): Unsupported "
   2081 		    "event (%d) for dev_t (%lu) and spec (%d), dip (%p)",
   2082 		    evtype, dev, spec_type, (void *)dip);
   2083 		break;
   2084 	}
   2085 }
   2086 
   2087 /*
   2088  * Called by I/O framework when a devinfo node is freed to remove the
   2089  * association between a devinfo node and its contracts.
   2090  */
   2091 void
   2092 contract_device_remove_dip(dev_info_t *dip)
   2093 {
   2094 	cont_device_t *ctd;
   2095 	cont_device_t *next;
   2096 	contract_t *ct;
   2097 
   2098 	mutex_enter(&(DEVI(dip)->devi_ct_lock));
   2099 	ct_barrier_wait_for_release(dip);
   2100 
   2101 	for (ctd = list_head(&(DEVI(dip)->devi_ct)); ctd != NULL; ctd = next) {
   2102 		next = list_next(&(DEVI(dip)->devi_ct), ctd);
   2103 		list_remove(&(DEVI(dip)->devi_ct), ctd);
   2104 		ct = &ctd->cond_contract;
   2105 		/*
   2106 		 * Unlink the dip associated with this contract
   2107 		 */
   2108 		mutex_enter(&ct->ct_lock);
   2109 		ASSERT(ctd->cond_dip == dip);
   2110 		ctd->cond_dip = NULL; /* no longer linked to dip */
   2111 		contract_rele(ct);	/* remove hold for dip linkage */
   2112 		CT_DEBUG((CE_NOTE, "ct: remove_dip: removed dip from contract: "
   2113 		    "ctid: %d", ct->ct_id));
   2114 		mutex_exit(&ct->ct_lock);
   2115 	}
   2116 	ASSERT(list_is_empty(&(DEVI(dip)->devi_ct)));
   2117 	mutex_exit(&(DEVI(dip)->devi_ct_lock));
   2118 }
   2119 
   2120 /*
   2121  * Barrier related routines
   2122  */
   2123 static void
   2124 ct_barrier_acquire(dev_info_t *dip)
   2125 {
   2126 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
   2127 	CT_DEBUG((CE_NOTE, "ct_barrier_acquire: waiting for barrier"));
   2128 	while (DEVI(dip)->devi_ct_count != -1)
   2129 		cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
   2130 	DEVI(dip)->devi_ct_count = 0;
   2131 	CT_DEBUG((CE_NOTE, "ct_barrier_acquire: thread owns barrier"));
   2132 }
   2133 
   2134 static void
   2135 ct_barrier_release(dev_info_t *dip)
   2136 {
   2137 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
   2138 	ASSERT(DEVI(dip)->devi_ct_count != -1);
   2139 	DEVI(dip)->devi_ct_count = -1;
   2140 	cv_broadcast(&(DEVI(dip)->devi_ct_cv));
   2141 	CT_DEBUG((CE_NOTE, "ct_barrier_release: Released barrier"));
   2142 }
   2143 
   2144 static int
   2145 ct_barrier_held(dev_info_t *dip)
   2146 {
   2147 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
   2148 	return (DEVI(dip)->devi_ct_count != -1);
   2149 }
   2150 
   2151 static int
   2152 ct_barrier_empty(dev_info_t *dip)
   2153 {
   2154 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
   2155 	ASSERT(DEVI(dip)->devi_ct_count != -1);
   2156 	return (DEVI(dip)->devi_ct_count == 0);
   2157 }
   2158 
   2159 static void
   2160 ct_barrier_wait_for_release(dev_info_t *dip)
   2161 {
   2162 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
   2163 	while (DEVI(dip)->devi_ct_count != -1)
   2164 		cv_wait(&(DEVI(dip)->devi_ct_cv), &(DEVI(dip)->devi_ct_lock));
   2165 }
   2166 
   2167 static void
   2168 ct_barrier_decr(dev_info_t *dip)
   2169 {
   2170 	CT_DEBUG((CE_NOTE, "barrier_decr:  ct_count before decr: %d",
   2171 	    DEVI(dip)->devi_ct_count));
   2172 
   2173 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
   2174 	ASSERT(DEVI(dip)->devi_ct_count > 0);
   2175 
   2176 	DEVI(dip)->devi_ct_count--;
   2177 	if (DEVI(dip)->devi_ct_count == 0) {
   2178 		cv_broadcast(&DEVI(dip)->devi_ct_cv);
   2179 		CT_DEBUG((CE_NOTE, "barrier_decr: cv_broadcast"));
   2180 	}
   2181 }
   2182 
   2183 static void
   2184 ct_barrier_incr(dev_info_t *dip)
   2185 {
   2186 	ASSERT(ct_barrier_held(dip));
   2187 	DEVI(dip)->devi_ct_count++;
   2188 }
   2189 
   2190 static int
   2191 ct_barrier_wait_for_empty(dev_info_t *dip, int secs)
   2192 {
   2193 	clock_t abstime;
   2194 
   2195 	ASSERT(MUTEX_HELD(&(DEVI(dip)->devi_ct_lock)));
   2196 
   2197 	abstime = ddi_get_lbolt() + drv_usectohz(secs*1000000);
   2198 	while (DEVI(dip)->devi_ct_count) {
   2199 		if (cv_timedwait(&(DEVI(dip)->devi_ct_cv),
   2200 		    &(DEVI(dip)->devi_ct_lock), abstime) == -1) {
   2201 			return (-1);
   2202 		}
   2203 	}
   2204 	return (0);
   2205 }
   2206