Home | History | Annotate | Download | only in dev
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * negative cache handling for the /dev fs
     28  */
     29 
     30 #include <sys/types.h>
     31 #include <sys/param.h>
     32 #include <sys/t_lock.h>
     33 #include <sys/systm.h>
     34 #include <sys/sysmacros.h>
     35 #include <sys/user.h>
     36 #include <sys/time.h>
     37 #include <sys/vfs.h>
     38 #include <sys/vnode.h>
     39 #include <sys/file.h>
     40 #include <sys/fcntl.h>
     41 #include <sys/flock.h>
     42 #include <sys/kmem.h>
     43 #include <sys/uio.h>
     44 #include <sys/errno.h>
     45 #include <sys/stat.h>
     46 #include <sys/cred.h>
     47 #include <sys/cmn_err.h>
     48 #include <sys/debug.h>
     49 #include <sys/mode.h>
     50 #include <sys/policy.h>
     51 #include <fs/fs_subr.h>
     52 #include <sys/mount.h>
     53 #include <sys/fs/snode.h>
     54 #include <sys/fs/dv_node.h>
     55 #include <sys/fs/sdev_impl.h>
     56 #include <sys/sunndi.h>
     57 #include <sys/sunmdi.h>
     58 #include <sys/ddi.h>
     59 #include <sys/modctl.h>
     60 #include <sys/devcache.h>
     61 
     62 
     63 /*
 * ncache is a negative cache of failed lookups.  An entry
 * is added after an attempt to configure a device by that
 * name failed.  An accumulation of these entries over time
 * gives us a set of device names for which implicit reconfiguration
 * does not need to be attempted.  If a name is created matching
 * an entry in ncache, that entry is removed, with the
 * persistent store updated.
     71  *
     72  * Implicit reconfig is initiated for any name during lookup that
     73  * can't be resolved from the backing store and that isn't
     74  * present in the negative cache.  This functionality is
     75  * enabled during system startup once communication with devfsadm
     76  * can be achieved.  Since readdir is more general, implicit
     77  * reconfig initiated by reading a directory isn't enabled until
     78  * the system is more fully booted, at the time of the multi-user
     79  * milestone, corresponding to init state 2.
     80  *
     81  * A maximum is imposed on the number of entries in the cache
     82  * to limit some script going wild and as a defense against attack.
     83  * The default limit is 64 and can be adjusted via sdev_nc_max_entries.
     84  *
 * Each entry also has an expiration count.  When a name in the
 * cache is looked up, its count is reset to the default.  Subsequent
 * boots will decrement the count if a name isn't referenced.  This
 * permits a once-only entry to eventually be removed over time.
     89  *
     90  * sdev_reconfig_delay implements a "debounce" of the timing beyond
     91  * system available indication, providing what the filesystem considers
     92  * to be the system-is-fully-booted state.  This is provided to adjust
     93  * the timing if some application startup is performing a readdir
     94  * in /dev that initiates a troublesome implicit reconfig on every boot.
     95  *
     96  * sdev_nc_disable_reset can be used to disable clearing the negative cache
     97  * on reconfig boot.  The default is to clear the cache on reconfig boot.
     98  * sdev_nc_disable can be used to disable the negative cache itself.
     99  *
    100  * sdev_reconfig_disable can be used to disable implicit reconfig.
    101  * The default is that implicit reconfig is enabled.
    102  */
    103 
    104 /* tunables and defaults */
    105 #define	SDEV_NC_EXPIRECNT	4
    106 #define	SDEV_NC_MAX_ENTRIES	64
    107 #define	SEV_RECONFIG_DELAY	6	/* seconds */
    108 
    109 /* tunables */
    110 int	sdev_nc_expirecnt = SDEV_NC_EXPIRECNT;
    111 int	sdev_nc_max_entries = SDEV_NC_MAX_ENTRIES;
    112 int	sdev_reconfig_delay = SEV_RECONFIG_DELAY;
    113 int	sdev_reconfig_verbose = 0;
    114 int	sdev_reconfig_disable = 0;
    115 int	sdev_nc_disable = 0;
    116 int	sdev_nc_disable_reset = 0;
    117 int	sdev_nc_verbose = 0;
    118 int	sdev_cache_read_disable = 0;
    119 int	sdev_cache_write_disable = 0;
    120 
    121 /* globals */
    122 int	sdev_boot_state = SDEV_BOOT_STATE_INITIAL;
    123 int	sdev_reconfig_boot = 0;
    124 sdev_nc_list_t *sdev_ncache;
    125 static nvf_handle_t sdevfd_handle;
    126 
    127 /* static prototypes */
    128 static void sdev_ncache_write_complete(nvf_handle_t);
    129 static void sdev_ncache_write(void);
    130 static void sdev_ncache_process_store(void);
    131 static sdev_nc_list_t *sdev_nc_newlist(void);
    132 static void sdev_nc_free_unlinked_node(sdev_nc_node_t *);
    133 static sdev_nc_node_t *sdev_nc_findpath(sdev_nc_list_t *, char *);
    134 static void sdev_nc_insertnode(sdev_nc_list_t *, sdev_nc_node_t *);
    135 static void sdev_nc_free_bootonly(void);
    136 static int sdev_ncache_unpack_nvlist(nvf_handle_t, nvlist_t *, char *);
    137 static int sdev_ncache_pack_list(nvf_handle_t, nvlist_t **);
    138 static void sdev_ncache_list_free(nvf_handle_t);
    139 static void sdev_nvp_free(nvp_devname_t *);
    140 
    141 /*
    142  * Registration for /etc/devices/devname_cache
    143  */
    144 static nvf_ops_t sdev_cache_ops = {
    145 	"/etc/devices/devname_cache",		/* path to cache */
    146 	sdev_ncache_unpack_nvlist,		/* read: unpack nvlist */
    147 	sdev_ncache_pack_list,			/* write: pack list */
    148 	sdev_ncache_list_free,			/* free data list */
    149 	sdev_ncache_write_complete		/* write complete callback */
    150 };
    151 
    152 /*
    153  * called once at filesystem initialization
    154  */
    155 void
    156 sdev_ncache_init(void)
    157 {
    158 	sdev_ncache = sdev_nc_newlist();
    159 }
    160 
    161 /*
    162  * called at mount of the global instance
    163  * currently the global instance is never unmounted
    164  */
    165 void
    166 sdev_ncache_setup(void)
    167 {
    168 	sdevfd_handle = nvf_register_file(&sdev_cache_ops);
    169 	ASSERT(sdevfd_handle);
    170 
    171 	list_create(nvf_list(sdevfd_handle), sizeof (nvp_devname_t),
    172 	    offsetof(nvp_devname_t, nvp_link));
    173 
    174 	rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
    175 	if (!sdev_cache_read_disable) {
    176 		(void) nvf_read_file(sdevfd_handle);
    177 	}
    178 	sdev_ncache_process_store();
    179 	rw_exit(nvf_lock(sdevfd_handle));
    180 
    181 	sdev_devstate_change();
    182 }
    183 
    184 static void
    185 sdev_nvp_free(nvp_devname_t *dp)
    186 {
    187 	int	i;
    188 	char	**p;
    189 
    190 	if (dp->nvp_npaths > 0) {
    191 		p = dp->nvp_paths;
    192 		for (i = 0; i < dp->nvp_npaths; i++, p++) {
    193 			kmem_free(*p, strlen(*p)+1);
    194 		}
    195 		kmem_free(dp->nvp_paths,
    196 		    dp->nvp_npaths * sizeof (char *));
    197 		kmem_free(dp->nvp_expirecnts,
    198 		    dp->nvp_npaths * sizeof (int));
    199 	}
    200 
    201 	kmem_free(dp, sizeof (nvp_devname_t));
    202 }
    203 
    204 static void
    205 sdev_ncache_list_free(nvf_handle_t fd)
    206 {
    207 	list_t		*listp;
    208 	nvp_devname_t	*dp;
    209 
    210 	ASSERT(fd == sdevfd_handle);
    211 	ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
    212 
    213 	listp = nvf_list(fd);
    214 	if ((dp = list_head(listp)) != NULL) {
    215 		list_remove(listp, dp);
    216 		sdev_nvp_free(dp);
    217 	}
    218 }
    219 
    220 /*
    221  * Unpack a device path/nvlist pair to internal data list format.
    222  * Used to decode the nvlist format into the internal representation
    223  * when reading /etc/devices/devname_cache.
    224  * Note that the expiration counts are optional, for compatibility
    225  * with earlier instances of the cache.  If not present, the
    226  * expire counts are initialized to defaults.
    227  */
    228 static int
    229 sdev_ncache_unpack_nvlist(nvf_handle_t fd, nvlist_t *nvl, char *name)
    230 {
    231 	nvp_devname_t *np;
    232 	char	**strs;
    233 	int	*cnts;
    234 	uint_t	nstrs, ncnts;
    235 	int	rval, i;
    236 
    237 	ASSERT(fd == sdevfd_handle);
    238 	ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
    239 
    240 	/* name of the sublist must match what we created */
    241 	if (strcmp(name, DP_DEVNAME_ID) != 0) {
    242 		return (-1);
    243 	}
    244 
    245 	np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
    246 
    247 	rval = nvlist_lookup_string_array(nvl,
    248 	    DP_DEVNAME_NCACHE_ID, &strs, &nstrs);
    249 	if (rval) {
    250 		kmem_free(np, sizeof (nvp_devname_t));
    251 		return (-1);
    252 	}
    253 
    254 	np->nvp_npaths = nstrs;
    255 	np->nvp_paths = kmem_zalloc(nstrs * sizeof (char *), KM_SLEEP);
    256 	for (i = 0; i < nstrs; i++) {
    257 		np->nvp_paths[i] = i_ddi_strdup(strs[i], KM_SLEEP);
    258 	}
    259 	np->nvp_expirecnts = kmem_zalloc(nstrs * sizeof (int), KM_SLEEP);
    260 	for (i = 0; i < nstrs; i++) {
    261 		np->nvp_expirecnts[i] = sdev_nc_expirecnt;
    262 	}
    263 
    264 	rval = nvlist_lookup_int32_array(nvl,
    265 	    DP_DEVNAME_NC_EXPIRECNT_ID, &cnts, &ncnts);
    266 	if (rval == 0) {
    267 		ASSERT(ncnts == nstrs);
    268 		ncnts = min(ncnts, nstrs);
    269 		for (i = 0; i < nstrs; i++) {
    270 			np->nvp_expirecnts[i] = cnts[i];
    271 		}
    272 	}
    273 
    274 	list_insert_tail(nvf_list(sdevfd_handle), np);
    275 
    276 	return (0);
    277 }
    278 
    279 /*
    280  * Pack internal format cache data to a single nvlist.
    281  * Used when writing the nvlist file.
    282  * Note this is called indirectly by the nvpflush daemon.
    283  */
    284 static int
    285 sdev_ncache_pack_list(nvf_handle_t fd, nvlist_t **ret_nvl)
    286 {
    287 	nvlist_t	*nvl, *sub_nvl;
    288 	nvp_devname_t	*np;
    289 	int		rval;
    290 	list_t		*listp;
    291 
    292 	ASSERT(fd == sdevfd_handle);
    293 	ASSERT(RW_WRITE_HELD(nvf_lock(fd)));
    294 
    295 	rval = nvlist_alloc(&nvl, NV_UNIQUE_NAME, KM_SLEEP);
    296 	if (rval != 0) {
    297 		nvf_error("%s: nvlist alloc error %d\n",
    298 		    nvf_cache_name(fd), rval);
    299 		return (DDI_FAILURE);
    300 	}
    301 
    302 	listp = nvf_list(sdevfd_handle);
    303 	if ((np = list_head(listp)) != NULL) {
    304 		ASSERT(list_next(listp, np) == NULL);
    305 
    306 		rval = nvlist_alloc(&sub_nvl, NV_UNIQUE_NAME, KM_SLEEP);
    307 		if (rval != 0) {
    308 			nvf_error("%s: nvlist alloc error %d\n",
    309 			    nvf_cache_name(fd), rval);
    310 			sub_nvl = NULL;
    311 			goto err;
    312 		}
    313 
    314 		rval = nvlist_add_string_array(sub_nvl,
    315 		    DP_DEVNAME_NCACHE_ID, np->nvp_paths, np->nvp_npaths);
    316 		if (rval != 0) {
    317 			nvf_error("%s: nvlist add error %d (sdev)\n",
    318 			    nvf_cache_name(fd), rval);
    319 			goto err;
    320 		}
    321 
    322 		rval = nvlist_add_int32_array(sub_nvl,
    323 		    DP_DEVNAME_NC_EXPIRECNT_ID,
    324 		    np->nvp_expirecnts, np->nvp_npaths);
    325 		if (rval != 0) {
    326 			nvf_error("%s: nvlist add error %d (sdev)\n",
    327 			    nvf_cache_name(fd), rval);
    328 			goto err;
    329 		}
    330 
    331 		rval = nvlist_add_nvlist(nvl, DP_DEVNAME_ID, sub_nvl);
    332 		if (rval != 0) {
    333 			nvf_error("%s: nvlist add error %d (sublist)\n",
    334 			    nvf_cache_name(fd), rval);
    335 			goto err;
    336 		}
    337 		nvlist_free(sub_nvl);
    338 	}
    339 
    340 	*ret_nvl = nvl;
    341 	return (DDI_SUCCESS);
    342 
    343 err:
    344 	if (sub_nvl)
    345 		nvlist_free(sub_nvl);
    346 	nvlist_free(nvl);
    347 	*ret_nvl = NULL;
    348 	return (DDI_FAILURE);
    349 }
    350 
    351 /*
    352  * Run through the data read from the backing cache store
    353  * to establish the initial state of the neg. cache.
    354  */
    355 static void
    356 sdev_ncache_process_store(void)
    357 {
    358 	sdev_nc_list_t	*ncl = sdev_ncache;
    359 	nvp_devname_t	*np;
    360 	sdev_nc_node_t	*lp;
    361 	char		*path;
    362 	int		i, n;
    363 	list_t		*listp;
    364 
    365 	if (sdev_nc_disable)
    366 		return;
    367 
    368 	ASSERT(RW_WRITE_HELD(nvf_lock(sdevfd_handle)));
    369 
    370 	listp = nvf_list(sdevfd_handle);
    371 	for (np = list_head(listp); np; np = list_next(listp, np)) {
    372 		for (i = 0; i < np->nvp_npaths; i++) {
    373 			sdcmn_err5(("    %s %d\n",
    374 			    np->nvp_paths[i], np->nvp_expirecnts[i]));
    375 			if (ncl->ncl_nentries < sdev_nc_max_entries) {
    376 				path = np->nvp_paths[i];
    377 				n = strlen(path) + 1;
    378 				lp = kmem_alloc(sizeof (sdev_nc_node_t),
    379 				    KM_SLEEP);
    380 				lp->ncn_name = kmem_alloc(n, KM_SLEEP);
    381 				bcopy(path, lp->ncn_name, n);
    382 				lp->ncn_flags = NCN_SRC_STORE;
    383 				lp->ncn_expirecnt = np->nvp_expirecnts[i];
    384 				sdev_nc_insertnode(ncl, lp);
    385 			} else if (sdev_nc_verbose) {
    386 				cmn_err(CE_CONT,
    387 				    "?%s: truncating from ncache (max %d)\n",
    388 				    np->nvp_paths[i], sdev_nc_max_entries);
    389 			}
    390 		}
    391 	}
    392 }
    393 
    394 /*
    395  * called by nvpflush daemon to inform us that an update of
    396  * the cache file has been completed.
    397  */
    398 static void
    399 sdev_ncache_write_complete(nvf_handle_t fd)
    400 {
    401 	sdev_nc_list_t	*ncl = sdev_ncache;
    402 
    403 	ASSERT(fd == sdevfd_handle);
    404 
    405 	mutex_enter(&ncl->ncl_mutex);
    406 
    407 	ASSERT(ncl->ncl_flags & NCL_LIST_WRITING);
    408 
    409 	if (ncl->ncl_flags & NCL_LIST_DIRTY) {
    410 		sdcmn_err5(("ncache write complete but dirty again\n"));
    411 		ncl->ncl_flags &= ~NCL_LIST_DIRTY;
    412 		mutex_exit(&ncl->ncl_mutex);
    413 		sdev_ncache_write();
    414 	} else {
    415 		sdcmn_err5(("ncache write complete\n"));
    416 		ncl->ncl_flags &= ~NCL_LIST_WRITING;
    417 		mutex_exit(&ncl->ncl_mutex);
    418 		rw_enter(nvf_lock(fd), RW_WRITER);
    419 		sdev_ncache_list_free(fd);
    420 		rw_exit(nvf_lock(fd));
    421 	}
    422 }
    423 
    424 /*
    425  * Prepare to perform an update of the neg. cache backing store.
    426  */
    427 static void
    428 sdev_ncache_write(void)
    429 {
    430 	sdev_nc_list_t	*ncl = sdev_ncache;
    431 	nvp_devname_t	*np;
    432 	sdev_nc_node_t	*lp;
    433 	int		n, i;
    434 
    435 	if (sdev_cache_write_disable) {
    436 		mutex_enter(&ncl->ncl_mutex);
    437 		ncl->ncl_flags &= ~NCL_LIST_WRITING;
    438 		mutex_exit(&ncl->ncl_mutex);
    439 		return;
    440 	}
    441 
    442 	/* proper lock ordering here is essential */
    443 	rw_enter(nvf_lock(sdevfd_handle), RW_WRITER);
    444 	sdev_ncache_list_free(sdevfd_handle);
    445 
    446 	rw_enter(&ncl->ncl_lock, RW_READER);
    447 	n = ncl->ncl_nentries;
    448 	ASSERT(n <= sdev_nc_max_entries);
    449 
    450 	np = kmem_zalloc(sizeof (nvp_devname_t), KM_SLEEP);
    451 	np->nvp_npaths = n;
    452 	np->nvp_paths = kmem_zalloc(n * sizeof (char *), KM_SLEEP);
    453 	np->nvp_expirecnts = kmem_zalloc(n * sizeof (int), KM_SLEEP);
    454 
    455 	i = 0;
    456 	for (lp = list_head(&ncl->ncl_list); lp;
    457 	    lp = list_next(&ncl->ncl_list, lp)) {
    458 		np->nvp_paths[i] = i_ddi_strdup(lp->ncn_name, KM_SLEEP);
    459 		np->nvp_expirecnts[i] = lp->ncn_expirecnt;
    460 		sdcmn_err5(("    %s %d\n",
    461 		    np->nvp_paths[i], np->nvp_expirecnts[i]));
    462 		i++;
    463 	}
    464 
    465 	rw_exit(&ncl->ncl_lock);
    466 
    467 	nvf_mark_dirty(sdevfd_handle);
    468 	list_insert_tail(nvf_list(sdevfd_handle), np);
    469 	rw_exit(nvf_lock(sdevfd_handle));
    470 
    471 	nvf_wake_daemon();
    472 }
    473 
    474 static void
    475 sdev_nc_flush_updates(void)
    476 {
    477 	sdev_nc_list_t *ncl = sdev_ncache;
    478 
    479 	if (sdev_nc_disable || sdev_cache_write_disable)
    480 		return;
    481 
    482 	mutex_enter(&ncl->ncl_mutex);
    483 	if (((ncl->ncl_flags &
    484 	    (NCL_LIST_DIRTY | NCL_LIST_WENABLE | NCL_LIST_WRITING)) ==
    485 	    (NCL_LIST_DIRTY | NCL_LIST_WENABLE))) {
    486 		ncl->ncl_flags &= ~NCL_LIST_DIRTY;
    487 		ncl->ncl_flags |= NCL_LIST_WRITING;
    488 		mutex_exit(&ncl->ncl_mutex);
    489 		sdev_ncache_write();
    490 	} else {
    491 		mutex_exit(&ncl->ncl_mutex);
    492 	}
    493 }
    494 
    495 static void
    496 sdev_nc_flush_boot_update(void)
    497 {
    498 	sdev_nc_list_t *ncl = sdev_ncache;
    499 
    500 	if (sdev_nc_disable || sdev_cache_write_disable ||
    501 	    (sdev_boot_state == SDEV_BOOT_STATE_INITIAL)) {
    502 		return;
    503 	}
    504 	mutex_enter(&ncl->ncl_mutex);
    505 	if (ncl->ncl_flags & NCL_LIST_WENABLE) {
    506 		mutex_exit(&ncl->ncl_mutex);
    507 		sdev_nc_flush_updates();
    508 	} else {
    509 		mutex_exit(&ncl->ncl_mutex);
    510 	}
    511 
    512 }
    513 
    514 static void
    515 sdev_state_boot_complete()
    516 {
    517 	sdev_nc_list_t	*ncl = sdev_ncache;
    518 	sdev_nc_node_t	*lp, *next;
    519 
    520 	/*
    521 	 * Once boot is complete, decrement the expire count of each entry
    522 	 * in the cache not touched by a reference.  Remove any that
    523 	 * goes to zero.  This effectively removes random entries over
    524 	 * time.
    525 	 */
    526 	rw_enter(&ncl->ncl_lock, RW_WRITER);
    527 	mutex_enter(&ncl->ncl_mutex);
    528 
    529 	for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
    530 		next = list_next(&ncl->ncl_list, lp);
    531 		if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0) {
    532 			if (lp->ncn_flags & NCN_ACTIVE) {
    533 				if (lp->ncn_expirecnt != sdev_nc_expirecnt) {
    534 					lp->ncn_expirecnt = sdev_nc_expirecnt;
    535 					ncl->ncl_flags |= NCL_LIST_DIRTY;
    536 				}
    537 			} else {
    538 				if (--lp->ncn_expirecnt == 0) {
    539 					list_remove(&ncl->ncl_list, lp);
    540 					sdev_nc_free_unlinked_node(lp);
    541 					ncl->ncl_nentries--;
    542 				}
    543 				ncl->ncl_flags |= NCL_LIST_DIRTY;
    544 			}
    545 		}
    546 	}
    547 
    548 	mutex_exit(&ncl->ncl_mutex);
    549 	rw_exit(&ncl->ncl_lock);
    550 
    551 	sdev_nc_flush_boot_update();
    552 	sdev_boot_state = SDEV_BOOT_STATE_COMPLETE;
    553 }
    554 
    555 /*
    556  * Upon transition to the login state on a reconfigure boot,
    557  * a debounce timer is set up so that we cache all the nonsense
    558  * lookups we're hit with by the windowing system startup.
    559  */
    560 
    561 /*ARGSUSED*/
    562 static void
    563 sdev_state_timeout(void *arg)
    564 {
    565 	sdev_state_boot_complete();
    566 }
    567 
    568 static void
    569 sdev_state_sysavail()
    570 {
    571 	sdev_nc_list_t *ncl = sdev_ncache;
    572 	clock_t	nticks;
    573 	int nsecs;
    574 
    575 	mutex_enter(&ncl->ncl_mutex);
    576 	ncl->ncl_flags |= NCL_LIST_WENABLE;
    577 	mutex_exit(&ncl->ncl_mutex);
    578 
    579 	nsecs = sdev_reconfig_delay;
    580 	if (nsecs == 0) {
    581 		sdev_state_boot_complete();
    582 	} else {
    583 		nticks = drv_usectohz(1000000 * nsecs);
    584 		sdcmn_err5(("timeout initiated %ld\n", nticks));
    585 		(void) timeout(sdev_state_timeout, NULL, nticks);
    586 		sdev_nc_flush_boot_update();
    587 	}
    588 }
    589 
    590 /*
    591  * Called to inform the filesystem of progress during boot,
    592  * either a notice of reconfiguration boot or an indication of
    593  * system boot complete.  At system boot complete, set up a
    594  * timer at the expiration of which no further failed lookups
    595  * will be added to the negative cache.
    596  *
    597  * The dev filesystem infers from reconfig boot that implicit
    598  * reconfig need not be invoked at all as all available devices
    599  * will have already been named.
    600  *
    601  * The dev filesystem infers from "system available" that devfsadmd
    602  * can now be run and hence implicit reconfiguration may be initiated.
    603  * During early stages of system startup, implicit reconfig is
    604  * not done to avoid impacting boot performance.
    605  */
    606 void
    607 sdev_devstate_change(void)
    608 {
    609 	int new_state;
    610 
    611 	/*
    612 	 * Track system state and manage interesting transitions
    613 	 */
    614 	new_state = SDEV_BOOT_STATE_INITIAL;
    615 	if (i_ddi_reconfig())
    616 		new_state = SDEV_BOOT_STATE_RECONFIG;
    617 	if (i_ddi_sysavail())
    618 		new_state = SDEV_BOOT_STATE_SYSAVAIL;
    619 
    620 	if (sdev_boot_state < new_state) {
    621 		switch (new_state) {
    622 		case SDEV_BOOT_STATE_RECONFIG:
    623 			sdcmn_err5(("state change: reconfigure boot\n"));
    624 			sdev_boot_state = new_state;
    625 			sdev_reconfig_boot = 1;
    626 			if (!sdev_nc_disable_reset)
    627 				sdev_nc_free_bootonly();
    628 			break;
    629 		case SDEV_BOOT_STATE_SYSAVAIL:
    630 			sdcmn_err5(("system available\n"));
    631 			sdev_boot_state = new_state;
    632 			sdev_state_sysavail();
    633 			break;
    634 		}
    635 	}
    636 }
    637 
    638 /*
    639  * Lookup: filter out entries in the negative cache
    640  * Return 1 if the lookup should not cause a reconfig.
    641  */
    642 int
    643 sdev_lookup_filter(sdev_node_t *dv, char *nm)
    644 {
    645 	int n;
    646 	sdev_nc_list_t *ncl = sdev_ncache;
    647 	sdev_nc_node_t *lp;
    648 	char *path;
    649 	int rval = 0;
    650 	int changed = 0;
    651 
    652 	ASSERT(i_ddi_io_initialized());
    653 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
    654 
    655 	if (sdev_nc_disable)
    656 		return (0);
    657 
    658 	n = strlen(dv->sdev_path) + strlen(nm) + 2;
    659 	path = kmem_alloc(n, KM_SLEEP);
    660 	(void) sprintf(path, "%s/%s", dv->sdev_path, nm);
    661 
    662 	rw_enter(&ncl->ncl_lock, RW_READER);
    663 	if ((lp = sdev_nc_findpath(ncl, path)) != NULL) {
    664 		sdcmn_err5(("%s/%s: lookup by %s cached, no reconfig\n",
    665 		    dv->sdev_name, nm, curproc->p_user.u_comm));
    666 		if (sdev_nc_verbose) {
    667 			cmn_err(CE_CONT,
    668 			    "?%s/%s: lookup by %s cached, no reconfig\n",
    669 			    dv->sdev_name, nm, curproc->p_user.u_comm);
    670 		}
    671 		mutex_enter(&ncl->ncl_mutex);
    672 		lp->ncn_flags |= NCN_ACTIVE;
    673 		if (sdev_nc_expirecnt > 0 && lp->ncn_expirecnt > 0 &&
    674 		    lp->ncn_expirecnt < sdev_nc_expirecnt) {
    675 			lp->ncn_expirecnt = sdev_nc_expirecnt;
    676 			ncl->ncl_flags |= NCL_LIST_DIRTY;
    677 			changed = 1;
    678 		}
    679 		mutex_exit(&ncl->ncl_mutex);
    680 		rval = 1;
    681 	}
    682 	rw_exit(&ncl->ncl_lock);
    683 	kmem_free(path, n);
    684 	if (changed)
    685 		sdev_nc_flush_boot_update();
    686 	return (rval);
    687 }
    688 
    689 void
    690 sdev_lookup_failed(sdev_node_t *dv, char *nm, int failed_flags)
    691 {
    692 	if (sdev_nc_disable)
    693 		return;
    694 
    695 	/*
    696 	 * If we're still in the initial boot stage, always update
    697 	 * the cache - we may not have received notice of the
    698 	 * reconfig boot state yet.  On a reconfigure boot, entries
    699 	 * from the backing store are not re-persisted on update,
    700 	 * but new entries are marked as needing an update.
    701 	 * Never cache dynamic or non-global nodes.
    702 	 */
    703 	if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
    704 	    !SDEV_IS_NO_NCACHE(dv) &&
    705 	    ((failed_flags & SLF_NO_NCACHE) == 0) &&
    706 	    ((sdev_reconfig_boot &&
    707 	    (sdev_boot_state != SDEV_BOOT_STATE_COMPLETE)) ||
    708 	    (!sdev_reconfig_boot && ((failed_flags & SLF_REBUILT))))) {
    709 			sdev_nc_addname(sdev_ncache,
    710 			    dv, nm, NCN_SRC_CURRENT|NCN_ACTIVE);
    711 	}
    712 }
    713 
    714 static sdev_nc_list_t *
    715 sdev_nc_newlist(void)
    716 {
    717 	sdev_nc_list_t	*ncl;
    718 
    719 	ncl = kmem_zalloc(sizeof (sdev_nc_list_t), KM_SLEEP);
    720 
    721 	rw_init(&ncl->ncl_lock, NULL, RW_DEFAULT, NULL);
    722 	mutex_init(&ncl->ncl_mutex, NULL, MUTEX_DEFAULT, NULL);
    723 	list_create(&ncl->ncl_list, sizeof (sdev_nc_node_t),
    724 	    offsetof(sdev_nc_node_t, ncn_link));
    725 
    726 	return (ncl);
    727 }
    728 
    729 static void
    730 sdev_nc_free_unlinked_node(sdev_nc_node_t *lp)
    731 {
    732 	kmem_free(lp->ncn_name, strlen(lp->ncn_name) + 1);
    733 	kmem_free(lp, sizeof (sdev_nc_node_t));
    734 }
    735 
    736 static sdev_nc_node_t *
    737 sdev_nc_findpath(sdev_nc_list_t *ncl, char *path)
    738 {
    739 	sdev_nc_node_t *lp;
    740 
    741 	ASSERT(RW_LOCK_HELD(&ncl->ncl_lock));
    742 
    743 	for (lp = list_head(&ncl->ncl_list); lp;
    744 	    lp = list_next(&ncl->ncl_list, lp)) {
    745 		if (strcmp(path, lp->ncn_name) == 0)
    746 			return (lp);
    747 	}
    748 
    749 	return (NULL);
    750 }
    751 
    752 static void
    753 sdev_nc_insertnode(sdev_nc_list_t *ncl, sdev_nc_node_t *new)
    754 {
    755 	sdev_nc_node_t *lp;
    756 
    757 	rw_enter(&ncl->ncl_lock, RW_WRITER);
    758 
    759 	lp = sdev_nc_findpath(ncl, new->ncn_name);
    760 	if (lp == NULL) {
    761 		if (ncl->ncl_nentries == sdev_nc_max_entries) {
    762 			sdcmn_err5((
    763 			    "%s by %s: not adding to ncache (max %d)\n",
    764 			    new->ncn_name, curproc->p_user.u_comm,
    765 			    ncl->ncl_nentries));
    766 			if (sdev_nc_verbose) {
    767 				cmn_err(CE_CONT, "?%s by %s: "
    768 				    "not adding to ncache (max %d)\n",
    769 				    new->ncn_name, curproc->p_user.u_comm,
    770 				    ncl->ncl_nentries);
    771 			}
    772 			rw_exit(&ncl->ncl_lock);
    773 			sdev_nc_free_unlinked_node(new);
    774 		} else {
    775 
    776 			list_insert_tail(&ncl->ncl_list, new);
    777 			ncl->ncl_nentries++;
    778 
    779 			/* don't mark list dirty for nodes from store */
    780 			mutex_enter(&ncl->ncl_mutex);
    781 			if ((new->ncn_flags & NCN_SRC_STORE) == 0) {
    782 				sdcmn_err5(("%s by %s: add to ncache\n",
    783 				    new->ncn_name, curproc->p_user.u_comm));
    784 				if (sdev_nc_verbose) {
    785 					cmn_err(CE_CONT,
    786 					    "?%s by %s: add to ncache\n",
    787 					    new->ncn_name,
    788 					    curproc->p_user.u_comm);
    789 				}
    790 				ncl->ncl_flags |= NCL_LIST_DIRTY;
    791 			}
    792 			mutex_exit(&ncl->ncl_mutex);
    793 			rw_exit(&ncl->ncl_lock);
    794 			lp = new;
    795 			sdev_nc_flush_boot_update();
    796 		}
    797 	} else {
    798 		mutex_enter(&ncl->ncl_mutex);
    799 		lp->ncn_flags |= new->ncn_flags;
    800 		mutex_exit(&ncl->ncl_mutex);
    801 		rw_exit(&ncl->ncl_lock);
    802 		sdev_nc_free_unlinked_node(new);
    803 	}
    804 }
    805 
    806 void
    807 sdev_nc_addname(sdev_nc_list_t *ncl, sdev_node_t *dv, char *nm, int flags)
    808 {
    809 	int n;
    810 	sdev_nc_node_t *lp;
    811 
    812 	ASSERT(SDEVTOV(dv)->v_type == VDIR);
    813 
    814 	lp = kmem_zalloc(sizeof (sdev_nc_node_t), KM_SLEEP);
    815 
    816 	n = strlen(dv->sdev_path) + strlen(nm) + 2;
    817 	lp->ncn_name = kmem_alloc(n, KM_SLEEP);
    818 	(void) sprintf(lp->ncn_name, "%s/%s",
    819 	    dv->sdev_path, nm);
    820 	lp->ncn_flags = flags;
    821 	lp->ncn_expirecnt = sdev_nc_expirecnt;
    822 	sdev_nc_insertnode(ncl, lp);
    823 }
    824 
    825 void
    826 sdev_nc_node_exists(sdev_node_t *dv)
    827 {
    828 	/* dynamic and non-global nodes are never cached */
    829 	if (SDEV_IS_GLOBAL(dv) && !SDEV_IS_DYNAMIC(dv) &&
    830 	    !SDEV_IS_NO_NCACHE(dv)) {
    831 		sdev_nc_path_exists(sdev_ncache, dv->sdev_path);
    832 	}
    833 }
    834 
    835 void
    836 sdev_nc_path_exists(sdev_nc_list_t *ncl, char *path)
    837 {
    838 	sdev_nc_node_t *lp;
    839 
    840 	if (sdev_nc_disable)
    841 		return;
    842 
    843 	rw_enter(&ncl->ncl_lock, RW_READER);
    844 	if ((lp = sdev_nc_findpath(ncl, path)) == NULL) {
    845 		rw_exit(&ncl->ncl_lock);
    846 		return;
    847 	}
    848 	if (rw_tryupgrade(&ncl->ncl_lock) == 0) {
    849 		rw_exit(&ncl->ncl_lock);
    850 		rw_enter(&ncl->ncl_lock, RW_WRITER);
    851 		lp = sdev_nc_findpath(ncl, path);
    852 	}
    853 	if (lp) {
    854 		list_remove(&ncl->ncl_list, lp);
    855 		ncl->ncl_nentries--;
    856 		mutex_enter(&ncl->ncl_mutex);
    857 		ncl->ncl_flags |= NCL_LIST_DIRTY;
    858 		if (ncl->ncl_flags & NCL_LIST_WENABLE) {
    859 			mutex_exit(&ncl->ncl_mutex);
    860 			rw_exit(&ncl->ncl_lock);
    861 			sdev_nc_flush_updates();
    862 		} else {
    863 			mutex_exit(&ncl->ncl_mutex);
    864 			rw_exit(&ncl->ncl_lock);
    865 		}
    866 		sdev_nc_free_unlinked_node(lp);
    867 		sdcmn_err5(("%s by %s: removed from ncache\n",
    868 		    path, curproc->p_user.u_comm));
    869 		if (sdev_nc_verbose) {
    870 			cmn_err(CE_CONT, "?%s by %s: removed from ncache\n",
    871 			    path, curproc->p_user.u_comm);
    872 		}
    873 	} else
    874 		rw_exit(&ncl->ncl_lock);
    875 }
    876 
    877 static void
    878 sdev_nc_free_bootonly(void)
    879 {
    880 	sdev_nc_list_t	*ncl = sdev_ncache;
    881 	sdev_nc_node_t *lp;
    882 	sdev_nc_node_t *next;
    883 
    884 	ASSERT(sdev_reconfig_boot);
    885 
    886 	rw_enter(&ncl->ncl_lock, RW_WRITER);
    887 
    888 	for (lp = list_head(&ncl->ncl_list); lp; lp = next) {
    889 		next = list_next(&ncl->ncl_list, lp);
    890 		if ((lp->ncn_flags & NCN_SRC_CURRENT) == 0) {
    891 			sdcmn_err5(("freeing %s\n", lp->ncn_name));
    892 			mutex_enter(&ncl->ncl_mutex);
    893 			ncl->ncl_flags |= NCL_LIST_DIRTY;
    894 			mutex_exit(&ncl->ncl_mutex);
    895 			list_remove(&ncl->ncl_list, lp);
    896 			sdev_nc_free_unlinked_node(lp);
    897 			ncl->ncl_nentries--;
    898 		}
    899 	}
    900 
    901 	rw_exit(&ncl->ncl_lock);
    902 }
    903