Home | History | Annotate | Download | only in ufs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/param.h>
     28 #include <sys/systm.h>
     29 #include <sys/errno.h>
     30 #include <sys/mode.h>
     31 #include <sys/sysmacros.h>
     32 #include <sys/cmn_err.h>
     33 #include <sys/varargs.h>
     34 #include <sys/time.h>
     35 #include <sys/buf.h>
     36 #include <sys/kmem.h>
     37 #include <sys/t_lock.h>
     38 #include <sys/poll.h>
     39 #include <sys/debug.h>
     40 #include <sys/cred.h>
     41 #include <sys/lockfs.h>
     42 #include <sys/fs/ufs_fs.h>
     43 #include <sys/fs/ufs_inode.h>
     44 #include <sys/fs/ufs_panic.h>
     45 #include <sys/fs/ufs_lockfs.h>
     46 #include <sys/fs/ufs_trans.h>
     47 #include <sys/fs/ufs_mount.h>
     48 #include <sys/fs/ufs_prot.h>
     49 #include <sys/fs/ufs_bio.h>
     50 #include <sys/pathname.h>
     51 #include <sys/utsname.h>
     52 #include <sys/conf.h>
     53 
     54 /* handy */
     55 #define	abs(x)		((x) < 0? -(x): (x))
     56 
     57 #if defined(DEBUG)
     58 
     59 #define	DBGLVL_NONE	0x00000000
     60 #define	DBGLVL_MAJOR	0x00000100
     61 #define	DBGLVL_MINOR	0x00000200
     62 #define	DBGLVL_MINUTE	0x00000400
     63 #define	DBGLVL_TRIVIA	0x00000800
     64 #define	DBGLVL_HIDEOUS	0x00001000
     65 
     66 #define	DBGFLG_NONE		0x00000000
     67 #define	DBGFLG_NOPANIC		0x00000001
     68 #define	DBGFLG_LVLONLY		0x00000002
     69 #define	DBGFLG_FIXWOULDPANIC	0x00000004
     70 
     71 #define	DBGFLG_FLAGMASK		0x0000000F
     72 #define	DBGFLG_LEVELMASK	~DBGFLG_FLAGMASK
     73 
     74 #define	DEBUG_FLAGS	(ufs_fix_failure_dbg & DBGFLG_FLAGMASK)
     75 #define	DEBUG_LEVEL	(ufs_fix_failure_dbg & DBGFLG_LEVELMASK)
     76 
     77 unsigned int ufs_fix_failure_dbg =	DBGLVL_NONE | DBGFLG_NONE;
     78 
     79 #define	DCALL(dbg_level, call)						\
     80 	{								\
     81 		if (DEBUG_LEVEL != DBGLVL_NONE) {			\
     82 			if (DEBUG_FLAGS & DBGFLG_LVLONLY) {		\
     83 				if (DEBUG_LEVEL & dbg_level) {		\
     84 					call;				\
     85 				}					\
     86 			} else {					\
     87 				if (dbg_level <= DEBUG_LEVEL) {		\
     88 					call;				\
     89 				}					\
     90 			}						\
     91 		}							\
     92 	}
     93 
     94 #define	DPRINTF(dbg_level, msg)		DCALL(dbg_level, printf msg)
     95 
     96 #define	MAJOR(msg)			DPRINTF(DBGLVL_MAJOR, msg)
     97 #define	MINOR(msg)			DPRINTF(DBGLVL_MINOR, msg)
     98 #define	MINUTE(msg)			DPRINTF(DBGLVL_MINUTE, msg)
     99 #define	TRIVIA(msg)			DPRINTF(DBGLVL_TRIVIA, msg)
    100 #define	HIDEOUS(msg)			DPRINTF(DBGLVL_HIDEOUS, msg)
    101 
    102 #else	/* !DEBUG */
    103 
    104 #define	DCALL(ignored_dbg_level, ignored_routine)
    105 #define	MAJOR(ignored)
    106 #define	MINOR(ignored)
    107 #define	MINUTE(ignored)
    108 #define	TRIVIA(ignored)
    109 #define	HIDEOUS(ignored)
    110 
    111 #endif /* DEBUG */
    112 
    113 #define	NULLSTR(str)	(!(str) || *(str) == '\0'? "<null>" : (str))
    114 #define	NULSTRING	""
    115 
    116 /* somewhat arbitrary limits, in seconds */
    117 /* all probably ought to be different, but these are convenient for debugging */
    118 const time_t	UF_TOO_LONG		= 128;	/* max. wait for fsck start */
    119 
    120 /* all of these are in units of seconds used for retry period while ... */
    121 const time_t	UF_FIXSTART_PERIOD	= 16;	/* awaiting fsck start */
    122 const time_t	UF_FIXPOLL_PERIOD	= 256;	/* awaiting fsck finish */
    123 const time_t	UF_SHORT_ERROR_PERIOD	= 4;	/* after (lockfs) error */
    124 const time_t	UF_LONG_ERROR_PERIOD	= 512;	/* after (lockfs) error */
    125 
    126 #define	NO_ERROR		0
    127 #define	LOCKFS_OLOCK		LOCKFS_MAXLOCK+1
    128 
    129 const ulong_t	GB			= 1024 * 1024 * 1024;
    130 const ulong_t	SecondsPerGig		= 1024;	/* ~17 minutes (overestimate) */
    131 
    132 /*
    133  * per filesystem flags
    134  */
    135 const int	UFSFX_PANIC		= (UFSMNT_ONERROR_PANIC >> 4);
    136 const int	UFSFX_LCKONLY		= (UFSMNT_ONERROR_LOCK >> 4);
    137 const int	UFSFX_LCKUMOUNT		= (UFSMNT_ONERROR_UMOUNT >> 4);
    138 const int	UFSFX_DEFAULT		= (UFSMNT_ONERROR_DEFAULT >> 4);
    139 const int	UFSFX_REPAIR_START	= 0x10000000;
    140 
    141 /* return protocols */
    142 
    143 typedef enum triage_return_code {
    144 	TRIAGE_DEAD = -1,
    145 	TRIAGE_NO_SPIRIT,
    146 	TRIAGE_ATTEND_TO
    147 } triage_t;
    148 
    149 typedef enum statefunc_return_code {
    150 	SFRC_SUCCESS = 1,
    151 	SFRC_FAIL = 0
    152 } sfrc_t;
    153 
    154 /* external references */
    155 /* in ufs_thread.c */
    156 extern int	ufs_thread_run(struct ufs_q *, callb_cpr_t *cprinfop);
    157 extern int	ufs_checkaccton(vnode_t *);		/* in ufs_lockfs.c */
    158 extern int	ufs_checkswapon(vnode_t *);		/* in ufs_lockfs.c */
    159 
    160 extern struct pollhead		ufs_pollhd;		/* in ufs_vnops.c */
    161 
    162 /* globals */
    163 struct	ufs_q	 ufs_fix;
    164 
    165 /*
    166  * patchable constants:
    167  *   These are set in ufsfx_init() [called at modload]
    168  */
    169 struct ufs_failure_tunable {
    170 	long	 uft_too_long;		/* limit repair startup time */
    171 	long	 uft_fixstart_period;	/* pre-repair start period */
    172 	long	 uft_fixpoll_period;	/* post-fsck start period */
    173 	long	 uft_short_err_period;	/* post-error short period */
    174 	long	 uft_long_err_period;	/* post-error long period */
    175 } ufsfx_tune;
    176 
    177 /* internal statistics of events */
    178 struct uf_statistics {
    179 	ulong_t		ufst_lock_violations;
    180 	ulong_t		ufst_current_races;
    181 	ulong_t		ufst_unmount_failures;
    182 	ulong_t		ufst_num_fixed;
    183 	ulong_t		ufst_num_failed;
    184 	ulong_t		ufst_cpu_waste;
    185 	time_t		ufst_last_start_tm;
    186 	kmutex_t	ufst_mutex;
    187 } uf_stats;
    188 
    189 typedef enum state_action {
    190 	UFA_ERROR = -1,		/* internal error */
    191 	UFA_FOUND,		/* found uf in state */
    192 	UFA_SET			/* change uf to state */
    193 } ufsa_t;
    194 
    195 /* state definition */
    196 typedef struct uf_state_desc {
    197 	int	  ud_v;					/* value */
    198 	char	 *ud_name;				/* name */
    199 	sfrc_t	(*ud_sfp)(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
    200 							/* per-state actions */
    201 	ufs_failure_states_t	  ud_prev;		/* valid prev. states */
    202 
    203 	struct uf_state_desc_attr {
    204 		unsigned	terminal:1;	/* no action req. if found */
    205 		unsigned	at_fail:1;	/* state set by thread */
    206 						/* encountering the error */
    207 		unsigned	unused;
    208 	} ud_attr;
    209 } ufsd_t;
    210 
    211 /*
    212  * forward references
    213  */
    214 
    215 /* thread to watch for failures */
    216 static void	ufsfx_thread_fix_failures(void *);
    217 static int 	ufsfx_do_failure_q(void);
    218 static void	ufsfx_kill_fix_failure_thread(void *);
    219 
    220 /* routines called when failure occurs */
    221 static int		 ufs_fault_v(vnode_t *, char *, va_list)
    222 	__KVPRINTFLIKE(2);
    223 static ufs_failure_t	*init_failure(vnode_t *, char *, va_list)
    224 	__KVPRINTFLIKE(2);
    225 static void		 queue_failure(ufs_failure_t *);
    226 /*PRINTFLIKE2*/
    227 static void		 real_panic(ufs_failure_t *, const char *, ...)
    228 	__KPRINTFLIKE(2);
    229 static void		 real_panic_v(ufs_failure_t *, const char *, va_list)
    230 	__KVPRINTFLIKE(2);
    231 static triage_t		 triage(vnode_t *);
    232 
    233 /* routines called when failure record is acted upon */
    234 static sfrc_t	set_state(ufs_failure_t *, ufs_failure_states_t);
    235 static int	state_trans_valid(ufs_failure_states_t, ufs_failure_states_t);
    236 static int	terminal_state(ufs_failure_states_t);
    237 
    238 /* routines called when states entered/found */
    239 static sfrc_t	sf_minimum(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
    240 static sfrc_t	sf_undef(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
    241 static sfrc_t	sf_init(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
    242 static sfrc_t	sf_queue(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
    243 static sfrc_t	sf_found_queue(ufs_failure_t *);
    244 static sfrc_t	sf_nonterm_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
    245 static sfrc_t	sf_term_cmn(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
    246 static sfrc_t	sf_panic(ufs_failure_t *, ufsa_t, ufs_failure_states_t);
    247 static sfrc_t	sf_set_trylck(ufs_failure_t *);
    248 static sfrc_t	sf_set_locked(ufs_failure_t *);
    249 static sfrc_t	sf_found_trylck(ufs_failure_t *);
    250 static sfrc_t	sf_found_lock_fix_cmn(ufs_failure_t *, ufs_failure_states_t);
    251 static sfrc_t	sf_found_umount(ufs_failure_t *);
    252 
    253 /* support routines, called by sf_nonterm_cmn and sf_term_cmn */
    254 static time_t 	trylock_time_exceeded(ufs_failure_t *);
    255 static void 	pester_msg(ufs_failure_t *, int);
    256 static int 	get_lockfs_status(ufs_failure_t *, struct lockfs *);
    257 static void 	alloc_lockfs_comment(ufs_failure_t *, struct lockfs *);
    258 static int 	set_lockfs(ufs_failure_t *, struct lockfs *);
    259 static int 	lockfs_failure(ufs_failure_t *);
    260 static int 	lockfs_success(ufs_failure_t *);
    261 static int	fsck_active(ufs_failure_t *);
    262 
    263 /* low-level support routines */
    264 static ufsd_t	*get_state_desc(ufs_failure_states_t);
    265 static char	*fs_name(ufs_failure_t *);
    266 
    267 #if defined(DEBUG)
    268 static char	*state_name(ufs_failure_states_t);
    269 static char	*lock_name(struct lockfs *);
    270 static char	*err_name(int);
    271 static char	*act_name(ufsa_t);
    272 static void	 dump_uf_list(char *msg);
    273 static void	 dump_uf(ufs_failure_t *, int i);
    274 #endif /* DEBUG */
    275 /*
    276  *
    277  * State Transitions:
    278  *
    279  * normally:
    280  * if flagged to be locked but not unmounted:	(UFSMNT_ONERROR_LOCK)
    281  *	UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> FIXING -> FIXED
    282  *
    283  * The only difference between these two is that the fsck must be started
    284  * manually.
    285  *
    286  * if flagged to be unmounted:			(UFSMNT_ONERROR_UMOUNT)
    287  *	UNDEF -> INIT -> QUEUE -> TRYLCK -> LOCKED -> UMOUNT -> NOTFIX
    288  *
    289  * if flagged to panic:				(UFSMNT_ONERROR_PANIC)
    290  *	UNDEF -> INIT -> PANIC
    291  *
    292  * if a secondary panic on a file system which has an active failure
    293  * record:
    294  *	UNDEF -> INIT -> QUEUE -> REPLICA
    295  *
 * UNDEF, INIT, QUEUE all are set in the context of the failing thread.
 * All other states (except possibly PANIC) are set by the monitor
 * (lock) thread.
    299  *
    300  */
    301 
    302 ufsd_t	state_desc[] =
    303 {
    304 	{ UF_ILLEGAL,	"in an unknown state",	sf_minimum,	UF_ILLEGAL,
    305 								{ 0, 1, 0 } },
    306 	{ UF_UNDEF,	"undefined",		sf_undef,	UF_UNDEF,
    307 								{ 0, 1, 0 } },
    308 	{ UF_INIT,	"being initialized",	sf_init,	UF_UNDEF,
    309 								{ 0, 1, 0 } },
    310 	{ UF_QUEUE,	"queued",		sf_queue,	UF_INIT,
    311 								{ 0, 1, 0 } },
    312 	{ UF_TRYLCK,	"trying to be locked",	sf_nonterm_cmn,
    313 						UF_QUEUE,	{ 0, 0, 0 } },
    314 	{ UF_LOCKED,	"locked",		sf_nonterm_cmn,
    315 					UF_TRYLCK | UF_FIXING,	{ 0, 0, 0 } },
    316 	{ UF_UMOUNT,	"being unmounted",	sf_nonterm_cmn,
    317 
    318 #if defined(DEBUG)
    319 					UF_PANIC |
    320 #endif /* DEBUG */
    321 					UF_TRYLCK | UF_LOCKED, 	{ 0, 0, 0 } },
    322 	{ UF_FIXING,	"being fixed",		sf_nonterm_cmn,
    323 						UF_LOCKED,	{ 0, 0, 0 } },
    324 	{ UF_FIXED,	"fixed",		sf_term_cmn,
    325 						UF_FIXING,	{ 1, 0, 0 } },
    326 	{ UF_NOTFIX,	"not fixed",		sf_term_cmn,
    327 
    328 #if defined(DEBUG)
    329 							UF_PANIC |
    330 #endif /* DEBUG */
    331 
    332 	    UF_QUEUE | UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING,
    333 								{ 1, 0, 0 } },
    334 	{ UF_REPLICA,	"a replica",		sf_term_cmn,
    335 						UF_QUEUE,	{ 1, 0, 0 } },
    336 	{ UF_PANIC,	"panicking",		sf_panic,
    337 		/* XXX make this narrower */	UF_ALLSTATES,	{ 0, 0, 0 } },
    338 	{ UF_UNDEF,	NULL,			((sfrc_t (*)()) NULL),
    339 						UF_UNDEF, 	{ 0, 0, 0 } }
    340 };
    341 
    342 /* unified collection */
    343 struct ufsfx_info {
    344 	struct uf_statistics		*ufi_statp;
    345 	struct ufs_failure_tunable	*ufi_tunep;
    346 	ufsd_t				*ufi_statetab;
    347 } uffsinfo;
    348 
    349 #if defined(DEBUG)
    350 struct action_description {
    351 	ufsa_t	 ad_v;
    352 	char	*ad_name;
    353 };
    354 
    355 #define	EUNK		(-1)
    356 
    357 struct error_description {
    358 	int	 ed_errno;
    359 	char	*ed_name;
    360 } err_desc[] =
    361 {
    362 	{ EUNK,		"<unexpected errno?>"	},
    363 	{ EINVAL,	"EINVAL"		},
    364 	{ EACCES,	"EACCES"		},
    365 	{ EPERM,	"EPERM"			},
    366 	{ EIO,		"EIO"			},
    367 	{ EDEADLK,	"EDEADLK"		},
    368 	{ EBUSY,	"EBUSY"			},
    369 	{ EAGAIN,	"EAGAIN"		},
    370 	{ ERESTART,	"ERESTART"		},
    371 	{ ETIMEDOUT,	"ETIMEDOUT"		},
    372 	{ NO_ERROR,	"Ok"			},
    373 	{ EUNK,		NULL 			}
    374 };
    375 
    376 struct action_description act_desc[] =
    377 {
    378 	{ UFA_ERROR,	"<unexpected action?>"	},
    379 	{ UFA_FOUND,	"\"found\""	},
    380 	{ UFA_SET,	"\"set\""	},
    381 	{ UFA_ERROR,	NULL			},
    382 };
    383 
    384 #define	LOCKFS_BADLOCK	(-1)
    385 
    386 struct lock_description {
    387 	int	 ld_type;
    388 	char	*ld_name;
    389 } lock_desc[] =
    390 {
    391 	{ LOCKFS_BADLOCK,	"<unexpected lock?>"	},
    392 	{ LOCKFS_ULOCK,		"Unlock"		},
    393 	{ LOCKFS_ELOCK,		"Error Lock"		},
    394 	{ LOCKFS_HLOCK,		"Hard Lock"		},
    395 	{ LOCKFS_OLOCK,		"Old Lock"		},
    396 	{ LOCKFS_BADLOCK,	NULL			}
    397 };
    398 
    399 #endif /* DEBUG */
    400 
    401 /*
    402  * ufs_fault, ufs_fault_v
    403  *
    404  *  called instead of cmn_err(CE_PANIC, ...) by ufs routines
    405  *  when a failure is detected to put the file system into an
    406  *  error state (if possible) or to devolve to a panic otherwise
    407  *
    408  * vnode is some vnode in this file system, used to find the way
    409  * to ufsvfs, vfsp etc.  Since a panic can be called from many
    410  * levels, the vnode is the most convenient hook to pass through.
    411  *
    412  */
    413 
    414 /*PRINTFLIKE2*/
    415 int
    416 ufs_fault(vnode_t *vp, char *fmt, ...)
    417 {
    418 	va_list	adx;
    419 	int	error;
    420 
    421 	MINOR(("[ufs_fault"));
    422 
    423 	va_start(adx, fmt);
    424 	error = ufs_fault_v(vp, fmt, adx);
    425 	va_end(adx);
    426 
    427 	MINOR((": %s (%d)]\n", err_name(error), error));
    428 	return (error);
    429 }
    430 
    431 const char *nullfmt = "<null format?>";
    432 
    433 static int
    434 ufs_fault_v(vnode_t *vp, char *fmt, va_list adx)
    435 {
    436 	ufs_failure_t		*new = NULL;
    437 	ufsvfs_t		*ufsvfsp;
    438 	triage_t		 fix;
    439 	int			 err = ERESTART;
    440 	int			need_vfslock;
    441 
    442 	MINOR(("[ufs_fault_v"));
    443 
    444 	if (fmt == NULL)
    445 		fmt = (char *)nullfmt;
    446 
    447 	fix = triage(vp);
    448 
    449 	if (vp) {
    450 		ufsvfsp = (struct ufsvfs *)vp->v_vfsp->vfs_data;
    451 
    452 		/*
    453 		 * Something bad has happened. That is why we are here.
    454 		 *
    455 		 * In order for the bad thing to be recorded in the superblock
    456 		 * we need to write to the superblock directly.
    457 		 * In the case that logging is enabled the logging code
    458 		 * would normally intercept our write as a delta to the log,
    459 		 * thus we mark the filesystem FSBAD in any case.
    460 		 */
    461 		need_vfslock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
    462 
    463 		if (need_vfslock) {
    464 			mutex_enter(&ufsvfsp->vfs_lock);
    465 		}
    466 
    467 		ufsvfsp->vfs_fs->fs_clean = FSBAD;
    468 		ASSERT(SEMA_HELD(&ufsvfsp->vfs_bufp->b_sem));
    469 		ufsvfsp->vfs_bufp->b_flags &=
    470 		    ~(B_ASYNC | B_READ | B_DONE | B_ERROR | B_DELWRI);
    471 
    472 		(void) bdev_strategy(ufsvfsp->vfs_bufp);
    473 		(void) biowait(ufsvfsp->vfs_bufp);
    474 
    475 		if (need_vfslock) {
    476 			mutex_exit(&ufsvfsp->vfs_lock);
    477 		}
    478 	}
    479 
    480 	switch (fix) {
    481 
    482 	default:
    483 	case TRIAGE_DEAD:
    484 	case TRIAGE_NO_SPIRIT:
    485 
    486 		real_panic_v(new, fmt, adx);
    487 		/* LINTED: warning: logical expression always true: op "||" */
    488 		ASSERT(DEBUG);
    489 		err = EAGAIN;
    490 
    491 #if defined(DEBUG)
    492 		if (!(DEBUG_FLAGS & DBGFLG_FIXWOULDPANIC)) {
    493 			break;
    494 		}
    495 		/* FALLTHROUGH */
    496 
    497 #else
    498 		break;
    499 
    500 #endif /* DEBUG */
    501 
    502 	case TRIAGE_ATTEND_TO:
    503 
    504 		/* q thread not running yet? */
    505 		if (mutex_tryenter(&ufs_fix.uq_mutex)) {
    506 			if (!ufs_fix.uq_threadp) {
    507 				mutex_exit(&ufs_fix.uq_mutex);
    508 				ufs_thread_start(&ufs_fix,
    509 				    ufsfx_thread_fix_failures, NULL);
    510 				ufs_fix.uq_threadp->t_flag |= T_DONTBLOCK;
    511 				mutex_enter(&ufs_fix.uq_mutex);
    512 			} else {
    513 				/*
    514 				 * We got the lock but we are not the current
    515 				 * threadp so we have to release the lock.
    516 				 */
    517 				mutex_exit(&ufs_fix.uq_mutex);
    518 			}
    519 		} else {
    520 			MINOR((": fix failure thread already running "));
    521 			/*
    522 			 * No need to log another failure as one is already
    523 			 * being logged.
    524 			 */
    525 			break;
    526 		}
    527 
    528 		if (ufs_fix.uq_threadp && ufs_fix.uq_threadp == curthread) {
    529 			mutex_exit(&ufs_fix.uq_mutex);
    530 			cmn_err(CE_WARN, "ufs_fault_v: recursive ufs_fault");
    531 		} else {
    532 			/*
    533 			 * Must check if we actually still own the lock and
    534 			 * if so then release the lock and move on with life.
    535 			 */
    536 			if (mutex_owner(&ufs_fix.uq_mutex) == curthread)
    537 				mutex_exit(&ufs_fix.uq_mutex);
    538 		}
    539 
    540 		new = init_failure(vp, fmt, adx);
    541 		if (new != NULL) {
    542 			queue_failure(new);
    543 			break;
    544 		}
    545 		real_panic_v(new, fmt, adx);
    546 		break;
    547 
    548 	}
    549 	MINOR(("] "));
    550 	return (err);
    551 }
    552 
    553 /*
    554  * triage()
    555  *
    556  *  Attempt to fix iff:
    557  *    - the system is not already panicking
    558  *    - this file system isn't explicitly marked not to be fixed
    559  *    - we can connect to the user-level daemon
    560  * These conditions are detectable later, but if we can determine
    561  * them in the failing threads context the core dump may be more
    562  * useful.
    563  *
    564  */
    565 
    566 static triage_t
    567 triage(vnode_t *vp)
    568 {
    569 	struct inode	 *ip;
    570 	int		  need_unlock_vfs;
    571 	int		  fs_flags;
    572 
    573 	MINUTE(("[triage"));
    574 
    575 	if (panicstr) {
    576 		MINUTE((
    577 		": already panicking: \"%s\" => TRIAGE_DEAD]\n", panicstr));
    578 		return (TRIAGE_DEAD);
    579 	}
    580 
    581 	if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs) {
    582 		MINUTE((
    583 	": vp, ip or ufsvfs is NULL; can't determine fs => TRIAGE_DEAD]\n"));
    584 		return (TRIAGE_DEAD);
    585 	}
    586 
    587 	/* use tryenter and continue no matter what since we're panicky */
    588 	need_unlock_vfs = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
    589 	if (need_unlock_vfs)
    590 		need_unlock_vfs = mutex_tryenter(&ip->i_ufsvfs->vfs_lock);
    591 
    592 	fs_flags = ip->i_ufsvfs->vfs_fsfx.fx_flags;
    593 	if (need_unlock_vfs)
    594 		mutex_exit(&ip->i_ufsvfs->vfs_lock);
    595 
    596 	if (fs_flags & UFSFX_PANIC) {
    597 		MINUTE((
    598 		": filesystem marked \"panic\" => TRIAGE_NO_SPIRIT]\n"));
    599 		return (TRIAGE_NO_SPIRIT);
    600 	}
    601 
    602 	if (ufs_checkaccton(vp) != 0) {
    603 		MINUTE((
    604 		": filesystem would deadlock (accounting) => TRIAGE_DEAD]\n"));
    605 		return (TRIAGE_DEAD);
    606 	}
    607 
    608 	if (ufs_checkswapon(vp) != 0) {
    609 		MINUTE((
    610 		": filesystem would deadlock (swapping) => TRIAGE_DEAD]\n"));
    611 		return (TRIAGE_DEAD);
    612 	}
    613 
    614 	MINUTE((": return TRIAGE_ATTEND_TO] "));
    615 	return (TRIAGE_ATTEND_TO);
    616 }
    617 
    618 /*
    619  * init failure
    620  *
    621  * This routine allocates a failure struct and initializes
    622  * it's member elements.
    623  * Space is allocated for copies of dynamic identifying fs structures
    624  * passed in.  Without a much more segmented kernel architecture
    625  * this is as protected as we can make it (for now.)
    626  */
    627 static ufs_failure_t *
    628 init_failure(vnode_t *vp, char *fmt, va_list adx)
    629 {
    630 	ufs_failure_t	*new;
    631 	struct inode	*ip;
    632 	int		 initialization_worked = 0;
    633 	int		 need_vfs_unlock;
    634 
    635 	MINOR(("[init_failure"));
    636 
    637 	new = kmem_zalloc(sizeof (ufs_failure_t), KM_NOSLEEP);
    638 	if (!new) {
    639 		MINOR((": kmem_zalloc failed]\n"));
    640 		return (NULL);
    641 	}
    642 
    643 	/*
    644 	 * enough information to make a fix attempt possible?
    645 	 */
    646 	if (!vp || !(ip = VTOI(vp)) || !ip->i_ufsvfs || !vp->v_vfsp ||
    647 	    !ip->i_ufsvfs->vfs_bufp || !ITOF(ip) || !fmt)
    648 		goto errout;
    649 
    650 	if (vp->v_type != VREG && vp->v_type != VDIR &&
    651 	    vp->v_type != VBLK && vp->v_type != VCHR &&
    652 	    vp->v_type != VLNK && vp->v_type != VFIFO &&
    653 	    vp->v_type != VSOCK)
    654 		goto errout;
    655 
    656 	if (ip->i_ufsvfs->vfs_root->v_type != VREG &&
    657 	    ip->i_ufsvfs->vfs_root->v_type != VDIR &&
    658 	    ip->i_ufsvfs->vfs_root->v_type != VBLK &&
    659 	    ip->i_ufsvfs->vfs_root->v_type != VCHR &&
    660 	    ip->i_ufsvfs->vfs_root->v_type != VLNK &&
    661 	    ip->i_ufsvfs->vfs_root->v_type != VFIFO &&
    662 	    ip->i_ufsvfs->vfs_root->v_type != VSOCK)
    663 		goto errout;
    664 
    665 	if ((ITOF(ip)->fs_magic != FS_MAGIC) &&
    666 	    (ITOF(ip)->fs_magic != MTB_UFS_MAGIC))
    667 		goto errout;
    668 
    669 	/* intialize values */
    670 
    671 	(void) vsnprintf(new->uf_panic_str, LOCKFS_MAXCOMMENTLEN - 1, fmt, adx);
    672 
    673 	new->uf_ufsvfsp = ip->i_ufsvfs;
    674 	new->uf_vfsp    = ip->i_vfs;
    675 
    676 	mutex_init(&new->uf_mutex, NULL, MUTEX_DEFAULT, NULL);
    677 	need_vfs_unlock = !MUTEX_HELD(&ip->i_ufsvfs->vfs_lock);
    678 
    679 	if (need_vfs_unlock) {
    680 		if (!mutex_tryenter(&ip->i_ufsvfs->vfs_lock)) {
    681 			/*
    682 			 * not much alternative here, but we're panicking
    683 			 * already, it couldn't be worse - so just
    684 			 * proceed optimistically and take note.
    685 			 */
    686 			mutex_enter(&uf_stats.ufst_mutex);
    687 			uf_stats.ufst_lock_violations++;
    688 			mutex_exit(&uf_stats.ufst_mutex);
    689 			MINOR((": couldn't get vfs lock"))
    690 			need_vfs_unlock = 0;
    691 		}
    692 	}
    693 
    694 	if (mutex_tryenter(&new->uf_mutex)) {
    695 		initialization_worked = set_state(new, UF_INIT);
    696 		mutex_exit(&new->uf_mutex);
    697 	}
    698 
    699 	if (need_vfs_unlock)
    700 		mutex_exit(&ip->i_ufsvfs->vfs_lock);
    701 
    702 	if (initialization_worked) {
    703 		MINOR(("] "));
    704 		return (new);
    705 	}
    706 	/* FALLTHROUGH */
    707 
    708 errout:
    709 	if (new)
    710 		kmem_free(new, sizeof (ufs_failure_t));
    711 	MINOR((": failed]\n"));
    712 	return (NULL);
    713 }
    714 
    715 static void
    716 queue_failure(ufs_failure_t *new)
    717 {
    718 	MINOR(("[queue_failure"));
    719 
    720 	mutex_enter(&ufs_fix.uq_mutex);
    721 
    722 	if (ufs_fix.uq_ufhead)
    723 		insque(new, &ufs_fix.uq_ufhead);
    724 	else
    725 		ufs_fix.uq_ufhead = new;
    726 
    727 	if (mutex_tryenter(&new->uf_mutex)) {
    728 		(void) set_state(new, UF_QUEUE);
    729 		mutex_exit(&new->uf_mutex);
    730 	}
    731 
    732 	mutex_enter(&uf_stats.ufst_mutex);		/* force wakeup */
    733 	ufs_fix.uq_ne = ufs_fix.uq_lowat = uf_stats.ufst_num_failed;
    734 	mutex_exit(&uf_stats.ufst_mutex);
    735 
    736 	cv_broadcast(&ufs_fix.uq_cv);
    737 
    738 	DCALL(DBGLVL_MAJOR, cmn_err(CE_WARN, new->uf_panic_str ?
    739 	    new->uf_panic_str : "queue_failure: NULL panic str?"));
    740 	mutex_exit(&ufs_fix.uq_mutex);
    741 
    742 	MINOR(("] "));
    743 }
    744 
    745 /*PRINTFLIKE2*/
    746 static void
    747 real_panic(ufs_failure_t *f, const char *fmt, ...)
    748 {
    749 	va_list	adx;
    750 
    751 	MINUTE(("[real_panic "));
    752 
    753 	va_start(adx, fmt);
    754 	real_panic_v(f, fmt, adx);
    755 	va_end(adx);
    756 
    757 	MINUTE((": return?!]\n"));
    758 }
    759 
    760 static void
    761 real_panic_v(ufs_failure_t *f, const char *fmt, va_list adx)
    762 {
    763 	int seriousness = CE_PANIC;
    764 	int need_unlock;
    765 
    766 	MINUTE(("[real_panic_v "));
    767 
    768 	if (f && f->uf_ufsvfsp)
    769 		TRANS_SETERROR(f->uf_ufsvfsp);
    770 
    771 #if defined(DEBUG)
    772 	if (DEBUG_FLAGS & DBGFLG_NOPANIC) {
    773 		seriousness = CE_WARN;
    774 		cmn_err(CE_WARN, "real_panic: EWOULDPANIC\n");
    775 	}
    776 #endif /* DEBUG */
    777 
    778 	delay(hz >> 1);			/* allow previous warnings to get out */
    779 
    780 	if (!f && fmt)
    781 		vcmn_err(seriousness, fmt, adx);
    782 	else
    783 		cmn_err(seriousness, f && f->uf_panic_str? f->uf_panic_str:
    784 		    "real_panic: <unknown panic?>");
    785 
    786 	if (f) {
    787 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
    788 		if (need_unlock) {
    789 			mutex_enter(&f->uf_mutex);
    790 		}
    791 
    792 		f->uf_retry = -1;
    793 		(void) set_state(f, UF_PANIC);
    794 
    795 		if (need_unlock) {
    796 			mutex_exit(&f->uf_mutex);
    797 		}
    798 	}
    799 	MINUTE((": return?!]\n"));
    800 }
    801 
    802 /*
    803  * initializes ufs panic structs, locks, etc
    804  */
    805 void
    806 ufsfx_init(void)
    807 {
    808 
    809 	MINUTE(("[ufsfx_init"));
    810 
    811 	/* patchable; unchanged while running, so no lock is needed */
    812 	ufsfx_tune.uft_too_long		= UF_TOO_LONG;
    813 	ufsfx_tune.uft_fixstart_period	= UF_FIXSTART_PERIOD;
    814 	ufsfx_tune.uft_fixpoll_period	= UF_FIXPOLL_PERIOD;
    815 	ufsfx_tune.uft_short_err_period	= UF_SHORT_ERROR_PERIOD;
    816 	ufsfx_tune.uft_long_err_period	= UF_LONG_ERROR_PERIOD;
    817 
    818 	uffsinfo.ufi_statp	= &uf_stats;
    819 	uffsinfo.ufi_tunep	= &ufsfx_tune;
    820 	uffsinfo.ufi_statetab	= &state_desc[0];
    821 
    822 	mutex_init(&uf_stats.ufst_mutex, NULL, MUTEX_DEFAULT, NULL);
    823 	ufs_thread_init(&ufs_fix, /* maxne */ 1);
    824 
    825 	MINUTE(("] "));
    826 }
    827 
    828 /*
    829  * initializes per-ufs values
    830  * returns 0 (ok) or errno
    831  */
    832 int
    833 ufsfx_mount(struct ufsvfs *ufsvfsp, int flags)
    834 {
    835 	MINUTE(("[ufsfx_mount (%d)", flags));
    836 	/* don't check/need vfs_lock because it's still being initialized */
    837 
    838 	ufsvfsp->vfs_fsfx.fx_flags = (flags & UFSMNT_ONERROR_FLGMASK) >> 4;
    839 
    840 	MINUTE((": %s: fx_flags:%ld,",
    841 	    ufsvfsp->vfs_fs->fs_fsmnt, ufsvfsp->vfs_fsfx.fx_flags));
    842 	/*
    843 	 *	onerror={panic ^ lock only ^ unmount}
    844 	 */
    845 
    846 	if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_PANIC) {
    847 		MINUTE((" PANIC"));
    848 
    849 	} else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKONLY) {
    850 		MINUTE((" LCKONLY"));
    851 
    852 	} else if (ufsvfsp->vfs_fsfx.fx_flags & UFSFX_LCKUMOUNT) {
    853 		MINUTE((" LCKUMOUNT"));
    854 
    855 	} else {
    856 		ufsvfsp->vfs_fsfx.fx_flags = UFSFX_DEFAULT;
    857 		ASSERT(ufsvfsp->vfs_fsfx.fx_flags &
    858 		    (UFSMNT_ONERROR_FLGMASK >> 4));
    859 		MINUTE((" DEFAULT"));
    860 	}
    861 
    862 	pollwakeup(&ufs_pollhd, POLLPRI);
    863 	MINUTE(("]\n"));
    864 	return (0);
    865 }
    866 
    867 /*
    868  * ufsfx_unmount
    869  *
    870  * called during unmount
    871  */
    872 void
    873 ufsfx_unmount(struct ufsvfs *ufsvfsp)
    874 {
    875 	ufs_failure_t	*f;
    876 	int		 must_unlock_list;
    877 
    878 	MINUTE(("[ufsfx_unmount"));
    879 
    880 	if (!ufsvfsp) {
    881 		MINUTE((": no ufsvfsp]"));
    882 		return;
    883 	}
    884 
    885 	if ((must_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex)) != 0)
    886 		mutex_enter(&ufs_fix.uq_mutex);
    887 
    888 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
    889 		int must_unlock_failure;
    890 
    891 		must_unlock_failure = !MUTEX_HELD(&f->uf_mutex);
    892 		if (must_unlock_failure) {
    893 			mutex_enter(&f->uf_mutex);
    894 		}
    895 
    896 		if (f->uf_ufsvfsp == ufsvfsp) {
    897 
    898 			/*
    899 			 * if we owned the failure record lock, then this
    900 			 * is probably a fix failure-triggered unmount, so
    901 			 * the warning is not appropriate or needed
    902 			 */
    903 
    904 			/* XXX if rebooting don't print this? */
    905 			if (!terminal_state(f->uf_s) && must_unlock_failure) {
    906 				cmn_err(CE_WARN,
    907 				    "Unmounting %s while error-locked",
    908 				    fs_name(f));
    909 			}
    910 
    911 			f->uf_ufsvfsp		= NULL;
    912 			f->uf_vfs_ufsfxp	= NULL;
    913 			f->uf_vfs_lockp		= NULL;
    914 			f->uf_bp		= NULL;
    915 			f->uf_vfsp		= NULL;
    916 			f->uf_retry		= -1;
    917 		}
    918 
    919 		if (must_unlock_failure)
    920 			mutex_exit(&f->uf_mutex);
    921 	}
    922 	if (must_unlock_list)
    923 		mutex_exit(&ufs_fix.uq_mutex);
    924 
    925 	pollwakeup(&ufs_pollhd, POLLPRI | POLLHUP);
    926 	MINUTE(("] "));
    927 }
    928 
    929 /*
    930  * ufsfx_(un)lockfs
    931  *
    932  * provides hook from lockfs code so we can recognize unlock/relock
    933  *  This is called after it is certain that the (un)lock will succeed.
    934  */
    935 void
    936 ufsfx_unlockfs(struct ufsvfs *ufsvfsp)
    937 {
    938 	ufs_failure_t	*f;
    939 	int		 need_unlock;
    940 	int		 need_unlock_list;
    941 	int		 informed = 0;
    942 
    943 	MINUTE(("[ufsfx_unlockfs"));
    944 
    945 	if (!ufsvfsp)
    946 		return;
    947 
    948 	need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
    949 
    950 	if (need_unlock_list)
    951 		mutex_enter(&ufs_fix.uq_mutex);
    952 
    953 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
    954 
    955 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
    956 		if (need_unlock)
    957 			mutex_enter(&f->uf_mutex);
    958 
    959 		if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s)) {
    960 			if (!(f->uf_s & UF_FIXING)) {
    961 				/*
    962 				 * This might happen if we don't notice that
    963 				 * the fs gets marked FSFIX before it is
    964 				 * marked FSCLEAN, as might occur if the
    965 				 * the superblock was hammered directly.
    966 				 */
    967 				if (!informed) {
    968 					informed = 1;
    969 					cmn_err(CE_NOTE,
    970 					    "Unlock of %s succeeded before "
    971 					    "fs_clean marked FSFIX?",
    972 					    fs_name(f));
    973 				}
    974 
    975 				/*
    976 				 * pass through fixing state so
    977 				 * transition protocol is satisfied
    978 				 */
    979 				if (!set_state(f, UF_FIXING)) {
    980 					MINUTE((": failed] "));
    981 				}
    982 			}
    983 
    984 			if (!set_state(f, UF_FIXED)) {
    985 				/* it's already fixed, so don't panic now */
    986 				MINUTE((": failed] "));
    987 			}
    988 		}
    989 
    990 		if (need_unlock)
    991 			mutex_exit(&f->uf_mutex);
    992 	}
    993 	if (need_unlock_list)
    994 		mutex_exit(&ufs_fix.uq_mutex);
    995 	MINUTE(("] "));
    996 }
    997 
    998 void
    999 ufsfx_lockfs(struct ufsvfs *ufsvfsp)
   1000 {
   1001 	ufs_failure_t	*f;
   1002 	int		 need_unlock;
   1003 	int		 need_unlock_list;
   1004 
   1005 	MINUTE(("[ufsfx_lockfs"));
   1006 
   1007 	if (!ufsvfsp)
   1008 		return;
   1009 
   1010 	need_unlock_list = !MUTEX_HELD(&ufs_fix.uq_mutex);
   1011 
   1012 	if (need_unlock_list)
   1013 		mutex_enter(&ufs_fix.uq_mutex);
   1014 
   1015 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
   1016 
   1017 		need_unlock = !MUTEX_HELD(&f->uf_mutex);
   1018 		if (need_unlock)
   1019 			mutex_enter(&f->uf_mutex);
   1020 
   1021 		if (f->uf_ufsvfsp == ufsvfsp && !terminal_state(f->uf_s) &&
   1022 		    f->uf_s != UF_PANIC) {
   1023 			switch (f->uf_s) {
   1024 
   1025 			default:
   1026 				cmn_err(CE_WARN,
   1027 				    "fs %s not in state "
   1028 				    "UF_TRYLCK, UF_LOCKED or UF_FIXING",
   1029 				    fs_name(f));
   1030 				break;
   1031 
   1032 			case UF_TRYLCK:
   1033 				if (!set_state(f, UF_LOCKED)) {
   1034 					MINUTE((": failed] "));
   1035 				}
   1036 				break;
   1037 
   1038 			case UF_LOCKED:
   1039 				if (!set_state(f, UF_FIXING)) {
   1040 					MINUTE((": failed] "));
   1041 				}
   1042 				break;
   1043 
   1044 			case UF_FIXING:
   1045 				break;
   1046 
   1047 			}
   1048 		}
   1049 
   1050 		if (need_unlock)
   1051 			mutex_exit(&f->uf_mutex);
   1052 	}
   1053 	if (need_unlock_list)
   1054 		mutex_exit(&ufs_fix.uq_mutex);
   1055 
   1056 	MINUTE(("] "));
   1057 }
   1058 
   1059 /*
   1060  * error lock, trigger fsck and unlock those fs with failures
   1061  * blatantly copied from the hlock routine, although this routine
   1062  * triggers differently in order to use uq_ne as meaningful data.
   1063  */
   1064 /* ARGSUSED */
   1065 void
   1066 ufsfx_thread_fix_failures(void *ignored)
   1067 {
   1068 	int		retry;
   1069 	callb_cpr_t	cprinfo;
   1070 
   1071 	CALLB_CPR_INIT(&cprinfo, &ufs_fix.uq_mutex, callb_generic_cpr,
   1072 	    "ufsfixfail");
   1073 
   1074 	MINUTE(("[ufsfx_thread_fix_failures] "));
   1075 
   1076 	for (;;) {
   1077 		/* sleep until there is work to do */
   1078 
   1079 		mutex_enter(&ufs_fix.uq_mutex);
   1080 		(void) ufs_thread_run(&ufs_fix, &cprinfo);
   1081 		ufs_fix.uq_ne = 0;
   1082 		mutex_exit(&ufs_fix.uq_mutex);
   1083 
   1084 		/* process failures on our q */
   1085 		do {
   1086 			retry = ufsfx_do_failure_q();
   1087 			if (retry) {
   1088 				mutex_enter(&ufs_fix.uq_mutex);
   1089 				CALLB_CPR_SAFE_BEGIN(&cprinfo);
   1090 				(void) cv_reltimedwait(&ufs_fix.uq_cv,
   1091 				    &ufs_fix.uq_mutex, (hz * retry),
   1092 				    TR_CLOCK_TICK);
   1093 				CALLB_CPR_SAFE_END(&cprinfo,
   1094 				    &ufs_fix.uq_mutex);
   1095 				mutex_exit(&ufs_fix.uq_mutex);
   1096 			}
   1097 		} while (retry);
   1098 	}
   1099 	/* NOTREACHED */
   1100 }
   1101 
   1102 
   1103 /*
   1104  * watch for fix-on-panic work
   1105  *
   1106  * returns # of seconds to sleep before trying again
   1107  * and zero if no retry is needed
   1108  */
   1109 
   1110 int
   1111 ufsfx_do_failure_q(void)
   1112 {
   1113 	ufs_failure_t	*f;
   1114 	long		 retry = 1;
   1115 	ufsd_t		*s;
   1116 
   1117 	MAJOR(("[ufsfx_do_failure_q"));
   1118 	DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
   1119 
   1120 	if (!mutex_tryenter(&ufs_fix.uq_mutex))
   1121 		return (retry);
   1122 
   1123 	retry = 0;
   1124 rescan_q:
   1125 
   1126 	/*
   1127 	 * walk down failure list
   1128 	 *  depending on state of each failure, do whatever
   1129 	 *  is appropriate to move it to the next state
   1130 	 *  taking note of whether retry gets set
   1131 	 *
   1132 	 * retry protocol:
   1133 	 * wakeup in shortest required time for any failure
   1134 	 *   retry == 0; nothing more to do (terminal state)
   1135 	 *   retry < 0; reprocess queue immediately, retry will
   1136 	 *		be abs(retry) for the next cycle
   1137 	 *   retry > 0; schedule wakeup for retry seconds
   1138 	 */
   1139 
   1140 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
   1141 
   1142 		if (!mutex_tryenter(&f->uf_mutex)) {
   1143 			retry = 1;
   1144 			continue;
   1145 		}
   1146 		s = get_state_desc(f->uf_s);
   1147 
   1148 		MINOR((": found%s: %s, \"%s: %s\"\n",
   1149 		    s->ud_attr.terminal ? " old" : "",
   1150 		    fs_name(f), state_name(f->uf_s), f->uf_panic_str));
   1151 
   1152 		if (s->ud_attr.terminal) {
   1153 			mutex_exit(&f->uf_mutex);
   1154 			continue;
   1155 		}
   1156 
   1157 		if (s->ud_sfp)
   1158 			(*s->ud_sfp)(f, UFA_FOUND, f->uf_s);
   1159 
   1160 		ASSERT(terminal_state(f->uf_s) || f->uf_retry != 0);
   1161 
   1162 		if (f->uf_retry != 0) {
   1163 			if (retry > f->uf_retry || retry == 0)
   1164 				retry = f->uf_retry;
   1165 			if (f->uf_retry < 0)
   1166 				f->uf_retry = abs(f->uf_retry);
   1167 		}
   1168 		mutex_exit(&f->uf_mutex);
   1169 	}
   1170 
   1171 
   1172 	if (retry < 0) {
   1173 		retry = abs(retry);
   1174 		goto rescan_q;
   1175 	}
   1176 
   1177 	mutex_exit(&ufs_fix.uq_mutex);
   1178 
   1179 	DCALL(DBGLVL_HIDEOUS, dump_uf_list(NULL));
   1180 	MAJOR((": retry=%ld, good night]\n\n", retry));
   1181 
   1182 	return (retry);
   1183 }
   1184 
   1185 static void
   1186 pester_msg(ufs_failure_t *f, int seriousness)
   1187 {
   1188 	MINUTE(("[pester_msg"));
   1189 	ASSERT(f->uf_s & (UF_LOCKED | UF_FIXING));
   1190 
   1191 	/*
   1192 	 * XXX if seems too long for this fs, poke administrator
   1193 	 * XXX to run fsck manually (and change retry time?)
   1194 	 */
   1195 	cmn_err(seriousness, "Waiting for repair of %s to %s",
   1196 	    fs_name(f), f->uf_s & UF_LOCKED ? "start" : "finish");
   1197 	MINUTE(("]"));
   1198 }
   1199 
   1200 static time_t
   1201 trylock_time_exceeded(ufs_failure_t *f)
   1202 {
   1203 	time_t		toolong;
   1204 	extern time_t	time;
   1205 
   1206 	MINUTE(("[trylock_time_exceeded"));
   1207 	ASSERT(MUTEX_HELD(&f->uf_mutex));
   1208 
   1209 	toolong = (time_t)ufsfx_tune.uft_too_long + f->uf_entered_tm;
   1210 	if (time > toolong)
   1211 		cmn_err(CE_WARN, "error-lock timeout exceeded: %s", fs_name(f));
   1212 
   1213 	MINUTE(("] "));
   1214 	return (time <= toolong? 0: time - toolong);
   1215 }
   1216 
   1217 static int
   1218 get_lockfs_status(ufs_failure_t *f, struct lockfs *lfp)
   1219 {
   1220 	MINUTE(("[get_lockfs_status"));
   1221 
   1222 	if (!f->uf_ufsvfsp) {
   1223 		MINUTE((": ufsvfsp is NULL]\n"));
   1224 		return (0);
   1225 	}
   1226 
   1227 	ASSERT(MUTEX_HELD(&f->uf_mutex));
   1228 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
   1229 	ASSERT(!vfs_lock_held(f->uf_vfsp));
   1230 	ASSERT(f->uf_ufsvfsp->vfs_root != NULL);
   1231 
   1232 	f->uf_lf_err = ufs_fiolfss(f->uf_ufsvfsp->vfs_root, lfp);
   1233 
   1234 	if (f->uf_lf_err) {
   1235 		f->uf_retry = ufsfx_tune.uft_short_err_period;
   1236 	}
   1237 
   1238 	MINUTE(("] "));
   1239 	return (1);
   1240 }
   1241 
   1242 static sfrc_t
   1243 set_state(ufs_failure_t *f, ufs_failure_states_t new_state)
   1244 {
   1245 	ufsd_t		*s;
   1246 	sfrc_t		 sfrc = SFRC_FAIL;
   1247 	int		 need_unlock;
   1248 	extern time_t	 time;
   1249 
   1250 	HIDEOUS(("[set_state: new state:%s", state_name(new_state)));
   1251 	ASSERT(f);
   1252 	ASSERT(MUTEX_HELD(&f->uf_mutex));
   1253 
   1254 	/*
   1255 	 * if someone else is panicking, just let panic sync proceed
   1256 	 */
   1257 	if (panicstr) {
   1258 		(void) set_state(f, UF_NOTFIX);
   1259 		HIDEOUS((": state reset: not fixed] "));
   1260 		return (sfrc);
   1261 	}
   1262 
   1263 	/*
   1264 	 * bad state transition, an internal error
   1265 	 */
   1266 	if (!state_trans_valid(f->uf_s, new_state)) {
   1267 		/* recursion */
   1268 		if (!(f->uf_s & UF_PANIC) && !(new_state & UF_PANIC))
   1269 			(void) set_state(f, UF_PANIC);
   1270 		MINOR((": state reset: transition failure (\"%s\"->\"%s\")] ",
   1271 		    state_name(f->uf_s), state_name(new_state)));
   1272 		return (sfrc);
   1273 	}
   1274 
   1275 	s = get_state_desc(new_state);
   1276 
   1277 	need_unlock = !MUTEX_HELD(&ufs_fix.uq_mutex);
   1278 	if (need_unlock)
   1279 		mutex_enter(&ufs_fix.uq_mutex);
   1280 
   1281 	if (s->ud_attr.at_fail && ufs_fix.uq_threadp &&
   1282 	    curthread == ufs_fix.uq_threadp) {
   1283 		cmn_err(CE_WARN, "set_state: probable recursive panic of %s",
   1284 		    fs_name(f));
   1285 	}
   1286 	if (need_unlock)
   1287 		mutex_exit(&ufs_fix.uq_mutex);
   1288 
   1289 	/* NULL state functions always succeed */
   1290 	sfrc = !s->ud_sfp? SFRC_SUCCESS: (*s->ud_sfp)(f, UFA_SET, new_state);
   1291 
   1292 	if (sfrc == SFRC_SUCCESS && f->uf_s != new_state) {
   1293 		f->uf_s = new_state;
   1294 		f->uf_entered_tm = time;
   1295 		f->uf_counter = 0;
   1296 	}
   1297 
   1298 	HIDEOUS(("]\n"));
   1299 	return (sfrc);
   1300 }
   1301 
   1302 static ufsd_t *
   1303 get_state_desc(ufs_failure_states_t state)
   1304 {
   1305 	ufsd_t *s;
   1306 
   1307 	HIDEOUS(("[get_state_desc"));
   1308 
   1309 	for (s = &state_desc[1]; s->ud_name != NULL; s++) {
   1310 		if (s->ud_v == state) {
   1311 			HIDEOUS(("] "));
   1312 			return (s);
   1313 		}
   1314 	}
   1315 
   1316 	HIDEOUS(("] "));
   1317 	return (&state_desc[0]);	/* default */
   1318 }
   1319 
   1320 static sfrc_t
   1321 sf_undef(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
   1322 {
   1323 	sfrc_t rc;
   1324 
   1325 	TRIVIA(("[sf_undef, action is %s, state is %s\n",
   1326 	    act_name(a), state_name(s)));
   1327 	ASSERT(s == UF_UNDEF);
   1328 
   1329 	/* shouldn't find null failure records or ever set one */
   1330 	rc = set_state(f, UF_NOTFIX);
   1331 
   1332 	TRIVIA(("] "));
   1333 	return (rc);
   1334 }
   1335 
   1336 
   1337 static sfrc_t
   1338 sf_init(
   1339 	ufs_failure_t	*f,
   1340 	ufsa_t	 a,
   1341 	ufs_failure_states_t	 s)
   1342 {
   1343 	sfrc_t		rc = SFRC_FAIL;
   1344 	extern time_t	time;
   1345 
   1346 	TRIVIA(("[sf_init, action is %s", act_name(a)));
   1347 	ASSERT(s & UF_INIT);
   1348 
   1349 	switch (a) {
   1350 	case UFA_SET:
   1351 		f->uf_begin_tm = time;
   1352 		f->uf_retry = 1;
   1353 		if (!f->uf_ufsvfsp) {
   1354 			(void) set_state(f, UF_PANIC);
   1355 			TRIVIA((": NULL ufsvfsp]\n"));
   1356 			return (rc);
   1357 		}
   1358 		/*
   1359 		 * because we can call panic from many different levels,
   1360 		 * we can't be sure that we've got the vfs_lock at this
   1361 		 * point.  However, there's not much alternative and if
   1362 		 * we don't (have the lock) the worst case is we'll just
   1363 		 * panic again
   1364 		 */
   1365 		f->uf_vfs_lockp		= &f->uf_ufsvfsp->vfs_lock;
   1366 		f->uf_vfs_ufsfxp	= &f->uf_ufsvfsp->vfs_fsfx;
   1367 
   1368 		if (!f->uf_ufsvfsp->vfs_bufp) {
   1369 			(void) set_state(f, UF_PANIC);
   1370 			TRIVIA((": NULL vfs_bufp]\n"));
   1371 			return (rc);
   1372 		}
   1373 		f->uf_bp = f->uf_ufsvfsp->vfs_bufp;
   1374 
   1375 		if (!f->uf_ufsvfsp->vfs_bufp->b_un.b_fs) {
   1376 			(void) set_state(f, UF_PANIC);
   1377 			TRIVIA((": NULL vfs_fs]\n"));
   1378 			return (rc);
   1379 		}
   1380 
   1381 		/* vfs_fs = vfs_bufp->b_un.b_fs */
   1382 		bcopy(f->uf_ufsvfsp->vfs_fs->fs_fsmnt, f->uf_fsname, MAXMNTLEN);
   1383 
   1384 		f->uf_lf.lf_lock  = LOCKFS_ELOCK;	/* primer */
   1385 
   1386 		if (!f->uf_vfsp || f->uf_vfsp->vfs_dev == NODEV) {
   1387 			(void) set_state(f, UF_PANIC);
   1388 			TRIVIA((": NULL vfsp or vfs_dev == NODEV"));
   1389 			return (rc);
   1390 		}
   1391 		f->uf_dev = f->uf_vfsp->vfs_dev;
   1392 
   1393 		rc = SFRC_SUCCESS;
   1394 		break;
   1395 
   1396 	case UFA_FOUND:
   1397 	default:
   1398 		/* failures marked init shouldn't even be on the queue yet */
   1399 		rc = set_state(f, UF_QUEUE);
   1400 		TRIVIA((": found failure with state init]\n"));
   1401 	}
   1402 
   1403 	TRIVIA(("] "));
   1404 	return (rc);
   1405 }
   1406 
   1407 static sfrc_t
   1408 sf_queue(
   1409 	ufs_failure_t	*f,
   1410 	ufsa_t	 a,
   1411 	ufs_failure_states_t	 s)
   1412 {
   1413 	sfrc_t		rc = SFRC_FAIL;
   1414 
   1415 	TRIVIA(("[sf_queue, action is %s", act_name(a)));
   1416 	ASSERT(s & UF_QUEUE);
   1417 
   1418 	if (!f->uf_ufsvfsp) {
   1419 		TRIVIA((": NULL ufsvfsp]\n"));
   1420 		return (rc);
   1421 	}
   1422 
   1423 	switch (a) {
   1424 	case UFA_FOUND:
   1425 		rc = sf_found_queue(f);
   1426 		break;
   1427 
   1428 	case UFA_SET:
   1429 
   1430 		ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
   1431 
   1432 		mutex_enter(&uf_stats.ufst_mutex);
   1433 		uf_stats.ufst_num_failed++;
   1434 		mutex_exit(&uf_stats.ufst_mutex);
   1435 
   1436 		/*
   1437 		 * if can't get the vfs lock, just wait until
   1438 		 * UF_TRYLCK to set fx_current
   1439 		 */
   1440 		if (mutex_tryenter(f->uf_vfs_lockp)) {
   1441 			f->uf_vfs_ufsfxp->fx_current = f;
   1442 			mutex_exit(f->uf_vfs_lockp);
   1443 		} else {
   1444 			mutex_enter(&uf_stats.ufst_mutex);
   1445 			uf_stats.ufst_current_races++;
   1446 			mutex_exit(&uf_stats.ufst_mutex);
   1447 		}
   1448 
   1449 		f->uf_retry = 1;
   1450 		rc = SFRC_SUCCESS;
   1451 		TRIVIA(("] "));
   1452 		break;
   1453 
   1454 	default:
   1455 		(void) set_state(f, UF_PANIC);
   1456 		TRIVIA((": failed] "));
   1457 	}
   1458 
   1459 	return (rc);
   1460 }
   1461 
   1462 static sfrc_t
   1463 sf_found_queue(ufs_failure_t *f)
   1464 {
   1465 	int		replica;
   1466 	sfrc_t		rc = SFRC_FAIL;
   1467 
   1468 	TRIVIA(("[sf_found_queue"));
   1469 
   1470 	/*
   1471 	 * don't need to check for null ufsvfsp because
   1472 	 * unmount must own list's ufs_fix.uq_mutex
   1473 	 * to mark it null and we own that lock since
   1474 	 * we got here.
   1475 	 */
   1476 
   1477 	ASSERT(MUTEX_HELD(&ufs_fix.uq_mutex));
   1478 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
   1479 
   1480 	if (!mutex_tryenter(f->uf_vfs_lockp)) {
   1481 		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
   1482 		f->uf_retry = 1;
   1483 		return (rc);
   1484 	}
   1485 
   1486 	replica = f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current != NULL &&
   1487 	    f->uf_vfs_ufsfxp->fx_current != f &&
   1488 	    !terminal_state(f->uf_vfs_ufsfxp->fx_current->uf_s);
   1489 
   1490 	/*
   1491 	 * copy general flags to this ufs_failure so we don't
   1492 	 * need to refer back to the ufsvfs, or, more importantly,
   1493 	 * don't need to keep acquiring (trying to acquire) vfs_lockp
   1494 	 *
   1495 	 * The most restrictive option wins:
   1496 	 *  panic > errlock only > errlock+unmount > repair
   1497 	 * XXX panic > elock > elock > elock+umount
   1498 	 */
   1499 	if (f->uf_vfs_ufsfxp->fx_flags & UFSFX_PANIC) {
   1500 		if (!set_state(f, UF_PANIC)) {
   1501 			TRIVIA((": marked panic but was queued?"));
   1502 			real_panic(f, " ");
   1503 			/*NOTREACHED*/
   1504 		}
   1505 		mutex_exit(f->uf_vfs_lockp);
   1506 		return (rc);
   1507 	}
   1508 	f->uf_flags = f->uf_vfs_ufsfxp->fx_flags;
   1509 
   1510 	if (replica) {
   1511 		if (!set_state(f, UF_REPLICA)) {
   1512 			f->uf_retry = 1;
   1513 			TRIVIA((": set to replica failed] "));
   1514 		} else {
   1515 			TRIVIA(("] "));
   1516 		}
   1517 		mutex_exit(f->uf_vfs_lockp);
   1518 		return (rc);
   1519 	}
   1520 	mutex_exit(f->uf_vfs_lockp);
   1521 
   1522 	if (!set_state(f, UF_TRYLCK)) {
   1523 		TRIVIA((": failed] "));
   1524 	} else {
   1525 		rc = SFRC_SUCCESS;
   1526 	}
   1527 	return (rc);
   1528 }
   1529 
   1530 static sfrc_t
   1531 sf_nonterm_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
   1532 {
   1533 	sfrc_t	rc = SFRC_FAIL;
   1534 
   1535 	TRIVIA(("[sf_nonterm_cmn, action: %s, %s", act_name(a), state_name(s)));
   1536 	ASSERT(s & (UF_TRYLCK | UF_LOCKED | UF_UMOUNT | UF_FIXING));
   1537 	ASSERT(!terminal_state(s));
   1538 
   1539 	if (!f->uf_ufsvfsp && !(f->uf_s & UF_UMOUNT)) {
   1540 		TRIVIA((": NULL ufsvfsp (state != UMOUNT)]\n"));
   1541 		(void) set_state(f, UF_NOTFIX);
   1542 		return (rc);
   1543 	}
   1544 
   1545 	switch (a) {
   1546 	case UFA_SET:
   1547 		switch (s) {
   1548 		case UF_TRYLCK:
   1549 			ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
   1550 			rc = sf_set_trylck(f);
   1551 			break;
   1552 
   1553 		case UF_LOCKED:
   1554 			rc = sf_set_locked(f);
   1555 			break;
   1556 
   1557 		case UF_FIXING:
   1558 			f->uf_flags |= UFSFX_REPAIR_START;
   1559 			f->uf_retry  = ufsfx_tune.uft_fixpoll_period;
   1560 			rc = SFRC_SUCCESS;
   1561 			break;
   1562 
   1563 		case UF_UMOUNT:
   1564 			f->uf_retry = -ufsfx_tune.uft_short_err_period;
   1565 			rc = SFRC_SUCCESS;
   1566 			break;
   1567 
   1568 		default:
   1569 			(void) set_state(f, UF_PANIC);
   1570 			TRIVIA((": failed] "));
   1571 		}
   1572 		break;
   1573 
   1574 	case UFA_FOUND:
   1575 
   1576 		switch (s) {
   1577 		case UF_TRYLCK:
   1578 			rc = sf_found_trylck(f);
   1579 			break;
   1580 
   1581 		case UF_LOCKED:
   1582 		case UF_FIXING:
   1583 			rc = sf_found_lock_fix_cmn(f, s);
   1584 			break;
   1585 
   1586 		case UF_UMOUNT:
   1587 			rc = sf_found_umount(f);
   1588 			break;
   1589 
   1590 		default:
   1591 			(void) set_state(f, UF_PANIC);
   1592 			TRIVIA((": failed] "));
   1593 			break;
   1594 		}
   1595 		break;
   1596 	default:
   1597 		(void) set_state(f, UF_PANIC);
   1598 		TRIVIA((": failed] "));
   1599 		break;
   1600 	}
   1601 
   1602 	TRIVIA(("] "));
   1603 	return (rc);
   1604 }
   1605 
   1606 static sfrc_t
   1607 sf_set_trylck(ufs_failure_t *f)
   1608 {
   1609 	TRIVIA(("[sf_set_trylck"));
   1610 
   1611 	if (!mutex_tryenter(f->uf_vfs_lockp)) {
   1612 		TRIVIA((": tryenter(vfslockp) failed; retry]\n"));
   1613 		f->uf_retry = 1;
   1614 		return (SFRC_FAIL);
   1615 	}
   1616 
   1617 	if (!f->uf_vfs_ufsfxp->fx_current)
   1618 		f->uf_vfs_ufsfxp->fx_current = f;
   1619 
   1620 	mutex_exit(f->uf_vfs_lockp);
   1621 
   1622 	f->uf_lf.lf_flags = 0;
   1623 	f->uf_lf.lf_lock  = LOCKFS_ELOCK;
   1624 	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
   1625 	TRIVIA(("] "));
   1626 	return (SFRC_SUCCESS);
   1627 }
   1628 
   1629 static sfrc_t
   1630 sf_found_trylck(ufs_failure_t *f)
   1631 {
   1632 	struct lockfs lockfs_status;
   1633 
   1634 	TRIVIA(("[sf_found_trylck"));
   1635 
   1636 	if (trylock_time_exceeded(f) > 0) {
   1637 		(void) set_state(f, UF_PANIC);
   1638 		TRIVIA((": failed] "));
   1639 		return (SFRC_FAIL);
   1640 	}
   1641 
   1642 	if (!get_lockfs_status(f, &lockfs_status)) {
   1643 		(void) set_state(f, UF_PANIC);
   1644 		TRIVIA((": failed] "));
   1645 		return (SFRC_FAIL);
   1646 	}
   1647 
   1648 	if (f->uf_lf_err == NO_ERROR)
   1649 		f->uf_lf.lf_key = lockfs_status.lf_key;
   1650 
   1651 	if (!set_lockfs(f, &lockfs_status)) {
   1652 		(void) set_state(f, UF_PANIC);
   1653 		TRIVIA((": failed] "));
   1654 		return (SFRC_FAIL);
   1655 	}
   1656 	TRIVIA(("] "));
   1657 	return (SFRC_SUCCESS);
   1658 }
   1659 
   1660 static sfrc_t
   1661 sf_set_locked(ufs_failure_t *f)
   1662 {
   1663 	TRIVIA(("[sf_set_locked"));
   1664 
   1665 	f->uf_retry = -ufsfx_tune.uft_fixstart_period;
   1666 
   1667 #if defined(DEBUG)
   1668 	if (f->uf_flags & UFSFX_REPAIR_START)
   1669 		TRIVIA(("clearing UFSFX_REPAIR_START "));
   1670 #endif /* DEBUG */
   1671 
   1672 	f->uf_flags &= ~UFSFX_REPAIR_START;
   1673 
   1674 	if (f->uf_s & UF_TRYLCK) {
   1675 		cmn_err(CE_WARN, "Error-locked %s: \"%s\"",
   1676 		    fs_name(f), f->uf_panic_str);
   1677 
   1678 		if (f->uf_flags & UFSFX_LCKONLY)
   1679 			cmn_err(CE_WARN, "Manual repair of %s required",
   1680 			    fs_name(f));
   1681 	}
   1682 
   1683 	/*
   1684 	 * just reset to current state
   1685 	 */
   1686 #if defined(DEBUG)
   1687 	TRIVIA(("locked->locked "));
   1688 #endif /* DEBUG */
   1689 
   1690 	TRIVIA(("] "));
   1691 	return (SFRC_SUCCESS);
   1692 }
   1693 
   1694 static sfrc_t
   1695 sf_found_lock_fix_cmn(ufs_failure_t *f, ufs_failure_states_t s)
   1696 {
   1697 	time_t		toolong;
   1698 	extern time_t	time;
   1699 	struct buf	*bp			= NULL;
   1700 	struct fs	*dfs;
   1701 	time_t		 concerned, anxious;
   1702 	sfrc_t		 rc			= SFRC_FAIL;
   1703 	ulong_t		 gb_size;
   1704 
   1705 	TRIVIA(("[sf_found_lock_fix_cmn (\"%s\")", state_name(s)));
   1706 
   1707 	if (s & UF_LOCKED) {
   1708 		ASSERT(MUTEX_HELD(&f->uf_mutex));
   1709 
   1710 		toolong =
   1711 		    time > (ufsfx_tune.uft_too_long + f->uf_entered_tm);
   1712 		TRIVIA(("%stoolong", !toolong? "not": ""));
   1713 		HIDEOUS((": time:%ld, too long:%ld, entered_tm:%ld ",
   1714 		    time, ufsfx_tune.uft_too_long, f->uf_entered_tm));
   1715 
   1716 		if (f->uf_flags & UFSFX_LCKUMOUNT) {
   1717 			if (set_state(f, UF_UMOUNT)) {
   1718 				TRIVIA(("] "));
   1719 				rc = SFRC_SUCCESS;
   1720 			} else {
   1721 				TRIVIA((": failed] "));
   1722 				f->uf_retry = 1;
   1723 			}
   1724 			return (rc);
   1725 		}
   1726 		if (!toolong) {
   1727 			rc = SFRC_SUCCESS;
   1728 		} else {
   1729 			if (!(f->uf_flags & UFSFX_REPAIR_START)) {
   1730 				cmn_err(CE_WARN, "%s repair of %s not started.",
   1731 				    (f->uf_flags & UFSFX_LCKONLY) ?
   1732 				    "Manual" : "Automatic", fs_name(f));
   1733 
   1734 				f->uf_retry = ufsfx_tune.uft_long_err_period;
   1735 			} else {
   1736 				f->uf_retry = ufsfx_tune.uft_long_err_period;
   1737 				cmn_err(CE_WARN, "Repair of %s is not timely; "
   1738 				    "operator attention is required.",
   1739 				    fs_name(f));
   1740 			}
   1741 			TRIVIA(("] "));
   1742 			return (rc);
   1743 		}
   1744 	}
   1745 
   1746 #if defined(DEBUG)
   1747 	else {
   1748 		ASSERT(s & UF_FIXING);
   1749 	}
   1750 #endif /* DEBUG */
   1751 
   1752 	/*
   1753 	 * get on disk superblock; force it to really
   1754 	 * come from the disk
   1755 	 */
   1756 	(void) bfinval(f->uf_dev, 0);
   1757 	bp = UFS_BREAD(f->uf_ufsvfsp, f->uf_dev, SBLOCK, SBSIZE);
   1758 	if (bp) {
   1759 		bp->b_flags |= (B_STALE | B_AGE);
   1760 		dfs = bp->b_un.b_fs;
   1761 	}
   1762 
   1763 	if (!bp || (bp->b_flags & B_ERROR) || ((dfs->fs_magic != FS_MAGIC) &&
   1764 	    (dfs->fs_magic != MTB_UFS_MAGIC))) {
   1765 		TRIVIA((": UFS_BREAD(SBLOCK) failed]\n"));
   1766 		f->uf_retry = 1;
   1767 		goto out;
   1768 	}
   1769 
   1770 	/* fsck started but we haven't noticed yet? */
   1771 	if (!(s & UF_FIXING) && dfs->fs_clean == FSFIX) {
   1772 		if (!set_state(f, UF_FIXING)) {
   1773 			TRIVIA((": failed]\n"));
   1774 			f->uf_retry = 1;
   1775 			goto out;
   1776 		}
   1777 	}
   1778 
   1779 	/* fsck started but didn't succeed? */
   1780 	if ((s & UF_FIXING) && ((dfs->fs_clean == FSBAD) || !fsck_active(f))) {
   1781 		TRIVIA((": fs_clean: %d", (int)dfs->fs_clean));
   1782 		(void) set_state(f, UF_LOCKED);
   1783 		cmn_err(CE_WARN, "%s: Manual repair is necessary.", fs_name(f));
   1784 		f->uf_retry = ufsfx_tune.uft_long_err_period;
   1785 		goto out;
   1786 	}
   1787 
   1788 	gb_size = (dfs->fs_size * dfs->fs_bshift) / GB;
   1789 	toolong = (time_t)((gb_size == 0? 1: gb_size) * SecondsPerGig);
   1790 
   1791 	/* fsck started but doesn't seem to be proceeding? */
   1792 	if ((s & UF_FIXING) && dfs->fs_clean == FSFIX) {
   1793 		if (time > f->uf_entered_tm + toolong) {
   1794 
   1795 			cmn_err(CE_WARN,
   1796 			    "Repair completion timeout exceeded on %s; "
   1797 			    "manual fsck may be required", fs_name(f));
   1798 			f->uf_retry = ufsfx_tune.uft_long_err_period;
   1799 		}
   1800 	}
   1801 
   1802 	concerned = f->uf_entered_tm + (toolong / 3);
   1803 	anxious = f->uf_entered_tm + ((2 * toolong) / 3);
   1804 
   1805 	if (time > concerned)
   1806 		pester_msg(f, time > anxious? CE_WARN: CE_NOTE);
   1807 
   1808 	TRIVIA(("] "));
   1809 
   1810 out:
   1811 	if (bp)
   1812 		brelse(bp);
   1813 
   1814 	return (rc);
   1815 }
   1816 
   1817 static sfrc_t
   1818 sf_found_umount(ufs_failure_t *f)
   1819 {
   1820 	extern time_t	 time;
   1821 	sfrc_t		 rc			= SFRC_FAIL;
   1822 	struct vfs	*vfsp			= f->uf_vfsp;
   1823 	struct ufsvfs	*ufsvfsp		= f->uf_ufsvfsp;
   1824 	int		 toolong		= 0;
   1825 	int		 err			= 0;
   1826 
   1827 	TRIVIA(("[sf_found_umount"));
   1828 
   1829 	toolong = time > ufsfx_tune.uft_too_long + f->uf_entered_tm;
   1830 	if (toolong) {
   1831 		TRIVIA((": unmount time limit exceeded] "));
   1832 		goto out;
   1833 	}
   1834 
   1835 	if (!vfsp || !ufsvfsp) {	/* trivial case */
   1836 		TRIVIA((": NULL vfsp and/or ufsvfsp, already unmounted?] "));
   1837 		goto out;
   1838 	}
   1839 
   1840 	if (!ULOCKFS_IS_ELOCK(&ufsvfsp->vfs_ulockfs)) {
   1841 		TRIVIA((": !not error locked?"));
   1842 		err = EINVAL;
   1843 		goto out;
   1844 	}
   1845 
   1846 	/* The vn_vfsunlock will be done in dounmount() [.../common/fs/vfs.c] */
   1847 	if (vn_vfswlock(vfsp->vfs_vnodecovered)) {
   1848 		TRIVIA((": couldn't lock coveredvp"));
   1849 		err = EBUSY;
   1850 		goto out;
   1851 	}
   1852 
   1853 	if ((err = dounmount(vfsp, 0, kcred)) != 0) {
   1854 
   1855 		/* take note, but not many alternatives here */
   1856 		mutex_enter(&uf_stats.ufst_mutex);
   1857 		uf_stats.ufst_unmount_failures++;
   1858 		mutex_exit(&uf_stats.ufst_mutex);
   1859 
   1860 		TRIVIA((": unmount failed] "));
   1861 	} else {
   1862 		cmn_err(CE_NOTE, "unmounted error-locked %s", fs_name(f));
   1863 	}
   1864 
   1865 out:
   1866 	if (toolong || (err != EBUSY && err != EAGAIN))
   1867 		rc = set_state(f, UF_NOTFIX);
   1868 
   1869 	TRIVIA(("] "));
   1870 	return (rc);
   1871 }
   1872 
   1873 static sfrc_t
   1874 sf_term_cmn(ufs_failure_t *f, ufsa_t a, ufs_failure_states_t s)
   1875 {
   1876 	extern time_t	time;
   1877 	sfrc_t		rc = SFRC_FAIL;
   1878 
   1879 	TRIVIA(("[sf_term_cmn, action is %s, state is %s",
   1880 	    act_name(a), state_name(s)));
   1881 	ASSERT(s & (UF_FIXED | UF_NOTFIX | UF_REPLICA));
   1882 	ASSERT(terminal_state(s));
   1883 
   1884 	if (!f->uf_ufsvfsp && !(f->uf_s & (UF_UMOUNT | UF_NOTFIX))) {
   1885 		TRIVIA((": NULL ufsvfsp (state != UMOUNT | NOTFIX)]\n"));
   1886 		return (rc);
   1887 	}
   1888 
   1889 	switch (a) {
   1890 	case UFA_SET:
   1891 		switch (s) {
   1892 		case UF_NOTFIX:
   1893 		case UF_FIXED:
   1894 		{
   1895 			int need_lock_vfs;
   1896 
   1897 			if (f->uf_ufsvfsp && f->uf_vfs_lockp)
   1898 				need_lock_vfs = !MUTEX_HELD(f->uf_vfs_lockp);
   1899 			else
   1900 				need_lock_vfs = 0;
   1901 
   1902 			if (need_lock_vfs && !mutex_tryenter(f->uf_vfs_lockp)) {
   1903 				TRIVIA((": tryenter(vfslockp) fail; retry]\n"));
   1904 				f->uf_retry = 1;
   1905 				break;
   1906 			}
   1907 
   1908 			f->uf_end_tm = time;
   1909 			f->uf_lf.lf_lock = LOCKFS_OLOCK;
   1910 			f->uf_retry = 0;
   1911 
   1912 			if (f->uf_vfs_ufsfxp)
   1913 				f->uf_vfs_ufsfxp->fx_current = NULL;
   1914 
   1915 			if (need_lock_vfs)
   1916 				mutex_exit(f->uf_vfs_lockp);
   1917 
   1918 			cmn_err(CE_NOTE, (s & UF_NOTFIX)? "Could not fix %s":
   1919 			    "%s is now accessible", fs_name(f));
   1920 
   1921 			if (s & UF_FIXED) {
   1922 				mutex_enter(&uf_stats.ufst_mutex);
   1923 				uf_stats.ufst_num_fixed++;
   1924 				mutex_exit(&uf_stats.ufst_mutex);
   1925 			}
   1926 			(void) timeout(ufsfx_kill_fix_failure_thread,
   1927 			    (void *)(ufsfx_tune.uft_short_err_period * hz),
   1928 			    ufsfx_tune.uft_short_err_period * hz);
   1929 			rc = SFRC_SUCCESS;
   1930 			break;
   1931 		}
   1932 		case UF_REPLICA:
   1933 
   1934 			ASSERT(MUTEX_HELD(f->uf_vfs_lockp));
   1935 
   1936 			/* not actually a replica? */
   1937 			if (f->uf_vfs_ufsfxp && f->uf_vfs_ufsfxp->fx_current &&
   1938 			    f->uf_vfs_ufsfxp->fx_current != f &&
   1939 			    !terminal_state(
   1940 			    f->uf_vfs_ufsfxp->fx_current->uf_s)) {
   1941 
   1942 				f->uf_orig = f->uf_vfs_ufsfxp->fx_current;
   1943 				f->uf_retry = 0;
   1944 				rc = SFRC_SUCCESS;
   1945 			} else {
   1946 				TRIVIA((": NULL fx_current]\n"));
   1947 				f->uf_retry = 1;
   1948 			}
   1949 
   1950 			break;
   1951 
   1952 		default:
   1953 			rc = set_state(f, UF_PANIC);
   1954 			TRIVIA((": failed] "));
   1955 			break;
   1956 		}
   1957 		break;
   1958 
   1959 	case UFA_FOUND:
   1960 		/*
   1961 		 * XXX de-allocate these after some period?
   1962 		 * XXX or move to an historical list?
   1963 		 * XXX or have an ioctl which reaps them?
   1964 		 */
   1965 		/*
   1966 		 * For now, since we don't expect lots of failures
   1967 		 * to occur (to the point of memory shortages),
   1968 		 * just punt
   1969 		 */
   1970 
   1971 		/* be sure we're not wasting cpu on old failures */
   1972 		if (f->uf_retry != 0) {
   1973 			mutex_enter(&uf_stats.ufst_mutex);
   1974 			uf_stats.ufst_cpu_waste++;
   1975 			mutex_exit(&uf_stats.ufst_mutex);
   1976 			f->uf_retry = 0;
   1977 		}
   1978 		rc = SFRC_SUCCESS;
   1979 		break;
   1980 
   1981 	default:
   1982 		(void) set_state(f, UF_PANIC);
   1983 		TRIVIA((": failed] "));
   1984 		break;
   1985 	}
   1986 
   1987 	TRIVIA(("] "));
   1988 	return (rc);
   1989 }
   1990 
   1991 static sfrc_t
   1992 sf_panic(
   1993 	ufs_failure_t	*f,
   1994 	ufsa_t	 a,
   1995 	ufs_failure_states_t	 s)
   1996 {
   1997 	sfrc_t	rc = SFRC_FAIL;
   1998 
   1999 	TRIVIA(("[sf_panic, action is %s, prev. state is %s",
   2000 	    act_name(a), state_name(f->uf_s)));
   2001 	ASSERT(s & UF_PANIC);
   2002 
   2003 	switch (a) {
   2004 	case UFA_SET:
   2005 		f->uf_retry = -ufsfx_tune.uft_short_err_period;
   2006 		rc = SFRC_SUCCESS;
   2007 		break;
   2008 
   2009 	case UFA_FOUND:
   2010 	default:
   2011 		real_panic(f, " ");
   2012 
   2013 		/* LINTED: warning: logical expression always true: op "||" */
   2014 		ASSERT(DEBUG);
   2015 
   2016 		(void) set_state(f, UF_UMOUNT);	/* XXX UF_NOTFIX? */
   2017 
   2018 		break;
   2019 	}
   2020 
   2021 	TRIVIA(("] "));
   2022 	return (rc);
   2023 }
   2024 
   2025 /*
   2026  * minimum state function
   2027  */
   2028 static sfrc_t
   2029 sf_minimum(
   2030 	ufs_failure_t	*f,
   2031 	ufsa_t	 a, /* LINTED argument unused in function: ignored */
   2032 	ufs_failure_states_t	 ignored)
   2033 {
   2034 	sfrc_t rc = SFRC_FAIL;
   2035 
   2036 	TRIVIA(("[sf_minimum, action is %s", act_name(a)));
   2037 
   2038 	switch (a) {
   2039 	case UFA_SET:
   2040 		f->uf_retry = 0;
   2041 		/* FALLTHROUGH */
   2042 
   2043 	case UFA_FOUND:
   2044 		rc = SFRC_SUCCESS;
   2045 		break;
   2046 
   2047 	default:
   2048 		(void) set_state(f, UF_PANIC);
   2049 		TRIVIA((": failed] "));
   2050 		break;
   2051 	}
   2052 
   2053 	TRIVIA(("] "));
   2054 	return (rc);
   2055 }
   2056 
   2057 static int
   2058 state_trans_valid(ufs_failure_states_t from, ufs_failure_states_t to)
   2059 {
   2060 	ufsd_t	*s;
   2061 	int	 valid;
   2062 
   2063 	HIDEOUS(("[state_trans_valid"));
   2064 
   2065 	if (from & to)
   2066 		return (1);
   2067 
   2068 	s = get_state_desc(to);
   2069 
   2070 	/*
   2071 	 * extra test is necessary since we want UF_UNDEF = 0,
   2072 	 * (to detect freshly allocated memory)
   2073 	 * but can't check for that value with a bit test
   2074 	 */
   2075 	valid = (to & UF_INIT)? from == s->ud_prev: from & s->ud_prev;
   2076 
   2077 	HIDEOUS((": %svalid] ", valid? "": "in"));
   2078 	return (valid);
   2079 }
   2080 
   2081 static int
   2082 terminal_state(ufs_failure_states_t state)
   2083 {
   2084 	ufsd_t	*s;
   2085 
   2086 	HIDEOUS(("[terminal_state"));
   2087 
   2088 	s = get_state_desc(state);
   2089 
   2090 	HIDEOUS((": %sterminal] ", s->ud_attr.terminal? "": "not "));
   2091 	return ((int)s->ud_attr.terminal);
   2092 }
   2093 
   2094 static void
   2095 alloc_lockfs_comment(ufs_failure_t *f, struct lockfs *lfp)
   2096 {
   2097 	MINUTE(("[alloc_lockfs_comment"));
   2098 	ASSERT(MUTEX_HELD(&f->uf_mutex));
   2099 
   2100 	/*
   2101 	 * ufs_fiolfs expects a kmem_alloc'ed comment;
   2102 	 * it frees the comment if the lock fails
   2103 	 * or else when the lock is unlocked.
   2104 	 */
   2105 
   2106 	f->uf_lf.lf_comment = kmem_zalloc(LOCKFS_MAXCOMMENTLEN, KM_NOSLEEP);
   2107 	if (f->uf_lf.lf_comment) {
   2108 		char	*from;
   2109 		size_t	 len;
   2110 
   2111 		/*
   2112 		 * use panic string if there's no previous comment
   2113 		 * or if we're setting the error lock
   2114 		 */
   2115 		if ((LOCKFS_IS_ELOCK(&f->uf_lf) || !lfp->lf_comment ||
   2116 		    lfp->lf_comlen <= 0)) {
   2117 			from = f->uf_panic_str;
   2118 			len = LOCKFS_MAXCOMMENTLEN;
   2119 		} else {
   2120 			from = lfp->lf_comment;
   2121 			len = lfp->lf_comlen;
   2122 		}
   2123 
   2124 		bcopy(from, f->uf_lf.lf_comment, len);
   2125 		f->uf_lf.lf_comlen = len;
   2126 
   2127 	} else {
   2128 		f->uf_lf.lf_comlen = 0;
   2129 	}
   2130 	MINUTE(("] "));
   2131 }
   2132 
   2133 static int
   2134 set_lockfs(ufs_failure_t *f, struct lockfs *lfp)
   2135 {
   2136 	int	(*handle_lockfs_rc)(ufs_failure_t *);
   2137 	int	  rc;
   2138 
   2139 	MINUTE(("[set_lockfs"));
   2140 	ASSERT(MUTEX_HELD(&f->uf_mutex));
   2141 	ASSERT(!vfs_lock_held(f->uf_vfsp));
   2142 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
   2143 
   2144 	if (!f->uf_ufsvfsp) {
   2145 		MINUTE((": ufsvfsp is NULL]\n"));
   2146 		return (0);
   2147 	}
   2148 
   2149 	ASSERT(MUTEX_NOT_HELD(&f->uf_ufsvfsp->vfs_ulockfs.ul_lock));
   2150 
   2151 	if (!f->uf_ufsvfsp->vfs_root) {
   2152 		MINUTE((": vfs_root is NULL]\n"));
   2153 		return (0);
   2154 	}
   2155 
   2156 	alloc_lockfs_comment(f, lfp);
   2157 	f->uf_lf_err = 0;
   2158 
   2159 	if (!LOCKFS_IS_ELOCK(lfp)) {
   2160 		lfp->lf_lock = f->uf_lf.lf_lock = LOCKFS_ELOCK;
   2161 		VN_HOLD(f->uf_ufsvfsp->vfs_root);
   2162 		f->uf_lf_err =
   2163 		    ufs__fiolfs(f->uf_ufsvfsp->vfs_root,
   2164 		    &f->uf_lf, /* from_user */ 0, /* from_log */ 0);
   2165 		VN_RELE(f->uf_ufsvfsp->vfs_root);
   2166 	}
   2167 
   2168 	handle_lockfs_rc = f->uf_lf_err != 0? lockfs_failure: lockfs_success;
   2169 	rc = handle_lockfs_rc(f);
   2170 
   2171 	MINUTE(("] "));
   2172 	return (rc);
   2173 }
   2174 
   2175 static int
   2176 lockfs_failure(ufs_failure_t *f)
   2177 {
   2178 	int	error;
   2179 	ufs_failure_states_t	s;
   2180 
   2181 	TRIVIA(("[lockfs_failure"));
   2182 	ASSERT(MUTEX_HELD(&f->uf_mutex));
   2183 
   2184 	if (!f->uf_ufsvfsp) {
   2185 		TRIVIA((": ufsvfsp is NULL]\n"));
   2186 		return (0);
   2187 	}
   2188 
   2189 	error = f->uf_lf_err;
   2190 	switch (error) {
   2191 			/* non-transient errors: */
   2192 	case EACCES:	/* disk/in-core metadata reconciliation failed  */
   2193 	case EPERM:	/* inode reconciliation failed; incore inode changed? */
   2194 	case EIO:	/* device is hard-locked or not responding */
   2195 	case EROFS:	/* device is write-locked */
   2196 	case EDEADLK:	/* can't lockfs; deadlock would result; */
   2197 			/* Swapping or saving accounting records */
   2198 			/* onto this fs can cause this errno. */
   2199 
   2200 		MINOR(("ufs_fiolfs(\"%s\") of %s failed: %s (%d)",
   2201 		    fs_name(f), lock_name(&f->uf_lf),
   2202 		    err_name(error), error));
   2203 
   2204 		/*
   2205 		 * if can't get lock, then fallback to panic, unless
   2206 		 * unless unmount was requested (although unmount will
   2207 		 * probably fail if the lock failed, so we'll panic
   2208 		 * anyway
   2209 		 */
   2210 
   2211 		s = ((f->uf_flags & UFSFX_LCKUMOUNT) && error != EDEADLK) ?
   2212 		    UF_UMOUNT: UF_PANIC;
   2213 
   2214 		if (!set_state(f, s)) {
   2215 			real_panic(f, " ");
   2216 			/*NOTREACHED*/
   2217 			break;
   2218 		}
   2219 		break;
   2220 
   2221 
   2222 	case EBUSY:
   2223 	case EAGAIN:
   2224 
   2225 		f->uf_retry = ufsfx_tune.uft_short_err_period;
   2226 		if (curthread->t_flag & T_DONTPEND) {
   2227 			curthread->t_flag &= ~T_DONTPEND;
   2228 
   2229 		} else if (!(f->uf_s & (UF_LOCKED | UF_FIXING))) {
   2230 			ufs_failure_states_t state;
   2231 			/*
   2232 			 * if we didn't know that the fix had started,
   2233 			 * take note
   2234 			 */
   2235 			state = error == EBUSY? UF_LOCKED: UF_FIXING;
   2236 			if (!set_state(f, state)) {
   2237 				TRIVIA((": failed] "));
   2238 				return (0);
   2239 			}
   2240 		}
   2241 		break;
   2242 
   2243 	default:	/* some other non-fatal error */
   2244 		MINOR(("lockfs(\"%s\") of %s returned %s (%d)",
   2245 		    lock_name(&f->uf_lf), fs_name(f),
   2246 		    err_name(f->uf_lf_err), f->uf_lf_err));
   2247 
   2248 		f->uf_retry = ufsfx_tune.uft_short_err_period;
   2249 		break;
   2250 
   2251 	case EINVAL:	/* unmounted? */
   2252 		(void) set_state(f, UF_NOTFIX);
   2253 		break;
   2254 	}
   2255 	TRIVIA(("] "));
   2256 	return (1);
   2257 }
   2258 
   2259 static int
   2260 lockfs_success(ufs_failure_t *f)
   2261 {
   2262 	TRIVIA(("[lockfs_success"));
   2263 	ASSERT(MUTEX_HELD(&f->uf_mutex));
   2264 
   2265 	if (!f->uf_ufsvfsp) {
   2266 		TRIVIA((": ufsvfsp is NULL]\n"));
   2267 		return (0);
   2268 	}
   2269 
   2270 	switch (f->uf_lf.lf_lock) {
   2271 	case LOCKFS_ELOCK:	/* error lock worked */
   2272 
   2273 		if (!set_state(f, UF_LOCKED)) {
   2274 			TRIVIA((": failed] "));
   2275 			return (0);
   2276 		}
   2277 		break;
   2278 
   2279 	case LOCKFS_ULOCK: 			/* unlock worked */
   2280 		/*
   2281 		 * how'd we get here?
   2282 		 * This should be done from fsck's unlock,
   2283 		 * not from this thread's context.
   2284 		 */
   2285 		cmn_err(CE_WARN, "Unlocked error-lock of %s", fs_name(f));
   2286 		ufsfx_unlockfs(f->uf_ufsvfsp);
   2287 		break;
   2288 
   2289 	default:
   2290 		if (!set_state(f, UF_NOTFIX)) {
   2291 			TRIVIA((": failed] "));
   2292 			return (0);
   2293 		}
   2294 		break;
   2295 	}
   2296 	TRIVIA(("] "));
   2297 	return (1);
   2298 }
   2299 
   2300 /*
   2301  * when fsck is running it puts its pid into the lockfs
   2302  * comment structure, prefaced by PIDSTR
   2303  */
   2304 const char *PIDSTR = "[pid:";
   2305 static int
   2306 fsck_active(ufs_failure_t *f)
   2307 {
   2308 	char		*cp;
   2309 	int		 i, found, errlocked;
   2310 	size_t		 comlen;
   2311 	const int	 PIDSTRLEN = (int)strlen(PIDSTR);
   2312 	struct ulockfs	*ulp = &f->uf_ufsvfsp->vfs_ulockfs;
   2313 
   2314 	TRIVIA(("[fsck_active"));
   2315 
   2316 	ASSERT(f);
   2317 	ASSERT(f->uf_s & UF_FIXING);
   2318 	ASSERT(MUTEX_HELD(&f->uf_mutex));
   2319 	ASSERT(f->uf_ufsvfsp);
   2320 	ASSERT(MUTEX_NOT_HELD(f->uf_vfs_lockp));
   2321 	ASSERT(MUTEX_NOT_HELD(&ulp->ul_lock));
   2322 
   2323 	mutex_enter(&ulp->ul_lock);
   2324 	cp = ulp->ul_lockfs.lf_comment;
   2325 	comlen = ulp->ul_lockfs.lf_comlen;
   2326 	errlocked = (int)ULOCKFS_IS_ELOCK(ulp);
   2327 	mutex_exit(&ulp->ul_lock);
   2328 
   2329 	if (!cp || comlen == 0) {
   2330 		TRIVIA((": null comment or comlen <= 0, found:0]"));
   2331 		return (0);
   2332 	}
   2333 
   2334 	for (found = i = 0; !found && i < (comlen - PIDSTRLEN); i++, cp++)
   2335 		found = strncmp(cp, PIDSTR, PIDSTRLEN) == 0;
   2336 
   2337 	TRIVIA(("found:%d, is_elock:%d]", found, errlocked));
   2338 	return (errlocked & found);
   2339 }
   2340 
   2341 static const char unknown_fs[]		= "<unknown fs>";
   2342 static const char null_failure[] = "<NULL ufs failure record; unknown fs>";
   2343 static const char mutated_vfs_bufp[]	= "<mutated vfs_bufp, unknown fs>";
   2344 static const char mutated_vfs_fs[]	= "<mutated vfs_fs, unknown fs>";
   2345 
   2346 static char *
   2347 fs_name(ufs_failure_t *f)
   2348 {
   2349 	HIDEOUS(("[fs_name"));
   2350 	ASSERT(MUTEX_HELD(&f->uf_mutex));
   2351 
   2352 	if (!f) {
   2353 		HIDEOUS((": failure ptr is NULL]\n"));
   2354 		return ((char *)null_failure);
   2355 	}
   2356 
   2357 	if (f->uf_fsname[0] != '\0') {
   2358 		HIDEOUS((": return (uf_fsname)]\n"));
   2359 		return (f->uf_fsname);
   2360 	}
   2361 
   2362 	if (MUTEX_HELD(f->uf_vfs_lockp)) {
   2363 		if (f->uf_bp != f->uf_ufsvfsp->vfs_bufp) {
   2364 			HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
   2365 			    (void *)f->uf_bp, (void *)f->uf_ufsvfsp->vfs_bufp));
   2366 			return ((char *)mutated_vfs_bufp);
   2367 		}
   2368 		if (f->uf_fs != f->uf_ufsvfsp->vfs_fs) {
   2369 			HIDEOUS((": vfs_bufp mutated from 0x%p to 0x%p\n",
   2370 			    (void *)f->uf_fs, (void *)f->uf_ufsvfsp->vfs_fs));
   2371 			return ((char *)mutated_vfs_fs);
   2372 		}
   2373 		if (f->uf_ufsvfsp && f->uf_bp && f->uf_fs &&
   2374 		    *f->uf_fs->fs_fsmnt != '\0') {
   2375 			HIDEOUS((": return (fs_fsmnt)]\n"));
   2376 			return (f->uf_fs->fs_fsmnt);
   2377 		}
   2378 	}
   2379 
   2380 	HIDEOUS((": unknown file system]\n"));
   2381 	return ((char *)unknown_fs);
   2382 }
   2383 
   2384 #if defined(DEBUG)
   2385 static char *
   2386 lock_name(struct lockfs *lfp)
   2387 {
   2388 	struct lock_description	*l;
   2389 	char			*lname;
   2390 
   2391 	HIDEOUS(("[lock_name"));
   2392 
   2393 	lname = lock_desc[0].ld_name;
   2394 	for (l = &lock_desc[1]; l->ld_name != NULL; l++) {
   2395 		if (lfp && lfp->lf_lock == l->ld_type) {
   2396 			lname = l->ld_name;
   2397 			break;
   2398 		}
   2399 	}
   2400 	HIDEOUS(("]"));
   2401 	return (lname);
   2402 }
   2403 
   2404 static char *
   2405 state_name(ufs_failure_states_t state)
   2406 {
   2407 	ufsd_t	*s;
   2408 
   2409 	HIDEOUS(("[state_name"));
   2410 
   2411 	s = get_state_desc(state);
   2412 
   2413 	HIDEOUS(("]"));
   2414 	return (s->ud_name);
   2415 }
   2416 
   2417 static char *
   2418 err_name(int error)
   2419 {
   2420 	struct error_description *e;
   2421 
   2422 	HIDEOUS(("[err_name"));
   2423 
   2424 	for (e = &err_desc[1]; e->ed_name != NULL; e++) {
   2425 		if (error == e->ed_errno) {
   2426 			HIDEOUS(("]"));
   2427 			return (e->ed_name);
   2428 		}
   2429 	}
   2430 	HIDEOUS(("]"));
   2431 	return (err_desc[0].ed_name);
   2432 }
   2433 
   2434 static char *
   2435 act_name(ufsa_t action)
   2436 {
   2437 	struct action_description *a;
   2438 
   2439 	HIDEOUS(("[act_name"));
   2440 
   2441 	for (a = &act_desc[1]; a->ad_name != NULL; a++) {
   2442 		if (action == a->ad_v) {
   2443 			HIDEOUS(("]"));
   2444 			return (a->ad_name);
   2445 		}
   2446 	}
   2447 	HIDEOUS(("]"));
   2448 	return (act_desc[0].ad_name);
   2449 }
   2450 
   2451 /*
   2452  * dump failure list
   2453  */
   2454 static void
   2455 dump_uf_list(char *msg)
   2456 {
   2457 	ufs_failure_t	*f;
   2458 	int		 i;
   2459 	int		 list_was_locked = MUTEX_HELD(&ufs_fix.uq_mutex);
   2460 
   2461 	if (!list_was_locked && !mutex_tryenter(&ufs_fix.uq_mutex)) {
   2462 		printf("dump_uf_list: couldn't get list lock\n");
   2463 		return;
   2464 	}
   2465 
   2466 	if (msg) {
   2467 		printf("\n%s", msg);
   2468 	}
   2469 	printf("\ndump_uf_list:\n\tuq_lowat: %d, uq_ne: %d\n",
   2470 	    ufs_fix.uq_lowat, ufs_fix.uq_ne);
   2471 
   2472 	mutex_enter(&uf_stats.ufst_mutex);
   2473 	printf("\tuf_stats.current_races: %ld\n", uf_stats.ufst_current_races);
   2474 	printf("\tuf_stats.num_failed: %ld\n", uf_stats.ufst_num_failed);
   2475 	printf("\tuf_stats.num_fixed: %ld\n", uf_stats.ufst_num_fixed);
   2476 	printf("\tuf_stats.cpu_waste: %ld\n", uf_stats.ufst_cpu_waste);
   2477 	printf("\tuf_stats.lock_violations: %ld, unmount_failures: %ld\n",
   2478 	    uf_stats.ufst_lock_violations, uf_stats.ufst_unmount_failures);
   2479 	mutex_exit(&uf_stats.ufst_mutex);
   2480 
   2481 	for (f = ufs_fix.uq_ufhead, i = 1; f; f = f->uf_next, i++) {
   2482 
   2483 		if (!mutex_tryenter(&f->uf_mutex)) {
   2484 			printf("%d.\t\"skipped - try enter failed\"\n", i);
   2485 			continue;
   2486 		}
   2487 
   2488 		dump_uf(f, i);
   2489 
   2490 		mutex_exit(&f->uf_mutex);
   2491 	}
   2492 
   2493 	printf("\n");
   2494 
   2495 	if (!list_was_locked)
   2496 		mutex_exit(&ufs_fix.uq_mutex);
   2497 }
   2498 
   2499 static void
   2500 dump_uf(ufs_failure_t *f, int i)
   2501 {
   2502 	if (!f) {
   2503 		printf("dump_uf: NULL failure record\n");
   2504 		return;
   2505 	}
   2506 
   2507 	printf("%d.\t\"%s\" is %s.\n",
   2508 	    i, fs_name(f), state_name(f->uf_s));
   2509 	printf("\t\"%s\"\tAddr: 0x%p\n", f->uf_panic_str, (void *)f);
   2510 	printf("\tNext: 0x%p\t\tPrev: 0x%p\n",
   2511 	    (void *)f->uf_next, (void *)f->uf_prev);
   2512 
   2513 	if (f->uf_orig)
   2514 		printf("\tOriginal failure: 0x%p \"%s\"\n",
   2515 		    (void *)f->uf_orig, f->uf_orig->uf_panic_str);
   2516 
   2517 	printf("\tUfsvfs: 0x%p\t\tVfs_lockp: 0x%p\n",
   2518 	    (void *)f->uf_ufsvfsp, (void *)f->uf_vfs_lockp);
   2519 	printf("\tVfs_fsfxp: 0x%p\n", (void *)f->uf_vfs_ufsfxp);
   2520 	printf("\tVfs_bufp: 0x%p", (void *)f->uf_bp);
   2521 
   2522 	if (f->uf_bp)
   2523 		printf("\t\tVfs_fs: 0x%p\n", (void *)f->uf_fs);
   2524 	else
   2525 		printf("\n");
   2526 
   2527 	printf("\tBegin: 0x%lx\tEntered: 0x%lx\tEnd: 0x%lx\n",
   2528 	    f->uf_begin_tm, f->uf_entered_tm, f->uf_end_tm);
   2529 
   2530 	printf("\tFlags: (%d) %s%s%s%s", f->uf_flags,
   2531 	    f->uf_flags & UFSFX_LCKONLY?	 "\"lock only\" "	: "",
   2532 	    f->uf_flags & UFSFX_LCKUMOUNT?	 "\"lock+unmount\" "	: "",
   2533 	    f->uf_flags & UFSFX_REPAIR_START? "\"started repair\" "	: "",
   2534 	    f->uf_flags == 0?                "<none>"               : "");
   2535 
   2536 	printf("\tRetry: %ld seconds\n", f->uf_retry);
   2537 
   2538 	printf("\tLockfs:\ttype: %s\terror: %s (%d)\n",
   2539 	    lock_name(&f->uf_lf), err_name(f->uf_lf_err), f->uf_lf_err);
   2540 
   2541 }
   2542 #endif /* DEBUG */
   2543 
   2544 /*
   2545  * returns # of ufs_failures in a non-terminal state on queue
   2546  * used to coordinate with hlock thread (see ufs_thread.c)
   2547  * and to determine when the error lock thread may exit
   2548  */
   2549 
   2550 int
   2551 ufsfx_get_failure_qlen(void)
   2552 {
   2553 	ufs_failure_t	*f;
   2554 	ufsd_t		*s;
   2555 	int		 qlen = 0;
   2556 
   2557 	MINUTE(("[ufsfx_get_failure_qlen"));
   2558 
   2559 	if (!mutex_tryenter(&ufs_fix.uq_mutex))
   2560 		return (-1);
   2561 
   2562 	/*
   2563 	 * walk down failure list
   2564 	 */
   2565 
   2566 	for (f = ufs_fix.uq_ufhead; f; f = f->uf_next) {
   2567 
   2568 		if (!mutex_tryenter(&f->uf_mutex))
   2569 			continue;
   2570 
   2571 		s = get_state_desc(f->uf_s);
   2572 
   2573 		if (s->ud_attr.terminal) {
   2574 			mutex_exit(&f->uf_mutex);
   2575 			continue;
   2576 		}
   2577 
   2578 		MINUTE((": found: %s, \"%s: %s\"\n",
   2579 		    fs_name(f), state_name(f->uf_s), f->uf_panic_str));
   2580 
   2581 		qlen++;
   2582 		mutex_exit(&f->uf_mutex);
   2583 	}
   2584 
   2585 	mutex_exit(&ufs_fix.uq_mutex);
   2586 
   2587 	MINUTE((": qlen=%d]\n", qlen));
   2588 
   2589 	return (qlen);
   2590 }
   2591 
   2592 /*
   2593  * timeout routine
   2594  *  called to shutdown fix failure thread and server daemon
   2595  */
   2596 static void
   2597 ufsfx_kill_fix_failure_thread(void *arg)
   2598 {
   2599 	clock_t odelta = (clock_t)arg;
   2600 	int	qlen;
   2601 
   2602 	MAJOR(("[ufsfx_kill_fix_failure_thread"));
   2603 
   2604 	qlen = ufsfx_get_failure_qlen();
   2605 
   2606 	if (qlen < 0) {
   2607 		clock_t delta;
   2608 
   2609 		delta = odelta << 1;
   2610 		if (delta <= 0)
   2611 			delta = INT_MAX;
   2612 
   2613 		(void) timeout(ufsfx_kill_fix_failure_thread,
   2614 		    (void *)delta, delta);
   2615 		MAJOR((": rescheduled"));
   2616 
   2617 	} else if (qlen == 0) {
   2618 		ufs_thread_exit(&ufs_fix);
   2619 		MAJOR((": killed"));
   2620 	}
   2621 	/*
   2622 	 * else
   2623 	 *  let timeout expire
   2624 	 */
   2625 	MAJOR(("]\n"));
   2626 }
   2627