1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 1705 jwahlig * Common Development and Distribution License (the "License"). 6 1705 jwahlig * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* 22 9858 Pavel * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 0 stevel * Use is subject to license terms. 24 0 stevel */ 25 0 stevel 26 0 stevel /* 27 0 stevel * NFS Version 4 state recovery code. 28 0 stevel */ 29 0 stevel 30 0 stevel #include <nfs/nfs4_clnt.h> 31 0 stevel #include <nfs/nfs4.h> 32 0 stevel #include <nfs/rnode4.h> 33 0 stevel #include <sys/cmn_err.h> 34 0 stevel #include <sys/cred.h> 35 0 stevel #include <sys/systm.h> 36 0 stevel #include <sys/flock.h> 37 0 stevel #include <sys/dnlc.h> 38 0 stevel #include <sys/ddi.h> 39 0 stevel #include <sys/disp.h> 40 0 stevel #include <sys/list.h> 41 0 stevel #include <sys/sdt.h> 42 0 stevel 43 0 stevel extern r4hashq_t *rtable4; 44 0 stevel 45 0 stevel /* 46 0 stevel * Information that describes what needs to be done for recovery. It is 47 0 stevel * passed to a client recovery thread as well as passed to various recovery 48 0 stevel * routines. rc_mi, rc_vp1, and rc_vp2 refer to the filesystem and 49 0 stevel * vnode(s) affected by recovery. rc_vp1 and rc_vp2 are references (use 50 0 stevel * VN_HOLD) or NULL. rc_lost_rqst contains information about the lost 51 0 stevel * lock or open/close request, and it holds reference counts for the 52 0 stevel * various objects (vnode, etc.). The recovery thread also uses flags set 53 0 stevel * in the mntinfo4_t or vnode_t to tell it what to do. rc_error is used 54 0 stevel * to save the error that originally triggered the recovery event -- will 55 0 stevel * later be used to set mi_error if recovery doesn't work. rc_bseqid_rqst 56 0 stevel * contains information about the request that got NFS4ERR_BAD_SEQID, and 57 0 stevel * it holds reference count for the various objects (vnode, open owner, 58 0 stevel * open stream, lock owner). 59 0 stevel */ 60 0 stevel 61 0 stevel typedef struct { 62 0 stevel mntinfo4_t *rc_mi; 63 0 stevel vnode_t *rc_vp1; 64 0 stevel vnode_t *rc_vp2; 65 0 stevel nfs4_recov_t rc_action; 66 0 stevel stateid4 rc_stateid; 67 0 stevel bool_t rc_srv_reboot; /* server has rebooted */ 68 0 stevel nfs4_lost_rqst_t *rc_lost_rqst; 69 0 stevel nfs4_error_t rc_orig_errors; /* original errors causing recovery */ 70 0 stevel int rc_error; 71 0 stevel nfs4_bseqid_entry_t *rc_bseqid_rqst; 72 0 stevel } recov_info_t; 73 0 stevel 74 0 stevel /* 75 0 stevel * How long to wait before trying again if there is an error doing 76 0 stevel * recovery, in seconds. 77 0 stevel */ 78 0 stevel 79 0 stevel static int recov_err_delay = 1; 80 0 stevel 81 0 stevel /* 82 0 stevel * How long to wait when processing NFS4ERR_GRACE or NFS4ERR_DELAY 83 0 stevel * errors. Expressed in seconds. Default is defined as 84 0 stevel * NFS4ERR_DELAY_TIME and this variable is initialized in nfs4_subr_init() 85 0 stevel */ 86 0 stevel time_t nfs4err_delay_time = 0; 87 0 stevel 88 0 stevel /* 89 0 stevel * Tuneable to limit how many time "exempt" ops go OTW 90 0 stevel * after a recovery error. Exempt op hints are OH_CLOSE, 91 0 stevel * OH_LOCKU, OH_DELEGRETURN. These previously always went 92 0 stevel * OTW even after rnode was "dead" due to recovery errors. 93 0 stevel * 94 0 stevel * The tuneable below limits the number of times a start_fop 95 0 stevel * invocation will retry the exempt hints. After the limit 96 0 stevel * is reached, nfs4_start_fop will return an error just like 97 0 stevel * it would for non-exempt op hints. 98 0 stevel */ 99 0 stevel int nfs4_max_recov_error_retry = 3; 100 0 stevel 101 0 stevel /* 102 0 stevel * Number of seconds the recovery thread should pause before retry when the 103 0 stevel * filesystem has been forcibly unmounted. 104 0 stevel */ 105 0 stevel 106 0 stevel int nfs4_unmount_delay = 1; 107 0 stevel 108 0 stevel #ifdef DEBUG 109 0 stevel 110 0 stevel /* 111 0 stevel * How long to wait (in seconds) between recovery operations on a given 112 0 stevel * file. Normally zero, but could be set longer for testing purposes. 113 0 stevel */ 114 0 stevel static int nfs4_recovdelay = 0; 115 0 stevel 116 0 stevel /* 117 0 stevel * Switch that controls whether to go into the debugger when recovery 118 0 stevel * fails. 119 0 stevel */ 120 0 stevel static int nfs4_fail_recov_stop = 0; 121 0 stevel 122 0 stevel /* 123 0 stevel * Tuneables to debug client namespace interaction with server 124 0 stevel * mount points: 125 0 stevel * 126 0 stevel * nfs4_srvmnt_fail_cnt: 127 0 stevel * number of times EACCES returned because client 128 0 stevel * attempted to cross server mountpoint 129 0 stevel * 130 0 stevel * nfs4_srvmnt_debug: 131 0 stevel * trigger console printf whenever client attempts 132 0 stevel * to cross server mountpoint 133 0 stevel */ 134 0 stevel int nfs4_srvmnt_fail_cnt = 0; 135 0 stevel int nfs4_srvmnt_debug = 0; 136 0 stevel #endif 137 0 stevel 138 0 stevel /* forward references, in alphabetic order */ 139 0 stevel static void close_after_open_resend(vnode_t *, cred_t *, uint32_t, 140 0 stevel nfs4_error_t *); 141 0 stevel static void errs_to_action(recov_info_t *, 142 0 stevel nfs4_server_t *, mntinfo4_t *, stateid4 *, nfs4_lost_rqst_t *, int, 143 0 stevel nfs_opnum4, nfs4_bseqid_entry_t *); 144 0 stevel static void flush_reinstate(nfs4_lost_rqst_t *); 145 0 stevel static void free_milist(mntinfo4_t **, int); 146 0 stevel static mntinfo4_t **make_milist(nfs4_server_t *, int *); 147 0 stevel static int nfs4_check_recov_err(vnode_t *, nfs4_op_hint_t, 148 0 stevel nfs4_recov_state_t *, int, char *); 149 0 stevel static char *nfs4_getsrvnames(mntinfo4_t *, size_t *); 150 0 stevel static void nfs4_recov_fh_fail(vnode_t *, int, nfsstat4); 151 0 stevel static void nfs4_recov_thread(recov_info_t *); 152 0 stevel static void nfs4_remove_lost_rqsts(mntinfo4_t *, nfs4_server_t *); 153 0 stevel static void nfs4_resend_lost_rqsts(recov_info_t *, nfs4_server_t *); 154 0 stevel static cred_t *pid_to_cr(pid_t); 155 0 stevel static void reclaim_one_lock(vnode_t *, flock64_t *, nfs4_error_t *, int *); 156 0 stevel static void recov_bad_seqid(recov_info_t *); 157 0 stevel static void recov_badstate(recov_info_t *, vnode_t *, nfsstat4); 158 0 stevel static void recov_clientid(recov_info_t *, nfs4_server_t *); 159 0 stevel static void recov_done(mntinfo4_t *, recov_info_t *); 160 0 stevel static void recov_filehandle(nfs4_recov_t, mntinfo4_t *, vnode_t *); 161 0 stevel static void recov_newserver(recov_info_t *, nfs4_server_t **, bool_t *); 162 0 stevel static void recov_openfiles(recov_info_t *, nfs4_server_t *); 163 0 stevel static void recov_stale(mntinfo4_t *, vnode_t *); 164 0 stevel static void nfs4_free_lost_rqst(nfs4_lost_rqst_t *, nfs4_server_t *); 165 0 stevel static void recov_throttle(recov_info_t *, vnode_t *); 166 0 stevel static void relock_skip_pid(locklist_t *, pid_t); 167 0 stevel static void resend_lock(nfs4_lost_rqst_t *, nfs4_error_t *); 168 0 stevel static void resend_one_op(nfs4_lost_rqst_t *, nfs4_error_t *, mntinfo4_t *, 169 0 stevel nfs4_server_t *); 170 0 stevel static void save_bseqid_rqst(nfs4_bseqid_entry_t *, recov_info_t *); 171 0 stevel static void start_recovery(recov_info_t *, mntinfo4_t *, vnode_t *, vnode_t *, 172 0 stevel nfs4_server_t *); 173 0 stevel static void start_recovery_action(nfs4_recov_t, bool_t, mntinfo4_t *, vnode_t *, 174 0 stevel vnode_t *); 175 0 stevel static int wait_for_recovery(mntinfo4_t *, nfs4_op_hint_t); 176 0 stevel 177 0 stevel /* 178 0 stevel * Return non-zero if the given errno, status, and rpc status codes 179 0 stevel * in the nfs4_error_t indicate that client recovery is needed. 180 0 stevel * "stateful" indicates whether the call that got the error establishes or 181 0 stevel * removes state on the server (open, close, lock, unlock, delegreturn). 182 0 stevel */ 183 0 stevel 184 0 stevel int 185 0 stevel nfs4_needs_recovery(nfs4_error_t *ep, bool_t stateful, vfs_t *vfsp) 186 0 stevel { 187 0 stevel int recov = 0; 188 0 stevel mntinfo4_t *mi; 189 0 stevel 190 0 stevel /* 191 0 stevel * Try failover if the error values justify it and if 192 0 stevel * it's a failover mount. Don't try if the mount is in 193 0 stevel * progress, failures are handled explicitly by nfs4rootvp. 194 0 stevel */ 195 0 stevel if (nfs4_try_failover(ep)) { 196 0 stevel mi = VFTOMI4(vfsp); 197 0 stevel mutex_enter(&mi->mi_lock); 198 0 stevel recov = FAILOVER_MOUNT4(mi) && !(mi->mi_flags & MI4_MOUNTING); 199 0 stevel mutex_exit(&mi->mi_lock); 200 0 stevel if (recov) 201 0 stevel return (recov); 202 0 stevel } 203 0 stevel 204 0 stevel if (ep->error == EINTR || NFS4_FRC_UNMT_ERR(ep->error, vfsp)) { 205 0 stevel /* 206 0 stevel * The server may have gotten the request, so for stateful 207 0 stevel * ops we need to resynchronize and possibly back out the 208 0 stevel * op. 209 0 stevel */ 210 0 stevel return (stateful); 211 0 stevel } 212 0 stevel if (ep->error != 0) 213 0 stevel return (0); 214 0 stevel 215 0 stevel /* stat values are listed alphabetically */ 216 0 stevel /* 217 0 stevel * There are two lists here: the errors for which we have code, and 218 0 stevel * the errors for which we plan to have code before FCS. For the 219 0 stevel * second list, print a warning message but don't attempt recovery. 220 0 stevel */ 221 0 stevel switch (ep->stat) { 222 0 stevel case NFS4ERR_BADHANDLE: 223 0 stevel case NFS4ERR_BAD_SEQID: 224 0 stevel case NFS4ERR_BAD_STATEID: 225 0 stevel case NFS4ERR_DELAY: 226 0 stevel case NFS4ERR_EXPIRED: 227 0 stevel case NFS4ERR_FHEXPIRED: 228 0 stevel case NFS4ERR_GRACE: 229 0 stevel case NFS4ERR_OLD_STATEID: 230 0 stevel case NFS4ERR_RESOURCE: 231 0 stevel case NFS4ERR_STALE_CLIENTID: 232 0 stevel case NFS4ERR_STALE_STATEID: 233 0 stevel case NFS4ERR_WRONGSEC: 234 0 stevel case NFS4ERR_STALE: 235 0 stevel recov = 1; 236 0 stevel break; 237 0 stevel #ifdef DEBUG 238 0 stevel case NFS4ERR_LEASE_MOVED: 239 0 stevel case NFS4ERR_MOVED: 240 0 stevel zcmn_err(VFTOMI4(vfsp)->mi_zone->zone_id, 241 0 stevel CE_WARN, "!Can't yet recover from NFS status %d", 242 5302 th199096 ep->stat); 243 0 stevel break; 244 0 stevel #endif 245 0 stevel } 246 0 stevel 247 0 stevel return (recov); 248 0 stevel } 249 0 stevel 250 0 stevel /* 251 0 stevel * Some operations such as DELEGRETURN want to avoid invoking 252 0 stevel * recovery actions that will only mark the file dead. If 253 0 stevel * better handlers are invoked for any of these errors, this 254 0 stevel * routine should be modified. 255 0 stevel */ 256 0 stevel int 257 0 stevel nfs4_recov_marks_dead(nfsstat4 status) 258 0 stevel { 259 0 stevel if (status == NFS4ERR_BAD_SEQID || 260 0 stevel status == NFS4ERR_EXPIRED || 261 0 stevel status == NFS4ERR_BAD_STATEID || 262 0 stevel status == NFS4ERR_OLD_STATEID) 263 0 stevel return (1); 264 0 stevel return (0); 265 0 stevel } 266 0 stevel 267 0 stevel /* 268 0 stevel * Transfer the state recovery information in recovp to mi's resend queue, 269 0 stevel * and mark mi as having a lost state request. 270 0 stevel */ 271 0 stevel static void 272 0 stevel nfs4_enqueue_lost_rqst(recov_info_t *recovp, mntinfo4_t *mi) 273 0 stevel { 274 0 stevel nfs4_lost_rqst_t *lrp = recovp->rc_lost_rqst; 275 0 stevel 276 0 stevel ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 277 0 stevel nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 278 0 stevel 279 0 stevel ASSERT(lrp != NULL && lrp->lr_op != 0); 280 0 stevel 281 0 stevel NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 282 5302 th199096 "nfs4_enqueue_lost_rqst %p, op %d", 283 5302 th199096 (void *)lrp, lrp->lr_op)); 284 0 stevel 285 0 stevel mutex_enter(&mi->mi_lock); 286 0 stevel mi->mi_recovflags |= MI4R_LOST_STATE; 287 0 stevel if (lrp->lr_putfirst) 288 0 stevel list_insert_head(&mi->mi_lost_state, lrp); 289 0 stevel else 290 0 stevel list_insert_tail(&mi->mi_lost_state, lrp); 291 0 stevel recovp->rc_lost_rqst = NULL; 292 0 stevel mutex_exit(&mi->mi_lock); 293 0 stevel 294 0 stevel nfs4_queue_event(RE_LOST_STATE, mi, NULL, lrp->lr_op, lrp->lr_vp, 295 5302 th199096 lrp->lr_dvp, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 296 0 stevel } 297 0 stevel 298 0 stevel /* 299 0 stevel * Transfer the bad seqid recovery information in recovp to mi's 300 0 stevel * bad seqid queue, and mark mi as having a bad seqid request. 301 0 stevel */ 302 0 stevel void 303 0 stevel enqueue_bseqid_rqst(recov_info_t *recovp, mntinfo4_t *mi) 304 0 stevel { 305 0 stevel ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 306 0 stevel nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 307 0 stevel ASSERT(recovp->rc_bseqid_rqst != NULL); 308 0 stevel 309 0 stevel mutex_enter(&mi->mi_lock); 310 0 stevel mi->mi_recovflags |= MI4R_BAD_SEQID; 311 0 stevel list_insert_tail(&mi->mi_bseqid_list, recovp->rc_bseqid_rqst); 312 0 stevel recovp->rc_bseqid_rqst = NULL; 313 0 stevel mutex_exit(&mi->mi_lock); 314 0 stevel } 315 0 stevel 316 0 stevel /* 317 0 stevel * Initiate recovery. 318 0 stevel * 319 0 stevel * The nfs4_error_t contains the return codes that triggered a recovery 320 0 stevel * attempt. mi, vp1, and vp2 refer to the filesystem and files that were 321 0 stevel * being operated on. vp1 and vp2 may be NULL. 322 0 stevel * 323 0 stevel * Multiple calls are okay. If recovery is already underway, the call 324 0 stevel * updates the information about what state needs recovery but does not 325 0 stevel * start a new thread. The caller should hold mi->mi_recovlock as a reader 326 0 stevel * for proper synchronization with any recovery thread. 327 0 stevel * 328 0 stevel * This will return TRUE if recovery was aborted, and FALSE otherwise. 329 0 stevel */ 330 0 stevel bool_t 331 0 stevel nfs4_start_recovery(nfs4_error_t *ep, mntinfo4_t *mi, vnode_t *vp1, 332 0 stevel vnode_t *vp2, stateid4 *sid, nfs4_lost_rqst_t *lost_rqstp, nfs_opnum4 op, 333 0 stevel nfs4_bseqid_entry_t *bsep) 334 0 stevel { 335 0 stevel recov_info_t *recovp; 336 0 stevel nfs4_server_t *sp; 337 0 stevel bool_t abort = FALSE; 338 0 stevel bool_t gone = FALSE; 339 0 stevel 340 766 carlsonj ASSERT(nfs_zone() == mi->mi_zone); 341 0 stevel mutex_enter(&mi->mi_lock); 342 0 stevel /* 343 0 stevel * If there is lost state, we need to kick off recovery even if the 344 0 stevel * filesystem has been unmounted or the zone is shutting down. 345 0 stevel */ 346 0 stevel gone = FS_OR_ZONE_GONE4(mi->mi_vfsp); 347 0 stevel if (gone) { 348 0 stevel ASSERT(ep->error != EINTR || lost_rqstp != NULL); 349 0 stevel if (ep->error == EIO && lost_rqstp == NULL) { 350 0 stevel /* failed due to forced unmount, no new lost state */ 351 0 stevel abort = TRUE; 352 0 stevel } 353 0 stevel if ((ep->error == 0 || ep->error == ETIMEDOUT) && 354 0 stevel !(mi->mi_recovflags & MI4R_LOST_STATE)) { 355 0 stevel /* some other failure, no existing lost state */ 356 0 stevel abort = TRUE; 357 0 stevel } 358 0 stevel if (abort) { 359 0 stevel mutex_exit(&mi->mi_lock); 360 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 361 5302 th199096 "nfs4_start_recovery: fs unmounted")); 362 0 stevel return (TRUE); 363 0 stevel } 364 0 stevel } 365 0 stevel mi->mi_in_recovery++; 366 0 stevel mutex_exit(&mi->mi_lock); 367 0 stevel 368 0 stevel recovp = kmem_alloc(sizeof (recov_info_t), KM_SLEEP); 369 0 stevel recovp->rc_orig_errors = *ep; 370 0 stevel sp = find_nfs4_server(mi); 371 5302 th199096 errs_to_action(recovp, sp, mi, sid, lost_rqstp, gone, op, bsep); 372 0 stevel if (sp != NULL) 373 0 stevel mutex_exit(&sp->s_lock); 374 0 stevel start_recovery(recovp, mi, vp1, vp2, sp); 375 0 stevel if (sp != NULL) 376 0 stevel nfs4_server_rele(sp); 377 0 stevel return (FALSE); 378 0 stevel } 379 0 stevel 380 0 stevel /* 381 0 stevel * Internal version of nfs4_start_recovery. The difference is that the 382 0 stevel * caller specifies the recovery action, rather than the errors leading to 383 0 stevel * recovery. 384 0 stevel */ 385 0 stevel static void 386 0 stevel start_recovery_action(nfs4_recov_t what, bool_t reboot, mntinfo4_t *mi, 387 5302 th199096 vnode_t *vp1, vnode_t *vp2) 388 0 stevel { 389 0 stevel recov_info_t *recovp; 390 0 stevel 391 766 carlsonj ASSERT(nfs_zone() == mi->mi_zone); 392 0 stevel mutex_enter(&mi->mi_lock); 393 0 stevel mi->mi_in_recovery++; 394 0 stevel mutex_exit(&mi->mi_lock); 395 0 stevel 396 0 stevel recovp = kmem_zalloc(sizeof (recov_info_t), KM_SLEEP); 397 0 stevel recovp->rc_action = what; 398 0 stevel recovp->rc_srv_reboot = reboot; 399 0 stevel recovp->rc_error = EIO; 400 0 stevel start_recovery(recovp, mi, vp1, vp2, NULL); 401 0 stevel } 402 0 stevel 403 0 stevel static void 404 0 stevel start_recovery(recov_info_t *recovp, mntinfo4_t *mi, 405 5302 th199096 vnode_t *vp1, vnode_t *vp2, nfs4_server_t *sp) 406 0 stevel { 407 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 408 5302 th199096 "start_recovery: mi %p, what %s", (void*)mi, 409 5302 th199096 nfs4_recov_action_to_str(recovp->rc_action))); 410 0 stevel 411 0 stevel /* 412 0 stevel * Bump the reference on the vfs so that we can pass it to the 413 0 stevel * recovery thread. 414 0 stevel */ 415 0 stevel VFS_HOLD(mi->mi_vfsp); 416 1705 jwahlig MI4_HOLD(mi); 417 0 stevel again: 418 0 stevel switch (recovp->rc_action) { 419 0 stevel case NR_FAILOVER: 420 0 stevel ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 421 0 stevel nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 422 0 stevel if (mi->mi_servers->sv_next == NULL) 423 0 stevel goto out_no_thread; 424 0 stevel mutex_enter(&mi->mi_lock); 425 0 stevel mi->mi_recovflags |= MI4R_NEED_NEW_SERVER; 426 0 stevel mutex_exit(&mi->mi_lock); 427 0 stevel 428 0 stevel if (recovp->rc_lost_rqst != NULL) 429 0 stevel nfs4_enqueue_lost_rqst(recovp, mi); 430 0 stevel break; 431 0 stevel 432 0 stevel case NR_CLIENTID: 433 0 stevel /* 434 0 stevel * If the filesystem has been unmounted, punt. 435 0 stevel */ 436 0 stevel if (sp == NULL) 437 0 stevel goto out_no_thread; 438 0 stevel 439 0 stevel /* 440 0 stevel * If nobody else is working on the clientid, mark the 441 0 stevel * clientid as being no longer set. Then mark the specific 442 0 stevel * filesystem being worked on. 443 0 stevel */ 444 0 stevel if (!nfs4_server_in_recovery(sp)) { 445 0 stevel mutex_enter(&sp->s_lock); 446 0 stevel sp->s_flags &= ~N4S_CLIENTID_SET; 447 0 stevel mutex_exit(&sp->s_lock); 448 0 stevel } 449 0 stevel ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 450 0 stevel nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 451 0 stevel mutex_enter(&mi->mi_lock); 452 0 stevel mi->mi_recovflags |= MI4R_NEED_CLIENTID; 453 0 stevel if (recovp->rc_srv_reboot) 454 0 stevel mi->mi_recovflags |= MI4R_SRV_REBOOT; 455 0 stevel mutex_exit(&mi->mi_lock); 456 0 stevel break; 457 0 stevel 458 0 stevel case NR_OPENFILES: 459 0 stevel ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 460 0 stevel nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 461 0 stevel mutex_enter(&mi->mi_lock); 462 0 stevel mi->mi_recovflags |= MI4R_REOPEN_FILES; 463 0 stevel if (recovp->rc_srv_reboot) 464 0 stevel mi->mi_recovflags |= MI4R_SRV_REBOOT; 465 0 stevel mutex_exit(&mi->mi_lock); 466 0 stevel break; 467 0 stevel 468 0 stevel case NR_WRONGSEC: 469 0 stevel ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 470 0 stevel nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 471 0 stevel mutex_enter(&mi->mi_lock); 472 0 stevel mi->mi_recovflags |= MI4R_NEED_SECINFO; 473 0 stevel mutex_exit(&mi->mi_lock); 474 0 stevel break; 475 0 stevel 476 0 stevel case NR_EXPIRED: 477 0 stevel if (vp1 != NULL) 478 0 stevel recov_badstate(recovp, vp1, NFS4ERR_EXPIRED); 479 0 stevel if (vp2 != NULL) 480 0 stevel recov_badstate(recovp, vp2, NFS4ERR_EXPIRED); 481 0 stevel goto out_no_thread; /* no further recovery possible */ 482 0 stevel 483 0 stevel case NR_BAD_STATEID: 484 0 stevel if (vp1 != NULL) 485 0 stevel recov_badstate(recovp, vp1, NFS4ERR_BAD_STATEID); 486 0 stevel if (vp2 != NULL) 487 0 stevel recov_badstate(recovp, vp2, NFS4ERR_BAD_STATEID); 488 0 stevel goto out_no_thread; /* no further recovery possible */ 489 0 stevel 490 0 stevel case NR_FHEXPIRED: 491 0 stevel case NR_BADHANDLE: 492 0 stevel if (vp1 != NULL) 493 0 stevel recov_throttle(recovp, vp1); 494 0 stevel if (vp2 != NULL) 495 0 stevel recov_throttle(recovp, vp2); 496 0 stevel /* 497 0 stevel * Recover the filehandle now, rather than using a 498 0 stevel * separate thread. We can do this because filehandle 499 0 stevel * recovery is independent of any other state, and because 500 0 stevel * we know that we are not competing with the recovery 501 0 stevel * thread at this time. recov_filehandle will deal with 502 0 stevel * threads that are competing to recover this filehandle. 503 0 stevel */ 504 0 stevel ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 505 0 stevel nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 506 0 stevel if (vp1 != NULL) 507 0 stevel recov_filehandle(recovp->rc_action, mi, vp1); 508 0 stevel if (vp2 != NULL) 509 0 stevel recov_filehandle(recovp->rc_action, mi, vp2); 510 0 stevel goto out_no_thread; /* no further recovery needed */ 511 0 stevel 512 0 stevel case NR_STALE: 513 0 stevel /* 514 0 stevel * NFS4ERR_STALE handling 515 0 stevel * recov_stale() could set MI4R_NEED_NEW_SERVER to 516 0 stevel * indicate that we can and should failover. 517 0 stevel */ 518 0 stevel ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_READER) || 519 0 stevel nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 520 0 stevel 521 0 stevel if (vp1 != NULL) 522 0 stevel recov_stale(mi, vp1); 523 0 stevel if (vp2 != NULL) 524 0 stevel recov_stale(mi, vp2); 525 0 stevel mutex_enter(&mi->mi_lock); 526 0 stevel if ((mi->mi_recovflags & MI4R_NEED_NEW_SERVER) == 0) { 527 0 stevel mutex_exit(&mi->mi_lock); 528 0 stevel goto out_no_thread; 529 0 stevel } 530 0 stevel mutex_exit(&mi->mi_lock); 531 0 stevel recovp->rc_action = NR_FAILOVER; 532 0 stevel goto again; 533 0 stevel 534 0 stevel case NR_BAD_SEQID: 535 0 stevel if (recovp->rc_bseqid_rqst) { 536 0 stevel enqueue_bseqid_rqst(recovp, mi); 537 0 stevel break; 538 0 stevel } 539 0 stevel 540 0 stevel if (vp1 != NULL) 541 0 stevel recov_badstate(recovp, vp1, NFS4ERR_BAD_SEQID); 542 0 stevel if (vp2 != NULL) 543 0 stevel recov_badstate(recovp, vp2, NFS4ERR_BAD_SEQID); 544 0 stevel goto out_no_thread; /* no further recovery possible */ 545 0 stevel 546 0 stevel case NR_OLDSTATEID: 547 0 stevel if (vp1 != NULL) 548 0 stevel recov_badstate(recovp, vp1, NFS4ERR_OLD_STATEID); 549 0 stevel if (vp2 != NULL) 550 0 stevel recov_badstate(recovp, vp2, NFS4ERR_OLD_STATEID); 551 0 stevel goto out_no_thread; /* no further recovery possible */ 552 0 stevel 553 0 stevel case NR_GRACE: 554 0 stevel nfs4_set_grace_wait(mi); 555 0 stevel goto out_no_thread; /* no further action required for GRACE */ 556 0 stevel 557 0 stevel case NR_DELAY: 558 0 stevel if (vp1) 559 0 stevel nfs4_set_delay_wait(vp1); 560 0 stevel goto out_no_thread; /* no further action required for DELAY */ 561 0 stevel 562 0 stevel case NR_LOST_STATE_RQST: 563 0 stevel case NR_LOST_LOCK: 564 0 stevel nfs4_enqueue_lost_rqst(recovp, mi); 565 0 stevel break; 566 0 stevel 567 0 stevel default: 568 0 stevel nfs4_queue_event(RE_UNEXPECTED_ACTION, mi, NULL, 569 0 stevel recovp->rc_action, NULL, NULL, 0, NULL, 0, TAG_NONE, 570 0 stevel TAG_NONE, 0, 0); 571 0 stevel goto out_no_thread; 572 0 stevel } 573 0 stevel 574 0 stevel /* 575 0 stevel * If either file recently went through the same recovery, wait 576 0 stevel * awhile. This is in case there is some sort of bug; we might not 577 0 stevel * be able to recover properly, but at least we won't bombard the 578 0 stevel * server with calls, and we won't tie up the client. 579 0 stevel */ 580 0 stevel if (vp1 != NULL) 581 0 stevel recov_throttle(recovp, vp1); 582 0 stevel if (vp2 != NULL) 583 0 stevel recov_throttle(recovp, vp2); 584 0 stevel 585 0 stevel /* 586 0 stevel * If there's already a recovery thread, don't start another one. 587 0 stevel */ 588 0 stevel 589 0 stevel mutex_enter(&mi->mi_lock); 590 0 stevel if (mi->mi_flags & MI4_RECOV_ACTIV) { 591 0 stevel mutex_exit(&mi->mi_lock); 592 0 stevel goto out_no_thread; 593 0 stevel } 594 0 stevel mi->mi_flags |= MI4_RECOV_ACTIV; 595 0 stevel mutex_exit(&mi->mi_lock); 596 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 597 5302 th199096 "start_recovery: starting new thread for mi %p", (void*)mi)); 598 0 stevel 599 0 stevel recovp->rc_mi = mi; 600 0 stevel recovp->rc_vp1 = vp1; 601 0 stevel if (vp1 != NULL) { 602 0 stevel ASSERT(VTOMI4(vp1) == mi); 603 0 stevel VN_HOLD(recovp->rc_vp1); 604 0 stevel } 605 0 stevel recovp->rc_vp2 = vp2; 606 0 stevel if (vp2 != NULL) { 607 0 stevel ASSERT(VTOMI4(vp2) == mi); 608 0 stevel VN_HOLD(recovp->rc_vp2); 609 0 stevel } 610 0 stevel 611 0 stevel (void) zthread_create(NULL, 0, nfs4_recov_thread, recovp, 0, 612 5302 th199096 minclsyspri); 613 0 stevel return; 614 0 stevel 615 0 stevel /* not reached by thread creating call */ 616 0 stevel out_no_thread: 617 0 stevel mutex_enter(&mi->mi_lock); 618 0 stevel mi->mi_in_recovery--; 619 855 jwahlig if (mi->mi_in_recovery == 0) 620 855 jwahlig cv_broadcast(&mi->mi_cv_in_recov); 621 0 stevel mutex_exit(&mi->mi_lock); 622 0 stevel 623 0 stevel VFS_RELE(mi->mi_vfsp); 624 1705 jwahlig MI4_RELE(mi); 625 0 stevel /* 626 0 stevel * Free up resources that were allocated for us. 627 0 stevel */ 628 0 stevel kmem_free(recovp, sizeof (recov_info_t)); 629 0 stevel } 630 0 stevel 631 0 stevel static int 632 0 stevel nfs4_check_recov_err(vnode_t *vp, nfs4_op_hint_t op, 633 5302 th199096 nfs4_recov_state_t *rsp, int retry_err_cnt, char *str) 634 0 stevel { 635 0 stevel rnode4_t *rp; 636 0 stevel int error = 0; 637 0 stevel int exempt; 638 0 stevel 639 0 stevel if (vp == NULL) 640 0 stevel return (0); 641 0 stevel 642 0 stevel exempt = (op == OH_CLOSE || op == OH_LOCKU || op == OH_DELEGRETURN); 643 0 stevel rp = VTOR4(vp); 644 0 stevel mutex_enter(&rp->r_statelock); 645 0 stevel 646 0 stevel /* 647 0 stevel * If there was a recovery error, then allow op hints "exempt" from 648 0 stevel * recov errors to retry (currently 3 times). Either r_error or 649 0 stevel * EIO is returned for non-exempt op hints. 650 0 stevel */ 651 0 stevel if (rp->r_flags & R4RECOVERR) { 652 0 stevel if (exempt && rsp->rs_num_retry_despite_err <= 653 5302 th199096 nfs4_max_recov_error_retry) { 654 0 stevel 655 0 stevel /* 656 0 stevel * Check to make sure that we haven't already inc'd 657 0 stevel * rs_num_retry_despite_err for current nfs4_start_fop 658 0 stevel * instance. We don't want to double inc (if we were 659 0 stevel * called with vp2, then the vp1 call could have 660 0 stevel * already incremented. 661 0 stevel */ 662 0 stevel if (retry_err_cnt == rsp->rs_num_retry_despite_err) 663 0 stevel rsp->rs_num_retry_despite_err++; 664 0 stevel 665 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 666 5302 th199096 "nfs4_start_fop: %s %p DEAD, cnt=%d", str, 667 5302 th199096 (void *)vp, rsp->rs_num_retry_despite_err)); 668 0 stevel } else { 669 0 stevel error = (rp->r_error ? rp->r_error : EIO); 670 0 stevel /* 671 0 stevel * An ESTALE error on a non-regular file is not 672 0 stevel * "sticky". Return the ESTALE error once, but 673 0 stevel * clear the condition to allow future operations 674 0 stevel * to go OTW. This will allow the client to 675 0 stevel * recover if the server has merely unshared then 676 0 stevel * re-shared the file system. For regular files, 677 0 stevel * the unshare has destroyed the open state at the 678 0 stevel * server and we aren't willing to do a reopen (yet). 679 0 stevel */ 680 0 stevel if (error == ESTALE && vp->v_type != VREG) { 681 0 stevel rp->r_flags &= 682 5302 th199096 ~(R4RECOVERR|R4RECOVERRP|R4STALE); 683 0 stevel rp->r_error = 0; 684 0 stevel error = ESTALE; 685 0 stevel } 686 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 687 5302 th199096 "nfs4_start_fop: %s %p DEAD, cnt=%d error=%d", 688 5302 th199096 str, (void *)vp, 689 5302 th199096 rsp->rs_num_retry_despite_err, error)); 690 0 stevel } 691 0 stevel } 692 5302 th199096 693 0 stevel mutex_exit(&rp->r_statelock); 694 0 stevel return (error); 695 0 stevel } 696 0 stevel 697 0 stevel /* 698 0 stevel * Initial setup code that every operation should call if it might invoke 699 0 stevel * client recovery. Can block waiting for recovery to finish on a 700 0 stevel * filesystem. Either vnode ptr can be NULL. 701 0 stevel * 702 0 stevel * Returns 0 if there are no outstanding errors. Can return an 703 0 stevel * errno value under various circumstances (e.g., failed recovery, or 704 0 stevel * interrupted while waiting for recovery to finish). 705 0 stevel * 706 0 stevel * There must be a corresponding call to nfs4_end_op() to free up any locks 707 0 stevel * or resources allocated by this call (assuming this call succeeded), 708 0 stevel * using the same rsp that's passed in here. 709 0 stevel * 710 0 stevel * The open and lock seqid synchronization must be stopped before calling this 711 0 stevel * function, as it could lead to deadlock when trying to reopen a file or 712 0 stevel * reclaim a lock. The synchronization is obtained with calls to: 713 0 stevel * nfs4_start_open_seqid_sync() 714 0 stevel * nfs4_start_lock_seqid_sync() 715 0 stevel * 716 0 stevel * *startrecovp is set TRUE if the caller should not bother with the 717 0 stevel * over-the-wire call, and just initiate recovery for the given request. 718 0 stevel * This is typically used for state-releasing ops if the filesystem has 719 0 stevel * been forcibly unmounted. startrecovp may be NULL for 720 0 stevel * non-state-releasing ops. 721 0 stevel */ 722 0 stevel 723 0 stevel int 724 0 stevel nfs4_start_fop(mntinfo4_t *mi, vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op, 725 5302 th199096 nfs4_recov_state_t *rsp, bool_t *startrecovp) 726 0 stevel { 727 0 stevel int error = 0, rerr_cnt; 728 0 stevel nfs4_server_t *sp = NULL; 729 0 stevel nfs4_server_t *tsp; 730 0 stevel nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 731 9858 Pavel uint_t droplock_cnt; 732 0 stevel #ifdef DEBUG 733 0 stevel void *fop_caller; 734 0 stevel #endif 735 0 stevel 736 0 stevel ASSERT(vp1 == NULL || vp1->v_vfsp == mi->mi_vfsp); 737 0 stevel ASSERT(vp2 == NULL || vp2->v_vfsp == mi->mi_vfsp); 738 0 stevel 739 0 stevel #ifdef DEBUG 740 0 stevel if ((fop_caller = tsd_get(nfs4_tsd_key)) != NULL) { 741 0 stevel cmn_err(CE_PANIC, "Missing nfs4_end_fop: last caller %p", 742 5302 th199096 fop_caller); 743 0 stevel } 744 0 stevel (void) tsd_set(nfs4_tsd_key, caller()); 745 0 stevel #endif 746 0 stevel 747 0 stevel rsp->rs_sp = NULL; 748 0 stevel rsp->rs_flags &= ~NFS4_RS_RENAME_HELD; 749 0 stevel rerr_cnt = rsp->rs_num_retry_despite_err; 750 0 stevel 751 0 stevel /* 752 0 stevel * Process the items that may delay() based on server response 753 0 stevel */ 754 0 stevel error = nfs4_wait_for_grace(mi, rsp); 755 0 stevel if (error) 756 0 stevel goto out; 757 0 stevel 758 0 stevel if (vp1 != NULL) { 759 0 stevel error = nfs4_wait_for_delay(vp1, rsp); 760 0 stevel if (error) 761 0 stevel goto out; 762 0 stevel } 763 0 stevel 764 0 stevel /* Wait for a delegation recall to complete. */ 765 0 stevel 766 0 stevel error = wait_for_recall(vp1, vp2, op, rsp); 767 0 stevel if (error) 768 0 stevel goto out; 769 0 stevel 770 0 stevel /* 771 0 stevel * Wait for any current recovery actions to finish. Note that a 772 0 stevel * recovery thread can still start up after wait_for_recovery() 773 0 stevel * finishes. We don't block out recovery operations until we 774 0 stevel * acquire s_recovlock and mi_recovlock. 775 0 stevel */ 776 0 stevel error = wait_for_recovery(mi, op); 777 0 stevel if (error) 778 0 stevel goto out; 779 0 stevel 780 0 stevel /* 781 0 stevel * Check to see if the rnode is already marked with a 782 0 stevel * recovery error. If so, return it immediately. But 783 0 stevel * always pass CLOSE, LOCKU, and DELEGRETURN so we can 784 0 stevel * clean up state on the server. 785 0 stevel */ 786 0 stevel 787 0 stevel if (vp1 != NULL) { 788 0 stevel if (error = nfs4_check_recov_err(vp1, op, rsp, rerr_cnt, "vp1")) 789 0 stevel goto out; 790 0 stevel nfs4_check_remap(mi, vp1, NFS4_REMAP_CKATTRS, &e); 791 0 stevel } 792 0 stevel 793 0 stevel if (vp2 != NULL) { 794 0 stevel if (error = nfs4_check_recov_err(vp2, op, rsp, rerr_cnt, "vp2")) 795 0 stevel goto out; 796 0 stevel nfs4_check_remap(mi, vp2, NFS4_REMAP_CKATTRS, &e); 797 0 stevel } 798 0 stevel 799 0 stevel /* 800 0 stevel * The lock order calls for us to acquire s_recovlock before 801 0 stevel * mi_recovlock, but we have to hold mi_recovlock to look up sp (to 802 0 stevel * prevent races with the failover/migration code). So acquire 803 0 stevel * mi_recovlock, look up sp, drop mi_recovlock, acquire 804 0 stevel * s_recovlock and mi_recovlock, then verify that sp is still the 805 0 stevel * right object. XXX Can we find a simpler way to deal with this? 806 0 stevel */ 807 0 stevel if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 808 0 stevel mi->mi_flags & MI4_INT)) { 809 0 stevel error = EINTR; 810 0 stevel goto out; 811 0 stevel } 812 0 stevel get_sp: 813 0 stevel sp = find_nfs4_server(mi); 814 0 stevel if (sp != NULL) { 815 0 stevel sp->s_otw_call_count++; 816 163 ek110237 mutex_exit(&sp->s_lock); 817 9858 Pavel droplock_cnt = mi->mi_srvset_cnt; 818 0 stevel } 819 0 stevel nfs_rw_exit(&mi->mi_recovlock); 820 0 stevel 821 0 stevel if (sp != NULL) { 822 0 stevel if (nfs_rw_enter_sig(&sp->s_recovlock, RW_READER, 823 5302 th199096 mi->mi_flags & MI4_INT)) { 824 0 stevel error = EINTR; 825 0 stevel goto out; 826 0 stevel } 827 0 stevel } 828 0 stevel if (nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 829 5302 th199096 mi->mi_flags & MI4_INT)) { 830 0 stevel if (sp != NULL) 831 0 stevel nfs_rw_exit(&sp->s_recovlock); 832 0 stevel error = EINTR; 833 0 stevel goto out; 834 0 stevel } 835 0 stevel /* 836 0 stevel * If the mntinfo4_t hasn't changed nfs4_sever_ts then 837 0 stevel * there's no point in double checking to make sure it 838 0 stevel * has switched. 839 0 stevel */ 840 9858 Pavel if (sp == NULL || droplock_cnt != mi->mi_srvset_cnt) { 841 0 stevel tsp = find_nfs4_server(mi); 842 0 stevel if (tsp != sp) { 843 0 stevel /* try again */ 844 0 stevel if (tsp != NULL) { 845 0 stevel mutex_exit(&tsp->s_lock); 846 0 stevel nfs4_server_rele(tsp); 847 0 stevel tsp = NULL; 848 0 stevel } 849 0 stevel if (sp != NULL) { 850 0 stevel nfs_rw_exit(&sp->s_recovlock); 851 0 stevel mutex_enter(&sp->s_lock); 852 0 stevel sp->s_otw_call_count--; 853 0 stevel mutex_exit(&sp->s_lock); 854 0 stevel nfs4_server_rele(sp); 855 0 stevel sp = NULL; 856 0 stevel } 857 0 stevel goto get_sp; 858 0 stevel } else { 859 0 stevel if (tsp != NULL) { 860 0 stevel mutex_exit(&tsp->s_lock); 861 0 stevel nfs4_server_rele(tsp); 862 0 stevel tsp = NULL; 863 0 stevel } 864 0 stevel } 865 0 stevel } 866 0 stevel 867 0 stevel if (sp != NULL) { 868 0 stevel rsp->rs_sp = sp; 869 0 stevel } 870 0 stevel 871 0 stevel /* 872 0 stevel * If the fileystem uses volatile filehandles, obtain a lock so 873 0 stevel * that we synchronize with renames. Exception: mount operations 874 0 stevel * can change mi_fh_expire_type, which could be a problem, since 875 0 stevel * the end_op code needs to be consistent with the start_op code 876 0 stevel * about mi_rename_lock. Since mounts don't compete with renames, 877 0 stevel * it's simpler to just not acquire the rename lock for mounts. 878 0 stevel */ 879 0 stevel if (NFS4_VOLATILE_FH(mi) && op != OH_MOUNT) { 880 0 stevel if (nfs_rw_enter_sig(&mi->mi_rename_lock, 881 5302 th199096 op == OH_VFH_RENAME ? RW_WRITER : RW_READER, 882 5302 th199096 mi->mi_flags & MI4_INT)) { 883 0 stevel nfs_rw_exit(&mi->mi_recovlock); 884 0 stevel if (sp != NULL) 885 0 stevel nfs_rw_exit(&sp->s_recovlock); 886 0 stevel error = EINTR; 887 0 stevel goto out; 888 0 stevel } 889 0 stevel rsp->rs_flags |= NFS4_RS_RENAME_HELD; 890 0 stevel } 891 0 stevel 892 0 stevel if (OH_IS_STATE_RELE(op)) { 893 0 stevel /* 894 0 stevel * For forced unmount, letting the request proceed will 895 0 stevel * almost always delay response to the user, so hand it off 896 0 stevel * to the recovery thread. For exiting lwp's, we don't 897 0 stevel * have a good way to tell if the request will hang. We 898 0 stevel * generally want processes to handle their own requests so 899 0 stevel * that they can be done in parallel, but if there is 900 0 stevel * already a recovery thread, hand the request off to it. 901 0 stevel * This will improve user response at no cost to overall 902 0 stevel * system throughput. For zone shutdown, we'd prefer 903 0 stevel * the recovery thread to handle this as well. 904 0 stevel */ 905 0 stevel ASSERT(startrecovp != NULL); 906 0 stevel mutex_enter(&mi->mi_lock); 907 0 stevel if (FS_OR_ZONE_GONE4(mi->mi_vfsp)) 908 0 stevel *startrecovp = TRUE; 909 0 stevel else if ((curthread->t_proc_flag & TP_LWPEXIT) && 910 0 stevel (mi->mi_flags & MI4_RECOV_ACTIV)) 911 0 stevel *startrecovp = TRUE; 912 0 stevel else 913 0 stevel *startrecovp = FALSE; 914 0 stevel mutex_exit(&mi->mi_lock); 915 0 stevel } else 916 0 stevel if (startrecovp != NULL) 917 0 stevel *startrecovp = FALSE; 918 0 stevel 919 0 stevel ASSERT(error == 0); 920 0 stevel return (error); 921 0 stevel 922 0 stevel out: 923 0 stevel ASSERT(error != 0); 924 0 stevel if (sp != NULL) { 925 0 stevel mutex_enter(&sp->s_lock); 926 0 stevel sp->s_otw_call_count--; 927 0 stevel mutex_exit(&sp->s_lock); 928 0 stevel nfs4_server_rele(sp); 929 0 stevel rsp->rs_sp = NULL; 930 0 stevel } 931 0 stevel nfs4_end_op_recall(vp1, vp2, rsp); 932 0 stevel 933 0 stevel #ifdef DEBUG 934 0 stevel (void) tsd_set(nfs4_tsd_key, NULL); 935 0 stevel #endif 936 0 stevel return (error); 937 0 stevel } 938 0 stevel 939 0 stevel /* 940 0 stevel * It is up to the caller to determine if rsp->rs_sp being NULL 941 0 stevel * is detrimental or not. 942 0 stevel */ 943 0 stevel int 944 0 stevel nfs4_start_op(mntinfo4_t *mi, vnode_t *vp1, vnode_t *vp2, 945 5302 th199096 nfs4_recov_state_t *rsp) 946 0 stevel { 947 0 stevel ASSERT(rsp->rs_num_retry_despite_err == 0); 948 0 stevel rsp->rs_num_retry_despite_err = 0; 949 0 stevel return (nfs4_start_fop(mi, vp1, vp2, OH_OTHER, rsp, NULL)); 950 0 stevel } 951 0 stevel 952 0 stevel /* 953 0 stevel * Release any resources acquired by nfs4_start_op(). 954 0 stevel * 'sp' should be the nfs4_server pointer returned by nfs4_start_op(). 955 0 stevel * 956 0 stevel * The operation hint is used to avoid a deadlock by bypassing delegation 957 0 stevel * return logic for writes, which are done while returning a delegation. 958 0 stevel */ 959 0 stevel 960 0 stevel void 961 0 stevel nfs4_end_fop(mntinfo4_t *mi, vnode_t *vp1, vnode_t *vp2, nfs4_op_hint_t op, 962 5302 th199096 nfs4_recov_state_t *rsp, bool_t needs_recov) 963 0 stevel { 964 0 stevel nfs4_server_t *sp = rsp->rs_sp; 965 0 stevel rnode4_t *rp = NULL; 966 0 stevel 967 0 stevel #ifdef lint 968 0 stevel /* 969 0 stevel * The op hint isn't used any more, but might be in 970 0 stevel * the future. 971 0 stevel */ 972 0 stevel op = op; 973 0 stevel #endif 974 0 stevel 975 0 stevel #ifdef DEBUG 976 0 stevel ASSERT(tsd_get(nfs4_tsd_key) != NULL); 977 0 stevel (void) tsd_set(nfs4_tsd_key, NULL); 978 0 stevel #endif 979 0 stevel 980 0 stevel nfs4_end_op_recall(vp1, vp2, rsp); 981 0 stevel 982 0 stevel if (rsp->rs_flags & NFS4_RS_RENAME_HELD) 983 0 stevel nfs_rw_exit(&mi->mi_rename_lock); 984 0 stevel 985 0 stevel if (!needs_recov) { 986 0 stevel if (rsp->rs_flags & NFS4_RS_DELAY_MSG) { 987 0 stevel /* may need to clear the delay interval */ 988 0 stevel if (vp1 != NULL) { 989 0 stevel rp = VTOR4(vp1); 990 0 stevel mutex_enter(&rp->r_statelock); 991 0 stevel rp->r_delay_interval = 0; 992 0 stevel mutex_exit(&rp->r_statelock); 993 0 stevel } 994 0 stevel } 995 0 stevel rsp->rs_flags &= ~(NFS4_RS_GRACE_MSG|NFS4_RS_DELAY_MSG); 996 0 stevel } 997 0 stevel 998 0 stevel /* 999 0 stevel * If the corresponding nfs4_start_op() found a sp, 1000 0 stevel * then there must still be a sp. 1001 0 stevel */ 1002 0 stevel if (sp != NULL) { 1003 0 stevel nfs_rw_exit(&mi->mi_recovlock); 1004 0 stevel nfs_rw_exit(&sp->s_recovlock); 1005 0 stevel mutex_enter(&sp->s_lock); 1006 0 stevel sp->s_otw_call_count--; 1007 0 stevel cv_broadcast(&sp->s_cv_otw_count); 1008 0 stevel mutex_exit(&sp->s_lock); 1009 0 stevel nfs4_server_rele(sp); 1010 0 stevel } else { 1011 0 stevel nfs_rw_exit(&mi->mi_recovlock); 1012 0 stevel } 1013 0 stevel } 1014 0 stevel 1015 0 stevel void 1016 0 stevel nfs4_end_op(mntinfo4_t *mi, vnode_t *vp1, vnode_t *vp2, 1017 5302 th199096 nfs4_recov_state_t *rsp, bool_t needrecov) 1018 0 stevel { 1019 0 stevel nfs4_end_fop(mi, vp1, vp2, OH_OTHER, rsp, needrecov); 1020 0 stevel } 1021 0 stevel 1022 0 stevel /* 1023 0 stevel * If the filesystem is going through client recovery, block until 1024 0 stevel * finished. 1025 0 stevel * Exceptions: 1026 0 stevel * - state-releasing ops (CLOSE, LOCKU, DELEGRETURN) are allowed to proceed 1027 0 stevel * if the filesystem has been forcibly unmounted or the lwp is exiting. 1028 0 stevel * 1029 0 stevel * Return value: 1030 0 stevel * - 0 if no errors 1031 0 stevel * - EINTR if the call was interrupted 1032 0 stevel * - EIO if the filesystem has been forcibly unmounted (non-state-releasing 1033 0 stevel * op) 1034 0 stevel * - the errno value from the recovery thread, if recovery failed 1035 0 stevel */ 1036 0 stevel 1037 0 stevel static int 1038 0 stevel wait_for_recovery(mntinfo4_t *mi, nfs4_op_hint_t op_hint) 1039 0 stevel { 1040 0 stevel int error = 0; 1041 0 stevel 1042 0 stevel mutex_enter(&mi->mi_lock); 1043 0 stevel 1044 0 stevel while (mi->mi_recovflags != 0) { 1045 0 stevel klwp_t *lwp = ttolwp(curthread); 1046 0 stevel 1047 6520 vv149972 if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) || 1048 6520 vv149972 (mi->mi_flags & MI4_RECOV_FAIL)) 1049 0 stevel break; 1050 0 stevel if (OH_IS_STATE_RELE(op_hint) && 1051 0 stevel (curthread->t_proc_flag & TP_LWPEXIT)) 1052 0 stevel break; 1053 0 stevel 1054 0 stevel if (lwp != NULL) 1055 0 stevel lwp->lwp_nostop++; 1056 0 stevel /* XXX - use different cv? */ 1057 0 stevel if (cv_wait_sig(&mi->mi_failover_cv, &mi->mi_lock) == 0) { 1058 0 stevel error = EINTR; 1059 0 stevel if (lwp != NULL) 1060 0 stevel lwp->lwp_nostop--; 1061 0 stevel break; 1062 0 stevel } 1063 0 stevel if (lwp != NULL) 1064 0 stevel lwp->lwp_nostop--; 1065 0 stevel } 1066 0 stevel 1067 6520 vv149972 if ((mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED) && 1068 0 stevel !OH_IS_STATE_RELE(op_hint)) { 1069 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1070 5302 th199096 "wait_for_recovery: forced unmount")); 1071 0 stevel error = EIO; 1072 6520 vv149972 } else if (mi->mi_flags & MI4_RECOV_FAIL) { 1073 6520 vv149972 NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1074 6520 vv149972 "wait_for_recovery: fail since RECOV FAIL")); 1075 6520 vv149972 error = mi->mi_error; 1076 0 stevel } 1077 0 stevel 1078 0 stevel mutex_exit(&mi->mi_lock); 1079 0 stevel 1080 0 stevel return (error); 1081 0 stevel } 1082 0 stevel 1083 0 stevel /* 1084 0 stevel * If the client received NFS4ERR_GRACE for this particular mount, 1085 0 stevel * the client blocks here until it is time to try again. 1086 0 stevel * 1087 0 stevel * Return value: 1088 0 stevel * - 0 if wait was successful 1089 0 stevel * - EINTR if the call was interrupted 1090 0 stevel */ 1091 0 stevel 1092 0 stevel int 1093 0 stevel nfs4_wait_for_grace(mntinfo4_t *mi, nfs4_recov_state_t *rsp) 1094 0 stevel { 1095 0 stevel int error = 0; 1096 0 stevel time_t curtime, time_to_wait; 1097 0 stevel 1098 0 stevel /* do a unprotected check to reduce mi_lock contention */ 1099 0 stevel if (mi->mi_grace_wait != 0) { 1100 0 stevel mutex_enter(&mi->mi_lock); 1101 0 stevel 1102 0 stevel if (mi->mi_grace_wait != 0) { 1103 0 stevel if (!(rsp->rs_flags & NFS4_RS_GRACE_MSG)) 1104 0 stevel rsp->rs_flags |= NFS4_RS_GRACE_MSG; 1105 0 stevel 1106 0 stevel curtime = gethrestime_sec(); 1107 0 stevel 1108 0 stevel if (curtime < mi->mi_grace_wait) { 1109 0 stevel 1110 0 stevel time_to_wait = mi->mi_grace_wait - curtime; 1111 0 stevel 1112 0 stevel mutex_exit(&mi->mi_lock); 1113 0 stevel 1114 5583 dm120769 delay(SEC_TO_TICK(time_to_wait)); 1115 0 stevel 1116 0 stevel curtime = gethrestime_sec(); 1117 0 stevel 1118 0 stevel mutex_enter(&mi->mi_lock); 1119 0 stevel 1120 0 stevel if (curtime >= mi->mi_grace_wait) 1121 0 stevel mi->mi_grace_wait = 0; 1122 0 stevel } else { 1123 0 stevel mi->mi_grace_wait = 0; 1124 0 stevel } 1125 0 stevel } 1126 0 stevel mutex_exit(&mi->mi_lock); 1127 0 stevel } 1128 0 stevel 1129 0 stevel return (error); 1130 0 stevel } 1131 0 stevel 1132 0 stevel /* 1133 0 stevel * If the client received NFS4ERR_DELAY for an operation on a vnode, 1134 0 stevel * the client blocks here until it is time to try again. 1135 0 stevel * 1136 0 stevel * Return value: 1137 0 stevel * - 0 if wait was successful 1138 0 stevel * - EINTR if the call was interrupted 1139 0 stevel */ 1140 0 stevel 1141 0 stevel int 1142 0 stevel nfs4_wait_for_delay(vnode_t *vp, nfs4_recov_state_t *rsp) 1143 0 stevel { 1144 0 stevel int error = 0; 1145 0 stevel time_t curtime, time_to_wait; 1146 0 stevel rnode4_t *rp; 1147 0 stevel 1148 0 stevel ASSERT(vp != NULL); 1149 0 stevel 1150 0 stevel rp = VTOR4(vp); 1151 0 stevel 1152 0 stevel /* do a unprotected check to reduce r_statelock contention */ 1153 0 stevel if (rp->r_delay_wait != 0) { 1154 0 stevel mutex_enter(&rp->r_statelock); 1155 0 stevel 1156 0 stevel if (rp->r_delay_wait != 0) { 1157 0 stevel 1158 0 stevel if (!(rsp->rs_flags & NFS4_RS_DELAY_MSG)) { 1159 0 stevel rsp->rs_flags |= NFS4_RS_DELAY_MSG; 1160 0 stevel nfs4_mi_kstat_inc_delay(VTOMI4(vp)); 1161 0 stevel } 1162 0 stevel 1163 0 stevel curtime = gethrestime_sec(); 1164 0 stevel 1165 0 stevel if (curtime < rp->r_delay_wait) { 1166 0 stevel 1167 0 stevel time_to_wait = rp->r_delay_wait - curtime; 1168 0 stevel 1169 0 stevel mutex_exit(&rp->r_statelock); 1170 0 stevel 1171 5583 dm120769 delay(SEC_TO_TICK(time_to_wait)); 1172 0 stevel 1173 0 stevel curtime = gethrestime_sec(); 1174 0 stevel 1175 0 stevel mutex_enter(&rp->r_statelock); 1176 0 stevel 1177 0 stevel if (curtime >= rp->r_delay_wait) 1178 0 stevel rp->r_delay_wait = 0; 1179 0 stevel } else { 1180 0 stevel rp->r_delay_wait = 0; 1181 0 stevel } 1182 0 stevel } 1183 0 stevel mutex_exit(&rp->r_statelock); 1184 0 stevel } 1185 0 stevel 1186 0 stevel return (error); 1187 0 stevel } 1188 0 stevel 1189 0 stevel /* 1190 0 stevel * The recovery thread. 1191 0 stevel */ 1192 0 stevel 1193 0 stevel static void 1194 0 stevel nfs4_recov_thread(recov_info_t *recovp) 1195 0 stevel { 1196 0 stevel mntinfo4_t *mi = recovp->rc_mi; 1197 0 stevel nfs4_server_t *sp; 1198 0 stevel int done = 0, error = 0; 1199 0 stevel bool_t recov_fail = FALSE; 1200 0 stevel callb_cpr_t cpr_info; 1201 0 stevel kmutex_t cpr_lock; 1202 0 stevel 1203 0 stevel nfs4_queue_event(RE_START, mi, NULL, mi->mi_recovflags, 1204 0 stevel recovp->rc_vp1, recovp->rc_vp2, 0, NULL, 0, TAG_NONE, TAG_NONE, 1205 0 stevel 0, 0); 1206 0 stevel 1207 0 stevel mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL); 1208 0 stevel CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Recov"); 1209 0 stevel 1210 0 stevel mutex_enter(&mi->mi_lock); 1211 0 stevel mi->mi_recovthread = curthread; 1212 0 stevel mutex_exit(&mi->mi_lock); 1213 0 stevel 1214 0 stevel /* 1215 0 stevel * We don't really need protection here against failover or 1216 0 stevel * migration, since the current thread is the one that would make 1217 0 stevel * any changes, but hold mi_recovlock anyway for completeness (and 1218 0 stevel * to satisfy any ASSERTs). 1219 0 stevel */ 1220 0 stevel (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 1221 0 stevel sp = find_nfs4_server(mi); 1222 0 stevel if (sp != NULL) 1223 0 stevel mutex_exit(&sp->s_lock); 1224 0 stevel nfs_rw_exit(&mi->mi_recovlock); 1225 0 stevel 1226 0 stevel /* 1227 0 stevel * Do any necessary recovery, based on the information in recovp 1228 0 stevel * and any recovery flags. 1229 0 stevel */ 1230 0 stevel 1231 0 stevel do { 1232 0 stevel mutex_enter(&mi->mi_lock); 1233 0 stevel if (FS_OR_ZONE_GONE4(mi->mi_vfsp)) { 1234 0 stevel bool_t activesrv; 1235 0 stevel 1236 0 stevel NFS4_DEBUG(nfs4_client_recov_debug && 1237 0 stevel mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED, (CE_NOTE, 1238 5302 th199096 "nfs4_recov_thread: file system has been " 1239 5302 th199096 "unmounted")); 1240 0 stevel NFS4_DEBUG(nfs4_client_recov_debug && 1241 0 stevel zone_status_get(curproc->p_zone) >= 1242 0 stevel ZONE_IS_SHUTTING_DOWN, (CE_NOTE, 1243 5302 th199096 "nfs4_recov_thread: zone shutting down")); 1244 0 stevel /* 1245 0 stevel * If the server has lost its state for us and 1246 0 stevel * the filesystem is unmounted, then the filesystem 1247 0 stevel * can be tossed, even if there are lost lock or 1248 0 stevel * lost state calls in the recovery queue. 1249 0 stevel */ 1250 0 stevel if (mi->mi_recovflags & 1251 0 stevel (MI4R_NEED_CLIENTID | MI4R_REOPEN_FILES)) { 1252 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1253 0 stevel "nfs4_recov_thread: bailing out")); 1254 0 stevel mi->mi_flags |= MI4_RECOV_FAIL; 1255 0 stevel mi->mi_error = recovp->rc_error; 1256 0 stevel recov_fail = TRUE; 1257 0 stevel } 1258 0 stevel /* 1259 0 stevel * We don't know if the server has any state for 1260 0 stevel * us, and the filesystem has been unmounted. If 1261 0 stevel * there are "lost state" recovery items, keep 1262 0 stevel * trying to process them until there are no more 1263 0 stevel * mounted filesystems for the server. Otherwise, 1264 0 stevel * bail out. The reason we don't mark the 1265 0 stevel * filesystem as failing recovery is in case we 1266 0 stevel * have to do "lost state" recovery later (e.g., a 1267 0 stevel * user process exits). 1268 0 stevel */ 1269 0 stevel if (!(mi->mi_recovflags & MI4R_LOST_STATE)) { 1270 855 jwahlig done = 1; 1271 0 stevel mutex_exit(&mi->mi_lock); 1272 0 stevel break; 1273 0 stevel } 1274 0 stevel mutex_exit(&mi->mi_lock); 1275 0 stevel 1276 0 stevel if (sp == NULL) 1277 0 stevel activesrv = FALSE; 1278 0 stevel else { 1279 0 stevel mutex_enter(&sp->s_lock); 1280 0 stevel activesrv = nfs4_fs_active(sp); 1281 0 stevel } 1282 0 stevel if (!activesrv) { 1283 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1284 5302 th199096 "no active fs for server %p", 1285 5302 th199096 (void *)sp)); 1286 0 stevel mutex_enter(&mi->mi_lock); 1287 0 stevel mi->mi_flags |= MI4_RECOV_FAIL; 1288 0 stevel mi->mi_error = recovp->rc_error; 1289 0 stevel mutex_exit(&mi->mi_lock); 1290 0 stevel recov_fail = TRUE; 1291 0 stevel if (sp != NULL) { 1292 0 stevel /* 1293 0 stevel * Mark the server instance as 1294 0 stevel * dead, so that nobody will attach 1295 0 stevel * a new filesystem. 1296 0 stevel */ 1297 0 stevel nfs4_mark_srv_dead(sp); 1298 0 stevel } 1299 0 stevel } 1300 0 stevel if (sp != NULL) 1301 0 stevel mutex_exit(&sp->s_lock); 1302 0 stevel } else { 1303 0 stevel mutex_exit(&mi->mi_lock); 1304 0 stevel } 1305 0 stevel 1306 0 stevel /* 1307 0 stevel * Check if we need to select a new server for a 1308 0 stevel * failover. Choosing a new server will force at 1309 0 stevel * least a check of the clientid. 1310 0 stevel */ 1311 0 stevel mutex_enter(&mi->mi_lock); 1312 0 stevel if (!recov_fail && 1313 0 stevel (mi->mi_recovflags & MI4R_NEED_NEW_SERVER)) { 1314 0 stevel mutex_exit(&mi->mi_lock); 1315 0 stevel recov_newserver(recovp, &sp, &recov_fail); 1316 0 stevel } else 1317 0 stevel mutex_exit(&mi->mi_lock); 1318 0 stevel 1319 0 stevel /* 1320 0 stevel * Check if we need to recover the clientid. This 1321 0 stevel * must be done before file and lock recovery, and it 1322 0 stevel * potentially affects the recovery threads for other 1323 0 stevel * filesystems, so it gets special treatment. 1324 0 stevel */ 1325 0 stevel if (sp != NULL && recov_fail == FALSE) { 1326 0 stevel mutex_enter(&sp->s_lock); 1327 0 stevel if (!(sp->s_flags & N4S_CLIENTID_SET)) { 1328 0 stevel mutex_exit(&sp->s_lock); 1329 0 stevel recov_clientid(recovp, sp); 1330 0 stevel } else { 1331 0 stevel /* 1332 0 stevel * Unset this flag in case another recovery 1333 0 stevel * thread successfully recovered the clientid 1334 0 stevel * for us already. 1335 0 stevel */ 1336 0 stevel mutex_enter(&mi->mi_lock); 1337 0 stevel mi->mi_recovflags &= ~MI4R_NEED_CLIENTID; 1338 0 stevel mutex_exit(&mi->mi_lock); 1339 0 stevel mutex_exit(&sp->s_lock); 1340 0 stevel } 1341 0 stevel } 1342 0 stevel 1343 0 stevel /* 1344 0 stevel * Check if we need to get the security information. 1345 0 stevel */ 1346 0 stevel mutex_enter(&mi->mi_lock); 1347 0 stevel if ((mi->mi_recovflags & MI4R_NEED_SECINFO) && 1348 0 stevel !(mi->mi_flags & MI4_RECOV_FAIL)) { 1349 0 stevel mutex_exit(&mi->mi_lock); 1350 0 stevel (void) nfs_rw_enter_sig(&mi->mi_recovlock, 1351 5302 th199096 RW_WRITER, 0); 1352 0 stevel error = nfs4_secinfo_recov(recovp->rc_mi, 1353 5302 th199096 recovp->rc_vp1, recovp->rc_vp2); 1354 0 stevel /* 1355 0 stevel * If error, nothing more can be done, stop 1356 0 stevel * the recovery. 1357 0 stevel */ 1358 0 stevel if (error) { 1359 0 stevel mutex_enter(&mi->mi_lock); 1360 0 stevel mi->mi_flags |= MI4_RECOV_FAIL; 1361 0 stevel mi->mi_error = recovp->rc_error; 1362 0 stevel mutex_exit(&mi->mi_lock); 1363 0 stevel nfs4_queue_event(RE_WRONGSEC, mi, NULL, 1364 0 stevel error, recovp->rc_vp1, recovp->rc_vp2, 1365 0 stevel 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 1366 0 stevel } 1367 0 stevel nfs_rw_exit(&mi->mi_recovlock); 1368 0 stevel } else 1369 0 stevel mutex_exit(&mi->mi_lock); 1370 0 stevel 1371 0 stevel /* 1372 0 stevel * Check if there's a bad seqid to recover. 1373 0 stevel */ 1374 0 stevel mutex_enter(&mi->mi_lock); 1375 0 stevel if ((mi->mi_recovflags & MI4R_BAD_SEQID) && 1376 0 stevel !(mi->mi_flags & MI4_RECOV_FAIL)) { 1377 0 stevel mutex_exit(&mi->mi_lock); 1378 0 stevel (void) nfs_rw_enter_sig(&mi->mi_recovlock, 1379 5302 th199096 RW_WRITER, 0); 1380 0 stevel recov_bad_seqid(recovp); 1381 0 stevel nfs_rw_exit(&mi->mi_recovlock); 1382 0 stevel } else 1383 0 stevel mutex_exit(&mi->mi_lock); 1384 0 stevel 1385 0 stevel /* 1386 0 stevel * Next check for recovery that affects the entire 1387 0 stevel * filesystem. 1388 0 stevel */ 1389 0 stevel if (sp != NULL) { 1390 0 stevel mutex_enter(&mi->mi_lock); 1391 0 stevel if ((mi->mi_recovflags & MI4R_REOPEN_FILES) && 1392 0 stevel !(mi->mi_flags & MI4_RECOV_FAIL)) { 1393 0 stevel mutex_exit(&mi->mi_lock); 1394 0 stevel recov_openfiles(recovp, sp); 1395 0 stevel } else 1396 0 stevel mutex_exit(&mi->mi_lock); 1397 0 stevel } 1398 0 stevel 1399 0 stevel /* 1400 0 stevel * Send any queued state recovery requests. 1401 0 stevel */ 1402 0 stevel mutex_enter(&mi->mi_lock); 1403 0 stevel if (sp != NULL && 1404 0 stevel (mi->mi_recovflags & MI4R_LOST_STATE) && 1405 0 stevel !(mi->mi_flags & MI4_RECOV_FAIL)) { 1406 0 stevel mutex_exit(&mi->mi_lock); 1407 0 stevel (void) nfs_rw_enter_sig(&mi->mi_recovlock, 1408 5302 th199096 RW_WRITER, 0); 1409 0 stevel nfs4_resend_lost_rqsts(recovp, sp); 1410 0 stevel if (list_head(&mi->mi_lost_state) == NULL) { 1411 0 stevel /* done */ 1412 0 stevel mutex_enter(&mi->mi_lock); 1413 0 stevel mi->mi_recovflags &= ~MI4R_LOST_STATE; 1414 0 stevel mutex_exit(&mi->mi_lock); 1415 0 stevel } 1416 0 stevel nfs_rw_exit(&mi->mi_recovlock); 1417 0 stevel } else { 1418 0 stevel mutex_exit(&mi->mi_lock); 1419 0 stevel } 1420 0 stevel 1421 0 stevel /* 1422 0 stevel * See if there is anything more to do. If not, announce 1423 0 stevel * that we are done and exit. 1424 0 stevel * 1425 0 stevel * Need mi_recovlock to keep 'sp' valid. Must grab 1426 0 stevel * mi_recovlock before mi_lock to preserve lock ordering. 1427 0 stevel */ 1428 0 stevel (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0); 1429 0 stevel mutex_enter(&mi->mi_lock); 1430 0 stevel if ((mi->mi_recovflags & ~MI4R_SRV_REBOOT) == 0 || 1431 0 stevel (mi->mi_flags & MI4_RECOV_FAIL)) { 1432 0 stevel list_t local_lost_state; 1433 0 stevel nfs4_lost_rqst_t *lrp; 1434 0 stevel 1435 0 stevel /* 1436 0 stevel * We need to remove the lost requests before we 1437 0 stevel * unmark the mi as no longer doing recovery to 1438 0 stevel * avoid a race with a new thread putting new lost 1439 0 stevel * requests on the same mi (and the going away 1440 0 stevel * thread would remove the new lost requests). 1441 0 stevel * 1442 0 stevel * Move the lost requests to a local list since 1443 0 stevel * nfs4_remove_lost_rqst() drops mi_lock, and 1444 0 stevel * dropping the mi_lock would make our check to 1445 0 stevel * see if recovery is done no longer valid. 1446 0 stevel */ 1447 0 stevel list_create(&local_lost_state, 1448 0 stevel sizeof (nfs4_lost_rqst_t), 1449 0 stevel offsetof(nfs4_lost_rqst_t, lr_node)); 1450 0 stevel list_move_tail(&local_lost_state, &mi->mi_lost_state); 1451 0 stevel 1452 0 stevel done = 1; 1453 0 stevel mutex_exit(&mi->mi_lock); 1454 0 stevel /* 1455 0 stevel * Now officially free the "moved" 1456 0 stevel * lost requests. 1457 0 stevel */ 1458 0 stevel while ((lrp = list_head(&local_lost_state)) != NULL) { 1459 0 stevel list_remove(&local_lost_state, lrp); 1460 0 stevel nfs4_free_lost_rqst(lrp, sp); 1461 0 stevel } 1462 0 stevel list_destroy(&local_lost_state); 1463 0 stevel } else 1464 0 stevel mutex_exit(&mi->mi_lock); 1465 0 stevel nfs_rw_exit(&mi->mi_recovlock); 1466 0 stevel 1467 0 stevel /* 1468 0 stevel * If the filesystem has been forcibly unmounted, there is 1469 0 stevel * probably no point in retrying immediately. Furthermore, 1470 0 stevel * there might be user processes waiting for a chance to 1471 0 stevel * queue up "lost state" requests, so that they can exit. 1472 0 stevel * So pause here for a moment. Same logic for zone shutdown. 1473 0 stevel */ 1474 0 stevel if (!done && FS_OR_ZONE_GONE4(mi->mi_vfsp)) { 1475 0 stevel mutex_enter(&mi->mi_lock); 1476 0 stevel cv_broadcast(&mi->mi_failover_cv); 1477 0 stevel mutex_exit(&mi->mi_lock); 1478 0 stevel delay(SEC_TO_TICK(nfs4_unmount_delay)); 1479 0 stevel } 1480 0 stevel 1481 0 stevel } while (!done); 1482 0 stevel 1483 0 stevel if (sp != NULL) 1484 0 stevel nfs4_server_rele(sp); 1485 0 stevel 1486 0 stevel /* 1487 0 stevel * Return all recalled delegations 1488 0 stevel */ 1489 0 stevel nfs4_dlistclean(); 1490 0 stevel 1491 855 jwahlig mutex_enter(&mi->mi_lock); 1492 855 jwahlig recov_done(mi, recovp); 1493 855 jwahlig mutex_exit(&mi->mi_lock); 1494 855 jwahlig 1495 0 stevel /* 1496 0 stevel * Free up resources that were allocated for us. 1497 0 stevel */ 1498 0 stevel if (recovp->rc_vp1 != NULL) 1499 0 stevel VN_RELE(recovp->rc_vp1); 1500 0 stevel if (recovp->rc_vp2 != NULL) 1501 0 stevel VN_RELE(recovp->rc_vp2); 1502 1126 jwahlig 1503 855 jwahlig /* now we are done using the mi struct, signal the waiters */ 1504 855 jwahlig mutex_enter(&mi->mi_lock); 1505 855 jwahlig mi->mi_in_recovery--; 1506 855 jwahlig if (mi->mi_in_recovery == 0) 1507 855 jwahlig cv_broadcast(&mi->mi_cv_in_recov); 1508 855 jwahlig mutex_exit(&mi->mi_lock); 1509 1126 jwahlig 1510 1705 jwahlig VFS_RELE(mi->mi_vfsp); 1511 1705 jwahlig MI4_RELE(mi); 1512 0 stevel kmem_free(recovp, sizeof (recov_info_t)); 1513 0 stevel mutex_enter(&cpr_lock); 1514 0 stevel CALLB_CPR_EXIT(&cpr_info); 1515 0 stevel mutex_destroy(&cpr_lock); 1516 0 stevel zthread_exit(); 1517 0 stevel } 1518 0 stevel 1519 0 stevel /* 1520 0 stevel * Log the end of recovery and notify any waiting threads. 1521 0 stevel */ 1522 0 stevel 1523 0 stevel static void 1524 0 stevel recov_done(mntinfo4_t *mi, recov_info_t *recovp) 1525 0 stevel { 1526 0 stevel 1527 0 stevel ASSERT(MUTEX_HELD(&mi->mi_lock)); 1528 0 stevel 1529 0 stevel nfs4_queue_event(RE_END, mi, NULL, 0, recovp->rc_vp1, 1530 5302 th199096 recovp->rc_vp2, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 1531 0 stevel mi->mi_recovthread = NULL; 1532 0 stevel mi->mi_flags &= ~MI4_RECOV_ACTIV; 1533 0 stevel mi->mi_recovflags &= ~MI4R_SRV_REBOOT; 1534 0 stevel cv_broadcast(&mi->mi_failover_cv); 1535 0 stevel } 1536 0 stevel 1537 0 stevel /* 1538 0 stevel * State-specific recovery routines, by state. 1539 0 stevel */ 1540 0 stevel 1541 0 stevel /* 1542 0 stevel * Failover. 1543 0 stevel * 1544 0 stevel * Replaces *spp with a reference to the new server, which must 1545 0 stevel * eventually be freed. 1546 0 stevel */ 1547 0 stevel 1548 0 stevel static void 1549 0 stevel recov_newserver(recov_info_t *recovp, nfs4_server_t **spp, bool_t *recov_fail) 1550 0 stevel { 1551 0 stevel mntinfo4_t *mi = recovp->rc_mi; 1552 0 stevel servinfo4_t *svp = NULL; 1553 0 stevel nfs4_server_t *osp = *spp; 1554 0 stevel CLIENT *cl; 1555 0 stevel enum clnt_stat status; 1556 0 stevel struct timeval tv; 1557 0 stevel int error; 1558 0 stevel int oncethru = 0; 1559 0 stevel rnode4_t *rp; 1560 0 stevel int index; 1561 0 stevel nfs_fh4 fh; 1562 0 stevel char *snames; 1563 0 stevel size_t len; 1564 0 stevel 1565 0 stevel (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0); 1566 0 stevel 1567 0 stevel tv.tv_sec = 2; 1568 0 stevel tv.tv_usec = 0; 1569 0 stevel 1570 0 stevel #ifdef lint 1571 0 stevel /* 1572 0 stevel * Lint can't follow the logic, so thinks that snames and len 1573 0 stevel * can be used before being set. They can't, but lint can't 1574 0 stevel * figure it out. To address the lint warning, initialize 1575 0 stevel * snames and len for lint. 1576 0 stevel */ 1577 0 stevel snames = NULL; 1578 0 stevel len = 0; 1579 0 stevel #endif 1580 0 stevel 1581 0 stevel /* 1582 0 stevel * Ping the null NFS procedure of every server in 1583 0 stevel * the list until one responds. We always start 1584 0 stevel * at the head of the list and always skip the one 1585 0 stevel * that is current, since it's caused us a problem. 1586 0 stevel */ 1587 0 stevel while (svp == NULL) { 1588 0 stevel for (svp = mi->mi_servers; svp; svp = svp->sv_next) { 1589 0 stevel 1590 0 stevel mutex_enter(&mi->mi_lock); 1591 0 stevel if (FS_OR_ZONE_GONE4(mi->mi_vfsp)) { 1592 0 stevel mi->mi_flags |= MI4_RECOV_FAIL; 1593 0 stevel mutex_exit(&mi->mi_lock); 1594 0 stevel (void) nfs_rw_exit(&mi->mi_recovlock); 1595 0 stevel *recov_fail = TRUE; 1596 0 stevel if (oncethru) 1597 0 stevel kmem_free(snames, len); 1598 0 stevel return; 1599 0 stevel } 1600 0 stevel mutex_exit(&mi->mi_lock); 1601 0 stevel 1602 0 stevel (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1603 0 stevel if (svp->sv_flags & SV4_NOTINUSE) { 1604 0 stevel nfs_rw_exit(&svp->sv_lock); 1605 0 stevel continue; 1606 0 stevel } 1607 0 stevel nfs_rw_exit(&svp->sv_lock); 1608 0 stevel 1609 0 stevel if (!oncethru && svp == mi->mi_curr_serv) 1610 0 stevel continue; 1611 0 stevel 1612 0 stevel error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, 1613 0 stevel NFS_PROGRAM, NFS_V4, 0, 1, CRED(), &cl); 1614 0 stevel if (error) 1615 0 stevel continue; 1616 0 stevel 1617 0 stevel if (!(mi->mi_flags & MI4_INT)) 1618 0 stevel cl->cl_nosignal = TRUE; 1619 0 stevel status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, 1620 0 stevel xdr_void, NULL, tv); 1621 0 stevel if (!(mi->mi_flags & MI4_INT)) 1622 0 stevel cl->cl_nosignal = FALSE; 1623 0 stevel AUTH_DESTROY(cl->cl_auth); 1624 0 stevel CLNT_DESTROY(cl); 1625 0 stevel if (status == RPC_SUCCESS) { 1626 0 stevel nfs4_queue_event(RE_FAILOVER, mi, 1627 0 stevel svp == mi->mi_curr_serv ? NULL : 1628 0 stevel svp->sv_hostname, 0, NULL, NULL, 0, 1629 0 stevel NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 1630 0 stevel break; 1631 0 stevel } 1632 0 stevel } 1633 0 stevel 1634 0 stevel if (svp == NULL) { 1635 0 stevel if (!oncethru) { 1636 0 stevel snames = nfs4_getsrvnames(mi, &len); 1637 0 stevel nfs4_queue_fact(RF_SRVS_NOT_RESPOND, mi, 1638 0 stevel 0, 0, 0, FALSE, snames, 0, NULL); 1639 0 stevel oncethru = 1; 1640 0 stevel } 1641 0 stevel delay(hz); 1642 0 stevel } 1643 0 stevel } 1644 0 stevel 1645 0 stevel if (oncethru) { 1646 0 stevel nfs4_queue_fact(RF_SRVS_OK, mi, 0, 0, 0, FALSE, snames, 1647 0 stevel 0, NULL); 1648 0 stevel kmem_free(snames, len); 1649 0 stevel } 1650 0 stevel 1651 0 stevel #if DEBUG 1652 0 stevel (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1653 0 stevel ASSERT((svp->sv_flags & SV4_NOTINUSE) == 0); 1654 0 stevel nfs_rw_exit(&svp->sv_lock); 1655 0 stevel #endif 1656 0 stevel 1657 0 stevel mutex_enter(&mi->mi_lock); 1658 0 stevel mi->mi_recovflags &= ~MI4R_NEED_NEW_SERVER; 1659 0 stevel if (svp != mi->mi_curr_serv) { 1660 0 stevel servinfo4_t *osvp = mi->mi_curr_serv; 1661 0 stevel 1662 0 stevel mutex_exit(&mi->mi_lock); 1663 0 stevel 1664 0 stevel /* 1665 0 stevel * Update server-dependent fields in the root vnode. 1666 0 stevel */ 1667 0 stevel index = rtable4hash(mi->mi_rootfh); 1668 0 stevel rw_enter(&rtable4[index].r_lock, RW_WRITER); 1669 0 stevel 1670 0 stevel rp = r4find(&rtable4[index], mi->mi_rootfh, mi->mi_vfsp); 1671 0 stevel if (rp != NULL) { 1672 0 stevel NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1673 0 stevel "recov_newserver: remapping %s", rnode4info(rp))); 1674 0 stevel mutex_enter(&rp->r_statelock); 1675 0 stevel rp->r_server = svp; 1676 0 stevel PURGE_ATTRCACHE4_LOCKED(rp); 1677 0 stevel mutex_exit(&rp->r_statelock); 1678 0 stevel (void) nfs4_free_data_reclaim(rp); 1679 0 stevel nfs4_purge_rddir_cache(RTOV4(rp)); 1680 0 stevel rw_exit(&rtable4[index].r_lock); 1681 0 stevel NFS4_DEBUG(nfs4_client_failover_debug, (CE_NOTE, 1682 0 stevel "recov_newserver: done with %s", 1683 0 stevel rnode4info(rp))); 1684 0 stevel VN_RELE(RTOV4(rp)); 1685 0 stevel } else 1686 0 stevel rw_exit(&rtable4[index].r_lock); 1687 0 stevel (void) dnlc_purge_vfsp(mi->mi_vfsp, 0); 1688 0 stevel 1689 0 stevel mutex_enter(&mi->mi_lock); 1690 0 stevel mi->mi_recovflags |= MI4R_REOPEN_FILES | MI4R_REMAP_FILES; 1691 0 stevel if (recovp->rc_srv_reboot) 1692 0 stevel mi->mi_recovflags |= MI4R_SRV_REBOOT; 1693 0 stevel mi->mi_curr_serv = svp; 1694 0 stevel mi->mi_failover++; 1695 0 stevel mi->mi_flags &= ~MI4_BADOWNER_DEBUG; 1696 0 stevel mutex_exit(&mi->mi_lock); 1697 0 stevel 1698 0 stevel (void) nfs_rw_enter_sig(&svp->sv_lock, RW_READER, 0); 1699 0 stevel fh.nfs_fh4_len = svp->sv_fhandle.fh_len; 1700 0 stevel fh.nfs_fh4_val = svp->sv_fhandle.fh_buf; 1701 0 stevel sfh4_update(mi->mi_rootfh, &fh); 1702 0 stevel fh.nfs_fh4_len = svp->sv_pfhandle.fh_len; 1703 0 stevel fh.nfs_fh4_val = svp->sv_pfhandle.fh_buf; 1704 0 stevel sfh4_update(mi->mi_srvparentfh, &fh); 1705 0 stevel nfs_rw_exit(&svp->sv_lock); 1706 0 stevel 1707 0 stevel *spp = nfs4_move_mi(mi, osvp, svp); 1708 0 stevel if (osp != NULL) 1709 0 stevel nfs4_server_rele(osp); 1710 0 stevel } else 1711 0 stevel mutex_exit(&mi->mi_lock); 1712 0 stevel (void) nfs_rw_exit(&mi->mi_recovlock); 1713 0 stevel } 1714 0 stevel 1715 0 stevel /* 1716 0 stevel * Clientid. 1717 0 stevel */ 1718 0 stevel 1719 0 stevel static void 1720 0 stevel recov_clientid(recov_info_t *recovp, nfs4_server_t *sp) 1721 0 stevel { 1722 0 stevel mntinfo4_t *mi = recovp->rc_mi; 1723 0 stevel int error = 0; 1724 0 stevel int still_stale; 1725 0 stevel int need_new_s; 1726 0 stevel 1727 0 stevel ASSERT(sp != NULL); 1728 0 stevel 1729 0 stevel /* 1730 0 stevel * Acquire the recovery lock and then verify that the clientid 1731 0 stevel * still needs to be recovered. (Note that s_recovlock is supposed 1732 0 stevel * to be acquired before s_lock.) Since the thread holds the 1733 0 stevel * recovery lock, no other thread will recover the clientid. 1734 0 stevel */ 1735 0 stevel (void) nfs_rw_enter_sig(&sp->s_recovlock, RW_WRITER, 0); 1736 0 stevel (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0); 1737 0 stevel mutex_enter(&sp->s_lock); 1738 0 stevel still_stale = ((sp->s_flags & N4S_CLIENTID_SET) == 0); 1739 0 stevel mutex_exit(&sp->s_lock); 1740 0 stevel 1741 0 stevel if (still_stale) { 1742 0 stevel nfs4_error_t n4e; 1743 0 stevel 1744 0 stevel nfs4_error_zinit(&n4e); 1745 0 stevel nfs4setclientid(mi, kcred, TRUE, &n4e); 1746 0 stevel error = n4e.error; 1747 0 stevel if (error != 0) { 1748 0 stevel 1749 0 stevel /* 1750 0 stevel * nfs4setclientid may have set MI4R_NEED_NEW_SERVER, 1751 0 stevel * if so, just return and let recov_thread drive 1752 0 stevel * failover. 1753 0 stevel */ 1754 0 stevel mutex_enter(&mi->mi_lock); 1755 0 stevel need_new_s = mi->mi_recovflags & MI4R_NEED_NEW_SERVER; 1756 0 stevel mutex_exit(&mi->mi_lock); 1757 0 stevel 1758 0 stevel if (need_new_s) { 1759 0 stevel nfs_rw_exit(&mi->mi_recovlock); 1760 0 stevel nfs_rw_exit(&sp->s_recovlock); 1761 0 stevel return; 1762 0 stevel } 1763 0 stevel 1764 0 stevel nfs4_queue_event(RE_CLIENTID, mi, NULL, n4e.error, NULL, 1765 0 stevel NULL, n4e.stat, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 1766 0 stevel mutex_enter(&mi->mi_lock); 1767 0 stevel mi->mi_flags |= MI4_RECOV_FAIL; 1768 0 stevel mi->mi_error = recovp->rc_error; 1769 0 stevel mutex_exit(&mi->mi_lock); 1770 0 stevel /* don't destroy the nfs4_server, let umount do it */ 1771 0 stevel } 1772 0 stevel } 1773 0 stevel 1774 0 stevel if (error == 0) { 1775 0 stevel mutex_enter(&mi->mi_lock); 1776 0 stevel mi->mi_recovflags &= ~MI4R_NEED_CLIENTID; 1777 0 stevel /* 1778 0 stevel * If still_stale isn't true, then another thread already 1779 0 stevel * recovered the clientid. And that thread that set the 1780 0 stevel * clientid will have initiated reopening files on all the 1781 0 stevel * filesystems for the server, so we should not initiate 1782 0 stevel * reopening for this filesystem here. 1783 0 stevel */ 1784 0 stevel if (still_stale) { 1785 0 stevel mi->mi_recovflags |= MI4R_REOPEN_FILES; 1786 0 stevel if (recovp->rc_srv_reboot) 1787 0 stevel mi->mi_recovflags |= MI4R_SRV_REBOOT; 1788 0 stevel } 1789 0 stevel mutex_exit(&mi->mi_lock); 1790 0 stevel } 1791 0 stevel 1792 0 stevel nfs_rw_exit(&mi->mi_recovlock); 1793 0 stevel 1794 0 stevel if (error != 0) { 1795 0 stevel nfs_rw_exit(&sp->s_recovlock); 1796 0 stevel mutex_enter(&mi->mi_lock); 1797 0 stevel if ((mi->mi_flags & MI4_RECOV_FAIL) == 0) 1798 0 stevel delay(SEC_TO_TICK(recov_err_delay)); 1799 0 stevel mutex_exit(&mi->mi_lock); 1800 0 stevel } else { 1801 0 stevel mntinfo4_t **milist; 1802 0 stevel mntinfo4_t *tmi; 1803 0 stevel int nummi, i; 1804 0 stevel 1805 0 stevel /* 1806 0 stevel * Initiate recovery of open files for other filesystems. 1807 0 stevel * We create an array of filesystems, rather than just 1808 0 stevel * walking the filesystem list, to avoid deadlock issues 1809 0 stevel * with s_lock and mi_recovlock. 1810 0 stevel */ 1811 0 stevel milist = make_milist(sp, &nummi); 1812 0 stevel for (i = 0; i < nummi; i++) { 1813 0 stevel tmi = milist[i]; 1814 0 stevel if (tmi != mi) { 1815 0 stevel (void) nfs_rw_enter_sig(&tmi->mi_recovlock, 1816 5302 th199096 RW_READER, 0); 1817 0 stevel start_recovery_action(NR_OPENFILES, TRUE, tmi, 1818 5302 th199096 NULL, NULL); 1819 0 stevel nfs_rw_exit(&tmi->mi_recovlock); 1820 0 stevel } 1821 0 stevel } 1822 0 stevel free_milist(milist, nummi); 1823 0 stevel 1824 0 stevel nfs_rw_exit(&sp->s_recovlock); 1825 0 stevel } 1826 0 stevel } 1827 0 stevel 1828 0 stevel /* 1829 0 stevel * Return an array of filesystems associated with the given server. The 1830 0 stevel * caller should call free_milist() to free the references and memory. 1831 0 stevel */ 1832 0 stevel 1833 0 stevel static mntinfo4_t ** 1834 0 stevel make_milist(nfs4_server_t *sp, int *nummip) 1835 0 stevel { 1836 0 stevel int nummi, i; 1837 0 stevel mntinfo4_t **milist; 1838 0 stevel mntinfo4_t *tmi; 1839 0 stevel 1840 0 stevel mutex_enter(&sp->s_lock); 1841 0 stevel nummi = 0; 1842 0 stevel for (tmi = sp->mntinfo4_list; tmi != NULL; tmi = tmi->mi_clientid_next) 1843 0 stevel nummi++; 1844 0 stevel 1845 4254 jwahlig milist = kmem_alloc(nummi * sizeof (mntinfo4_t *), KM_SLEEP); 1846 0 stevel 1847 0 stevel for (i = 0, tmi = sp->mntinfo4_list; tmi != NULL; i++, 1848 0 stevel tmi = tmi->mi_clientid_next) { 1849 0 stevel milist[i] = tmi; 1850 0 stevel VFS_HOLD(tmi->mi_vfsp); 1851 0 stevel } 1852 0 stevel mutex_exit(&sp->s_lock); 1853 0 stevel 1854 0 stevel *nummip = nummi; 1855 0 stevel return (milist); 1856 0 stevel } 1857 0 stevel 1858 0 stevel /* 1859 0 stevel * Free the filesystem list created by make_milist(). 1860 0 stevel */ 1861 0 stevel 1862 0 stevel static void 1863 0 stevel free_milist(mntinfo4_t **milist, int nummi) 1864 0 stevel { 1865 0 stevel mntinfo4_t *tmi; 1866 0 stevel int i; 1867 0 stevel 1868 0 stevel for (i = 0; i < nummi; i++) { 1869 0 stevel tmi = milist[i]; 1870 0 stevel VFS_RELE(tmi->mi_vfsp); 1871 0 stevel } 1872 0 stevel kmem_free(milist, nummi * sizeof (mntinfo4_t *)); 1873 0 stevel } 1874 0 stevel 1875 0 stevel /* 1876 0 stevel * Filehandle 1877 0 stevel */ 1878 0 stevel 1879 0 stevel /* 1880 0 stevel * Lookup the filehandle for the given vnode and update the rnode if it has 1881 0 stevel * changed. 1882 0 stevel * 1883 0 stevel * Errors: 1884 0 stevel * - if the filehandle could not be updated because of an error that 1885 0 stevel * requires further recovery, initiate that recovery and return. 1886 0 stevel * - if the filehandle could not be updated because of a signal, pretend we 1887 0 stevel * succeeded and let someone else deal with it. 1888 0 stevel * - if the filehandle could not be updated and the filesystem has been 1889 0 stevel * forcibly unmounted, pretend we succeeded, and let the caller deal with 1890 0 stevel * the forced unmount (to retry or not to retry, that is the question). 1891 0 stevel * - if the filehandle could not be updated because of some other error, 1892 0 stevel * mark the rnode bad and return. 1893 0 stevel */ 1894 0 stevel static void 1895 0 stevel recov_filehandle(nfs4_recov_t action, mntinfo4_t *mi, vnode_t *vp) 1896 0 stevel { 1897 0 stevel rnode4_t *rp = VTOR4(vp); 1898 0 stevel nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 1899 0 stevel bool_t needrecov; 1900 0 stevel 1901 0 stevel mutex_enter(&rp->r_statelock); 1902 0 stevel 1903 0 stevel if (rp->r_flags & R4RECOVERR) { 1904 0 stevel mutex_exit(&rp->r_statelock); 1905 0 stevel return; 1906 0 stevel } 1907 0 stevel 1908 0 stevel /* 1909 0 stevel * If someone else is updating the filehandle, wait for them to 1910 0 stevel * finish and then let our caller retry. 1911 0 stevel */ 1912 0 stevel if (rp->r_flags & R4RECEXPFH) { 1913 0 stevel while (rp->r_flags & R4RECEXPFH) { 1914 0 stevel cv_wait(&rp->r_cv, &rp->r_statelock); 1915 0 stevel } 1916 0 stevel mutex_exit(&rp->r_statelock); 1917 0 stevel return; 1918 0 stevel } 1919 0 stevel rp->r_flags |= R4RECEXPFH; 1920 0 stevel mutex_exit(&rp->r_statelock); 1921 0 stevel 1922 0 stevel if (action == NR_BADHANDLE) { 1923 0 stevel /* shouldn't happen */ 1924 0 stevel nfs4_queue_event(RE_BADHANDLE, mi, NULL, 0, 1925 0 stevel vp, NULL, 0, NULL, 0, TAG_NONE, TAG_NONE, 0, 0); 1926 0 stevel } 1927 0 stevel 1928 0 stevel nfs4_remap_file(mi, vp, 0, &e); 1929 0 stevel needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp); 1930 0 stevel 1931 0 stevel /* 1932 0 stevel * If we get BADHANDLE or FHEXPIRED in their handler, something is 1933 0 stevel * broken. Don't try to recover, just mark the file dead. 1934 0 stevel */ 1935 0 stevel if (needrecov && e.error == 0 && 1936 0 stevel (e.stat == NFS4ERR_BADHANDLE || e.stat == NFS4ERR_FHEXPIRED)) 1937 0 stevel needrecov = FALSE; 1938 0 stevel if (needrecov) { 1939 0 stevel (void) nfs4_start_recovery(&e, mi, vp, 1940 5302 th199096 NULL, NULL, NULL, OP_LOOKUP, NULL); 1941 0 stevel } else if (e.error != EINTR && 1942 0 stevel !NFS4_FRC_UNMT_ERR(e.error, mi->mi_vfsp) && 1943 0 stevel (e.error != 0 || e.stat != NFS4_OK)) { 1944 0 stevel nfs4_recov_fh_fail(vp, e.error, e.stat); 1945 0 stevel /* 1946 0 stevel * Don't set r_error to ESTALE. Higher-level code (e.g., 1947 0 stevel * cstatat_getvp()) retries on ESTALE, which would cause 1948 0 stevel * an infinite loop. 1949 0 stevel */ 1950 0 stevel } 1951 0 stevel 1952 0 stevel mutex_enter(&rp->r_statelock); 1953 0 stevel rp->r_flags &= ~R4RECEXPFH; 1954 0 stevel cv_broadcast(&rp->r_cv); 1955 0 stevel mutex_exit(&rp->r_statelock); 1956 0 stevel } 1957 0 stevel 1958 0 stevel /* 1959 0 stevel * Stale Filehandle 1960 0 stevel */ 1961 0 stevel 1962 0 stevel /* 1963 0 stevel * A stale filehandle can happen when an individual file has 1964 0 stevel * been removed, or when an entire filesystem has been taken 1965 0 stevel * offline. To distinguish these cases, we do this: 1966 0 stevel * - if a GETATTR with the current filehandle is okay, we do 1967 0 stevel * nothing (this can happen with two-filehandle ops) 1968 0 stevel * - if the GETATTR fails, but a GETATTR of the root filehandle 1969 0 stevel * succeeds, mark the rnode with R4STALE, which will stop use 1970 0 stevel * - if the GETATTR fails, and a GETATTR of the root filehandle 1971 0 stevel * also fails, we consider the problem filesystem-wide, so: 1972 0 stevel * - if we can failover, we should 1973 0 stevel * - if we can't failover, we should mark both the original 1974 0 stevel * vnode and the root bad 1975 0 stevel */ 1976 0 stevel static void 1977 0 stevel recov_stale(mntinfo4_t *mi, vnode_t *vp) 1978 0 stevel { 1979 0 stevel rnode4_t *rp = VTOR4(vp); 1980 0 stevel vnode_t *rootvp = NULL; 1981 0 stevel nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 1982 0 stevel nfs4_ga_res_t gar; 1983 0 stevel char *fail_msg = "failed to recover from NFS4ERR_STALE"; 1984 0 stevel bool_t needrecov; 1985 0 stevel 1986 0 stevel mutex_enter(&rp->r_statelock); 1987 0 stevel 1988 0 stevel if (rp->r_flags & R4RECOVERR) { 1989 0 stevel mutex_exit(&rp->r_statelock); 1990 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1991 0 stevel "recov_stale: already marked dead, rp %s", 1992 0 stevel rnode4info(rp))); 1993 0 stevel return; 1994 0 stevel } 1995 0 stevel 1996 0 stevel if (rp->r_flags & R4STALE) { 1997 0 stevel mutex_exit(&rp->r_statelock); 1998 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 1999 0 stevel "recov_stale: already marked stale, rp %s", 2000 0 stevel rnode4info(rp))); 2001 0 stevel return; 2002 0 stevel } 2003 0 stevel 2004 0 stevel mutex_exit(&rp->r_statelock); 2005 0 stevel 2006 0 stevel /* Try a GETATTR on this vnode */ 2007 0 stevel nfs4_getattr_otw_norecovery(vp, &gar, &e, CRED(), 0); 2008 0 stevel 2009 0 stevel /* 2010 0 stevel * Handle non-STALE recoverable errors 2011 0 stevel */ 2012 0 stevel needrecov = nfs4_needs_recovery(&e, FALSE, vp->v_vfsp); 2013 0 stevel if (needrecov && (e.error != 0 || e.stat != NFS4ERR_STALE)) { 2014 0 stevel (void) nfs4_start_recovery(&e, mi, vp, 2015 5302 th199096 NULL, NULL, NULL, OP_GETATTR, NULL); 2016 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 2017 0 stevel "recov_stale: error=%d, stat=%d seen on rp %s", 2018 0 stevel e.error, e.stat, rnode4info(rp))); 2019 0 stevel goto out; 2020 0 stevel } 2021 0 stevel 2022 0 stevel /* Are things OK for this vnode? */ 2023 0 stevel if (!e.error && e.stat == NFS4_OK) { 2024 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 2025 0 stevel "recov_stale: file appears fine, rp %s", 2026 0 stevel rnode4info(rp))); 2027 0 stevel goto out; 2028 0 stevel } 2029 0 stevel 2030 0 stevel /* Did we get an unrelated non-recoverable error? */ 2031 0 stevel if (e.error || e.stat != NFS4ERR_STALE) { 2032 0 stevel nfs4_fail_recov(vp, fail_msg, e.error, e.stat); 2033 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 2034 0 stevel "recov_stale: unrelated fatal error, rp %s", 2035 0 stevel rnode4info(rp))); 2036 0 stevel goto out; 2037 0 stevel } 2038 0 stevel 2039 0 stevel /* 2040 0 stevel * If we don't appear to be dealing with the root node, find it. 2041 0 stevel */ 2042 0 stevel if ((vp->v_flag & VROOT) == 0) { 2043 0 stevel nfs4_error_zinit(&e); 2044 0 stevel e.error = VFS_ROOT(vp->v_vfsp, &rootvp); 2045 0 stevel if (e.error) { 2046 0 stevel nfs4_fail_recov(vp, fail_msg, 0, NFS4ERR_STALE); 2047 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 2048 0 stevel "recov_stale: can't find root node for rp %s", 2049 0 stevel rnode4info(rp))); 2050 0 stevel goto out; 2051 0 stevel } 2052 0 stevel } 2053 0 stevel 2054 0 stevel /* Try a GETATTR on the root vnode */ 2055 0 stevel if (rootvp != NULL) { 2056 0 stevel nfs4_error_zinit(&e); 2057 0 stevel nfs4_getattr_otw_norecovery(rootvp, &gar, &e, CRED(), 0); 2058 0 stevel 2059 0 stevel /* Try recovery? */ 2060 0 stevel if (e.error != 0 || e.stat != NFS4ERR_STALE) { 2061 0 stevel needrecov = nfs4_needs_recovery(&e, FALSE, vp->v_vfsp); 2062 0 stevel if (needrecov) { 2063 0 stevel (void) nfs4_start_recovery(&e, 2064 5302 th199096 mi, rootvp, NULL, NULL, NULL, 2065 5302 th199096 OP_GETATTR, NULL); 2066 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 2067 0 stevel "recov_stale: error=%d, stat=%d seen " 2068 0 stevel "on rp %s", e.error, e.stat, 2069 0 stevel rnode4info(rp))); 2070 0 stevel } 2071 0 stevel } 2072 0 stevel 2073 0 stevel /* 2074 0 stevel * Check to see if a failover attempt is warranted 2075 0 stevel * NB: nfs4_try_failover doesn't check for STALE 2076 0 stevel * because recov_stale gets a shot first. Now that 2077 0 stevel * recov_stale has failed, go ahead and try failover. 2078 0 stevel * 2079 0 stevel * If the getattr on the root filehandle was successful, 2080 0 stevel * then mark recovery as failed for 'vp' and exit. 2081 0 stevel */ 2082 0 stevel if (nfs4_try_failover(&e) == 0 && e.stat != NFS4ERR_STALE) { 2083 0 stevel /* 2084 0 stevel * pass the original error to fail_recov, not 2085 0 stevel * the one from trying the root vnode. 2086 0 stevel */ 2087 0 stevel nfs4_fail_recov(vp, fail_msg, 0, NFS4ERR_STALE); 2088 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 2089 0 stevel "recov_stale: root node OK, marking " 2090 0 stevel "dead rp %s", rnode4info(rp))); 2091 0 stevel goto out; 2092 0 stevel } 2093 0 stevel } 2094 0 stevel 2095 0 stevel /* 2096 0 stevel * Here, we know that both the original file and the 2097 0 stevel * root filehandle (which may be the same) are stale. 2098 0 stevel * We want to fail over if we can, and if we can't, we 2099 0 stevel * want to mark everything in sight bad. 2100 0 stevel */ 2101 0 stevel if (FAILOVER_MOUNT4(mi)) { 2102 0 stevel mutex_enter(&mi->mi_lock); 2103 0 stevel mi->mi_recovflags |= MI4R_NEED_NEW_SERVER; 2104 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 2105 0 stevel "recov_stale: failing over due to rp %s", 2106 0 stevel rnode4info(rp))); 2107 0 stevel mutex_exit(&mi->mi_lock); 2108 0 stevel } else { 2109 0 stevel rnode4_t *rootrp; 2110 0 stevel servinfo4_t *svp; 2111 0 stevel 2112 0 stevel /* 2113 0 stevel * Can't fail over, so mark things dead. 2114 0 stevel * 2115 0 stevel * If rootvp is set, we know we have a distinct 2116 0 stevel * non-root vnode which can be marked dead in 2117 0 stevel * the usual way. 2118 0 stevel * 2119 0 stevel * Then we want to mark the root vnode dead. 2120 0 stevel * Note that if rootvp wasn't set, our vp is 2121 0 stevel * actually the root vnode. 2122 0 stevel */ 2123 0 stevel if (rootvp != NULL) { 2124 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE, 2125 0 stevel "recov_stale: can't fail over, marking dead rp %s", 2126 0 stevel rnode4info(rp))); 2127 0 stevel nfs4_fail_recov(vp, fail_msg, 0, NFS4ERR_STALE); 2128 0 stevel } else { 2129 0 stevel rootvp = vp; 2130 0 stevel VN_HOLD(rootvp); 2131 0 stevel } 2132 0 stevel 2133 0 stevel /* 2134 0 stevel * Mark root dead, but quietly - since 2135 0 stevel * the root rnode is frequently recreated, 2136 0 stevel * we can encounter this at every access. 2137 0 stevel * Also mark recovery as failed on this VFS. 2138 0 stevel */ 2139 0 stevel rootrp = VTOR4(rootvp); 2140 0 stevel NFS4_DEBUG(nfs4_client_recov_debug, (CE_CONT, 2141 0 stevel "recov_stale: marking dead root rp %s", 2142 0 stevel rnode4info(rootrp))); 2143 0 stevel mutex_enter(&rootrp->r_statelock); 2144 0 stevel rootrp->r_flags |= (R4RECOVERR | R4STALE); 2145 0 stevel rootrp->r_error = ESTALE; 2146 0 stevel mutex_exit(&rootrp->r_statelock); 2147 0 stevel mutex_enter(&mi->mi_lock); 2148 0 stevel mi->mi_error = ESTALE; 2149 0 stevel mutex_exit(&mi->mi_lock); 2150 0 stevel 2151 0 stevel svp = mi->mi_curr_serv; 2152 0 stevel (void) nfs_rw_enter_sig(&svp->sv_lock, RW_WRITER, 0); 2153 0 stevel svp->sv_flags |= SV4_ROOT_STALE; 2154 0 stevel nfs_rw_exit(&svp->sv_lock); 2155 0 stevel } 2156 0 stevel 2157 0 stevel out: 2158 0 stevel if (rootvp) 2159 0 stevel VN_RELE(rootvp); 2160 0 stevel } 2161 0 stevel 2162 0 stevel /* 2163 0 stevel * Locks. 2164 0 stevel */ 2165 0 stevel 2166 0 stevel /* 2167 0 stevel * Reclaim all the active (acquired) locks for the given file. 2168 0 stevel * If a process lost a lock, the process is sent a SIGLOST. This is not 2169 0 stevel * considered an error. 2170 0 stevel * 2171 0 stevel * Return values: 2172 0 stevel * Errors and status are returned via the nfs4_error_t parameter 2173 0 stevel * If an error indicates that recovery is needed, the caller is responsible 2174 0 stevel * for dealing with it. 2175 0 stevel */ 2176 0 stevel 2177 0 stevel static void 2178 0 stevel relock_file(vnode_t *vp, mntinfo4_t *mi, nfs4_error_t *ep, 2179 0 stevel fattr4_change pre_change) 2180 0 stevel { 2181 0 stevel locklist_t *locks, *llp; 2182 0 stevel rnode4_t *rp; 2183 0 stevel 2184 0 stevel ASSERT(ep != NULL); 2185 0 stevel nfs4_error_zinit(ep); 2186 0 stevel 2187 0 stevel if (VTOMI4(vp)->mi_flags & MI4_LLOCK) 2188 0 stevel return; 2189 0 stevel 2190 0 stevel nfs4_flush_lock_owners(VTOR4(vp)); 2191 0 stevel 2192 0 stevel /* 2193 0 stevel * If we get an error that requires recovery actions, just bail out 2194 0 stevel * and let the top-level recovery code handle it. 2195 0 stevel * 2196 0 stevel * If we get some other error, kill the process that owned the lock 2197 0 stevel * and mark its remaining locks (if any) as belonging to NOPID, so 2198 0 stevel * that we don't make any more reclaim requests for that process. 2199 0 stevel */ 2200 0 stevel 2201 0 stevel rp = VTOR4(vp); 2202 0 stevel locks = flk_active_locks_for_vp(vp); 2203 0 stevel for (llp = locks; llp != NULL; llp = llp->ll_next) { 2204 0 stevel int did_reclaim = 1; 2205 0 stevel 2206 0 stevel ASSERT(llp->ll_vp == vp); 2207 0 stevel if (llp->ll_flock.l_pid == NOPID) 2208 0 stevel continue; 2209 0 stevel reclaim_one_lock(vp, &llp->ll_flock, ep, &did_reclaim); 2210 0 stevel /* 2211 0 stevel * If we need to restart recovery, stop processing the 2212 0 stevel * list. Some errors would be recoverable under other 2213 0 stevel * circumstances, but if they happen here we just give up 2214 0 stevel * on the lock. 2215 0 stevel */ 2216 0 stevel if (nfs4_needs_recovery(ep, TRUE, vp->v_vfsp)) { 2217 0 stevel if (ep->error != 0) 2218 0 stevel break; 2219 0 stevel if (!nfs4_recov_marks_dead(ep->stat)) 2220 0 stevel break; 2221 0 stevel } 2222 0 stevel /* 2223 0 stevel * In case the server isn't offering us a grace period, or 2224 0 stevel * if we missed it, we might have opened & locked from scratch, 2225 0 stevel * rather than reopened/reclaimed. 2226 0 stevel * We need to ensure that the object hadn't been otherwise 2227 0 stevel * changed during this time, by comparing the changeinfo. 2228 0 stevel * We get passed the changeinfo from before the reopen by our 2229 0 stevel * caller, in pre_change. 2230 0 stevel * The changeinfo from after the reopen is in rp->r_change, 2231 0 stevel * courtesy of the GETATTR in the reopen. 2232 0 stevel * If they're different, then the file has changed, and we 2233 0 stevel * have to SIGLOST the app. 2234 0 stevel */ 2235 0 stevel if (ep->error == 0 && ep->stat == NFS4_OK && !did_reclaim) { 2236 0 stevel mutex_enter(&rp->r_statelock); 2237 0 stevel if (pre_change != rp->r_change) 2238 0 stevel ep->stat = NFS4ERR_NO_GRACE; 2239 0 stevel mutex_exit(&rp->r_statelock); 2240 0 stevel } 2241 0 stevel if (ep->error != 0 || ep->stat != NFS4_OK) { 2242 0 stevel if (ep->error != 0) 2243 0 stevel nfs4_queue_event(RE_FAIL_RELOCK, mi, 2244 0 stevel NULL, ep->error, vp, NULL, 0, NULL, 2245 0 stevel llp->ll_flock.l_pid, TAG_NONE, TAG_NONE, 2246 0 stevel 0, 0); 2247 0 stevel else 2248 0 stevel nfs4_queue_event(RE_FAIL_RELOCK, mi, 2249 0 stevel NULL, 0, vp, NULL, ep->stat, NULL, 2250 0 stevel llp->ll_flock.l_pid, TAG_NONE, TAG_NONE, 2251 0 stevel 0, 0); 2252 0 stevel nfs4_send_siglost(llp->ll_flock.l_pid, mi, vp, TRUE, 2253 0 stevel ep->error, ep->stat); 2254 0 stevel relock_skip_pid(llp, llp->ll_flock.l_pid); 2255 0 stevel 2256 0 stevel /* Reinitialize the nfs4_error and continue */ 2257 0 stevel nfs4_error_zinit(ep); 2258 0 stevel } 2259 0 stevel } 2260 0 stevel 2261 0 stevel if (locks != NULL) 2262 0 stevel flk_free_locklist(locks); 2263 0 stevel } 2264 0 stevel 2265 0 stevel /* 2266 0 stevel * Reclaim the given lock. 2267 0 stevel * If the lock can't be reclaimed, the process is sent SIGLOST, but this is 2268 0 stevel * not considered an error. 2269 0 stevel * 2270 0 stevel * Errors are returned via the nfs4_error_t parameter. 2271 0 stevel */ 2272 0 stevel static void 2273 0 stevel reclaim_one_lock(vnode_t *vp, flock64_t *flk, nfs4_error_t *ep, 2274 5302 th199096 int *did_reclaimp) 2275 0 stevel { 2276 0 stevel cred_t *cr; 2277 0 stevel rnode4_t *rp = VTOR4(vp); 2278 0 stevel 2279 0 stevel cr = pid_to_cr(flk->l_pid); 2280 0 stevel if (cr == NULL) { 2281 0 stevel nfs4_error_zinit(ep); 2282 0 stevel ep->error = ESRCH; 2283 0 stevel return; 2284 0 stevel } 2285 0 stevel 2286 0 stevel do { 2287 0 stevel mutex_enter(&rp->r_statelock); 2288 0 stevel if (rp->r_flags & R4RECOVERR) { 2289 0 stevel /* 2290 0 stevel * This shouldn't affect other reclaims, so don't 2291 0 stevel * return an error. 2292 0 stevel */ 2293 0 stevel mutex_exit(&rp->r_statelock); 2294 0 stevel break; 2295 0 stevel } 2296 0 stevel mutex_exit(&rp->r_statelock); 2297 0 stevel 2298 0 stevel nfs4frlock(NFS4_LCK_CTYPE_RECLAIM, vp, F_SETLK, flk, 2299 5302 th199096 FREAD|FWRITE, 0, cr, ep, NULL, did_reclaimp); 2300 0 stevel if (ep->error == 0 && ep->stat == NFS4ERR_FHEXPIRED) 2301 0 stevel start_recovery_action(NR_FHEXPIRED, TRUE, VTOMI4(vp), 2302 5302 th199096 vp, NULL); 2303 0 stevel } while (ep->error == 0 && ep->stat == NFS4ERR_FHEXPIRED); 2304 0 stevel 2305 0 stevel crfree(cr); 2306 0 stevel } 2307 0 stevel 2308 0 stevel /* 2309 0 stevel * Open files. 2310 0 stevel */ 2311 0 stevel 2312 0 stevel /* 2313 0 stevel * Verifies if the nfsstat4 is a valid error for marking this vnode dead. 2314 0 stevel * Returns 1 if the error is valid; 0 otherwise. 2315 0 stevel */ 2316 0 stevel static int 2317 0 stevel nfs4_valid_recov_err_for_vp(vnode_t *vp, nfsstat4 stat) 2318 0 stevel { 2319 0 stevel /* 2320 0 stevel * We should not be marking non-regular files as dead, 2321 0 stevel * except in very rare cases (eg: BADHANDLE or NFS4ERR_BADNAME). 2322 0 stevel */ 2323 0 stevel if (vp->v_type != VREG && stat != NFS4ERR_BADHANDLE && 2324 0 stevel stat != NFS4ERR_BADNAME) 2325 0 stevel return (0); 2326 0 stevel 2327 0 stevel return (1); 2328 0 stevel } 2329 0 stevel 2330 0 stevel /* 2331 0 stevel * Failed attempting to recover a filehandle. If 'stat' is valid for 'vp', 2332 0 stevel * then mark the object dead. Since we've had to do a lookup for 2333 0 stevel * filehandle recovery, we will mark the object dead if we got NOENT. 2334 0 stevel */ 2335 0 stevel static void 2336 0 stevel nfs4_recov_fh_fail(vnode_t *vp, int error, nfsstat4 stat) 2337 0 stevel { 2338 0 stevel ASSERT(vp != NULL); 2339 0 stevel 2340 0 stevel if ((error == 0) && (stat != NFS4ERR_NOENT) && 2341 0 stevel (!nfs4_valid_recov_err_for_vp(vp, stat))) 2342 0 stevel return; 2343 0 stevel 2344 0 stevel nfs4_fail_recov(vp, "can't recover filehandle", error, stat); 2345 0 stevel } 2346 0 stevel 2347 0 stevel /* 2348 0 stevel * Recovery from a "shouldn't happen" error. In the long term, we'd like 2349 0 stevel * to mark only the data structure(s) that provided the bad value as being 2350 0 stevel * bad. But for now we'll just mark the entire file. 2351 0 stevel */ 2352 0 stevel 2353 0 stevel static void 2354 0 stevel recov_badstate(recov_info_t *recovp, vnode_t *vp, nfsstat4 stat) 2355 0 stevel { 2356 0 stevel ASSERT(vp != NULL); 2357 0 stevel recov_throttle(recovp, vp); 2358 0 stevel 2359 0 stevel if (!nfs4_valid_recov_err_for_vp(vp, stat)) 2360 0 stevel return; 2361 0 stevel 2362 0 stevel nfs4_fail_recov(vp, "", 0, stat); 2363 0 stevel } 2364 0 stevel 2365 0 stevel /* 2366 0 stevel * Free up the information saved for a lost state request. 2367 0 stevel */ 2368 0 stevel static void 2369 0 stevel nfs4_free_lost_rqst(nfs4_lost_rqst_t *lrp, nfs4_server_t *sp) 2370 0 stevel { 2371 0 stevel component4 *filep; 2372 0 stevel nfs4_open_stream_t *osp; 2373 0 stevel int have_sync_lock; 2374 0 stevel 2375 0 stevel NFS4_DEBUG(nfs4_lost_rqst_debug, 2376 5302 th199096 (CE_NOTE, "nfs4_free_lost_rqst:")); 2377 0 stevel 2378 0 stevel switch (lrp->lr_op) { 2379 0 stevel case OP_OPEN: 2380 0 stevel filep = &lrp->lr_ofile; 2381 0 stevel if (filep->utf8string_val) { 2382 0 stevel kmem_free(filep->utf8string_val, filep->utf8string_len); 2383 0 stevel filep->utf8string_val = NULL; 2384 0 stevel } 2385 0 stevel break; 2386 0 stevel case OP_DELEGRETURN: 2387 0 stevel nfs4delegreturn_cleanup(VTOR4(lrp->lr_vp), sp); 2388 0 stevel break; 2389 0 stevel case OP_CLOSE: 2390 0 stevel osp = lrp->lr_osp; 2391 0 stevel ASSERT(osp != NULL); 2392 0 stevel mutex_enter(&osp->os_sync_lock); 2393 0 stevel have_sync_lock = 1; 2394 0 stevel if (osp->os_pending_close) { 2395 0 stevel /* clean up the open file state. */ 2396 0 stevel osp->os_pending_close = 0; 2397 0 stevel nfs4close_notw(lrp->lr_vp, osp, &have_sync_lock); 2398 0 stevel } 2399 0 stevel if (have_sync_lock) 2400 0 stevel mutex_exit(&osp->os_sync_lock); 2401 0 stevel break; 2402 0 stevel } 2403 0 stevel 2404 0 stevel lrp->lr_op = 0; 2405 0 stevel if (lrp->lr_oop != NULL) { 2406 0 stevel open_owner_rele(lrp->lr_oop); 2407 0 stevel lrp->lr_oop = NULL; 2408 0 stevel } 2409 0 stevel if (lrp->lr_osp != NULL) { 2410 0 stevel open_stream_rele(lrp->lr_osp, VTOR4(lrp->lr_vp)); 2411 0 stevel lrp->lr_osp = NULL; 2412 0 stevel } 2413 0 stevel if (lrp->lr_lop != NULL) { 2414 0 stevel lock_owner_rele(lrp->lr_lop); 2415 0 stevel lrp->lr_lop = NULL; 2416 0 stevel } 2417 0 stevel if (lrp->lr_flk != NULL) { 2418 0 stevel kmem_free(lrp->lr_flk, sizeof (flock64_t)); 2419 0 stevel lrp->lr_flk = NULL; 2420 0 stevel } 2421 0 stevel if (lrp->lr_vp != NULL) { 2422 0 stevel VN_RELE(lrp->lr_vp); 2423 0 stevel lrp->lr_vp = NULL; 2424 0 stevel } 2425 0 stevel if (lrp->lr_dvp != NULL) { 2426 0 stevel VN_RELE(lrp->lr_dvp); 2427 0 stevel lrp->lr_dvp = NULL; 2428 0 stevel } 2429 0 stevel if (lrp->lr_cr != NULL) { 2430 0 stevel crfree(lrp->lr_cr); 2431 0 stevel lrp->lr_cr = NULL; 2432 0 stevel } 2433 0 stevel 2434 0 stevel kmem_free(lrp, sizeof (nfs4_lost_rqst_t)); 2435 0 stevel } 2436 0 stevel 2437 0 stevel /* 2438 0 stevel * Remove any lost state requests and free them. 2439 0 stevel */ 2440 0 stevel static void 2441 0 stevel nfs4_remove_lost_rqsts(mntinfo4_t *mi, nfs4_server_t *sp) 2442 0 stevel { 2443 0 stevel nfs4_lost_rqst_t *lrp; 2444 0 stevel 2445 0 stevel mutex_enter(&mi->mi_lock); 2446 0 stevel while ((lrp = list_head(&mi->mi_lost_state)) != NULL) { 2447 0 stevel list_remove(&mi->mi_lost_state, lrp); 2448 0 stevel mutex_exit(&mi->mi_lock); 2449 0 stevel nfs4_free_lost_rqst(lrp, sp); 2450 0 stevel mutex_enter(&mi->mi_lock); 2451 0 stevel } 2452 0 stevel mutex_exit(&mi->mi_lock); 2453 0 stevel } 2454 0 stevel 2455 0 stevel /* 2456 0 stevel * Reopen all the files for the given filesystem and reclaim any locks. 2457 0 stevel */ 2458 0 stevel 2459 0 stevel static void 2460 0 stevel recov_openfiles(recov_info_t *recovp, nfs4_server_t *sp) 2461 0 stevel { 2462 0 stevel mntinfo4_t *mi = recovp->rc_mi; 2463 0 stevel nfs4_opinst_t *reopenlist = NULL, *rep; 2464 0 stevel nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS }; 2465 0 stevel open_claim_type4 claim; 2466 0 stevel int remap; 2467 0 stevel char *fail_msg = "No such file or directory on replica"; 2468 0 stevel rnode4_t *rp; 2469 0 stevel fattr4_change pre_change; 2470 0 stevel 2471 0 stevel ASSERT(sp != NULL); 2472 0 stevel 2473 0 stevel /* 2474 0 stevel * This check is to allow a 10ms pause before we reopen files 2475 0 stevel * it should allow the server time to have received the CB_NULL 2476 0 stevel * reply and update its internal structures such that (if 2477 0 stevel * applicable) we are granted a delegation on reopened files. 2478 0 stevel */ 2479 0 stevel mutex_enter(&sp->s_lock); 2480 0 stevel if ((sp->s_flags & (N4S_CB_PINGED | N4S_CB_WAITER)) == 0) { 2481 0 stevel sp->s_flags |= N4S_CB_WAITER; 2482 11066 rafael (void) cv_reltimedwait(&sp->wait_cb_null, &sp->s_lock, 2483 11066 rafael drv_usectohz(N4S_CB_PAUSE_TIME), TR_CLOCK_TICK); 2484 0 stevel } 2485 0 stevel mutex_exit(&sp->s_lock); 2486 0 stevel 2487 0 stevel (void) nfs_rw_enter_sig(&sp->s_recovlock, RW_READER, 0); 2488 0 stevel (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_WRITER, 0); 2489 0 stevel 2490 0 stevel if (NFS4_VOLATILE_FH(mi)) { 2491 0 stevel nfs4_remap_root(mi, &e, 0); 2492 0 stevel if (nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp)) { 2493 0 stevel (void) nfs4_start_recovery(&e, mi, NULL, 2494 5302 th199096 NULL, NULL, NULL, OP_LOOKUP, NULL); 2495 0 stevel } 2496 0 stevel } 2497 0 stevel 2498 0 stevel mutex_enter(&mi->mi_lock); 2499 0 stevel if (recovp->rc_srv_reboot || (mi->mi_recovflags & MI4R_SRV_REBOOT)) 2500 0 stevel claim = CLAIM_PREVIOUS; 2501 0 stevel else 2502 0 stevel claim = CLAIM_NULL; 2503 0 stevel mutex_exit(&mi->mi_lock); 2504 0 stevel 2505 0 stevel if (e.error == 0 && e.stat == NFS4_OK) { 2506 0 stevel /* 2507 0 stevel * Get a snapshot of open files in the filesystem. Note 2508 0 stevel * that new opens will stall until the server's grace 2509 0 stevel * period is done. 2510 0 stevel */ 2511 0 stevel reopenlist = r4mkopenlist(mi); 2512 0 stevel 2513 0 stevel mutex_enter(&mi->mi_lock); 2514 0 stevel remap = mi->mi_recovflags & MI4R_REMAP_FILES; 2515 0 stevel mutex_exit(&mi->mi_lock); 2516 0 stevel /* 2517 0 stevel * Since we are re-establishing state on the 2518 0 stevel * server, its ok to blow away the saved lost 2519 0 stevel * requests since we don't need to reissue it. 2520 0 stevel */ 2521 0 stevel nfs4_remove_lost_rqsts(mi, sp); 2522 0 stevel 2523 0 stevel for (rep = reopenlist; rep; rep = rep->re_next) { 2524 0 stevel 2525 0 stevel if (remap) { 2526 0 stevel nfs4_remap_file(mi, rep->re_vp, 2527 5302 th199096 NFS4_REMAP_CKATTRS, &e); 2528 0 stevel } 2529 0 stevel if (e.error == ENOENT || e.stat == NFS4ERR_NOENT) { 2530 0 stevel /* 2531 0 stevel * The current server does not have the file 2532 0 stevel * that is to be remapped. This is most 2533 0 stevel * likely due to an improperly maintained 2534 0 stevel * replica. The files that are missing from 2535 0 stevel * the server will be marked dead and logged 2536 0 stevel * in order to make sys admins aware of the 2537 0 stevel * problem. 2538 0 stevel */ 2539 0 stevel nfs4_fail_recov(rep->re_vp, 2540 5302 th199096 fail_msg, e.error, e.stat); 2541 0 stevel /* 2542 0 stevel * We've already handled the error so clear it. 2543 0 stevel */ 2544 0 stevel nfs4_error_zinit(&e); 2545 0 stevel continue; 2546 0 stevel } else if (e.error == 0 && e.stat == NFS4_OK) { 2547 0 stevel int j; 2548 0 stevel 2549 0 stevel rp = VTOR4(rep->re_vp); 2550 0 stevel mutex_enter(&rp->r_statelock); 2551 0 stevel pre_change = rp->r_change; 2552 0 stevel mutex_exit(&rp->r_statelock); 2553 0 stevel 2554 0 stevel for (j = 0; j < rep->re_numosp; j++) { 2555 0 stevel nfs4_reopen(rep->re_vp, rep->re_osp[j], 2556 5302 th199096 &e, claim, FALSE, TRUE); 2557 0 stevel if (e.error != 0 || e.stat != NFS4_OK) 2558 0 stevel break; 2559 0 stevel } 2560 0 stevel if (nfs4_needs_recovery(&e, TRUE, 2561 0 stevel mi->mi_vfsp)) { 2562 0 stevel (void) nfs4_start_recovery(&e, mi, 2563 5302 th199096 rep->re_vp, NULL, NULL, NULL, 2564 5302 th199096 OP_OPEN, NULL); 2565 0 stevel break; 2566 0 stevel } 2567 0 stevel } 2568 0 stevel #ifdef DEBUG 2569 0 stevel if (nfs4_recovdelay > 0) 2570 0 stevel delay(MSEC_TO_TICK(nfs4_recovdelay * 1000)); 2571 0 stevel #endif 2572 0 stevel if (e.error == 0 && e.stat == NFS4_OK) 2573 0 stevel relock_file(rep->re_vp, mi, &e, pre_change); 2574 0 stevel 2575 0 stevel if (nfs4_needs_recovery(&e, TRUE, mi->mi_vfsp)) 2576 0 stevel (void) nfs4_start_recovery(&e, mi, 2577 5302 th199096 rep->re_vp, NULL, NULL, NULL, OP_LOCK, 2578 5302 th199096 NULL); 2579 0 stevel if (e.error != 0 || e.stat != NFS4_OK) 2580 0 stevel break; 2581 0 stevel } 2582 0 stevel 2583 0 stevel /* 2584 0 stevel * Check to see if we need to remap files passed in 2585 0 stevel * via the recovery arguments; this will have been 2586 0 stevel * done for open files. A failure here is not fatal. 2587 0 stevel */ 2588 0 stevel if (remap) { 2589 0 stevel nfs4_error_t ignore; 2590 0 stevel nfs4_check_remap(mi, recovp->rc_vp1, NFS4_REMAP_CKATTRS, 2591 5302 th199096 &ignore); 2592 0 stevel nfs4_check_remap(mi, recovp->rc_vp2, NFS4_REMAP_CKATTRS, 2593 5302 th199096 &ignore); 2594 0 stevel } 2595 0 stevel } 2596 0 stevel 2597 0 stevel if (e.error == 0 && e.stat == NFS4_OK) { 2598 0 stevel mutex_enter(&mi->mi_lock); 2599 0 stevel mi->mi_recovflags &= ~(MI4R_REOPEN_FILES | MI4R_REMAP_FILES); 2600 0 stevel mutex_exit(&mi->mi_lock); 2601 0 stevel } 2602 0 stevel 2603 0 stevel nfs_rw_exit(&mi->mi_recovlock); 2604 0 stevel nfs_rw_exit(&sp->s_recovlock); 2605 0 stevel 2606 0 stevel if (reopenlist != NULL) 2607 0 stevel r4releopenlist(reopenlist); 2608 0 stevel } 2609 0 stevel 2610 0 stevel /* 2611 0 stevel * Resend the queued state recovery requests in "rqsts". 2612 0 stevel */ 2613 0 stevel 2614 0 stevel static void 2615 0 stevel nfs4_resend_lost_rqsts(recov_info_t *recovp, nfs4_server_t *sp) 2616 0 stevel { 2617 0 stevel nfs4_lost_rqst_t *lrp, *tlrp; 2618 0 stevel mntinfo4_t *mi = recovp->rc_mi; 2619 284 ek110237 nfs4_error_t n4e; 2620 0 stevel #ifdef NOTYET 2621 0 stevel uint32_t deny_bits = 0; 2622 0 stevel #endif 2623 0 stevel 2624 0 stevel NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, "nfs4_resend_lost_rqsts")); 2625 0 stevel 2626 0 stevel ASSERT(mi != NULL); 2627 0 stevel ASSERT(nfs_rw_lock_held(&mi->mi_recovlock, RW_WRITER)); 2628 0 stevel 2629 0 stevel mutex_enter(&mi->mi_lock); 2630 0 stevel lrp = list_head(&mi->mi_lost_state); 2631 0 stevel mutex_exit(&mi->mi_lock); 2632 0 stevel while (lrp != NULL) { 2633 284 ek110237 nfs4_error_zinit(&n4e); 2634 284 ek110237 resend_one_op(lrp, &n4e, mi, sp); 2635 0 stevel NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 2636 0 stevel "nfs4_resend_lost_rqsts: resend request: for vp %p got " 2637 284 ek110237 "error %d stat %d", (void *)lrp->lr_vp, n4e.error, 2638 284 ek110237 n4e.stat)); 2639 0 stevel 2640 0 stevel /* 2641 0 stevel * If we get a recovery error that we can actually 2642 0 stevel * recover from (such as ETIMEDOUT, FHEXPIRED), we 2643 0 stevel * return and let the recovery thread redrive the call. 2644 0 stevel * Don't requeue unless the zone is still healthy. 2645 0 stevel */ 2646 0 stevel if (zone_status_get(curproc->p_zone) < ZONE_IS_SHUTTING_DOWN && 2647 284 ek110237 nfs4_needs_recovery(&n4e, TRUE, mi->mi_vfsp) && 2648 284 ek110237 (nfs4_try_failover(&n4e) || 2649 284 ek110237 NFS4_FRC_UNMT_ERR(n4e.error, mi->mi_vfsp) || 2650 284 ek110237 (n4e.error == 0 && n4e.stat != NFS4ERR_BADHANDLE && 2651 284 ek110237 !nfs4_recov_marks_dead(n4e.stat)))) { 2652 0 stevel /* 2653 0 stevel * For these three errors, we want to delay a bit 2654 0 stevel * instead of pounding the server into submission. 2655 0 stevel * We have to do this manually; the normal 2656 0 stevel * processing for these errors only works for 2657 0 stevel * non-recovery requests. 2658 0 stevel */ 2659 284 ek110237 if ((n4e.error == 0 && n4e.stat == NFS4ERR_DELAY) || 2660 284 ek110237 (n4e.error == 0 && n4e.stat == NFS4ERR_GRACE) || 2661 284 ek110237 (n4e.error == 0 && n4e.stat == NFS4ERR_RESOURCE) || 2662 284 ek110237 NFS4_FRC_UNMT_ERR(n4e.error, mi->mi_vfsp)) { 2663 0 stevel delay(SEC_TO_TICK(nfs4err_delay_time)); 2664 0 stevel } else { 2665 284 ek110237 (void) nfs4_start_recovery(&n4e, 2666 5302 th199096 mi, lrp->lr_dvp, lrp->lr_vp, NULL, NULL, 2667 5302 th199096 lrp->lr_op, NULL); 2668 0 stevel } 2669 0 stevel return; 2670 0 stevel } 2671 0 stevel 2672 0 stevel mutex_enter(&mi->mi_lock); 2673 0 stevel list_remove(&mi->mi_lost_state, lrp); 2674 0 stevel tlrp = lrp; 2675 0 stevel lrp = list_head(&mi->mi_lost_state); 2676 0 stevel mutex_exit(&mi->mi_lock); 2677 0 stevel nfs4_free_lost_rqst(tlrp, sp); 2678 0 stevel } 2679 0 stevel } 2680 0 stevel 2681 0 stevel /* 2682 0 stevel * Resend the given op, and issue any necessary undo call. 2683 0 stevel * errors are returned via the nfs4_error_t parameter. 2684 0 stevel */ 2685 0 stevel 2686 0 stevel static void 2687 0 stevel resend_one_op(nfs4_lost_rqst_t *lrp, nfs4_error_t *ep, 2688 5302 th199096 mntinfo4_t *mi, nfs4_server_t *sp) 2689 0 stevel { 2690 0 stevel vnode_t *vp; 2691 0 stevel nfs4_open_stream_t *osp; 2692 0 stevel cred_t *cr; 2693 0 stevel uint32_t acc_bits; 2694 0 stevel 2695 0 stevel vp = lrp->lr_vp; 2696 0 stevel NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, "resend_one_op: " 2697 0 stevel "have a lost open/close request for vp %p", (void *)vp)); 2698 0 stevel 2699 0 stevel switch (lrp->lr_op) { 2700 0 stevel case OP_OPEN: 2701 0 stevel nfs4_resend_open_otw(&vp, lrp, ep); 2702 0 stevel break; 2703 0 stevel case OP_OPEN_DOWNGRADE: 2704 0 stevel ASSERT(lrp->lr_oop != NULL); 2705 0 stevel ep->error = nfs4_start_open_seqid_sync(lrp->lr_oop, mi); 2706 0 stevel ASSERT(!ep->error); /* recov thread always succeeds */ 2707 0 stevel ASSERT(lrp->lr_osp != NULL); 2708 0 stevel mutex_enter(&lrp->lr_osp->os_sync_lock); 2709 0 stevel nfs4_open_downgrade(lrp->lr_dg_acc, lrp->lr_dg_deny, 2710 5302 th199096 lrp->lr_oop, lrp->lr_osp, vp, lrp->lr_cr, lrp, 2711 5302 th199096 ep, NULL, NULL); 2712 0 stevel mutex_exit(&lrp->lr_osp->os_sync_lock); 2713 0 stevel nfs4_end_open_seqid_sync(lrp->lr_oop); 2714 0 stevel break; 2715 0 stevel case OP_CLOSE: 2716 0 stevel osp = lrp->lr_osp; 2717 0 stevel cr = lrp->lr_cr; 2718 0 stevel acc_bits = 0; 2719 0 stevel mutex_enter(&osp->os_sync_lock); 2720 0 stevel if (osp->os_share_acc_read) 2721 0 stevel acc_bits |= OPEN4_SHARE_ACCESS_READ; 2722 0 stevel if (osp->os_share_acc_write) 2723 0 stevel acc_bits |= OPEN4_SHARE_ACCESS_WRITE; 2724 0 stevel mutex_exit(&osp->os_sync_lock); 2725 0 stevel nfs4close_one(vp, osp, cr, acc_bits, lrp, ep, 2726 5302 th199096 CLOSE_RESEND, 0, 0, 0); 2727 0 stevel break; 2728 0 stevel case OP_LOCK: 2729 0 stevel case OP_LOCKU: 2730 0 stevel resend_lock(lrp, ep); 2731 0 stevel goto done; 2732 0 stevel case OP_DELEGRETURN: 2733 0 stevel nfs4_resend_delegreturn(lrp, ep, sp); 2734 0 stevel goto done; 2735 0 stevel default: 2736 0 stevel #ifdef DEBUG 2737 0 stevel cmn_err(CE_PANIC, "resend_one_op: unexpected op: %d", 2738 5302 th199096 lrp->lr_op); 2739 0 stevel #endif 2740 0 stevel nfs4_queue_event(RE_LOST_STATE_BAD_OP, mi, NULL, 2741 0 stevel lrp->lr_op, lrp->lr_vp, lrp->lr_dvp, NFS4_OK, NULL, 0, 2742 0 stevel TAG_NONE, TAG_NONE, 0, 0); 2743 0 stevel nfs4_error_init(ep, EINVAL); 2744 0 stevel return; 2745 0 stevel } 2746 0 stevel 2747 0 stevel /* 2748 0 stevel * No need to retry nor send an "undo" CLOSE in the 2749 0 stevel * event the server rebooted. 2750 0 stevel */ 2751 0 stevel if (ep->error == 0 && (ep->stat == NFS4ERR_STALE_CLIENTID || 2752 0 stevel ep->stat == NFS4ERR_STALE_STATEID || ep->stat == NFS4ERR_EXPIRED)) 2753 0 stevel goto done; 2754 0 stevel 2755 0 stevel /* 2756 0 stevel * If we resent a CLOSE or OPEN_DOWNGRADE, there's nothing 2757 0 stevel * to undo. Undoing locking operations was handled by 2758 0 stevel * resend_lock(). 2759 0 stevel */ 2760 0 stevel if (lrp->lr_op == OP_OPEN_DOWNGRADE || lrp->lr_op == OP_CLOSE) 2761 0 stevel goto done; 2762 0 stevel 2763 0 stevel /* 2764 0 stevel * If we get any other error for OPEN, then don't attempt 2765 0 stevel * to undo the resend of the open (since it was never 2766 0 stevel * successful!). 2767 0 stevel */ 2768 0 stevel ASSERT(lrp->lr_op == OP_OPEN); 2769 0 stevel if (ep->error || ep->stat != NFS4_OK) 2770 0 stevel goto done; 2771 0 stevel 2772 0 stevel /* 2773 0 stevel * Now let's undo our OPEN. 2774 0 stevel */ 2775 0 stevel nfs4_error_zinit(ep); 2776 0 stevel close_after_open_resend(vp, lrp->lr_cr, lrp->lr_oacc, ep); 2777 0 stevel NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, "resend_one_op: " 2778 0 stevel "nfs4close_one: for vp %p got error %d stat %d", 2779 0 stevel (void *)vp, ep->error, ep->stat)); 2780 0 stevel 2781 0 stevel done: 2782 0 stevel if (vp != lrp->lr_vp) 2783 0 stevel VN_RELE(vp); 2784 0 stevel } 2785 0 stevel 2786 0 stevel /* 2787 0 stevel * Close a file that was opened via a resent OPEN. 2788 0 stevel * Most errors are passed back to the caller (via the return value and 2789 0 stevel * *statp), except for FHEXPIRED, which is retried. 2790 0 stevel * 2791 0 stevel * It might be conceptually cleaner to push the CLOSE request onto the 2792 0 stevel * front of the resend queue, rather than sending it here. That would 2793 0 stevel * match the way we undo lost lock requests. On the other 2794 0 stevel * hand, we've already got something that works, and there's no reason to 2795 0 stevel * change it at this time. 2796 0 stevel */ 2797 0 stevel 2798 0 stevel static void 2799 0 stevel close_after_open_resend(vnode_t *vp, cred_t *cr, uint32_t acc_bits, 2800 5302 th199096 nfs4_error_t *ep) 2801 0 stevel { 2802 0 stevel 2803 0 stevel for (;;) { 2804 0 stevel nfs4close_one(vp, NULL, cr, acc_bits, NULL, ep, 2805 5302 th199096 CLOSE_AFTER_RESEND, 0, 0, 0); 2806 0 stevel if (ep->error == 0 && ep->stat == NFS4_OK) 2807 0 stevel break; /* success; done */ 2808 0 stevel if (ep->error != 0 || ep->stat != NFS4ERR_FHEXPIRED) 2809 0 stevel break; 2810 0 stevel /* else retry FHEXPIRED */ 2811 0 stevel } 2812 0 stevel 2813 0 stevel } 2814 0 stevel 2815 0 stevel /* 2816 0 stevel * Resend the given lost lock request. Return an errno value. If zero, 2817 0 stevel * *statp is set to the NFS status code for the call. 2818 0 stevel * 2819 0 stevel * Issue a SIGLOST and mark the rnode dead if we get a non-recovery error or 2820 0 stevel * a recovery error that we don't actually recover from yet (eg: BAD_SEQID). 2821 0 stevel * Let the recovery thread redrive the call if we get a recovery error that 2822 0 stevel * we can actually recover from. 2823 0 stevel */ 2824 0 stevel static void 2825 0 stevel resend_lock(nfs4_lost_rqst_t *lrp, nfs4_error_t *ep) 2826 0 stevel { 2827 0 stevel bool_t send_siglost = FALSE; 2828 0 stevel vnode_t *vp = lrp->lr_vp; 2829 0 stevel 2830 0 stevel NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, "resend_lock:")); 2831 0 stevel ASSERT(lrp->lr_ctype == NFS4_LCK_CTYPE_REINSTATE || 2832 0 stevel lrp->lr_ctype == NFS4_LCK_CTYPE_RESEND); 2833 0 stevel 2834 0 stevel nfs4frlock(lrp->lr_ctype, vp, F_SETLK, 2835 5302 th199096 lrp->lr_flk, FREAD|FWRITE, 0, lrp->lr_cr, ep, lrp, NULL); 2836 0 stevel 2837 0 stevel NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, "resend_lock: " 2838 0 stevel "nfs4frlock for vp %p returned error %d, stat %d", 2839 0 stevel (void *)vp, ep->error, ep->stat)); 2840 0 stevel 2841 0 stevel if (ep->error == 0 && ep->stat == 0) 2842 0 stevel goto done; 2843 0 stevel if (ep->error == 0 && ep->stat == NFS4ERR_DENIED && 2844 0 stevel lrp->lr_ctype == NFS4_LCK_CTYPE_RESEND) 2845 0 stevel goto done; 2846 0 stevel 2847 0 stevel /* 2848 0 stevel * If we failed with a non-recovery error, send SIGLOST and 2849 0 stevel * mark the file dead. 2850 0 stevel */ 2851 0 stevel if (!nfs4_needs_recovery(ep, TRUE, vp->v_vfsp)) 2852 0 stevel send_siglost = TRUE; 2853 0 stevel else { 2854 0 stevel /* 2855 0 stevel * Done with recovering LOST LOCK in the event the 2856 0 stevel * server rebooted or we've lost the lease. 2857 0 stevel */ 2858 0 stevel if (ep->error == 0 && (ep->stat == NFS4ERR_STALE_CLIENTID || 2859 0 stevel ep->stat == NFS4ERR_STALE_STATEID || 2860 0 stevel ep->stat == NFS4ERR_EXPIRED)) { 2861 0 stevel goto done; 2862 0 stevel } 2863 0 stevel 2864 0 stevel /* 2865 0 stevel * BAD_STATEID on an unlock indicates that the server has 2866 0 stevel * forgotten about the lock anyway, so act like the call 2867 0 stevel * was successful. 2868 0 stevel */ 2869 0 stevel if (ep->error == 0 && ep->stat == NFS4ERR_BAD_STATEID && 2870 0 stevel lrp->lr_op == OP_LOCKU) 2871 0 stevel goto done; 2872 0 stevel 2873 0 stevel /* 2874 0 stevel * If we got a recovery error that we don't actually 2875 0 stevel * recover from, send SIGLOST. If the filesystem was 2876 0 stevel * forcibly unmounted, we skip the SIGLOST because (a) it's 2877 0 stevel * unnecessary noise, and (b) there could be a new process 2878 0 stevel * with the same pid as the one that had generated the lost 2879 0 stevel * state request. 2880 0 stevel */ 2881 0 stevel if (ep->error == 0 && (ep->stat == NFS4ERR_BADHANDLE || 2882 0 stevel nfs4_recov_marks_dead(ep->stat))) { 2883 0 stevel if (!(vp->v_vfsp->vfs_flag & VFS_UNMOUNTED)) 2884 0 stevel send_siglost = TRUE; 2885 0 stevel goto done; 2886 0 stevel } 2887 0 stevel 2888 0 stevel /* 2889 0 stevel * If the filesystem was forcibly unmounted, we 2890 0 stevel * still need to synchronize with the server and 2891 0 stevel * release state. Try again later. 2892 0 stevel */ 2893 0 stevel if (NFS4_FRC_UNMT_ERR(ep->error, vp->v_vfsp)) 2894 0 stevel goto done; 2895 0 stevel 2896 0 stevel /* 2897 0 stevel * If we get a recovery error that we can actually 2898 0 stevel * recover from (such as ETIMEDOUT, FHEXPIRED), 2899 0 stevel * return and let the recovery thread redrive the call. 2900 0 stevel * 2901 0 stevel * For the three errors below, we want to delay a bit 2902 0 stevel * instead of pounding the server into submission. 2903 0 stevel */ 2904 0 stevel if ((ep->error == 0 && ep->stat == NFS4ERR_DELAY) || 2905 0 stevel (ep->error == 0 && ep->stat == NFS4ERR_GRACE) || 2906 0 stevel (ep->error == 0 && ep->stat == NFS4ERR_RESOURCE)) 2907 0 stevel delay(SEC_TO_TICK(recov_err_delay)); 2908 0 stevel goto done; 2909 0 stevel } 2910 0 stevel 2911 0 stevel done: 2912 0 stevel if (send_siglost) { 2913 0 stevel cred_t *sv_cred; 2914 0 stevel 2915 0 stevel /* 2916 0 stevel * Must be root or the actual thread being issued the 2917 0 stevel * SIGLOST for this to work, so just become root. 2918 0 stevel */ 2919 0 stevel sv_cred = curthread->t_cred; 2920 0 stevel curthread->t_cred = kcred; 2921 0 stevel nfs4_send_siglost(lrp->lr_flk->l_pid, VTOMI4(vp), vp, FALSE, 2922 0 stevel ep->error, ep->stat); 2923 0 stevel curthread->t_cred = sv_cred; 2924 0 stevel 2925 0 stevel /* 2926 0 stevel * Flush any additional reinstantiation requests for 2927 0 stevel * this operation. Sending multiple SIGLOSTs to the user 2928 0 stevel * process is unlikely to help and may cause trouble. 2929 0 stevel */ 2930 0 stevel if (lrp->lr_ctype == NFS4_LCK_CTYPE_REINSTATE) 2931 0 stevel flush_reinstate(lrp); 2932 0 stevel } 2933 0 stevel } 2934 0 stevel 2935 0 stevel /* 2936 0 stevel * Remove any lock reinstantiation requests that correspond to the given 2937 0 stevel * lost request. We only remove items that follow lrp in the queue, 2938 0 stevel * assuming that lrp will be removed by the generic lost state code. 2939 0 stevel */ 2940 0 stevel 2941 0 stevel static void 2942 0 stevel flush_reinstate(nfs4_lost_rqst_t *lrp) 2943 0 stevel { 2944 0 stevel vnode_t *vp; 2945 0 stevel pid_t pid; 2946 0 stevel mntinfo4_t *mi; 2947 0 stevel nfs4_lost_rqst_t *nlrp; 2948 0 stevel 2949 0 stevel vp = lrp->lr_vp; 2950 0 stevel mi = VTOMI4(vp); 2951 0 stevel pid = lrp->lr_flk->l_pid; 2952 0 stevel 2953 0 stevel /* 2954 0 stevel * If there are any more reinstantation requests to get rid of, 2955 0 stevel * they should all be clustered at the front of the lost state 2956 0 stevel * queue. 2957 0 stevel */ 2958 0 stevel mutex_enter(&mi->mi_lock); 2959 0 stevel for (lrp = list_next(&mi->mi_lost_state, lrp); lrp != NULL; 2960 0 stevel lrp = nlrp) { 2961 0 stevel nlrp = list_next(&mi->mi_lost_state, lrp); 2962 0 stevel if (lrp->lr_op != OP_LOCK && lrp->lr_op != OP_LOCKU) 2963 0 stevel break; 2964 0 stevel if (lrp->lr_ctype != NFS4_LCK_CTYPE_REINSTATE) 2965 0 stevel break; 2966 0 stevel ASSERT(lrp->lr_vp == vp); 2967 0 stevel ASSERT(lrp->lr_flk->l_pid == pid); 2968 0 stevel NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 2969 5302 th199096 "remove reinstantiation %p", (void *)lrp)); 2970 0 stevel list_remove(&mi->mi_lost_state, lrp); 2971 0 stevel nfs4_free_lost_rqst(lrp, NULL); 2972 0 stevel } 2973 0 stevel mutex_exit(&mi->mi_lock); 2974 0 stevel } 2975 0 stevel 2976 0 stevel /* 2977 0 stevel * End of state-specific recovery routines. 2978 0 stevel */ 2979 0 stevel 2980 0 stevel /* 2981 0 stevel * Allocate a lost request struct, initialize it from lost_rqstp (including 2982 0 stevel * bumping the reference counts for the referenced vnode, etc.), and hang 2983 0 stevel * it off of recovp. 2984 0 stevel */ 2985 0 stevel 2986 0 stevel static void 2987 0 stevel nfs4_save_lost_rqst(nfs4_lost_rqst_t *lost_rqstp, recov_info_t *recovp, 2988 5302 th199096 nfs4_recov_t *action, mntinfo4_t *mi) 2989 0 stevel { 2990 0 stevel nfs4_lost_rqst_t *destp; 2991 0 stevel 2992 0 stevel ASSERT(recovp->rc_lost_rqst == NULL); 2993 0 stevel 2994 0 stevel destp = kmem_alloc(sizeof (nfs4_lost_rqst_t), KM_SLEEP); 2995 0 stevel recovp->rc_lost_rqst = destp; 2996 0 stevel 2997