1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/cred.h> 30 #include <sys/kstat.h> 31 #include <sys/list.h> 32 #include <sys/systm.h> 33 #include <sys/vfs.h> 34 #include <sys/vnode.h> 35 #include <sys/cmn_err.h> 36 37 #include <nfs/nfs4_clnt.h> 38 #include <nfs/rnode4.h> 39 40 /* 41 * Recovery kstats 42 */ 43 typedef struct rkstat { 44 kstat_named_t badhandle; 45 kstat_named_t badowner; 46 kstat_named_t clientid; 47 kstat_named_t dead_file; 48 kstat_named_t delay; 49 kstat_named_t fail_relock; 50 kstat_named_t file_diff; 51 kstat_named_t no_grace; 52 kstat_named_t not_responding; 53 kstat_named_t opens_changed; 54 kstat_named_t siglost; 55 kstat_named_t unexp_action; 56 kstat_named_t unexp_errno; 57 kstat_named_t unexp_status; 58 kstat_named_t wrongsec; 59 kstat_named_t lost_state_bad_op; 60 } rkstat_t; 61 62 static rkstat_t rkstat_template = { 63 { "badhandle", KSTAT_DATA_ULONG }, 64 { "badowner", KSTAT_DATA_ULONG }, 65 { "clientid", KSTAT_DATA_ULONG }, 66 { "dead_file", KSTAT_DATA_ULONG }, 67 { "delay", KSTAT_DATA_ULONG }, 68 { "fail_relock", KSTAT_DATA_ULONG }, 69 { "file_diff", KSTAT_DATA_ULONG }, 70 { "no_grace", KSTAT_DATA_ULONG }, 71 { "not_responding", KSTAT_DATA_ULONG }, 72 { "opens_changed", KSTAT_DATA_ULONG }, 73 { "siglost", KSTAT_DATA_ULONG }, 74 { "unexp_action", KSTAT_DATA_ULONG }, 75 { "unexp_errno", KSTAT_DATA_ULONG }, 76 { "unexp_status", KSTAT_DATA_ULONG }, 77 { "wrongsec", KSTAT_DATA_ULONG }, 78 { "bad_op", KSTAT_DATA_ULONG }, 79 }; 80 81 /* maximum number of messages allowed on the mi's mi_msg_list */ 82 int nfs4_msg_max = NFS4_MSG_MAX; 83 #define DEFAULT_LEASE 180 84 85 static char *strdup(const char *); 86 87 /* 88 * Sets the appropiate fields of "ep", given "id" and various parameters. 89 * Assumes that ep's fields have been initialized to zero/null, except for 90 * re_type and mount point info, which are already set. 91 */ 92 static void 93 set_event(nfs4_event_type_t id, nfs4_revent_t *ep, mntinfo4_t *mi, 94 rnode4_t *rp1, rnode4_t *rp2, uint_t count, pid_t pid, nfsstat4 nfs4_error, 95 char *server1, char *why, nfs4_tag_type_t tag1, nfs4_tag_type_t tag2, 96 seqid4 seqid1, seqid4 seqid2) 97 { 98 int len; 99 100 switch (id) { 101 case RE_BAD_SEQID: 102 ep->re_mi = mi; 103 104 /* bad seqid'd file <path/component name> */ 105 if (rp1 && rp1->r_svnode.sv_name) 106 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 107 else 108 ep->re_char1 = NULL; 109 ep->re_rp1 = rp1; 110 111 /* for LOCK/LOCKU */ 112 ep->re_pid = pid; 113 114 ep->re_stat4 = nfs4_error; 115 ep->re_tag1 = tag1; 116 ep->re_tag2 = tag2; 117 ep->re_seqid1 = seqid1; 118 ep->re_seqid2 = seqid2; 119 break; 120 case RE_BADHANDLE: 121 ASSERT(rp1 != NULL); 122 123 /* dead file <path/component name> */ 124 if (rp1->r_svnode.sv_name) 125 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 126 else 127 ep->re_char1 = NULL; 128 ep->re_rp1 = rp1; 129 break; 130 case RE_CLIENTID: 131 ep->re_mi = mi; 132 133 /* the error we failed with */ 134 ep->re_uint = count; 135 ep->re_stat4 = nfs4_error; 136 break; 137 case RE_DEAD_FILE: 138 ASSERT(rp1 != NULL); 139 140 /* dead file <path/component name> */ 141 if (rp1->r_svnode.sv_name) 142 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 143 else 144 ep->re_char1 = NULL; 145 ep->re_rp1 = rp1; 146 147 /* why the file got killed */ 148 if (why) { 149 len = strlen(why); 150 ep->re_char2 = kmem_alloc(len + 1, KM_SLEEP); 151 bcopy(why, ep->re_char2, len); 152 ep->re_char2[len] = '\0'; 153 } else 154 ep->re_char2 = NULL; 155 156 ep->re_stat4 = nfs4_error; 157 break; 158 case RE_END: 159 /* first rnode */ 160 if (rp1 && rp1->r_svnode.sv_name) 161 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 162 else 163 ep->re_char1 = NULL; 164 ep->re_rp1 = rp1; 165 166 /* second rnode */ 167 if (rp2 && rp2->r_svnode.sv_name) 168 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 169 else 170 ep->re_char2 = NULL; 171 ep->re_rp2 = rp2; 172 173 ep->re_mi = mi; 174 break; 175 case RE_FAIL_RELOCK: 176 ASSERT(rp1 != NULL); 177 178 /* error on fail relock */ 179 ep->re_uint = count; 180 181 /* process that failed */ 182 ep->re_pid = pid; 183 184 /* nfs4 error */ 185 ep->re_stat4 = nfs4_error; 186 187 /* file <path/component name> */ 188 if (rp1->r_svnode.sv_name) 189 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 190 else 191 ep->re_char1 = NULL; 192 ep->re_rp1 = rp1; 193 break; 194 case RE_FAIL_REMAP_LEN: 195 /* length of returned filehandle */ 196 ep->re_uint = count; 197 break; 198 case RE_FAIL_REMAP_OP: 199 break; 200 case RE_FAILOVER: 201 /* server we're failing over to (if not picking original) */ 202 if (server1 != NULL) { 203 len = strlen(server1); 204 ep->re_char1 = kmem_alloc(len + 1, KM_SLEEP); 205 bcopy(server1, ep->re_char1, len); 206 ep->re_char1[len] = '\0'; 207 } else { 208 ep->re_char1 = NULL; 209 } 210 break; 211 case RE_FILE_DIFF: 212 ASSERT(rp1 != NULL); 213 214 /* dead file <path/component name> */ 215 if (rp1->r_svnode.sv_name) 216 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 217 else 218 ep->re_char1 = NULL; 219 ep->re_rp1 = rp1; 220 break; 221 case RE_LOST_STATE: 222 ep->re_uint = count; /* op number */ 223 if (rp1 && rp1->r_svnode.sv_name) 224 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 225 else 226 ep->re_char1 = NULL; 227 ep->re_rp1 = rp1; 228 if (rp2 && rp2->r_svnode.sv_name) 229 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 230 else 231 ep->re_char2 = NULL; 232 ep->re_rp2 = rp2; 233 break; 234 case RE_OPENS_CHANGED: 235 ep->re_mi = mi; 236 237 /* original number of open files */ 238 ep->re_uint = count; 239 /* new number of open files */ 240 ep->re_pid = pid; 241 break; 242 case RE_SIGLOST: 243 case RE_SIGLOST_NO_DUMP: 244 ASSERT(rp1 != NULL); 245 246 /* file <path/component name> */ 247 if (rp1->r_svnode.sv_name) 248 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 249 else 250 ep->re_char1 = NULL; 251 ep->re_rp1 = rp1; 252 ep->re_pid = pid; 253 ep->re_uint = count; 254 ep->re_stat4 = nfs4_error; 255 break; 256 case RE_START: 257 /* file <path/component name> */ 258 if (rp1 && rp1->r_svnode.sv_name) 259 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 260 else 261 ep->re_char1 = NULL; 262 ep->re_rp1 = rp1; 263 264 /* file <path/component name> */ 265 if (rp2 && rp2->r_svnode.sv_name) 266 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 267 else 268 ep->re_char2 = NULL; 269 ep->re_rp2 = rp2; 270 271 ep->re_mi = mi; 272 ep->re_uint = count; 273 break; 274 case RE_UNEXPECTED_ACTION: 275 case RE_UNEXPECTED_ERRNO: 276 /* the error that is unexpected */ 277 ep->re_uint = count; 278 break; 279 case RE_UNEXPECTED_STATUS: 280 /* nfsstat4 error */ 281 ep->re_stat4 = nfs4_error; 282 break; 283 case RE_WRONGSEC: 284 /* the error we failed with */ 285 ep->re_uint = count; 286 287 /* file <path/component name> */ 288 if (rp1 && rp1->r_svnode.sv_name) 289 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 290 else 291 ep->re_char1 = NULL; 292 ep->re_rp1 = rp1; 293 294 /* file <path/component name> */ 295 if (rp2 && rp2->r_svnode.sv_name) 296 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 297 else 298 ep->re_char2 = NULL; 299 ep->re_rp2 = rp2; 300 break; 301 case RE_LOST_STATE_BAD_OP: 302 ep->re_uint = count; /* the unexpected op */ 303 ep->re_pid = pid; 304 ep->re_rp1 = rp1; 305 if (rp1 != NULL && rp1->r_svnode.sv_name != NULL) 306 ep->re_char1 = fn_path(rp1->r_svnode.sv_name); 307 ep->re_rp2 = rp2; 308 if (rp2 != NULL && rp2->r_svnode.sv_name != NULL) 309 ep->re_char2 = fn_path(rp2->r_svnode.sv_name); 310 break; 311 default: 312 break; 313 } 314 } 315 316 /* 317 * Sets the appropiate fields of the 'fact' for this 'id'. 318 */ 319 static void 320 set_fact(nfs4_fact_type_t id, nfs4_rfact_t *fp, nfsstat4 stat4, 321 nfs4_recov_t raction, nfs_opnum4 op, bool_t reboot, int error, 322 vnode_t *vp) 323 { 324 rnode4_t *rp1; 325 326 switch (id) { 327 case RF_BADOWNER: 328 fp->rf_op = op; 329 fp->rf_reboot = reboot; 330 fp->rf_stat4 = stat4; 331 break; 332 case RF_RENEW_EXPIRED: 333 break; 334 case RF_ERR: 335 fp->rf_op = op; 336 fp->rf_reboot = reboot; 337 fp->rf_stat4 = stat4; 338 fp->rf_action = raction; 339 fp->rf_error = error; 340 break; 341 case RF_SRV_OK: 342 break; 343 case RF_SRV_NOT_RESPOND: 344 break; 345 case RF_SRVS_OK: 346 break; 347 case RF_SRVS_NOT_RESPOND: 348 gethrestime(&fp->rf_time); 349 break; 350 case RF_DELMAP_CB_ERR: 351 fp->rf_op = op; 352 fp->rf_stat4 = stat4; 353 354 rp1 = VTOR4(vp); 355 fp->rf_rp1 = rp1; 356 if (rp1 && rp1->r_svnode.sv_name) 357 fp->rf_char1 = fn_path(rp1->r_svnode.sv_name); 358 else 359 fp->rf_char1 = NULL; 360 break; 361 default: 362 zcmn_err(getzoneid(), CE_NOTE, "illegal fact %d", id); 363 break; 364 } 365 } 366 367 /* 368 * Returns 1 if the event/fact is of a successful communication 369 * from the server; 0 otherwise. 370 */ 371 static int 372 successful_comm(nfs4_debug_msg_t *msgp) 373 { 374 if (msgp->msg_type == RM_EVENT) { 375 switch (msgp->rmsg_u.msg_event.re_type) { 376 case RE_BAD_SEQID: 377 case RE_BADHANDLE: 378 case RE_FAIL_REMAP_LEN: 379 case RE_FAIL_REMAP_OP: 380 case RE_FILE_DIFF: 381 case RE_START: 382 case RE_UNEXPECTED_ACTION: 383 case RE_UNEXPECTED_ERRNO: 384 case RE_UNEXPECTED_STATUS: 385 case RE_WRONGSEC: 386 return (1); 387 case RE_CLIENTID: 388 case RE_DEAD_FILE: 389 case RE_END: 390 case RE_FAIL_RELOCK: 391 case RE_FAILOVER: 392 case RE_LOST_STATE: 393 case RE_OPENS_CHANGED: 394 case RE_SIGLOST: 395 case RE_SIGLOST_NO_DUMP: 396 case RE_LOST_STATE_BAD_OP: 397 return (0); 398 default: 399 return (0); 400 } 401 } else { 402 switch (msgp->rmsg_u.msg_fact.rf_type) { 403 case RF_BADOWNER: 404 case RF_ERR: 405 case RF_RENEW_EXPIRED: 406 case RF_SRV_OK: 407 case RF_SRVS_OK: 408 case RF_DELMAP_CB_ERR: 409 return (1); 410 case RF_SRV_NOT_RESPOND: 411 case RF_SRVS_NOT_RESPOND: 412 return (0); 413 default: 414 return (0); 415 } 416 } 417 } 418 419 /* 420 * Iterate backwards through the mi's mi_msg_list to find the earliest 421 * message that we should find relevant facts to investigate. 422 */ 423 static nfs4_debug_msg_t * 424 find_beginning(nfs4_debug_msg_t *first_msg, mntinfo4_t *mi) 425 { 426 nfs4_debug_msg_t *oldest_msg, *cur_msg; 427 time_t lease; 428 429 ASSERT(mutex_owned(&mi->mi_msg_list_lock)); 430 if (mi->mi_lease_period > 0) 431 lease = 2 * mi->mi_lease_period; 432 else 433 lease = DEFAULT_LEASE; 434 435 oldest_msg = first_msg; 436 cur_msg = list_prev(&mi->mi_msg_list, first_msg); 437 while (cur_msg && 438 first_msg->msg_time.tv_sec - cur_msg->msg_time.tv_sec < lease) { 439 oldest_msg = cur_msg; 440 if ((cur_msg->msg_type == RM_FACT) && 441 (cur_msg->rmsg_u.msg_fact.rf_type == RF_SRV_OK)) { 442 /* find where we lost contact with the server */ 443 while (cur_msg) { 444 if ((cur_msg->msg_type == RM_FACT) && 445 (cur_msg->rmsg_u.msg_fact.rf_type == 446 RF_SRV_NOT_RESPOND)) 447 break; 448 oldest_msg = cur_msg; 449 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 450 } 451 /* 452 * Find the first successful message before 453 * we lost contact with the server. 454 */ 455 if (cur_msg) { 456 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 457 while (cur_msg && !successful_comm(cur_msg)) { 458 oldest_msg = cur_msg; 459 cur_msg = list_prev(&mi->mi_msg_list, 460 cur_msg); 461 } 462 } 463 /* 464 * If we're not at the dummy head pointer, 465 * set the oldest and current message. 466 */ 467 if (cur_msg) { 468 first_msg = cur_msg; 469 oldest_msg = cur_msg; 470 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 471 } 472 } else 473 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 474 } 475 476 return (oldest_msg); 477 } 478 479 /* 480 * Returns 1 if facts have been found; 0 otherwise. 481 */ 482 static int 483 get_facts(nfs4_debug_msg_t *msgp, nfs4_rfact_t *ret_fp, char **mnt_pt, 484 mntinfo4_t *mi) 485 { 486 nfs4_debug_msg_t *cur_msg, *oldest_msg; 487 nfs4_rfact_t *cur_fp; 488 int found_a_fact = 0; 489 int len; 490 491 cur_msg = msgp; 492 493 /* find the oldest msg to search backwards to */ 494 oldest_msg = find_beginning(cur_msg, mi); 495 ASSERT(oldest_msg != NULL); 496 497 /* 498 * Create a fact sheet by searching from our current message 499 * backwards to the 'oldest_msg', recording facts along the way 500 * until we found facts that have been inspected by another time. 501 */ 502 while (cur_msg && cur_msg != list_prev(&mi->mi_msg_list, oldest_msg)) { 503 if (cur_msg->msg_type != RM_FACT) { 504 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 505 continue; 506 } 507 508 cur_fp = &cur_msg->rmsg_u.msg_fact; 509 /* 510 * If this fact has already been looked at, then so 511 * have all preceding facts. Return Now. 512 */ 513 if (cur_fp->rf_status == RFS_INSPECT) 514 return (found_a_fact); 515 516 cur_fp->rf_status = RFS_INSPECT; 517 found_a_fact = 1; 518 switch (cur_fp->rf_type) { 519 case RF_BADOWNER: 520 break; 521 case RF_ERR: 522 /* 523 * Don't want to overwrite a fact that was 524 * previously found during our current search. 525 */ 526 if (!ret_fp->rf_reboot) 527 ret_fp->rf_reboot = cur_fp->rf_reboot; 528 if (!ret_fp->rf_stat4) 529 ret_fp->rf_stat4 = cur_fp->rf_stat4; 530 if (!ret_fp->rf_action) 531 ret_fp->rf_action = cur_fp->rf_action; 532 break; 533 case RF_RENEW_EXPIRED: 534 if (cur_msg->msg_mntpt && !(*mnt_pt)) { 535 len = strlen(cur_msg->msg_mntpt) + 1; 536 *mnt_pt = kmem_alloc(len, KM_SLEEP); 537 bcopy(cur_msg->msg_mntpt, *mnt_pt, len); 538 } 539 break; 540 case RF_SRV_OK: 541 break; 542 case RF_SRV_NOT_RESPOND: 543 /* 544 * Okay to overwrite this fact as 545 * we want the earliest time. 546 */ 547 ret_fp->rf_time = cur_fp->rf_time; 548 break; 549 case RF_SRVS_OK: 550 break; 551 case RF_SRVS_NOT_RESPOND: 552 break; 553 case RF_DELMAP_CB_ERR: 554 break; 555 default: 556 zcmn_err(getzoneid(), CE_NOTE, 557 "get facts: illegal fact %d", cur_fp->rf_type); 558 break; 559 } 560 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 561 } 562 563 return (found_a_fact); 564 } 565 566 /* 567 * Returns 1 if this fact is identical to the last fact recorded 568 * (only checks for a match within the last 2 lease periods). 569 */ 570 static int 571 facts_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg, 572 mntinfo4_t *mi) 573 { 574 nfs4_rfact_t *fp1, *fp2; 575 int lease, len; 576 577 ASSERT(mutex_owned(&mi->mi_msg_list_lock)); 578 if (mi->mi_lease_period > 0) 579 lease = 2 * mi->mi_lease_period; 580 else 581 lease = DEFAULT_LEASE; 582 583 fp2 = &new_msg->rmsg_u.msg_fact; 584 585 while (cur_msg && 586 new_msg->msg_time.tv_sec - cur_msg->msg_time.tv_sec < lease) { 587 if (cur_msg->msg_type != RM_FACT) { 588 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 589 continue; 590 } 591 fp1 = &cur_msg->rmsg_u.msg_fact; 592 if (fp1->rf_type != fp2->rf_type) 593 return (0); 594 595 /* now actually compare the facts */ 596 if (fp1->rf_action != fp2->rf_action) 597 return (0); 598 if (fp1->rf_stat4 != fp2->rf_stat4) 599 return (0); 600 if (fp1->rf_reboot != fp2->rf_reboot) 601 return (0); 602 if (fp1->rf_op != fp2->rf_op) 603 return (0); 604 if (fp1->rf_time.tv_sec != fp2->rf_time.tv_sec) 605 return (0); 606 if (fp1->rf_error != fp2->rf_error) 607 return (0); 608 if (fp1->rf_rp1 != fp2->rf_rp1) 609 return (0); 610 if (cur_msg->msg_srv != NULL) { 611 if (new_msg->msg_srv == NULL) 612 return (0); 613 len = strlen(cur_msg->msg_srv); 614 if (strncmp(cur_msg->msg_srv, new_msg->msg_srv, 615 len) != 0) 616 return (0); 617 } else if (new_msg->msg_srv != NULL) { 618 return (0); 619 } 620 if (cur_msg->msg_mntpt != NULL) { 621 if (new_msg->msg_mntpt == NULL) 622 return (0); 623 len = strlen(cur_msg->msg_mntpt); 624 if (strncmp(cur_msg->msg_mntpt, new_msg->msg_mntpt, 625 len) != 0) 626 return (0); 627 } else if (new_msg->msg_mntpt != NULL) { 628 return (0); 629 } 630 if (fp1->rf_char1 != NULL) { 631 if (fp2->rf_char1 == NULL) 632 return (0); 633 len = strlen(fp1->rf_char1); 634 if (strncmp(fp1->rf_char1, fp2->rf_char1, len) != 0) 635 return (0); 636 } else if (fp2->rf_char1 != NULL) { 637 return (0); 638 } 639 return (1); 640 } 641 642 return (0); 643 } 644 645 /* 646 * Returns 1 if these two messages are identical; 0 otherwise. 647 */ 648 static int 649 events_same(nfs4_debug_msg_t *cur_msg, nfs4_debug_msg_t *new_msg, 650 mntinfo4_t *mi) 651 { 652 nfs4_revent_t *ep1, *ep2; 653 int len; 654 655 /* find the last event, bypassing all facts */ 656 while (cur_msg && cur_msg->msg_type != RM_EVENT) 657 cur_msg = list_prev(&mi->mi_msg_list, cur_msg); 658 659 if (!cur_msg) 660 return (0); 661 662 if (cur_msg->msg_type != RM_EVENT) 663 return (0); 664 665 ep1 = &cur_msg->rmsg_u.msg_event; 666 ep2 = &new_msg->rmsg_u.msg_event; 667 if (ep1->re_type != ep2->re_type) 668 return (0); 669 670 /* 671 * Since we zalloc the buffer, then the two nfs4_debug_msg's 672 * must match up even if all the fields weren't filled in 673 * the first place. 674 */ 675 if (ep1->re_mi != ep2->re_mi) 676 return (0); 677 if (ep1->re_uint != ep2->re_uint) 678 return (0); 679 if (ep1->re_stat4 != ep2->re_stat4) 680 return (0); 681 if (ep1->re_pid != ep2->re_pid) 682 return (0); 683 if (ep1->re_rp1 != ep2->re_rp1) 684 return (0); 685 if (ep1->re_rp2 != ep2->re_rp2) 686 return (0); 687 if (ep1->re_tag1 != ep2->re_tag1) 688 return (0); 689 if (ep1->re_tag2 != ep2->re_tag2) 690 return (0); 691 if (ep1->re_seqid1 != ep2->re_seqid1) 692 return (0); 693 if (ep1->re_seqid2 != ep2->re_seqid2) 694 return (0); 695 696 if (cur_msg->msg_srv != NULL) { 697 if (new_msg->msg_srv == NULL) 698 return (0); 699 len = strlen(cur_msg->msg_srv); 700 if (strncmp(cur_msg->msg_srv, new_msg->msg_srv, len) != 0) 701 return (0); 702 } else if (new_msg->msg_srv != NULL) { 703 return (0); 704 } 705 706 if (ep1->re_char1 != NULL) { 707 if (ep2->re_char1 == NULL) 708 return (0); 709 len = strlen(ep1->re_char1); 710 if (strncmp(ep1->re_char1, ep2->re_char1, len) != 0) 711 return (0); 712 } else if (ep2->re_char1 != NULL) { 713 return (0); 714 } 715 716 if (ep1->re_char2 != NULL) { 717 if (ep2->re_char2 == NULL) 718 return (0); 719 len = strlen(ep1->re_char2); 720 if (strncmp(ep1->re_char2, ep2->re_char2, len) != 0) 721 return (0); 722 } else if (ep2->re_char2 != NULL) { 723 return (0); 724 } 725 726 if (cur_msg->msg_mntpt != NULL) { 727 if (new_msg->msg_mntpt == NULL) 728 return (0); 729 len = strlen(cur_msg->msg_mntpt); 730 if (strncmp(cur_msg->msg_mntpt, cur_msg->msg_mntpt, len) != 0) 731 return (0); 732 } else if (new_msg->msg_mntpt != NULL) { 733 return (0); 734 } 735 736 return (1); 737 } 738 739 /* 740 * Free up a recovery event. 741 */ 742 static void 743 free_event(nfs4_revent_t *ep) 744 { 745 int len; 746 747 if (ep->re_char1) { 748 len = strlen(ep->re_char1) + 1; 749 kmem_free(ep->re_char1, len); 750 } 751 if (ep->re_char2) { 752 len = strlen(ep->re_char2) + 1; 753 kmem_free(ep->re_char2, len); 754 } 755 } 756 757 /* 758 * Free up a recovery fact. 759 */ 760 static void 761 free_fact(nfs4_rfact_t *fp) 762 { 763 int len; 764 765 if (fp->rf_char1) { 766 len = strlen(fp->rf_char1) + 1; 767 kmem_free(fp->rf_char1, len); 768 } 769 } 770 771 /* 772 * Free up the message. 773 */ 774 void 775 nfs4_free_msg(nfs4_debug_msg_t *msg) 776 { 777 int len; 778 779 if (msg->msg_type == RM_EVENT) 780 free_event(&msg->rmsg_u.msg_event); 781 else 782 free_fact(&msg->rmsg_u.msg_fact); 783 784 if (msg->msg_srv) { 785 len = strlen(msg->msg_srv) + 1; 786 kmem_free(msg->msg_srv, len); 787 } 788 789 if (msg->msg_mntpt) { 790 len = strlen(msg->msg_mntpt) + 1; 791 kmem_free(msg->msg_mntpt, len); 792 } 793 794 /* free up the data structure itself */ 795 kmem_free(msg, sizeof (*msg)); 796 } 797 798 /* 799 * Prints out the interesting facts for recovery events: 800 * -DEAD_FILE 801 * -SIGLOST(_NO_DUMP) 802 */ 803 static void 804 print_facts(nfs4_debug_msg_t *msg, mntinfo4_t *mi) 805 { 806 nfs4_rfact_t *fp; 807 char *mount_pt; 808 int len; 809 810 if (msg->rmsg_u.msg_event.re_type != RE_DEAD_FILE && 811 msg->rmsg_u.msg_event.re_type != RE_SIGLOST && 812 msg->rmsg_u.msg_event.re_type != RE_SIGLOST_NO_DUMP) 813 return; 814 815 fp = kmem_zalloc(sizeof (*fp), KM_SLEEP); 816 mount_pt = NULL; 817 818 if (get_facts(msg, fp, &mount_pt, mi)) { 819 char time[256]; 820 821 822 if (fp->rf_time.tv_sec) 823 (void) snprintf(time, 256, "%ld", 824 (gethrestime_sec() - fp->rf_time.tv_sec)/60); 825 zcmn_err(mi->mi_zone->zone_id, CE_NOTE, 826 "!NFS4 FACT SHEET: %s%s %s%s %s %s%s%s %s%s", 827 fp->rf_action ? "\n Action: " : "", 828 fp->rf_action ? nfs4_recov_action_to_str(fp->rf_action) : 829 "", 830 fp->rf_stat4 ? "\n NFS4 error: " : "", 831 fp->rf_stat4 ? nfs4_stat_to_str(fp->rf_stat4) : "", 832 fp->rf_reboot ? "\n Suspected server reboot. " : "", 833 fp->rf_time.tv_sec ? "\n Server was down for " : "", 834 fp->rf_time.tv_sec ? time : "", 835 fp->rf_time.tv_sec ? " minutes." : "", 836 mount_pt ? " \n Client's lease expired on mount " : "", 837 mount_pt ? mount_pt : ""); 838 } 839 840 if (mount_pt) { 841 len = strlen(mount_pt) + 1; 842 kmem_free(mount_pt, len); 843 } 844 845 /* free the fact struct itself */ 846 if (fp) 847 kmem_free(fp, sizeof (*fp)); 848 } 849 850 /* 851 * Print an event message to /var/adm/messages 852 * The last argument to this fuction dictates the repeat status 853 * of the event. If set to 1, it means that we are dumping this 854 * event and it will _never_ be printed after this time. Else if 855 * set to 0 it will be printed again. 856 */ 857 static void 858 queue_print_event(nfs4_debug_msg_t *msg, mntinfo4_t *mi, int dump) 859 { 860 nfs4_revent_t *ep; 861 zoneid_t zoneid; 862 863 ep = &msg->rmsg_u.msg_event; 864 zoneid = mi->mi_zone->zone_id; 865 866 switch (ep->re_type) { 867 case RE_BAD_SEQID: 868 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 869 "Operation %s for file %s (rnode_pt 0x%p), pid %d using " 870 "seqid %d got %s. Last good seqid was %d for " 871 "operation %s.", 872 msg->msg_srv, msg->msg_mntpt, 873 nfs4_ctags[ep->re_tag1].ct_str, ep->re_char1, 874 (void *)ep->re_rp1, ep->re_pid, ep->re_seqid1, 875 nfs4_stat_to_str(ep->re_stat4), ep->re_seqid2, 876 nfs4_ctags[ep->re_tag2].ct_str); 877 break; 878 case RE_BADHANDLE: 879 ASSERT(ep->re_rp1 != NULL); 880 if (ep->re_char1 != NULL) { 881 zcmn_err(zoneid, CE_NOTE, 882 "![NFS4][Server: %s][Mntpt: %s]" 883 "server %s said filehandle was " 884 "invalid for file: %s (rnode_pt 0x%p) on mount %s", 885 msg->msg_srv, msg->msg_mntpt, msg->msg_srv, 886 ep->re_char1, (void *)ep->re_rp1, msg->msg_mntpt); 887 } else { 888 zcmn_err(zoneid, CE_NOTE, 889 "![NFS4][Server: %s][Mntpt: %s]" 890 "server %s said filehandle was " 891 "invalid for file: (rnode_pt 0x%p) on mount %s" 892 " for fh:", msg->msg_srv, msg->msg_mntpt, 893 msg->msg_srv, (void *)ep->re_rp1, msg->msg_mntpt); 894 sfh4_printfhandle(ep->re_rp1->r_fh); 895 } 896 break; 897 case RE_CLIENTID: 898 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 899 "Can't recover clientid on mount point %s " 900 "(mi 0x%p) due to error %d (%s), for server %s. Marking " 901 "file system as unusable.", 902 msg->msg_srv, msg->msg_mntpt, msg->msg_mntpt, 903 (void *)ep->re_mi, ep->re_uint, 904 nfs4_stat_to_str(ep->re_stat4), 905 msg->msg_srv); 906 break; 907 case RE_DEAD_FILE: 908 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 909 "File %s (rnode_pt: %p) was closed due to NFS " 910 "recovery error on server %s(%s %s)", msg->msg_srv, 911 msg->msg_mntpt, ep->re_char1, (void *)ep->re_rp1, 912 msg->msg_srv, ep->re_char2 ? ep->re_char2 : "", 913 ep->re_stat4 ? nfs4_stat_to_str(ep->re_stat4) : ""); 914 break; 915 case RE_END: 916 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 917 "NFS Recovery done for mount %s (mi 0x%p) " 918 "on server %s, rnode_pt1 %s (0x%p), " 919 "rnode_pt2 %s (0x%p)", msg->msg_srv, msg->msg_mntpt, 920 msg->msg_mntpt, (void *)ep->re_mi, msg->msg_srv, 921 ep->re_char1, (void *)ep->re_rp1, ep->re_char2, 922 (void *)ep->re_rp2); 923 break; 924 case RE_FAIL_RELOCK: 925 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 926 "Couldn't reclaim lock for pid %d for " 927 "file %s (rnode_pt 0x%p) on (server %s): error %d", 928 msg->msg_srv, msg->msg_mntpt, ep->re_pid, ep->re_char1, 929 (void *)ep->re_rp1, msg->msg_srv, 930 ep->re_uint ? ep->re_uint : ep->re_stat4); 931 break; 932 case RE_FAIL_REMAP_LEN: 933 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 934 "remap_lookup: server %s returned bad " 935 "fhandle length (%d)", msg->msg_srv, msg->msg_mntpt, 936 msg->msg_srv, ep->re_uint); 937 break; 938 case RE_FAIL_REMAP_OP: 939 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 940 "remap_lookup: didn't get expected OP_GETFH" 941 " for server %s", msg->msg_srv, msg->msg_mntpt, 942 msg->msg_srv); 943 break; 944 case RE_FAILOVER: 945 if (ep->re_char1) 946 zcmn_err(zoneid, CE_NOTE, 947 "![NFS4][Server: %s][Mntpt: %s]" 948 "failing over from %s to %s", msg->msg_srv, 949 msg->msg_mntpt, msg->msg_srv, ep->re_char1); 950 else 951 zcmn_err(zoneid, CE_NOTE, 952 "![NFS4][Server: %s][Mntpt: %s]" 953 "NFS4: failing over: selecting " 954 "original server %s", msg->msg_srv, msg->msg_mntpt, 955 msg->msg_srv); 956 break; 957 case RE_FILE_DIFF: 958 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 959 "File %s (rnode_pt: %p) on server %s was closed " 960 "and failed attempted failover since its is different than " 961 "the original file", msg->msg_srv, msg->msg_mntpt, 962 ep->re_char1, (void *)ep->re_rp1, msg->msg_srv); 963 break; 964 case RE_LOST_STATE: 965 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 966 "Lost %s request for fs %s, file %s (rnode_pt: 0x%p), " 967 "dir %s (0x%p) for server %s", msg->msg_srv, msg->msg_mntpt, 968 nfs4_op_to_str(ep->re_uint), msg->msg_mntpt, 969 ep->re_char1, (void *)ep->re_rp1, ep->re_char2, 970 (void *)ep->re_rp2, msg->msg_srv); 971 break; 972 case RE_OPENS_CHANGED: 973 zcmn_err(zoneid, CE_NOTE, "![NFS4][Server: %s][Mntpt: %s]" 974 "The number of open files to reopen changed " 975 "for mount %s mi 0x%p (old %d, new %d) on server %s", 976 msg->msg_srv, msg->msg_mntpt, msg->msg_mntpt, 977 (void *)ep->re_mi, ep->re_uint, ep->re_pid, msg->msg_srv); 978 break; 979 case RE_SIGLOST: 980 case RE_SIGLOST_NO_DUMP: 981 if (ep->re_uint) 982 zcmn_err(zoneid, CE_NOTE, 983 "![NFS4][Server: %s][Mntpt: %s]" 984 "Process %d lost its locks on " 985 "file %s (rnode_pt: %p) due to NFS recovery error " 986 "(%d) on server %s.", msg->msg_srv, msg->msg_mntpt,