1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the License). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/CDDL.txt 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/CDDL.txt. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets [] replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * dns.c - Common utilities for highly available DNS 27 * 28 */ 29 #pragma ident "@(#)dns.c 1.37 07/06/06 SMI" 30 31 #include <strings.h> 32 #include <sys/wait.h> 33 #include <netinet/in.h> 34 #include <arpa/inet.h> 35 #include <sys/stat.h> 36 #include <sys/types.h> 37 #include <time.h> 38 #include <errno.h> 39 #include <stdio.h> 40 #include <libintl.h> 41 #include <ds_common.h> 42 #include "dns.h" 43 44 /* NULL-terminated list of the SMF services that svc_validate() checks and svc_start() disables. */ char *svcs[] = {"/network/dns/server:default", NULL}; 45 46 /* 47 * Percentage of the start method timeout that svc_wait() waits 48 * before its first probe of the dataservice. 49 */ 50 #define SVC_WAIT_PCT 0 51 52 /* 53 * SVC_WAIT_TIME is used only during starting in svc_wait(). 54 * In svc_wait() we need to be sure that the service is up 55 * before returning, thus we need to call svc_probe() to 56 * monitor the service. SVC_WAIT_TIME is the time between 57 * such probes. 58 */ 59 #define SVC_WAIT_TIME 2 60 61 /* 62 * svc_validate(): 63 * Do DNS specific validation of the resource configuration. 64 * Called by start/validate/update/monitor methods. 
65 * 66 * If print_messages is true, it will also print any messages 67 * to stderr (we assume the locale has been set for us). 68 * This is in addition to syslogging. 69 * 70 * Return 0 on success, > 0 on failures. 71 */ 72 73 int 74 svc_validate(scds_handle_t handle, char *mode, boolean_t print_messages) 75 { 76 char dns_named_dir[SCDS_ARRAY_SIZE]; 77 char dns_config[SCDS_ARRAY_SIZE]; 78 int rc; 79 scha_str_array_t *config_dir; 80 scds_net_resource_list_t *snrlp; 81 struct stat statbuf; 82 scds_port_list_t *port_list; 83 scds_hasp_status_t hasp_status; 84 85 86 /* 87 * Just in case! Actually the caller should 88 * make sure mode is not NULL 89 */ 90 if (mode == NULL) { 91 scds_syslog(LOG_ERR, "Property %s is not set.", DNS_MODE_USED); 92 if (print_messages) { 93 (void) fprintf(stderr, gettext("Property %s is not " 94 "set.\n"), DNS_MODE_USED); 95 } 96 return (1); 97 } 98 99 100 if (os_newer_than_s10()) { 101 rc = check_disabled_smf_services(svcs, print_messages); 102 if (rc != 0) 103 return (1); 104 } 105 106 107 /* There should be only 1 port for HA DNS */ 108 rc = scds_get_port_list(handle, &port_list); 109 if (rc != SCHA_ERR_NOERR) { 110 /* 111 * SCMSGS 112 * @explanation 113 * An API operation has failed while retrieving the resource 114 * property. Low memory or API call failure might be the 115 * reasons. 116 * @user_action 117 * In case of low memory, the problem will probably cured by 118 * rebooting. If the problem reoccurs, you might need to 119 * increase swap space by configuring additional swap devices. 120 * Otherwise, if it is API call failure, check the syslog 121 * messages from other components. For the resource name and 122 * property name, check the current syslog message. 
123 */ 124 scds_syslog(LOG_ERR, "Failed to retrieve the resource " 125 "property %s: %s.", SCHA_PORT_LIST, 126 scds_error_string(rc)); 127 if (print_messages) { 128 (void) fprintf(stderr, gettext("Failed to retrieve the " 129 "resource property %s: %s.\n"), SCHA_PORT_LIST, 130 gettext(scds_error_string(rc))); 131 } 132 return (1); 133 } 134 if (port_list->num_ports != 1) { 135 /* 136 * SCMSGS 137 * @explanation 138 * A multi-valued (comma-separated) list was provided to the 139 * scrgadm command for the property, while the implementation 140 * supports only one value for this property. 141 * @user_action 142 * Specify a single value for the property on the scrgadm 143 * command. 144 */ 145 scds_syslog(LOG_ERR, "Property %s should have only one value.", 146 SCHA_PORT_LIST); 147 if (print_messages) { 148 (void) fprintf(stderr, gettext("Property %s should " 149 "have only one value.\n"), SCHA_PORT_LIST); 150 } 151 return (1); 152 } 153 scds_free_port_list(port_list); 154 155 /* 156 * Return an error if unable to get the Logical host resources 157 * to use for this resource 158 */ 159 rc = scds_get_rs_hostnames(handle, &snrlp); 160 if (rc != SCHA_ERR_NOERR) { 161 scds_syslog(LOG_ERR, "Failed to retrieve the resource " 162 "property %s: %s.", SCHA_NETWORK_RESOURCES_USED, 163 scds_error_string(rc)); 164 if (print_messages) { 165 (void) fprintf(stderr, gettext("Failed to retrieve the " 166 "resource property %s: %s.\n"), 167 SCHA_NETWORK_RESOURCES_USED, 168 gettext(scds_error_string(rc))); 169 } 170 return (1); 171 } 172 /* Return error if there are no Logicalhost resources configured */ 173 if ((snrlp == NULL) || (snrlp->num_netresources == 0)) { 174 /* 175 * SCMSGS 176 * @explanation 177 * The probe method for this data service could not find a 178 * LogicalHostname resource in the same resource group as the 179 * data service. 180 * @user_action 181 * Use scrgadm to configure the resource group to hold both 182 * the data service and the LogicalHostname. 
183 */ 184 scds_syslog(LOG_ERR, "No LogicalHostname resource in " 185 "resource group."); 186 if (print_messages) { 187 (void) fprintf(stderr, gettext("No LogicalHostname " 188 "resource in resource group.\n")); 189 } 190 return (1); 191 } 192 scds_free_net_list(snrlp); 193 194 if (scds_get_ext_monitor_retry_count(handle) <= 0) { 195 scds_syslog(LOG_ERR, "Property %s is not set.", 196 "Monitor_retry_count"); 197 if (print_messages) { 198 (void) fprintf(stderr, gettext("Property %s is not " 199 "set.\n"), "Monitor_retry_count"); 200 } 201 return (1); 202 } 203 204 if (scds_get_ext_monitor_retry_interval(handle) <= 0) { 205 scds_syslog(LOG_ERR, "Property %s is not set.", 206 "Monitor_retry_interval"); 207 if (print_messages) { 208 (void) fprintf(stderr, gettext("Property %s is not " 209 "set.\n"), "Monitor_retry_interval"); 210 } 211 return (1); 212 } 213 214 if (scds_get_ext_probe_timeout(handle) <= 0) { 215 scds_syslog(LOG_ERR, "Property %s is not set.", 216 "Probe_timeout"); 217 if (print_messages) { 218 (void) fprintf(stderr, gettext("Property %s is not " 219 "set.\n"), "Probe_timeout"); 220 } 221 return (1); 222 } 223 224 /* check for HAStoragePlus resources */ 225 rc = scds_hasp_check(handle, &hasp_status); 226 if (rc != SCHA_ERR_NOERR) { 227 /* scds_hasp_check() logs everytime it fails */ 228 if (print_messages) { 229 (void) fprintf(stderr, gettext("INTERNAL ERROR: %s.\n"), 230 gettext("scds_hasp_check failed")); 231 } 232 return (1); 233 } 234 235 if (hasp_status == SCDS_HASP_NOT_ONLINE) { 236 scds_syslog(LOG_ERR, "Resource depends on a " 237 "SUNW.HAStoragePlus type resource that is " 238 "not online anywhere."); 239 if (print_messages) { 240 (void) fprintf(stderr, gettext("Resource depends on a " 241 "SUNW.HAStoragePlus type resource that is " 242 "not online anywhere.\n")); 243 } 244 return (1); 245 } else if (hasp_status == SCDS_HASP_ERR_CONFIG) { 246 /* problem syslogged by scds_hasp_check */ 247 if (print_messages) { 248 (void) fprintf(stderr, 
gettext("This resource depends " 249 "on a HAStoragePlus resouce that is in a " 250 "different Resource Group. This configuration " 251 "is not supported.\n")); 252 } 253 return (1); 254 } 255 256 config_dir = scds_get_ext_confdir_list(handle); 257 if ((config_dir == NULL) || (config_dir->array_cnt == 0)) { 258 scds_syslog(LOG_ERR, "Property %s is not set.", 259 "Confdir_list"); 260 if (print_messages) { 261 (void) fprintf(stderr, gettext("Property %s is not " 262 "set.\n"), "Confdir_list"); 263 } 264 return (1); 265 } 266 267 if (config_dir->array_cnt > 1) { 268 /* 269 * SCMSGS 270 * @explanation 271 * Failover data service must have one and only one 272 * value for Confdir_list. 273 * @user_action 274 * Create a failover resource group for each 275 * configuration file. 276 */ 277 scds_syslog(LOG_ERR, 278 "Failover %s data service must have exactly " 279 "one value for extension property %s.", 280 APP_NAME, "Config_dir"); 281 if (print_messages) { 282 (void) fprintf(stderr, gettext("Failover %s data " 283 "service must have exactly " 284 "one value for extension " 285 "property %s.\n"), 286 APP_NAME, "Config_dir"); 287 } 288 return (1); 289 } 290 291 scds_syslog_debug(DBG_LEVEL_LOW, "The DNS config mode is %s.", mode); 292 if (strcmp(mode, "conf") == 0) { 293 rc = snprintf(dns_config, sizeof (dns_config), 294 "%s/named.conf", config_dir->str_array[0]); 295 if (rc == -1) { 296 scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.", 297 "String handling error creating path to " 298 "configuration file: named.conf. " 299 "The path may be too long"); 300 if (print_messages) { 301 (void) fprintf(stderr, gettext("INTERNAL " 302 "ERROR: %s.\n"), 303 gettext("String handling error " 304 "creating path to configuration file: " 305 "named.conf. 
The path may be too " 306 "long")); 307 } 308 return (1); 309 } 310 } else if (strcmp(mode, "boot") == 0) { 311 rc = snprintf(dns_config, sizeof (dns_config), 312 "%s/named.boot", config_dir->str_array[0]); 313 if (rc == -1) { 314 scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.", 315 "String handling error creating path to " 316 "configuration file: named.boot. " 317 "The path may be too long"); 318 if (print_messages) { 319 (void) fprintf(stderr, gettext("INTERNAL " 320 "ERROR: %s.\n"), 321 gettext("String handling error " 322 "creating path to configuration file: " 323 "named.boot. The path may be too " 324 "long")); 325 } 326 return (1); 327 } 328 } else { 329 /* 330 * SCMSGS 331 * @explanation 332 * An invalid value was supplied for the property. 333 * @user_action 334 * Supply "conf" or "boot" as the value for DNS_mode property. 335 */ 336 scds_syslog(LOG_ERR, "Invalid value %s for " 337 "property %s.", mode, DNS_MODE_USED); 338 if (print_messages) { 339 (void) fprintf(stderr, gettext("Invalid value %s for " 340 "property %s.\n"), mode, DNS_MODE_USED); 341 } 342 return (1); 343 } 344 345 if (stat(dns_config, &statbuf) != 0) { 346 /* 347 * if hasp_status is not SCDS_HASP_ONLINE_NOT_LOCAL and the 348 * stat above fails, we are in trouble. Also, if the stat 349 * fails for anything else other than an ENOENT (when 350 * hasp_status is SCDS_HASP_ONLINE_NOT_LOCAL), thats also 351 * an error. 
352 */ 353 if ((hasp_status != SCDS_HASP_ONLINE_NOT_LOCAL) || 354 (errno != ENOENT)) { /*lint !e746 */ 355 rc = errno; 356 scds_syslog(LOG_ERR, 357 "File %s is not readable: %s.", 358 dns_config, strerror(rc)); /*lint !e746 */ 359 if (print_messages) { 360 (void) fprintf(stderr, gettext("File %s is not " 361 "readable: %s.\n"), dns_config, 362 gettext(strerror(rc))); 363 } 364 return (1); 365 } 366 } 367 368 /* 369 * Check to see if the named directory which houses 370 * the database files for the Zones in DNS is accessible 371 */ 372 rc = snprintf(dns_named_dir, sizeof (dns_named_dir), "%s/named", 373 config_dir->str_array[0]); 374 if (rc == -1) { 375 scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.", 376 "String handling error creating path to " 377 "database directory. The path may be too long"); 378 if (print_messages) { 379 (void) fprintf(stderr, gettext("INTERNAL ERROR: %s.\n"), 380 gettext("String handling error creating path " 381 "to database directory. The path may be too " 382 "long")); 383 } 384 return (1); 385 } 386 387 388 if (stat(dns_named_dir, &statbuf) != 0) { 389 if ((hasp_status != SCDS_HASP_ONLINE_NOT_LOCAL) || 390 (errno != ENOENT)) { 391 rc = errno; 392 /* 393 * SCMSGS 394 * @explanation 395 * The DNS database directory is not readable. This 396 * may be due to the directory not existing or the 397 * permissions not being set properly. 398 * @user_action 399 * Make sure the directory exists and has read 400 * permission set appropriately. Look at the prior 401 * syslog messages for any specific problems and 402 * correct them. 
403 */ 404 scds_syslog(LOG_ERR, "DNS database directory %s is " 405 "not readable: %s", dns_named_dir, 406 strerror(rc)); 407 if (print_messages) { 408 (void) fprintf(stderr, gettext("DNS database " 409 "directory %s is not readable: %s\n"), 410 dns_named_dir, 411 gettext(strerror(rc))); 412 } 413 return (1); 414 } 415 } 416 417 418 /* check that the binary is accessible */ 419 if (stat(DNS_BINARY, &statbuf) != 0) { 420 rc = errno; 421 scds_syslog(LOG_ERR, "File %s is not readable: %s.", 422 DNS_BINARY, strerror(rc)); 423 if (print_messages) { 424 (void) fprintf(stderr, gettext("File %s is not " 425 "readable: %s.\n"), DNS_BINARY, 426 gettext(strerror(rc))); 427 } 428 return (1); 429 } 430 /* check that the binary is executable */ 431 if ((statbuf.st_mode & S_IXUSR) != S_IXUSR) { 432 scds_syslog(LOG_ERR, "Incorrect permissions set for %s.", 433 DNS_BINARY); 434 if (print_messages) { 435 (void) fprintf(stderr, gettext("Incorrect permissions " 436 "set for %s.\n"), DNS_BINARY); 437 } 438 return (1); 439 } 440 441 return (SCHA_ERR_NOERR); 442 } 443 444 int 445 svc_start(scds_handle_t handle, char *mode) 446 { 447 char dns_config[SCDS_ARRAY_SIZE]; 448 char dns_named[SCDS_ARRAY_SIZE]; 449 char cmd[SCDS_ARRAY_SIZE]; 450 int rc; 451 scha_str_array_t *config_dir; 452 453 scds_syslog_debug(DBG_LEVEL_HIGH, "Calling START method for " 454 "resource %s.", scds_get_resource_name(handle)); 455 456 /* Make sure everything looks OK */ 457 if (svc_validate(handle, mode, B_FALSE) != 0) { 458 scds_syslog(LOG_ERR, "Failed to validate configuration."); 459 return (1); /* Bail out, no point in continuing with this res */ 460 } 461 462 scds_syslog_debug(DBG_LEVEL_LOW, "DNS_mode property is set to %s.", 463 mode); 464 465 /* If Solaris 10 or later, disable SMF service */ 466 if (os_newer_than_s10()) { 467 rc = disable_smf_services(svcs); 468 if (rc != 0) 469 return (1); 470 } 471 472 config_dir = scds_get_ext_confdir_list(handle); 473 if (config_dir == NULL) { 474 scds_syslog(LOG_ERR, 
"Property %s is not set.", 475 "Confdir_list"); 476 return (1); 477 } 478 479 if (strcmp(mode, "conf") == 0) { 480 rc = snprintf(dns_config, sizeof (dns_config), "%s/named.conf", 481 config_dir->str_array[0]); 482 if (rc == -1) { 483 scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.", 484 "String handling error creating path to " 485 "configuration file: named.conf. " 486 "The path may be too long"); 487 return (1); 488 } 489 } else if (strcmp(mode, "boot") == 0) { 490 rc = snprintf(dns_config, sizeof (dns_config), 491 "%s/named.boot", config_dir->str_array[0]); 492 if (rc == -1) { 493 scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.", 494 "String handling error creating path " 495 "to configuration file: named.boot. " 496 "The path may be too long"); 497 return (1); 498 } 499 } else { 500 scds_syslog(LOG_ERR, "Invalid value %s for property %s.", 501 mode, DNS_MODE_USED); 502 return (1); 503 } 504 505 rc = snprintf(dns_named, sizeof (dns_named), DNS_BINARY); 506 if (rc == -1) { 507 scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.", 508 "String handling error creating path to " 509 "in.named. The path may be too long"); 510 return (1); 511 } 512 513 514 /* Now construct the command to start DNS */ 515 rc = snprintf(cmd, sizeof (cmd), "%s -c %s ", dns_named, dns_config); 516 if (rc == -1) { 517 scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.", 518 "String handling error creating " 519 "start command. The path may be too long"); 520 return (1); 521 } 522 523 /* 524 * SCMSGS 525 * @explanation 526 * Sun Cluster is starting the specified application with the 527 * specified command. 528 * @user_action 529 * This is an informational message, no user action is needed. 
530 */ 531 scds_syslog(LOG_NOTICE, "Starting %s with command %s.", APP_NAME, cmd); 532 533 /* Start DNS */ 534 if (scds_pmf_start(handle, SCDS_PMF_TYPE_SVC, 0, cmd, -1) != 535 SCHA_ERR_NOERR) { 536 char msg[SCDS_ARRAY_SIZE]; 537 538 (void) snprintf(msg, sizeof (msg), "Failed to start %s.", 539 APP_NAME); 540 541 /* cant use scds syslog(LOG_ERR, msg); make scmsgs complains */ 542 scds_syslog(LOG_ERR, "Failed to start %s.", APP_NAME); 543 544 (void) scha_resource_setstatus(scds_get_resource_name(handle), 545 scds_get_resource_group_name(handle), 546 SCHA_RSSTATUS_FAULTED, msg); 547 exit(1); 548 } 549 550 scds_syslog(LOG_INFO, "Completed successfully."); 551 (void) scha_resource_setstatus(scds_get_resource_name(handle), 552 scds_get_resource_group_name(handle), SCHA_RSSTATUS_OK, 553 "Completed successfully."); 554 555 return (SCHA_ERR_NOERR); 556 } 557 558 559 /* 560 * dns_svc_start() calls svc_wait() just after it calls svc_start() 561 * and before it returns. svc_start() starts up the application (DNS 562 * server), but does not wait for the application to complete coming up 563 * before returning. 564 * 565 * The RGM framework specifies that the START method should not return until 566 * the application is up. svc_wait() verifies that the application is 567 * up before it returns. It does this by probing the application across all 568 * its port/ip combinations. When the probing is successful, svc_wait() 569 * returns immediately indicating success. When the probing is unsuccessful 570 * (i.e., svc_wait() probed without success for it's allotted time, 571 * SVC_TIMEOUT_PCT/100 * start method timeout value), svc_wait() returns 572 * an error. 573 * 574 * Since we don't want the START method to timeout, we don't try to probe 575 * for 100% of the start method timeout value, but only SVC_TIMEOUT_PCT/100 576 * of it. 
Also, since probing too early crashes some applications, we wait 577 * a percentage of the start method timeout value before starting to probe, 578 * SVC_WAIT_PCT. 579 * 580 * Returns: 0=probing succeeded, application is up 581 * 1=time ran out without a successful probe, application wasn't 582 * determined to be up. 583 */ 584 int 585 svc_wait(scds_handle_t handle) 586 { 587 int err = 0, svc_start_timeout, probe_result; 588 589 svc_start_timeout = scds_get_rs_start_timeout(handle); 590 591 /* 592 * sleep for SVC_WAIT_PCT percentage of start_timeout time 593 * before actually probing the dataservice. This is to allow 594 * the dataservice to be fully up inorder to reply to the 595 * probe. NOTE: the value for SVC_WAIT_PCT could be different 596 * for different dataservices. 597 * Instead of calling sleep(), 598 * call scds_svc_wait() so that if service fails too 599 * many times, we give up and return early. 600 */ 601 err = scds_svc_wait(handle, (svc_start_timeout * SVC_WAIT_PCT / 100)); 602 603 if (err != SCHA_ERR_NOERR) { 604 scds_syslog_debug(DBG_LEVEL_LOW, "INTERNAL ERROR: %s.", 605 "Failed to wait before probing service"); 606 } 607 608 while (1) { 609 /* probe the data service */ 610 probe_result = svc_probe(handle, B_FALSE); 611 if (probe_result == 0) { 612 /* everything looks good */ 613 /* 614 * SCMSGS 615 * @explanation 616 * While attempting to check the health of the data 617 * service, probe detected that the resource status is 618 * fine and it is online. 619 * @user_action 620 * This is informational message. No user action is 621 * needed. 622 */ 623 scds_syslog(LOG_NOTICE, "Service is online."); 624 return (0); 625 } 626 627 /* 628 * SCMSGS 629 * @explanation 630 * The specific service or process is not yet up. 631 * @user_action 632 * This is an informative message. Suitable action may be 633 * taken if the specified service or process does not come up 634 * within a configured time limit. 
635 */ 636 scds_syslog(LOG_NOTICE, "Waiting for %s to come up.", APP_NAME); 637 638 /* 639 * Dataservice is still trying to come up. Sleep for a while 640 * before probing again. Instead of calling sleep(), 641 * call scds_svc_wait() so that if service fails too 642 * many times, we give up and return early. 643 */ 644 err = scds_svc_wait(handle, SVC_WAIT_TIME); 645 if (err != SCHA_ERR_NOERR) 646 return (err); 647 648 /* We rely on RGM to timeout and terminate the program */ 649 } 650 } 651 652 653 int 654 svc_stop(scds_handle_t handle) 655 { 656 int svc_stop_timeout; 657 char msg[SCDS_ARRAY_SIZE]; 658 659 scds_syslog_debug(DBG_LEVEL_HIGH, "Calling STOP method for " 660 "resource %s.", scds_get_resource_name(handle)); 661 662 svc_stop_timeout = scds_get_rs_stop_timeout(handle); 663 664 scds_syslog(LOG_NOTICE, "Stopping %s.", APP_NAME); 665 666 if (scds_pmf_stop(handle, SCDS_PMF_TYPE_SVC, 0, SIGTERM, 667 svc_stop_timeout) != SCHA_ERR_NOERR) { 668 scds_syslog(LOG_ERR, "Failed to stop %s.", 669 APP_NAME" . Retrying.."); /* strings concatenated */ 670 } 671 672 /* 673 * Do a stop again, this time with infinite timeout and with SIGKILL. 674 * Even if the tag is gone by now, scds_pmf_stop will not complain 675 * so we are fine even if we do this unconditionally 676 */ 677 if (scds_pmf_stop(handle, SCDS_PMF_TYPE_SVC, 0, SIGKILL, -1) != 678 SCHA_ERR_NOERR) { 679 scds_syslog(LOG_ERR, "Failed to stop %s.", APP_NAME); 680 (void) snprintf(msg, sizeof (msg), "Failed to stop %s.", 681 APP_NAME); 682 (void) scha_resource_setstatus(scds_get_resource_name(handle), 683 scds_get_resource_group_name(handle), 684 SCHA_RSSTATUS_FAULTED, msg); 685 return (1); 686 } else { 687 /* 688 * SCMSGS 689 * @explanation 690 * The resource was successfully stopped by Sun Cluster. 691 * @user_action 692 * No user action is required. 
693 */ 694 scds_syslog(LOG_NOTICE, "Successfully stopped %s.", APP_NAME); 695 (void) snprintf(msg, sizeof (msg), "Successfully stopped %s.", 696 APP_NAME); 697 (void) scha_resource_setstatus(scds_get_resource_name(handle), 698 scds_get_resource_group_name(handle), SCHA_RSSTATUS_OFFLINE, 699 msg); 700 return (SCHA_ERR_NOERR); 701 } 702 } 703 704 705 /* 706 * This function starts the fault monitor for a HA-DNS resource. 707 * This is done by starting the probe under PMF. The PMF tag 708 * is derived as RG-name,RS-name.mon. The restart option of PMF 709 * is used but not the "infinite restart". Instead 710 * interval/retry_time is obtained from the RTR file. 711 */ 712 713 int 714 svc_fm_start(scds_handle_t handle) 715 { 716 717 scds_syslog_debug(DBG_LEVEL_HIGH, "Calling MONITOR_START method " 718 "for resource %s.", scds_get_resource_name(handle)); 719 720 if (scds_pmf_start(handle, SCDS_PMF_TYPE_MON, 0, "dns_probe", 0) 721 != SCHA_ERR_NOERR) { 722 scds_syslog(LOG_ERR, "Failed to start fault monitor."); 723 return (1); 724 } 725 726 scds_syslog(LOG_INFO, "Started the fault monitor."); 727 728 return (SCHA_ERR_NOERR); 729 } 730 731 732 /* 733 * This function stops the fault monitor for a HA-DNS resource. 734 * This is done via PMF. The PMF tag for the fault monitor is 735 * constructed based on RG-name_RS-name.mon. 736 */ 737 738 int 739 svc_fm_stop(scds_handle_t handle) 740 { 741 scds_syslog_debug(DBG_LEVEL_HIGH, "Calling MONITOR_STOP method for " 742 "resource %s.", scds_get_resource_name(handle)); 743 744 if (scds_pmf_stop(handle, SCDS_PMF_TYPE_MON, 0, SIGKILL, -1) != 745 SCHA_ERR_NOERR) { 746 scds_syslog(LOG_ERR, "Failed to stop fault monitor."); 747 return (1); 748 } 749 750 scds_syslog(LOG_INFO, "Stopped the fault monitor."); 751 752 return (SCHA_ERR_NOERR); 753 } 754 755 /* 756 * svc_probe(): Do data service specific probing. Return a value 757 * between 0 (success) and 100(complete failure). 
758 */ 759 int 760 svc_probe(scds_handle_t handle, boolean_t arg_syslog_msgs) 761 { 762 scds_netaddr_list_t *snrlp; 763 int rc, probe_remaining_time, retval, probe_timeout; 764 int exit_code; 765 hrtime_t probe_start_time; 766 char cmd[SCDS_ARRAY_SIZE]; 767 768 probe_start_time = gethrtime(); 769 probe_timeout = scds_get_ext_probe_timeout(handle); 770 771 rc = scds_get_netaddr_list(handle, &snrlp); 772 if (rc != SCHA_ERR_NOERR) { 773 scds_syslog(LOG_ERR, "Failed to retrieve the resource " 774 "property %s: %s.", SCHA_NETWORK_RESOURCES_USED, 775 scds_error_string(rc)); 776 } 777 778 if ((snrlp == NULL) || (snrlp->num_netaddrs == 0)) { 779 if (arg_syslog_msgs) { 780 scds_syslog(LOG_ERR, "No LogicalHostname resource " 781 "in resource group."); 782 } 783 784 exit(1); 785 } 786 787 probe_remaining_time = probe_timeout - (int)((gethrtime() - 788 probe_start_time) / 1e9); 789 if (probe_remaining_time < 1) { 790 if (arg_syslog_msgs) { 791 scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.", 792 "No time left for service probe"); 793 } 794 795 retval = SCDS_PROBE_COMPLETE_FAILURE / 2; 796 goto finished; 797 } 798 799 /* 800 * By using timeout=0, we actually get a timeout of 1 sec (but 801 * with no exponential backoff). So effectively, nslookup runs 802 * for retry * 1 = probe_remaining_time seconds. 803 */ 804 rc = snprintf(cmd, sizeof (cmd), "/usr/sbin/nslookup -retry=%d " 805 "-timeout=0 %s %s >/dev/null 2>&1", probe_remaining_time, 806 snrlp->netaddrs[0].hostname, snrlp->netaddrs[0].hostname); 807 if (rc == -1) { 808 if (arg_syslog_msgs) { 809 scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.", 810 "String handling error creating " 811 "the nslookup command for probe"); 812 } 813 retval = SCDS_PROBE_COMPLETE_FAILURE / 2; 814 goto finished; 815 } 816 817 /* run nslookup */ 818 rc = scds_timerun(handle, cmd, probe_remaining_time, SIGKILL, 819 &exit_code); 820 if (rc != 0) { 821 if (arg_syslog_msgs) { 822 /* 823 * SCMSGS 824 * @explanation 825 * The command could not be run successfully. 
826 * @user_action 827 * The error message specifies both - the exact 828 * command that failed, and the reason why it failed. 829 * Try the command manually and see if it works. 830 * Consider increasing the timeout if the failure is 831 * due to lack of time. For other failures, contact 832 * your authorized Sun service provider. 833 */ 834 scds_syslog(LOG_ERR, "Command [%s] failed: %s.", 835 cmd, scds_error_string(rc)); 836 } 837 838 if ((rc == SCHA_ERR_TIMEOUT) || (rc == SCHA_ERR_INTERNAL)) { 839 retval = SCDS_PROBE_COMPLETE_FAILURE / 2; 840 } else { 841 retval = SCDS_PROBE_COMPLETE_FAILURE; 842 } 843 844 goto finished; 845 } 846 847 /* check the return code from nslookup */ 848 if (exit_code != 0) { 849 if (arg_syslog_msgs) { 850 /* 851 * SCMSGS 852 * @explanation 853 * Fault monitor was unable to perform complete health 854 * check of the service. 855 * @user_action 856 * 1) Fault monitor would take appropiate action (by 857 * restarting or failing over the service.). 858 * 859 * 2) Data service could be under load, try increasing 860 * the values for Probe_timeout and 861 * Thororugh_probe_interval properties. 862 * 863 * 3) If this problem continues to occur, look at 864 * other messages in syslog to determine the root 865 * cause of the problem. If all else fails reboot 866 * node. 867 */ 868 scds_syslog(LOG_ERR, "Probe failed."); 869 } 870 retval = SCDS_PROBE_COMPLETE_FAILURE; 871 goto finished; 872 } 873 874 /* All OK */ 875 retval = 0; 876 877 finished: 878 scds_free_netaddr_list(snrlp); 879 return (retval); 880 881 } 882