Home | History | Annotate | Download | only in dns
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the License).
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/CDDL.txt
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/CDDL.txt.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets [] replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  *
     26  * dns.c - Common utilities for highly available DNS
     27  *
     28  */
     29 #pragma ident	"@(#)dns.c	1.37	07/06/06 SMI"
     30 
     31 #include <strings.h>
     32 #include <sys/wait.h>
     33 #include <netinet/in.h>
     34 #include <arpa/inet.h>
     35 #include <sys/stat.h>
     36 #include <sys/types.h>
     37 #include <time.h>
     38 #include <errno.h>
     39 #include <stdio.h>
     40 #include <libintl.h>
     41 #include <ds_common.h>
     42 #include "dns.h"
     43 
/*
 * NULL-terminated list of SMF service names for the DNS server.
 * svc_validate() hands it to check_disabled_smf_services() and
 * svc_start() hands it to disable_smf_services(); both calls are made
 * only when os_newer_than_s10() returns true.
 */
char 	*svcs[] = {"/network/dns/server:default", NULL};

/*
 * SVC_WAIT_PCT is the percentage of the START method timeout that
 * svc_wait() waits (through scds_svc_wait()) before it issues the
 * first probe. With the value 0 the requested initial wait is zero.
 */
#define	SVC_WAIT_PCT		0

/*
 * SVC_WAIT_TIME is used only during starting in svc_wait().
 * In svc_wait() we need to be sure that the service is up
 * before returning, thus we need to call svc_probe() to
 * monitor the service. SVC_WAIT_TIME is the time between
 * such probes; it is passed unchanged to scds_svc_wait()
 * (presumably seconds -- confirm against the DSDL documentation).
 */
#define		SVC_WAIT_TIME		2
     60 
     61 /*
     62  * svc_validate():
     63  * Do DNS specific validation of the resource configration.
     64  * Called by start/validate/update/monitor methods.
     65  *
     66  * If print_messages is true, it will also print any messages
     67  * to stderr (we assume the locale has been set for us).
     68  * This is in addition to syslogging.
     69  *
     70  * Return 0 on success, > 0 on failures.
     71  */
     72 
     73 int
     74 svc_validate(scds_handle_t handle, char *mode, boolean_t print_messages)
     75 {
     76 	char	dns_named_dir[SCDS_ARRAY_SIZE];
     77 	char	dns_config[SCDS_ARRAY_SIZE];
     78 	int 	rc;
     79 	scha_str_array_t *config_dir;
     80 	scds_net_resource_list_t *snrlp;
     81 	struct stat statbuf;
     82 	scds_port_list_t	*port_list;
     83 	scds_hasp_status_t	hasp_status;
     84 
     85 
     86 	/*
     87 	 * Just in case! Actually the caller should
     88 	 * make sure mode is not NULL
     89 	 */
     90 	if (mode == NULL) {
     91 		scds_syslog(LOG_ERR, "Property %s is not set.", DNS_MODE_USED);
     92 		if (print_messages) {
     93 			(void) fprintf(stderr, gettext("Property %s is not "
     94 				"set.\n"), DNS_MODE_USED);
     95 		}
     96 		return (1);
     97 	}
     98 
     99 
    100 	if (os_newer_than_s10()) {
    101 		rc = check_disabled_smf_services(svcs, print_messages);
    102 		if (rc != 0)
    103 			return (1);
    104 	}
    105 
    106 
    107 	/* There should be only 1 port for HA DNS */
    108 	rc = scds_get_port_list(handle, &port_list);
    109 	if (rc != SCHA_ERR_NOERR) {
    110 		/*
    111 		 * SCMSGS
    112 		 * @explanation
    113 		 * An API operation has failed while retrieving the resource
    114 		 * property. Low memory or API call failure might be the
    115 		 * reasons.
    116 		 * @user_action
    117 		 * In case of low memory, the problem will probably cured by
    118 		 * rebooting. If the problem reoccurs, you might need to
    119 		 * increase swap space by configuring additional swap devices.
    120 		 * Otherwise, if it is API call failure, check the syslog
    121 		 * messages from other components. For the resource name and
    122 		 * property name, check the current syslog message.
    123 		 */
    124 		scds_syslog(LOG_ERR, "Failed to retrieve the resource "
    125 			"property %s: %s.", SCHA_PORT_LIST,
    126 			scds_error_string(rc));
    127 		if (print_messages) {
    128 			(void) fprintf(stderr, gettext("Failed to retrieve the "
    129 				"resource property %s: %s.\n"), SCHA_PORT_LIST,
    130 				gettext(scds_error_string(rc)));
    131 		}
    132 		return (1);
    133 	}
    134 	if (port_list->num_ports != 1) {
    135 		/*
    136 		 * SCMSGS
    137 		 * @explanation
    138 		 * A multi-valued (comma-separated) list was provided to the
    139 		 * scrgadm command for the property, while the implementation
    140 		 * supports only one value for this property.
    141 		 * @user_action
    142 		 * Specify a single value for the property on the scrgadm
    143 		 * command.
    144 		 */
    145 		scds_syslog(LOG_ERR, "Property %s should have only one value.",
    146 		    SCHA_PORT_LIST);
    147 		if (print_messages) {
    148 			(void) fprintf(stderr, gettext("Property %s should "
    149 				"have only one value.\n"), SCHA_PORT_LIST);
    150 		}
    151 		return (1);
    152 	}
    153 	scds_free_port_list(port_list);
    154 
    155 	/*
    156 	 * Return an error if unable to get the Logical host resources
    157 	 * to use for this resource
    158 	 */
    159 	rc = scds_get_rs_hostnames(handle, &snrlp);
    160 	if (rc != SCHA_ERR_NOERR) {
    161 		scds_syslog(LOG_ERR, "Failed to retrieve the resource "
    162 			"property %s: %s.", SCHA_NETWORK_RESOURCES_USED,
    163 			scds_error_string(rc));
    164 		if (print_messages) {
    165 			(void) fprintf(stderr, gettext("Failed to retrieve the "
    166 				"resource property %s: %s.\n"),
    167 				SCHA_NETWORK_RESOURCES_USED,
    168 				gettext(scds_error_string(rc)));
    169 		}
    170 		return (1);
    171 	}
    172 	/* Return error if there are no Logicalhost resources configured */
    173 	if ((snrlp == NULL) || (snrlp->num_netresources == 0)) {
    174 		/*
    175 		 * SCMSGS
    176 		 * @explanation
    177 		 * The probe method for this data service could not find a
    178 		 * LogicalHostname resource in the same resource group as the
    179 		 * data service.
    180 		 * @user_action
    181 		 * Use scrgadm to configure the resource group to hold both
    182 		 * the data service and the LogicalHostname.
    183 		 */
    184 		scds_syslog(LOG_ERR, "No LogicalHostname resource in "
    185 			"resource group.");
    186 		if (print_messages) {
    187 			(void) fprintf(stderr, gettext("No LogicalHostname "
    188 				"resource in resource group.\n"));
    189 		}
    190 		return (1);
    191 	}
    192 	scds_free_net_list(snrlp);
    193 
    194 	if (scds_get_ext_monitor_retry_count(handle) <= 0) {
    195 		scds_syslog(LOG_ERR, "Property %s is not set.",
    196 		    "Monitor_retry_count");
    197 		if (print_messages) {
    198 			(void) fprintf(stderr, gettext("Property %s is not "
    199 				"set.\n"), "Monitor_retry_count");
    200 		}
    201 		return (1);
    202 	}
    203 
    204 	if (scds_get_ext_monitor_retry_interval(handle) <= 0) {
    205 		scds_syslog(LOG_ERR, "Property %s is not set.",
    206 		    "Monitor_retry_interval");
    207 		if (print_messages) {
    208 			(void) fprintf(stderr, gettext("Property %s is not "
    209 				"set.\n"), "Monitor_retry_interval");
    210 		}
    211 		return (1);
    212 	}
    213 
    214 	if (scds_get_ext_probe_timeout(handle) <= 0) {
    215 		scds_syslog(LOG_ERR, "Property %s is not set.",
    216 		    "Probe_timeout");
    217 		if (print_messages) {
    218 			(void) fprintf(stderr, gettext("Property %s is not "
    219 				"set.\n"), "Probe_timeout");
    220 		}
    221 		return (1);
    222 	}
    223 
    224 	/* check for HAStoragePlus resources */
    225 	rc = scds_hasp_check(handle, &hasp_status);
    226 	if (rc != SCHA_ERR_NOERR) {
    227 		/* scds_hasp_check() logs everytime it fails */
    228 		if (print_messages) {
    229 			(void) fprintf(stderr, gettext("INTERNAL ERROR: %s.\n"),
    230 				gettext("scds_hasp_check failed"));
    231 		}
    232 		return (1);
    233 	}
    234 
    235 	if (hasp_status == SCDS_HASP_NOT_ONLINE) {
    236 		scds_syslog(LOG_ERR, "Resource depends on a "
    237 			"SUNW.HAStoragePlus type resource that is "
    238 			"not online anywhere.");
    239 		if (print_messages) {
    240 			(void) fprintf(stderr, gettext("Resource depends on a "
    241 				"SUNW.HAStoragePlus type resource that is "
    242 				"not online anywhere.\n"));
    243 		}
    244 		return (1);
    245 	} else if (hasp_status == SCDS_HASP_ERR_CONFIG) {
    246 		/* problem syslogged by scds_hasp_check */
    247 		if (print_messages) {
    248 			(void) fprintf(stderr, gettext("This resource depends "
    249 				"on a HAStoragePlus resouce that is in a "
    250 				"different Resource Group. This configuration "
    251 				"is not supported.\n"));
    252 		}
    253 		return (1);
    254 	}
    255 
    256 	config_dir = scds_get_ext_confdir_list(handle);
    257 	if ((config_dir == NULL) || (config_dir->array_cnt == 0)) {
    258 		scds_syslog(LOG_ERR, "Property %s is not set.",
    259 			"Confdir_list");
    260 		if (print_messages) {
    261 			(void) fprintf(stderr, gettext("Property %s is not "
    262 				"set.\n"), "Confdir_list");
    263 		}
    264 		return (1);
    265 	}
    266 
    267 	if (config_dir->array_cnt > 1) {
    268 		/*
    269 		* SCMSGS
    270 		* @explanation
    271 		* Failover data service must have one and only one
    272 		* value for Confdir_list.
    273 		* @user_action
    274 		* Create a failover resource group for each
    275 		* configuration file.
    276 		*/
    277 		scds_syslog(LOG_ERR,
    278 			"Failover %s data service must have exactly "
    279 			"one value for extension property %s.",
    280 			APP_NAME, "Config_dir");
    281 		if (print_messages) {
    282 			(void) fprintf(stderr, gettext("Failover %s data "
    283 					"service must have exactly "
    284 					"one value for extension "
    285 					"property %s.\n"),
    286 				APP_NAME, "Config_dir");
    287 		}
    288 		return (1);
    289 	}
    290 
    291 	scds_syslog_debug(DBG_LEVEL_LOW, "The DNS config mode is %s.", mode);
    292 	if (strcmp(mode, "conf") == 0) {
    293 		rc = snprintf(dns_config, sizeof (dns_config),
    294 		    "%s/named.conf", config_dir->str_array[0]);
    295 		if (rc == -1) {
    296 			scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.",
    297 			    "String handling error creating path to "
    298 			    "configuration file: named.conf. "
    299 			    "The path may be too long");
    300 			if (print_messages) {
    301 				(void) fprintf(stderr, gettext("INTERNAL "
    302 					"ERROR: %s.\n"),
    303 					gettext("String handling error "
    304 					"creating path to configuration file: "
    305 					"named.conf. The path may be too "
    306 					"long"));
    307 			}
    308 			return (1);
    309 		}
    310 	} else if (strcmp(mode, "boot") == 0) {
    311 		rc = snprintf(dns_config, sizeof (dns_config),
    312 		    "%s/named.boot", config_dir->str_array[0]);
    313 		if (rc == -1) {
    314 			scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.",
    315 			    "String handling error creating path to "
    316 			    "configuration file: named.boot. "
    317 			    "The path may be too long");
    318 			if (print_messages) {
    319 				(void) fprintf(stderr, gettext("INTERNAL "
    320 					"ERROR: %s.\n"),
    321 					gettext("String handling error "
    322 					"creating path to configuration file: "
    323 					"named.boot. The path may be too "
    324 					"long"));
    325 			}
    326 			return (1);
    327 		}
    328 	} else {
    329 		/*
    330 		 * SCMSGS
    331 		 * @explanation
    332 		 * An invalid value was supplied for the property.
    333 		 * @user_action
    334 		 * Supply "conf" or "boot" as the value for DNS_mode property.
    335 		 */
    336 		scds_syslog(LOG_ERR, "Invalid value %s for "
    337 			"property %s.", mode, DNS_MODE_USED);
    338 		if (print_messages) {
    339 			(void) fprintf(stderr, gettext("Invalid value %s for "
    340 				"property %s.\n"), mode, DNS_MODE_USED);
    341 		}
    342 		return (1);
    343 	}
    344 
    345 	if (stat(dns_config, &statbuf) != 0) {
    346 		/*
    347 		 * if hasp_status is not SCDS_HASP_ONLINE_NOT_LOCAL and the
    348 		 * stat above fails, we are in trouble. Also, if the stat
    349 		 * fails for anything else other than an ENOENT (when
    350 		 * hasp_status is SCDS_HASP_ONLINE_NOT_LOCAL), thats also
    351 		 * an error.
    352 		 */
    353 		if ((hasp_status != SCDS_HASP_ONLINE_NOT_LOCAL) ||
    354 			(errno != ENOENT)) {			/*lint !e746 */
    355 			rc = errno;
    356 			scds_syslog(LOG_ERR,
    357 			    "File %s is not readable: %s.",
    358 			    dns_config, strerror(rc)); 	/*lint !e746 */
    359 			if (print_messages) {
    360 				(void) fprintf(stderr, gettext("File %s is not "
    361 					"readable: %s.\n"), dns_config,
    362 					gettext(strerror(rc)));
    363 			}
    364 			return (1);
    365 		}
    366 	}
    367 
    368 	/*
    369 	 * Check to see if the named directory which houses
    370 	 * the database files for the Zones in DNS is accessible
    371 	 */
    372 	rc = snprintf(dns_named_dir, sizeof (dns_named_dir), "%s/named",
    373 	    config_dir->str_array[0]);
    374 	if (rc == -1) {
    375 		scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.",
    376 		    "String handling error creating path to "
    377 		    "database directory. The path may be too long");
    378 		if (print_messages) {
    379 			(void) fprintf(stderr, gettext("INTERNAL ERROR: %s.\n"),
    380 				gettext("String handling error creating path "
    381 				"to database directory. The path may be too "
    382 				"long"));
    383 		}
    384 		return (1);
    385 	}
    386 
    387 
    388 	if (stat(dns_named_dir, &statbuf) != 0) {
    389 		if ((hasp_status != SCDS_HASP_ONLINE_NOT_LOCAL) ||
    390 			(errno != ENOENT)) {
    391 			rc = errno;
    392 			/*
    393 			 * SCMSGS
    394 			 * @explanation
    395 			 * The DNS database directory is not readable. This
    396 			 * may be due to the directory not existing or the
    397 			 * permissions not being set properly.
    398 			 * @user_action
    399 			 * Make sure the directory exists and has read
    400 			 * permission set appropriately. Look at the prior
    401 			 * syslog messages for any specific problems and
    402 			 * correct them.
    403 			 */
    404 			scds_syslog(LOG_ERR, "DNS database directory %s is "
    405 				"not readable: %s", dns_named_dir,
    406 				strerror(rc));
    407 			if (print_messages) {
    408 				(void) fprintf(stderr, gettext("DNS database "
    409 					"directory %s is not readable: %s\n"),
    410 					dns_named_dir,
    411 					gettext(strerror(rc)));
    412 			}
    413 			return (1);
    414 		}
    415 	}
    416 
    417 
    418 	/* check that the binary is accessible */
    419 	if (stat(DNS_BINARY, &statbuf) != 0) {
    420 		rc = errno;
    421 		scds_syslog(LOG_ERR, "File %s is not readable: %s.",
    422 			DNS_BINARY, strerror(rc));
    423 		if (print_messages) {
    424 			(void) fprintf(stderr, gettext("File %s is not "
    425 				"readable: %s.\n"), DNS_BINARY,
    426 				gettext(strerror(rc)));
    427 		}
    428 		return (1);
    429 	}
    430 	/* check that the binary is executable */
    431 	if ((statbuf.st_mode & S_IXUSR) != S_IXUSR) {
    432 		scds_syslog(LOG_ERR, "Incorrect permissions set for %s.",
    433 			DNS_BINARY);
    434 		if (print_messages) {
    435 			(void) fprintf(stderr, gettext("Incorrect permissions "
    436 				"set for %s.\n"), DNS_BINARY);
    437 		}
    438 		return (1);
    439 	}
    440 
    441 	return (SCHA_ERR_NOERR);
    442 }
    443 
    444 int
    445 svc_start(scds_handle_t handle, char *mode)
    446 {
    447 	char	dns_config[SCDS_ARRAY_SIZE];
    448 	char	dns_named[SCDS_ARRAY_SIZE];
    449 	char	cmd[SCDS_ARRAY_SIZE];
    450 	int 	rc;
    451 	scha_str_array_t *config_dir;
    452 
    453 	scds_syslog_debug(DBG_LEVEL_HIGH, "Calling START method for "
    454 		"resource %s.", scds_get_resource_name(handle));
    455 
    456 	/* Make sure everything looks OK */
    457 	if (svc_validate(handle, mode, B_FALSE) != 0) {
    458 		scds_syslog(LOG_ERR, "Failed to validate configuration.");
    459 		return (1); /* Bail out, no point in continuing with this res */
    460 	}
    461 
    462 	scds_syslog_debug(DBG_LEVEL_LOW, "DNS_mode property is set to %s.",
    463 		mode);
    464 
    465 	/* If Solaris 10 or later, disable SMF service */
    466 	if (os_newer_than_s10()) {
    467 		rc = disable_smf_services(svcs);
    468 		if (rc != 0)
    469 			return (1);
    470 	}
    471 
    472 	config_dir = scds_get_ext_confdir_list(handle);
    473 	if (config_dir == NULL) {
    474 		scds_syslog(LOG_ERR, "Property %s is not set.",
    475 			"Confdir_list");
    476 		return (1);
    477 	}
    478 
    479 	if (strcmp(mode, "conf") == 0) {
    480 		rc = snprintf(dns_config, sizeof (dns_config), "%s/named.conf",
    481 			config_dir->str_array[0]);
    482 		if (rc == -1) {
    483 			scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.",
    484 				"String handling error creating path to "
    485 				"configuration file: named.conf. "
    486 				"The path may be too long");
    487 			return (1);
    488 		}
    489 	} else if (strcmp(mode, "boot") == 0) {
    490 			rc = snprintf(dns_config, sizeof (dns_config),
    491 			    "%s/named.boot", config_dir->str_array[0]);
    492 			if (rc == -1) {
    493 				scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.",
    494 					"String handling error creating path "
    495 					"to configuration file: named.boot. "
    496 					"The path may be too long");
    497 				return (1);
    498 			}
    499 	} else {
    500 		scds_syslog(LOG_ERR, "Invalid value %s for property %s.",
    501 			mode, DNS_MODE_USED);
    502 		return (1);
    503 	}
    504 
    505 	rc = snprintf(dns_named, sizeof (dns_named), DNS_BINARY);
    506 	if (rc == -1) {
    507 		scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.",
    508 			"String handling error creating path to "
    509 			"in.named. The path may be too long");
    510 		return (1);
    511 	}
    512 
    513 
    514 	/* Now construct the command to start DNS */
    515 	rc = snprintf(cmd, sizeof (cmd), "%s -c %s ", dns_named, dns_config);
    516 	if (rc == -1) {
    517 		scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.",
    518 			"String handling error creating "
    519 			"start command. The path may be too long");
    520 		return (1);
    521 	}
    522 
    523 	/*
    524 	 * SCMSGS
    525 	 * @explanation
    526 	 * Sun Cluster is starting the specified application with the
    527 	 * specified command.
    528 	 * @user_action
    529 	 * This is an informational message, no user action is needed.
    530 	 */
    531 	scds_syslog(LOG_NOTICE, "Starting %s with command %s.", APP_NAME, cmd);
    532 
    533 	/* Start DNS */
    534 	if (scds_pmf_start(handle, SCDS_PMF_TYPE_SVC, 0, cmd, -1) !=
    535 		SCHA_ERR_NOERR) {
    536 		char msg[SCDS_ARRAY_SIZE];
    537 
    538 		(void) snprintf(msg, sizeof (msg), "Failed to start %s.",
    539 			APP_NAME);
    540 
    541 		/* cant use scds syslog(LOG_ERR, msg); make scmsgs complains */
    542 		scds_syslog(LOG_ERR, "Failed to start %s.", APP_NAME);
    543 
    544 		(void) scha_resource_setstatus(scds_get_resource_name(handle),
    545 			scds_get_resource_group_name(handle),
    546 			SCHA_RSSTATUS_FAULTED, msg);
    547 		exit(1);
    548 	}
    549 
    550 	scds_syslog(LOG_INFO, "Completed successfully.");
    551 	(void) scha_resource_setstatus(scds_get_resource_name(handle),
    552 		scds_get_resource_group_name(handle), SCHA_RSSTATUS_OK,
    553 		"Completed successfully.");
    554 
    555 	return (SCHA_ERR_NOERR);
    556 }
    557 
    558 
    559 /*
    560  * dns_svc_start() calls svc_wait() just after it calls svc_start()
    561  * and before it returns.  svc_start() starts up the application (DNS
    562  * server), but does not wait for the application to complete coming up
    563  * before returning.
    564  *
    565  * The RGM framework specifies that the START method should not return until
    566  * the application is up.  svc_wait() verifies that the application is
    567  * up before it returns.  It does this by probing the application across all
    568  * its port/ip combinations.  When the probing is successful, svc_wait()
    569  * returns immediately indicating success.  When the probing is unsuccessful
    570  * (i.e., svc_wait() probed without success for it's allotted time,
    571  * SVC_TIMEOUT_PCT/100 * start method timeout value), svc_wait() returns
    572  * an error.
    573  *
    574  * Since we don't want the START method to timeout, we don't try to probe
    575  * for 100% of the start method timeout value, but only SVC_TIMEOUT_PCT/100
    576  * of it.  Also, since probing too early crashes some applications, we wait
    577  * a percentage of the start method timeout value before starting to probe,
    578  * SVC_WAIT_PCT.
    579  *
    580  * Returns: 0=probing succeeded, application is up
    581  *          1=time ran out without a successful probe, application wasn't
    582  *            determined to be up.
    583  */
    584 int
    585 svc_wait(scds_handle_t handle)
    586 {
    587 	int err = 0, svc_start_timeout, probe_result;
    588 
    589 	svc_start_timeout = scds_get_rs_start_timeout(handle);
    590 
    591 	/*
    592 	 * sleep for SVC_WAIT_PCT percentage of start_timeout time
    593 	 * before actually probing the dataservice. This is to allow
    594 	 * the dataservice to be fully up inorder to reply to the
    595 	 * probe. NOTE: the value for SVC_WAIT_PCT could be different
    596 	 * for different dataservices.
    597 	 * Instead of calling sleep(),
    598 	 * call scds_svc_wait() so that if service fails too
    599 	 * many times, we give up and return early.
    600 	 */
    601 	err = scds_svc_wait(handle, (svc_start_timeout * SVC_WAIT_PCT / 100));
    602 
    603 	if (err != SCHA_ERR_NOERR) {
    604 		scds_syslog_debug(DBG_LEVEL_LOW, "INTERNAL ERROR: %s.",
    605 			"Failed to wait before probing service");
    606 	}
    607 
    608 	while (1) {
    609 		/* probe the data service */
    610 		probe_result = svc_probe(handle, B_FALSE);
    611 		if (probe_result == 0) {
    612 			/* everything looks good */
    613 			/*
    614 			 * SCMSGS
    615 			 * @explanation
    616 			 * While attempting to check the health of the data
    617 			 * service, probe detected that the resource status is
    618 			 * fine and it is online.
    619 			 * @user_action
    620 			 * This is informational message. No user action is
    621 			 * needed.
    622 			 */
    623 			scds_syslog(LOG_NOTICE, "Service is online.");
    624 			return (0);
    625 		}
    626 
    627 		/*
    628 		 * SCMSGS
    629 		 * @explanation
    630 		 * The specific service or process is not yet up.
    631 		 * @user_action
    632 		 * This is an informative message. Suitable action may be
    633 		 * taken if the specified service or process does not come up
    634 		 * within a configured time limit.
    635 		 */
    636 		scds_syslog(LOG_NOTICE, "Waiting for %s to come up.", APP_NAME);
    637 
    638 		/*
    639 		 * Dataservice is still trying to come up. Sleep for a while
    640 		 * before probing again. Instead of calling sleep(),
    641 		 * call scds_svc_wait() so that if service fails too
    642 		 * many times, we give up and return early.
    643 		 */
    644 		err = scds_svc_wait(handle, SVC_WAIT_TIME);
    645 		if (err != SCHA_ERR_NOERR)
    646 			return (err);
    647 
    648 	/* We rely on RGM to timeout and terminate the program */
    649 	}
    650 }
    651 
    652 
    653 int
    654 svc_stop(scds_handle_t handle)
    655 {
    656 	int svc_stop_timeout;
    657 	char msg[SCDS_ARRAY_SIZE];
    658 
    659 	scds_syslog_debug(DBG_LEVEL_HIGH, "Calling STOP method for "
    660 		"resource %s.", scds_get_resource_name(handle));
    661 
    662 	svc_stop_timeout = scds_get_rs_stop_timeout(handle);
    663 
    664 	scds_syslog(LOG_NOTICE, "Stopping %s.", APP_NAME);
    665 
    666 	if (scds_pmf_stop(handle, SCDS_PMF_TYPE_SVC, 0, SIGTERM,
    667 	    svc_stop_timeout) != SCHA_ERR_NOERR) {
    668 		scds_syslog(LOG_ERR, "Failed to stop %s.",
    669 		    APP_NAME" . Retrying.."); /* strings concatenated */
    670 	}
    671 
    672 	/*
    673 	 * Do a stop again, this time with infinite timeout and with SIGKILL.
    674 	 * Even if the tag is gone by now, scds_pmf_stop will not complain
    675 	 * so we are fine even if we do this unconditionally
    676 	 */
    677 	if (scds_pmf_stop(handle, SCDS_PMF_TYPE_SVC, 0, SIGKILL, -1) !=
    678 	    SCHA_ERR_NOERR) {
    679 		scds_syslog(LOG_ERR, "Failed to stop %s.", APP_NAME);
    680 		(void) snprintf(msg, sizeof (msg), "Failed to stop %s.",
    681 			APP_NAME);
    682 		(void) scha_resource_setstatus(scds_get_resource_name(handle),
    683 			scds_get_resource_group_name(handle),
    684 			SCHA_RSSTATUS_FAULTED, msg);
    685 		return (1);
    686 	} else {
    687 		/*
    688 		 * SCMSGS
    689 		 * @explanation
    690 		 * The resource was successfully stopped by Sun Cluster.
    691 		 * @user_action
    692 		 * No user action is required.
    693 		 */
    694 		scds_syslog(LOG_NOTICE, "Successfully stopped %s.", APP_NAME);
    695 		(void) snprintf(msg, sizeof (msg), "Successfully stopped %s.",
    696 		    APP_NAME);
    697 		(void) scha_resource_setstatus(scds_get_resource_name(handle),
    698 		    scds_get_resource_group_name(handle), SCHA_RSSTATUS_OFFLINE,
    699 		    msg);
    700 		return (SCHA_ERR_NOERR);
    701 	}
    702 }
    703 
    704 
    705 /*
    706  * This function starts the fault monitor for a HA-DNS resource.
    707  * This is done by starting the probe under PMF. The PMF tag
    708  * is derived as RG-name,RS-name.mon. The restart option of PMF
    709  * is used but not the "infinite restart". Instead
    710  * interval/retry_time is obtained from the RTR file.
    711  */
    712 
    713 int
    714 svc_fm_start(scds_handle_t handle)
    715 {
    716 
    717 	scds_syslog_debug(DBG_LEVEL_HIGH, "Calling MONITOR_START method "
    718 		"for resource %s.", scds_get_resource_name(handle));
    719 
    720 	if (scds_pmf_start(handle, SCDS_PMF_TYPE_MON, 0, "dns_probe", 0)
    721 		!= SCHA_ERR_NOERR) {
    722 		scds_syslog(LOG_ERR, "Failed to start fault monitor.");
    723 		return (1);
    724 	}
    725 
    726 	scds_syslog(LOG_INFO, "Started the fault monitor.");
    727 
    728 	return (SCHA_ERR_NOERR);
    729 }
    730 
    731 
    732 /*
    733  * This function stops the fault monitor for a HA-DNS resource.
    734  * This is done via PMF. The PMF tag for the fault monitor is
    735  * constructed based on RG-name_RS-name.mon.
    736  */
    737 
    738 int
    739 svc_fm_stop(scds_handle_t handle)
    740 {
    741 	scds_syslog_debug(DBG_LEVEL_HIGH, "Calling MONITOR_STOP method for "
    742 		"resource %s.", scds_get_resource_name(handle));
    743 
    744 	if (scds_pmf_stop(handle, SCDS_PMF_TYPE_MON, 0, SIGKILL, -1) !=
    745 	    SCHA_ERR_NOERR) {
    746 		scds_syslog(LOG_ERR, "Failed to stop fault monitor.");
    747 		return (1);
    748 	}
    749 
    750 	scds_syslog(LOG_INFO, "Stopped the fault monitor.");
    751 
    752 	return (SCHA_ERR_NOERR);
    753 }
    754 
    755 /*
    756  * svc_probe(): Do data service specific probing. Return a value
    757  * between 0 (success) and 100(complete failure).
    758  */
    759 int
    760 svc_probe(scds_handle_t handle, boolean_t arg_syslog_msgs)
    761 {
    762 	scds_netaddr_list_t *snrlp;
    763 	int rc, probe_remaining_time, retval, probe_timeout;
    764 	int exit_code;
    765 	hrtime_t probe_start_time;
    766 	char cmd[SCDS_ARRAY_SIZE];
    767 
    768 	probe_start_time = gethrtime();
    769 	probe_timeout = scds_get_ext_probe_timeout(handle);
    770 
    771 	rc = scds_get_netaddr_list(handle, &snrlp);
    772 	if (rc != SCHA_ERR_NOERR) {
    773 		scds_syslog(LOG_ERR, "Failed to retrieve the resource "
    774 			"property %s: %s.", SCHA_NETWORK_RESOURCES_USED,
    775 			scds_error_string(rc));
    776 	}
    777 
    778 	if ((snrlp == NULL) || (snrlp->num_netaddrs == 0)) {
    779 		if (arg_syslog_msgs) {
    780 			scds_syslog(LOG_ERR, "No LogicalHostname resource "
    781 				"in resource group.");
    782 		}
    783 
    784 		exit(1);
    785 	}
    786 
    787 	probe_remaining_time = probe_timeout - (int)((gethrtime() -
    788 		probe_start_time) / 1e9);
    789 	if (probe_remaining_time < 1) {
    790 		if (arg_syslog_msgs) {
    791 			scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.",
    792 				"No time left for service probe");
    793 		}
    794 
    795 		retval = SCDS_PROBE_COMPLETE_FAILURE / 2;
    796 		goto finished;
    797 	}
    798 
    799 	/*
    800 	 * By using timeout=0, we actually get a timeout of 1 sec (but
    801 	 * with no exponential backoff). So effectively, nslookup runs
    802 	 * for retry * 1 = probe_remaining_time seconds.
    803 	 */
    804 	rc = snprintf(cmd, sizeof (cmd), "/usr/sbin/nslookup -retry=%d "
    805 		"-timeout=0 %s %s >/dev/null 2>&1", probe_remaining_time,
    806 		snrlp->netaddrs[0].hostname, snrlp->netaddrs[0].hostname);
    807 	if (rc == -1) {
    808 		if (arg_syslog_msgs) {
    809 			scds_syslog(LOG_ERR, "INTERNAL ERROR: %s.",
    810 				"String handling error creating "
    811 				"the nslookup command for probe");
    812 		}
    813 		retval = SCDS_PROBE_COMPLETE_FAILURE / 2;
    814 		goto finished;
    815 	}
    816 
    817 	/* run nslookup */
    818 	rc = scds_timerun(handle, cmd, probe_remaining_time, SIGKILL,
    819 		&exit_code);
    820 	if (rc != 0) {
    821 		if (arg_syslog_msgs) {
    822 			/*
    823 			 * SCMSGS
    824 			 * @explanation
    825 			 * The command could not be run successfully.
    826 			 * @user_action
    827 			 * The error message specifies both - the exact
    828 			 * command that failed, and the reason why it failed.
    829 			 * Try the command manually and see if it works.
    830 			 * Consider increasing the timeout if the failure is
    831 			 * due to lack of time. For other failures, contact
    832 			 * your authorized Sun service provider.
    833 			 */
    834 			scds_syslog(LOG_ERR, "Command [%s] failed: %s.",
    835 				cmd, scds_error_string(rc));
    836 		}
    837 
    838 		if ((rc == SCHA_ERR_TIMEOUT) || (rc == SCHA_ERR_INTERNAL)) {
    839 			retval = SCDS_PROBE_COMPLETE_FAILURE / 2;
    840 		} else {
    841 			retval = SCDS_PROBE_COMPLETE_FAILURE;
    842 		}
    843 
    844 		goto finished;
    845 	}
    846 
    847 	/* check the return code from nslookup */
    848 	if (exit_code != 0) {
    849 		if (arg_syslog_msgs) {
    850 			/*
    851 			 * SCMSGS
    852 			 * @explanation
    853 			 * Fault monitor was unable to perform complete health
    854 			 * check of the service.
    855 			 * @user_action
    856 			 * 1) Fault monitor would take appropiate action (by
    857 			 * restarting or failing over the service.).
    858 			 *
    859 			 * 2) Data service could be under load, try increasing
    860 			 * the values for Probe_timeout and
    861 			 * Thororugh_probe_interval properties.
    862 			 *
    863 			 * 3) If this problem continues to occur, look at
    864 			 * other messages in syslog to determine the root
    865 			 * cause of the problem. If all else fails reboot
    866 			 * node.
    867 			 */
    868 			scds_syslog(LOG_ERR, "Probe failed.");
    869 		}
    870 		retval = SCDS_PROBE_COMPLETE_FAILURE;
    871 		goto finished;
    872 	}
    873 
    874 	/* All OK */
    875 	retval = 0;
    876 
    877 finished:
    878 	scds_free_netaddr_list(snrlp);
    879 	return (retval);
    880 
    881 }
    882