Home | History | Annotate | Download | only in md_monitord
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * probedev issues ioctls for all the metadevices
     28  */
     29 
     30 #include "md_monitord.h"
     31 #include <sdssc.h>
     32 
/*
 * Defined outside this file.
 * NOTE(review): not referenced in the portion of the file visible here.
 */
extern char queue_name[];
/* Defaults to True; the -i option in main() sets it to True as well. */
boolean_e issue_ioctl = True;


#define	DEBUG_LEVEL_FORK	9	/* will run in background at all */
					/* levels less than DEBUG_LEVEL_FORK */

/* function prototypes */
static void usage(void);
static void catch_sig(int);
static pid_t enter_daemon_lock(void);
static void exit_daemon_lock(void);
static void probe_all_devs(boolean_e, md_error_t *, boolean_e);

/* Path of the file the advisory daemon lock is taken on. */
#define	DAEMON_LOCK_FILE "/etc/lvm/.mdmonitord.lock"

/*
 * Global variable
 *
 * Set currently being processed: assigned by probe_all_devs() for each
 * set it visits and handed to md_exit() by monitord_exit().
 */
mdsetname_t	*sp;

/* Nonzero once enter_daemon_lock() has obtained the daemon lock. */
static int hold_daemon_lock;
static const char *daemon_lock_file = DAEMON_LOCK_FILE;
/* Descriptor open on daemon_lock_file, used for the fcntl() lock calls. */
static int daemon_lock_fd;

/* Set from -d; monitord_print() drops messages with a higher level. */
static int		debug_level;
/* Nonzero: monitord_print() writes to syslog instead of stdout/stderr. */
static int		logflag;
/* Last path component of argv[0]; prefix for debug messages. */
static char		*prog;
/* Timer value/interval taken from -t and armed with setitimer(). */
static struct itimerval	itimer;
static boolean_e	probe_started;	/* flag to indicate main is probing */
     63 
     64 static void
     65 usage() {
     66 	(void) fprintf(stderr, gettext(
     67 		"usage: mdmonitord [-d <debug_level>] [-t poll time]\n"
     68 		    "higher debug levels get progressively more"
     69 		    "detailed debug information.\n\n"
     70 		    "mdmonitord will run in background if run"
     71 		    "with a debug_level less than %d.\n"), DEBUG_LEVEL_FORK);
     72 	exit(-1);
     73 }
     74 
     75 
     76 /* common exit function which ensures releasing locks */
     77 void
     78 monitord_exit(int status)
     79 {
     80 	monitord_print(1, gettext("exit status = %d\n"), status);
     81 
     82 	monitord_print(8, "hold_daemon_lock %d\n", hold_daemon_lock);
     83 	if (hold_daemon_lock) {
     84 		exit_daemon_lock();
     85 	}
     86 	md_exit(sp, status);
     87 }
     88 
     89 
     90 /*
     91  * When SIGHUP is received, reload modules?
     92  */
     93 void
     94 catch_sig(int sig)
     95 {
     96 	boolean_e startup = False;
     97 	md_error_t status = mdnullerror;
     98 	boolean_e sig_verbose = True;
     99 
    100 	if (sig == SIGALRM) {
    101 		monitord_print(6, gettext("SIGALRM processing"));
    102 		if (probe_started == True) {
    103 			monitord_print(6, gettext(
    104 			    " probe_started returning\n"));
    105 			return;
    106 		}
    107 		monitord_print(6, gettext(
    108 		    " starting probe from signal handler\n"));
    109 		probe_all_devs(startup, &status, sig_verbose);
    110 		(void) setitimer(ITIMER_REAL, &itimer, NULL);
    111 	}
    112 	if (sig == SIGHUP)
    113 		monitord_exit(sig);
    114 }
    115 
    116 /*
    117  * Use an advisory lock to ensure that only one daemon process is
    118  * active at any point in time.
    119  */
    120 static pid_t
    121 check_daemon_lock(void)
    122 {
    123 	struct flock	lock;
    124 
    125 	monitord_print(1, gettext("check_daemon_lock: lock file = %s\n"),
    126 	    daemon_lock_file);
    127 
    128 	daemon_lock_fd = open(daemon_lock_file, O_CREAT|O_RDWR, 0644);
    129 	if (daemon_lock_fd < 0) {
    130 		monitord_print(0, "open(%s) - %s\n", daemon_lock_file,
    131 		    strerror(errno));
    132 		monitord_exit(-1);
    133 	}
    134 
    135 	lock.l_type = F_WRLCK;
    136 	lock.l_whence = SEEK_SET;
    137 	lock.l_start = 0;
    138 	lock.l_len = 0;
    139 
    140 	if (fcntl(daemon_lock_fd, F_GETLK, &lock) == -1) {
    141 		monitord_print(0, "lock(%s) - %s", daemon_lock_file,
    142 		    strerror(errno));
    143 		monitord_exit(-1);
    144 	}
    145 
    146 	return (lock.l_type == F_UNLCK ? 0 : lock.l_pid);
    147 }
    148 
    149 static pid_t
    150 enter_daemon_lock(void)
    151 {
    152 	struct flock	lock;
    153 
    154 	monitord_print(1, gettext(
    155 	    "enter_daemon_lock: lock file = %s\n"), daemon_lock_file);
    156 
    157 	daemon_lock_fd = open(daemon_lock_file, O_CREAT|O_RDWR, 0644);
    158 	if (daemon_lock_fd < 0) {
    159 		monitord_print(0, "open(%s) - %s\n",
    160 		    daemon_lock_file, strerror(errno));
    161 		monitord_exit(-1);
    162 	}
    163 
    164 	lock.l_type = F_WRLCK;
    165 	lock.l_whence = SEEK_SET;
    166 	lock.l_start = 0;
    167 	lock.l_len = 0;
    168 
    169 	if (fcntl(daemon_lock_fd, F_SETLK, &lock) == -1) {
    170 
    171 		if (errno == EAGAIN || errno == EDEADLK) {
    172 
    173 			if (fcntl(daemon_lock_fd, F_GETLK, &lock) == -1) {
    174 				monitord_print(0, "lock(%s) - %s",
    175 				    daemon_lock_file, strerror(errno));
    176 				monitord_exit(-1);
    177 			}
    178 
    179 			return (lock.l_pid);
    180 		}
    181 	}
    182 	hold_daemon_lock = 1;
    183 
    184 	return (0);
    185 }
    186 
    187 /*
    188  * Drop the advisory daemon lock, close lock file
    189  */
    190 static void
    191 exit_daemon_lock(void)
    192 {
    193 	struct flock lock;
    194 
    195 	lock.l_type = F_UNLCK;
    196 	lock.l_whence = SEEK_SET;
    197 	lock.l_start = 0;
    198 	lock.l_len = 0;
    199 
    200 	if (fcntl(daemon_lock_fd, F_SETLK, &lock) == -1) {
    201 		monitord_print(0, "unlock(%s) - %s",
    202 		    daemon_lock_file, strerror(errno));
    203 	}
    204 
    205 	if (close(daemon_lock_fd) == -1) {
    206 		monitord_print(0, "close(%s) failed - %s\n",
    207 		    daemon_lock_file, strerror(errno));
    208 		monitord_exit(-1);
    209 	}
    210 	(void) unlink(daemon_lock_file);
    211 }
    212 
    213 
    214 /*
    215  * print error messages to the terminal or to syslog
    216  */
    217 /*PRINTFLIKE2*/
    218 void
    219 monitord_print(int level, char *message, ...)
    220 {
    221 	va_list ap;
    222 	static int newline = 1;
    223 
    224 	if (level > debug_level) {
    225 		return;
    226 	}
    227 
    228 	va_start(ap, message);
    229 	if (level == 0) {
    230 		if (logflag) {
    231 			(void) vsyslog(LOG_ERR, message, ap);
    232 		} else {
    233 			(void) vfprintf(stderr, message, ap);
    234 		}
    235 
    236 	} else {
    237 		if (logflag) {
    238 			(void) syslog(LOG_DEBUG, "%s[%ld]: ",
    239 			    prog, getpid());
    240 			(void) vsyslog(LOG_DEBUG, message, ap);
    241 		} else {
    242 			if (newline) {
    243 				(void) fprintf(stdout, "%s[%ld]: ",
    244 				    prog, getpid());
    245 				(void) vfprintf(stdout, message, ap);
    246 			} else {
    247 				(void) vfprintf(stdout, message, ap);
    248 			}
    249 		}
    250 	}
    251 	if (message[strlen(message)-1] == '\n') {
    252 		newline = 1;
    253 	} else {
    254 		newline = 0;
    255 	}
    256 	va_end(ap);
    257 }
    258 
    259 
    260 char *
    261 int2string(intmap_t *map, int value)
    262 {
    263 	const char	*name = (const char *)NULL;
    264 	char		charstr[100];
    265 
    266 	for (; map->im_name != (const char *)NULL; map++) {
    267 		if (map->im_int == value) {
    268 			name = map->im_name;
    269 			break;
    270 		}
    271 	}
    272 	if (name == (const char *)NULL) {
    273 		/* No match.  Convert the string to an int. */
    274 		(void) sprintf(charstr, "%d", value);
    275 	} else {
    276 		(void) snprintf(charstr, sizeof (charstr), "%d %s",
    277 		    value, name);
    278 	}
    279 	return (strdup(charstr));
    280 }
    281 
    282 void
    283 probe_all_devs(boolean_e startup, md_error_t *statusp, boolean_e verbose)
    284 {
    285 	set_t		max_sets, set_idx;
    286 
    287 	probe_started = True;
    288 	(void) set_snarf(statusp);
    289 
    290 	if ((max_sets = get_max_sets(statusp)) == 0) {
    291 		mde_perror(statusp, gettext(
    292 		    "Can't find max number of sets\n"));
    293 		monitord_exit(1);
    294 	}
    295 
    296 	/*
    297 	 * We delete the FF_Q to avoid recurse errors. Yes we will lose
    298 	 * some but its the corner case.
    299 	 */
    300 
    301 	if (startup == False &&
    302 	    (meta_notify_deleteq(MD_FF_Q, statusp) != 0)) {
    303 		mde_perror(statusp, gettext(
    304 		    "delete queue failed\n"));
    305 		monitord_exit(1);
    306 	}
    307 
    308 	for (set_idx = 0; set_idx < max_sets; set_idx++) {
    309 		if ((sp = metasetnosetname(set_idx, statusp)) == NULL) {
    310 			if (mdiserror(statusp, MDE_NO_SET) == 0) {
    311 				/*
    312 				 * done break the loop
    313 				 */
    314 				break;
    315 			} else {
    316 				mdclrerror(statusp);
    317 				continue;
    318 			}
    319 		}
    320 
    321 		/* if we dont have ownership or cannot lock it continue. */
    322 		if ((meta_check_ownership(sp, statusp) == NULL) &&
    323 		    meta_lock(sp, TRUE, statusp))
    324 			continue;
    325 
    326 		/* Skip if a MN set */
    327 		if (meta_is_mn_set(sp, statusp)) {
    328 			(void) meta_unlock(sp, statusp);
    329 			continue;
    330 		}
    331 
    332 		probe_mirror_devs(verbose);
    333 		probe_raid_devs(verbose);
    334 		probe_trans_devs(verbose);
    335 		probe_hotspare_devs(verbose);
    336 		(void) meta_unlock(sp, statusp);
    337 	}
    338 	if (meta_notify_createq(MD_FF_Q, 0, statusp)) {
    339 		mde_perror(statusp, gettext(
    340 		    "create queue failed"));
    341 		monitord_exit(1);
    342 	}
    343 	probe_started = False;
    344 	/*
    345 	 * need to do it here only at startup.
    346 	 * The daemon will restart the alarm.
    347 	 */
    348 
    349 	if (startup == True)
    350 		(void) setitimer(ITIMER_REAL, &itimer, NULL);
    351 }
    352 
    353 evid_t
    354 wait_for_event(md_error_t *statusp)
    355 {
    356 	md_ev_t		event;
    357 
    358 
    359 	event.setno = EV_ALLSETS;
    360 	event.obj = EV_ALLOBJS;
    361 
    362 	do {
    363 		if (meta_notify_getev(MD_FF_Q, EVFLG_WAIT, &event,
    364 		    statusp) < 0) {
    365 			monitord_print(8,
    366 			    "meta_notify_getev: errno 0x%x\n", -errno);
    367 			monitord_exit(-errno);
    368 		}
    369 	} while ((event.ev != EV_IOERR && event.ev != EV_ERRED &&
    370 	    event.ev != EV_LASTERRED));
    371 	return (event.ev);
    372 }
    373 
    374 int
    375 main(int argc, char **argv)
    376 {
    377 	boolean_e	startup = True;
    378 	boolean_e	verbose = False;
    379 	int		i;
    380 	char		c;
    381 	md_error_t	status = mdnullerror;
    382 	struct sigaction act;
    383 	sigset_t	mask;
    384 	unsigned long	timerval = 0;
    385 
    386 	/*
    387 	 * Get the locale set up before calling any other routines
    388 	 * with messages to ouput.  Just in case we're not in a build
    389 	 * environment, make sure that TEXT_DOMAIN gets set to
    390 	 * something.
    391 	 */
    392 #if !defined(TEXT_DOMAIN)
    393 #define	TEXT_DOMAIN "SYS_TEST"
    394 #endif
    395 	(void) setlocale(LC_ALL, "");
    396 	(void) textdomain(TEXT_DOMAIN);
    397 
    398 	if (sdssc_bind_library() == SDSSC_ERROR) {
    399 		(void) printf(gettext(
    400 		    "%s: Interface error with libsds_sc.so\n"), argv[0]);
    401 		exit(1);
    402 	}
    403 
    404 	if (md_init(argc, argv, 0, 1, &status) != 0 ||
    405 	    meta_check_root(&status) != 0) {
    406 		mde_perror(&status, "");
    407 		monitord_exit(1);
    408 	}
    409 
    410 	(void) sigfillset(&mask);
    411 	(void) thr_sigsetmask(SIG_BLOCK, &mask, NULL);
    412 
    413 	if (argc > 7) {
    414 		usage();
    415 	}
    416 
    417 	if ((prog = strrchr(argv[0], '/')) == NULL) {
    418 		prog = argv[0];
    419 	} else {
    420 		prog++;
    421 	}
    422 
    423 	/*
    424 	 * Reset optind/opterr so that the command line arguments can be
    425 	 * parsed. This is in case anything has already called getopt,
    426 	 * for example sdssc_cmd_proxy which is not currently used but
    427 	 * may be in the future.
    428 	 */
    429 	optind = 1;
    430 	opterr = 1;
    431 	while ((c = getopt(argc, argv, "ivd:t:")) != EOF) {
    432 		switch (c) {
    433 		case 'v':
    434 			verbose = True;
    435 			break;
    436 		case 'i':
    437 			issue_ioctl = True;
    438 			break;
    439 		case 'd':
    440 			debug_level = atoi(optarg);
    441 			break;
    442 		case 't':
    443 			timerval = atol(optarg);
    444 			break;
    445 		default:
    446 			usage();
    447 			exit(0);
    448 		}
    449 	}
    450 
    451 	if (timerval == 0) {
    452 		monitord_print(8, gettext(
    453 		    "operating in interrupt mode\n"));
    454 	} else {
    455 		itimer.it_value.tv_sec = timerval;
    456 		itimer.it_interval.tv_sec = timerval;
    457 		monitord_print(8, gettext(
    458 		    "set value and interval %lu sec  mode\n"), timerval);
    459 	}
    460 	/*
    461 	 * set up our signal handler for SIGALRM. The
    462 	 * rest are setup by md_init.
    463 	 */
    464 
    465 	act.sa_handler = catch_sig;
    466 	(void) sigemptyset(&act.sa_mask);
    467 	act.sa_flags = SA_RESTART;
    468 	(void) sigaction(SIGALRM, &act, NULL);
    469 	(void) sigaction(SIGHUP, &act, NULL);
    470 
    471 	(void) sigemptyset(&mask);
    472 	(void) sigaddset(&mask, SIGALRM);
    473 	(void) sigaddset(&mask, SIGHUP);
    474 	(void) thr_sigsetmask(SIG_UNBLOCK, &mask, NULL);
    475 
    476 	/* demonize ourselves */
    477 	if (debug_level < DEBUG_LEVEL_FORK) {
    478 		pid_t pid;
    479 
    480 		if ((pid = check_daemon_lock()) != 0) {
    481 			monitord_print(0, gettext(
    482 			    "mdmonitord daemon pid %ld already running\n"),
    483 			    pid);
    484 			exit(-1);
    485 		}
    486 
    487 		if (fork()) {
    488 			exit(0);
    489 		}
    490 
    491 		/* only one daemon can run at a time */
    492 		if ((pid = enter_daemon_lock()) != 0) {
    493 			monitord_print(0, gettext(
    494 			    "mdmonitord daemon pid %ld already running\n"),
    495 			    pid);
    496 			exit(-1);
    497 		}
    498 
    499 		(void) chdir("/");
    500 
    501 		(void) setsid();
    502 		if (debug_level <= 1) {
    503 			for (i = 0; i < 3; i++) {
    504 				(void) close(i);
    505 			}
    506 			(void) open("/dev/null", 0);
    507 			(void) dup2(0, 1);
    508 			(void) dup2(0, 2);
    509 			logflag = 1;
    510 		}
    511 	}
    512 
    513 	openlog("mdmonitord", LOG_PID, LOG_DAEMON);
    514 
    515 	monitord_print(8, gettext(
    516 	    "mdmonitord started, debug level = %d\n"), debug_level);
    517 
    518 
    519 	/* loop forever waiting for events */
    520 	do {
    521 		metaflushnames(1);
    522 		probe_all_devs(startup, &status, verbose);
    523 		startup = False; /* since we have gone through once */
    524 	} while (wait_for_event(&status));
    525 	return (0);
    526 }
    527