Home | History | Annotate | Download | only in nfsd
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T		*/
     27 /*	  All Rights Reserved  	*/
     28 
     29 /*
     30  * University Copyright- Copyright (c) 1982, 1986, 1988
     31  * The Regents of the University of California
     32  * All Rights Reserved
     33  *
     34  * University Acknowledgment- Portions of this document are derived from
     35  * software developed by the University of California, Berkeley, and its
     36  * contributors.
     37  */
     38 
     39 /* LINTLIBRARY */
     40 /* PROTOLIB1 */
     41 
     42 /* NFS server */
     43 
     44 #include <sys/param.h>
     45 #include <sys/types.h>
     46 #include <sys/stat.h>
     47 #include <syslog.h>
     48 #include <tiuser.h>
     49 #include <rpc/rpc.h>
     50 #include <errno.h>
     51 #include <thread.h>
     52 #include <sys/resource.h>
     53 #include <sys/time.h>
     54 #include <sys/file.h>
     55 #include <nfs/nfs.h>
     56 #include <nfs/nfs_acl.h>
     57 #include <nfs/nfssys.h>
     58 #include <stdio.h>
     59 #include <stdio_ext.h>
     60 #include <stdlib.h>
     61 #include <signal.h>
     62 #include <netconfig.h>
     63 #include <netdir.h>
     64 #include <string.h>
     65 #include <unistd.h>
     66 #include <stropts.h>
     67 #include <sys/tihdr.h>
     68 #include <sys/wait.h>
     69 #include <poll.h>
     70 #include <priv_utils.h>
     71 #include <sys/tiuser.h>
     72 #include <netinet/tcp.h>
     73 #include <deflt.h>
     74 #include <rpcsvc/daemon_utils.h>
     75 #include <rpcsvc/nfs4_prot.h>
     76 #include <libnvpair.h>
     77 #include "nfs_tbind.h"
     78 #include "thrpool.h"
     79 
/*
 * The SIGUSR1 handler only asks the kernel to quiesce when
 * nfs_server_vers_max >= QUIESCE_VERSMIN; below that it behaves like
 * the SIGTERM handler.
 */
#define	QUIESCE_VERSMIN	4
/*
 * DSS: distributed stable storage.  The "-s" paths are only set up when
 * nfs_server_vers_max >= DSS_VERSMIN.
 */
#define	DSS_VERSMIN	4

static	int	nfssvc(int, struct netbuf, struct netconfig *);
static	int	nfssvcpool(int maxservers);
static	int	dss_init(uint_t npaths, char **pathnames);
static	void	dss_mkleafdirs(uint_t npaths, char **pathnames);
static	void	dss_mkleafdir(char *dir, char *leaf, char *path);
static	void	usage(void);
/* qsort() comparison routine; each argument points at a "char *" element */
int		qstrcmp(const void *s1, const void *s2);

extern	int	_nfssys(int, void *);

extern int	daemonize_init(void);
extern void	daemonize_fini(int fd);

/* signal handlers */
static void sigflush(int);
static void quiesce(int);

/* av[0], used as the program name in messages and for openlog() */
static	char	*MyName;
/*
 * Transport providers tried, in this order, when neither a protocol nor a
 * provider has been selected by the defaults file or the command line.
 */
static	NETSELDECL(defaultproviders)[] = { "/dev/tcp6", "/dev/tcp", "/dev/udp",
					    "/dev/udp6", NULL };
/* static	NETSELDECL(defaultprotos)[] =	{ NC_UDP, NC_TCP, NULL }; */
/*
 * The following are all globals used by routines in nfs_tbind.c.
 */
size_t	end_listen_fds;		/* used by conn_close_oldest() */
size_t	num_fds = 0;		/* used by multiple routines */
int	listen_backlog = 32;	/* used by bind_to_{provider,proto}() */
int	num_servers;		/* used by cots_listen_event() */
int	(*Mysvc)(int, struct netbuf, struct netconfig *) = nfssvc;
				/* used by cots_listen_event() */
int	max_conns_allowed = -1;	/* used by cots_listen_event() */

/*
 * Keep track of min/max versions of NFS protocol to be started.
 * Start with the compiled-in defaults (NFS_VERSMIN_DEFAULT and
 * NFS_VERSMAX_DEFAULT); NFS_SERVER_VERSMIN / NFS_SERVER_VERSMAX in the
 * defaults file override them.
 */
int	nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
int	nfs_server_vers_max = NFS_VERSMAX_DEFAULT;

/*
 * Set the default for server delegation enablement; main() turns it off
 * when the defaults file contains NFS_SERVER_DELEGATION=off.
 */
int	nfs_server_delegation = NFS_SERVER_DELEGATION_DEFAULT;
    130 
    131 int
    132 main(int ac, char *av[])
    133 {
    134 	char *dir = "/";
    135 	int allflag = 0;
    136 	int df_allflag = 0;
    137 	int opt_cnt = 0;
    138 	int maxservers = 1;	/* zero allows inifinte number of threads */
    139 	int maxservers_set = 0;
    140 	int logmaxservers = 0;
    141 	int pid;
    142 	int i;
    143 	char *provider = (char *)NULL;
    144 	char *df_provider = (char *)NULL;
    145 	struct protob *protobp0, *protobp;
    146 	NETSELDECL(proto) = NULL;
    147 	NETSELDECL(df_proto) = NULL;
    148 	NETSELPDECL(providerp);
    149 	char *defval;
    150 	boolean_t can_do_mlp;
    151 	uint_t dss_npaths = 0;
    152 	char **dss_pathnames = NULL;
    153 	sigset_t sgset;
    154 
    155 	int pipe_fd = -1;
    156 
    157 	MyName = *av;
    158 
    159 	/*
    160 	 * Initializations that require more privileges than we need to run.
    161 	 */
    162 	(void) _create_daemon_lock(NFSD, DAEMON_UID, DAEMON_GID);
    163 	svcsetprio();
    164 
    165 	can_do_mlp = priv_ineffect(PRIV_NET_BINDMLP);
    166 	if (__init_daemon_priv(PU_RESETGROUPS|PU_CLEARLIMITSET,
    167 	    DAEMON_UID, DAEMON_GID, PRIV_SYS_NFS,
    168 	    can_do_mlp ? PRIV_NET_BINDMLP : NULL, NULL) == -1) {
    169 		(void) fprintf(stderr, "%s should be run with"
    170 		    " sufficient privileges\n", av[0]);
    171 		exit(1);
    172 	}
    173 
    174 	(void) enable_extended_FILE_stdio(-1, -1);
    175 
    176 	/*
    177 	 * Read in the values from config file first before we check
    178 	 * command line options so the options override the file.
    179 	 */
    180 	if ((defopen(NFSADMIN)) == 0) {
    181 		if ((defval = defread("NFSD_MAX_CONNECTIONS=")) != NULL) {
    182 			errno = 0;
    183 			max_conns_allowed = strtol(defval, (char **)NULL, 10);
    184 			if (errno != 0) {
    185 				max_conns_allowed = -1;
    186 			}
    187 		}
    188 		if ((defval = defread("NFSD_LISTEN_BACKLOG=")) != NULL) {
    189 			errno = 0;
    190 			listen_backlog = strtol(defval, (char **)NULL, 10);
    191 			if (errno != 0) {
    192 				listen_backlog = 32;
    193 			}
    194 		}
    195 		if ((defval = defread("NFSD_PROTOCOL=")) != NULL) {
    196 			df_proto = strdup(defval);
    197 			opt_cnt++;
    198 			if (strncasecmp("ALL", defval, 3) == 0) {
    199 				free(df_proto);
    200 				df_proto = NULL;
    201 				df_allflag = 1;
    202 			}
    203 		}
    204 		if ((defval = defread("NFSD_DEVICE=")) != NULL) {
    205 			df_provider = strdup(defval);
    206 			opt_cnt++;
    207 		}
    208 		if ((defval = defread("NFSD_SERVERS=")) != NULL) {
    209 			errno = 0;
    210 			maxservers = strtol(defval, (char **)NULL, 10);
    211 			if (errno != 0) {
    212 				maxservers = 1;
    213 			} else {
    214 				maxservers_set = 1;
    215 			}
    216 		}
    217 		if ((defval = defread("NFS_SERVER_VERSMIN=")) != NULL) {
    218 			errno = 0;
    219 			nfs_server_vers_min =
    220 			    strtol(defval, (char **)NULL, 10);
    221 			if (errno != 0) {
    222 				nfs_server_vers_min = NFS_VERSMIN_DEFAULT;
    223 			}
    224 		}
    225 		if ((defval = defread("NFS_SERVER_VERSMAX=")) != NULL) {
    226 			errno = 0;
    227 			nfs_server_vers_max =
    228 			    strtol(defval, (char **)NULL, 10);
    229 			if (errno != 0) {
    230 				nfs_server_vers_max = NFS_VERSMAX_DEFAULT;
    231 			}
    232 		}
    233 		if ((defval = defread("NFS_SERVER_DELEGATION=")) != NULL) {
    234 			if (strcmp(defval, "off") == 0) {
    235 				nfs_server_delegation = FALSE;
    236 			}
    237 		}
    238 
    239 		/* close defaults file */
    240 		defopen(NULL);
    241 	}
    242 
    243 	/*
    244 	 * Conflict options error messages.
    245 	 */
    246 	if (opt_cnt > 1) {
    247 		(void) fprintf(stderr, "\nConflicting options, only one of "
    248 		    "the following options can be specified\n"
    249 		    "in " NFSADMIN ":\n"
    250 		    "\tNFSD_PROTOCOL=ALL\n"
    251 		    "\tNFSD_PROTOCOL=protocol\n"
    252 		    "\tNFSD_DEVICE=device\n\n");
    253 		usage();
    254 	}
    255 	opt_cnt = 0;
    256 
    257 	while ((i = getopt(ac, av, "ac:p:s:t:l:")) != EOF) {
    258 		switch (i) {
    259 		case 'a':
    260 			free(df_proto);
    261 			df_proto = NULL;
    262 			free(df_provider);
    263 			df_provider = NULL;
    264 
    265 			allflag = 1;
    266 			opt_cnt++;
    267 			break;
    268 
    269 		case 'c':
    270 			max_conns_allowed = atoi(optarg);
    271 			break;
    272 
    273 		case 'p':
    274 			proto = optarg;
    275 			df_allflag = 0;
    276 			opt_cnt++;
    277 			break;
    278 
    279 		/*
    280 		 * DSS: NFSv4 distributed stable storage.
    281 		 *
    282 		 * This is a Contracted Project Private interface, for
    283 		 * the sole use of Sun Cluster HA-NFS. See PSARC/2006/313.
    284 		 */
    285 		case 's':
    286 			if (strlen(optarg) < MAXPATHLEN) {
    287 				/* first "-s" option encountered? */
    288 				if (dss_pathnames == NULL) {
    289 					/*
    290 					 * Allocate maximum possible space
    291 					 * required given cmdline arg count;
    292 					 * "-s <path>" consumes two args.
    293 					 */
    294 					size_t sz = (ac / 2) * sizeof (char *);
    295 					dss_pathnames = (char **)malloc(sz);
    296 					if (dss_pathnames == NULL) {
    297 						(void) fprintf(stderr, "%s: "
    298 						    "dss paths malloc failed\n",
    299 						    av[0]);
    300 						exit(1);
    301 					}
    302 					(void) memset(dss_pathnames, 0, sz);
    303 				}
    304 				dss_pathnames[dss_npaths] = optarg;
    305 				dss_npaths++;
    306 			} else {
    307 				(void) fprintf(stderr,
    308 				    "%s: -s pathname too long.\n", av[0]);
    309 			}
    310 			break;
    311 
    312 		case 't':
    313 			provider = optarg;
    314 			df_allflag = 0;
    315 			opt_cnt++;
    316 			break;
    317 
    318 		case 'l':
    319 			listen_backlog = atoi(optarg);
    320 			break;
    321 
    322 		case '?':
    323 			usage();
    324 			/* NOTREACHED */
    325 		}
    326 	}
    327 
    328 	allflag = df_allflag;
    329 	if (proto == NULL)
    330 		proto = df_proto;
    331 	if (provider == NULL)
    332 		provider = df_provider;
    333 
    334 	/*
    335 	 * Conflict options error messages.
    336 	 */
    337 	if (opt_cnt > 1) {
    338 		(void) fprintf(stderr, "\nConflicting options, only one of "
    339 		    "the following options can be specified\n"
    340 		    "on the command line:\n"
    341 		    "\t-a\n"
    342 		    "\t-p protocol\n"
    343 		    "\t-t transport\n\n");
    344 		usage();
    345 	}
    346 
    347 	if (proto != NULL &&
    348 	    strncasecmp(proto, NC_UDP, strlen(NC_UDP)) == 0) {
    349 		if (nfs_server_vers_max == NFS_V4) {
    350 			if (nfs_server_vers_min == NFS_V4) {
    351 				fprintf(stderr,
    352 				    "NFS version 4 is not supported "
    353 				    "with the UDP protocol.  Exiting\n");
    354 				exit(3);
    355 			} else {
    356 				fprintf(stderr,
    357 				    "NFS version 4 is not supported "
    358 				    "with the UDP protocol.\n");
    359 			}
    360 		}
    361 	}
    362 
    363 	/*
    364 	 * If there is exactly one more argument, it is the number of
    365 	 * servers.
    366 	 */
    367 	if (optind == ac - 1) {
    368 		maxservers = atoi(av[optind]);
    369 		maxservers_set = 1;
    370 	}
    371 	/*
    372 	 * If there are two or more arguments, then this is a usage error.
    373 	 */
    374 	else if (optind < ac - 1)
    375 		usage();
    376 	/*
    377 	 * Check the ranges for min/max version specified
    378 	 */
    379 	else if ((nfs_server_vers_min > nfs_server_vers_max) ||
    380 	    (nfs_server_vers_min < NFS_VERSMIN) ||
    381 	    (nfs_server_vers_max > NFS_VERSMAX))
    382 		usage();
    383 	/*
    384 	 * There are no additional arguments, and we haven't set maxservers
    385 	 * explicitly via the config file, we use a default number of
    386 	 * servers.  We will log this.
    387 	 */
    388 	else if (maxservers_set == 0)
    389 		logmaxservers = 1;
    390 
    391 	/*
    392 	 * Basic Sanity checks on options
    393 	 *
    394 	 * max_conns_allowed must be positive, except for the special
    395 	 * value of -1 which is used internally to mean unlimited, -1 isn't
    396 	 * documented but we allow it anyway.
    397 	 *
    398 	 * maxservers must be positive
    399 	 * listen_backlog must be positive or zero
    400 	 */
    401 	if (((max_conns_allowed != -1) && (max_conns_allowed <= 0)) ||
    402 	    (listen_backlog < 0) || (maxservers <= 0)) {
    403 		usage();
    404 	}
    405 
    406 	/*
    407 	 * Set current dir to server root
    408 	 */
    409 	if (chdir(dir) < 0) {
    410 		(void) fprintf(stderr, "%s:  ", MyName);
    411 		perror(dir);
    412 		exit(1);
    413 	}
    414 
    415 #ifndef DEBUG
    416 	pipe_fd = daemonize_init();
    417 #endif
    418 
    419 	openlog(MyName, LOG_PID | LOG_NDELAY, LOG_DAEMON);
    420 
    421 	/*
    422 	 * establish our lock on the lock file and write our pid to it.
    423 	 * exit if some other process holds the lock, or if there's any
    424 	 * error in writing/locking the file.
    425 	 */
    426 	pid = _enter_daemon_lock(NFSD);
    427 	switch (pid) {
    428 	case 0:
    429 		break;
    430 	case -1:
    431 		fprintf(stderr, "error locking for %s: %s", NFSD,
    432 		    strerror(errno));
    433 		exit(2);
    434 	default:
    435 		/* daemon was already running */
    436 		exit(0);
    437 	}
    438 
    439 	/*
    440 	 * If we've been given a list of paths to be used for distributed
    441 	 * stable storage, and provided we're going to run a version
    442 	 * that supports it, setup the DSS paths.
    443 	 */
    444 	if (dss_pathnames != NULL && nfs_server_vers_max >= DSS_VERSMIN) {
    445 		if (dss_init(dss_npaths, dss_pathnames) != 0) {
    446 			fprintf(stderr, "%s", "dss_init failed. Exiting.");
    447 			exit(1);
    448 		}
    449 	}
    450 
    451 	/*
    452 	 * Block all signals till we spawn other
    453 	 * threads.
    454 	 */
    455 	(void) sigfillset(&sgset);
    456 	(void) thr_sigsetmask(SIG_BLOCK, &sgset, NULL);
    457 
    458 	if (logmaxservers) {
    459 		fprintf(stderr,
    460 		    "Number of servers not specified. Using default of %d.",
    461 		    maxservers);
    462 	}
    463 
    464 	/*
    465 	 * Make sure to unregister any previous versions in case the
    466 	 * user is reconfiguring the server in interesting ways.
    467 	 */
    468 	svc_unreg(NFS_PROGRAM, NFS_VERSION);
    469 	svc_unreg(NFS_PROGRAM, NFS_V3);
    470 	svc_unreg(NFS_PROGRAM, NFS_V4);
    471 	svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V2);
    472 	svc_unreg(NFS_ACL_PROGRAM, NFS_ACL_V3);
    473 
    474 	/*
    475 	 * Set up kernel RPC thread pool for the NFS server.
    476 	 */
    477 	if (nfssvcpool(maxservers)) {
    478 		fprintf(stderr, "Can't set up kernel NFS service: %s. Exiting",
    479 		    strerror(errno));
    480 		exit(1);
    481 	}
    482 
    483 	/*
    484 	 * Set up blocked thread to do LWP creation on behalf of the kernel.
    485 	 */
    486 	if (svcwait(NFS_SVCPOOL_ID)) {
    487 		fprintf(stderr, "Can't set up NFS pool creator: %s. Exiting",
    488 		    strerror(errno));
    489 		exit(1);
    490 	}
    491 
    492 	/*
    493 	 * RDMA start and stop thread.
    494 	 * Per pool RDMA listener creation and
    495 	 * destructor thread.
    496 	 *
    497 	 * start rdma services and block in the kernel.
    498 	 * (only if proto or provider is not set to TCP or UDP)
    499 	 */
    500 	if ((proto == NULL) && (provider == NULL)) {
    501 		if (svcrdma(NFS_SVCPOOL_ID, nfs_server_vers_min,
    502 		    nfs_server_vers_max, nfs_server_delegation)) {
    503 			fprintf(stderr,
    504 			    "Can't set up RDMA creator thread : %s",
    505 			    strerror(errno));
    506 		}
    507 	}
    508 
    509 	/*
    510 	 * Now open up for signal delivery
    511 	 */
    512 
    513 	(void) thr_sigsetmask(SIG_UNBLOCK, &sgset, NULL);
    514 	sigset(SIGTERM, sigflush);
    515 	sigset(SIGUSR1, quiesce);
    516 
    517 	/*
    518 	 * Build a protocol block list for registration.
    519 	 */
    520 	protobp0 = protobp = (struct protob *)malloc(sizeof (struct protob));
    521 	protobp->serv = "NFS";
    522 	protobp->versmin = nfs_server_vers_min;
    523 	protobp->versmax = nfs_server_vers_max;
    524 	protobp->program = NFS_PROGRAM;
    525 
    526 	protobp->next = (struct protob *)malloc(sizeof (struct protob));
    527 	protobp = protobp->next;
    528 	protobp->serv = "NFS_ACL";		/* not used */
    529 	protobp->versmin = nfs_server_vers_min;
    530 	/* XXX - this needs work to get the version just right */
    531 	protobp->versmax = (nfs_server_vers_max > NFS_ACL_V3) ?
    532 	    NFS_ACL_V3 : nfs_server_vers_max;
    533 	protobp->program = NFS_ACL_PROGRAM;
    534 	protobp->next = (struct protob *)NULL;
    535 
    536 	if (allflag) {
    537 		if (do_all(protobp0, nfssvc, 0) == -1) {
    538 			fprintf(stderr, "setnetconfig failed : %s",
    539 			    strerror(errno));
    540 			exit(1);
    541 		}
    542 	} else if (proto) {
    543 		/* there's more than one match for the same protocol */
    544 		struct netconfig *nconf;
    545 		NCONF_HANDLE *nc;
    546 		bool_t	protoFound = FALSE;
    547 		if ((nc = setnetconfig()) == (NCONF_HANDLE *) NULL) {
    548 			fprintf(stderr, "setnetconfig failed : %s",
    549 			    strerror(errno));
    550 			goto done;
    551 		}
    552 		while (nconf = getnetconfig(nc)) {
    553 			if (strcmp(nconf->nc_proto, proto) == 0) {
    554 				protoFound = TRUE;
    555 				do_one(nconf->nc_device, NULL,
    556 				    protobp0, nfssvc, 0);
    557 			}
    558 		}
    559 		(void) endnetconfig(nc);
    560 		if (protoFound == FALSE) {
    561 			fprintf(stderr,
    562 			    "couldn't find netconfig entry for protocol %s",
    563 			    proto);
    564 		}
    565 	} else if (provider)
    566 		do_one(provider, proto, protobp0, nfssvc, 0);
    567 	else {
    568 		for (providerp = defaultproviders;
    569 		    *providerp != NULL; providerp++) {
    570 			provider = *providerp;
    571 			do_one(provider, NULL, protobp0, nfssvc, 0);
    572 		}
    573 	}
    574 done:
    575 
    576 	free(protobp);
    577 	free(protobp0);
    578 
    579 	if (num_fds == 0) {
    580 		fprintf(stderr, "Could not start NFS service for any protocol."
    581 		    " Exiting");
    582 		exit(1);
    583 	}
    584 
    585 	end_listen_fds = num_fds;
    586 
    587 	/*
    588 	 * nfsd is up and running as far as we are concerned.
    589 	 */
    590 	daemonize_fini(pipe_fd);
    591 
    592 	/*
    593 	 * Get rid of unneeded privileges.
    594 	 */
    595 	__fini_daemon_priv(PRIV_PROC_FORK, PRIV_PROC_EXEC, PRIV_PROC_SESSION,
    596 	    PRIV_FILE_LINK_ANY, PRIV_PROC_INFO, (char *)NULL);
    597 
    598 	/*
    599 	 * Poll for non-data control events on the transport descriptors.
    600 	 */
    601 	poll_for_action();
    602 
    603 	/*
    604 	 * If we get here, something failed in poll_for_action().
    605 	 */
    606 	return (1);
    607 }
    608 
    609 static int
    610 nfssvcpool(int maxservers)
    611 {
    612 	struct svcpool_args npa;
    613 
    614 	npa.id = NFS_SVCPOOL_ID;
    615 	npa.maxthreads = maxservers;
    616 	npa.redline = 0;
    617 	npa.qsize = 0;
    618 	npa.timeout = 0;
    619 	npa.stksize = 0;
    620 	npa.max_same_xprt = 0;
    621 	return (_nfssys(SVCPOOL_CREATE, &npa));
    622 }
    623 
    624 /*
    625  * Establish NFS service thread.
    626  */
    627 static int
    628 nfssvc(int fd, struct netbuf addrmask, struct netconfig *nconf)
    629 {
    630 	struct nfs_svc_args nsa;
    631 
    632 	nsa.fd = fd;
    633 	nsa.netid = nconf->nc_netid;
    634 	nsa.addrmask = addrmask;
    635 	if (strncasecmp(nconf->nc_proto, NC_UDP, strlen(NC_UDP)) == 0) {
    636 		nsa.versmax = (nfs_server_vers_max > NFS_V3) ?
    637 		    NFS_V3 : nfs_server_vers_max;
    638 		nsa.versmin = nfs_server_vers_min;
    639 		/*
    640 		 * If no version left, silently do nothing, previous
    641 		 * checks will have assured at least TCP is available.
    642 		 */
    643 		if (nsa.versmin > nsa.versmax)
    644 			return (0);
    645 	} else {
    646 		nsa.versmax = nfs_server_vers_max;
    647 		nsa.versmin = nfs_server_vers_min;
    648 	}
    649 	nsa.delegation = nfs_server_delegation;
    650 	return (_nfssys(NFS_SVC, &nsa));
    651 }
    652 
    653 static void
    654 usage(void)
    655 {
    656 	(void) fprintf(stderr,
    657 "usage: %s [ -a ] [ -c max_conns ] [ -p protocol ] [ -t transport ] ", MyName);
    658 	(void) fprintf(stderr, "\n[ -l listen_backlog ] [ nservers ]\n");
    659 	(void) fprintf(stderr,
    660 "\twhere -a causes <nservers> to be started on each appropriate transport,\n");
    661 	(void) fprintf(stderr,
    662 "\tmax_conns is the maximum number of concurrent connections allowed,\n");
    663 	(void) fprintf(stderr, "\t\tand max_conns must be a decimal number");
    664 	(void) fprintf(stderr, "> zero,\n");
    665 	(void) fprintf(stderr, "\tprotocol is a protocol identifier,\n");
    666 	(void) fprintf(stderr,
    667 	    "\ttransport is a transport provider name (i.e. device),\n");
    668 	(void) fprintf(stderr,
    669 	    "\tlisten_backlog is the TCP listen backlog,\n");
    670 	(void) fprintf(stderr,
    671 	    "\tand <nservers> must be a decimal number > zero.\n");
    672 	exit(1);
    673 }
    674 
    675 /*
    676  * Issue nfssys system call to flush all logging buffers asynchronously.
    677  *
    678  * NOTICE: It is extremely important to flush NFS logging buffers when
    679  *	   nfsd exits. When the system is halted or rebooted nfslogd
    680  *	   may not have an opportunity to flush the buffers.
    681  */
    682 static void
    683 nfsl_flush()
    684 {
    685 	struct nfsl_flush_args nfa;
    686 
    687 	memset((void *)&nfa, 0, sizeof (nfa));
    688 	nfa.version = NFSL_FLUSH_ARGS_VERS;
    689 	nfa.directive = NFSL_ALL;	/* flush all asynchronously */
    690 
    691 	if (_nfssys(LOG_FLUSH, &nfa) < 0)
    692 		syslog(LOG_ERR, "_nfssys(LOG_FLUSH) failed: %s\n",
    693 		    strerror(errno));
    694 }
    695 
    696 /*
    697  * SIGTERM handler.
    698  * Flush logging buffers and exit.
    699  */
    700 static void
    701 sigflush(int sig)
    702 {
    703 	nfsl_flush();
    704 	_exit(0);
    705 }
    706 
    707 /*
    708  * SIGUSR1 handler.
    709  *
    710  * Request that server quiesce, then (nfsd) exit. For subsequent warm start.
    711  *
    712  * This is a Contracted Project Private interface, for the sole use
    713  * of Sun Cluster HA-NFS. See PSARC/2004/497.
    714  *
    715  * Equivalent to SIGTERM handler if nfs_server_vers_max < QUIESCE_VERSMIN.
    716  */
    717 static void
    718 quiesce(int sig)
    719 {
    720 	int error;
    721 	int id = NFS_SVCPOOL_ID;
    722 
    723 	if (nfs_server_vers_max >= QUIESCE_VERSMIN) {
    724 		/* Request server quiesce at next shutdown */
    725 		error = _nfssys(NFS4_SVC_REQUEST_QUIESCE, &id);
    726 
    727 		/*
    728 		 * ENOENT is returned if there is no matching SVC pool
    729 		 * for the id. Possibly because the pool is not yet setup.
    730 		 * In this case, just exit as if no error. For all other errors,
    731 		 * just return and allow caller to retry.
    732 		 */
    733 		if (error && errno != ENOENT) {
    734 			syslog(LOG_ERR,
    735 			    "_nfssys(NFS4_SVC_REQUEST_QUIESCE) failed: %s",
    736 			    strerror(errno));
    737 			return;
    738 		}
    739 	}
    740 
    741 	/* Flush logging buffers */
    742 	nfsl_flush();
    743 
    744 	_exit(0);
    745 }
    746 
    747 /*
    748  * DSS: distributed stable storage.
    749  * Create leaf directories as required, keeping an eye on path
    750  * lengths. Calls exit(1) on failure.
    751  * The pathnames passed in must already exist, and must be writeable by nfsd.
    752  * Note: the leaf directories under NFS4_VAR_DIR are not created here;
    753  * they're created at pkg install.
    754  */
    755 static void
    756 dss_mkleafdirs(uint_t npaths, char **pathnames)
    757 {
    758 	int i;
    759 	char *tmppath = NULL;
    760 
    761 	/*
    762 	 * Create the temporary storage used by dss_mkleafdir() here,
    763 	 * rather than in that function, so that it only needs to be
    764 	 * done once, rather than once for each call. Too big to put
    765 	 * on the function's stack.
    766 	 */
    767 	tmppath = (char *)malloc(MAXPATHLEN);
    768 	if (tmppath == NULL) {
    769 		syslog(LOG_ERR, "tmppath malloc failed. Exiting");
    770 		exit(1);
    771 	}
    772 
    773 	for (i = 0; i < npaths; i++) {
    774 		char *p = pathnames[i];
    775 
    776 		dss_mkleafdir(p, NFS4_DSS_STATE_LEAF, tmppath);
    777 		dss_mkleafdir(p, NFS4_DSS_OLDSTATE_LEAF, tmppath);
    778 	}
    779 
    780 	free(tmppath);
    781 }
    782 
    783 /*
    784  * Create "leaf" in "dir" (which must already exist).
    785  * leaf: should start with a '/'
    786  */
    787 static void
    788 dss_mkleafdir(char *dir, char *leaf, char *tmppath)
    789 {
    790 	/* MAXPATHLEN includes the terminating NUL */
    791 	if (strlen(dir) + strlen(leaf) > MAXPATHLEN - 1) {
    792 		fprintf(stderr, "stable storage path too long: %s%s. Exiting",
    793 		    dir, leaf);
    794 		exit(1);
    795 	}
    796 
    797 	(void) snprintf(tmppath, MAXPATHLEN, "%s/%s", dir, leaf);
    798 
    799 	/* the directory may already exist: that's OK */
    800 	if (mkdir(tmppath, NFS4_DSS_DIR_MODE) == -1 && errno != EEXIST) {
    801 		fprintf(stderr, "error creating stable storage directory: "
    802 		    "%s: %s. Exiting", strerror(errno), tmppath);
    803 		exit(1);
    804 	}
    805 }
    806 
    807 /*
    808  * Create the storage dirs, and pass the path list to the kernel.
    809  * This requires the nfssrv module to be loaded; the _nfssys() syscall
    810  * will fail ENOTSUP if it is not.
    811  * Use libnvpair(3LIB) to pass the data to the kernel.
    812  */
    813 static int
    814 dss_init(uint_t npaths, char **pathnames)
    815 {
    816 	int i, j, nskipped, error;
    817 	char *bufp;
    818 	uint32_t bufsize;
    819 	size_t buflen;
    820 	nvlist_t *nvl;
    821 
    822 	if (npaths > 1) {
    823 		/*
    824 		 * We need to remove duplicate paths; this might be user error
    825 		 * in the general case, but HA-NFSv4 can also cause this.
    826 		 * Sort the pathnames array, and NULL out duplicates,
    827 		 * then write the non-NULL entries to a new array.
    828 		 * Sorting will also allow the kernel to optimise its searches.
    829 		 */
    830 
    831 		qsort(pathnames, npaths, sizeof (char *), qstrcmp);
    832 
    833 		/* now NULL out any duplicates */
    834 		i = 0; j = 1; nskipped = 0;
    835 		while (j < npaths) {
    836 			if (strcmp(pathnames[i], pathnames[j]) == NULL) {
    837 				pathnames[j] = NULL;
    838 				j++;
    839 				nskipped++;
    840 				continue;
    841 			}
    842 
    843 			/* skip i over any of its NULLed duplicates */
    844 			i = j++;
    845 		}
    846 
    847 		/* finally, write the non-NULL entries to a new array */
    848 		if (nskipped > 0) {
    849 			int nreal;
    850 			size_t sz;
    851 			char **tmp_pathnames;
    852 
    853 			nreal = npaths - nskipped;
    854 
    855 			sz = nreal * sizeof (char *);
    856 			tmp_pathnames = (char **)malloc(sz);
    857 			if (tmp_pathnames == NULL) {
    858 				fprintf(stderr, "tmp_pathnames malloc failed");
    859 				exit(1);
    860 			}
    861 
    862 			for (i = 0, j = 0; i < npaths; i++)
    863 				if (pathnames[i] != NULL)
    864 					tmp_pathnames[j++] = pathnames[i];
    865 			free(pathnames);
    866 			pathnames = tmp_pathnames;
    867 			npaths = nreal;
    868 		}
    869 
    870 	}
    871 
    872 	/* Create directories to store the distributed state files */
    873 	dss_mkleafdirs(npaths, pathnames);
    874 
    875 	/* Create the name-value pair list */
    876 	error = nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0);
    877 	if (error) {
    878 		fprintf(stderr, "nvlist_alloc failed: %s.", strerror(errno));
    879 		return (1);
    880 	}
    881 
    882 	/* Add the pathnames array as a single name-value pair */
    883 	error = nvlist_add_string_array(nvl, NFS4_DSS_NVPAIR_NAME,
    884 	    pathnames, npaths);
    885 	if (error) {
    886 		fprintf(stderr, "nvlist_add_string_array failed: %s.",
    887 		    strerror(errno));
    888 		nvlist_free(nvl);
    889 		return (1);
    890 	}
    891 
    892 	/*
    893 	 * Pack list into contiguous memory, for passing to kernel.
    894 	 * nvlist_pack() will allocate the memory for the buffer,
    895 	 * which we should free() when no longer needed.
    896 	 * NV_ENCODE_XDR for safety across ILP32/LP64 kernel boundary.
    897 	 */
    898 	bufp = NULL;
    899 	error = nvlist_pack(nvl, &bufp, &buflen, NV_ENCODE_XDR, 0);
    900 	if (error) {
    901 		fprintf(stderr, "nvlist_pack failed: %s.", strerror(errno));
    902 		nvlist_free(nvl);
    903 		return (1);
    904 	}
    905 
    906 	/* Now we have the packed buffer, we no longer need the list */
    907 	nvlist_free(nvl);
    908 
    909 	/*
    910 	 * Let the kernel know in advance how big the buffer is.
    911 	 * NOTE: we cannot just pass buflen, since size_t is a long, and
    912 	 * thus a different size between ILP32 userland and LP64 kernel.
    913 	 * Use an int for the transfer, since that should be big enough;
    914 	 * this is a no-op at the moment, here, since nfsd is 32-bit, but
    915 	 * that could change.
    916 	 */
    917 	bufsize = (uint32_t)buflen;
    918 	error = _nfssys(NFS4_DSS_SETPATHS_SIZE, &bufsize);
    919 	if (error) {
    920 		fprintf(stderr,
    921 		    "_nfssys(NFS4_DSS_SETPATHS_SIZE) failed: %s. ",
    922 		    strerror(errno));
    923 		free(bufp);
    924 		return (1);
    925 	}
    926 
    927 	/* Pass the packed buffer to the kernel */
    928 	error = _nfssys(NFS4_DSS_SETPATHS, bufp);
    929 	if (error) {
    930 		fprintf(stderr,
    931 		    "_nfssys(NFS4_DSS_SETPATHS) failed: %s. ", strerror(errno));
    932 		free(bufp);
    933 		return (1);
    934 	}
    935 
    936 	/*
    937 	 * The kernel has now unpacked the buffer and extracted the
    938 	 * pathnames array, we no longer need the buffer.
    939 	 */
    940 	free(bufp);
    941 
    942 	return (0);
    943 }
    944 
    945 /*
    946  * Quick sort string compare routine, for qsort.
    947  * Needed to make arg types correct.
    948  */
    949 int
    950 qstrcmp(const void *p1, const void *p2)
    951 {
    952 	char *s1 = *((char **)p1);
    953 	char *s2 = *((char **)p2);
    954 
    955 	return (strcmp(s1, s2));
    956 }
    957