Home | History | Annotate | Download | only in syscall
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 
     28 #include <sys/param.h>
     29 #include <sys/types.h>
     30 #include <sys/sysmacros.h>
     31 #include <sys/systm.h>
     32 #include <sys/errno.h>
     33 #include <sys/vfs.h>
     34 #include <sys/vnode.h>
     35 #include <sys/swap.h>
     36 #include <sys/file.h>
     37 #include <sys/proc.h>
     38 #include <sys/var.h>
     39 #include <sys/uadmin.h>
     40 #include <sys/signal.h>
     41 #include <sys/time.h>
     42 #include <vm/seg_kmem.h>
     43 #include <sys/modctl.h>
     44 #include <sys/callb.h>
     45 #include <sys/dumphdr.h>
     46 #include <sys/debug.h>
     47 #include <sys/ftrace.h>
     48 #include <sys/cmn_err.h>
     49 #include <sys/panic.h>
     50 #include <sys/ddi.h>
     51 #include <sys/sunddi.h>
     52 #include <sys/policy.h>
     53 #include <sys/zone.h>
     54 #include <sys/condvar.h>
     55 #include <sys/thread.h>
     56 #include <sys/sdt.h>
     57 
     58 /*
     59  * Administrivia system call.  We provide this in two flavors: one for calling
     60  * from the system call path (uadmin), and the other for calling from elsewhere
     61  * within the kernel (kadmin).  Callers must beware that certain uadmin cmd
     62  * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin.
     63  */
     64 
/*
 * Semaphore defined elsewhere.  kadmin()'s A_SHUTDOWN path takes it with
 * sema_p() and never releases it, so that fsflush cannot run again once
 * filesystems are about to be unmounted.
 */
extern ksema_t fsflush_sema;
/*
 * ualock/uacond serialize A_SHUTDOWN, A_REBOOT, A_REMOUNT and A_CONFIG
 * requests in kadmin().  ua_shutdown_thread is the thread that currently
 * owns that path, or NULL when it is free; it is read and written only
 * with ualock held.
 */
kmutex_t ualock;
kcondvar_t uacond;
kthread_t *ua_shutdown_thread = NULL;

/* Set to 1 by kadmin() once an A_SHUTDOWN request is under way. */
int sys_shutdown = 0;
/* NOTE(review): not referenced anywhere in this file; consumers unknown. */
volatile int fastreboot_dryrun = 0;
     72 
     73 /*
     74  * Kill all user processes in said zone.  A special argument of ALL_ZONES is
     75  * passed in when the system as a whole is shutting down.  The lack of per-zone
     76  * process lists is likely to make the following a performance bottleneck on a
     77  * system with many zones.
     78  */
     79 void
     80 killall(zoneid_t zoneid)
     81 {
     82 	proc_t *p;
     83 
     84 	ASSERT(zoneid != GLOBAL_ZONEID);
     85 	/*
     86 	 * Kill all processes except kernel daemons and ourself.
     87 	 * Make a first pass to stop all processes so they won't
     88 	 * be trying to restart children as we kill them.
     89 	 */
     90 	mutex_enter(&pidlock);
     91 	for (p = practive; p != NULL; p = p->p_next) {
     92 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
     93 		    p->p_exec != NULLVP &&	/* kernel daemons */
     94 		    p->p_as != &kas &&
     95 		    p->p_stat != SZOMB) {
     96 			mutex_enter(&p->p_lock);
     97 			p->p_flag |= SNOWAIT;
     98 			sigtoproc(p, NULL, SIGSTOP);
     99 			mutex_exit(&p->p_lock);
    100 		}
    101 	}
    102 	p = practive;
    103 	while (p != NULL) {
    104 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
    105 		    p->p_exec != NULLVP &&	/* kernel daemons */
    106 		    p->p_as != &kas &&
    107 		    p->p_stat != SIDL &&
    108 		    p->p_stat != SZOMB) {
    109 			mutex_enter(&p->p_lock);
    110 			if (sigismember(&p->p_sig, SIGKILL)) {
    111 				mutex_exit(&p->p_lock);
    112 				p = p->p_next;
    113 			} else {
    114 				sigtoproc(p, NULL, SIGKILL);
    115 				mutex_exit(&p->p_lock);
    116 				(void) cv_timedwait(&p->p_srwchan_cv, &pidlock,
    117 				    lbolt + hz);
    118 				p = practive;
    119 			}
    120 		} else {
    121 			p = p->p_next;
    122 		}
    123 	}
    124 	mutex_exit(&pidlock);
    125 }
    126 
    127 int
    128 kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
    129 {
    130 	int error = 0;
    131 	char *buf;
    132 	size_t buflen = 0;
    133 	boolean_t invoke_cb = B_FALSE;
    134 
    135 	/*
    136 	 * We might be called directly by the kernel's fault-handling code, so
    137 	 * we can't assert that the caller is in the global zone.
    138 	 */
    139 
    140 	/*
    141 	 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes
    142 	 * and that we have appropriate privileges for this action.
    143 	 */
    144 	switch (cmd) {
    145 	case A_FTRACE:
    146 	case A_SHUTDOWN:
    147 	case A_REBOOT:
    148 	case A_REMOUNT:
    149 	case A_FREEZE:
    150 	case A_DUMP:
    151 	case A_SDTTEST:
    152 	case A_CONFIG:
    153 		if (secpolicy_sys_config(credp, B_FALSE) != 0)
    154 			return (EPERM);
    155 		break;
    156 
    157 	default:
    158 		return (EINVAL);
    159 	}
    160 
    161 	/*
    162 	 * Serialize these operations on ualock.  If it is held, the
    163 	 * system should shutdown, reboot, or remount shortly, unless there is
    164 	 * an error.  We need a cv rather than just a mutex because proper
    165 	 * functioning of A_REBOOT relies on being able to interrupt blocked
    166 	 * userland callers.
    167 	 *
    168 	 * We only clear ua_shutdown_thread after A_REMOUNT or A_CONFIG.
    169 	 * Other commands should never return.
    170 	 */
    171 	if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT ||
    172 	    cmd == A_CONFIG) {
    173 		mutex_enter(&ualock);
    174 		while (ua_shutdown_thread != NULL) {
    175 			if (cv_wait_sig(&uacond, &ualock) == 0) {
    176 				/*
    177 				 * If we were interrupted, leave, and handle
    178 				 * the signal (or exit, depending on what
    179 				 * happened)
    180 				 */
    181 				mutex_exit(&ualock);
    182 				return (EINTR);
    183 			}
    184 		}
    185 		ua_shutdown_thread = curthread;
    186 		mutex_exit(&ualock);
    187 	}
    188 
    189 	switch (cmd) {
    190 	case A_SHUTDOWN:
    191 	{
    192 		proc_t *p = ttoproc(curthread);
    193 
    194 		/*
    195 		 * Release (almost) all of our own resources if we are called
    196 		 * from a user context, however if we are calling kadmin() from
    197 		 * a kernel context then we do not release these resources.
    198 		 */
    199 		if (p != &p0) {
    200 			proc_is_exiting(p);
    201 			if ((error = exitlwps(0)) != 0) {
    202 				/*
    203 				 * Another thread in this process also called
    204 				 * exitlwps().
    205 				 */
    206 				mutex_enter(&ualock);
    207 				ua_shutdown_thread = NULL;
    208 				cv_signal(&uacond);
    209 				mutex_exit(&ualock);
    210 				return (error);
    211 			}
    212 			mutex_enter(&p->p_lock);
    213 			p->p_flag |= SNOWAIT;
    214 			sigfillset(&p->p_ignore);
    215 			curthread->t_lwp->lwp_cursig = 0;
    216 			curthread->t_lwp->lwp_extsig = 0;
    217 			if (p->p_exec) {
    218 				vnode_t *exec_vp = p->p_exec;
    219 				p->p_exec = NULLVP;
    220 				mutex_exit(&p->p_lock);
    221 				VN_RELE(exec_vp);
    222 			} else {
    223 				mutex_exit(&p->p_lock);
    224 			}
    225 
    226 			pollcleanup();
    227 			closeall(P_FINFO(curproc));
    228 			relvm();
    229 
    230 		} else {
    231 			/*
    232 			 * Reset t_cred if not set because much of the
    233 			 * filesystem code depends on CRED() being valid.
    234 			 */
    235 			if (curthread->t_cred == NULL)
    236 				curthread->t_cred = kcred;
    237 		}
    238 
    239 		/* indicate shutdown in progress */
    240 		sys_shutdown = 1;
    241 
    242 		/*
    243 		 * Communcate that init shouldn't be restarted.
    244 		 */
    245 		zone_shutdown_global();
    246 
    247 		killall(ALL_ZONES);
    248 		/*
    249 		 * If we are calling kadmin() from a kernel context then we
    250 		 * do not release these resources.
    251 		 */
    252 		if (ttoproc(curthread) != &p0) {
    253 			VN_RELE(PTOU(curproc)->u_cdir);
    254 			if (PTOU(curproc)->u_rdir)
    255 				VN_RELE(PTOU(curproc)->u_rdir);
    256 			if (PTOU(curproc)->u_cwd)
    257 				refstr_rele(PTOU(curproc)->u_cwd);
    258 
    259 			PTOU(curproc)->u_cdir = rootdir;
    260 			PTOU(curproc)->u_rdir = NULL;
    261 			PTOU(curproc)->u_cwd = NULL;
    262 		}
    263 
    264 		/*
    265 		 * Allow the reboot/halt/poweroff code a chance to do
    266 		 * anything it needs to whilst we still have filesystems
    267 		 * mounted, like loading any modules necessary for later
    268 		 * performing the actual poweroff.
    269 		 */
    270 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
    271 			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
    272 			mdpreboot(cmd, fcn, buf);
    273 		} else
    274 			mdpreboot(cmd, fcn, mdep);
    275 
    276 		/*
    277 		 * Allow fsflush to finish running and then prevent it
    278 		 * from ever running again so that vfs_unmountall() and
    279 		 * vfs_syncall() can acquire the vfs locks they need.
    280 		 */
    281 		sema_p(&fsflush_sema);
    282 		(void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL);
    283 
    284 		vfs_unmountall();
    285 		(void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT);
    286 		vfs_syncall();
    287 
    288 		dump_ereports();
    289 		dump_messages();
    290 
    291 		invoke_cb = B_TRUE;
    292 
    293 		/* FALLTHROUGH */
    294 	}
    295 
    296 	case A_REBOOT:
    297 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
    298 			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
    299 			mdboot(cmd, fcn, buf, invoke_cb);
    300 		} else
    301 			mdboot(cmd, fcn, mdep, invoke_cb);
    302 		/* no return expected */
    303 		break;
    304 
    305 	case A_CONFIG:
    306 		switch (fcn) {
    307 		case AD_UPDATE_BOOT_CONFIG:
    308 #ifndef	__sparc
    309 		{
    310 			extern void fastboot_update_config(const char *);
    311 
    312 			fastboot_update_config(mdep);
    313 		}
    314 #endif
    315 
    316 			break;
    317 		}
    318 		/* Let other threads enter the shutdown path now */
    319 		mutex_enter(&ualock);
    320 		ua_shutdown_thread = NULL;
    321 		cv_signal(&uacond);
    322 		mutex_exit(&ualock);
    323 		break;
    324 
    325 	case A_REMOUNT:
    326 		(void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT);
    327 		/* Let other threads enter the shutdown path now */
    328 		mutex_enter(&ualock);
    329 		ua_shutdown_thread = NULL;
    330 		cv_signal(&uacond);
    331 		mutex_exit(&ualock);
    332 		break;
    333 
    334 	case A_FREEZE:
    335 	{
    336 		/*
    337 		 * This is the entrypoint for all suspend/resume actions.
    338 		 */
    339 		extern int cpr(int, void *);
    340 
    341 		if (modload("misc", "cpr") == -1)
    342 			return (ENOTSUP);
    343 		/* Let the CPR module decide what to do with mdep */
    344 		error = cpr(fcn, mdep);
    345 		break;
    346 	}
    347 
    348 	case A_FTRACE:
    349 	{
    350 		switch (fcn) {
    351 		case AD_FTRACE_START:
    352 			(void) FTRACE_START();
    353 			break;
    354 		case AD_FTRACE_STOP:
    355 			(void) FTRACE_STOP();
    356 			break;
    357 		default:
    358 			error = EINVAL;
    359 		}
    360 		break;
    361 	}
    362 
    363 	case A_DUMP:
    364 	{
    365 		if (fcn == AD_NOSYNC) {
    366 			in_sync = 1;
    367 			break;
    368 		}
    369 
    370 		panic_bootfcn = fcn;
    371 		panic_forced = 1;
    372 
    373 		if ((mdep != NULL) && (*(char *)mdep == '/')) {
    374 			panic_bootstr = i_convert_boot_device_name(mdep,
    375 			    NULL, &buflen);
    376 		} else
    377 			panic_bootstr = mdep;
    378 
    379 #ifndef	__sparc
    380 		extern void fastboot_update_and_load(int, char *);
    381 
    382 		fastboot_update_and_load(fcn, mdep);
    383 #endif
    384 
    385 		panic("forced crash dump initiated at user request");
    386 		/*NOTREACHED*/
    387 	}
    388 
    389 	case A_SDTTEST:
    390 	{
    391 		DTRACE_PROBE7(test, int, 1, int, 2, int, 3, int, 4, int, 5,
    392 		    int, 6, int, 7);
    393 		break;
    394 	}
    395 
    396 	default:
    397 		error = EINVAL;
    398 	}
    399 
    400 	return (error);
    401 }
    402 
    403 int
    404 uadmin(int cmd, int fcn, uintptr_t mdep)
    405 {
    406 	int error = 0, rv = 0;
    407 	size_t nbytes = 0;
    408 	cred_t *credp = CRED();
    409 	char *bootargs = NULL;
    410 	int reset_status = 0;
    411 
    412 	if (cmd == A_SHUTDOWN && fcn == AD_FASTREBOOT_DRYRUN) {
    413 		ddi_walk_devs(ddi_root_node(), check_driver_quiesce,
    414 		    &reset_status);
    415 		if (reset_status != 0)
    416 			return (EIO);
    417 		else
    418 			return (0);
    419 	}
    420 
    421 	/*
    422 	 * The swapctl system call doesn't have its own entry point: it uses
    423 	 * uadmin as a wrapper so we just call it directly from here.
    424 	 */
    425 	if (cmd == A_SWAPCTL) {
    426 		if (get_udatamodel() == DATAMODEL_NATIVE)
    427 			error = swapctl(fcn, (void *)mdep, &rv);
    428 #if defined(_SYSCALL32_IMPL)
    429 		else
    430 			error = swapctl32(fcn, (void *)mdep, &rv);
    431 #endif /* _SYSCALL32_IMPL */
    432 		return (error ? set_errno(error) : rv);
    433 	}
    434 
    435 	/*
    436 	 * Certain subcommands intepret a non-NULL mdep value as a pointer to
    437 	 * a boot string.  We pull that in as bootargs, if applicable.
    438 	 */
    439 	if (mdep != NULL &&
    440 	    (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP ||
    441 	    cmd == A_FREEZE || cmd == A_CONFIG)) {
    442 		bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
    443 		if ((error = copyinstr((const char *)mdep, bootargs,
    444 		    BOOTARGS_MAX, &nbytes)) != 0) {
    445 			kmem_free(bootargs, BOOTARGS_MAX);
    446 			return (set_errno(error));
    447 		}
    448 	}
    449 
    450 	/*
    451 	 * Invoke the appropriate kadmin() routine.
    452 	 */
    453 	if (getzoneid() != GLOBAL_ZONEID)
    454 		error = zone_kadmin(cmd, fcn, bootargs, credp);
    455 	else
    456 		error = kadmin(cmd, fcn, bootargs, credp);
    457 
    458 	if (bootargs != NULL)
    459 		kmem_free(bootargs, BOOTARGS_MAX);
    460 	return (error ? set_errno(error) : 0);
    461 }
    462