Home | History | Annotate | Download | only in syscall
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/param.h>
     28 #include <sys/types.h>
     29 #include <sys/sysmacros.h>
     30 #include <sys/systm.h>
     31 #include <sys/errno.h>
     32 #include <sys/vfs.h>
     33 #include <sys/vnode.h>
     34 #include <sys/swap.h>
     35 #include <sys/file.h>
     36 #include <sys/proc.h>
     37 #include <sys/var.h>
     38 #include <sys/uadmin.h>
     39 #include <sys/signal.h>
     40 #include <sys/time.h>
     41 #include <vm/seg_kmem.h>
     42 #include <sys/modctl.h>
     43 #include <sys/callb.h>
     44 #include <sys/dumphdr.h>
     45 #include <sys/debug.h>
     46 #include <sys/ftrace.h>
     47 #include <sys/cmn_err.h>
     48 #include <sys/panic.h>
     49 #include <sys/ddi.h>
     50 #include <sys/sunddi.h>
     51 #include <sys/policy.h>
     52 #include <sys/zone.h>
     53 #include <sys/condvar.h>
     54 #include <sys/thread.h>
     55 #include <sys/sdt.h>
     56 
     57 /*
     58  * Administrivia system call.  We provide this in two flavors: one for calling
     59  * from the system call path (uadmin), and the other for calling from elsewhere
     60  * within the kernel (kadmin).  Callers must beware that certain uadmin cmd
     61  * values (specifically A_SWAPCTL) are only supported by uadmin and not kadmin.
     62  */
     63 
/*
 * Semaphore defined outside this file.  kadmin() acquires it with sema_p()
 * during A_SHUTDOWN and does not release it anywhere in this file.
 */
extern ksema_t fsflush_sema;
/*
 * ualock, uacond and ua_shutdown_thread together serialize the A_SHUTDOWN,
 * A_REBOOT, A_REMOUNT and A_CONFIG commands in kadmin().
 * ua_shutdown_thread is the thread currently running one of those commands,
 * or NULL when none is; it is read and written under ualock, and callers
 * that find it non-NULL sleep on uacond.
 */
kmutex_t ualock;
kcondvar_t uacond;
kthread_t *ua_shutdown_thread = NULL;

/* Set to 1 by kadmin() once an A_SHUTDOWN is in progress. */
int sys_shutdown = 0;
/*
 * Not referenced by the code visible in this file.
 * NOTE(review): presumably a tunable consulted by the fast reboot
 * implementation -- confirm against its users.
 */
volatile int fastreboot_dryrun = 0;
     71 
     72 /*
     73  * Kill all user processes in said zone.  A special argument of ALL_ZONES is
     74  * passed in when the system as a whole is shutting down.  The lack of per-zone
     75  * process lists is likely to make the following a performance bottleneck on a
     76  * system with many zones.
     77  */
     78 void
     79 killall(zoneid_t zoneid)
     80 {
     81 	proc_t *p;
     82 
     83 	ASSERT(zoneid != GLOBAL_ZONEID);
     84 	/*
     85 	 * Kill all processes except kernel daemons and ourself.
     86 	 * Make a first pass to stop all processes so they won't
     87 	 * be trying to restart children as we kill them.
     88 	 */
     89 	mutex_enter(&pidlock);
     90 	for (p = practive; p != NULL; p = p->p_next) {
     91 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
     92 		    p->p_exec != NULLVP &&	/* kernel daemons */
     93 		    p->p_as != &kas &&
     94 		    p->p_stat != SZOMB) {
     95 			mutex_enter(&p->p_lock);
     96 			p->p_flag |= SNOWAIT;
     97 			sigtoproc(p, NULL, SIGSTOP);
     98 			mutex_exit(&p->p_lock);
     99 		}
    100 	}
    101 	p = practive;
    102 	while (p != NULL) {
    103 		if ((zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid) &&
    104 		    p->p_exec != NULLVP &&	/* kernel daemons */
    105 		    p->p_as != &kas &&
    106 		    p->p_stat != SIDL &&
    107 		    p->p_stat != SZOMB) {
    108 			mutex_enter(&p->p_lock);
    109 			if (sigismember(&p->p_sig, SIGKILL)) {
    110 				mutex_exit(&p->p_lock);
    111 				p = p->p_next;
    112 			} else {
    113 				sigtoproc(p, NULL, SIGKILL);
    114 				mutex_exit(&p->p_lock);
    115 				(void) cv_reltimedwait(&p->p_srwchan_cv,
    116 				    &pidlock, hz, TR_CLOCK_TICK);
    117 				p = practive;
    118 			}
    119 		} else {
    120 			p = p->p_next;
    121 		}
    122 	}
    123 	mutex_exit(&pidlock);
    124 }
    125 
/*
 * In-kernel administrative entry point; uadmin() below forwards to it for
 * callers in the global zone.
 *
 *	cmd	One of the <sys/uadmin.h> command codes handled here:
 *		A_FTRACE, A_SHUTDOWN, A_REBOOT, A_REMOUNT, A_FREEZE, A_DUMP,
 *		A_SDTTEST or A_CONFIG.  Anything else fails with EINVAL.
 *	fcn	Sub-function, interpreted per command.
 *	mdep	Optional command-specific argument.  For A_SHUTDOWN, A_REBOOT
 *		and A_DUMP a value whose first byte is '/' is passed through
 *		i_convert_boot_device_name() before use.
 *	credp	Credentials checked with secpolicy_sys_config().
 *
 * Returns 0 on success or an errno value: EPERM if the privilege check
 * fails, EINVAL for an unknown cmd (or unknown A_FTRACE fcn), EINTR if the
 * wait for a concurrent shutdown-class command is interrupted, ENOTSUP if
 * the cpr module cannot be loaded, or whatever exitlwps() or cpr() returned.
 * A_SHUTDOWN and A_REBOOT are not expected to return, and A_DUMP (other than
 * AD_NOSYNC) ends in panic().
 */
int
kadmin(int cmd, int fcn, void *mdep, cred_t *credp)
{
	int error = 0;
	char *buf;
	size_t buflen = 0;
	/* Handed to mdboot(); only set to B_TRUE on the A_SHUTDOWN path. */
	boolean_t invoke_cb = B_FALSE;

	/*
	 * We might be called directly by the kernel's fault-handling code, so
	 * we can't assert that the caller is in the global zone.
	 */

	/*
	 * Make sure that cmd is one of the valid <sys/uadmin.h> command codes
	 * and that we have appropriate privileges for this action.
	 */
	switch (cmd) {
	case A_FTRACE:
	case A_SHUTDOWN:
	case A_REBOOT:
	case A_REMOUNT:
	case A_FREEZE:
	case A_DUMP:
	case A_SDTTEST:
	case A_CONFIG:
		if (secpolicy_sys_config(credp, B_FALSE) != 0)
			return (EPERM);
		break;

	default:
		return (EINVAL);
	}

	/*
	 * Serialize these operations on ualock.  If it is held, the
	 * system should shutdown, reboot, or remount shortly, unless there is
	 * an error.  We need a cv rather than just a mutex because proper
	 * functioning of A_REBOOT relies on being able to interrupt blocked
	 * userland callers.
	 *
	 * We only clear ua_shutdown_thread after A_REMOUNT or A_CONFIG.
	 * Other commands should never return.
	 */
	if (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_REMOUNT ||
	    cmd == A_CONFIG) {
		mutex_enter(&ualock);
		while (ua_shutdown_thread != NULL) {
			if (cv_wait_sig(&uacond, &ualock) == 0) {
				/*
				 * If we were interrupted, leave, and handle
				 * the signal (or exit, depending on what
				 * happened)
				 */
				mutex_exit(&ualock);
				return (EINTR);
			}
		}
		/* We are now the single owner of the shutdown path. */
		ua_shutdown_thread = curthread;
		mutex_exit(&ualock);
	}

	switch (cmd) {
	case A_SHUTDOWN:
	{
		proc_t *p = ttoproc(curthread);

		/*
		 * Release (almost) all of our own resources if we are called
		 * from a user context, however if we are calling kadmin() from
		 * a kernel context then we do not release these resources.
		 */
		if (p != &p0) {
			proc_is_exiting(p);
			if ((error = exitlwps(0)) != 0) {
				/*
				 * Another thread in this process also called
				 * exitlwps().
				 */
				/* Give up ownership so a waiter can proceed. */
				mutex_enter(&ualock);
				ua_shutdown_thread = NULL;
				cv_signal(&uacond);
				mutex_exit(&ualock);
				return (error);
			}
			mutex_enter(&p->p_lock);
			p->p_flag |= SNOWAIT;
			sigfillset(&p->p_ignore);
			curthread->t_lwp->lwp_cursig = 0;
			curthread->t_lwp->lwp_extsig = 0;
			if (p->p_exec) {
				/*
				 * Clear p_exec under p_lock, but drop the
				 * lock before releasing the vnode hold.
				 */
				vnode_t *exec_vp = p->p_exec;
				p->p_exec = NULLVP;
				mutex_exit(&p->p_lock);
				VN_RELE(exec_vp);
			} else {
				mutex_exit(&p->p_lock);
			}

			pollcleanup();
			closeall(P_FINFO(curproc));
			relvm();

		} else {
			/*
			 * Reset t_cred if not set because much of the
			 * filesystem code depends on CRED() being valid.
			 */
			if (curthread->t_cred == NULL)
				curthread->t_cred = kcred;
		}

		/* indicate shutdown in progress */
		sys_shutdown = 1;

		/*
		 * Communicate that init shouldn't be restarted.
		 */
		zone_shutdown_global();

		killall(ALL_ZONES);
		/*
		 * If we are calling kadmin() from a kernel context then we
		 * do not release these resources.
		 */
		if (ttoproc(curthread) != &p0) {
			/*
			 * Drop our holds on the current and root directories
			 * and on the cwd string, then point u_cdir at rootdir.
			 */
			VN_RELE(PTOU(curproc)->u_cdir);
			if (PTOU(curproc)->u_rdir)
				VN_RELE(PTOU(curproc)->u_rdir);
			if (PTOU(curproc)->u_cwd)
				refstr_rele(PTOU(curproc)->u_cwd);

			PTOU(curproc)->u_cdir = rootdir;
			PTOU(curproc)->u_rdir = NULL;
			PTOU(curproc)->u_cwd = NULL;
		}

		/*
		 * Allow the reboot/halt/poweroff code a chance to do
		 * anything it needs to whilst we still have filesystems
		 * mounted, like loading any modules necessary for later
		 * performing the actual poweroff.
		 */
		/*
		 * NOTE(review): the string returned by
		 * i_convert_boot_device_name() is not freed on this path, and
		 * the conversion is repeated in the A_REBOOT case below;
		 * presumably harmless because the system is going down --
		 * confirm.
		 */
		if ((mdep != NULL) && (*(char *)mdep == '/')) {
			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
			mdpreboot(cmd, fcn, buf);
		} else
			mdpreboot(cmd, fcn, mdep);

		/*
		 * Allow fsflush to finish running and then prevent it
		 * from ever running again so that vfs_unmountall() and
		 * vfs_syncall() can acquire the vfs locks they need.
		 */
		sema_p(&fsflush_sema);
		(void) callb_execute_class(CB_CL_UADMIN_PRE_VFS, NULL);

		vfs_unmountall();
		(void) VFS_MOUNTROOT(rootvfs, ROOT_UNMOUNT);
		vfs_syncall();

		dump_ereports();
		dump_messages();

		invoke_cb = B_TRUE;

		/* FALLTHROUGH */
	}

	case A_REBOOT:
		/*
		 * Reached directly for A_REBOOT (invoke_cb == B_FALSE) or by
		 * falling through from A_SHUTDOWN (invoke_cb == B_TRUE); cmd
		 * still holds the command originally requested.
		 */
		if ((mdep != NULL) && (*(char *)mdep == '/')) {
			buf = i_convert_boot_device_name(mdep, NULL, &buflen);
			mdboot(cmd, fcn, buf, invoke_cb);
		} else
			mdboot(cmd, fcn, mdep, invoke_cb);
		/* no return expected */
		break;

	case A_CONFIG:
		/*
		 * NOTE(review): any fcn other than AD_UPDATE_BOOT_CONFIG falls
		 * out of this inner switch and the call returns 0; on __sparc
		 * AD_UPDATE_BOOT_CONFIG itself does nothing.  Confirm that
		 * silently succeeding is intended.
		 */
		switch (fcn) {
		case AD_UPDATE_BOOT_CONFIG:
#ifndef	__sparc
		{
			extern void fastboot_update_config(const char *);

			fastboot_update_config(mdep);
		}
#endif

			break;
		}
		/* Let other threads enter the shutdown path now */
		mutex_enter(&ualock);
		ua_shutdown_thread = NULL;
		cv_signal(&uacond);
		mutex_exit(&ualock);
		break;

	case A_REMOUNT:
		(void) VFS_MOUNTROOT(rootvfs, ROOT_REMOUNT);
		/* Let other threads enter the shutdown path now */
		mutex_enter(&ualock);
		ua_shutdown_thread = NULL;
		cv_signal(&uacond);
		mutex_exit(&ualock);
		break;

	case A_FREEZE:
	{
		/*
		 * This is the entrypoint for all suspend/resume actions.
		 */
		extern int cpr(int, void *);

		/*
		 * A_FREEZE never took ownership of ua_shutdown_thread above,
		 * so returning directly here leaves nothing to release.
		 */
		if (modload("misc", "cpr") == -1)
			return (ENOTSUP);
		/* Let the CPR module decide what to do with mdep */
		error = cpr(fcn, mdep);
		break;
	}

	case A_FTRACE:
	{
		switch (fcn) {
		case AD_FTRACE_START:
			(void) FTRACE_START();
			break;
		case AD_FTRACE_STOP:
			(void) FTRACE_STOP();
			break;
		default:
			error = EINVAL;
		}
		break;
	}

	case A_DUMP:
	{
		/* AD_NOSYNC only sets in_sync and returns 0; it does not panic. */
		if (fcn == AD_NOSYNC) {
			in_sync = 1;
			break;
		}

		/* Record how to boot after the forced panic below. */
		panic_bootfcn = fcn;
		panic_forced = 1;

		if ((mdep != NULL) && (*(char *)mdep == '/')) {
			panic_bootstr = i_convert_boot_device_name(mdep,
			    NULL, &buflen);
		} else
			panic_bootstr = mdep;

#ifndef	__sparc
		extern void fastboot_update_and_load(int, char *);

		/* Note: receives the original mdep, not panic_bootstr. */
		fastboot_update_and_load(fcn, mdep);
#endif

		panic("forced crash dump initiated at user request");
		/*NOTREACHED*/
	}

	case A_SDTTEST:
	{
		/* Fires a seven-argument SDT probe named "test". */
		DTRACE_PROBE7(test, int, 1, int, 2, int, 3, int, 4, int, 5,
		    int, 6, int, 7);
		break;
	}

	default:
		/* Unknown commands were already rejected by the first switch. */
		error = EINVAL;
	}

	return (error);
}
    401 
    402 int
    403 uadmin(int cmd, int fcn, uintptr_t mdep)
    404 {
    405 	int error = 0, rv = 0;
    406 	size_t nbytes = 0;
    407 	cred_t *credp = CRED();
    408 	char *bootargs = NULL;
    409 	int reset_status = 0;
    410 
    411 	if (cmd == A_SHUTDOWN && fcn == AD_FASTREBOOT_DRYRUN) {
    412 		ddi_walk_devs(ddi_root_node(), check_driver_quiesce,
    413 		    &reset_status);
    414 		if (reset_status != 0)
    415 			return (EIO);
    416 		else
    417 			return (0);
    418 	}
    419 
    420 	/*
    421 	 * The swapctl system call doesn't have its own entry point: it uses
    422 	 * uadmin as a wrapper so we just call it directly from here.
    423 	 */
    424 	if (cmd == A_SWAPCTL) {
    425 		if (get_udatamodel() == DATAMODEL_NATIVE)
    426 			error = swapctl(fcn, (void *)mdep, &rv);
    427 #if defined(_SYSCALL32_IMPL)
    428 		else
    429 			error = swapctl32(fcn, (void *)mdep, &rv);
    430 #endif /* _SYSCALL32_IMPL */
    431 		return (error ? set_errno(error) : rv);
    432 	}
    433 
    434 	/*
    435 	 * Certain subcommands intepret a non-NULL mdep value as a pointer to
    436 	 * a boot string.  We pull that in as bootargs, if applicable.
    437 	 */
    438 	if (mdep != NULL &&
    439 	    (cmd == A_SHUTDOWN || cmd == A_REBOOT || cmd == A_DUMP ||
    440 	    cmd == A_FREEZE || cmd == A_CONFIG)) {
    441 		bootargs = kmem_zalloc(BOOTARGS_MAX, KM_SLEEP);
    442 		if ((error = copyinstr((const char *)mdep, bootargs,
    443 		    BOOTARGS_MAX, &nbytes)) != 0) {
    444 			kmem_free(bootargs, BOOTARGS_MAX);
    445 			return (set_errno(error));
    446 		}
    447 	}
    448 
    449 	/*
    450 	 * Invoke the appropriate kadmin() routine.
    451 	 */
    452 	if (getzoneid() != GLOBAL_ZONEID)
    453 		error = zone_kadmin(cmd, fcn, bootargs, credp);
    454 	else
    455 		error = kadmin(cmd, fcn, bootargs, credp);
    456 
    457 	if (bootargs != NULL)
    458 		kmem_free(bootargs, BOOTARGS_MAX);
    459 	return (error ? set_errno(error) : 0);
    460 }
    461