Home | History | Annotate | Download | only in os
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     28 /*	  All Rights Reserved  	*/
     29 
     30 #include <sys/param.h>
     31 #include <sys/types.h>
     32 #include <sys/time.h>
     33 #include <sys/sysmacros.h>
     34 #include <sys/proc.h>
     35 #include <sys/systm.h>
     36 #include <sys/cred.h>
     37 #include <sys/user.h>
     38 #include <sys/utsname.h>
     39 #include <sys/errno.h>
     40 #include <sys/signal.h>
     41 #include <sys/siginfo.h>
     42 #include <sys/fault.h>
     43 #include <sys/syscall.h>
     44 #include <sys/ucontext.h>
     45 #include <sys/prsystm.h>
     46 #include <sys/vnode.h>
     47 #include <sys/var.h>
     48 #include <sys/file.h>
     49 #include <sys/pathname.h>
     50 #include <sys/vfs.h>
     51 #include <sys/exec.h>
     52 #include <sys/debug.h>
     53 #include <sys/stack.h>
     54 #include <sys/kmem.h>
     55 #include <sys/schedctl.h>
     56 #include <sys/core.h>
     57 #include <sys/corectl.h>
     58 #include <sys/cmn_err.h>
     59 #include <vm/as.h>
     60 #include <sys/rctl.h>
     61 #include <sys/nbmlock.h>
     62 #include <sys/stat.h>
     63 #include <sys/zone.h>
     64 #include <sys/contract/process_impl.h>
     65 #include <sys/ddi.h>
     66 
     67 /*
     68  * Processes running within a zone potentially dump core in 3 locations,
     69  * based on the per-process, per-zone, and the global zone's core settings.
     70  *
     71  * Per-zone and global zone settings are often referred to as "global"
     72  * settings since they apply to the system (or zone) as a whole, as
     73  * opposed to a particular process.
     74  */
     75 enum core_types {
     76 	CORE_PROC,	/* Use per-process settings */
     77 	CORE_ZONE,	/* Use per-zone settings */
     78 	CORE_GLOBAL	/* Use global zone settings */
     79 };
     80 
     81 /*
     82  * Log information about "global" core dumps to syslog.
     83  */
     84 static void
     85 core_log(struct core_globals *cg, int error, const char *why, const char *path,
     86     zoneid_t zoneid)
     87 {
     88 	proc_t *p = curproc;
     89 	pid_t pid = p->p_pid;
     90 	char *fn = PTOU(p)->u_comm;
     91 
     92 	if (!(cg->core_options & CC_GLOBAL_LOG))
     93 		return;
     94 
     95 	if (path == NULL)
     96 		zcmn_err(zoneid, CE_NOTE, "core_log: %s[%d] %s", fn, pid, why);
     97 	else if (error == 0)
     98 		zcmn_err(zoneid, CE_NOTE, "core_log: %s[%d] %s: %s", fn, pid,
     99 		    why, path);
    100 	else
    101 		zcmn_err(zoneid, CE_NOTE, "core_log: %s[%d] %s, errno=%d: %s",
    102 		    fn, pid, why, error, path);
    103 }
    104 
    105 /*
    106  * Private version of vn_remove().
    107  * Refuse to unlink a directory or an unwritable file.
    108  * Also allow the process to access files normally inaccessible due to
    109  * chroot(2) or Zone limitations.
    110  */
    111 static int
    112 remove_core_file(char *fp, enum core_types core_type)
    113 {
    114 	vnode_t *vp = NULL;		/* entry vnode */
    115 	vnode_t *dvp;			/* ptr to parent dir vnode */
    116 	vfs_t *dvfsp;
    117 	int error;
    118 	int in_crit = 0;
    119 	pathname_t pn;			/* name of entry */
    120 	vnode_t *startvp, *rootvp;
    121 
    122 	if ((error = pn_get(fp, UIO_SYSSPACE, &pn)) != 0)
    123 		return (error);
    124 	/*
    125 	 * Determine what rootvp to use.
    126 	 */
    127 	if (core_type == CORE_PROC) {
    128 		rootvp = (PTOU(curproc)->u_rdir == NULL ?
    129 		    curproc->p_zone->zone_rootvp : PTOU(curproc)->u_rdir);
    130 		startvp = (fp[0] == '/' ? rootvp : PTOU(curproc)->u_cdir);
    131 	} else if (core_type == CORE_ZONE) {
    132 		startvp = curproc->p_zone->zone_rootvp;
    133 		rootvp = curproc->p_zone->zone_rootvp;
    134 	} else {
    135 		ASSERT(core_type == CORE_GLOBAL);
    136 		startvp = rootdir;
    137 		rootvp = rootdir;
    138 	}
    139 	VN_HOLD(startvp);
    140 	if (rootvp != rootdir)
    141 		VN_HOLD(rootvp);
    142 	if ((error = lookuppnvp(&pn, NULL, NO_FOLLOW, &dvp, &vp, rootvp,
    143 	    startvp, CRED())) != 0) {
    144 		pn_free(&pn);
    145 		return (error);
    146 	}
    147 	/*
    148 	 * Succeed if there is no file.
    149 	 * Fail if the file is not a regular file.
    150 	 * Fail if the filesystem is mounted read-only.
    151 	 * Fail if the file is not writeable.
    152 	 * Fail if the file has NBMAND share reservations.
    153 	 */
    154 	if (vp == NULL)
    155 		error = 0;
    156 	else if (vp->v_type != VREG)
    157 		error = EACCES;
    158 	else if ((dvfsp = dvp->v_vfsp) != NULL &&
    159 	    (dvfsp->vfs_flag & VFS_RDONLY))
    160 		error = EROFS;
    161 	else if ((error = VOP_ACCESS(vp, VWRITE, 0, CRED(), NULL)) == 0) {
    162 		if (nbl_need_check(vp)) {
    163 			nbl_start_crit(vp, RW_READER);
    164 			in_crit = 1;
    165 			if (nbl_share_conflict(vp, NBL_REMOVE, NULL)) {
    166 				error = EACCES;
    167 			}
    168 		}
    169 		if (!error) {
    170 			error = VOP_REMOVE(dvp, pn.pn_path, CRED(), NULL, 0);
    171 		}
    172 	}
    173 
    174 	pn_free(&pn);
    175 	if (vp != NULL) {
    176 		if (in_crit)
    177 			nbl_end_crit(vp);
    178 		VN_RELE(vp);
    179 	}
    180 	VN_RELE(dvp);
    181 	return (error);
    182 }
    183 
    184 /*
    185  * Create the core file in a location that may be normally inaccessible due
    186  * to chroot(2) or Zone limitations.
    187  */
    188 static int
    189 create_core_file(char *fp, enum core_types core_type, vnode_t **vpp)
    190 {
    191 	int error;
    192 	mode_t perms = (S_IRUSR | S_IWUSR);
    193 	pathname_t pn;
    194 	char *file;
    195 	vnode_t *vp;
    196 	vnode_t *dvp;
    197 	vattr_t vattr;
    198 	cred_t *credp = CRED();
    199 
    200 	if (core_type == CORE_PROC) {
    201 		file = fp;
    202 		dvp = NULL;	/* regular lookup */
    203 	} else {
    204 		vnode_t *startvp, *rootvp;
    205 
    206 		ASSERT(core_type == CORE_ZONE || core_type == CORE_GLOBAL);
    207 		/*
    208 		 * This is tricky because we want to dump the core in
    209 		 * a location which may normally be inaccessible
    210 		 * to us (due to chroot(2) limitations, or zone
    211 		 * membership), and hence need to overcome u_rdir
    212 		 * restrictions.  The basic idea is to separate
    213 		 * the path from the filename, lookup the
    214 		 * pathname separately (starting from the global
    215 		 * zone's root directory), and then open the
    216 		 * file starting at the directory vnode.
    217 		 */
    218 		if (error = pn_get(fp, UIO_SYSSPACE, &pn))
    219 			return (error);
    220 
    221 		if (core_type == CORE_ZONE) {
    222 			startvp = rootvp = curproc->p_zone->zone_rootvp;
    223 		} else {
    224 			startvp = rootvp = rootdir;
    225 		}
    226 		/*
    227 		 * rootvp and startvp will be VN_RELE()'d by lookuppnvp() if
    228 		 * necessary.
    229 		 */
    230 		VN_HOLD(startvp);
    231 		if (rootvp != rootdir)
    232 			VN_HOLD(rootvp);
    233 		/*
    234 		 * Do a lookup on the full path, ignoring the actual file, but
    235 		 * finding the vnode for the directory.  It's OK if the file
    236 		 * doesn't exist -- it most likely won't since we just removed
    237 		 * it.
    238 		 */
    239 		error = lookuppnvp(&pn, NULL, FOLLOW, &dvp, NULLVPP,
    240 		    rootvp, startvp, credp);
    241 		pn_free(&pn);
    242 		if (error != 0)
    243 			return (error);
    244 		ASSERT(dvp != NULL);
    245 		/*
    246 		 * Now find the final component in the path (ie, the name of
    247 		 * the core file).
    248 		 */
    249 		if (error = pn_get(fp, UIO_SYSSPACE, &pn)) {
    250 			VN_RELE(dvp);
    251 			return (error);
    252 		}
    253 		pn_setlast(&pn);
    254 		file = pn.pn_path;
    255 	}
    256 	error =  vn_openat(file, UIO_SYSSPACE,
    257 	    FWRITE | FTRUNC | FEXCL | FCREAT | FOFFMAX,
    258 	    perms, &vp, CRCREAT, PTOU(curproc)->u_cmask, dvp, -1);
    259 	if (core_type != CORE_PROC) {
    260 		VN_RELE(dvp);
    261 		pn_free(&pn);
    262 	}
    263 	/*
    264 	 * Don't dump a core file owned by "nobody".
    265 	 */
    266 	vattr.va_mask = AT_UID;
    267 	if (error == 0 &&
    268 	    (VOP_GETATTR(vp, &vattr, 0, credp, NULL) != 0 ||
    269 	    vattr.va_uid != crgetuid(credp))) {
    270 		(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0,
    271 		    credp, NULL);
    272 		VN_RELE(vp);
    273 		(void) remove_core_file(fp, core_type);
    274 		error = EACCES;
    275 	}
    276 	*vpp = vp;
    277 	return (error);
    278 }
    279 
    280 /*
    281  * Install the specified held cred into the process, and return a pointer to
    282  * the held cred which was previously the value of p->p_cred.
    283  */
    284 static cred_t *
    285 set_cred(proc_t *p, cred_t *newcr)
    286 {
    287 	cred_t *oldcr;
    288 	uid_t olduid, newuid;
    289 
    290 	/*
    291 	 * Place a hold on the existing cred, and then install the new
    292 	 * cred into the proc structure.
    293 	 */
    294 	mutex_enter(&p->p_crlock);
    295 	oldcr = p->p_cred;
    296 	crhold(oldcr);
    297 	p->p_cred = newcr;
    298 	mutex_exit(&p->p_crlock);
    299 
    300 	ASSERT(crgetzoneid(oldcr) == crgetzoneid(newcr));
    301 
    302 	/*
    303 	 * If the real uid is changing, keep the per-user process
    304 	 * counts accurate.
    305 	 */
    306 	olduid = crgetruid(oldcr);
    307 	newuid = crgetruid(newcr);
    308 	if (olduid != newuid) {
    309 		zoneid_t zoneid = crgetzoneid(newcr);
    310 
    311 		mutex_enter(&pidlock);
    312 		upcount_dec(olduid, zoneid);
    313 		upcount_inc(newuid, zoneid);
    314 		mutex_exit(&pidlock);
    315 	}
    316 
    317 	/*
    318 	 * Broadcast the new cred to all the other threads.  The old
    319 	 * cred can be safely returned because we have a hold on it.
    320 	 */
    321 	crset(p, newcr);
    322 	return (oldcr);
    323 }
    324 
    325 static int
    326 do_core(char *fp, int sig, enum core_types core_type, struct core_globals *cg)
    327 {
    328 	proc_t *p = curproc;
    329 	cred_t *credp = CRED();
    330 	rlim64_t rlimit;
    331 	vnode_t *vp;
    332 	int error = 0;
    333 	struct execsw *eswp;
    334 	cred_t *ocredp = NULL;
    335 	int is_setid = 0;
    336 	core_content_t content;
    337 	uid_t uid;
    338 	gid_t gid;
    339 
    340 	if (core_type == CORE_GLOBAL || core_type == CORE_ZONE) {
    341 		mutex_enter(&cg->core_lock);
    342 		content = cg->core_content;
    343 		mutex_exit(&cg->core_lock);
    344 		rlimit = cg->core_rlimit;
    345 	} else {
    346 		mutex_enter(&p->p_lock);
    347 		rlimit = rctl_enforced_value(rctlproc_legacy[RLIMIT_CORE],
    348 		    p->p_rctls, p);
    349 		content = corectl_content_value(p->p_content);
    350 		mutex_exit(&p->p_lock);
    351 	}
    352 
    353 	if (rlimit == 0)
    354 		return (EFBIG);
    355 
    356 	/*
    357 	 * If SNOCD is set, or if the effective, real, and saved ids do
    358 	 * not match up, no one but a privileged user is allowed to view
    359 	 * this core file.  Set the credentials and the owner to root.
    360 	 */
    361 	if ((p->p_flag & SNOCD) ||
    362 	    (uid = crgetuid(credp)) != crgetruid(credp) ||
    363 	    uid != crgetsuid(credp) ||
    364 	    (gid = crgetgid(credp)) != crgetrgid(credp) ||
    365 	    gid != crgetsgid(credp)) {
    366 		/*
    367 		 * Because this is insecure against certain forms of file
    368 		 * system attack, do it only if set-id core files have been
    369 		 * enabled via corectl(CC_GLOBAL_SETID | CC_PROCESS_SETID).
    370 		 */
    371 		if (((core_type == CORE_GLOBAL || core_type == CORE_ZONE) &&
    372 		    !(cg->core_options & CC_GLOBAL_SETID)) ||
    373 		    (core_type == CORE_PROC &&
    374 		    !(cg->core_options & CC_PROCESS_SETID)))
    375 			return (ENOTSUP);
    376 
    377 		is_setid = 1;
    378 	}
    379 
    380 	/*
    381 	 * If we are doing a "global" core dump or a set-id core dump,
    382 	 * use kcred to do the dumping.
    383 	 */
    384 	if (core_type == CORE_GLOBAL || core_type == CORE_ZONE || is_setid) {
    385 		/*
    386 		 * Use the zone's "kcred" to prevent privilege
    387 		 * escalation.
    388 		 */
    389 		credp = zone_get_kcred(getzoneid());
    390 		ASSERT(credp != NULL);
    391 		ocredp = set_cred(p, credp);
    392 	}
    393 
    394 	/*
    395 	 * First remove any existing core file, then
    396 	 * open the new core file with (O_EXCL|O_CREAT).
    397 	 *
    398 	 * The reasons for doing this are manifold:
    399 	 *
    400 	 * For security reasons, we don't want root processes
    401 	 * to dump core through a symlink because that would
    402 	 * allow a malicious user to clobber any file on
    403 	 * the system if s/he could convince a root process,
    404 	 * perhaps a set-uid root process that s/he started,
    405 	 * to dump core in a directory writable by that user.
    406 	 * Similar security reasons apply to hard links.
    407 	 * For symmetry we do this unconditionally, not
    408 	 * just for root processes.
    409 	 *
    410 	 * If the process has the core file mmap()d into the
    411 	 * address space, we would be modifying the address
    412 	 * space that we are trying to dump if we did not first
    413 	 * remove the core file.  (The command "file core"
    414 	 * is the canonical example of this possibility.)
    415 	 *
    416 	 * Opening the core file with O_EXCL|O_CREAT ensures than
    417 	 * two concurrent core dumps don't clobber each other.
    418 	 * One is bound to lose; we don't want to make both lose.
    419 	 */
    420 	if ((error = remove_core_file(fp, core_type)) == 0) {
    421 		error = create_core_file(fp, core_type, &vp);
    422 	}
    423 
    424 	/*
    425 	 * Now that vn_open is complete, reset the process's credentials if
    426 	 * we changed them, and make 'credp' point to kcred used
    427 	 * above.  We use 'credp' to do i/o on the core file below, but leave
    428 	 * p->p_cred set to the original credential to allow the core file
    429 	 * to record this information.
    430 	 */
    431 	if (ocredp != NULL)
    432 		credp = set_cred(p, ocredp);
    433 
    434 	if (error == 0) {
    435 		int closerr;
    436 #if defined(__sparc)
    437 		(void) flush_user_windows_to_stack(NULL);
    438 #endif
    439 #ifdef SUN_SRC_COMPAT
    440 		PTOU(curproc)->u_acflag |= ACORE;
    441 #endif
    442 		if ((eswp = PTOU(curproc)->u_execsw) == NULL ||
    443 		    (eswp = findexec_by_magic(eswp->exec_magic)) == NULL) {
    444 			error = ENOSYS;
    445 		} else {
    446 			error = eswp->exec_core(vp, p, credp, rlimit, sig,
    447 			    content);
    448 			rw_exit(eswp->exec_lock);
    449 		}
    450 
    451 		closerr = VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, credp, NULL);
    452 		VN_RELE(vp);
    453 		if (error == 0)
    454 			error = closerr;
    455 	}
    456 
    457 	if (ocredp != NULL)
    458 		crfree(credp);
    459 
    460 	return (error);
    461 }
    462 
    463 /*
    464  * Convert a core name pattern to a pathname.
    465  */
    466 static int
    467 expand_string(const char *pat, char *fp, int size, cred_t *cr)
    468 {
    469 	proc_t *p = curproc;
    470 	char buf[24];
    471 	int len, i;
    472 	char *s;
    473 	char c;
    474 
    475 	while ((c = *pat++) != '\0') {
    476 		if (size < 2)
    477 			return (ENAMETOOLONG);
    478 		if (c != '%') {
    479 			size--;
    480 			*fp++ = c;
    481 			continue;
    482 		}
    483 		if ((c = *pat++) == '\0') {
    484 			size--;
    485 			*fp++ = '%';
    486 			break;
    487 		}
    488 		switch (c) {
    489 		case 'p':	/* pid */
    490 			(void) sprintf((s = buf), "%d", p->p_pid);
    491 			break;
    492 		case 'u':	/* effective uid */
    493 			(void) sprintf((s = buf), "%u", crgetuid(p->p_cred));
    494 			break;
    495 		case 'g':	/* effective gid */
    496 			(void) sprintf((s = buf), "%u", crgetgid(p->p_cred));
    497 			break;
    498 		case 'f':	/* exec'd filename */
    499 			s = PTOU(p)->u_comm;
    500 			break;
    501 		case 'd':	/* exec'd dirname */
    502 			/*
    503 			 * Even if pathname caching is disabled, we should
    504 			 * be able to lookup the pathname for a directory.
    505 			 */
    506 			if (p->p_execdir != NULL && vnodetopath(NULL,
    507 			    p->p_execdir, fp, size, cr) == 0) {
    508 				len = (int)strlen(fp);
    509 				ASSERT(len < size);
    510 				ASSERT(len >= 1);
    511 				ASSERT(fp[0] == '/');
    512 
    513 				/*
    514 				 * Strip off the leading slash.
    515 				 */
    516 				for (i = 0; i < len; i++) {
    517 					fp[i] = fp[i + 1];
    518 				}
    519 
    520 				len--;
    521 
    522 				size -= len;
    523 				fp += len;
    524 			} else {
    525 				*fp = '\0';
    526 			}
    527 
    528 			continue;
    529 		case 'n':	/* system nodename */
    530 			s = uts_nodename();
    531 			break;
    532 		case 'm':	/* machine (sun4u, etc) */
    533 			s = utsname.machine;
    534 			break;
    535 		case 't':	/* decimal value of time(2) */
    536 			(void) sprintf((s = buf), "%ld", gethrestime_sec());
    537 			break;
    538 		case 'z':
    539 			s = p->p_zone->zone_name;
    540 			break;
    541 		case '%':
    542 			(void) strcpy((s = buf), "%");
    543 			break;
    544 		default:
    545 			s = buf;
    546 			buf[0] = '%';
    547 			buf[1] = c;
    548 			buf[2] = '\0';
    549 			break;
    550 		}
    551 		len = (int)strlen(s);
    552 		if ((size -= len) <= 0)
    553 			return (ENAMETOOLONG);
    554 		(void) strcpy(fp, s);
    555 		fp += len;
    556 	}
    557 
    558 	*fp = '\0';
    559 	return (0);
    560 }
    561 
    562 static int
    563 dump_one_core(int sig, rlim64_t rlimit, enum core_types core_type,
    564     struct core_globals *cg, char **name)
    565 {
    566 	refstr_t *rp;
    567 	proc_t *p = curproc;
    568 	zoneid_t zoneid;
    569 	int error;
    570 	char *fp;
    571 	cred_t *cr;
    572 
    573 	ASSERT(core_type == CORE_ZONE || core_type == CORE_GLOBAL);
    574 	zoneid = (core_type == CORE_ZONE ? getzoneid() : GLOBAL_ZONEID);
    575 
    576 	mutex_enter(&cg->core_lock);
    577 	if ((rp = cg->core_file) != NULL)
    578 		refstr_hold(rp);
    579 	mutex_exit(&cg->core_lock);
    580 	if (rp == NULL) {
    581 		core_log(cg, 0, "no global core file pattern exists", NULL,
    582 		    zoneid);
    583 		return (1);	/* core file not generated */
    584 	}
    585 	fp = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    586 	cr = zone_get_kcred(getzoneid());
    587 	error = expand_string(refstr_value(rp), fp, MAXPATHLEN, cr);
    588 	crfree(cr);
    589 	if (error != 0) {
    590 		core_log(cg, 0, "global core file pattern too long",
    591 		    refstr_value(rp), zoneid);
    592 	} else if ((error = do_core(fp, sig, core_type, cg)) == 0) {
    593 		core_log(cg, 0, "core dumped", fp, zoneid);
    594 	} else if (error == ENOTSUP) {
    595 		core_log(cg, 0, "setid process, core not dumped", fp, zoneid);
    596 	} else if (error == ENOSPC) {
    597 		core_log(cg, 0, "no space left on device, core truncated",
    598 		    fp, zoneid);
    599 	} else if (error == EFBIG) {
    600 		if (rlimit == 0)
    601 			core_log(cg, 0, "core rlimit is zero, core not dumped",
    602 			    fp, zoneid);
    603 		else
    604 			core_log(cg, 0, "core rlimit exceeded, core truncated",
    605 			    fp, zoneid);
    606 		/*
    607 		 * In addition to the core result logging, we
    608 		 * may also have explicit actions defined on
    609 		 * core file size violations via the resource
    610 		 * control framework.
    611 		 */
    612 		mutex_enter(&p->p_lock);
    613 		(void) rctl_action(rctlproc_legacy[RLIMIT_CORE],
    614 		    p->p_rctls, p, RCA_SAFE);
    615 		mutex_exit(&p->p_lock);
    616 	} else {
    617 		core_log(cg, error, "core dump failed", fp, zoneid);
    618 	}
    619 	refstr_rele(rp);
    620 	if (name != NULL)
    621 		*name = fp;
    622 	else
    623 		kmem_free(fp, MAXPATHLEN);
    624 	return (error);
    625 }
    626 
    627 int
    628 core(int sig, int ext)
    629 {
    630 	proc_t *p = curproc;
    631 	klwp_t *lwp = ttolwp(curthread);
    632 	refstr_t *rp;
    633 	char *fp_process = NULL, *fp_global = NULL, *fp_zone = NULL;
    634 	int error1 = 1;
    635 	int error2 = 1;
    636 	int error3 = 1;
    637 	k_sigset_t sigmask;
    638 	k_sigset_t sighold;
    639 	rlim64_t rlimit;
    640 	struct core_globals *my_cg, *global_cg;
    641 
    642 	global_cg = zone_getspecific(core_zone_key, global_zone);
    643 	ASSERT(global_cg != NULL);
    644 
    645 	my_cg = zone_getspecific(core_zone_key, curproc->p_zone);
    646 	ASSERT(my_cg != NULL);
    647 
    648 	/* core files suppressed? */
    649 	if (!(my_cg->core_options & (CC_PROCESS_PATH|CC_GLOBAL_PATH)) &&
    650 	    !(global_cg->core_options & CC_GLOBAL_PATH)) {
    651 		if (!ext && p->p_ct_process != NULL)
    652 			contract_process_core(p->p_ct_process, p, sig,
    653 			    NULL, NULL, NULL);
    654 		return (1);
    655 	}
    656 
    657 	/*
    658 	 * Block all signals except SIGHUP, SIGINT, SIGKILL, and SIGTERM.
    659 	 * These signals are allowed to interrupt the core dump.
    660 	 * SIGQUIT is not allowed because it is supposed to make a core.
    661 	 * Additionally, get current limit on core file size for handling later
    662 	 * error reporting.
    663 	 */
    664 	mutex_enter(&p->p_lock);
    665 
    666 	p->p_flag |= SDOCORE;
    667 	schedctl_finish_sigblock(curthread);
    668 	sigmask = curthread->t_hold;	/* remember for later */
    669 	sigfillset(&sighold);
    670 	if (!sigismember(&sigmask, SIGHUP))
    671 		sigdelset(&sighold, SIGHUP);
    672 	if (!sigismember(&sigmask, SIGINT))
    673 		sigdelset(&sighold, SIGINT);
    674 	if (!sigismember(&sigmask, SIGKILL))
    675 		sigdelset(&sighold, SIGKILL);
    676 	if (!sigismember(&sigmask, SIGTERM))
    677 		sigdelset(&sighold, SIGTERM);
    678 	curthread->t_hold = sighold;
    679 
    680 	rlimit = rctl_enforced_value(rctlproc_legacy[RLIMIT_CORE], p->p_rctls,
    681 	    p);
    682 
    683 	mutex_exit(&p->p_lock);
    684 
    685 	/*
    686 	 * Undo any watchpoints.
    687 	 */
    688 	pr_free_watched_pages(p);
    689 
    690 	/*
    691 	 * The presence of a current signal prevents file i/o
    692 	 * from succeeding over a network.  We copy the current
    693 	 * signal information to the side and cancel the current
    694 	 * signal so that the core dump will succeed.
    695 	 */
    696 	ASSERT(lwp->lwp_cursig == sig);
    697 	lwp->lwp_cursig = 0;
    698 	lwp->lwp_extsig = 0;
    699 	if (lwp->lwp_curinfo == NULL) {
    700 		bzero(&lwp->lwp_siginfo, sizeof (k_siginfo_t));
    701 		lwp->lwp_siginfo.si_signo = sig;
    702 		lwp->lwp_siginfo.si_code = SI_NOINFO;
    703 	} else {
    704 		bcopy(&lwp->lwp_curinfo->sq_info,
    705 		    &lwp->lwp_siginfo, sizeof (k_siginfo_t));
    706 		siginfofree(lwp->lwp_curinfo);
    707 		lwp->lwp_curinfo = NULL;
    708 	}
    709 
    710 	/*
    711 	 * Convert the core file name patterns into path names
    712 	 * and call do_core() to write the core files.
    713 	 */
    714 
    715 	if (my_cg->core_options & CC_PROCESS_PATH) {
    716 		mutex_enter(&p->p_lock);
    717 		if (p->p_corefile != NULL)
    718 			rp = corectl_path_value(p->p_corefile);
    719 		else
    720 			rp = NULL;
    721 		mutex_exit(&p->p_lock);
    722 		if (rp != NULL) {
    723 			fp_process = kmem_alloc(MAXPATHLEN, KM_SLEEP);
    724 			error1 = expand_string(refstr_value(rp),
    725 			    fp_process, MAXPATHLEN, p->p_cred);
    726 			if (error1 == 0)
    727 				error1 = do_core(fp_process, sig, CORE_PROC,
    728 				    my_cg);
    729 			refstr_rele(rp);
    730 		}
    731 	}
    732 
    733 	if (my_cg->core_options & CC_GLOBAL_PATH)
    734 		error2 = dump_one_core(sig, rlimit, CORE_ZONE, my_cg,
    735 		    &fp_global);
    736 	if (global_cg != my_cg && (global_cg->core_options & CC_GLOBAL_PATH))
    737 		error3 = dump_one_core(sig, rlimit, CORE_GLOBAL, global_cg,
    738 		    &fp_zone);
    739 
    740 	/*
    741 	 * Restore the signal hold mask.
    742 	 */
    743 	mutex_enter(&p->p_lock);
    744 	curthread->t_hold = sigmask;
    745 	mutex_exit(&p->p_lock);
    746 
    747 	if (!ext && p->p_ct_process != NULL)
    748 		contract_process_core(p->p_ct_process, p, sig,
    749 		    error1 == 0 ? fp_process : NULL,
    750 		    error2 == 0 ? fp_global : NULL,
    751 		    error3 == 0 ? fp_zone : NULL);
    752 
    753 	if (fp_process != NULL)
    754 		kmem_free(fp_process, MAXPATHLEN);
    755 	if (fp_global != NULL)
    756 		kmem_free(fp_global, MAXPATHLEN);
    757 	if (fp_zone != NULL)
    758 		kmem_free(fp_zone, MAXPATHLEN);
    759 
    760 	/*
    761 	 * Return non-zero if no core file was created.
    762 	 */
    763 	return (error1 != 0 && error2 != 0 && error3 != 0);
    764 }
    765 
    766 /*
    767  * Maximum chunk size for dumping core files,
    768  * size in pages, patchable in /etc/system
    769  */
    770 uint_t	core_chunk = 32;
    771 
    772 /*
    773  * The delay between core_write() calls, in microseconds.  The default
    774  * matches one "normal" clock tick, or 10 milliseconds.
    775  */
    776 clock_t	core_delay_usec = 10000;
    777 
    778 /*
    779  * Common code to core dump process memory.  The core_seg routine does i/o
    780  * using core_write() below, and so it has the same failure semantics.
    781  */
    782 int
    783 core_seg(proc_t *p, vnode_t *vp, offset_t offset, caddr_t addr, size_t size,
    784     rlim64_t rlimit, cred_t *credp)
    785 {
    786 	caddr_t eaddr;
    787 	caddr_t base;
    788 	size_t len;
    789 	int err = 0;
    790 
    791 	eaddr = addr + size;
    792 	for (base = addr; base < eaddr; base += len) {
    793 		len = eaddr - base;
    794 		if (as_memory(p->p_as, &base, &len) != 0)
    795 			return (0);
    796 		/*
    797 		 * Reduce len to a reasonable value so that we don't
    798 		 * overwhelm the VM system with a monstrously large
    799 		 * single write and cause pageout to stop running.
    800 		 */
    801 		if (len > (size_t)core_chunk * PAGESIZE)
    802 			len = (size_t)core_chunk * PAGESIZE;
    803 
    804 		err = core_write(vp, UIO_USERSPACE,
    805 		    offset + (size_t)(base - addr), base, len, rlimit, credp);
    806 
    807 		if (err == 0) {
    808 			/*
    809 			 * Give pageout a chance to run.
    810 			 * Also allow core dumping to be interruptible.
    811 			 */
    812 			err = delay_sig(drv_usectohz(core_delay_usec));
    813 		}
    814 		if (err)
    815 			return (err);
    816 	}
    817 	return (0);
    818 }
    819 
    820 /*
    821  * Wrapper around vn_rdwr to perform writes to a core file.  For core files,
    822  * we always want to write as much as we possibly can, and then make sure to
    823  * return either 0 to the caller (for success), or the actual errno value.
    824  * By using this function, the caller can omit additional code for handling
    825  * retries and errors for partial writes returned by vn_rdwr.  If vn_rdwr
    826  * unexpectedly returns zero but no progress has been made, we return ENOSPC.
    827  */
    828 int
    829 core_write(vnode_t *vp, enum uio_seg segflg, offset_t offset,
    830     const void *buf, size_t len, rlim64_t rlimit, cred_t *credp)
    831 {
    832 	ssize_t resid = len;
    833 	int error = 0;
    834 
    835 	while (len != 0) {
    836 		error = vn_rdwr(UIO_WRITE, vp, (caddr_t)buf, len, offset,
    837 		    segflg, 0, rlimit, credp, &resid);
    838 
    839 		if (error != 0)
    840 			break;
    841 
    842 		if (resid >= len)
    843 			return (ENOSPC);
    844 
    845 		buf = (const char *)buf + len - resid;
    846 		offset += len - resid;
    847 		len = resid;
    848 	}
    849 
    850 	return (error);
    851 }
    852