Home | History | Annotate | Download | only in syscall
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/proc.h>
     27 #include <sys/systm.h>
     28 #include <sys/param.h>
     29 #include <sys/kmem.h>
     30 #include <sys/sysmacros.h>
     31 #include <sys/types.h>
     32 #include <sys/cmn_err.h>
     33 #include <sys/user.h>
     34 #include <sys/cred.h>
     35 #include <sys/vnode.h>
     36 #include <sys/file.h>
     37 #include <sys/pathname.h>
     38 #include <sys/modctl.h>
     39 #include <sys/acctctl.h>
     40 #include <sys/bitmap.h>
     41 #include <sys/exacct.h>
     42 #include <sys/policy.h>
     43 
     44 /*
     45  * acctctl(2)
     46  *
     47  *   acctctl() provides the administrative interface to the extended accounting
     48  *   subsystem.  The process and task accounting facilities are configurable:
     49  *   resources can be individually specified for recording in the appropriate
     50  *   accounting file.
     51  *
     52  *   The current implementation of acctctl() requires that the process, task,
     53  *   flow and net accounting files be distinct across all zones.
     54  *
     55  * Locking
     56  *   Each accounting species has an ac_info_t which contains a mutex,
     57  *   used to protect the ac_info_t's contents, and to serialize access to the
     58  *   appropriate file.
     59  */
     60 
     61 static list_t exacct_globals_list;
     62 static kmutex_t exacct_globals_list_lock;
     63 
     64 static int
     65 ac_state_set(ac_info_t *info, void *buf, size_t bufsz)
     66 {
     67 	int state;
     68 
     69 	if (buf == NULL || (bufsz != sizeof (int)))
     70 		return (EINVAL);
     71 
     72 	if (copyin(buf, &state, bufsz) != 0)
     73 		return (EFAULT);
     74 
     75 	if (state != AC_ON && state != AC_OFF)
     76 		return (EINVAL);
     77 
     78 	mutex_enter(&info->ac_lock);
     79 	info->ac_state = state;
     80 	mutex_exit(&info->ac_lock);
     81 	return (0);
     82 }
     83 
     84 static int
     85 ac_state_get(ac_info_t *info, void *buf, size_t bufsz)
     86 {
     87 	if (buf == NULL || (bufsz != sizeof (int)))
     88 		return (EINVAL);
     89 
     90 	mutex_enter(&info->ac_lock);
     91 	if (copyout(&info->ac_state, buf, bufsz) != 0) {
     92 		mutex_exit(&info->ac_lock);
     93 		return (EFAULT);
     94 	}
     95 	mutex_exit(&info->ac_lock);
     96 	return (0);
     97 }
     98 
     99 static boolean_t
    100 ac_file_in_use(vnode_t *vp)
    101 {
    102 	boolean_t in_use = B_FALSE;
    103 	struct exacct_globals *acg;
    104 
    105 	if (vp == NULL)
    106 		return (B_FALSE);
    107 	mutex_enter(&exacct_globals_list_lock);
    108 	/*
    109 	 * Start off by grabbing all locks.
    110 	 */
    111 	for (acg = list_head(&exacct_globals_list); acg != NULL;
    112 	    acg = list_next(&exacct_globals_list, acg)) {
    113 		mutex_enter(&acg->ac_proc.ac_lock);
    114 		mutex_enter(&acg->ac_task.ac_lock);
    115 		mutex_enter(&acg->ac_flow.ac_lock);
    116 		mutex_enter(&acg->ac_net.ac_lock);
    117 	}
    118 
    119 	for (acg = list_head(&exacct_globals_list); !in_use && acg != NULL;
    120 	    acg = list_next(&exacct_globals_list, acg)) {
    121 		/*
    122 		 * We need to verify that we aren't already using this file for
    123 		 * accounting in any zone.
    124 		 */
    125 		if (vn_compare(acg->ac_proc.ac_vnode, vp) ||
    126 		    vn_compare(acg->ac_task.ac_vnode, vp) ||
    127 		    vn_compare(acg->ac_flow.ac_vnode, vp) ||
    128 		    vn_compare(acg->ac_net.ac_vnode, vp))
    129 			in_use = B_TRUE;
    130 	}
    131 
    132 	/*
    133 	 * Drop all locks.
    134 	 */
    135 	for (acg = list_head(&exacct_globals_list); acg != NULL;
    136 	    acg = list_next(&exacct_globals_list, acg)) {
    137 		mutex_exit(&acg->ac_proc.ac_lock);
    138 		mutex_exit(&acg->ac_task.ac_lock);
    139 		mutex_exit(&acg->ac_flow.ac_lock);
    140 		mutex_exit(&acg->ac_net.ac_lock);
    141 	}
    142 	mutex_exit(&exacct_globals_list_lock);
    143 	return (in_use);
    144 }
    145 
    146 static int
    147 ac_file_set(ac_info_t *info, void *ubuf, size_t bufsz)
    148 {
    149 	int error = 0;
    150 	void *kbuf;
    151 	void *namebuf;
    152 	int namelen;
    153 	vnode_t *vp;
    154 	void *hdr;
    155 	size_t hdrsize;
    156 	vattr_t va;
    157 
    158 	if (ubuf == NULL) {
    159 		mutex_enter(&info->ac_lock);
    160 
    161 		/*
    162 		 * Closing accounting file
    163 		 */
    164 		if (info->ac_vnode != NULL) {
    165 			error = VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0,
    166 			    CRED(), NULL);
    167 			if (error) {
    168 				mutex_exit(&info->ac_lock);
    169 				return (error);
    170 			}
    171 			VN_RELE(info->ac_vnode);
    172 			info->ac_vnode = NULL;
    173 		}
    174 		if (info->ac_file != NULL) {
    175 			kmem_free(info->ac_file, strlen(info->ac_file) + 1);
    176 			info->ac_file = NULL;
    177 		}
    178 
    179 		mutex_exit(&info->ac_lock);
    180 		return (error);
    181 	}
    182 
    183 	if (bufsz < 2 || bufsz > MAXPATHLEN)
    184 		return (EINVAL);
    185 
    186 	/*
    187 	 * We have to copy in the whole buffer since we can't tell the length
    188 	 * of the string in user's address space.
    189 	 */
    190 	kbuf = kmem_zalloc(bufsz, KM_SLEEP);
    191 	if ((error = copyinstr((char *)ubuf, (char *)kbuf, bufsz, NULL)) != 0) {
    192 		kmem_free(kbuf, bufsz);
    193 		return (error);
    194 	}
    195 	if (*((char *)kbuf) != '/') {
    196 		kmem_free(kbuf, bufsz);
    197 		return (EINVAL);
    198 	}
    199 
    200 	/*
    201 	 * Now, allocate the space where we are going to save the
    202 	 * name of the accounting file and kmem_free kbuf. We have to do this
    203 	 * now because it is not good to sleep in kmem_alloc() while
    204 	 * holding ac_info's lock.
    205 	 */
    206 	namelen = strlen(kbuf) + 1;
    207 	namebuf = kmem_alloc(namelen, KM_SLEEP);
    208 	(void) strcpy(namebuf, kbuf);
    209 	kmem_free(kbuf, bufsz);
    210 
    211 	/*
    212 	 * Check if this file already exists.
    213 	 */
    214 	error = lookupname(namebuf, UIO_SYSSPACE, FOLLOW, NULLVPP, &vp);
    215 
    216 	/*
    217 	 * Check if the file is already in use.
    218 	 */
    219 	if (!error) {
    220 		if (ac_file_in_use(vp)) {
    221 			/*
    222 			 * If we're already using it then return EBUSY
    223 			 */
    224 			kmem_free(namebuf, namelen);
    225 			VN_RELE(vp);
    226 			return (EBUSY);
    227 		}
    228 		VN_RELE(vp);
    229 	}
    230 
    231 	/*
    232 	 * Create an exacct header here because exacct_create_header() may
    233 	 * sleep so we should not be holding ac_lock. At this point we cannot
    234 	 * reliably know if we need the header or not, so we may end up not
    235 	 * using the header.
    236 	 */
    237 	hdr = exacct_create_header(&hdrsize);
    238 
    239 	/*
    240 	 * Now, grab info's ac_lock and try to set up everything.
    241 	 */
    242 	mutex_enter(&info->ac_lock);
    243 
    244 	if ((error = vn_open(namebuf, UIO_SYSSPACE,
    245 	    FCREAT | FWRITE | FOFFMAX, 0600, &vp, CRCREAT, 0)) != 0) {
    246 		mutex_exit(&info->ac_lock);
    247 		kmem_free(namebuf, namelen);
    248 		kmem_free(hdr, hdrsize);
    249 		return (error);
    250 	}
    251 
    252 	if (vp->v_type != VREG) {
    253 		VN_RELE(vp);
    254 		mutex_exit(&info->ac_lock);
    255 		kmem_free(namebuf, namelen);
    256 		kmem_free(hdr, hdrsize);
    257 		return (EACCES);
    258 	}
    259 
    260 	if (info->ac_vnode != NULL) {
    261 		/*
    262 		 * Switch from an old file to a new file by swapping
    263 		 * their vnode pointers.
    264 		 */
    265 		vnode_t *oldvp;
    266 		oldvp = info->ac_vnode;
    267 		info->ac_vnode = vp;
    268 		vp = oldvp;
    269 	} else {
    270 		/*
    271 		 * Start writing accounting records to a new file.
    272 		 */
    273 		info->ac_vnode = vp;
    274 		vp = NULL;
    275 	}
    276 	if (vp) {
    277 		/*
    278 		 * We still need to close the old file.
    279 		 */
    280 		if ((error = VOP_CLOSE(vp, FWRITE, 1, 0, CRED(), NULL)) != 0) {
    281 			VN_RELE(vp);
    282 			mutex_exit(&info->ac_lock);
    283 			kmem_free(namebuf, namelen);
    284 			kmem_free(hdr, hdrsize);
    285 			return (error);
    286 		}
    287 		VN_RELE(vp);
    288 		if (info->ac_file != NULL) {
    289 			kmem_free(info->ac_file,
    290 			    strlen(info->ac_file) + 1);
    291 			info->ac_file = NULL;
    292 		}
    293 	}
    294 	info->ac_file = namebuf;
    295 
    296 	/*
    297 	 * Write the exacct header only if the file is empty.
    298 	 */
    299 	error = VOP_GETATTR(info->ac_vnode, &va, AT_SIZE, CRED(), NULL);
    300 	if (error == 0 && va.va_size == 0)
    301 		error = exacct_write_header(info, hdr, hdrsize);
    302 
    303 	mutex_exit(&info->ac_lock);
    304 	kmem_free(hdr, hdrsize);
    305 	return (error);
    306 }
    307 
    308 static int
    309 ac_file_get(ac_info_t *info, void *buf, size_t bufsz)
    310 {
    311 	int error = 0;
    312 	vnode_t *vnode;
    313 	char *file;
    314 
    315 	mutex_enter(&info->ac_lock);
    316 	file = info->ac_file;
    317 	vnode = info->ac_vnode;
    318 
    319 	if (file == NULL || vnode == NULL) {
    320 		mutex_exit(&info->ac_lock);
    321 		return (ENOTACTIVE);
    322 	}
    323 
    324 	if (strlen(file) >= bufsz)
    325 		error = ENOMEM;
    326 	else
    327 		error = copyoutstr(file, buf, MAXPATHLEN, NULL);
    328 
    329 	mutex_exit(&info->ac_lock);
    330 	return (error);
    331 }
    332 
    333 static int
    334 ac_res_set(ac_info_t *info, void *buf, size_t bufsz, int maxres)
    335 {
    336 	ac_res_t *res;
    337 	ac_res_t *tmp;
    338 	ulong_t *maskp;
    339 	int id;
    340 	uint_t counter = 0;
    341 
    342 	/*
    343 	 * Validate that a non-zero buffer, sized within limits and to an
    344 	 * integral number of ac_res_t's has been specified.
    345 	 */
    346 	if (bufsz == 0 ||
    347 	    bufsz > sizeof (ac_res_t) * (AC_MAX_RES + 1) ||
    348 	    (bufsz / sizeof (ac_res_t)) * sizeof (ac_res_t) != bufsz)
    349 		return (EINVAL);
    350 
    351 	tmp = res = kmem_alloc(bufsz, KM_SLEEP);
    352 	if (copyin(buf, res, bufsz) != 0) {
    353 		kmem_free(res, bufsz);
    354 		return (EFAULT);
    355 	}
    356 
    357 	maskp = (ulong_t *)&info->ac_mask;
    358 
    359 	mutex_enter(&info->ac_lock);
    360 	while ((id = tmp->ar_id) != AC_NONE && counter < maxres + 1) {
    361 		if (id > maxres || id < 0) {
    362 			mutex_exit(&info->ac_lock);
    363 			kmem_free(res, bufsz);
    364 			return (EINVAL);
    365 		}
    366 		if (tmp->ar_state == AC_ON) {
    367 			BT_SET(maskp, id);
    368 		} else if (tmp->ar_state == AC_OFF) {
    369 			BT_CLEAR(maskp, id);
    370 		} else {
    371 			mutex_exit(&info->ac_lock);
    372 			kmem_free(res, bufsz);
    373 			return (EINVAL);
    374 		}
    375 		tmp++;
    376 		counter++;
    377 	}
    378 	mutex_exit(&info->ac_lock);
    379 	kmem_free(res, bufsz);
    380 	return (0);
    381 }
    382 
    383 static int
    384 ac_res_get(ac_info_t *info, void *buf, size_t bufsz, int maxres)
    385 {
    386 	int error = 0;
    387 	ac_res_t *res;
    388 	ac_res_t *tmp;
    389 	size_t ressz = sizeof (ac_res_t) * (maxres + 1);
    390 	ulong_t *maskp;
    391 	int id;
    392 
    393 	if (bufsz < ressz)
    394 		return (EINVAL);
    395 	tmp = res = kmem_alloc(ressz, KM_SLEEP);
    396 
    397 	mutex_enter(&info->ac_lock);
    398 	maskp = (ulong_t *)&info->ac_mask;
    399 	for (id = 1; id <= maxres; id++) {
    400 		tmp->ar_id = id;
    401 		tmp->ar_state = BT_TEST(maskp, id);
    402 		tmp++;
    403 	}
    404 	tmp->ar_id = AC_NONE;
    405 	tmp->ar_state = AC_OFF;
    406 	mutex_exit(&info->ac_lock);
    407 	error = copyout(res, buf, ressz);
    408 	kmem_free(res, ressz);
    409 	return (error);
    410 }
    411 
    412 /*
    413  * acctctl()
    414  *
    415  * Overview
    416  *   acctctl() is the entry point for the acctctl(2) system call.
    417  *
    418  * Return values
    419  *   On successful completion, return 0; otherwise -1 is returned and errno is
    420  *   set appropriately.
    421  *
    422  * Caller's context
    423  *   Called from the system call path.
    424  */
    425 int
    426 acctctl(int cmd, void *buf, size_t bufsz)
    427 {
    428 	int error = 0;
    429 	int mode = AC_MODE(cmd);
    430 	int option = AC_OPTION(cmd);
    431 	int maxres;
    432 	ac_info_t *info;
    433 	zone_t *zone = curproc->p_zone;
    434 	struct exacct_globals *acg;
    435 
    436 	acg = zone_getspecific(exacct_zone_key, zone);
    437 	/*
    438 	 * exacct_zone_key and associated per-zone state were initialized when
    439 	 * the module was loaded.
    440 	 */
    441 	ASSERT(exacct_zone_key != ZONE_KEY_UNINITIALIZED);
    442 	ASSERT(acg != NULL);
    443 
    444 	switch (mode) {	/* sanity check */
    445 	case AC_TASK:
    446 		info = &acg->ac_task;
    447 		maxres = AC_TASK_MAX_RES;
    448 		break;
    449 	case AC_PROC:
    450 		info = &acg->ac_proc;
    451 		maxres = AC_PROC_MAX_RES;
    452 		break;
    453 	/*
    454 	 * Flow/net accounting isn't configurable in non-global
    455 	 * zones, but we have this field on a per-zone basis for future
    456 	 * expansion as well as the ability to return default "unset"
    457 	 * values for the various AC_*_GET queries.  AC_*_SET commands
    458 	 * fail with EPERM for AC_FLOW and AC_NET in non-global zones.
    459 	 */
    460 	case AC_FLOW:
    461 		info = &acg->ac_flow;
    462 		maxres = AC_FLOW_MAX_RES;
    463 		break;
    464 	case AC_NET:
    465 		info = &acg->ac_net;
    466 		maxres = AC_NET_MAX_RES;
    467 		break;
    468 	default:
    469 		return (set_errno(EINVAL));
    470 	}
    471 
    472 	switch (option) {
    473 	case AC_STATE_SET:
    474 		if ((error = secpolicy_acct(CRED())) != 0)
    475 			break;
    476 		if ((mode == AC_FLOW || mode == AC_NET) &&
    477 		    getzoneid() != GLOBAL_ZONEID) {
    478 			error = EPERM;
    479 			break;
    480 		}
    481 		error = ac_state_set(info, buf, bufsz);
    482 		break;
    483 	case AC_STATE_GET:
    484 		error = ac_state_get(info, buf, bufsz);
    485 		break;
    486 	case AC_FILE_SET:
    487 		if ((error = secpolicy_acct(CRED())) != 0)
    488 			break;
    489 		if ((mode == AC_FLOW || mode == AC_NET) &&
    490 		    getzoneid() != GLOBAL_ZONEID) {
    491 			error = EPERM;
    492 			break;
    493 		}
    494 		error = ac_file_set(info, buf, bufsz);
    495 		break;
    496 	case AC_FILE_GET:
    497 		error = ac_file_get(info, buf, bufsz);
    498 		break;
    499 	case AC_RES_SET:
    500 		if ((error = secpolicy_acct(CRED())) != 0)
    501 			break;
    502 		if ((mode == AC_FLOW || mode == AC_NET) &&
    503 		    getzoneid() != GLOBAL_ZONEID) {
    504 			error = EPERM;
    505 			break;
    506 		}
    507 		error = ac_res_set(info, buf, bufsz, maxres);
    508 		break;
    509 	case AC_RES_GET:
    510 		error = ac_res_get(info, buf, bufsz, maxres);
    511 		break;
    512 	default:
    513 		return (set_errno(EINVAL));
    514 	}
    515 	if (error)
    516 		return (set_errno(error));
    517 	return (0);
    518 }
    519 
    520 static struct sysent ac_sysent = {
    521 	3,
    522 	SE_NOUNLOAD | SE_ARGC | SE_32RVAL1,
    523 	acctctl
    524 };
    525 
    526 static struct modlsys modlsys = {
    527 	&mod_syscallops,
    528 	"acctctl system call",
    529 	&ac_sysent
    530 };
    531 
    532 #ifdef _SYSCALL32_IMPL
    533 static struct modlsys modlsys32 = {
    534 	&mod_syscallops32,
    535 	"32-bit acctctl system call",
    536 	&ac_sysent
    537 };
    538 #endif
    539 
    540 static struct modlinkage modlinkage = {
    541 	MODREV_1,
    542 	&modlsys,
    543 #ifdef _SYSCALL32_IMPL
    544 	&modlsys32,
    545 #endif
    546 	NULL
    547 };
    548 
    549 /* ARGSUSED */
    550 static void *
    551 exacct_zone_init(zoneid_t zoneid)
    552 {
    553 	struct exacct_globals *acg;
    554 
    555 	acg = kmem_zalloc(sizeof (*acg), KM_SLEEP);
    556 	mutex_enter(&exacct_globals_list_lock);
    557 	list_insert_tail(&exacct_globals_list, acg);
    558 	mutex_exit(&exacct_globals_list_lock);
    559 	return (acg);
    560 }
    561 
    562 static void
    563 exacct_free_info(ac_info_t *info)
    564 {
    565 	mutex_enter(&info->ac_lock);
    566 	if (info->ac_vnode) {
    567 		(void) VOP_CLOSE(info->ac_vnode, FWRITE, 1, 0, kcred, NULL);
    568 		VN_RELE(info->ac_vnode);
    569 		kmem_free(info->ac_file, strlen(info->ac_file) + 1);
    570 	}
    571 	info->ac_state = AC_OFF;
    572 	info->ac_vnode = NULL;
    573 	info->ac_file = NULL;
    574 	mutex_exit(&info->ac_lock);
    575 }
    576 
    577 /* ARGSUSED */
    578 static void
    579 exacct_zone_shutdown(zoneid_t zoneid, void *data)
    580 {
    581 	struct exacct_globals *acg = data;
    582 
    583 	/*
    584 	 * The accounting files need to be closed during shutdown rather than
    585 	 * destroy, since otherwise the filesystem they reside on may fail to
    586 	 * unmount, thus causing the entire zone halt/reboot to fail.
    587 	 */
    588 	exacct_free_info(&acg->ac_proc);
    589 	exacct_free_info(&acg->ac_task);
    590 	exacct_free_info(&acg->ac_flow);
    591 	exacct_free_info(&acg->ac_net);
    592 }
    593 
    594 /* ARGSUSED */
    595 static void
    596 exacct_zone_fini(zoneid_t zoneid, void *data)
    597 {
    598 	struct exacct_globals *acg = data;
    599 
    600 	mutex_enter(&exacct_globals_list_lock);
    601 	list_remove(&exacct_globals_list, acg);
    602 	mutex_exit(&exacct_globals_list_lock);
    603 
    604 	mutex_destroy(&acg->ac_proc.ac_lock);
    605 	mutex_destroy(&acg->ac_task.ac_lock);
    606 	mutex_destroy(&acg->ac_flow.ac_lock);
    607 	mutex_destroy(&acg->ac_net.ac_lock);
    608 	kmem_free(acg, sizeof (*acg));
    609 }
    610 
    611 int
    612 _init()
    613 {
    614 	int error;
    615 
    616 	mutex_init(&exacct_globals_list_lock, NULL, MUTEX_DEFAULT, NULL);
    617 	list_create(&exacct_globals_list, sizeof (struct exacct_globals),
    618 	    offsetof(struct exacct_globals, ac_link));
    619 	zone_key_create(&exacct_zone_key, exacct_zone_init,
    620 	    exacct_zone_shutdown, exacct_zone_fini);
    621 
    622 	if ((error = mod_install(&modlinkage)) != 0) {
    623 		(void) zone_key_delete(exacct_zone_key);
    624 		exacct_zone_key = ZONE_KEY_UNINITIALIZED;
    625 		mutex_destroy(&exacct_globals_list_lock);
    626 		list_destroy(&exacct_globals_list);
    627 	}
    628 	return (error);
    629 }
    630 
    631 int
    632 _info(struct modinfo *modinfop)
    633 {
    634 	return (mod_info(&modlinkage, modinfop));
    635 }
    636 
/*
 * _fini()
 *
 * Overview
 *   Module unload entry point.  Unloading is always refused.
 *   NOTE(review): ac_sysent is flagged SE_NOUNLOAD, which presumably is the
 *   reason this module never unloads -- confirm.
 *
 * Return values
 *   Always EBUSY.
 */
int
_fini()
{
	return (EBUSY);
}
    642