Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/xpv_user.h>
     28 
     29 #include <sys/types.h>
     30 #include <sys/file.h>
     31 #include <sys/errno.h>
     32 #include <sys/open.h>
     33 #include <sys/cred.h>
     34 #include <sys/conf.h>
     35 #include <sys/stat.h>
     36 #include <sys/modctl.h>
     37 #include <sys/ddi.h>
     38 #include <sys/sunddi.h>
     39 #include <sys/vmsystm.h>
     40 #include <sys/sdt.h>
     41 #include <sys/hypervisor.h>
     42 #include <sys/xen_errno.h>
     43 #include <sys/policy.h>
     44 
     45 #include <vm/hat_i86.h>
     46 #include <vm/hat_pte.h>
     47 #include <vm/seg_mf.h>
     48 
     49 #include <xen/sys/privcmd.h>
     50 #include <sys/privcmd_impl.h>
     51 
/*
 * The driver's dev_info node.  Recorded by privcmd_attach(), reset to
 * NULL by privcmd_detach(), and handed back by privcmd_getinfo() for
 * DDI_INFO_DEVT2DEVINFO requests.
 */
static dev_info_t *privcmd_devi;
     53 
     54 /*ARGSUSED*/
     55 static int
     56 privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result)
     57 {
     58 	switch (cmd) {
     59 	case DDI_INFO_DEVT2DEVINFO:
     60 	case DDI_INFO_DEVT2INSTANCE:
     61 		break;
     62 	default:
     63 		return (DDI_FAILURE);
     64 	}
     65 
     66 	switch (getminor((dev_t)arg)) {
     67 	case PRIVCMD_MINOR:
     68 		break;
     69 	default:
     70 		return (DDI_FAILURE);
     71 	}
     72 
     73 	if (cmd == DDI_INFO_DEVT2INSTANCE)
     74 		*result = 0;
     75 	else
     76 		*result = privcmd_devi;
     77 	return (DDI_SUCCESS);
     78 }
     79 
     80 static int
     81 privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
     82 {
     83 	if (cmd != DDI_ATTACH)
     84 		return (DDI_FAILURE);
     85 
     86 	if (ddi_create_minor_node(devi, PRIVCMD_NODE,
     87 	    S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS)
     88 		return (DDI_FAILURE);
     89 
     90 	privcmd_devi = devi;
     91 	ddi_report_dev(devi);
     92 	return (DDI_SUCCESS);
     93 }
     94 
     95 static int
     96 privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
     97 {
     98 	if (cmd != DDI_DETACH)
     99 		return (DDI_FAILURE);
    100 	ddi_remove_minor_node(devi, NULL);
    101 	privcmd_devi = NULL;
    102 	return (DDI_SUCCESS);
    103 }
    104 
    105 /*ARGSUSED1*/
    106 static int
    107 privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr)
    108 {
    109 	return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO);
    110 }
    111 
    112 /*
    113  * Map a contiguous set of machine frames in a foreign domain.
    114  * Used in the following way:
    115  *
    116  *	privcmd_mmap_t p;
    117  *	privcmd_mmap_entry_t e;
    118  *
    119  *	addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
    120  *	p.num = number of privcmd_mmap_entry_t's
    121  *	p.dom = domid;
    122  *	p.entry = &e;
    123  *	e.va = addr;
    124  *	e.mfn = mfn;
    125  *	e.npages = btopr(size);
    126  *	ioctl(fd, IOCTL_PRIVCMD_MMAP, &p);
    127  */
    128 /*ARGSUSED2*/
    129 int
    130 do_privcmd_mmap(void *uarg, int mode, cred_t *cr)
    131 {
    132 	privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd;
    133 	privcmd_mmap_entry_t *umme;
    134 	struct as *as = curproc->p_as;
    135 	struct seg *seg;
    136 	int i, error = 0;
    137 
    138 	if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode))
    139 		return (EFAULT);
    140 
    141 	DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num,
    142 	    privcmd_mmap_entry_t *, mmc->entry);
    143 
    144 	if (mmc->dom == DOMID_SELF) {
    145 		error = ENOTSUP;	/* Too paranoid? */
    146 		goto done;
    147 	}
    148 
    149 	for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) {
    150 		privcmd_mmap_entry_t __mmapent, *mme = &__mmapent;
    151 		caddr_t addr;
    152 
    153 		if (ddi_copyin(umme, mme, sizeof (*mme), mode)) {
    154 			error = EFAULT;
    155 			break;
    156 		}
    157 
    158 		DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn,
    159 		    ulong_t, mme->npages);
    160 
    161 		if (mme->mfn == MFN_INVALID) {
    162 			error = EINVAL;
    163 			break;
    164 		}
    165 
    166 		addr = (caddr_t)mme->va;
    167 
    168 		/*
    169 		 * Find the segment we want to mess with, then add
    170 		 * the mfn range to the segment.
    171 		 */
    172 		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
    173 		if ((seg = as_findseg(as, addr, 0)) == NULL ||
    174 		    addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size)
    175 			error = EINVAL;
    176 		else
    177 			error = segmf_add_mfns(seg, addr,
    178 			    mme->mfn, mme->npages, mmc->dom);
    179 		AS_LOCK_EXIT(as, &as->a_lock);
    180 
    181 		if (error != 0)
    182 			break;
    183 	}
    184 
    185 done:
    186 	DTRACE_XPV1(mmap__end, int, error);
    187 
    188 	return (error);
    189 }
    190 
    191 /*
    192  * Set up the address range to map to an array of mfns in
    193  * a foreign domain.  Used in the following way:
    194  *
    195  *	privcmd_mmap_batch_t p;
    196  *
    197  *	addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
    198  *	p.num = number of pages
    199  *	p.dom = domid
    200  *	p.addr = addr;
    201  *	p.arr = array of mfns, indexed 0 .. p.num - 1
    202  *	ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p);
    203  */
    204 /*ARGSUSED2*/
    205 static int
    206 do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr)
    207 {
    208 	privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch;
    209 	struct as *as = curproc->p_as;
    210 	struct seg *seg;
    211 	int i, error = 0;
    212 	caddr_t addr;
    213 	ulong_t *ulp;
    214 
    215 	if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode))
    216 		return (EFAULT);
    217 
    218 	DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num,
    219 	    caddr_t, mmb->addr);
    220 
    221 	addr = (caddr_t)mmb->addr;
    222 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
    223 	if ((seg = as_findseg(as, addr, 0)) == NULL ||
    224 	    addr + ptob(mmb->num) > seg->s_base + seg->s_size) {
    225 		error = EINVAL;
    226 		goto done;
    227 	}
    228 
    229 	for (i = 0, ulp = mmb->arr;
    230 	    i < mmb->num; i++, addr += PAGESIZE, ulp++) {
    231 		mfn_t mfn;
    232 
    233 		if (fulword(ulp, &mfn) != 0) {
    234 			error = EFAULT;
    235 			break;
    236 		}
    237 
    238 		if (mfn == MFN_INVALID) {
    239 			/*
    240 			 * This mfn is invalid and should not be added to
    241 			 * segmf, as we'd only cause an immediate EFAULT when
    242 			 * we tried to fault it in.
    243 			 */
    244 			mfn |= XEN_DOMCTL_PFINFO_XTAB;
    245 			continue;
    246 		}
    247 
    248 		if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0)
    249 			continue;
    250 
    251 		/*
    252 		 * Tell the process that this MFN could not be mapped, so it
    253 		 * won't later try to access it.
    254 		 */
    255 		mfn |= XEN_DOMCTL_PFINFO_XTAB;
    256 		if (sulword(ulp, mfn) != 0) {
    257 			error = EFAULT;
    258 			break;
    259 		}
    260 	}
    261 
    262 done:
    263 	AS_LOCK_EXIT(as, &as->a_lock);
    264 
    265 	DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t,
    266 	    mmb->addr);
    267 
    268 	return (error);
    269 }
    270 
    271 /*ARGSUSED*/
    272 static int
    273 privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval)
    274 {
    275 	if (secpolicy_xvm_control(cr))
    276 		return (EPERM);
    277 
    278 	/*
    279 	 * Everything is a -native- data type.
    280 	 */
    281 	if ((mode & FMODELS) != FNATIVE)
    282 		return (EOVERFLOW);
    283 
    284 	switch (cmd) {
    285 	case IOCTL_PRIVCMD_HYPERCALL:
    286 		return (do_privcmd_hypercall((void *)arg, mode, cr, rval));
    287 	case IOCTL_PRIVCMD_MMAP:
    288 		if (DOMAIN_IS_PRIVILEGED(xen_info))
    289 			return (do_privcmd_mmap((void *)arg, mode, cr));
    290 		break;
    291 	case IOCTL_PRIVCMD_MMAPBATCH:
    292 		if (DOMAIN_IS_PRIVILEGED(xen_info))
    293 			return (do_privcmd_mmapbatch((void *)arg, mode, cr));
    294 		break;
    295 	default:
    296 		break;
    297 	}
    298 	return (EINVAL);
    299 }
    300 
    301 /*
    302  * The real magic happens in the segmf segment driver.
    303  */
    304 /*ARGSUSED8*/
    305 static int
    306 privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
    307     off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr)
    308 {
    309 	struct segmf_crargs a;
    310 	int error;
    311 
    312 	if (secpolicy_xvm_control(cr))
    313 		return (EPERM);
    314 
    315 	as_rangelock(as);
    316 	if ((flags & MAP_FIXED) == 0) {
    317 		map_addr(addrp, len, (offset_t)off, 0, flags);
    318 		if (*addrp == NULL) {
    319 			error = ENOMEM;
    320 			goto rangeunlock;
    321 		}
    322 	} else {
    323 		/*
    324 		 * User specified address
    325 		 */
    326 		(void) as_unmap(as, *addrp, len);
    327 	}
    328 
    329 	/*
    330 	 * The mapping *must* be MAP_SHARED at offset 0.
    331 	 *
    332 	 * (Foreign pages are treated like device memory; the
    333 	 * ioctl interface allows the backing objects to be
    334 	 * arbitrarily redefined to point at any machine frame.)
    335 	 */
    336 	if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) {
    337 		error = EINVAL;
    338 		goto rangeunlock;
    339 	}
    340 
    341 	a.dev = dev;
    342 	a.prot = (uchar_t)prot;
    343 	a.maxprot = (uchar_t)maxprot;
    344 	error = as_map(as, *addrp, len, segmf_create, &a);
    345 
    346 rangeunlock:
    347 	as_rangeunlock(as);
    348 	return (error);
    349 }
    350 
/*
 * Character device entry points.  The driver supplies its own open,
 * ioctl and segmap routines; every other slot is filled with a
 * nodev/nulldev/nochpoll stub or the generic ddi_prop_op.
 */
static struct cb_ops privcmd_cb_ops = {
	privcmd_open,	/* open */
	nulldev,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	privcmd_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	privcmd_segmap,	/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* presumably the streamtab slot -- see cb_ops(9S) */
	D_64BIT | D_NEW | D_MP	/* driver flags */
};
    368 
/*
 * Device operations vector.  Wires up the getinfo/attach/detach routines
 * defined in this file and points at privcmd_cb_ops for the character
 * device entry points; the remaining slots are stubs or empty.
 */
static struct dev_ops privcmd_dv_ops = {
	DEVO_REV,		/* dev_ops revision */
	0,			/* presumably the reference count -- see dev_ops(9S) */
	privcmd_getinfo,	/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	privcmd_attach,		/* attach */
	privcmd_detach,		/* detach */
	nodev,			/* reset */
	&privcmd_cb_ops,	/* cb_ops */
	0,			/* struct bus_ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
    383 
/*
 * Loadable-driver linkage: identifies this module as a device driver
 * (mod_driverops), gives it a descriptive name and ties it to
 * privcmd_dv_ops.
 */
static struct modldrv modldrv = {
	&mod_driverops,		/* module type: device driver */
	"privcmd driver",	/* descriptive name */
	&privcmd_dv_ops		/* driver ops */
};
    389 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info() below.
 */
static struct modlinkage modl = {
	MODREV_1,	/* modlinkage revision */
	&modldrv	/* the single linkage element: the driver above */
};
    394 
    395 int
    396 _init(void)
    397 {
    398 	return (mod_install(&modl));
    399 }
    400 
    401 int
    402 _fini(void)
    403 {
    404 	return (mod_remove(&modl));
    405 }
    406 
    407 int
    408 _info(struct modinfo *modinfo)
    409 {
    410 	return (mod_info(&modl, modinfo));
    411 }
    412