Home | History | Annotate | Download | only in io
      1   5084  johnlev /*
      2   5084  johnlev  * CDDL HEADER START
      3   5084  johnlev  *
      4   5084  johnlev  * The contents of this file are subject to the terms of the
      5   5084  johnlev  * Common Development and Distribution License (the "License").
      6   5084  johnlev  * You may not use this file except in compliance with the License.
      7   5084  johnlev  *
      8   5084  johnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9   5084  johnlev  * or http://www.opensolaris.org/os/licensing.
     10   5084  johnlev  * See the License for the specific language governing permissions
     11   5084  johnlev  * and limitations under the License.
     12   5084  johnlev  *
     13   5084  johnlev  * When distributing Covered Code, include this CDDL HEADER in each
     14   5084  johnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15   5084  johnlev  * If applicable, add the following below this CDDL HEADER, with the
     16   5084  johnlev  * fields enclosed by brackets "[]" replaced with your own identifying
     17   5084  johnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
     18   5084  johnlev  *
     19   5084  johnlev  * CDDL HEADER END
     20   5084  johnlev  */
     21   5084  johnlev 
     22   5084  johnlev /*
     23  10175   Stuart  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24   5084  johnlev  * Use is subject to license terms.
     25   5084  johnlev  */
     26  10175   Stuart 
     27  10175   Stuart #include <sys/xpv_user.h>
     28   5084  johnlev 
     29   5084  johnlev #include <sys/types.h>
     30   5084  johnlev #include <sys/file.h>
     31   5084  johnlev #include <sys/errno.h>
     32   5084  johnlev #include <sys/open.h>
     33   5084  johnlev #include <sys/cred.h>
     34   5084  johnlev #include <sys/conf.h>
     35   5084  johnlev #include <sys/stat.h>
     36   5084  johnlev #include <sys/modctl.h>
     37   5084  johnlev #include <sys/ddi.h>
     38   5084  johnlev #include <sys/sunddi.h>
     39   5084  johnlev #include <sys/vmsystm.h>
     40   5084  johnlev #include <sys/sdt.h>
     41   5084  johnlev #include <sys/hypervisor.h>
     42   5084  johnlev #include <sys/xen_errno.h>
     43   6784  johnlev #include <sys/policy.h>
     44   5084  johnlev 
     45   5084  johnlev #include <vm/hat_i86.h>
     46   5084  johnlev #include <vm/hat_pte.h>
     47   5084  johnlev #include <vm/seg_mf.h>
     48   5084  johnlev 
     49   5084  johnlev #include <xen/sys/privcmd.h>
     50   5084  johnlev #include <sys/privcmd_impl.h>
     51   5084  johnlev 
/*
 * dev_info node of the attached privcmd instance.  Set by privcmd_attach(),
 * cleared by privcmd_detach(), and handed out by privcmd_getinfo().
 */
static dev_info_t *privcmd_devi;
     53   5084  johnlev 
     54   5084  johnlev /*ARGSUSED*/
     55   5084  johnlev static int
     56   5084  johnlev privcmd_getinfo(dev_info_t *devi, ddi_info_cmd_t cmd, void *arg, void **result)
     57   5084  johnlev {
     58   5084  johnlev 	switch (cmd) {
     59   5084  johnlev 	case DDI_INFO_DEVT2DEVINFO:
     60   5084  johnlev 	case DDI_INFO_DEVT2INSTANCE:
     61   5084  johnlev 		break;
     62   5084  johnlev 	default:
     63   5084  johnlev 		return (DDI_FAILURE);
     64   5084  johnlev 	}
     65   5084  johnlev 
     66   5084  johnlev 	switch (getminor((dev_t)arg)) {
     67   5084  johnlev 	case PRIVCMD_MINOR:
     68   5084  johnlev 		break;
     69   5084  johnlev 	default:
     70   5084  johnlev 		return (DDI_FAILURE);
     71   5084  johnlev 	}
     72   5084  johnlev 
     73   5084  johnlev 	if (cmd == DDI_INFO_DEVT2INSTANCE)
     74   5084  johnlev 		*result = 0;
     75   5084  johnlev 	else
     76   5084  johnlev 		*result = privcmd_devi;
     77   5084  johnlev 	return (DDI_SUCCESS);
     78   5084  johnlev }
     79   5084  johnlev 
     80   5084  johnlev static int
     81   5084  johnlev privcmd_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
     82   5084  johnlev {
     83   5084  johnlev 	if (cmd != DDI_ATTACH)
     84   5084  johnlev 		return (DDI_FAILURE);
     85   5084  johnlev 
     86   5084  johnlev 	if (ddi_create_minor_node(devi, PRIVCMD_NODE,
     87   5084  johnlev 	    S_IFCHR, PRIVCMD_MINOR, DDI_PSEUDO, 0) != DDI_SUCCESS)
     88   5084  johnlev 		return (DDI_FAILURE);
     89   5084  johnlev 
     90   5084  johnlev 	privcmd_devi = devi;
     91   5084  johnlev 	ddi_report_dev(devi);
     92   5084  johnlev 	return (DDI_SUCCESS);
     93   5084  johnlev }
     94   5084  johnlev 
     95   5084  johnlev static int
     96   5084  johnlev privcmd_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
     97   5084  johnlev {
     98   5084  johnlev 	if (cmd != DDI_DETACH)
     99   5084  johnlev 		return (DDI_FAILURE);
    100   5084  johnlev 	ddi_remove_minor_node(devi, NULL);
    101   5084  johnlev 	privcmd_devi = NULL;
    102   5084  johnlev 	return (DDI_SUCCESS);
    103   5084  johnlev }
    104   5084  johnlev 
    105   5084  johnlev /*ARGSUSED1*/
    106   5084  johnlev static int
    107   5084  johnlev privcmd_open(dev_t *dev, int flag, int otyp, cred_t *cr)
    108   5084  johnlev {
    109   5084  johnlev 	return (getminor(*dev) == PRIVCMD_MINOR ? 0 : ENXIO);
    110   5084  johnlev }
    111   5084  johnlev 
    112   5084  johnlev /*
    113   5084  johnlev  * Map a contiguous set of machine frames in a foreign domain.
    114   5084  johnlev  * Used in the following way:
    115   5084  johnlev  *
    116   5084  johnlev  *	privcmd_mmap_t p;
    117   5084  johnlev  *	privcmd_mmap_entry_t e;
    118   5084  johnlev  *
    119   5084  johnlev  *	addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
    120   5084  johnlev  *	p.num = number of privcmd_mmap_entry_t's
    121   5084  johnlev  *	p.dom = domid;
    122   5084  johnlev  *	p.entry = &e;
    123   5084  johnlev  *	e.va = addr;
    124   5084  johnlev  *	e.mfn = mfn;
    125   5084  johnlev  *	e.npages = btopr(size);
    126   5084  johnlev  *	ioctl(fd, IOCTL_PRIVCMD_MMAP, &p);
    127   5084  johnlev  */
    128   5084  johnlev /*ARGSUSED2*/
    129   5084  johnlev int
    130   5084  johnlev do_privcmd_mmap(void *uarg, int mode, cred_t *cr)
    131   5084  johnlev {
    132   5084  johnlev 	privcmd_mmap_t __mmapcmd, *mmc = &__mmapcmd;
    133   5084  johnlev 	privcmd_mmap_entry_t *umme;
    134   5084  johnlev 	struct as *as = curproc->p_as;
    135   5084  johnlev 	struct seg *seg;
    136   5084  johnlev 	int i, error = 0;
    137   5084  johnlev 
    138   5084  johnlev 	if (ddi_copyin(uarg, mmc, sizeof (*mmc), mode))
    139   5084  johnlev 		return (EFAULT);
    140   5084  johnlev 
    141   5084  johnlev 	DTRACE_XPV3(mmap__start, domid_t, mmc->dom, int, mmc->num,
    142   5084  johnlev 	    privcmd_mmap_entry_t *, mmc->entry);
    143   5084  johnlev 
    144   5084  johnlev 	if (mmc->dom == DOMID_SELF) {
    145   5084  johnlev 		error = ENOTSUP;	/* Too paranoid? */
    146   5084  johnlev 		goto done;
    147   5084  johnlev 	}
    148   5084  johnlev 
    149   5084  johnlev 	for (umme = mmc->entry, i = 0; i < mmc->num; i++, umme++) {
    150   5084  johnlev 		privcmd_mmap_entry_t __mmapent, *mme = &__mmapent;
    151   5084  johnlev 		caddr_t addr;
    152   5084  johnlev 
    153   5084  johnlev 		if (ddi_copyin(umme, mme, sizeof (*mme), mode)) {
    154   5084  johnlev 			error = EFAULT;
    155   5084  johnlev 			break;
    156   5084  johnlev 		}
    157   5084  johnlev 
    158   5084  johnlev 		DTRACE_XPV3(mmap__entry, ulong_t, mme->va, ulong_t, mme->mfn,
    159   5084  johnlev 		    ulong_t, mme->npages);
    160   5084  johnlev 
    161   5084  johnlev 		if (mme->mfn == MFN_INVALID) {
    162   5084  johnlev 			error = EINVAL;
    163   5084  johnlev 			break;
    164   5084  johnlev 		}
    165   5084  johnlev 
    166   5084  johnlev 		addr = (caddr_t)mme->va;
    167   5084  johnlev 
    168   5084  johnlev 		/*
    169   5084  johnlev 		 * Find the segment we want to mess with, then add
    170   5084  johnlev 		 * the mfn range to the segment.
    171   5084  johnlev 		 */
    172   5084  johnlev 		AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
    173   5084  johnlev 		if ((seg = as_findseg(as, addr, 0)) == NULL ||
    174   5084  johnlev 		    addr + mmu_ptob(mme->npages) > seg->s_base + seg->s_size)
    175   5084  johnlev 			error = EINVAL;
    176   5084  johnlev 		else
    177   5084  johnlev 			error = segmf_add_mfns(seg, addr,
    178   5084  johnlev 			    mme->mfn, mme->npages, mmc->dom);
    179   5084  johnlev 		AS_LOCK_EXIT(as, &as->a_lock);
    180   5084  johnlev 
    181   5084  johnlev 		if (error != 0)
    182   5084  johnlev 			break;
    183   5084  johnlev 	}
    184   5084  johnlev 
    185   5084  johnlev done:
    186   5084  johnlev 	DTRACE_XPV1(mmap__end, int, error);
    187   5084  johnlev 
    188   5084  johnlev 	return (error);
    189   5084  johnlev }
    190   5084  johnlev 
    191   5084  johnlev /*
    192   5084  johnlev  * Set up the address range to map to an array of mfns in
    193   5084  johnlev  * a foreign domain.  Used in the following way:
    194   5084  johnlev  *
    195   5084  johnlev  *	privcmd_mmap_batch_t p;
    196   5084  johnlev  *
    197   5084  johnlev  *	addr = mmap(NULL, size, prot, MAP_SHARED, fd, 0);
    198   5084  johnlev  *	p.num = number of pages
    199   5084  johnlev  *	p.dom = domid
    200   5084  johnlev  *	p.addr = addr;
    201   5084  johnlev  *	p.arr = array of mfns, indexed 0 .. p.num - 1
    202   5084  johnlev  *	ioctl(fd, IOCTL_PRIVCMD_MMAPBATCH, &p);
    203   5084  johnlev  */
    204   5084  johnlev /*ARGSUSED2*/
    205   5084  johnlev static int
    206   5084  johnlev do_privcmd_mmapbatch(void *uarg, int mode, cred_t *cr)
    207   5084  johnlev {
    208   5084  johnlev 	privcmd_mmapbatch_t __mmapbatch, *mmb = &__mmapbatch;
    209   5084  johnlev 	struct as *as = curproc->p_as;
    210   5084  johnlev 	struct seg *seg;
    211   5084  johnlev 	int i, error = 0;
    212   5084  johnlev 	caddr_t addr;
    213   5084  johnlev 	ulong_t *ulp;
    214   5084  johnlev 
    215   5084  johnlev 	if (ddi_copyin(uarg, mmb, sizeof (*mmb), mode))
    216   5084  johnlev 		return (EFAULT);
    217   5084  johnlev 
    218   5084  johnlev 	DTRACE_XPV3(mmapbatch__start, domid_t, mmb->dom, int, mmb->num,
    219   5084  johnlev 	    caddr_t, mmb->addr);
    220   5084  johnlev 
    221   5084  johnlev 	addr = (caddr_t)mmb->addr;
    222   5084  johnlev 	AS_LOCK_ENTER(as, &as->a_lock, RW_READER);
    223   5084  johnlev 	if ((seg = as_findseg(as, addr, 0)) == NULL ||
    224   5084  johnlev 	    addr + ptob(mmb->num) > seg->s_base + seg->s_size) {
    225   5084  johnlev 		error = EINVAL;
    226   5084  johnlev 		goto done;
    227   5084  johnlev 	}
    228   5084  johnlev 
    229   5084  johnlev 	for (i = 0, ulp = mmb->arr;
    230   5084  johnlev 	    i < mmb->num; i++, addr += PAGESIZE, ulp++) {
    231   5084  johnlev 		mfn_t mfn;
    232   5084  johnlev 
    233   5084  johnlev 		if (fulword(ulp, &mfn) != 0) {
    234   5084  johnlev 			error = EFAULT;
    235   5084  johnlev 			break;
    236   5084  johnlev 		}
    237   5084  johnlev 
    238   5084  johnlev 		if (mfn == MFN_INVALID) {
    239   6144      rab 			/*
    240   6144      rab 			 * This mfn is invalid and should not be added to
    241   6144      rab 			 * segmf, as we'd only cause an immediate EFAULT when
    242   6144      rab 			 * we tried to fault it in.
    243   6144      rab 			 */
    244   6144      rab 			mfn |= XEN_DOMCTL_PFINFO_XTAB;
    245   6144      rab 			continue;
    246   5084  johnlev 		}
    247   5084  johnlev 
    248   5084  johnlev 		if (segmf_add_mfns(seg, addr, mfn, 1, mmb->dom) == 0)
    249   5084  johnlev 			continue;
    250   5084  johnlev 
    251   5084  johnlev 		/*
    252   5084  johnlev 		 * Tell the process that this MFN could not be mapped, so it
    253   5084  johnlev 		 * won't later try to access it.
    254   5084  johnlev 		 */
    255   6144      rab 		mfn |= XEN_DOMCTL_PFINFO_XTAB;
    256   5084  johnlev 		if (sulword(ulp, mfn) != 0) {
    257   5084  johnlev 			error = EFAULT;
    258   5084  johnlev 			break;
    259   5084  johnlev 		}
    260   5084  johnlev 	}
    261   5084  johnlev 
    262   5084  johnlev done:
    263   5084  johnlev 	AS_LOCK_EXIT(as, &as->a_lock);
    264   5084  johnlev 
    265   5084  johnlev 	DTRACE_XPV3(mmapbatch__end, int, error, struct seg *, seg, caddr_t,
    266   5084  johnlev 	    mmb->addr);
    267   5084  johnlev 
    268   5084  johnlev 	return (error);
    269   5084  johnlev }
    270   5084  johnlev 
    271   5084  johnlev /*ARGSUSED*/
    272   5084  johnlev static int
    273   5084  johnlev privcmd_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cr, int *rval)
    274   5084  johnlev {
    275   6784  johnlev 	if (secpolicy_xvm_control(cr))
    276   6784  johnlev 		return (EPERM);
    277   5084  johnlev 
    278   5084  johnlev 	/*
    279   5084  johnlev 	 * Everything is a -native- data type.
    280   5084  johnlev 	 */
    281   6784  johnlev 	if ((mode & FMODELS) != FNATIVE)
    282   6784  johnlev 		return (EOVERFLOW);
    283   5084  johnlev 
    284   5084  johnlev 	switch (cmd) {
    285   5084  johnlev 	case IOCTL_PRIVCMD_HYPERCALL:
    286   5084  johnlev 		return (do_privcmd_hypercall((void *)arg, mode, cr, rval));
    287   5084  johnlev 	case IOCTL_PRIVCMD_MMAP:
    288   5084  johnlev 		if (DOMAIN_IS_PRIVILEGED(xen_info))
    289   5084  johnlev 			return (do_privcmd_mmap((void *)arg, mode, cr));
    290   5084  johnlev 		break;
    291   5084  johnlev 	case IOCTL_PRIVCMD_MMAPBATCH:
    292   5084  johnlev 		if (DOMAIN_IS_PRIVILEGED(xen_info))
    293   5084  johnlev 			return (do_privcmd_mmapbatch((void *)arg, mode, cr));
    294   5084  johnlev 		break;
    295   5084  johnlev 	default:
    296   5084  johnlev 		break;
    297   5084  johnlev 	}
    298   5084  johnlev 	return (EINVAL);
    299   5084  johnlev }
    300   5084  johnlev 
    301   5084  johnlev /*
    302   5084  johnlev  * The real magic happens in the segmf segment driver.
    303   5084  johnlev  */
    304   5084  johnlev /*ARGSUSED8*/
    305   5084  johnlev static int
    306   5084  johnlev privcmd_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp,
    307   5084  johnlev     off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cr)
    308   5084  johnlev {
    309   5084  johnlev 	struct segmf_crargs a;
    310   5084  johnlev 	int error;
    311   6784  johnlev 
    312   6784  johnlev 	if (secpolicy_xvm_control(cr))
    313   6784  johnlev 		return (EPERM);
    314   5084  johnlev 
    315   5084  johnlev 	as_rangelock(as);
    316   5084  johnlev 	if ((flags & MAP_FIXED) == 0) {
    317   5084  johnlev 		map_addr(addrp, len, (offset_t)off, 0, flags);
    318   5084  johnlev 		if (*addrp == NULL) {
    319   5084  johnlev 			error = ENOMEM;
    320   5084  johnlev 			goto rangeunlock;
    321   5084  johnlev 		}
    322   5084  johnlev 	} else {
    323   5084  johnlev 		/*
    324   5084  johnlev 		 * User specified address
    325   5084  johnlev 		 */
    326   5084  johnlev 		(void) as_unmap(as, *addrp, len);
    327   5084  johnlev 	}
    328   5084  johnlev 
    329   5084  johnlev 	/*
    330   5084  johnlev 	 * The mapping *must* be MAP_SHARED at offset 0.
    331   5084  johnlev 	 *
    332   5084  johnlev 	 * (Foreign pages are treated like device memory; the
    333   5084  johnlev 	 * ioctl interface allows the backing objects to be
    334   5084  johnlev 	 * arbitrarily redefined to point at any machine frame.)
    335   5084  johnlev 	 */
    336   5084  johnlev 	if ((flags & MAP_TYPE) != MAP_SHARED || off != 0) {
    337   5084  johnlev 		error = EINVAL;
    338   5084  johnlev 		goto rangeunlock;
    339   5084  johnlev 	}
    340   5084  johnlev 
    341   5084  johnlev 	a.dev = dev;
    342   5084  johnlev 	a.prot = (uchar_t)prot;
    343   5084  johnlev 	a.maxprot = (uchar_t)maxprot;
    344   5084  johnlev 	error = as_map(as, *addrp, len, segmf_create, &a);
    345   5084  johnlev 
    346   5084  johnlev rangeunlock:
    347   5084  johnlev 	as_rangeunlock(as);
    348   5084  johnlev 	return (error);
    349   5084  johnlev }
    350   5084  johnlev 
/*
 * Character device entry points.  Only open, ioctl and segmap are
 * implemented by this driver; the remaining slots are filled with the
 * generic nulldev/nodev/nochpoll/ddi_prop_op routines.
 */
static struct cb_ops privcmd_cb_ops = {
	privcmd_open,	/* open */
	nulldev,	/* close */
	nodev,		/* strategy */
	nodev,		/* print */
	nodev,		/* dump */
	nodev,		/* read */
	nodev,		/* write */
	privcmd_ioctl,	/* ioctl */
	nodev,		/* devmap */
	nodev,		/* mmap */
	privcmd_segmap,	/* segmap */
	nochpoll,	/* poll */
	ddi_prop_op,	/* prop_op */
	NULL,		/* streamtab (see cb_ops(9S)) */
	D_64BIT | D_NEW | D_MP	/* driver compatibility flags */
};
    368   5084  johnlev 
/*
 * Device operations: ties the autoconfiguration entry points
 * (getinfo/attach/detach) to the character entry points above.
 */
static struct dev_ops privcmd_dv_ops = {
	DEVO_REV,		/* dev_ops revision */
	0,			/* reference count */
	privcmd_getinfo,	/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	privcmd_attach,		/* attach */
	privcmd_detach,		/* detach */
	nodev,			/* reset */
	&privcmd_cb_ops,	/* struct cb_ops */
	0,			/* struct bus_ops */
	NULL,			/* power */
	ddi_quiesce_not_needed,		/* quiesce */
};
    383   5084  johnlev 
/*
 * Loadable-module description of this driver: the module type, the
 * description string, and the driver's dev_ops.
 */
static struct modldrv modldrv = {
	&mod_driverops,
	"privcmd driver",
	&privcmd_dv_ops
};
    389   5084  johnlev 
/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info() respectively.
 */
static struct modlinkage modl = {
	MODREV_1,
	&modldrv
};
    394   5084  johnlev 
    395   5084  johnlev int
    396   5084  johnlev _init(void)
    397   5084  johnlev {
    398   5084  johnlev 	return (mod_install(&modl));
    399   5084  johnlev }
    400   5084  johnlev 
    401   5084  johnlev int
    402   5084  johnlev _fini(void)
    403   5084  johnlev {
    404   5084  johnlev 	return (mod_remove(&modl));
    405   5084  johnlev }
    406   5084  johnlev 
    407   5084  johnlev int
    408   5084  johnlev _info(struct modinfo *modinfo)
    409   5084  johnlev {
    410   5084  johnlev 	return (mod_info(&modl, modinfo));
    411   5084  johnlev }
    412