Home | History | Annotate | Download | only in vm
      1  5084  johnlev /*
      2  5084  johnlev  * CDDL HEADER START
      3  5084  johnlev  *
      4  5084  johnlev  * The contents of this file are subject to the terms of the
      5  5084  johnlev  * Common Development and Distribution License (the "License").
      6  5084  johnlev  * You may not use this file except in compliance with the License.
      7  5084  johnlev  *
      8  5084  johnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  5084  johnlev  * or http://www.opensolaris.org/os/licensing.
     10  5084  johnlev  * See the License for the specific language governing permissions
     11  5084  johnlev  * and limitations under the License.
     12  5084  johnlev  *
     13  5084  johnlev  * When distributing Covered Code, include this CDDL HEADER in each
     14  5084  johnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  5084  johnlev  * If applicable, add the following below this CDDL HEADER, with the
     16  5084  johnlev  * fields enclosed by brackets "[]" replaced with your own identifying
     17  5084  johnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  5084  johnlev  *
     19  5084  johnlev  * CDDL HEADER END
     20  5084  johnlev  */
     21  5084  johnlev 
     22  5084  johnlev /*
     23  7756     Mark  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  5084  johnlev  * Use is subject to license terms.
     25  5084  johnlev  */
     26  5084  johnlev 
     27  5084  johnlev /*
     28  5084  johnlev  * Machine frame segment driver.  This segment driver allows dom0 processes to
     29  5084  johnlev  * map pages of other domains or Xen (e.g. during save/restore).  ioctl()s on
     30  5084  johnlev  * the privcmd driver provide the MFN values backing each mapping, and we map
     31  5084  johnlev  * them into the process's address space at this time.  Demand-faulting is not
     32  5084  johnlev  * supported by this driver due to the requirements upon some of the ioctl()s.
     33  5084  johnlev  */
     34  5084  johnlev 
     35  5084  johnlev 
     36  5084  johnlev #include <sys/types.h>
     37  5084  johnlev #include <sys/systm.h>
     38  5084  johnlev #include <sys/vmsystm.h>
     39  5084  johnlev #include <sys/mman.h>
     40  5084  johnlev #include <sys/errno.h>
     41  5084  johnlev #include <sys/kmem.h>
     42  5084  johnlev #include <sys/cmn_err.h>
     43  5084  johnlev #include <sys/vnode.h>
     44  5084  johnlev #include <sys/conf.h>
     45  5084  johnlev #include <sys/debug.h>
     46  5084  johnlev #include <sys/lgrp.h>
     47  5084  johnlev #include <sys/hypervisor.h>
     48  5084  johnlev 
     49  5084  johnlev #include <vm/page.h>
     50  5084  johnlev #include <vm/hat.h>
     51  5084  johnlev #include <vm/as.h>
     52  5084  johnlev #include <vm/seg.h>
     53  5084  johnlev 
     54  5084  johnlev #include <vm/hat_pte.h>
     55  7756     Mark #include <vm/hat_i86.h>
     56  5084  johnlev #include <vm/seg_mf.h>
     57  5084  johnlev 
     58  5084  johnlev #include <sys/fs/snode.h>
     59  5084  johnlev 
/*
 * Yield the s_commonvp field of whatever VTOS() returns for vp.  This is the
 * vnode used for the VOP_ADDMAP()/VOP_DELMAP() calls in this file and the one
 * reported by segmf_getvp() and segmf_getmemid().
 */
#define	VTOCVP(vp)	(VTOS(vp)->s_commonvp)
     61  5084  johnlev 
/* Payload of a SEGMF_MAP_MFN map entry: the frame number backing the page. */
typedef struct segmf_mfn_s {
	mfn_t		m_mfn;
} segmf_mfn_t;
     65  7756     Mark 
/*
 * Payload of a SEGMF_MAP_GREF map entry.
 *
 * g_flags bits:
 *	SEGMF_GFLAGS_WR		map the grant writable; when clear the grant is
 *				mapped with GNTMAP_readonly
 *	SEGMF_GFLAGS_MAPPED	the grant has been mapped and g_handle holds
 *				the handle needed to unmap it
 */
#define	SEGMF_GFLAGS_WR		0x1
#define	SEGMF_GFLAGS_MAPPED	0x2
typedef struct segmf_gref_s {
	uint64_t	g_ptep;		/* PTE address passed as host_addr */
	grant_ref_t	g_gref;		/* grant reference to map */
	uint32_t	g_flags;	/* SEGMF_GFLAGS_* */
	grant_handle_t	g_handle;	/* handle from the map operation */
} segmf_gref_t;
     75  7756     Mark 
/*
 * Per-page payload; which member is meaningful is selected by the t_type
 * field of the enclosing segmf_map_t.
 */
typedef union segmf_mu_u {
	segmf_mfn_t	m;	/* valid when t_type == SEGMF_MAP_MFN */
	segmf_gref_t	g;	/* valid when t_type == SEGMF_MAP_GREF */
} segmf_mu_t;
     80  7756     Mark 
/* Discriminator for a per-page map entry. */
typedef enum {
	SEGMF_MAP_EMPTY = 0,	/* nothing recorded for this page */
	SEGMF_MAP_MFN,		/* page is described by a foreign MFN */
	SEGMF_MAP_GREF		/* page is described by a grant reference */
} segmf_map_type_t;
     86  7756     Mark 
/* One entry of the per-segment page map (see struct segmf_data's map). */
typedef struct segmf_map_s {
	segmf_map_type_t	t_type;	/* selects the valid member of u */
	segmf_mu_t		u;
} segmf_map_t;
     91  5084  johnlev 
/*
 * Private data of a seg_mf segment, stored in seg->s_data.
 */
struct segmf_data {
	kmutex_t	lock;		/* held while mappings are added */
	struct vnode	*vp;		/* device vnode from specfind() */
	uchar_t		prot;		/* protections from create args */
	uchar_t		maxprot;	/* max protections from create args */
	size_t		softlockcnt;	/* soft-locked pages; freemem_lock */
	domid_t		domid;		/* foreign domain; 0 until first add */
	segmf_map_t	*map;		/* one entry per page of the segment */
};
    101  5084  johnlev 
/* Ops vector for this driver; the initialiser is at the end of the file. */
static struct seg_ops segmf_ops;

/* Defined after its caller, segmf_add_grefs(). */
static int segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t len);
    105  5084  johnlev 
    106  5084  johnlev static struct segmf_data *
    107  5084  johnlev segmf_data_zalloc(struct seg *seg)
    108  5084  johnlev {
    109  5084  johnlev 	struct segmf_data *data = kmem_zalloc(sizeof (*data), KM_SLEEP);
    110  5084  johnlev 
    111  5084  johnlev 	mutex_init(&data->lock, "segmf.lock", MUTEX_DEFAULT, NULL);
    112  5084  johnlev 	seg->s_ops = &segmf_ops;
    113  5084  johnlev 	seg->s_data = data;
    114  5084  johnlev 	return (data);
    115  5084  johnlev }
    116  5084  johnlev 
    117  5084  johnlev int
    118  5084  johnlev segmf_create(struct seg *seg, void *args)
    119  5084  johnlev {
    120  5084  johnlev 	struct segmf_crargs *a = args;
    121  5084  johnlev 	struct segmf_data *data;
    122  5084  johnlev 	struct as *as = seg->s_as;
    123  5084  johnlev 	pgcnt_t i, npages = seg_pages(seg);
    124  5084  johnlev 	int error;
    125  5084  johnlev 
    126  5084  johnlev 	hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
    127  5084  johnlev 
    128  5084  johnlev 	data = segmf_data_zalloc(seg);
    129  5084  johnlev 	data->vp = specfind(a->dev, VCHR);
    130  5084  johnlev 	data->prot = a->prot;
    131  5084  johnlev 	data->maxprot = a->maxprot;
    132  5084  johnlev 
    133  7756     Mark 	data->map = kmem_alloc(npages * sizeof (segmf_map_t), KM_SLEEP);
    134  7756     Mark 	for (i = 0; i < npages; i++) {
    135  7756     Mark 		data->map[i].t_type = SEGMF_MAP_EMPTY;
    136  7756     Mark 	}
    137  5084  johnlev 
    138  5084  johnlev 	error = VOP_ADDMAP(VTOCVP(data->vp), 0, as, seg->s_base, seg->s_size,
    139  5331      amw 	    data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);
    140  5084  johnlev 
    141  5084  johnlev 	if (error != 0)
    142  5084  johnlev 		hat_unload(as->a_hat,
    143  5084  johnlev 		    seg->s_base, seg->s_size, HAT_UNLOAD_UNMAP);
    144  5084  johnlev 	return (error);
    145  5084  johnlev }
    146  5084  johnlev 
    147  5084  johnlev /*
    148  5084  johnlev  * Duplicate a seg and return new segment in newseg.
    149  5084  johnlev  */
    150  5084  johnlev static int
    151  5084  johnlev segmf_dup(struct seg *seg, struct seg *newseg)
    152  5084  johnlev {
    153  5084  johnlev 	struct segmf_data *data = seg->s_data;
    154  5084  johnlev 	struct segmf_data *ndata;
    155  5084  johnlev 	pgcnt_t npages = seg_pages(newseg);
    156  7756     Mark 	size_t sz;
    157  5084  johnlev 
    158  5084  johnlev 	ndata = segmf_data_zalloc(newseg);
    159  5084  johnlev 
    160  5084  johnlev 	VN_HOLD(data->vp);
    161  5084  johnlev 	ndata->vp = data->vp;
    162  5084  johnlev 	ndata->prot = data->prot;
    163  5084  johnlev 	ndata->maxprot = data->maxprot;
    164  5084  johnlev 	ndata->domid = data->domid;
    165  5084  johnlev 
    166  7756     Mark 	sz = npages * sizeof (segmf_map_t);
    167  7756     Mark 	ndata->map = kmem_alloc(sz, KM_SLEEP);
    168  7756     Mark 	bcopy(data->map, ndata->map, sz);
    169  5084  johnlev 
    170  5084  johnlev 	return (VOP_ADDMAP(VTOCVP(ndata->vp), 0, newseg->s_as,
    171  5084  johnlev 	    newseg->s_base, newseg->s_size, ndata->prot, ndata->maxprot,
    172  5331      amw 	    MAP_SHARED, CRED(), NULL));
    173  5084  johnlev }
    174  5084  johnlev 
    175  5084  johnlev /*
    176  5084  johnlev  * We only support unmapping the whole segment, and we automatically unlock
    177  5084  johnlev  * what we previously soft-locked.
    178  5084  johnlev  */
    179  5084  johnlev static int
    180  5084  johnlev segmf_unmap(struct seg *seg, caddr_t addr, size_t len)
    181  5084  johnlev {
    182  5084  johnlev 	struct segmf_data *data = seg->s_data;
    183  5084  johnlev 	offset_t off;
    184  5084  johnlev 
    185  5084  johnlev 	if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
    186  5084  johnlev 	    (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
    187  5084  johnlev 		panic("segmf_unmap");
    188  5084  johnlev 
    189  5084  johnlev 	if (addr != seg->s_base || len != seg->s_size)
    190  5084  johnlev 		return (ENOTSUP);
    191  5084  johnlev 
    192  5084  johnlev 	hat_unload(seg->s_as->a_hat, addr, len,
    193  5084  johnlev 	    HAT_UNLOAD_UNMAP | HAT_UNLOAD_UNLOCK);
    194  5084  johnlev 
    195  5084  johnlev 	off = (offset_t)seg_page(seg, addr);
    196  5084  johnlev 
    197  5084  johnlev 	ASSERT(data->vp != NULL);
    198  5084  johnlev 
    199  5084  johnlev 	(void) VOP_DELMAP(VTOCVP(data->vp), off, seg->s_as, addr, len,
    200  5331      amw 	    data->prot, data->maxprot, MAP_SHARED, CRED(), NULL);
    201  5084  johnlev 
    202  5084  johnlev 	seg_free(seg);
    203  5084  johnlev 	return (0);
    204  5084  johnlev }
    205  5084  johnlev 
    206  5084  johnlev static void
    207  5084  johnlev segmf_free(struct seg *seg)
    208  5084  johnlev {
    209  5084  johnlev 	struct segmf_data *data = seg->s_data;
    210  5084  johnlev 	pgcnt_t npages = seg_pages(seg);
    211  5084  johnlev 
    212  7756     Mark 	kmem_free(data->map, npages * sizeof (segmf_map_t));
    213  5084  johnlev 	VN_RELE(data->vp);
    214  5084  johnlev 	mutex_destroy(&data->lock);
    215  5084  johnlev 	kmem_free(data, sizeof (*data));
    216  5084  johnlev }
    217  5084  johnlev 
    218  5084  johnlev static int segmf_faultpage_debug = 0;
    219  5084  johnlev /*ARGSUSED*/
    220  5084  johnlev static int
    221  5084  johnlev segmf_faultpage(struct hat *hat, struct seg *seg, caddr_t addr,
    222  5084  johnlev     enum fault_type type, uint_t prot)
    223  5084  johnlev {
    224  5084  johnlev 	struct segmf_data *data = seg->s_data;
    225  5084  johnlev 	uint_t hat_flags = HAT_LOAD_NOCONSIST;
    226  5084  johnlev 	mfn_t mfn;
    227  5084  johnlev 	x86pte_t pte;
    228  7756     Mark 	segmf_map_t *map;
    229  7756     Mark 	uint_t idx;
    230  5084  johnlev 
    231  5084  johnlev 
    232  7756     Mark 	idx = seg_page(seg, addr);
    233  7756     Mark 	map = &data->map[idx];
    234  7756     Mark 	ASSERT(map->t_type == SEGMF_MAP_MFN);
    235  7756     Mark 
    236  7756     Mark 	mfn = map->u.m.m_mfn;
    237  5084  johnlev 
    238  5084  johnlev 	if (type == F_SOFTLOCK) {
    239  5084  johnlev 		mutex_enter(&freemem_lock);
    240  5084  johnlev 		data->softlockcnt++;
    241  5084  johnlev 		mutex_exit(&freemem_lock);
    242  5084  johnlev 		hat_flags |= HAT_LOAD_LOCK;
    243  5084  johnlev 	} else
    244  5084  johnlev 		hat_flags |= HAT_LOAD;
    245  5084  johnlev 
    246  5084  johnlev 	if (segmf_faultpage_debug > 0) {
    247  5084  johnlev 		uprintf("segmf_faultpage: addr %p domid %x mfn %lx prot %x\n",
    248  5084  johnlev 		    (void *)addr, data->domid, mfn, prot);
    249  5084  johnlev 		segmf_faultpage_debug--;
    250  5084  johnlev 	}
    251  5084  johnlev 
    252  5084  johnlev 	/*
    253  5084  johnlev 	 * Ask the HAT to load a throwaway mapping to page zero, then
    254  5084  johnlev 	 * overwrite it with our foreign domain mapping. It gets removed
    255  5084  johnlev 	 * later via hat_unload()
    256  5084  johnlev 	 */
    257  5084  johnlev 	hat_devload(hat, addr, MMU_PAGESIZE, (pfn_t)0,
    258  5084  johnlev 	    PROT_READ | HAT_UNORDERED_OK, hat_flags);
    259  5084  johnlev 
    260  5084  johnlev 	pte = mmu_ptob((x86pte_t)mfn) | PT_VALID | PT_USER | PT_FOREIGN;
    261  5084  johnlev 	if (prot & PROT_WRITE)
    262  5084  johnlev 		pte |= PT_WRITABLE;
    263  5084  johnlev 
    264  5084  johnlev 	if (HYPERVISOR_update_va_mapping_otherdomain((uintptr_t)addr, pte,
    265  5084  johnlev 	    UVMF_INVLPG | UVMF_ALL, data->domid) != 0) {
    266  5084  johnlev 		hat_flags = HAT_UNLOAD_UNMAP;
    267  5084  johnlev 
    268  5084  johnlev 		if (type == F_SOFTLOCK) {
    269  5084  johnlev 			hat_flags |= HAT_UNLOAD_UNLOCK;
    270  5084  johnlev 			mutex_enter(&freemem_lock);
    271  5084  johnlev 			data->softlockcnt--;
    272  5084  johnlev 			mutex_exit(&freemem_lock);
    273  5084  johnlev 		}
    274  5084  johnlev 
    275  5084  johnlev 		hat_unload(hat, addr, MMU_PAGESIZE, hat_flags);
    276  5084  johnlev 		return (FC_MAKE_ERR(EFAULT));
    277  5084  johnlev 	}
    278  5084  johnlev 
    279  5084  johnlev 	return (0);
    280  5084  johnlev }
    281  5084  johnlev 
    282  5084  johnlev static int
    283  5084  johnlev seg_rw_to_prot(enum seg_rw rw)
    284  5084  johnlev {
    285  5084  johnlev 	switch (rw) {
    286  5084  johnlev 	case S_READ:
    287  5084  johnlev 		return (PROT_READ);
    288  5084  johnlev 	case S_WRITE:
    289  5084  johnlev 		return (PROT_WRITE);
    290  5084  johnlev 	case S_EXEC:
    291  5084  johnlev 		return (PROT_EXEC);
    292  5084  johnlev 	case S_OTHER:
    293  5084  johnlev 	default:
    294  5084  johnlev 		break;
    295  5084  johnlev 	}
    296  5084  johnlev 	return (PROT_READ | PROT_WRITE | PROT_EXEC);
    297  5084  johnlev }
    298  5084  johnlev 
    299  5084  johnlev static void
    300  5084  johnlev segmf_softunlock(struct hat *hat, struct seg *seg, caddr_t addr, size_t len)
    301  5084  johnlev {
    302  5084  johnlev 	struct segmf_data *data = seg->s_data;
    303  5084  johnlev 
    304  5084  johnlev 	hat_unlock(hat, addr, len);
    305  5084  johnlev 
    306  5084  johnlev 	mutex_enter(&freemem_lock);
    307  5084  johnlev 	ASSERT(data->softlockcnt >= btopr(len));
    308  5084  johnlev 	data->softlockcnt -= btopr(len);
    309  5084  johnlev 	mutex_exit(&freemem_lock);
    310  5084  johnlev 
    311  5084  johnlev 	if (data->softlockcnt == 0) {
    312  5084  johnlev 		struct as *as = seg->s_as;
    313  5084  johnlev 
    314  5084  johnlev 		if (AS_ISUNMAPWAIT(as)) {
    315  5084  johnlev 			mutex_enter(&as->a_contents);
    316  5084  johnlev 			if (AS_ISUNMAPWAIT(as)) {
    317  5084  johnlev 				AS_CLRUNMAPWAIT(as);
    318  5084  johnlev 				cv_broadcast(&as->a_cv);
    319  5084  johnlev 			}
    320  5084  johnlev 			mutex_exit(&as->a_contents);
    321  5084  johnlev 		}
    322  5084  johnlev 	}
    323  5084  johnlev }
    324  5084  johnlev 
    325  5084  johnlev static int
    326  5084  johnlev segmf_fault_range(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    327  5084  johnlev     enum fault_type type, enum seg_rw rw)
    328  5084  johnlev {
    329  5084  johnlev 	struct segmf_data *data = seg->s_data;
    330  5084  johnlev 	int error = 0;
    331  5084  johnlev 	caddr_t a;
    332  5084  johnlev 
    333  5084  johnlev 	if ((data->prot & seg_rw_to_prot(rw)) == 0)
    334  5084  johnlev 		return (FC_PROT);
    335  5084  johnlev 
    336  5084  johnlev 	/* loop over the address range handling each fault */
    337  5084  johnlev 
    338  5084  johnlev 	for (a = addr; a < addr + len; a += PAGESIZE) {
    339  5084  johnlev 		error = segmf_faultpage(hat, seg, a, type, data->prot);
    340  5084  johnlev 		if (error != 0)
    341  5084  johnlev 			break;
    342  5084  johnlev 	}
    343  5084  johnlev 
    344  5084  johnlev 	if (error != 0 && type == F_SOFTLOCK) {
    345  5084  johnlev 		size_t done = (size_t)(a - addr);
    346  5084  johnlev 
    347  5084  johnlev 		/*
    348  5084  johnlev 		 * Undo what's been done so far.
    349  5084  johnlev 		 */
    350  5084  johnlev 		if (done > 0)
    351  5084  johnlev 			segmf_softunlock(hat, seg, addr, done);
    352  5084  johnlev 	}
    353  5084  johnlev 
    354  5084  johnlev 	return (error);
    355  5084  johnlev }
    356  5084  johnlev 
    357  5084  johnlev /*
    358  5084  johnlev  * We never demand-fault for seg_mf.
    359  5084  johnlev  */
    360  5084  johnlev /*ARGSUSED*/
    361  5084  johnlev static int
    362  5084  johnlev segmf_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len,
    363  5084  johnlev     enum fault_type type, enum seg_rw rw)
    364  5084  johnlev {
    365  5084  johnlev 	return (FC_MAKE_ERR(EFAULT));
    366  5084  johnlev }
    367  5084  johnlev 
    368  5084  johnlev /*ARGSUSED*/
    369  5084  johnlev static int
    370  5084  johnlev segmf_faulta(struct seg *seg, caddr_t addr)
    371  5084  johnlev {
    372  5084  johnlev 	return (0);
    373  5084  johnlev }
    374  5084  johnlev 
    375  5084  johnlev /*ARGSUSED*/
    376  5084  johnlev static int
    377  5084  johnlev segmf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
    378  5084  johnlev {
    379  5084  johnlev 	return (EINVAL);
    380  5084  johnlev }
    381  5084  johnlev 
    382  5084  johnlev /*ARGSUSED*/
    383  5084  johnlev static int
    384  5084  johnlev segmf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
    385  5084  johnlev {
    386  5084  johnlev 	return (EINVAL);
    387  5084  johnlev }
    388  5084  johnlev 
    389  5084  johnlev /*ARGSUSED*/
    390  5084  johnlev static int
    391  5084  johnlev segmf_kluster(struct seg *seg, caddr_t addr, ssize_t delta)
    392  5084  johnlev {
    393  5084  johnlev 	return (-1);
    394  5084  johnlev }
    395  5084  johnlev 
    396  5084  johnlev /*ARGSUSED*/
    397  5084  johnlev static int
    398  5084  johnlev segmf_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
    399  5084  johnlev {
    400  5084  johnlev 	return (0);
    401  5084  johnlev }
    402  5084  johnlev 
    403  5084  johnlev /*
    404  5084  johnlev  * XXPV	Hmm.  Should we say that mf mapping are "in core?"
    405  5084  johnlev  */
    406  5084  johnlev 
    407  5084  johnlev /*ARGSUSED*/
    408  5084  johnlev static size_t
    409  5084  johnlev segmf_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
    410  5084  johnlev {
    411  5084  johnlev 	size_t v;
    412  5084  johnlev 
    413  5084  johnlev 	for (v = 0, len = (len + PAGEOFFSET) & PAGEMASK; len;
    414  5084  johnlev 	    len -= PAGESIZE, v += PAGESIZE)
    415  5084  johnlev 		*vec++ = 1;
    416  5084  johnlev 	return (v);
    417  5084  johnlev }
    418  5084  johnlev 
    419  5084  johnlev /*ARGSUSED*/
    420  5084  johnlev static int
    421  5084  johnlev segmf_lockop(struct seg *seg, caddr_t addr,
    422  5084  johnlev     size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
    423  5084  johnlev {
    424  5084  johnlev 	return (0);
    425  5084  johnlev }
    426  5084  johnlev 
    427  5084  johnlev static int
    428  5084  johnlev segmf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
    429  5084  johnlev {
    430  5084  johnlev 	struct segmf_data *data = seg->s_data;
    431  5084  johnlev 	pgcnt_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
    432  5084  johnlev 
    433  5084  johnlev 	if (pgno != 0) {
    434  5084  johnlev 		do
    435  5084  johnlev 			protv[--pgno] = data->prot;
    436  5084  johnlev 		while (pgno != 0)
    437  5084  johnlev 			;
    438  5084  johnlev 	}
    439  5084  johnlev 	return (0);
    440  5084  johnlev }
    441  5084  johnlev 
    442  5084  johnlev static u_offset_t
    443  5084  johnlev segmf_getoffset(struct seg *seg, caddr_t addr)
    444  5084  johnlev {
    445  5084  johnlev 	return (addr - seg->s_base);
    446  5084  johnlev }
    447  5084  johnlev 
    448  5084  johnlev /*ARGSUSED*/
    449  5084  johnlev static int
    450  5084  johnlev segmf_gettype(struct seg *seg, caddr_t addr)
    451  5084  johnlev {
    452  5084  johnlev 	return (MAP_SHARED);
    453  5084  johnlev }
    454  5084  johnlev 
    455  5084  johnlev /*ARGSUSED1*/
    456  5084  johnlev static int
    457  5084  johnlev segmf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
    458  5084  johnlev {
    459  5084  johnlev 	struct segmf_data *data = seg->s_data;
    460  5084  johnlev 
    461  5084  johnlev 	*vpp = VTOCVP(data->vp);
    462  5084  johnlev 	return (0);
    463  5084  johnlev }
    464  5084  johnlev 
    465  5084  johnlev /*ARGSUSED*/
    466  5084  johnlev static int
    467  5084  johnlev segmf_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
    468  5084  johnlev {
    469  5084  johnlev 	return (0);
    470  5084  johnlev }
    471  5084  johnlev 
    472  5084  johnlev /*ARGSUSED*/
    473  5084  johnlev static void
    474  5084  johnlev segmf_dump(struct seg *seg)
    475  5084  johnlev {}
    476  5084  johnlev 
    477  5084  johnlev /*ARGSUSED*/
    478  5084  johnlev static int
    479  5084  johnlev segmf_pagelock(struct seg *seg, caddr_t addr, size_t len,
    480  5084  johnlev     struct page ***ppp, enum lock_type type, enum seg_rw rw)
    481  5084  johnlev {
    482  5084  johnlev 	return (ENOTSUP);
    483  5084  johnlev }
    484  5084  johnlev 
    485  5084  johnlev /*ARGSUSED*/
    486  5084  johnlev static int
    487  5084  johnlev segmf_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc)
    488  5084  johnlev {
    489  5084  johnlev 	return (ENOTSUP);
    490  5084  johnlev }
    491  5084  johnlev 
    492  5084  johnlev static int
    493  5084  johnlev segmf_getmemid(struct seg *seg, caddr_t addr, memid_t *memid)
    494  5084  johnlev {
    495  5084  johnlev 	struct segmf_data *data = seg->s_data;
    496  5084  johnlev 
    497  5084  johnlev 	memid->val[0] = (uintptr_t)VTOCVP(data->vp);
    498  5084  johnlev 	memid->val[1] = (uintptr_t)seg_page(seg, addr);
    499  5084  johnlev 	return (0);
    500  5084  johnlev }
    501  5084  johnlev 
    502  5084  johnlev /*ARGSUSED*/
    503  5084  johnlev static lgrp_mem_policy_info_t *
    504  5084  johnlev segmf_getpolicy(struct seg *seg, caddr_t addr)
    505  5084  johnlev {
    506  5084  johnlev 	return (NULL);
    507  5084  johnlev }
    508  5084  johnlev 
    509  5084  johnlev /*ARGSUSED*/
    510  5084  johnlev static int
    511  5084  johnlev segmf_capable(struct seg *seg, segcapability_t capability)
    512  5084  johnlev {
    513  5084  johnlev 	return (0);
    514  5084  johnlev }
    515  5084  johnlev 
    516  5084  johnlev /*
    517  5084  johnlev  * Add a set of contiguous foreign MFNs to the segment. soft-locking them.  The
    518  5084  johnlev  * pre-faulting is necessary due to live migration; in particular we must
    519  5084  johnlev  * return an error in response to IOCTL_PRIVCMD_MMAPBATCH rather than faulting
    520  5084  johnlev  * later on a bad MFN.  Whilst this isn't necessary for the other MMAP
    521  5084  johnlev  * ioctl()s, we lock them too, as they should be transitory.
    522  5084  johnlev  */
    523  5084  johnlev int
    524  5084  johnlev segmf_add_mfns(struct seg *seg, caddr_t addr, mfn_t mfn,
    525  5084  johnlev     pgcnt_t pgcnt, domid_t domid)
    526  5084  johnlev {
    527  5084  johnlev 	struct segmf_data *data = seg->s_data;
    528  7756     Mark 	pgcnt_t base;
    529  5084  johnlev 	faultcode_t fc;
    530  5084  johnlev 	pgcnt_t i;
    531  5084  johnlev 	int error = 0;
    532  5084  johnlev 
    533  5084  johnlev 	if (seg->s_ops != &segmf_ops)
    534  5084  johnlev 		return (EINVAL);
    535  5084  johnlev 
    536  5084  johnlev 	/*
    537  5084  johnlev 	 * Don't mess with dom0.
    538  5084  johnlev 	 *
    539  5084  johnlev 	 * Only allow the domid to be set once for the segment.
    540  5084  johnlev 	 * After that attempts to add mappings to this segment for
    541  5084  johnlev 	 * other domains explicitly fails.
    542  5084  johnlev 	 */
    543  5084  johnlev 
    544  5084  johnlev 	if (domid == 0 || domid == DOMID_SELF)
    545  5084  johnlev 		return (EACCES);
    546  5084  johnlev 
    547  5084  johnlev 	mutex_enter(&data->lock);
    548  5084  johnlev 
    549  5084  johnlev 	if (data->domid == 0)
    550  5084  johnlev 		data->domid = domid;
    551  5084  johnlev 
    552  5084  johnlev 	if (data->domid != domid) {
    553  5084  johnlev 		error = EINVAL;
    554  5084  johnlev 		goto out;
    555  5084  johnlev 	}
    556  5084  johnlev 
    557  5084  johnlev 	base = seg_page(seg, addr);
    558  5084  johnlev 
    559  7756     Mark 	for (i = 0; i < pgcnt; i++) {
    560  7756     Mark 		data->map[base + i].t_type = SEGMF_MAP_MFN;
    561  7756     Mark 		data->map[base + i].u.m.m_mfn = mfn++;
    562  7756     Mark 	}
    563  5084  johnlev 
    564  5084  johnlev 	fc = segmf_fault_range(seg->s_as->a_hat, seg, addr,
    565  5084  johnlev 	    pgcnt * MMU_PAGESIZE, F_SOFTLOCK, S_OTHER);
    566  5084  johnlev 
    567  5084  johnlev 	if (fc != 0) {
    568  5084  johnlev 		error = fc_decode(fc);
    569  7756     Mark 		for (i = 0; i < pgcnt; i++) {
    570  7756     Mark 			data->map[base + i].t_type = SEGMF_MAP_EMPTY;
    571  7756     Mark 		}
    572  5084  johnlev 	}
    573  5084  johnlev 
    574  5084  johnlev out:
    575  5084  johnlev 	mutex_exit(&data->lock);
    576  5084  johnlev 	return (error);
    577  7756     Mark }
    578  7756     Mark 
    579  7756     Mark int
    580  7756     Mark segmf_add_grefs(struct seg *seg, caddr_t addr, uint_t flags,
    581  7756     Mark     grant_ref_t *grefs, uint_t cnt, domid_t domid)
    582  7756     Mark {
    583  7756     Mark 	struct segmf_data *data;
    584  7756     Mark 	segmf_map_t *map;
    585  7756     Mark 	faultcode_t fc;
    586  7756     Mark 	uint_t idx;
    587  7756     Mark 	uint_t i;
    588  7756     Mark 	int e;
    589  7756     Mark 
    590  7756     Mark 	if (seg->s_ops != &segmf_ops)
    591  7756     Mark 		return (EINVAL);
    592  7756     Mark 
    593  7756     Mark 	/*
    594  7756     Mark 	 * Don't mess with dom0.
    595  7756     Mark 	 *
    596  7756     Mark 	 * Only allow the domid to be set once for the segment.
    597  7756     Mark 	 * After that attempts to add mappings to this segment for
    598  7756     Mark 	 * other domains explicitly fails.
    599  7756     Mark 	 */
    600  7756     Mark 
    601  7756     Mark 	if (domid == 0 || domid == DOMID_SELF)
    602  7756     Mark 		return (EACCES);
    603  7756     Mark 
    604  7756     Mark 	data = seg->s_data;
    605  7756     Mark 	idx = seg_page(seg, addr);
    606  7756     Mark 	map = &data->map[idx];
    607  7756     Mark 	e = 0;
    608  7756     Mark 
    609  7756     Mark 	mutex_enter(&data->lock);
    610  7756     Mark 
    611  7756     Mark 	if (data->domid == 0)
    612  7756     Mark 		data->domid = domid;
    613  7756     Mark 
    614  7756     Mark 	if (data->domid != domid) {
    615  7756     Mark 		e = EINVAL;
    616  7756     Mark 		goto out;
    617  7756     Mark 	}
    618  7756     Mark 
    619  7756     Mark 	/* store away the grefs passed in then fault in the pages */
    620  7756     Mark 	for (i = 0; i < cnt; i++) {
    621  7756     Mark 		map[i].t_type = SEGMF_MAP_GREF;
    622  7756     Mark 		map[i].u.g.g_gref = grefs[i];
    623  7756     Mark 		map[i].u.g.g_handle = 0;
    624  7756     Mark 		map[i].u.g.g_flags = 0;
    625  7756     Mark 		if (flags & SEGMF_GREF_WR) {
    626  7756     Mark 			map[i].u.g.g_flags |= SEGMF_GFLAGS_WR;
    627  7756     Mark 		}
    628  7756     Mark 	}
    629  7756     Mark 	fc = segmf_fault_gref_range(seg, addr, cnt);
    630  7756     Mark 	if (fc != 0) {
    631  7756     Mark 		e = fc_decode(fc);
    632  7756     Mark 		for (i = 0; i < cnt; i++) {
    633  7756     Mark 			data->map[i].t_type = SEGMF_MAP_EMPTY;
    634  7756     Mark 		}
    635  7756     Mark 	}
    636  7756     Mark 
    637  7756     Mark out:
    638  7756     Mark 	mutex_exit(&data->lock);
    639  7756     Mark 	return (e);
    640  7756     Mark }
    641  7756     Mark 
    642  7756     Mark int
    643  7756     Mark segmf_release_grefs(struct seg *seg, caddr_t addr, uint_t cnt)
    644  7756     Mark {
    645  7756     Mark 	gnttab_unmap_grant_ref_t mapop[SEGMF_MAX_GREFS];
    646  7756     Mark 	struct segmf_data *data;
    647  7756     Mark 	segmf_map_t *map;
    648  7756     Mark 	uint_t idx;
    649  7756     Mark 	long e;
    650  7756     Mark 	int i;
    651  7756     Mark 	int n;
    652  7756     Mark 
    653  7756     Mark 
    654  7756     Mark 	if (cnt > SEGMF_MAX_GREFS) {
    655  7756     Mark 		return (-1);
    656  7756     Mark 	}
    657  7756     Mark 
    658  7756     Mark 	idx = seg_page(seg, addr);
    659  7756     Mark 	data = seg->s_data;
    660  7756     Mark 	map = &data->map[idx];
    661  7756     Mark 
    662  7756     Mark 	bzero(mapop, sizeof (gnttab_unmap_grant_ref_t) * cnt);
    663  7756     Mark 
    664  7756     Mark 	/*
    665  7756     Mark 	 * for each entry which isn't empty and is currently mapped,
    666  7756     Mark 	 * set it up for an unmap then mark them empty.
    667  7756     Mark 	 */
    668  7756     Mark 	n = 0;
    669  7756     Mark 	for (i = 0; i < cnt; i++) {
    670  7756     Mark 		ASSERT(map[i].t_type != SEGMF_MAP_MFN);
    671  7756     Mark 		if ((map[i].t_type == SEGMF_MAP_GREF) &&
    672  7756     Mark 		    (map[i].u.g.g_flags & SEGMF_GFLAGS_MAPPED)) {
    673  7756     Mark 			mapop[n].handle = map[i].u.g.g_handle;
    674  7756     Mark 			mapop[n].host_addr = map[i].u.g.g_ptep;
    675  7756     Mark 			mapop[n].dev_bus_addr = 0;
    676  7756     Mark 			n++;
    677  7756     Mark 		}
    678  7756     Mark 		map[i].t_type = SEGMF_MAP_EMPTY;
    679  7756     Mark 	}
    680  7756     Mark 
    681  7756     Mark 	/* if there's nothing to unmap, just return */
    682  7756     Mark 	if (n == 0) {
    683  7756     Mark 		return (0);
    684  7756     Mark 	}
    685  7756     Mark 
    686  7756     Mark 	e = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &mapop, n);
    687  7756     Mark 	if (e != 0) {
    688  7756     Mark 		return (-1);
    689  7756     Mark 	}
    690  7756     Mark 
    691  7756     Mark 	return (0);
    692  7756     Mark }
    693  7756     Mark 
    694  7756     Mark 
    695  7756     Mark void
    696  7756     Mark segmf_add_gref_pte(struct seg *seg, caddr_t addr, uint64_t pte_ma)
    697  7756     Mark {
    698  7756     Mark 	struct segmf_data *data;
    699  7756     Mark 	uint_t idx;
    700  7756     Mark 
    701  7756     Mark 	idx = seg_page(seg, addr);
    702  7756     Mark 	data = seg->s_data;
    703  7756     Mark 
    704  7756     Mark 	data->map[idx].u.g.g_ptep = pte_ma;
    705  7756     Mark }
    706  7756     Mark 
    707  7756     Mark 
    708  7756     Mark static int
    709  7756     Mark segmf_fault_gref_range(struct seg *seg, caddr_t addr, size_t cnt)
    710  7756     Mark {
    711  7756     Mark 	gnttab_map_grant_ref_t mapop[SEGMF_MAX_GREFS];
    712  7756     Mark 	struct segmf_data *data;
    713  7756     Mark 	segmf_map_t *map;
    714  7756     Mark 	uint_t idx;
    715  7756     Mark 	int e;
    716  7756     Mark 	int i;
    717  7756     Mark 
    718  7756     Mark 
    719  7756     Mark 	if (cnt > SEGMF_MAX_GREFS) {
    720  7756     Mark 		return (-1);
    721  7756     Mark 	}
    722  7756     Mark 
    723  7756     Mark 	data = seg->s_data;
    724  7756     Mark 	idx = seg_page(seg, addr);
    725  7756     Mark 	map = &data->map[idx];
    726  7756     Mark 
    727  7756     Mark 	bzero(mapop, sizeof (gnttab_map_grant_ref_t) * cnt);
    728  7756     Mark 
    729  7756     Mark 	ASSERT(map->t_type == SEGMF_MAP_GREF);
    730  7756     Mark 
    731  7756     Mark 	/*
    732  7756     Mark 	 * map in each page passed in into the user apps AS. We do this by
    733  7756     Mark 	 * passing the MA of the actual pte of the mapping to the hypervisor.
    734  7756     Mark 	 */
    735  7756     Mark 	for (i = 0; i < cnt; i++) {
    736  7756     Mark 		mapop[i].host_addr = map[i].u.g.g_ptep;
    737  7756     Mark 		mapop[i].dom = data->domid;
    738  7756     Mark 		mapop[i].ref = map[i].u.g.g_gref;
    739  7756     Mark 		mapop[i].flags = GNTMAP_host_map | GNTMAP_application_map |
    740  7756     Mark 		    GNTMAP_contains_pte;
    741  7756     Mark 		if (!(map[i].u.g.g_flags & SEGMF_GFLAGS_WR)) {
    742  7756     Mark 			mapop[i].flags |= GNTMAP_readonly;
    743  7756     Mark 		}
    744  7756     Mark 	}
    745  7756     Mark 	e = xen_map_gref(GNTTABOP_map_grant_ref, mapop, cnt, B_TRUE);
    746  7756     Mark 	if ((e != 0) || (mapop[0].status != GNTST_okay)) {
    747  7756     Mark 		return (FC_MAKE_ERR(EFAULT));
    748  7756     Mark 	}
    749  7756     Mark 
    750  7756     Mark 	/* save handle for segmf_release_grefs() and mark it as mapped */
    751  7756     Mark 	for (i = 0; i < cnt; i++) {
    752  7756     Mark 		ASSERT(mapop[i].status == GNTST_okay);
    753  7756     Mark 		map[i].u.g.g_handle = mapop[i].handle;
    754  7756     Mark 		map[i].u.g.g_flags |= SEGMF_GFLAGS_MAPPED;
    755  7756     Mark 	}
    756  7756     Mark 
    757  7756     Mark 	return (0);
    758  5084  johnlev }
    759  5084  johnlev 
/*
 * Segment operations vector for seg_mf.  segmf_data_zalloc() installs it on
 * every segment this driver creates, and segmf_add_mfns()/segmf_add_grefs()
 * compare seg->s_ops against it to recognise their own segments.  The
 * swapout slot is left NULL.
 */
static struct seg_ops segmf_ops = {
	segmf_dup,
	segmf_unmap,
	segmf_free,
	segmf_fault,
	segmf_faulta,
	segmf_setprot,
	segmf_checkprot,
	(int (*)())segmf_kluster,
	(size_t (*)(struct seg *))NULL,	/* swapout */
	segmf_sync,
	segmf_incore,
	segmf_lockop,
	segmf_getprot,
	segmf_getoffset,
	segmf_gettype,
	segmf_getvp,
	segmf_advise,
	segmf_dump,
	segmf_pagelock,
	segmf_setpagesize,
	segmf_getmemid,
	segmf_getpolicy,
	segmf_capable
};
    785