Home | History | Annotate | Download | only in vm
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     28 /*	  All Rights Reserved  	*/
     29 
     30 /*
     31  * University Copyright- Copyright (c) 1982, 1986, 1988
     32  * The Regents of the University of California
     33  * All Rights Reserved
     34  *
     35  * University Acknowledgment- Portions of this document are derived from
     36  * software developed by the University of California, Berkeley, and its
     37  * contributors.
     38  */
     39 
     40 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     41 
     42 /*
     43  * VM - segment of a mapped device.
     44  *
     45  * This segment driver is used when mapping character special devices.
     46  */
     47 
     48 #include <sys/types.h>
     49 #include <sys/t_lock.h>
     50 #include <sys/sysmacros.h>
     51 #include <sys/vtrace.h>
     52 #include <sys/systm.h>
     53 #include <sys/vmsystm.h>
     54 #include <sys/mman.h>
     55 #include <sys/errno.h>
     56 #include <sys/kmem.h>
     57 #include <sys/cmn_err.h>
     58 #include <sys/vnode.h>
     59 #include <sys/proc.h>
     60 #include <sys/conf.h>
     61 #include <sys/debug.h>
     62 #include <sys/ddidevmap.h>
     63 #include <sys/ddi_implfuncs.h>
     64 #include <sys/lgrp.h>
     65 
     66 #include <vm/page.h>
     67 #include <vm/hat.h>
     68 #include <vm/as.h>
     69 #include <vm/seg.h>
     70 #include <vm/seg_dev.h>
     71 #include <vm/seg_kp.h>
     72 #include <vm/seg_kmem.h>
     73 #include <vm/vpage.h>
     74 
     75 #include <sys/sunddi.h>
     76 #include <sys/esunddi.h>
     77 #include <sys/fs/snode.h>
     78 
     79 
     80 #if DEBUG
     81 int segdev_debug;
     82 #define	DEBUGF(level, args) { if (segdev_debug >= (level)) cmn_err args; }
     83 #else
     84 #define	DEBUGF(level, args)
     85 #endif
     86 
     87 /* Default timeout for devmap context management */
     88 #define	CTX_TIMEOUT_VALUE 0
     89 
     90 #define	HOLD_DHP_LOCK(dhp)  if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
     91 			{ mutex_enter(&dhp->dh_lock); }
     92 
     93 #define	RELE_DHP_LOCK(dhp) if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
     94 			{ mutex_exit(&dhp->dh_lock); }
     95 
     96 #define	round_down_p2(a, s)	((a) & ~((s) - 1))
     97 #define	round_up_p2(a, s)	(((a) + (s) - 1) & ~((s) - 1))
     98 
     99 /*
    100  * VA_PA_ALIGNED checks to see if both VA and PA are on pgsize boundary
    101  * VA_PA_PGSIZE_ALIGNED check to see if VA is aligned with PA w.r.t. pgsize
    102  */
    103 #define	VA_PA_ALIGNED(uvaddr, paddr, pgsize)		\
    104 	(((uvaddr | paddr) & (pgsize - 1)) == 0)
    105 #define	VA_PA_PGSIZE_ALIGNED(uvaddr, paddr, pgsize)	\
    106 	(((uvaddr ^ paddr) & (pgsize - 1)) == 0)
    107 
    108 #define	vpgtob(n)	((n) * sizeof (struct vpage))	/* For brevity */
    109 
    110 #define	VTOCVP(vp)	(VTOS(vp)->s_commonvp)	/* we "know" it's an snode */
    111 
    112 static struct devmap_ctx *devmapctx_list = NULL;
    113 static struct devmap_softlock *devmap_slist = NULL;
    114 
/*
 * mutex, vnode and page for the page of zeros we use for the trash mappings.
 * One trash page is allocated on the first ddi_umem_setup call that uses it.
 * XXX Eventually, we may want to combine this with what segnf does when all
 * hat layers implement HAT_NOFAULT.
 *
 * The trash page is used when the backing store for a userland mapping is
 * removed but the application semantics do not take kindly to a SIGBUS.
 * In that scenario, the application's pages are mapped to some dummy page
 * which returns garbage on read and writes go into a common place.
 * (Perfect for NO_FAULT semantics)
 * The device driver is responsible for communicating to the app, through
 * some other mechanism, that such remapping has happened and that the app
 * should take corrective action.
 * We can also use an anonymous memory page as there is no requirement to
 * keep the page locked, however this complicates the fault code. RFE.
 */
    132 static struct vnode trashvp;
    133 static struct page *trashpp;
    134 
    135 /* Non-pageable kernel memory is allocated from the umem_np_arena. */
    136 static vmem_t *umem_np_arena;
    137 
    138 /* Set the cookie to a value we know will never be a valid umem_cookie */
    139 #define	DEVMAP_DEVMEM_COOKIE	((ddi_umem_cookie_t)0x1)
    140 
    141 /*
    142  * Macros to check if type of devmap handle
    143  */
    144 #define	cookie_is_devmem(c)	\
    145 	((c) == (struct ddi_umem_cookie *)DEVMAP_DEVMEM_COOKIE)
    146 
    147 #define	cookie_is_pmem(c)	\
    148 	((c) == (struct ddi_umem_cookie *)DEVMAP_PMEM_COOKIE)
    149 
    150 #define	cookie_is_kpmem(c)	(!cookie_is_devmem(c) && !cookie_is_pmem(c) &&\
    151 	((c)->type == KMEM_PAGEABLE))
    152 
    153 #define	dhp_is_devmem(dhp)	\
    154 	(cookie_is_devmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
    155 
    156 #define	dhp_is_pmem(dhp)	\
    157 	(cookie_is_pmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
    158 
    159 #define	dhp_is_kpmem(dhp)	\
    160 	(cookie_is_kpmem((struct ddi_umem_cookie *)((dhp)->dh_cookie)))
    161 
    162 /*
    163  * Private seg op routines.
    164  */
    165 static int	segdev_dup(struct seg *, struct seg *);
    166 static int	segdev_unmap(struct seg *, caddr_t, size_t);
    167 static void	segdev_free(struct seg *);
    168 static faultcode_t segdev_fault(struct hat *, struct seg *, caddr_t, size_t,
    169 		    enum fault_type, enum seg_rw);
    170 static faultcode_t segdev_faulta(struct seg *, caddr_t);
    171 static int	segdev_setprot(struct seg *, caddr_t, size_t, uint_t);
    172 static int	segdev_checkprot(struct seg *, caddr_t, size_t, uint_t);
    173 static void	segdev_badop(void);
    174 static int	segdev_sync(struct seg *, caddr_t, size_t, int, uint_t);
    175 static size_t	segdev_incore(struct seg *, caddr_t, size_t, char *);
    176 static int	segdev_lockop(struct seg *, caddr_t, size_t, int, int,
    177 		    ulong_t *, size_t);
    178 static int	segdev_getprot(struct seg *, caddr_t, size_t, uint_t *);
    179 static u_offset_t	segdev_getoffset(struct seg *, caddr_t);
    180 static int	segdev_gettype(struct seg *, caddr_t);
    181 static int	segdev_getvp(struct seg *, caddr_t, struct vnode **);
    182 static int	segdev_advise(struct seg *, caddr_t, size_t, uint_t);
    183 static void	segdev_dump(struct seg *);
    184 static int	segdev_pagelock(struct seg *, caddr_t, size_t,
    185 		    struct page ***, enum lock_type, enum seg_rw);
    186 static int	segdev_setpagesize(struct seg *, caddr_t, size_t, uint_t);
    187 static int	segdev_getmemid(struct seg *, caddr_t, memid_t *);
    188 static lgrp_mem_policy_info_t	*segdev_getpolicy(struct seg *, caddr_t);
    189 static int	segdev_capable(struct seg *, segcapability_t);
    190 
    191 /*
    192  * XXX	this struct is used by rootnex_map_fault to identify
    193  *	the segment it has been passed. So if you make it
    194  *	"static" you'll need to fix rootnex_map_fault.
    195  */
    196 struct seg_ops segdev_ops = {
    197 	segdev_dup,
    198 	segdev_unmap,
    199 	segdev_free,
    200 	segdev_fault,
    201 	segdev_faulta,
    202 	segdev_setprot,
    203 	segdev_checkprot,
    204 	(int (*)())segdev_badop,	/* kluster */
    205 	(size_t (*)(struct seg *))NULL,	/* swapout */
    206 	segdev_sync,			/* sync */
    207 	segdev_incore,
    208 	segdev_lockop,			/* lockop */
    209 	segdev_getprot,
    210 	segdev_getoffset,
    211 	segdev_gettype,
    212 	segdev_getvp,
    213 	segdev_advise,
    214 	segdev_dump,
    215 	segdev_pagelock,
    216 	segdev_setpagesize,
    217 	segdev_getmemid,
    218 	segdev_getpolicy,
    219 	segdev_capable,
    220 };
    221 
    222 /*
    223  * Private segdev support routines
    224  */
    225 static struct segdev_data *sdp_alloc(void);
    226 
    227 static void segdev_softunlock(struct hat *, struct seg *, caddr_t,
    228     size_t, enum seg_rw);
    229 
    230 static faultcode_t segdev_faultpage(struct hat *, struct seg *, caddr_t,
    231     struct vpage *, enum fault_type, enum seg_rw, devmap_handle_t *);
    232 
    233 static faultcode_t segdev_faultpages(struct hat *, struct seg *, caddr_t,
    234     size_t, enum fault_type, enum seg_rw, devmap_handle_t *);
    235 
    236 static struct devmap_ctx *devmap_ctxinit(dev_t, ulong_t);
    237 static struct devmap_softlock *devmap_softlock_init(dev_t, ulong_t);
    238 static void devmap_softlock_rele(devmap_handle_t *);
    239 static void devmap_ctx_rele(devmap_handle_t *);
    240 
    241 static void devmap_ctxto(void *);
    242 
    243 static devmap_handle_t *devmap_find_handle(devmap_handle_t *dhp_head,
    244     caddr_t addr);
    245 
    246 static ulong_t devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
    247     ulong_t *opfn, ulong_t *pagesize);
    248 
    249 static void free_devmap_handle(devmap_handle_t *dhp);
    250 
    251 static int devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
    252     struct seg *newseg);
    253 
    254 static devmap_handle_t *devmap_handle_unmap(devmap_handle_t *dhp);
    255 
    256 static void devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len);
    257 
    258 static void devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr);
    259 
    260 static int devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
    261     offset_t off, size_t len, uint_t flags);
    262 
    263 static void devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len,
    264     caddr_t addr, size_t *llen, caddr_t *laddr);
    265 
    266 static void devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len);
    267 
    268 static void *devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag);
    269 static void devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size);
    270 
    271 static void *devmap_umem_alloc_np(size_t size, size_t flags);
    272 static void devmap_umem_free_np(void *addr, size_t size);
    273 
    274 /*
    275  * routines to lock and unlock underlying segkp segment for
    276  * KMEM_PAGEABLE type cookies.
    277  */
    278 static faultcode_t  acquire_kpmem_lock(struct ddi_umem_cookie *, size_t);
    279 static void release_kpmem_lock(struct ddi_umem_cookie *, size_t);
    280 
    281 /*
    282  * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
    283  * drivers with devmap_access callbacks
    284  */
    285 static int devmap_softlock_enter(struct devmap_softlock *, size_t,
    286 	enum fault_type);
    287 static void devmap_softlock_exit(struct devmap_softlock *, size_t,
    288 	enum fault_type);
    289 
    290 static kmutex_t devmapctx_lock;
    291 
    292 static kmutex_t devmap_slock;
    293 
    294 /*
    295  * Initialize the thread callbacks and thread private data.
    296  */
    297 static struct devmap_ctx *
    298 devmap_ctxinit(dev_t dev, ulong_t id)
    299 {
    300 	struct devmap_ctx	*devctx;
    301 	struct devmap_ctx	*tmp;
    302 	dev_info_t		*dip;
    303 
    304 	tmp =  kmem_zalloc(sizeof (struct devmap_ctx), KM_SLEEP);
    305 
    306 	mutex_enter(&devmapctx_lock);
    307 
    308 	dip = e_ddi_hold_devi_by_dev(dev, 0);
    309 	ASSERT(dip != NULL);
    310 	ddi_release_devi(dip);
    311 
    312 	for (devctx = devmapctx_list; devctx != NULL; devctx = devctx->next)
    313 		if ((devctx->dip == dip) && (devctx->id == id))
    314 			break;
    315 
    316 	if (devctx == NULL) {
    317 		devctx = tmp;
    318 		devctx->dip = dip;
    319 		devctx->id = id;
    320 		mutex_init(&devctx->lock, NULL, MUTEX_DEFAULT, NULL);
    321 		cv_init(&devctx->cv, NULL, CV_DEFAULT, NULL);
    322 		devctx->next = devmapctx_list;
    323 		devmapctx_list = devctx;
    324 	} else
    325 		kmem_free(tmp, sizeof (struct devmap_ctx));
    326 
    327 	mutex_enter(&devctx->lock);
    328 	devctx->refcnt++;
    329 	mutex_exit(&devctx->lock);
    330 	mutex_exit(&devmapctx_lock);
    331 
    332 	return (devctx);
    333 }
    334 
    335 /*
    336  * Timeout callback called if a CPU has not given up the device context
    337  * within dhp->dh_timeout_length ticks
    338  */
    339 static void
    340 devmap_ctxto(void *data)
    341 {
    342 	struct devmap_ctx *devctx = data;
    343 
    344 	TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_CTXTO,
    345 	    "devmap_ctxto:timeout expired, devctx=%p", (void *)devctx);
    346 	mutex_enter(&devctx->lock);
    347 	/*
    348 	 * Set oncpu = 0 so the next mapping trying to get the device context
    349 	 * can.
    350 	 */
    351 	devctx->oncpu = 0;
    352 	devctx->timeout = 0;
    353 	cv_signal(&devctx->cv);
    354 	mutex_exit(&devctx->lock);
    355 }
    356 
    357 /*
    358  * Create a device segment.
    359  */
    360 int
    361 segdev_create(struct seg *seg, void *argsp)
    362 {
    363 	struct segdev_data *sdp;
    364 	struct segdev_crargs *a = (struct segdev_crargs *)argsp;
    365 	devmap_handle_t *dhp = (devmap_handle_t *)a->devmap_data;
    366 	int error;
    367 
    368 	/*
    369 	 * Since the address space is "write" locked, we
    370 	 * don't need the segment lock to protect "segdev" data.
    371 	 */
    372 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
    373 
    374 	hat_map(seg->s_as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
    375 
    376 	sdp = sdp_alloc();
    377 
    378 	sdp->mapfunc = a->mapfunc;
    379 	sdp->offset = a->offset;
    380 	sdp->prot = a->prot;
    381 	sdp->maxprot = a->maxprot;
    382 	sdp->type = a->type;
    383 	sdp->pageprot = 0;
    384 	sdp->softlockcnt = 0;
    385 	sdp->vpage = NULL;
    386 
    387 	if (sdp->mapfunc == NULL)
    388 		sdp->devmap_data = dhp;
    389 	else
    390 		sdp->devmap_data = dhp = NULL;
    391 
    392 	sdp->hat_flags = a->hat_flags;
    393 	sdp->hat_attr = a->hat_attr;
    394 
    395 	/*
    396 	 * Currently, hat_flags supports only HAT_LOAD_NOCONSIST
    397 	 */
    398 	ASSERT(!(sdp->hat_flags & ~HAT_LOAD_NOCONSIST));
    399 
    400 	/*
    401 	 * Hold shadow vnode -- segdev only deals with
    402 	 * character (VCHR) devices. We use the common
    403 	 * vp to hang pages on.
    404 	 */
    405 	sdp->vp = specfind(a->dev, VCHR);
    406 	ASSERT(sdp->vp != NULL);
    407 
    408 	seg->s_ops = &segdev_ops;
    409 	seg->s_data = sdp;
    410 
    411 	while (dhp != NULL) {
    412 		dhp->dh_seg = seg;
    413 		dhp = dhp->dh_next;
    414 	}
    415 
    416 	/*
    417 	 * Inform the vnode of the new mapping.
    418 	 */
    419 	/*
    420 	 * It is ok to use pass sdp->maxprot to ADDMAP rather than to use
    421 	 * dhp specific maxprot because spec_addmap does not use maxprot.
    422 	 */
    423 	error = VOP_ADDMAP(VTOCVP(sdp->vp), sdp->offset,
    424 	    seg->s_as, seg->s_base, seg->s_size,
    425 	    sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
    426 
    427 	if (error != 0) {
    428 		sdp->devmap_data = NULL;
    429 		hat_unload(seg->s_as->a_hat, seg->s_base, seg->s_size,
    430 		    HAT_UNLOAD_UNMAP);
    431 	}
    432 
    433 	return (error);
    434 }
    435 
    436 static struct segdev_data *
    437 sdp_alloc(void)
    438 {
    439 	struct segdev_data *sdp;
    440 
    441 	sdp = kmem_zalloc(sizeof (struct segdev_data), KM_SLEEP);
    442 	rw_init(&sdp->lock, NULL, RW_DEFAULT, NULL);
    443 
    444 	return (sdp);
    445 }
    446 
    447 /*
    448  * Duplicate seg and return new segment in newseg.
    449  */
    450 static int
    451 segdev_dup(struct seg *seg, struct seg *newseg)
    452 {
    453 	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
    454 	struct segdev_data *newsdp;
    455 	devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
    456 	size_t npages;
    457 	int ret;
    458 
    459 	TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_DUP,
    460 	    "segdev_dup:start dhp=%p, seg=%p", (void *)dhp, (void *)seg);
    461 
    462 	DEBUGF(3, (CE_CONT, "segdev_dup: dhp %p seg %p\n",
    463 	    (void *)dhp, (void *)seg));
    464 
    465 	/*
    466 	 * Since the address space is "write" locked, we
    467 	 * don't need the segment lock to protect "segdev" data.
    468 	 */
    469 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
    470 
    471 	newsdp = sdp_alloc();
    472 
    473 	newseg->s_ops = seg->s_ops;
    474 	newseg->s_data = (void *)newsdp;
    475 
    476 	VN_HOLD(sdp->vp);
    477 	newsdp->vp 	= sdp->vp;
    478 	newsdp->mapfunc = sdp->mapfunc;
    479 	newsdp->offset	= sdp->offset;
    480 	newsdp->pageprot = sdp->pageprot;
    481 	newsdp->prot	= sdp->prot;
    482 	newsdp->maxprot = sdp->maxprot;
    483 	newsdp->type = sdp->type;
    484 	newsdp->hat_attr = sdp->hat_attr;
    485 	newsdp->hat_flags = sdp->hat_flags;
    486 	newsdp->softlockcnt = 0;
    487 
    488 	/*
    489 	 * Initialize per page data if the segment we are
    490 	 * dup'ing has per page information.
    491 	 */
    492 	npages = seg_pages(newseg);
    493 
    494 	if (sdp->vpage != NULL) {
    495 		size_t nbytes = vpgtob(npages);
    496 
    497 		newsdp->vpage = kmem_zalloc(nbytes, KM_SLEEP);
    498 		bcopy(sdp->vpage, newsdp->vpage, nbytes);
    499 	} else
    500 		newsdp->vpage = NULL;
    501 
    502 	/*
    503 	 * duplicate devmap handles
    504 	 */
    505 	if (dhp != NULL) {
    506 		ret = devmap_handle_dup(dhp,
    507 		    (devmap_handle_t **)&newsdp->devmap_data, newseg);
    508 		if (ret != 0) {
    509 			TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DUP_CK1,
    510 			    "segdev_dup:ret1 ret=%x, dhp=%p seg=%p",
    511 			    ret, (void *)dhp, (void *)seg);
    512 			DEBUGF(1, (CE_CONT,
    513 			    "segdev_dup: ret %x dhp %p seg %p\n",
    514 			    ret, (void *)dhp, (void *)seg));
    515 			return (ret);
    516 		}
    517 	}
    518 
    519 	/*
    520 	 * Inform the common vnode of the new mapping.
    521 	 */
    522 	return (VOP_ADDMAP(VTOCVP(newsdp->vp),
    523 	    newsdp->offset, newseg->s_as,
    524 	    newseg->s_base, newseg->s_size, newsdp->prot,
    525 	    newsdp->maxprot, sdp->type, CRED(), NULL));
    526 }
    527 
    528 /*
    529  * duplicate devmap handles
    530  */
    531 static int
    532 devmap_handle_dup(devmap_handle_t *dhp, devmap_handle_t **new_dhp,
    533     struct seg *newseg)
    534 {
    535 	devmap_handle_t *newdhp_save = NULL;
    536 	devmap_handle_t *newdhp = NULL;
    537 	struct devmap_callback_ctl *callbackops;
    538 
    539 	while (dhp != NULL) {
    540 		newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
    541 
    542 		/* Need to lock the original dhp while copying if REMAP */
    543 		HOLD_DHP_LOCK(dhp);
    544 		bcopy(dhp, newdhp, sizeof (devmap_handle_t));
    545 		RELE_DHP_LOCK(dhp);
    546 		newdhp->dh_seg = newseg;
    547 		newdhp->dh_next = NULL;
    548 		if (newdhp_save != NULL)
    549 			newdhp_save->dh_next = newdhp;
    550 		else
    551 			*new_dhp = newdhp;
    552 		newdhp_save = newdhp;
    553 
    554 		callbackops = &newdhp->dh_callbackops;
    555 
    556 		if (dhp->dh_softlock != NULL)
    557 			newdhp->dh_softlock = devmap_softlock_init(
    558 			    newdhp->dh_dev,
    559 			    (ulong_t)callbackops->devmap_access);
    560 		if (dhp->dh_ctx != NULL)
    561 			newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
    562 			    (ulong_t)callbackops->devmap_access);
    563 
    564 		/*
    565 		 * Initialize dh_lock if we want to do remap.
    566 		 */
    567 		if (newdhp->dh_flags & DEVMAP_ALLOW_REMAP) {
    568 			mutex_init(&newdhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
    569 			newdhp->dh_flags |= DEVMAP_LOCK_INITED;
    570 		}
    571 
    572 		if (callbackops->devmap_dup != NULL) {
    573 			int ret;
    574 
    575 			/*
    576 			 * Call the dup callback so that the driver can
    577 			 * duplicate its private data.
    578 			 */
    579 			ret = (*callbackops->devmap_dup)(dhp, dhp->dh_pvtp,
    580 			    (devmap_cookie_t *)newdhp, &newdhp->dh_pvtp);
    581 
    582 			if (ret != 0) {
    583 				/*
    584 				 * We want to free up this segment as the driver
    585 				 * has indicated that we can't dup it.  But we
    586 				 * don't want to call the drivers, devmap_unmap,
    587 				 * callback function as the driver does not
    588 				 * think this segment exists. The caller of
    589 				 * devmap_dup will call seg_free on newseg
    590 				 * as it was the caller that allocated the
    591 				 * segment.
    592 				 */
    593 				DEBUGF(1, (CE_CONT, "devmap_handle_dup ERROR: "
    594 				    "newdhp %p dhp %p\n", (void *)newdhp,
    595 				    (void *)dhp));
    596 				callbackops->devmap_unmap = NULL;
    597 				return (ret);
    598 			}
    599 		}
    600 
    601 		dhp = dhp->dh_next;
    602 	}
    603 
    604 	return (0);
    605 }
    606 
    607 /*
    608  * Split a segment at addr for length len.
    609  */
    610 /*ARGSUSED*/
    611 static int
    612 segdev_unmap(struct seg *seg, caddr_t addr, size_t len)
    613 {
    614 	register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
    615 	register struct segdev_data *nsdp;
    616 	register struct seg *nseg;
    617 	register size_t	opages;		/* old segment size in pages */
    618 	register size_t	npages;		/* new segment size in pages */
    619 	register size_t	dpages;		/* pages being deleted (unmapped) */
    620 	register size_t	nbytes;
    621 	devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
    622 	devmap_handle_t *dhpp;
    623 	devmap_handle_t *newdhp;
    624 	struct devmap_callback_ctl *callbackops;
    625 	caddr_t nbase;
    626 	offset_t off;
    627 	ulong_t nsize;
    628 	size_t mlen, sz;
    629 
    630 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP,
    631 	    "segdev_unmap:start dhp=%p, seg=%p addr=%p len=%lx",
    632 	    (void *)dhp, (void *)seg, (void *)addr, len);
    633 
    634 	DEBUGF(3, (CE_CONT, "segdev_unmap: dhp %p seg %p addr %p len %lx\n",
    635 	    (void *)dhp, (void *)seg, (void *)addr, len));
    636 
    637 	/*
    638 	 * Since the address space is "write" locked, we
    639 	 * don't need the segment lock to protect "segdev" data.
    640 	 */
    641 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
    642 
    643 	if ((sz = sdp->softlockcnt) > 0) {
    644 		/*
    645 		 * Fail the unmap if pages are SOFTLOCKed through this mapping.
    646 		 * softlockcnt is protected from change by the as write lock.
    647 		 */
    648 		TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK1,
    649 		    "segdev_unmap:error softlockcnt = %ld", sz);
    650 		DEBUGF(1, (CE_CONT, "segdev_unmap: softlockcnt %ld\n", sz));
    651 		return (EAGAIN);
    652 	}
    653 
    654 	/*
    655 	 * Check for bad sizes
    656 	 */
    657 	if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
    658 	    (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET))
    659 		panic("segdev_unmap");
    660 
    661 	if (dhp != NULL) {
    662 		devmap_handle_t *tdhp;
    663 		/*
    664 		 * If large page size was used in hat_devload(),
    665 		 * the same page size must be used in hat_unload().
    666 		 */
    667 		dhpp = tdhp = devmap_find_handle(dhp, addr);
    668 		while (tdhp != NULL) {
    669 			if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
    670 				break;
    671 			}
    672 			tdhp = tdhp->dh_next;
    673 		}
    674 		if (tdhp != NULL) {	/* found a dhp using large pages */
    675 			size_t slen = len;
    676 			size_t mlen;
    677 			size_t soff;
    678 
    679 			soff = (ulong_t)(addr - dhpp->dh_uvaddr);
    680 			while (slen != 0) {
    681 				mlen = MIN(slen, (dhpp->dh_len - soff));
    682 				hat_unload(seg->s_as->a_hat, dhpp->dh_uvaddr,
    683 				    dhpp->dh_len, HAT_UNLOAD_UNMAP);
    684 				dhpp = dhpp->dh_next;
    685 				ASSERT(slen >= mlen);
    686 				slen -= mlen;
    687 				soff = 0;
    688 			}
    689 		} else
    690 			hat_unload(seg->s_as->a_hat, addr, len,
    691 			    HAT_UNLOAD_UNMAP);
    692 	} else {
    693 		/*
    694 		 * Unload any hardware translations in the range
    695 		 * to be taken out.
    696 		 */
    697 		hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
    698 	}
    699 
    700 	/*
    701 	 * get the user offset which will used in the driver callbacks
    702 	 */
    703 	off = sdp->offset + (offset_t)(addr - seg->s_base);
    704 
    705 	/*
    706 	 * Inform the vnode of the unmapping.
    707 	 */
    708 	ASSERT(sdp->vp != NULL);
    709 	(void) VOP_DELMAP(VTOCVP(sdp->vp), off, seg->s_as, addr, len,
    710 	    sdp->prot, sdp->maxprot, sdp->type, CRED(), NULL);
    711 
    712 	/*
    713 	 * Check for entire segment
    714 	 */
    715 	if (addr == seg->s_base && len == seg->s_size) {
    716 		seg_free(seg);
    717 		return (0);
    718 	}
    719 
    720 	opages = seg_pages(seg);
    721 	dpages = btop(len);
    722 	npages = opages - dpages;
    723 
    724 	/*
    725 	 * Check for beginning of segment
    726 	 */
    727 	if (addr == seg->s_base) {
    728 		if (sdp->vpage != NULL) {
    729 			register struct vpage *ovpage;
    730 
    731 			ovpage = sdp->vpage;	/* keep pointer to vpage */
    732 
    733 			nbytes = vpgtob(npages);
    734 			sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
    735 			bcopy(&ovpage[dpages], sdp->vpage, nbytes);
    736 
    737 			/* free up old vpage */
    738 			kmem_free(ovpage, vpgtob(opages));
    739 		}
    740 
    741 		/*
    742 		 * free devmap handles from the beginning of the mapping.
    743 		 */
    744 		if (dhp != NULL)
    745 			devmap_handle_unmap_head(dhp, len);
    746 
    747 		sdp->offset += (offset_t)len;
    748 
    749 		seg->s_base += len;
    750 		seg->s_size -= len;
    751 
    752 		return (0);
    753 	}
    754 
    755 	/*
    756 	 * Check for end of segment
    757 	 */
    758 	if (addr + len == seg->s_base + seg->s_size) {
    759 		if (sdp->vpage != NULL) {
    760 			register struct vpage *ovpage;
    761 
    762 			ovpage = sdp->vpage;	/* keep pointer to vpage */
    763 
    764 			nbytes = vpgtob(npages);
    765 			sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
    766 			bcopy(ovpage, sdp->vpage, nbytes);
    767 
    768 			/* free up old vpage */
    769 			kmem_free(ovpage, vpgtob(opages));
    770 		}
    771 		seg->s_size -= len;
    772 
    773 		/*
    774 		 * free devmap handles from addr to the end of the mapping.
    775 		 */
    776 		if (dhp != NULL)
    777 			devmap_handle_unmap_tail(dhp, addr);
    778 
    779 		return (0);
    780 	}
    781 
    782 	/*
    783 	 * The section to go is in the middle of the segment,
    784 	 * have to make it into two segments.  nseg is made for
    785 	 * the high end while seg is cut down at the low end.
    786 	 */
    787 	nbase = addr + len;				/* new seg base */
    788 	nsize = (seg->s_base + seg->s_size) - nbase;	/* new seg size */
    789 	seg->s_size = addr - seg->s_base;		/* shrink old seg */
    790 	nseg = seg_alloc(seg->s_as, nbase, nsize);
    791 	if (nseg == NULL)
    792 		panic("segdev_unmap seg_alloc");
    793 
    794 	TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK2,
    795 	    "segdev_unmap: seg=%p nseg=%p", (void *)seg, (void *)nseg);
    796 	DEBUGF(3, (CE_CONT, "segdev_unmap: segdev_dup seg %p nseg %p\n",
    797 	    (void *)seg, (void *)nseg));
    798 	nsdp = sdp_alloc();
    799 
    800 	nseg->s_ops = seg->s_ops;
    801 	nseg->s_data = (void *)nsdp;
    802 
    803 	VN_HOLD(sdp->vp);
    804 	nsdp->mapfunc = sdp->mapfunc;
    805 	nsdp->offset = sdp->offset + (offset_t)(nseg->s_base - seg->s_base);
    806 	nsdp->vp 	= sdp->vp;
    807 	nsdp->pageprot = sdp->pageprot;
    808 	nsdp->prot	= sdp->prot;
    809 	nsdp->maxprot = sdp->maxprot;
    810 	nsdp->type = sdp->type;
    811 	nsdp->hat_attr = sdp->hat_attr;
    812 	nsdp->hat_flags = sdp->hat_flags;
    813 	nsdp->softlockcnt = 0;
    814 
    815 	/*
    816 	 * Initialize per page data if the segment we are
    817 	 * dup'ing has per page information.
    818 	 */
    819 	if (sdp->vpage != NULL) {
    820 		/* need to split vpage into two arrays */
    821 		register size_t nnbytes;
    822 		register size_t nnpages;
    823 		register struct vpage *ovpage;
    824 
    825 		ovpage = sdp->vpage;		/* keep pointer to vpage */
    826 
    827 		npages = seg_pages(seg);	/* seg has shrunk */
    828 		nbytes = vpgtob(npages);
    829 		nnpages = seg_pages(nseg);
    830 		nnbytes = vpgtob(nnpages);
    831 
    832 		sdp->vpage = kmem_alloc(nbytes, KM_SLEEP);
    833 		bcopy(ovpage, sdp->vpage, nbytes);
    834 
    835 		nsdp->vpage = kmem_alloc(nnbytes, KM_SLEEP);
    836 		bcopy(&ovpage[npages + dpages], nsdp->vpage, nnbytes);
    837 
    838 		/* free up old vpage */
    839 		kmem_free(ovpage, vpgtob(opages));
    840 	} else
    841 		nsdp->vpage = NULL;
    842 
    843 	/*
    844 	 * unmap dhps.
    845 	 */
    846 	if (dhp == NULL) {
    847 		nsdp->devmap_data = NULL;
    848 		return (0);
    849 	}
    850 	while (dhp != NULL) {
    851 		callbackops = &dhp->dh_callbackops;
    852 		TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_UNMAP_CK3,
    853 		    "segdev_unmap: dhp=%p addr=%p", dhp, addr);
    854 		DEBUGF(3, (CE_CONT, "unmap: dhp %p addr %p uvaddr %p len %lx\n",
    855 		    (void *)dhp, (void *)addr,
    856 		    (void *)dhp->dh_uvaddr, dhp->dh_len));
    857 
    858 		if (addr == (dhp->dh_uvaddr + dhp->dh_len)) {
    859 			dhpp = dhp->dh_next;
    860 			dhp->dh_next = NULL;
    861 			dhp = dhpp;
    862 		} else if (addr > (dhp->dh_uvaddr + dhp->dh_len)) {
    863 			dhp = dhp->dh_next;
    864 		} else if (addr > dhp->dh_uvaddr &&
    865 		    (addr + len) < (dhp->dh_uvaddr + dhp->dh_len)) {
    866 			/*
    867 			 * <addr, addr+len> is enclosed by dhp.
    868 			 * create a newdhp that begins at addr+len and
    869 			 * ends at dhp->dh_uvaddr+dhp->dh_len.
    870 			 */
    871 			newdhp = kmem_alloc(sizeof (devmap_handle_t), KM_SLEEP);
    872 			HOLD_DHP_LOCK(dhp);
    873 			bcopy(dhp, newdhp, sizeof (devmap_handle_t));
    874 			RELE_DHP_LOCK(dhp);
    875 			newdhp->dh_seg = nseg;
    876 			newdhp->dh_next = dhp->dh_next;
    877 			if (dhp->dh_softlock != NULL)
    878 				newdhp->dh_softlock = devmap_softlock_init(
    879 				    newdhp->dh_dev,
    880 				    (ulong_t)callbackops->devmap_access);
    881 			if (dhp->dh_ctx != NULL)
    882 				newdhp->dh_ctx = devmap_ctxinit(newdhp->dh_dev,
    883 				    (ulong_t)callbackops->devmap_access);
    884 			if (newdhp->dh_flags & DEVMAP_LOCK_INITED) {
    885 				mutex_init(&newdhp->dh_lock,
    886 				    NULL, MUTEX_DEFAULT, NULL);
    887 			}
    888 			if (callbackops->devmap_unmap != NULL)
    889 				(*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
    890 				    off, len, dhp, &dhp->dh_pvtp,
    891 				    newdhp, &newdhp->dh_pvtp);
    892 			mlen = len + (addr - dhp->dh_uvaddr);
    893 			devmap_handle_reduce_len(newdhp, mlen);
    894 			nsdp->devmap_data = newdhp;
    895 			/* XX Changing len should recalculate LARGE flag */
    896 			dhp->dh_len = addr - dhp->dh_uvaddr;
    897 			dhpp = dhp->dh_next;
    898 			dhp->dh_next = NULL;
    899 			dhp = dhpp;
    900 		} else if ((addr > dhp->dh_uvaddr) &&
    901 		    ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len))) {
    902 			mlen = dhp->dh_len + dhp->dh_uvaddr - addr;
    903 			/*
    904 			 * <addr, addr+len> spans over dhps.
    905 			 */
    906 			if (callbackops->devmap_unmap != NULL)
    907 				(*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
    908 				    off, mlen, (devmap_cookie_t *)dhp,
    909 				    &dhp->dh_pvtp, NULL, NULL);
    910 			/* XX Changing len should recalculate LARGE flag */
    911 			dhp->dh_len = addr - dhp->dh_uvaddr;
    912 			dhpp = dhp->dh_next;
    913 			dhp->dh_next = NULL;
    914 			dhp = dhpp;
    915 			nsdp->devmap_data = dhp;
    916 		} else if ((addr + len) >= (dhp->dh_uvaddr + dhp->dh_len)) {
    917 			/*
    918 			 * dhp is enclosed by <addr, addr+len>.
    919 			 */
    920 			dhp->dh_seg = nseg;
    921 			nsdp->devmap_data = dhp;
    922 			dhp = devmap_handle_unmap(dhp);
    923 			nsdp->devmap_data = dhp; /* XX redundant? */
    924 		} else if (((addr + len) > dhp->dh_uvaddr) &&
    925 		    ((addr + len) < (dhp->dh_uvaddr + dhp->dh_len))) {
    926 			mlen = addr + len - dhp->dh_uvaddr;
    927 			if (callbackops->devmap_unmap != NULL)
    928 				(*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
    929 				    dhp->dh_uoff, mlen, NULL,
    930 				    NULL, dhp, &dhp->dh_pvtp);
    931 			devmap_handle_reduce_len(dhp, mlen);
    932 			nsdp->devmap_data = dhp;
    933 			dhp->dh_seg = nseg;
    934 			dhp = dhp->dh_next;
    935 		} else {
    936 			dhp->dh_seg = nseg;
    937 			dhp = dhp->dh_next;
    938 		}
    939 	}
    940 	return (0);
    941 }
    942 
    943 /*
    944  * Utility function that trims len bytes off the front of a devmap handle
    945  * during unmap.  It is only meant for unmapping the front portion of the
    946  * handle, i.e., the offsets, the user address and the pfn/cookie address
    947  * are bumped up by len.  Do not use if reducing length at the tail.
         *
         * len must be page aligned and strictly smaller than dhp->dh_len; both
         * conditions are ASSERTed below.
    948  */
    949 static void
    950 devmap_handle_reduce_len(devmap_handle_t *dhp, size_t len)
    951 {
    952 	struct ddi_umem_cookie *cp;
    953 	struct devmap_pmem_cookie *pcp;
    954 	/*
    955 	 * adjust devmap handle fields
    956 	 */
    957 	ASSERT(len < dhp->dh_len);
    958 
    959 	/* Make sure only page-aligned changes are done */
    960 	ASSERT((len & PAGEOFFSET) == 0);
    961 
        	/* shrink the handle and move its start forward by len */
    962 	dhp->dh_len -= len;
    963 	dhp->dh_uoff += (offset_t)len;
    964 	dhp->dh_roff += (offset_t)len;
    965 	dhp->dh_uvaddr += len;
    966 	/* Need to grab dhp lock if REMAP */
    967 	HOLD_DHP_LOCK(dhp);
    968 	cp = dhp->dh_cookie;
        	/* an invalid mapping has no pfn/cookie address to advance */
    969 	if (!(dhp->dh_flags & DEVMAP_MAPPING_INVALID)) {
    970 		if (cookie_is_devmem(cp)) {
    971 			dhp->dh_pfn += btop(len);
    972 		} else if (cookie_is_pmem(cp)) {
        			/* pmem: only sanity-check the already advanced dh_roff */
    973 			pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
    974 			ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
    975 			    dhp->dh_roff < ptob(pcp->dp_npages));
    976 		} else {
        			/*
        			 * other cookies: check that the advanced dh_cvaddr
        			 * still falls within the cookie, then advance it.
        			 */
    977 			ASSERT(dhp->dh_roff < cp->size);
    978 			ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
    979 			    dhp->dh_cvaddr < (cp->cvaddr + cp->size));
    980 			ASSERT((dhp->dh_cvaddr + len) <=
    981 			    (cp->cvaddr + cp->size));
    982 
    983 			dhp->dh_cvaddr += len;
    984 		}
    985 	}
    986 	/* XXX - Should recalculate the DEVMAP_FLAG_LARGE after changes */
    987 	RELE_DHP_LOCK(dhp);
    988 }
    989 
    990 /*
    991  * Unlink devmap handle dhp from its segment's handle list and free it.
    992  * Return the next devmap handle on the linked list (NULL if dhp was last).
         *
         * The driver's devmap_unmap entry point, if any, is called for the whole
         * range of the handle before the softlock and ctx references are dropped
         * and the handle memory is released.  dhp must be on the list headed by
         * the devmap_data of dhp->dh_seg: the predecessor search below does not
         * check for the end of the list.
    993  */
    994 static devmap_handle_t *
    995 devmap_handle_unmap(devmap_handle_t *dhp)
    996 {
    997 	struct devmap_callback_ctl *callbackops;
    998 	struct segdev_data *sdp;
    999 	devmap_handle_t *dhpp;
   1000 
   1001 	ASSERT(dhp != NULL);
   1002 
        	/* dereference dhp only after the ASSERT above has checked it */
        	callbackops = &dhp->dh_callbackops;
        	sdp = (struct segdev_data *)dhp->dh_seg->s_data;
        	dhpp = (devmap_handle_t *)sdp->devmap_data;

   1003 	/*
   1004 	 * before we free up dhp, call the driver's devmap_unmap entry point
   1005 	 * to free resources allocated for this dhp.
   1006 	 */
   1007 	if (callbackops->devmap_unmap != NULL) {
   1008 		(*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp, dhp->dh_uoff,
   1009 		    dhp->dh_len, NULL, NULL, NULL, NULL);
   1010 	}
   1011 
   1012 	if (dhpp == dhp) {	/* releasing first dhp, change sdp data */
   1013 		sdp->devmap_data = dhp->dh_next;
   1014 	} else {
        		/* find the predecessor of dhp and splice dhp out */
   1015 		while (dhpp->dh_next != dhp) {
   1016 			dhpp = dhpp->dh_next;
   1017 		}
   1018 		dhpp->dh_next = dhp->dh_next;
   1019 	}
   1020 	dhpp = dhp->dh_next;	/* return value is next dhp in chain */
   1021 
   1022 	if (dhp->dh_softlock != NULL)
   1023 		devmap_softlock_rele(dhp);
   1024 
   1025 	if (dhp->dh_ctx != NULL)
   1026 		devmap_ctx_rele(dhp);
   1027 
   1028 	if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
   1029 		mutex_destroy(&dhp->dh_lock);
   1030 	}
   1031 	kmem_free(dhp, sizeof (devmap_handle_t));
   1032 
   1033 	return (dhpp);
   1034 }
   1035 
   1036 /*
   1037  * Free complete devmap handles from dhp for len bytes
   1038  * dhp can be either the first handle or a subsequent handle
         *
         * Handles entirely covered by len are released with devmap_handle_unmap().
         * If len ends part way into a handle, the driver's devmap_unmap callback
         * is invoked for the leading bytes of that handle and the handle is then
         * shortened from the front with devmap_handle_reduce_len().
         *
         * NOTE(review): the loop reads dhp->dh_len right after
         * devmap_handle_unmap() may have returned NULL, so this presumably relies
         * on callers never passing a len that covers every remaining handle --
         * confirm against the callers.
   1039  */
   1040 static void
   1041 devmap_handle_unmap_head(devmap_handle_t *dhp, size_t len)
   1042 {
   1043 	struct devmap_callback_ctl *callbackops;
   1044 
   1045 	/*
   1046 	 * free the devmap handles covered by len.
   1047 	 */
   1048 	while (len >= dhp->dh_len) {
   1049 		len -= dhp->dh_len;
   1050 		dhp = devmap_handle_unmap(dhp);
   1051 	}
   1052 	if (len != 0) {	/* partial unmap at head of first remaining dhp */
   1053 		callbackops = &dhp->dh_callbackops;
   1054 
   1055 		/*
   1056 		 * Call the unmap callback so the drivers can make
   1057 		 * adjustment on its private data.
        		 * The first handle/private-data pair is NULL and dhp
        		 * itself is passed as the second pair.
   1058 		 */
   1059 		if (callbackops->devmap_unmap != NULL)
   1060 			(*callbackops->devmap_unmap)(dhp, dhp->dh_pvtp,
   1061 			    dhp->dh_uoff, len, NULL, NULL, dhp, &dhp->dh_pvtp);
   1062 		devmap_handle_reduce_len(dhp, len);
   1063 	}
   1064 }
   1065 
   1066 /*
   1067  * Free devmap handles to truncate the mapping after addr.
         *
         * The handle containing addr is shortened so that it ends at addr (or is
         * freed outright when addr is its first byte) and every handle after it
         * is freed with devmap_handle_unmap().  dhp supplies the segment and the
         * length to keep (addr - dhp->dh_uvaddr), so it is presumably the handle
         * that contains addr -- confirm against the callers.
         *
   1068  * RFE: Simpler to pass in dhp pointing at correct dhp (avoid find again)
   1069  *	Also could then use the routine in middle unmap case too
   1070  */
   1071 static void
   1072 devmap_handle_unmap_tail(devmap_handle_t *dhp, caddr_t addr)
   1073 {
   1074 	register struct seg *seg = dhp->dh_seg;
   1075 	register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   1076 	register devmap_handle_t *dhph = (devmap_handle_t *)sdp->devmap_data;
   1077 	struct devmap_callback_ctl *callbackops;
   1079 	size_t maplen;
   1080 	ulong_t off;
   1081 	size_t len;
   1082 
        	/* maplen is the number of bytes of the first handle that survive */
   1083 	maplen = (size_t)(addr - dhp->dh_uvaddr);
   1084 	dhph = devmap_find_handle(dhph, addr);
   1085 
   1086 	while (dhph != NULL) {
   1087 		if (maplen == 0) {
        			/* nothing of this handle survives: unlink and free it */
   1088 			dhph =  devmap_handle_unmap(dhph);
   1089 		} else {
   1090 			callbackops = &dhph->dh_callbackops;
   1091 			len = dhph->dh_len - maplen;
   1092 			off = (ulong_t)sdp->offset + (addr - seg->s_base);
   1093 			/*
   1094 			 * Call the unmap callback so the driver
   1095 			 * can make adjustments on its private data.
   1096 			 */
   1097 			if (callbackops->devmap_unmap != NULL)
   1098 				(*callbackops->devmap_unmap)(dhph,
   1099 				    dhph->dh_pvtp, off, len,
   1100 				    (devmap_cookie_t *)dhph,
   1101 				    &dhph->dh_pvtp, NULL, NULL);
   1102 			/* XXX Reducing len needs to recalculate LARGE flag */
   1103 			dhph->dh_len = maplen;
   1104 			maplen = 0;
        			/*
        			 * Leave dh_next linked.  devmap_handle_unmap() locates
        			 * the predecessor of each handle it frees by walking
        			 * the segment's list, so severing the chain here would
        			 * make that walk run off the end of the list.  Once
        			 * the trailing handles have been unlinked one by one,
        			 * this handle's dh_next ends up NULL anyway.
        			 */
   1107 			dhph = dhph->dh_next;
   1108 		}
   1109 	} /* end while */
   1110 }
   1111 
   1112 /*
   1113  * Free a segment.
         *
         * Releases every devmap handle attached to the segment through
         * devmap_handle_unmap(), drops the hold on sdp->vp, frees the vpage
         * array if there is one, destroys sdp->lock and frees the segdev_data.
   1114  */
   1115 static void
   1116 segdev_free(struct seg *seg)
   1117 {
   1118 	register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   1119 	devmap_handle_t *dhp = (devmap_handle_t *)sdp->devmap_data;
   1120 
   1121 	TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_FREE,
   1122 	    "segdev_free: dhp=%p seg=%p", (void *)dhp, (void *)seg);
   1123 	DEBUGF(3, (CE_CONT, "segdev_free: dhp %p seg %p\n",
   1124 	    (void *)dhp, (void *)seg));
   1125 
   1126 	/*
   1127 	 * Since the address space is "write" locked, we
   1128 	 * don't need the segment lock to protect "segdev" data.
   1129 	 */
   1130 	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as, &seg->s_as->a_lock));
   1131 
        	/* each call frees one handle and returns the next one on the list */
   1132 	while (dhp != NULL)
   1133 		dhp = devmap_handle_unmap(dhp);
   1134 
   1135 	VN_RELE(sdp->vp);
        	/* the vpage array, when present, holds one entry per segment page */
   1136 	if (sdp->vpage != NULL)
   1137 		kmem_free(sdp->vpage, vpgtob(seg_pages(seg)));
   1138 
   1139 	rw_destroy(&sdp->lock);
   1140 	kmem_free(sdp, sizeof (*sdp));
   1141 }
   1142 
   1143 static void
   1144 free_devmap_handle(devmap_handle_t *dhp)
   1145 {
   1146 	register devmap_handle_t *dhpp;
   1147 
   1148 	/*
   1149 	 * free up every devmap handle on the chain starting at dhp.
        	 * Unlike devmap_handle_unmap(), this neither calls the driver's
        	 * devmap_unmap callback nor unlinks the handles from a segment's
        	 * list; it only destroys the handle lock, drops the softlock and
        	 * ctx references and frees the handle memory.
   1150 	 */
   1151 	while (dhp != NULL) {
        		/* remember the successor before dhp is freed */
   1152 		dhpp = dhp->dh_next;
   1153 		if (dhp->dh_flags & DEVMAP_LOCK_INITED) {
   1154 			mutex_destroy(&dhp->dh_lock);
   1155 		}
   1156 
   1157 		if (dhp->dh_softlock != NULL)
   1158 			devmap_softlock_rele(dhp);
   1159 
   1160 		if (dhp->dh_ctx != NULL)
   1161 			devmap_ctx_rele(dhp);
   1162 
   1163 		kmem_free(dhp, sizeof (devmap_handle_t));
   1164 		dhp = dhpp;
   1165 	}
   1166 }
   1167 
   1168 /*
   1169  * routines to lock and unlock underlying segkp segment for
   1170  * KMEM_PAGEABLE type cookies.
   1171  * segkp only allows a single pending F_SOFTLOCK
   1172  * we keep track of number of locks in the cookie so we can
   1173  * have multiple pending faults and manage the calls to segkp.
   1174  * RFE: if segkp supports either pagelock or can support multiple
   1175  * calls to F_SOFTLOCK, then these routines can go away.
   1176  *	If pagelock, segdev_faultpage can fault on a page by page basis
   1177  *		and simplifies the code quite a bit.
   1178  *	if multiple calls allowed but not partial ranges, then need for
   1179  *	cookie->lock and locked count goes away, code can call as_fault directly
         *
         * acquire_kpmem_lock() adds npages to cookie->locked.  It returns 0 on
         * success, FC_MAKE_ERR(ENOMEM) if the count would overflow, or the
         * as_fault() error if the initial F_SOFTLOCK fails; on any error the
         * count is left unchanged.
   1180  */
   1181 static faultcode_t
   1182 acquire_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
   1183 {
   1184 	int err = 0;
   1185 	ASSERT(cookie_is_kpmem(cookie));
   1186 	/*
   1187 	 * Fault in pages in segkp with F_SOFTLOCK.
   1188 	 * We want to hold the lock until all pages have been loaded.
   1189 	 * segkp only allows single caller to hold SOFTLOCK, so cookie
   1190 	 * holds a count so we don't call into segkp multiple times
   1191 	 */
   1192 	mutex_enter(&cookie->lock);
   1193 
   1194 	/*
   1195 	 * Check for overflow in locked field
   1196 	 */
   1197 	if ((UINT32_MAX - cookie->locked) < npages) {
   1198 		err = FC_MAKE_ERR(ENOMEM);
   1199 	} else if (cookie->locked == 0) {
   1200 		/*
        		 * First time locking: the whole cookie range is
        		 * softlocked, not just npages.
        		 */
   1201 		err = as_fault(kas.a_hat, &kas, cookie->cvaddr,
   1202 		    cookie->size, F_SOFTLOCK, PROT_READ|PROT_WRITE);
   1203 	}
        	/* only account for the pages when nothing above failed */
   1204 	if (!err) {
   1205 		cookie->locked += npages;
   1206 	}
   1207 	mutex_exit(&cookie->lock);
   1208 	return (err);
   1209 }
   1210 
   1211 static void
   1212 release_kpmem_lock(struct ddi_umem_cookie *cookie, size_t npages)
   1213 {
        	/*
        	 * Drop npages from the cookie's lock count; when the count reaches
        	 * zero the F_SOFTLOCK taken over the whole cookie range is undone
        	 * with a matching F_SOFTUNLOCK.  A failing unlock is fatal.
        	 */
   1214 	mutex_enter(&cookie->lock);
   1215 	ASSERT(cookie_is_kpmem(cookie));
   1216 	ASSERT(cookie->locked >= npages);
   1217 	cookie->locked -= (uint_t)npages;
   1218 	if (cookie->locked == 0) {
   1219 		/* Last unlock */
   1220 		if (as_fault(kas.a_hat, &kas, cookie->cvaddr,
   1221 		    cookie->size, F_SOFTUNLOCK, PROT_READ|PROT_WRITE))
   1222 			panic("segdev releasing kpmem lock %p", (void *)cookie);
   1223 	}
   1224 	mutex_exit(&cookie->lock);
   1225 }
   1226 
   1227 /*
   1228  * Routines to synchronize F_SOFTLOCK and F_INVAL faults for
   1229  * drivers with devmap_access callbacks
   1230  * slock->softlocked basically works like a rw lock
   1231  *	-ve counts => F_SOFTLOCK in progress
   1232  *	+ve counts => F_INVAL/F_PROT in progress
   1233  * We allow only one F_SOFTLOCK at a time
   1234  * but can have multiple pending F_INVAL/F_PROT calls
   1235  *
   1236  * This routine waits using cv_wait_sig so killing processes is more graceful
   1237  * Returns EINTR if coming out of this routine due to a signal, 0 otherwise
         *
         * A request for zero pages returns 0 immediately without looking at
         * slock.  When EINTR is returned the count has not been modified.
   1238  */
   1239 static int devmap_softlock_enter(
   1240 	struct devmap_softlock *slock,
   1241 	size_t npages,
   1242 	enum fault_type type)
   1243 {
   1244 	if (npages == 0)
   1245 		return (0);
   1246 	mutex_enter(&(slock->lock));
   1247 	switch (type) {
   1248 	case F_SOFTLOCK :
        		/* wait until the count is zero: no softlock and no invals */
   1249 		while (slock->softlocked) {
   1250 			if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
   1251 				/* signalled */
   1252 				mutex_exit(&(slock->lock));
   1253 				return (EINTR);
   1254 			}
   1255 		}
   1256 		slock->softlocked -= npages; /* -ve count => locked */
   1257 		break;
   1258 	case F_INVAL :
   1259 	case F_PROT :
        		/* wait only while an F_SOFTLOCK holds the count negative */
   1260 		while (slock->softlocked < 0)
   1261 			if (cv_wait_sig(&(slock)->cv, &(slock)->lock) == 0) {
   1262 				/* signalled */
   1263 				mutex_exit(&(slock->lock));
   1264 				return (EINTR);
   1265 			}
   1266 		slock->softlocked += npages; /* +ve count => f_invals */
   1267 		break;
   1268 	default:
   1269 		ASSERT(0);
   1270 	}
   1271 	mutex_exit(&(slock->lock));
   1272 	return (0);
   1273 }
   1274 
   1275 static void devmap_softlock_exit(
   1276 	struct devmap_softlock *slock,
   1277 	size_t npages,
   1278 	enum fault_type type)
   1279 {
        	/*
        	 * Undo the accounting done by devmap_softlock_enter() for npages
        	 * pages of the given fault type, and signal the condition variable
        	 * once the count is back to zero.  A NULL slock is a no-op.
        	 */
   1280 	if (slock == NULL)
   1281 		return;
   1282 	mutex_enter(&(slock->lock));
   1283 	switch (type) {
   1284 	case F_SOFTLOCK :
   1285 		ASSERT(-slock->softlocked >= npages);
   1286 		slock->softlocked += npages;	/* -ve count is softlocked */
   1287 		if (slock->softlocked == 0)
   1288 			cv_signal(&slock->cv);
   1289 		break;
   1290 	case F_INVAL :
   1291 	case F_PROT:
   1292 		ASSERT(slock->softlocked >= npages);
   1293 		slock->softlocked -= npages;	/* +ve count is f_invals */
   1294 		if (slock->softlocked == 0)
   1295 			cv_signal(&slock->cv);
   1296 		break;
   1297 	default:
   1298 		ASSERT(0);
   1299 	}
   1300 	mutex_exit(&(slock->lock));
   1301 }
   1302 
   1303 /*
   1304  * Do a F_SOFTUNLOCK call over the range requested.
   1305  * The range must have already been F_SOFTLOCK'ed.
   1306  * The segment lock should be held, (but not the segment private lock?)
   1307  *  The softunlock code below does not adjust for large page sizes
   1308  *	assumes the caller already did any addr/len adjustments for
   1309  *	pagesize mappings before calling.
         *
         * Besides unlocking the hat translations, this releases the kpmem lock
         * and the devmap_access softlock accounting for every devmap handle the
         * range touches, lowers sdp->softlockcnt by the number of pages in len
         * and wakes up waiting unmappers once that count drops to zero.
         * The rw argument is not used.
   1310  */
   1311 /*ARGSUSED*/
   1312 static void
   1313 segdev_softunlock(
   1314 	struct hat *hat,		/* the hat */
   1315 	struct seg *seg,		/* seg_dev of interest */
   1316 	caddr_t addr,			/* base address of range */
   1317 	size_t len,			/* number of bytes */
   1318 	enum seg_rw rw)			/* type of access at fault */
   1319 {
   1320 	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   1321 	devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
   1322 
   1323 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_SOFTUNLOCK,
   1324 	    "segdev_softunlock:dhp_head=%p sdp=%p addr=%p len=%lx",
   1325 	    dhp_head, sdp, addr, len);
   1326 	DEBUGF(3, (CE_CONT, "segdev_softunlock: dhp %p lockcnt %lx "
   1327 	    "addr %p len %lx\n",
   1328 	    (void *)dhp_head, sdp->softlockcnt, (void *)addr, len));
   1329 
   1330 	hat_unlock(hat, addr, len);
   1331 
   1332 	if (dhp_head != NULL) {
   1333 		devmap_handle_t *dhp;
   1334 		size_t mlen;
   1335 		size_t tlen = len;
   1336 		ulong_t off;
   1337 
   1338 		dhp = devmap_find_handle(dhp_head, addr);
   1339 		ASSERT(dhp != NULL);
   1340 
        		/*
        		 * Walk the handles covering the range.  off is the offset
        		 * of addr within the first handle and zero afterwards;
        		 * mlen is the part of the range inside the current handle.
        		 * The walk follows dh_next without a NULL check, so it
        		 * relies on the whole range being covered by handles.
        		 */
   1341 		off = (ulong_t)(addr - dhp->dh_uvaddr);
   1342 		while (tlen != 0) {
   1343 			mlen = MIN(tlen, (dhp->dh_len - off));
   1344 
   1345 			/*
   1346 			 * unlock segkp memory, locked during F_SOFTLOCK
   1347 			 */
   1348 			if (dhp_is_kpmem(dhp)) {
   1349 				release_kpmem_lock(
   1350 				    (struct ddi_umem_cookie *)dhp->dh_cookie,
   1351 				    btopr(mlen));
   1352 			}
   1353 
   1354 			/*
   1355 			 * Do the softlock accounting for devmap_access
   1356 			 */
   1357 			if (dhp->dh_callbackops.devmap_access != NULL) {
   1358 				devmap_softlock_exit(dhp->dh_softlock,
   1359 				    btopr(mlen), F_SOFTLOCK);
   1360 			}
   1361 
   1362 			tlen -= mlen;
   1363 			dhp = dhp->dh_next;
   1364 			off = 0;
   1365 		}
   1366 	}
   1367 
        	/* softlockcnt is updated under freemem_lock */
   1368 	mutex_enter(&freemem_lock);
   1369 	ASSERT(sdp->softlockcnt >= btopr(len));
   1370 	sdp->softlockcnt -= btopr(len);
   1371 	mutex_exit(&freemem_lock);
   1372 	if (sdp->softlockcnt == 0) {
   1373 		/*
   1374 		 * All SOFTLOCKS are gone. Wakeup any waiting
   1375 		 * unmappers so they can try again to unmap.
   1376 		 * Check for waiters first without the mutex
   1377 		 * held so we don't always grab the mutex on
   1378 		 * softunlocks.
   1379 		 */
   1380 		if (AS_ISUNMAPWAIT(seg->s_as)) {
   1381 			mutex_enter(&seg->s_as->a_contents);
        			/* re-check now that a_contents is held */
   1382 			if (AS_ISUNMAPWAIT(seg->s_as)) {
   1383 				AS_CLRUNMAPWAIT(seg->s_as);
   1384 				cv_broadcast(&seg->s_as->a_cv);
   1385 			}
   1386 			mutex_exit(&seg->s_as->a_contents);
   1387 		}
   1388 	}
   1389 
   1390 }
   1391 
   1392 /*
   1393  * Handle fault for a single page.
   1394  * Done in a separate routine so we can handle errors more easily.
   1395  * This routine is called only from segdev_faultpages()
   1396  * when looping over the range of addresses requested. The segment lock is held.
         *
         * Returns 0 once a translation for addr has been loaded, FC_PROT if the
         * per-page protections do not allow the access, and FC_MAKE_ERR(EFAULT)
         * if no valid pfn can be found for the page or ddi_map_fault() fails.
   1397  */
   1398 static faultcode_t
   1399 segdev_faultpage(
   1400 	struct hat *hat,		/* the hat */
   1401 	struct seg *seg,		/* seg_dev of interest */
   1402 	caddr_t addr,			/* address in as */
   1403 	struct vpage *vpage,		/* pointer to vpage for seg, addr */
   1404 	enum fault_type type,		/* type of fault */
   1405 	enum seg_rw rw,			/* type of access at fault */
   1406 	devmap_handle_t *dhp)		/* devmap handle if any for this page */
   1407 {
   1408 	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   1409 	uint_t prot;
   1410 	pfn_t pfnum = PFN_INVALID;
   1411 	u_offset_t offset;
   1412 	uint_t hat_flags;
   1413 	dev_info_t *dip;
   1414 
   1415 	TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGE,
   1416 	    "segdev_faultpage: dhp=%p seg=%p addr=%p", dhp, seg, addr);
   1417 	DEBUGF(8, (CE_CONT, "segdev_faultpage: dhp %p seg %p addr %p \n",
   1418 	    (void *)dhp, (void *)seg, (void *)addr));
   1419 
   1420 	/*
   1421 	 * Initialize protection value for this page.
   1422 	 * If we have per page protection values check it now.
   1423 	 */
   1424 	if (sdp->pageprot) {
   1425 		uint_t protchk;
   1426 
        		/* translate the access type into the protection bit it needs */
   1427 		switch (rw) {
   1428 		case S_READ:
   1429 			protchk = PROT_READ;
   1430 			break;
   1431 		case S_WRITE:
   1432 			protchk = PROT_WRITE;
   1433 			break;
   1434 		case S_EXEC:
   1435 			protchk = PROT_EXEC;
   1436 			break;
   1437 		case S_OTHER:
   1438 		default:
   1439 			protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
   1440 			break;
   1441 		}
   1442 
   1443 		prot = VPP_PROT(vpage);
   1444 		if ((prot & protchk) == 0)
   1445 			return (FC_PROT);	/* illegal access type */
   1446 	} else {
   1447 		prot = sdp->prot;
   1448 		/* caller has already done segment level protection check */
   1449 	}
   1450 
        	/*
        	 * NOTE(review): softlockcnt is bumped here, before the pfn lookup;
        	 * the EFAULT returns further down do not lower it again.  Confirm
        	 * that the caller accounts for this page on those error paths.
        	 */
   1451 	if (type == F_SOFTLOCK) {
   1452 		mutex_enter(&freemem_lock);
   1453 		sdp->softlockcnt++;
   1454 		mutex_exit(&freemem_lock);
   1455 	}
   1456 
   1457 	hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
   1458 	offset = sdp->offset + (u_offset_t)(addr - seg->s_base);
   1459 	/*
   1460 	 * In the devmap framework, sdp->mapfunc is set to NULL.  we can get
   1461 	 * pfnum from dhp->dh_pfn (at beginning of segment) and offset from
   1462 	 * seg->s_base.
   1463 	 */
   1464 	if (dhp == NULL) {
   1465 		/* If segment has devmap_data, then dhp should be non-NULL */
   1466 		ASSERT(sdp->devmap_data == NULL);
   1467 		pfnum = (pfn_t)cdev_mmap(sdp->mapfunc, sdp->vp->v_rdev,
   1468 		    (off_t)offset, prot);
   1469 		prot |= sdp->hat_attr;
   1470 	} else {
   1471 		ulong_t off;
   1472 		struct ddi_umem_cookie *cp;
   1473 		struct devmap_pmem_cookie *pcp;
   1474 
   1475 		/* ensure the dhp passed in contains addr. */
   1476 		ASSERT(dhp == devmap_find_handle(
   1477 		    (devmap_handle_t *)sdp->devmap_data, addr));
   1478 
        		/* off is the byte offset of addr within this handle */
   1479 		off = addr - dhp->dh_uvaddr;
   1480 
   1481 		/*
   1482 		 * This routine assumes that the caller makes sure that the
   1483 		 * fields in dhp used below are unchanged due to remap during
   1484 		 * this call. Caller does HOLD_DHP_LOCK if needed
   1485 		 */
   1486 		cp = dhp->dh_cookie;
   1487 		if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
   1488 			pfnum = PFN_INVALID;
   1489 		} else if (cookie_is_devmem(cp)) {
        			/* device memory: pfns are consecutive from dh_pfn */
   1490 			pfnum = dhp->dh_pfn + btop(off);
   1491 		} else if (cookie_is_pmem(cp)) {
        			/* pmem: look the page up in the cookie's page array */
   1492 			pcp = (struct devmap_pmem_cookie *)dhp->dh_pcookie;
   1493 			ASSERT((dhp->dh_roff & PAGEOFFSET) == 0 &&
   1494 			    dhp->dh_roff < ptob(pcp->dp_npages));
   1495 			pfnum = page_pptonum(
   1496 			    pcp->dp_pparray[btop(off + dhp->dh_roff)]);
   1497 		} else {
        			/* umem cookies: the lookup depends on the cookie type */
   1498 			ASSERT(dhp->dh_roff < cp->size);
   1499 			ASSERT(dhp->dh_cvaddr >= cp->cvaddr &&
   1500 			    dhp->dh_cvaddr < (cp->cvaddr + cp->size));
   1501 			ASSERT((dhp->dh_cvaddr + off) <=
   1502 			    (cp->cvaddr + cp->size));
   1503 			ASSERT((dhp->dh_cvaddr + off + PAGESIZE) <=
   1504 			    (cp->cvaddr + cp->size));
   1505 
   1506 			switch (cp->type) {
   1507 			case UMEM_LOCKED :
        				/*
        				 * use the cookie's page array if it has one,
        				 * otherwise ask the hat of the process
        				 * recorded in the cookie.
        				 */
   1508 				if (cp->pparray != NULL) {
   1509 					ASSERT((dhp->dh_roff &
   1510 					    PAGEOFFSET) == 0);
   1511 					pfnum = page_pptonum(
   1512 					    cp->pparray[btop(off +
   1513 					    dhp->dh_roff)]);
   1514 				} else {
   1515 					pfnum = hat_getpfnum(
   1516 					    ((proc_t *)cp->procp)->p_as->a_hat,
   1517 					    cp->cvaddr + off);
   1518 				}
   1519 			break;
   1520 			case UMEM_TRASH :
   1521 				pfnum = page_pptonum(trashpp);
   1522 				/*
   1523 				 * We should set hat_flags to HAT_NOFAULT also
   1524 				 * However, not all hat layers implement this
   1525 				 */
   1526 				break;
   1527 			case KMEM_PAGEABLE:
   1528 			case KMEM_NON_PAGEABLE:
        				/* kernel memory: translate through the kas hat */
   1529 				pfnum = hat_getpfnum(kas.a_hat,
   1530 				    dhp->dh_cvaddr + off);
   1531 				break;
   1532 			default :
   1533 				pfnum = PFN_INVALID;
   1534 				break;
   1535 			}
   1536 		}
   1537 		prot |= dhp->dh_hat_attr;
   1538 	}
   1539 	if (pfnum == PFN_INVALID) {
   1540 		return (FC_MAKE_ERR(EFAULT));
   1541 	}
   1542 	/* prot should already be OR'ed in with hat_attributes if needed */
   1543 
   1544 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGE_CK1,
   1545 	    "segdev_faultpage: pfnum=%lx memory=%x prot=%x flags=%x",
   1546 	    pfnum, pf_is_memory(pfnum), prot, hat_flags);
   1547 	DEBUGF(9, (CE_CONT, "segdev_faultpage: pfnum %lx memory %x "
   1548 	    "prot %x flags %x\n", pfnum, pf_is_memory(pfnum), prot, hat_flags));
   1549 
        	/* memory pfns and all devmap mappings are loaded directly */
   1550 	if (pf_is_memory(pfnum) || (dhp != NULL)) {
   1551 		/*
   1552 		 * It's not _really_ required here to pass sdp->hat_flags
   1553 		 * to hat_devload even though we do it.
   1554 		 * This is because hat figures it out DEVMEM mappings
   1555 		 * are non-consistent, anyway.
   1556 		 */
   1557 		hat_devload(hat, addr, PAGESIZE, pfnum,
   1558 		    prot, hat_flags | sdp->hat_flags);
   1559 		return (0);
   1560 	}
   1561 
   1562 	/*
   1563 	 * Fall through to the case where devmap is not used and need to call
   1564 	 * up the device tree to set up the mapping
   1565 	 */
   1566 
   1567 	dip = VTOS(VTOCVP(sdp->vp))->s_dip;
   1568 	ASSERT(dip);
   1569 
   1570 	/*
   1571 	 * When calling ddi_map_fault, we do not OR in sdp->hat_attr
   1572 	 * This is because this calls drivers which may not expect
   1573 	 * prot to have any other values than PROT_ALL
   1574 	 * The root nexus driver has a hack to peek into the segment
   1575 	 * structure and then OR in sdp->hat_attr.
   1576 	 * XX In case the bus_ops interfaces are ever revisited
   1577 	 * we need to fix this. prot should include other hat attributes
   1578 	 */
   1579 	if (ddi_map_fault(dip, hat, seg, addr, NULL, pfnum, prot & PROT_ALL,
   1580 	    (uint_t)(type == F_SOFTLOCK)) != DDI_SUCCESS) {
   1581 		return (FC_MAKE_ERR(EFAULT));
   1582 	}
   1583 	return (0);
   1584 }
   1585 
   1586 static faultcode_t
   1587 segdev_fault(
   1588 	struct hat *hat,		/* the hat */
   1589 	struct seg *seg,		/* the seg_dev of interest */
   1590 	caddr_t addr,			/* the address of the fault */
   1591 	size_t len,			/* the length of the range */
   1592 	enum fault_type type,		/* type of fault */
   1593 	enum seg_rw rw)			/* type of access at fault */
   1594 {
   1595 	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   1596 	devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
   1597 	devmap_handle_t *dhp;
   1598 	struct devmap_softlock *slock = NULL;
   1599 	ulong_t slpage = 0;
   1600 	ulong_t off;
   1601 	caddr_t maddr = addr;
   1602 	int err;
   1603 	int err_is_faultcode = 0;
   1604 
   1605 	TRACE_5(TR_FAC_DEVMAP, TR_DEVMAP_FAULT,
   1606 	    "segdev_fault: dhp_head=%p seg=%p addr=%p len=%lx type=%x",
   1607 	    (void *)dhp_head, (void *)seg, (void *)addr, len, type);
   1608 	DEBUGF(7, (CE_CONT, "segdev_fault: dhp_head %p seg %p "
   1609 	    "addr %p len %lx type %x\n",
   1610 	    (void *)dhp_head, (void *)seg, (void *)addr, len, type));
   1611 
   1612 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
   1613 
   1614 	/* Handle non-devmap case */
   1615 	if (dhp_head == NULL)
   1616 		return (segdev_faultpages(hat, seg, addr, len, type, rw, NULL));
   1617 
   1618 	/* Find devmap handle */
   1619 	if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
   1620 		return (FC_NOMAP);
   1621 
   1622 	/*
   1623 	 * The seg_dev driver does not implement copy-on-write,
   1624 	 * and always loads translations with maximal allowed permissions
   1625 	 * but we got an fault trying to access the device.
   1626 	 * Servicing the fault is not going to result in any better result
   1627 	 * RFE: If we want devmap_access callbacks to be involved in F_PROT
   1628 	 *	faults, then the code below is written for that
   1629 	 *	Pending resolution of the following:
   1630 	 *	- determine if the F_INVAL/F_SOFTLOCK syncing
   1631 	 *	is needed for F_PROT also or not. The code below assumes it does
   1632 	 *	- If driver sees F_PROT and calls devmap_load with same type,
   1633 	 *	then segdev_faultpages will fail with FC_PROT anyway, need to
   1634 	 *	change that so calls from devmap_load to segdev_faultpages for
   1635 	 *	F_PROT type are retagged to F_INVAL.
   1636 	 * RFE: Today we dont have drivers that use devmap and want to handle
   1637 	 *	F_PROT calls. The code in segdev_fault* is written to allow
   1638 	 *	this case but is not tested. A driver that needs this capability
   1639 	 *	should be able to remove the short-circuit case; resolve the
   1640 	 *	above issues and "should" work.
   1641 	 */
   1642 	if (type == F_PROT) {
   1643 		return (FC_PROT);
   1644 	}
   1645 
   1646 	/*
   1647 	 * Loop through dhp list calling devmap_access or segdev_faultpages for
   1648 	 * each devmap handle.
   1649 	 * drivers which implement devmap_access can interpose on faults and do
   1650 	 * device-appropriate special actions before calling devmap_load.
   1651 	 */
   1652 
   1653 	/*
   1654 	 * Unfortunately, this simple loop has turned out to expose a variety
   1655 	 * of complex problems which results in the following convoluted code.
   1656 	 *
   1657 	 * First, a desire to handle a serialization of F_SOFTLOCK calls
   1658 	 * to the driver within the framework.
   1659 	 *	This results in a dh_softlock structure that is on a per device
   1660 	 *	(or device instance) basis and serializes devmap_access calls.
   1661 	 *	Ideally we would need to do this for underlying
   1662 	 *	memory/device regions that are being faulted on
   1663 	 *	but that is hard to identify and with REMAP, harder
   1664 	 * Second, a desire to serialize F_INVAL(and F_PROT) calls w.r.t.
   1665 	 * 	to F_SOFTLOCK calls to the driver.
   1666 	 * These serializations are to simplify the driver programmer model.
   1667 	 * To support these two features, the code first goes through the
   1668 	 *	devmap handles and counts the pages (slpage) that are covered
   1669 	 *	by devmap_access callbacks.
   1670 	 * This part ends with a devmap_softlock_enter call
   1671 	 *	which allows only one F_SOFTLOCK active on a device instance,
   1672 	 *	but multiple F_INVAL/F_PROTs can be active except when a
   1673 	 *	F_SOFTLOCK is active
   1674 	 *
   1675 	 * Next, we dont short-circuit the fault code upfront to call
   1676 	 *	segdev_softunlock for F_SOFTUNLOCK, because we must use
   1677 	 *	the same length when we softlock and softunlock.
   1678 	 *
   1679 	 *	-Hat layers may not support softunlocking lengths less than the
   1680 	 *	original length when there is large page support.
   1681 	 *	-kpmem locking is dependent on keeping the lengths same.
   1682 	 *	-if drivers handled F_SOFTLOCK, they probably also expect to
   1683 	 *		see an F_SOFTUNLOCK of the same length
   1684 	 *	Hence, if extending lengths during softlock,
   1685 	 *	softunlock has to make the same adjustments and goes through
   1686 	 *	the same loop calling segdev_faultpages/segdev_softunlock
   1687 	 *	But some of the synchronization and error handling is different
   1688 	 */
   1689 
   1690 	if (type != F_SOFTUNLOCK) {
   1691 		devmap_handle_t *dhpp = dhp;
   1692 		size_t slen = len;
   1693 
   1694 		/*
   1695 		 * Calculate count of pages that are :
   1696 		 * a) within the (potentially extended) fault region
   1697 		 * b) AND covered by devmap handle with devmap_access
   1698 		 */
   1699 		off = (ulong_t)(addr - dhpp->dh_uvaddr);
   1700 		while (slen != 0) {
   1701 			size_t mlen;
   1702 
   1703 			/*
   1704 			 * Softlocking on a region that allows remap is
   1705 			 * unsupported due to unresolved locking issues
   1706 			 * XXX: unclear what these are?
   1707 			 *	One potential is that if there is a pending
   1708 			 *	softlock, then a remap should not be allowed
   1709 			 *	until the unlock is done. This is easily
   1710 			 *	fixed by returning error in devmap*remap on
   1711 			 *	checking the dh->dh_softlock->softlocked value
   1712 			 */
   1713 			if ((type == F_SOFTLOCK) &&
   1714 			    (dhpp->dh_flags & DEVMAP_ALLOW_REMAP)) {
   1715 				return (FC_NOSUPPORT);
   1716 			}
   1717 
   1718 			mlen = MIN(slen, (dhpp->dh_len - off));
   1719 			if (dhpp->dh_callbackops.devmap_access) {
   1720 				size_t llen;
   1721 				caddr_t laddr;
   1722 				/*
   1723 				 * use extended length for large page mappings
   1724 				 */
   1725 				HOLD_DHP_LOCK(dhpp);
   1726 				if ((sdp->pageprot == 0) &&
   1727 				    (dhpp->dh_flags & DEVMAP_FLAG_LARGE)) {
   1728 					devmap_get_large_pgsize(dhpp,
   1729 					    mlen, maddr, &llen, &laddr);
   1730 				} else {
   1731 					llen = mlen;
   1732 				}
   1733 				RELE_DHP_LOCK(dhpp);
   1734 
   1735 				slpage += btopr(llen);
   1736 				slock = dhpp->dh_softlock;
   1737 			}
   1738 			maddr += mlen;
   1739 			ASSERT(slen >= mlen);
   1740 			slen -= mlen;
   1741 			dhpp = dhpp->dh_next;
   1742 			off = 0;
   1743 		}
   1744 		/*
   1745 		 * synchronize with other faulting threads and wait till safe
   1746 		 * devmap_softlock_enter might return due to signal in cv_wait
   1747 		 *
   1748 		 * devmap_softlock_enter has to be called outside of while loop
   1749 		 * to prevent a deadlock if len spans over multiple dhps.
   1750 		 * dh_softlock is based on device instance and if multiple dhps
   1751 		 * use the same device instance, the second dhp's LOCK call
   1752 		 * will hang waiting on the first to complete.
   1753 		 * devmap_setup verifies that slocks in a dhp_chain are same.
   1754 		 * RFE: this deadlock only hold true for F_SOFTLOCK. For
   1755 		 * 	F_INVAL/F_PROT, since we now allow multiple in parallel,
   1756 		 *	we could have done the softlock_enter inside the loop
   1757 		 *	and supported multi-dhp mappings with dissimilar devices
   1758 		 */
   1759 		if (err = devmap_softlock_enter(slock, slpage, type))
   1760 			return (FC_MAKE_ERR(err));
   1761 	}
   1762 
   1763 	/* reset 'maddr' to the start addr of the range of fault. */
   1764 	maddr = addr;
   1765 
   1766 	/* calculate the offset corresponds to 'addr' in the first dhp. */
   1767 	off = (ulong_t)(addr - dhp->dh_uvaddr);
   1768 
   1769 	/*
   1770 	 * The fault length may span over multiple dhps.
   1771 	 * Loop until the total length is satisfied.
   1772 	 */
   1773 	while (len != 0) {
   1774 		size_t llen;
   1775 		size_t mlen;
   1776 		caddr_t laddr;
   1777 
   1778 		/*
   1779 		 * mlen is the smaller of 'len' and the length
   1780 		 * from addr to the end of mapping defined by dhp.
   1781 		 */
   1782 		mlen = MIN(len, (dhp->dh_len - off));
   1783 
   1784 		HOLD_DHP_LOCK(dhp);
   1785 		/*
   1786 		 * Pass the extended length and address to devmap_access
   1787 		 * if large pagesize is used for loading address translations.
   1788 		 */
   1789 		if ((sdp->pageprot == 0) &&
   1790 		    (dhp->dh_flags & DEVMAP_FLAG_LARGE)) {
   1791 			devmap_get_large_pgsize(dhp, mlen, maddr,
   1792 			    &llen, &laddr);
   1793 			ASSERT(maddr == addr || laddr == maddr);
   1794 		} else {
   1795 			llen = mlen;
   1796 			laddr = maddr;
   1797 		}
   1798 
   1799 		if (dhp->dh_callbackops.devmap_access != NULL) {
   1800 			offset_t aoff;
   1801 
   1802 			aoff = sdp->offset + (offset_t)(laddr - seg->s_base);
   1803 
   1804 			/*
   1805 			 * call driver's devmap_access entry point which will
   1806 			 * call devmap_load/contextmgmt to load the translations
   1807 			 *
   1808 			 * We drop the dhp_lock before calling access so
   1809 			 * drivers can call devmap_*_remap within access
   1810 			 */
   1811 			RELE_DHP_LOCK(dhp);
   1812 
   1813 			err = (*dhp->dh_callbackops.devmap_access)(
   1814 			    dhp, (void *)dhp->dh_pvtp, aoff, llen, type, rw);
   1815 		} else {
   1816 			/*
   1817 			 * If no devmap_access entry point, then load mappings
   1818 			 * hold dhp_lock across faultpages if REMAP
   1819 			 */
   1820 			err = segdev_faultpages(hat, seg, laddr, llen,
   1821 			    type, rw, dhp);
   1822 			err_is_faultcode = 1;
   1823 			RELE_DHP_LOCK(dhp);
   1824 		}
   1825 
   1826 		if (err) {
   1827 			if ((type == F_SOFTLOCK) && (maddr > addr)) {
   1828 				/*
   1829 				 * If not first dhp, use
   1830 				 * segdev_fault(F_SOFTUNLOCK) for prior dhps
   1831 				 * While this is recursion, it is incorrect to
   1832 				 * call just segdev_softunlock
   1833 				 * if we are using either large pages
   1834 				 * or devmap_access. It will be more right
   1835 				 * to go through the same loop as above
   1836 				 * rather than call segdev_softunlock directly
   1837 				 * It will use the right lengths as well as
   1838 				 * call into the driver devmap_access routines.
   1839 				 */
   1840 				size_t done = (size_t)(maddr - addr);
   1841 				(void) segdev_fault(hat, seg, addr, done,
   1842 				    F_SOFTUNLOCK, S_OTHER);
   1843 				/*
   1844 				 * reduce slpage by number of pages
   1845 				 * released by segdev_softunlock
   1846 				 */
   1847 				ASSERT(slpage >= btopr(done));
   1848 				devmap_softlock_exit(slock,
   1849 				    slpage - btopr(done), type);
   1850 			} else {
   1851 				devmap_softlock_exit(slock, slpage, type);
   1852 			}
   1853 
   1854 
   1855 			/*
   1856 			 * Segdev_faultpages() already returns a faultcode,
   1857 			 * hence, result from segdev_faultpages() should be
   1858 			 * returned directly.
   1859 			 */
   1860 			if (err_is_faultcode)
   1861 				return (err);
   1862 			return (FC_MAKE_ERR(err));
   1863 		}
   1864 
   1865 		maddr += mlen;
   1866 		ASSERT(len >= mlen);
   1867 		len -= mlen;
   1868 		dhp = dhp->dh_next;
   1869 		off = 0;
   1870 
   1871 		ASSERT(!dhp || len == 0 || maddr == dhp->dh_uvaddr);
   1872 	}
   1873 	/*
   1874 	 * release the softlock count at end of fault
   1875 	 * For F_SOFTLOCK this is done in the later F_SOFTUNLOCK
   1876 	 */
   1877 	if ((type == F_INVAL) || (type == F_PROT))
   1878 		devmap_softlock_exit(slock, slpage, type);
   1879 	return (0);
   1880 }
   1881 
   1882 /*
   1883  * segdev_faultpages
   1884  *
   1885  * Used to fault in seg_dev segment pages. Called by segdev_fault or devmap_load
   1886  * This routine assumes that the callers makes sure that the fields
   1887  * in dhp used below are not changed due to remap during this call.
   1888  * Caller does HOLD_DHP_LOCK if neeed
   1889  * This routine returns a faultcode_t as a return value for segdev_fault.
   1890  */
   1891 static faultcode_t
   1892 segdev_faultpages(
   1893 	struct hat *hat,		/* the hat */
   1894 	struct seg *seg,		/* the seg_dev of interest */
   1895 	caddr_t addr,			/* the address of the fault */
   1896 	size_t len,			/* the length of the range */
   1897 	enum fault_type type,		/* type of fault */
   1898 	enum seg_rw rw,			/* type of access at fault */
   1899 	devmap_handle_t *dhp)		/* devmap handle */
   1900 {
   1901 	register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   1902 	register caddr_t a;
   1903 	struct vpage *vpage;
   1904 	struct ddi_umem_cookie *kpmem_cookie = NULL;
   1905 	int err;
   1906 
   1907 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_FAULTPAGES,
   1908 	    "segdev_faultpages: dhp=%p seg=%p addr=%p len=%lx",
   1909 	    (void *)dhp, (void *)seg, (void *)addr, len);
   1910 	DEBUGF(5, (CE_CONT, "segdev_faultpages: "
   1911 	    "dhp %p seg %p addr %p len %lx\n",
   1912 	    (void *)dhp, (void *)seg, (void *)addr, len));
   1913 
   1914 	/*
   1915 	 * The seg_dev driver does not implement copy-on-write,
   1916 	 * and always loads translations with maximal allowed permissions
   1917 	 * but we got an fault trying to access the device.
   1918 	 * Servicing the fault is not going to result in any better result
   1919 	 * XXX: If we want to allow devmap_access to handle F_PROT calls,
   1920 	 * This code should be removed and let the normal fault handling
   1921 	 * take care of finding the error
   1922 	 */
   1923 	if (type == F_PROT) {
   1924 		return (FC_PROT);
   1925 	}
   1926 
   1927 	if (type == F_SOFTUNLOCK) {
   1928 		segdev_softunlock(hat, seg, addr, len, rw);
   1929 		return (0);
   1930 	}
   1931 
   1932 	/*
   1933 	 * For kernel pageable memory, fault/lock segkp pages
   1934 	 * We hold this until the completion of this
   1935 	 * fault (INVAL/PROT) or till unlock (SOFTLOCK).
   1936 	 */
   1937 	if ((dhp != NULL) && dhp_is_kpmem(dhp)) {
   1938 		kpmem_cookie = (struct ddi_umem_cookie *)dhp->dh_cookie;
   1939 		if (err = acquire_kpmem_lock(kpmem_cookie, btopr(len)))
   1940 			return (err);
   1941 	}
   1942 
   1943 	/*
   1944 	 * If we have the same protections for the entire segment,
   1945 	 * insure that the access being attempted is legitimate.
   1946 	 */
   1947 	rw_enter(&sdp->lock, RW_READER);
   1948 	if (sdp->pageprot == 0) {
   1949 		uint_t protchk;
   1950 
   1951 		switch (rw) {
   1952 		case S_READ:
   1953 			protchk = PROT_READ;
   1954 			break;
   1955 		case S_WRITE:
   1956 			protchk = PROT_WRITE;
   1957 			break;
   1958 		case S_EXEC:
   1959 			protchk = PROT_EXEC;
   1960 			break;
   1961 		case S_OTHER:
   1962 		default:
   1963 			protchk = PROT_READ | PROT_WRITE | PROT_EXEC;
   1964 			break;
   1965 		}
   1966 
   1967 		if ((sdp->prot & protchk) == 0) {
   1968 			rw_exit(&sdp->lock);
   1969 			/* undo kpmem locking */
   1970 			if (kpmem_cookie != NULL) {
   1971 				release_kpmem_lock(kpmem_cookie, btopr(len));
   1972 			}
   1973 			return (FC_PROT);	/* illegal access type */
   1974 		}
   1975 	}
   1976 
   1977 	/*
   1978 	 * we do a single hat_devload for the range if
   1979 	 *   - devmap framework (dhp is not NULL),
   1980 	 *   - pageprot == 0, i.e., no per-page protection set and
   1981 	 *   - is device pages, irrespective of whether we are using large pages
   1982 	 */
   1983 	if ((sdp->pageprot == 0) && (dhp != NULL) && dhp_is_devmem(dhp)) {
   1984 		pfn_t pfnum;
   1985 		uint_t hat_flags;
   1986 
   1987 		if (dhp->dh_flags & DEVMAP_MAPPING_INVALID) {
   1988 			rw_exit(&sdp->lock);
   1989 			return (FC_NOMAP);
   1990 		}
   1991 
   1992 		if (type == F_SOFTLOCK) {
   1993 			mutex_enter(&freemem_lock);
   1994 			sdp->softlockcnt += btopr(len);
   1995 			mutex_exit(&freemem_lock);
   1996 		}
   1997 
   1998 		hat_flags = ((type == F_SOFTLOCK) ? HAT_LOAD_LOCK : HAT_LOAD);
   1999 		pfnum = dhp->dh_pfn + btop((uintptr_t)(addr - dhp->dh_uvaddr));
   2000 		ASSERT(!pf_is_memory(pfnum));
   2001 
   2002 		hat_devload(hat, addr, len, pfnum, sdp->prot | dhp->dh_hat_attr,
   2003 		    hat_flags | sdp->hat_flags);
   2004 		rw_exit(&sdp->lock);
   2005 		return (0);
   2006 	}
   2007 
   2008 	/* Handle cases where we have to loop through fault handling per-page */
   2009 
   2010 	if (sdp->vpage == NULL)
   2011 		vpage = NULL;
   2012 	else
   2013 		vpage = &sdp->vpage[seg_page(seg, addr)];
   2014 
   2015 	/* loop over the address range handling each fault */
   2016 	for (a = addr; a < addr + len; a += PAGESIZE) {
   2017 		if (err = segdev_faultpage(hat, seg, a, vpage, type, rw, dhp)) {
   2018 			break;
   2019 		}
   2020 		if (vpage != NULL)
   2021 			vpage++;
   2022 	}
   2023 	rw_exit(&sdp->lock);
   2024 	if (err && (type == F_SOFTLOCK)) { /* error handling for F_SOFTLOCK */
   2025 		size_t done = (size_t)(a - addr); /* pages fault successfully */
   2026 		if (done > 0) {
   2027 			/* use softunlock for those pages */
   2028 			segdev_softunlock(hat, seg, addr, done, S_OTHER);
   2029 		}
   2030 		if (kpmem_cookie != NULL) {
   2031 			/* release kpmem lock for rest of pages */
   2032 			ASSERT(len >= done);
   2033 			release_kpmem_lock(kpmem_cookie, btopr(len - done));
   2034 		}
   2035 	} else if ((kpmem_cookie != NULL) && (type != F_SOFTLOCK)) {
   2036 		/* for non-SOFTLOCK cases, release kpmem */
   2037 		release_kpmem_lock(kpmem_cookie, btopr(len));
   2038 	}
   2039 	return (err);
   2040 }
   2041 
/*
 * Asynchronous page fault.  We simply do nothing since this
 * entry point is not supposed to load up the translation.
 *
 * Emits a trace record, asserts that the address space lock is held,
 * and always returns 0.
 */
/*ARGSUSED*/
static faultcode_t
segdev_faulta(struct seg *seg, caddr_t addr)
{
	TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_FAULTA,
	    "segdev_faulta: seg=%p addr=%p", (void *)seg, (void *)addr);
	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	return (0);
}
   2056 
   2057 static int
   2058 segdev_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
   2059 {
   2060 	register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   2061 	register devmap_handle_t *dhp;
   2062 	register struct vpage *vp, *evp;
   2063 	devmap_handle_t *dhp_head = (devmap_handle_t *)sdp->devmap_data;
   2064 	ulong_t off;
   2065 	size_t mlen, sz;
   2066 
   2067 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_SETPROT,
   2068 	    "segdev_setprot:start seg=%p addr=%p len=%lx prot=%x",
   2069 	    (void *)seg, (void *)addr, len, prot);
   2070 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
   2071 
   2072 	if ((sz = sdp->softlockcnt) > 0 && dhp_head != NULL) {
   2073 		/*
   2074 		 * Fail the setprot if pages are SOFTLOCKed through this
   2075 		 * mapping.
   2076 		 * Softlockcnt is protected from change by the as read lock.
   2077 		 */
   2078 		TRACE_1(TR_FAC_DEVMAP, TR_DEVMAP_SETPROT_CK1,
   2079 		    "segdev_setprot:error softlockcnt=%lx", sz);
   2080 		DEBUGF(1, (CE_CONT, "segdev_setprot: softlockcnt %ld\n", sz));
   2081 		return (EAGAIN);
   2082 	}
   2083 
   2084 	if (dhp_head != NULL) {
   2085 		if ((dhp = devmap_find_handle(dhp_head, addr)) == NULL)
   2086 			return (EINVAL);
   2087 
   2088 		/*
   2089 		 * check if violate maxprot.
   2090 		 */
   2091 		off = (ulong_t)(addr - dhp->dh_uvaddr);
   2092 		mlen  = len;
   2093 		while (dhp) {
   2094 			if ((dhp->dh_maxprot & prot) != prot)
   2095 				return (EACCES);	/* violated maxprot */
   2096 
   2097 			if (mlen > (dhp->dh_len - off)) {
   2098 				mlen -= dhp->dh_len - off;
   2099 				dhp = dhp->dh_next;
   2100 				off = 0;
   2101 			} else
   2102 				break;
   2103 		}
   2104 	} else {
   2105 		if ((sdp->maxprot & prot) != prot)
   2106 			return (EACCES);
   2107 	}
   2108 
   2109 	rw_enter(&sdp->lock, RW_WRITER);
   2110 	if (addr == seg->s_base && len == seg->s_size && sdp->pageprot == 0) {
   2111 		if (sdp->prot == prot) {
   2112 			rw_exit(&sdp->lock);
   2113 			return (0);			/* all done */
   2114 		}
   2115 		sdp->prot = (uchar_t)prot;
   2116 	} else {
   2117 		sdp->pageprot = 1;
   2118 		if (sdp->vpage == NULL) {
   2119 			/*
   2120 			 * First time through setting per page permissions,
   2121 			 * initialize all the vpage structures to prot
   2122 			 */
   2123 			sdp->vpage = kmem_zalloc(vpgtob(seg_pages(seg)),
   2124 			    KM_SLEEP);
   2125 			evp = &sdp->vpage[seg_pages(seg)];
   2126 			for (vp = sdp->vpage; vp < evp; vp++)
   2127 				VPP_SETPROT(vp, sdp->prot);
   2128 		}
   2129 		/*
   2130 		 * Now go change the needed vpages protections.
   2131 		 */
   2132 		evp = &sdp->vpage[seg_page(seg, addr + len)];
   2133 		for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++)
   2134 			VPP_SETPROT(vp, prot);
   2135 	}
   2136 	rw_exit(&sdp->lock);
   2137 
   2138 	if (dhp_head != NULL) {
   2139 		devmap_handle_t *tdhp;
   2140 		/*
   2141 		 * If large page size was used in hat_devload(),
   2142 		 * the same page size must be used in hat_unload().
   2143 		 */
   2144 		dhp = tdhp = devmap_find_handle(dhp_head, addr);
   2145 		while (tdhp != NULL) {
   2146 			if (tdhp->dh_flags & DEVMAP_FLAG_LARGE) {
   2147 				break;
   2148 			}
   2149 			tdhp = tdhp->dh_next;
   2150 		}
   2151 		if (tdhp) {
   2152 			size_t slen = len;
   2153 			size_t mlen;
   2154 			size_t soff;
   2155 
   2156 			soff = (ulong_t)(addr - dhp->dh_uvaddr);
   2157 			while (slen != 0) {
   2158 				mlen = MIN(slen, (dhp->dh_len - soff));
   2159 				hat_unload(seg->s_as->a_hat, dhp->dh_uvaddr,
   2160 				    dhp->dh_len, HAT_UNLOAD);
   2161 				dhp = dhp->dh_next;
   2162 				ASSERT(slen >= mlen);
   2163 				slen -= mlen;
   2164 				soff = 0;
   2165 			}
   2166 			return (0);
   2167 		}
   2168 	}
   2169 
   2170 	if ((prot & ~PROT_USER) == PROT_NONE) {
   2171 		hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD);
   2172 	} else {
   2173 		/*
   2174 		 * RFE: the segment should keep track of all attributes
   2175 		 * allowing us to remove the deprecated hat_chgprot
   2176 		 * and use hat_chgattr.
   2177 		 */
   2178 		hat_chgprot(seg->s_as->a_hat, addr, len, prot);
   2179 	}
   2180 
   2181 	return (0);
   2182 }
   2183 
   2184 static int
   2185 segdev_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
   2186 {
   2187 	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   2188 	struct vpage *vp, *evp;
   2189 
   2190 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_CHECKPROT,
   2191 	    "segdev_checkprot:start seg=%p addr=%p len=%lx prot=%x",
   2192 	    (void *)seg, (void *)addr, len, prot);
   2193 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
   2194 
   2195 	/*
   2196 	 * If segment protection can be used, simply check against them
   2197 	 */
   2198 	rw_enter(&sdp->lock, RW_READER);
   2199 	if (sdp->pageprot == 0) {
   2200 		register int err;
   2201 
   2202 		err = ((sdp->prot & prot) != prot) ? EACCES : 0;
   2203 		rw_exit(&sdp->lock);
   2204 		return (err);
   2205 	}
   2206 
   2207 	/*
   2208 	 * Have to check down to the vpage level
   2209 	 */
   2210 	evp = &sdp->vpage[seg_page(seg, addr + len)];
   2211 	for (vp = &sdp->vpage[seg_page(seg, addr)]; vp < evp; vp++) {
   2212 		if ((VPP_PROT(vp) & prot) != prot) {
   2213 			rw_exit(&sdp->lock);
   2214 			return (EACCES);
   2215 		}
   2216 	}
   2217 	rw_exit(&sdp->lock);
   2218 	return (0);
   2219 }
   2220 
   2221 static int
   2222 segdev_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
   2223 {
   2224 	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   2225 	size_t pgno;
   2226 
   2227 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_GETPROT,
   2228 	    "segdev_getprot:start seg=%p addr=%p len=%lx protv=%p",
   2229 	    (void *)seg, (void *)addr, len, (void *)protv);
   2230 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
   2231 
   2232 	pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
   2233 	if (pgno != 0) {
   2234 		rw_enter(&sdp->lock, RW_READER);
   2235 		if (sdp->pageprot == 0) {
   2236 			do {
   2237 				protv[--pgno] = sdp->prot;
   2238 			} while (pgno != 0);
   2239 		} else {
   2240 			size_t pgoff = seg_page(seg, addr);
   2241 
   2242 			do {
   2243 				pgno--;
   2244 				protv[pgno] =
   2245 				    VPP_PROT(&sdp->vpage[pgno + pgoff]);
   2246 			} while (pgno != 0);
   2247 		}
   2248 		rw_exit(&sdp->lock);
   2249 	}
   2250 	return (0);
   2251 }
   2252 
   2253 static u_offset_t
   2254 segdev_getoffset(register struct seg *seg, caddr_t addr)
   2255 {
   2256 	register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   2257 
   2258 	TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETOFFSET,
   2259 	    "segdev_getoffset:start seg=%p addr=%p", (void *)seg, (void *)addr);
   2260 
   2261 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
   2262 
   2263 	return ((u_offset_t)sdp->offset + (addr - seg->s_base));
   2264 }
   2265 
   2266 /*ARGSUSED*/
   2267 static int
   2268 segdev_gettype(register struct seg *seg, caddr_t addr)
   2269 {
   2270 	register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   2271 
   2272 	TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETTYPE,
   2273 	    "segdev_gettype:start seg=%p addr=%p", (void *)seg, (void *)addr);
   2274 
   2275 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
   2276 
   2277 	return (sdp->type);
   2278 }
   2279 
   2280 
   2281 /*ARGSUSED*/
   2282 static int
   2283 segdev_getvp(register struct seg *seg, caddr_t addr, struct vnode **vpp)
   2284 {
   2285 	register struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   2286 
   2287 	TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_GETVP,
   2288 	    "segdev_getvp:start seg=%p addr=%p", (void *)seg, (void *)addr);
   2289 
   2290 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
   2291 
   2292 	/*
   2293 	 * Note that this vp is the common_vp of the device, where the
   2294 	 * pages are hung ..
   2295 	 */
   2296 	*vpp = VTOCVP(sdp->vp);
   2297 
   2298 	return (0);
   2299 }
   2300 
/*
 * Placeholder for segment operations that seg_dev does not support:
 * records a trace point and panics.
 */
static void
segdev_badop(void)
{
	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGDEV_BADOP,
	    "segdev_badop:start");
	panic("segdev_badop");
	/*NOTREACHED*/
}
   2309 
/*
 * segdev pages are not in the cache, and thus can't really be controlled.
 * Hence, syncs are simply always successful.
 *
 * All arguments other than seg are ignored; seg is only used to assert
 * that the address space lock is held.  Always returns 0.
 */
/*ARGSUSED*/
static int
segdev_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags)
{
	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SYNC, "segdev_sync:start");

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	return (0);
}
   2324 
   2325 /*
   2326  * segdev pages are always "in core".
   2327  */
   2328 /*ARGSUSED*/
   2329 static size_t
   2330 segdev_incore(struct seg *seg, caddr_t addr, size_t len, char *vec)
   2331 {
   2332 	size_t v = 0;
   2333 
   2334 	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_INCORE, "segdev_incore:start");
   2335 
   2336 	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));
   2337 
   2338 	for (len = (len + PAGEOFFSET) & PAGEMASK; len; len -= PAGESIZE,
   2339 	    v += PAGESIZE)
   2340 		*vec++ = 1;
   2341 	return (v);
   2342 }
   2343 
/*
 * segdev pages are not in the cache, and thus can't really be controlled.
 * Hence, locks are simply always successful.
 *
 * All arguments other than seg are ignored; seg is only used to assert
 * that the address space lock is held.  Always returns 0.
 */
/*ARGSUSED*/
static int
segdev_lockop(struct seg *seg, caddr_t addr,
    size_t len, int attr, int op, ulong_t *lockmap, size_t pos)
{
	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_LOCKOP, "segdev_lockop:start");

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	return (0);
}
   2359 
/*
 * segdev pages are not in the cache, and thus can't really be controlled.
 * Hence, advise is simply always successful.
 *
 * All arguments other than seg are ignored; seg is only used to assert
 * that the address space lock is held.  Always returns 0.
 */
/*ARGSUSED*/
static int
segdev_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav)
{
	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_ADVISE, "segdev_advise:start");

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	return (0);
}
   2374 
/*
 * segdev pages are not dumped, so we just return
 * (the body is intentionally empty and seg is unused).
 */
/*ARGSUSED*/
static void
segdev_dump(struct seg *seg)
{}
   2382 
   2383 /*
   2384  * ddi_segmap_setup:	Used by drivers who wish specify mapping attributes
   2385  *			for a segment.	Called from a drivers segmap(9E)
   2386  *			routine.
   2387  */
   2388 /*ARGSUSED*/
   2389 int
   2390 ddi_segmap_setup(dev_t dev, off_t offset, struct as *as, caddr_t *addrp,
   2391     off_t len, uint_t prot, uint_t maxprot, uint_t flags, cred_t *cred,
   2392     ddi_device_acc_attr_t *accattrp, uint_t rnumber)
   2393 {
   2394 	struct segdev_crargs dev_a;
   2395 	int (*mapfunc)(dev_t dev, off_t off, int prot);
   2396 	uint_t hat_attr;
   2397 	pfn_t pfn;
   2398 	int	error, i;
   2399 
   2400 	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGMAP_SETUP,
   2401 	    "ddi_segmap_setup:start");
   2402 
   2403 	if ((mapfunc = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap) == nodev)
   2404 		return (ENODEV);
   2405 
   2406 	/*
   2407 	 * Character devices that support the d_mmap
   2408 	 * interface can only be mmap'ed shared.
   2409 	 */
   2410 	if ((flags & MAP_TYPE) != MAP_SHARED)
   2411 		return (EINVAL);
   2412 
   2413 	/*
   2414 	 * Check that this region is indeed mappable on this platform.
   2415 	 * Use the mapping function.
   2416 	 */
   2417 	if (ddi_device_mapping_check(dev, accattrp, rnumber, &hat_attr) == -1)
   2418 		return (ENXIO);
   2419 
   2420 	/*
   2421 	 * Check to ensure that the entire range is
   2422 	 * legal and we are not trying to map in
   2423 	 * more than the device will let us.
   2424 	 */
   2425 	for (i = 0; i < len; i += PAGESIZE) {
   2426 		if (i == 0) {
   2427 			/*
   2428 			 * Save the pfn at offset here. This pfn will be
   2429 			 * used later to get user address.
   2430 			 */
   2431 			if ((pfn = (pfn_t)cdev_mmap(mapfunc, dev, offset,
   2432 			    maxprot)) == PFN_INVALID)
   2433 				return (ENXIO);
   2434 		} else {
   2435 			if (cdev_mmap(mapfunc, dev, offset + i, maxprot) ==
   2436 			    PFN_INVALID)
   2437 				return (ENXIO);
   2438 		}
   2439 	}
   2440 
   2441 	as_rangelock(as);
   2442 	/* Pick an address w/o worrying about any vac alignment constraints. */
   2443 	error = choose_addr(as, addrp, len, ptob(pfn), ADDR_NOVACALIGN, flags);
   2444 	if (error != 0) {
   2445 		as_rangeunlock(as);
   2446 		return (error);
   2447 	}
   2448 
   2449 	dev_a.mapfunc = mapfunc;
   2450 	dev_a.dev = dev;
   2451 	dev_a.offset = (offset_t)offset;
   2452 	dev_a.type = flags & MAP_TYPE;
   2453 	dev_a.prot = (uchar_t)prot;
   2454 	dev_a.maxprot = (uchar_t)maxprot;
   2455 	dev_a.hat_attr = hat_attr;
   2456 	dev_a.hat_flags = 0;
   2457 	dev_a.devmap_data = NULL;
   2458 
   2459 	error = as_map(as, *addrp, len, segdev_create, &dev_a);
   2460 	as_rangeunlock(as);
   2461 	return (error);
   2462 
   2463 }
   2464 
/*
 * Page locking entry point: not supported for seg_dev segments.
 * Records a trace point and always returns ENOTSUP.
 */
/*ARGSUSED*/
static int
segdev_pagelock(struct seg *seg, caddr_t addr, size_t len,
    struct page ***ppp, enum lock_type type, enum seg_rw rw)
{
	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_PAGELOCK,
	    "segdev_pagelock:start");
	return (ENOTSUP);
}
   2474 
/*
 * Page size change entry point: not supported for seg_dev segments.
 * Always returns ENOTSUP.
 */
/*ARGSUSED*/
static int
segdev_setpagesize(struct seg *seg, caddr_t addr, size_t len,
    uint_t szc)
{
	return (ENOTSUP);
}
   2482 
   2483 /*
   2484  * devmap_device: Used by devmap framework to establish mapping
   2485  *                called by devmap_seup(9F) during map setup time.
   2486  */
   2487 /*ARGSUSED*/
   2488 static int
   2489 devmap_device(devmap_handle_t *dhp, struct as *as, caddr_t *addr,
   2490     offset_t off, size_t len, uint_t flags)
   2491 {
   2492 	devmap_handle_t *rdhp, *maxdhp;
   2493 	struct segdev_crargs dev_a;
   2494 	int	err;
   2495 	uint_t maxprot = PROT_ALL;
   2496 	offset_t offset = 0;
   2497 	pfn_t pfn;
   2498 	struct devmap_pmem_cookie *pcp;
   2499 
   2500 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVICE,
   2501 	    "devmap_device:start dhp=%p addr=%p off=%llx, len=%lx",
   2502 	    (void *)dhp, (void *)addr, off, len);
   2503 
   2504 	DEBUGF(2, (CE_CONT, "devmap_device: dhp %p addr %p off %llx len %lx\n",
   2505 	    (void *)dhp, (void *)addr, off, len));
   2506 
   2507 	as_rangelock(as);
   2508 	if ((flags & MAP_FIXED) == 0) {
   2509 		offset_t aligned_off;
   2510 
   2511 		rdhp = maxdhp = dhp;
   2512 		while (rdhp != NULL) {
   2513 			maxdhp = (maxdhp->dh_len > rdhp->dh_len) ?
   2514 			    maxdhp : rdhp;
   2515 			rdhp = rdhp->dh_next;
   2516 			maxprot |= dhp->dh_maxprot;
   2517 		}
   2518 		offset = maxdhp->dh_uoff - dhp->dh_uoff;
   2519 
   2520 		/*
   2521 		 * Use the dhp that has the
   2522 		 * largest len to get user address.
   2523 		 */
   2524 		/*
   2525 		 * If MAPPING_INVALID, cannot use dh_pfn/dh_cvaddr,
   2526 		 * use 0 which is as good as any other.
   2527 		 */
   2528 		if (maxdhp->dh_flags & DEVMAP_MAPPING_INVALID) {
   2529 			aligned_off = (offset_t)0;
   2530 		} else if (dhp_is_devmem(maxdhp)) {
   2531 			aligned_off = (offset_t)ptob(maxdhp->dh_pfn) - offset;
   2532 		} else if (dhp_is_pmem(maxdhp)) {
   2533 			pcp = (struct devmap_pmem_cookie *)maxdhp->dh_pcookie;
   2534 			pfn = page_pptonum(
   2535 			    pcp->dp_pparray[btop(maxdhp->dh_roff)]);
   2536 			aligned_off = (offset_t)ptob(pfn) - offset;
   2537 		} else {
   2538 			aligned_off = (offset_t)(uintptr_t)maxdhp->dh_cvaddr -
   2539 			    offset;
   2540 		}
   2541 
   2542 		/*
   2543 		 * Pick an address aligned to dh_cookie.
   2544 		 * for kernel memory/user memory, cookie is cvaddr.
   2545 		 * for device memory, cookie is physical address.
   2546 		 */
   2547 		map_addr(addr, len, aligned_off, 1, flags);
   2548 		if (*addr == NULL) {
   2549 			as_rangeunlock(as);
   2550 			return (ENOMEM);
   2551 		}
   2552 	} else {
   2553 		/*
   2554 		 * User-specified address; blow away any previous mappings.
   2555 		 */
   2556 		(void) as_unmap(as, *addr, len);
   2557 	}
   2558 
   2559 	dev_a.mapfunc = NULL;
   2560 	dev_a.dev = dhp->dh_dev;
   2561 	dev_a.type = flags & MAP_TYPE;
   2562 	dev_a.offset = off;
   2563 	/*
   2564 	 * sdp->maxprot has the least restrict protection of all dhps.
   2565 	 */
   2566 	dev_a.maxprot = maxprot;
   2567 	dev_a.prot = dhp->dh_prot;
   2568 	/*
   2569 	 * devmap uses dhp->dh_hat_attr for hat.
   2570 	 */
   2571 	dev_a.hat_flags = 0;
   2572 	dev_a.hat_attr = 0;
   2573 	dev_a.devmap_data = (void *)dhp;
   2574 
   2575 	err = as_map(as, *addr, len, segdev_create, &dev_a);
   2576 	as_rangeunlock(as);
   2577 	return (err);
   2578 }
   2579 
/*
 * devmap_do_ctxmgt: run a driver's context management callback for a
 * fault on the mapping described by dhc, serializing against other
 * threads through the handle's devmap_ctx when a hysteresis timeout is
 * configured and more than one CPU is running.
 *
 *	dhc	devmap cookie (the devmap handle)
 *	pvtp	not used here; the callback receives dhp->dh_pvtp instead
 *	off	offset passed through to the callback
 *	len	length passed through to the callback
 *	type	fault type passed through to the callback
 *	rw	access type passed through to the callback
 *	ctxmgt	the driver's context management callback
 *
 * Returns FC_HWERR if ctxmgt is NULL or the callback returns non-zero,
 * DDI_SUCCESS otherwise.
 */
int
devmap_do_ctxmgt(devmap_cookie_t dhc, void *pvtp, offset_t off, size_t len,
    uint_t type, uint_t rw, int (*ctxmgt)(devmap_cookie_t, void *, offset_t,
    size_t, uint_t, uint_t))
{
	register devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	struct devmap_ctx *devctx;
	int do_timeout = 0;
	int ret;

#ifdef lint
	pvtp = pvtp;
#endif

	TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT,
	    "devmap_do_ctxmgt:start dhp=%p off=%llx, len=%lx",
	    (void *)dhp, off, len);
	DEBUGF(7, (CE_CONT, "devmap_do_ctxmgt: dhp %p off %llx len %lx\n",
	    (void *)dhp, off, len));

	if (ctxmgt == NULL)
		return (FC_HWERR);

	devctx = dhp->dh_ctx;

	/*
	 * If we are on an MP system with more than one cpu running
	 * and if a thread on some CPU already has the context, wait
	 * for it to finish if there is a hysteresis timeout.
	 *
	 * We call cv_wait() instead of cv_wait_sig() because
	 * it does not matter much if it returned due to a signal
	 * or due to a cv_signal() or cv_broadcast().  In either event
	 * we need to complete the mapping otherwise the processes
	 * will die with a SEGV.
	 */
	if ((dhp->dh_timeout_length > 0) && (ncpus > 1)) {
		TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK1,
		    "devmap_do_ctxmgt:doing hysteresis, devctl %p dhp %p",
		    devctx, dhp);
		do_timeout = 1;
		mutex_enter(&devctx->lock);
		while (devctx->oncpu)
			cv_wait(&devctx->cv, &devctx->lock);
		/* claim the context; released below or after the timeout */
		devctx->oncpu = 1;
		mutex_exit(&devctx->lock);
	}

	/*
	 * Call the contextmgt callback so that the driver can handle
	 * the fault.
	 */
	ret = (*ctxmgt)(dhp, dhp->dh_pvtp, off, len, type, rw);

	/*
	 * If devmap_access() returned -1, then there was a hardware
	 * error so we need to convert the return value to something
	 * that trap() will understand.  Otherwise, the return value
	 * is already a fault code generated by devmap_unload()
	 * or devmap_load().
	 */
	if (ret) {
		TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK2,
		    "devmap_do_ctxmgt: ret=%x dhp=%p devctx=%p",
		    ret, dhp, devctx);
		DEBUGF(1, (CE_CONT, "devmap_do_ctxmgt: ret %x dhp %p\n",
		    ret, (void *)dhp));
		/*
		 * NOTE(review): oncpu is tested here without holding
		 * devctx->lock and regardless of do_timeout, so this may
		 * clear a claim made by another thread -- confirm intent.
		 */
		if (devctx->oncpu) {
			mutex_enter(&devctx->lock);
			devctx->oncpu = 0;
			cv_signal(&devctx->cv);
			mutex_exit(&devctx->lock);
		}
		return (FC_HWERR);
	}

	/*
	 * Setup the timeout if we need to
	 */
	if (do_timeout) {
		mutex_enter(&devctx->lock);
		if (dhp->dh_timeout_length > 0) {
			TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK3,
			    "devmap_do_ctxmgt:timeout set");
			/*
			 * oncpu stays set here; presumably devmap_ctxto
			 * clears it when the timeout fires -- TODO confirm.
			 */
			devctx->timeout = timeout(devmap_ctxto,
			    devctx, dhp->dh_timeout_length);
		} else {
			/*
			 * We don't want to wait so set oncpu to
			 * 0 and wake up anyone waiting.
			 */
			TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DO_CTXMGT_CK4,
			    "devmap_do_ctxmgt:timeout not set");
			devctx->oncpu = 0;
			cv_signal(&devctx->cv);
		}
		mutex_exit(&devctx->lock);
	}

	return (DDI_SUCCESS);
}
   2681 
   2682 /*
   2683  *                                       end of mapping
   2684  *                    poff   fault_offset         |
   2685  *            base     |        |                 |
   2686  *              |      |        |                 |
   2687  *              V      V        V                 V
   2688  *  +-----------+---------------+-------+---------+-------+
   2689  *              ^               ^       ^         ^
   2690  *              |<--- offset--->|<-len->|         |
   2691  *              |<--- dh_len(size of mapping) --->|
   2692  *                     |<--  pg -->|
   2693  *                              -->|rlen|<--
   2694  */
   2695 static ulong_t
   2696 devmap_roundup(devmap_handle_t *dhp, ulong_t offset, size_t len,
   2697     ulong_t *opfn, ulong_t *pagesize)
   2698 {
   2699 	register int level;
   2700 	ulong_t pg;
   2701 	ulong_t poff;
   2702 	ulong_t base;
   2703 	caddr_t uvaddr;
   2704 	long rlen;
   2705 
   2706 	TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP,
   2707 	    "devmap_roundup:start dhp=%p off=%lx len=%lx",
   2708 	    (void *)dhp, offset, len);
   2709 	DEBUGF(2, (CE_CONT, "devmap_roundup: dhp %p off %lx len %lx\n",
   2710 	    (void *)dhp, offset, len));
   2711 
   2712 	/*
   2713 	 * get the max. pagesize that is aligned within the range
   2714 	 * <dh_pfn, dh_pfn+offset>.
   2715 	 *
   2716 	 * The calculations below use physical address to ddetermine
   2717 	 * the page size to use. The same calculations can use the
   2718 	 * virtual address to determine the page size.
   2719 	 */
   2720 	base = (ulong_t)ptob(dhp->dh_pfn);
   2721 	for (level = dhp->dh_mmulevel; level >= 0; level--) {
   2722 		pg = page_get_pagesize(level);
   2723 		poff = ((base + offset) & ~(pg - 1));
   2724 		uvaddr = dhp->dh_uvaddr + (poff - base);
   2725 		if ((poff >= base) &&
   2726 		    ((poff + pg) <= (base + dhp->dh_len)) &&
   2727 		    VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg))
   2728 			break;
   2729 	}
   2730 
   2731 	TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP_CK1,
   2732 	    "devmap_roundup: base=%lx poff=%lx dhp=%p",
   2733 	    base, poff, dhp);
   2734 	DEBUGF(2, (CE_CONT, "devmap_roundup: base %lx poff %lx pfn %lx\n",
   2735 	    base, poff, dhp->dh_pfn));
   2736 
   2737 	ASSERT(VA_PA_ALIGNED((uintptr_t)uvaddr, poff, pg));
   2738 	ASSERT(level >= 0);
   2739 
   2740 	*pagesize = pg;
   2741 	*opfn = dhp->dh_pfn + btop(poff - base);
   2742 
   2743 	rlen = len + offset - (poff - base + pg);
   2744 
   2745 	ASSERT(rlen < (long)len);
   2746 
   2747 	TRACE_5(TR_FAC_DEVMAP, TR_DEVMAP_ROUNDUP_CK2,
   2748 	    "devmap_roundup:ret dhp=%p level=%x rlen=%lx psiz=%p opfn=%p",
   2749 	    (void *)dhp, level, rlen, pagesize, opfn);
   2750 	DEBUGF(1, (CE_CONT, "devmap_roundup: dhp %p "
   2751 	    "level %x rlen %lx psize %lx opfn %lx\n",
   2752 	    (void *)dhp, level, rlen, *pagesize, *opfn));
   2753 
   2754 	return ((ulong_t)((rlen > 0) ? rlen : 0));
   2755 }
   2756 
   2757 /*
   2758  * find the dhp that contains addr.
   2759  */
   2760 static devmap_handle_t *
   2761 devmap_find_handle(devmap_handle_t *dhp_head, caddr_t addr)
   2762 {
   2763 	devmap_handle_t *dhp;
   2764 
   2765 	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_FIND_HANDLE,
   2766 	    "devmap_find_handle:start");
   2767 
   2768 	dhp = dhp_head;
   2769 	while (dhp) {
   2770 		if (addr >= dhp->dh_uvaddr &&
   2771 		    addr < (dhp->dh_uvaddr + dhp->dh_len))
   2772 			return (dhp);
   2773 		dhp = dhp->dh_next;
   2774 	}
   2775 
   2776 	return ((devmap_handle_t *)NULL);
   2777 }
   2778 
   2779 /*
   2780  * devmap_unload:
   2781  *			Marks a segdev segment or pages if offset->offset+len
   2782  *			is not the entire segment as intercept and unloads the
   2783  *			pages in the range offset -> offset+len.
   2784  */
   2785 int
   2786 devmap_unload(devmap_cookie_t dhc, offset_t offset, size_t len)
   2787 {
   2788 	register devmap_handle_t *dhp = (devmap_handle_t *)dhc;
   2789 	caddr_t	addr;
   2790 	ulong_t	size;
   2791 	ssize_t	soff;
   2792 
   2793 	TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_UNLOAD,
   2794 	    "devmap_unload:start dhp=%p offset=%llx len=%lx",
   2795 	    (void *)dhp, offset, len);
   2796 	DEBUGF(7, (CE_CONT, "devmap_unload: dhp %p offset %llx len %lx\n",
   2797 	    (void *)dhp, offset, len));
   2798 
   2799 	soff = (ssize_t)(offset - dhp->dh_uoff);
   2800 	soff = round_down_p2(soff, PAGESIZE);
   2801 	if (soff < 0 || soff >= dhp->dh_len)
   2802 		return (FC_MAKE_ERR(EINVAL));
   2803 
   2804 	/*
   2805 	 * Address and size must be page aligned.  Len is set to the
   2806 	 * number of bytes in the number of pages that are required to
   2807 	 * support len.  Offset is set to the byte offset of the first byte
   2808 	 * of the page that contains offset.
   2809 	 */
   2810 	len = round_up_p2(len, PAGESIZE);
   2811 
   2812 	/*
   2813 	 * If len is == 0, then calculate the size by getting
   2814 	 * the number of bytes from offset to the end of the segment.
   2815 	 */
   2816 	if (len == 0)
   2817 		size = dhp->dh_len - soff;
   2818 	else {
   2819 		size = len;
   2820 		if ((soff + size) > dhp->dh_len)
   2821 			return (FC_MAKE_ERR(EINVAL));
   2822 	}
   2823 
   2824 	/*
   2825 	 * The address is offset bytes from the base address of
   2826 	 * the dhp.
   2827 	 */
   2828 	addr = (caddr_t)(soff + dhp->dh_uvaddr);
   2829 
   2830 	/*
   2831 	 * If large page size was used in hat_devload(),
   2832 	 * the same page size must be used in hat_unload().
   2833 	 */
   2834 	if (dhp->dh_flags & DEVMAP_FLAG_LARGE) {
   2835 		hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
   2836 		    dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
   2837 	} else {
   2838 		hat_unload(dhp->dh_seg->s_as->a_hat,  addr, size,
   2839 		    HAT_UNLOAD|HAT_UNLOAD_OTHER);
   2840 	}
   2841 
   2842 	return (0);
   2843 }
   2844 
   2845 /*
   2846  * calculates the optimal page size that will be used for hat_devload().
   2847  */
   2848 static void
   2849 devmap_get_large_pgsize(devmap_handle_t *dhp, size_t len, caddr_t addr,
   2850     size_t *llen, caddr_t *laddr)
   2851 {
   2852 	ulong_t off;
   2853 	ulong_t pfn;
   2854 	ulong_t pgsize;
   2855 	uint_t first = 1;
   2856 
   2857 	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_GET_LARGE_PGSIZE,
   2858 	    "devmap_get_large_pgsize:start");
   2859 
   2860 	/*
   2861 	 * RFE - Code only supports large page mappings for devmem
   2862 	 * This code could be changed in future if we want to support
   2863 	 * large page mappings for kernel exported memory.
   2864 	 */
   2865 	ASSERT(dhp_is_devmem(dhp));
   2866 	ASSERT(!(dhp->dh_flags & DEVMAP_MAPPING_INVALID));
   2867 
   2868 	*llen = 0;
   2869 	off = (ulong_t)(addr - dhp->dh_uvaddr);
   2870 	while ((long)len > 0) {
   2871 		/*
   2872 		 * get the optimal pfn to minimize address translations.
   2873 		 * devmap_roundup() returns residue bytes for next round
   2874 		 * calculations.
   2875 		 */
   2876 		len = devmap_roundup(dhp, off, len, &pfn, &pgsize);
   2877 
   2878 		if (first) {
   2879 			*laddr = dhp->dh_uvaddr + ptob(pfn - dhp->dh_pfn);
   2880 			first = 0;
   2881 		}
   2882 
   2883 		*llen += pgsize;
   2884 		off = ptob(pfn - dhp->dh_pfn) + pgsize;
   2885 	}
   2886 	/* Large page mapping len/addr cover more range than original fault */
   2887 	ASSERT(*llen >= len && *laddr <= addr);
   2888 	ASSERT((*laddr + *llen) >= (addr + len));
   2889 }
   2890 
   2891 /*
   2892  * Initialize the devmap_softlock structure.
   2893  */
   2894 static struct devmap_softlock *
   2895 devmap_softlock_init(dev_t dev, ulong_t id)
   2896 {
   2897 	struct devmap_softlock *slock;
   2898 	struct devmap_softlock *tmp;
   2899 
   2900 	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SOFTLOCK_INIT,
   2901 	    "devmap_softlock_init:start");
   2902 
   2903 	tmp = kmem_zalloc(sizeof (struct devmap_softlock), KM_SLEEP);
   2904 	mutex_enter(&devmap_slock);
   2905 
   2906 	for (slock = devmap_slist; slock != NULL; slock = slock->next)
   2907 		if ((slock->dev == dev) && (slock->id == id))
   2908 			break;
   2909 
   2910 	if (slock == NULL) {
   2911 		slock = tmp;
   2912 		slock->dev = dev;
   2913 		slock->id = id;
   2914 		mutex_init(&slock->lock, NULL, MUTEX_DEFAULT, NULL);
   2915 		cv_init(&slock->cv, NULL, CV_DEFAULT, NULL);
   2916 		slock->next = devmap_slist;
   2917 		devmap_slist = slock;
   2918 	} else
   2919 		kmem_free(tmp, sizeof (struct devmap_softlock));
   2920 
   2921 	mutex_enter(&slock->lock);
   2922 	slock->refcnt++;
   2923 	mutex_exit(&slock->lock);
   2924 	mutex_exit(&devmap_slock);
   2925 
   2926 	return (slock);
   2927 }
   2928 
   2929 /*
   2930  * Wake up processes that sleep on softlocked.
   2931  * Free dh_softlock if refcnt is 0.
   2932  */
   2933 static void
   2934 devmap_softlock_rele(devmap_handle_t *dhp)
   2935 {
   2936 	struct devmap_softlock *slock = dhp->dh_softlock;
   2937 	struct devmap_softlock *tmp;
   2938 	struct devmap_softlock *parent;
   2939 
   2940 	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SOFTLOCK_RELE,
   2941 	    "devmap_softlock_rele:start");
   2942 
   2943 	mutex_enter(&devmap_slock);
   2944 	mutex_enter(&slock->lock);
   2945 
   2946 	ASSERT(slock->refcnt > 0);
   2947 
   2948 	slock->refcnt--;
   2949 
   2950 	/*
   2951 	 * If no one is using the device, free up the slock data.
   2952 	 */
   2953 	if (slock->refcnt == 0) {
   2954 		slock->softlocked = 0;
   2955 		cv_signal(&slock->cv);
   2956 
   2957 		if (devmap_slist == slock)
   2958 			devmap_slist = slock->next;
   2959 		else {
   2960 			parent = devmap_slist;
   2961 			for (tmp = devmap_slist->next; tmp != NULL;
   2962 			    tmp = tmp->next) {
   2963 				if (tmp == slock) {
   2964 					parent->next = tmp->next;
   2965 					break;
   2966 				}
   2967 				parent = tmp;
   2968 			}
   2969 		}
   2970 		mutex_exit(&slock->lock);
   2971 		mutex_destroy(&slock->lock);
   2972 		cv_destroy(&slock->cv);
   2973 		kmem_free(slock, sizeof (struct devmap_softlock));
   2974 	} else
   2975 		mutex_exit(&slock->lock);
   2976 
   2977 	mutex_exit(&devmap_slock);
   2978 }
   2979 
   2980 /*
   2981  * Wake up processes that sleep on dh_ctx->locked.
   2982  * Free dh_ctx if refcnt is 0.
   2983  */
   2984 static void
   2985 devmap_ctx_rele(devmap_handle_t *dhp)
   2986 {
   2987 	struct devmap_ctx *devctx = dhp->dh_ctx;
   2988 	struct devmap_ctx *tmp;
   2989 	struct devmap_ctx *parent;
   2990 	timeout_id_t tid;
   2991 
   2992 	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_CTX_RELE,
   2993 	    "devmap_ctx_rele:start");
   2994 
   2995 	mutex_enter(&devmapctx_lock);
   2996 	mutex_enter(&devctx->lock);
   2997 
   2998 	ASSERT(devctx->refcnt > 0);
   2999 
   3000 	devctx->refcnt--;
   3001 
   3002 	/*
   3003 	 * If no one is using the device, free up the devctx data.
   3004 	 */
   3005 	if (devctx->refcnt == 0) {
   3006 		/*
   3007 		 * Untimeout any threads using this mapping as they are about
   3008 		 * to go away.
   3009 		 */
   3010 		if (devctx->timeout != 0) {
   3011 			TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_CTX_RELE_CK1,
   3012 			    "devmap_ctx_rele:untimeout ctx->timeout");
   3013 
   3014 			tid = devctx->timeout;
   3015 			mutex_exit(&devctx->lock);
   3016 			(void) untimeout(tid);
   3017 			mutex_enter(&devctx->lock);
   3018 		}
   3019 
   3020 		devctx->oncpu = 0;
   3021 		cv_signal(&devctx->cv);
   3022 
   3023 		if (devmapctx_list == devctx)
   3024 			devmapctx_list = devctx->next;
   3025 		else {
   3026 			parent = devmapctx_list;
   3027 			for (tmp = devmapctx_list->next; tmp != NULL;
   3028 			    tmp = tmp->next) {
   3029 				if (tmp == devctx) {
   3030 					parent->next = tmp->next;
   3031 					break;
   3032 				}
   3033 				parent = tmp;
   3034 			}
   3035 		}
   3036 		mutex_exit(&devctx->lock);
   3037 		mutex_destroy(&devctx->lock);
   3038 		cv_destroy(&devctx->cv);
   3039 		kmem_free(devctx, sizeof (struct devmap_ctx));
   3040 	} else
   3041 		mutex_exit(&devctx->lock);
   3042 
   3043 	mutex_exit(&devmapctx_lock);
   3044 }
   3045 
   3046 /*
   3047  * devmap_load:
   3048  *			Marks a segdev segment or pages if offset->offset+len
   3049  *			is not the entire segment as nointercept and faults in
   3050  *			the pages in the range offset -> offset+len.
   3051  */
   3052 int
   3053 devmap_load(devmap_cookie_t dhc, offset_t offset, size_t len, uint_t type,
   3054     uint_t rw)
   3055 {
   3056 	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
   3057 	struct as *asp = dhp->dh_seg->s_as;
   3058 	caddr_t	addr;
   3059 	ulong_t	size;
   3060 	ssize_t	soff;	/* offset from the beginning of the segment */
   3061 	int rc;
   3062 
   3063 	TRACE_3(TR_FAC_DEVMAP, TR_DEVMAP_LOAD,
   3064 	    "devmap_load:start dhp=%p offset=%llx len=%lx",
   3065 	    (void *)dhp, offset, len);
   3066 
   3067 	DEBUGF(7, (CE_CONT, "devmap_load: dhp %p offset %llx len %lx\n",
   3068 	    (void *)dhp, offset, len));
   3069 
   3070 	/*
   3071 	 *	Hat layer only supports devload to process' context for which
   3072 	 *	the as lock is held. Verify here and return error if drivers
   3073 	 *	inadvertently call devmap_load on a wrong devmap handle.
   3074 	 */
   3075 	if ((asp != &kas) && !AS_LOCK_HELD(asp, &asp->a_lock))
   3076 		return (FC_MAKE_ERR(EINVAL));
   3077 
   3078 	soff = (ssize_t)(offset - dhp->dh_uoff);
   3079 	soff = round_down_p2(soff, PAGESIZE);
   3080 	if (soff < 0 || soff >= dhp->dh_len)
   3081 		return (FC_MAKE_ERR(EINVAL));
   3082 
   3083 	/*
   3084 	 * Address and size must be page aligned.  Len is set to the
   3085 	 * number of bytes in the number of pages that are required to
   3086 	 * support len.  Offset is set to the byte offset of the first byte
   3087 	 * of the page that contains offset.
   3088 	 */
   3089 	len = round_up_p2(len, PAGESIZE);
   3090 
   3091 	/*
   3092 	 * If len == 0, then calculate the size by getting
   3093 	 * the number of bytes from offset to the end of the segment.
   3094 	 */
   3095 	if (len == 0)
   3096 		size = dhp->dh_len - soff;
   3097 	else {
   3098 		size = len;
   3099 		if ((soff + size) > dhp->dh_len)
   3100 			return (FC_MAKE_ERR(EINVAL));
   3101 	}
   3102 
   3103 	/*
   3104 	 * The address is offset bytes from the base address of
   3105 	 * the segment.
   3106 	 */
   3107 	addr = (caddr_t)(soff + dhp->dh_uvaddr);
   3108 
   3109 	HOLD_DHP_LOCK(dhp);
   3110 	rc = segdev_faultpages(asp->a_hat,
   3111 	    dhp->dh_seg, addr, size, type, rw, dhp);
   3112 	RELE_DHP_LOCK(dhp);
   3113 	return (rc);
   3114 }
   3115 
   3116 int
   3117 devmap_setup(dev_t dev, offset_t off, struct as *as, caddr_t *addrp,
   3118     size_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
   3119 {
   3120 	register devmap_handle_t *dhp;
   3121 	int (*devmap)(dev_t, devmap_cookie_t, offset_t, size_t,
   3122 	    size_t *, uint_t);
   3123 	int (*mmap)(dev_t, off_t, int);
   3124 	struct devmap_callback_ctl *callbackops;
   3125 	devmap_handle_t *dhp_head = NULL;
   3126 	devmap_handle_t *dhp_prev = NULL;
   3127 	devmap_handle_t *dhp_curr;
   3128 	caddr_t addr;
   3129 	int map_flag;
   3130 	int ret;
   3131 	ulong_t total_len;
   3132 	size_t map_len;
   3133 	size_t resid_len = len;
   3134 	offset_t map_off = off;
   3135 	struct devmap_softlock *slock = NULL;
   3136 
   3137 #ifdef lint
   3138 	cred = cred;
   3139 #endif
   3140 
   3141 	TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_SETUP,
   3142 	    "devmap_setup:start off=%llx len=%lx", off, len);
   3143 	DEBUGF(3, (CE_CONT, "devmap_setup: off %llx len %lx\n",
   3144 	    off, len));
   3145 
   3146 	devmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_devmap;
   3147 	mmap = devopsp[getmajor(dev)]->devo_cb_ops->cb_mmap;
   3148 
   3149 	/*
   3150 	 * driver must provide devmap(9E) entry point in cb_ops to use the
   3151 	 * devmap framework.
   3152 	 */
   3153 	if (devmap == NULL || devmap == nulldev || devmap == nodev)
   3154 		return (EINVAL);
   3155 
   3156 	/*
   3157 	 * To protect from an inadvertent entry because the devmap entry point
   3158 	 * is not NULL, return error if D_DEVMAP bit is not set in cb_flag and
   3159 	 * mmap is NULL.
   3160 	 */
   3161 	map_flag = devopsp[getmajor(dev)]->devo_cb_ops->cb_flag;
   3162 	if ((map_flag & D_DEVMAP) == 0 && (mmap == NULL || mmap == nulldev))
   3163 		return (EINVAL);
   3164 
   3165 	/*
   3166 	 * devmap allows mmap(2) to map multiple registers.
   3167 	 * one devmap_handle is created for each register mapped.
   3168 	 */
   3169 	for (total_len = 0; total_len < len; total_len += map_len) {
   3170 		dhp = kmem_zalloc(sizeof (devmap_handle_t), KM_SLEEP);
   3171 
   3172 		if (dhp_prev != NULL)
   3173 			dhp_prev->dh_next = dhp;
   3174 		else
   3175 			dhp_head = dhp;
   3176 		dhp_prev = dhp;
   3177 
   3178 		dhp->dh_prot = prot;
   3179 		dhp->dh_orig_maxprot = dhp->dh_maxprot = maxprot;
   3180 		dhp->dh_dev = dev;
   3181 		dhp->dh_timeout_length = CTX_TIMEOUT_VALUE;
   3182 		dhp->dh_uoff = map_off;
   3183 
   3184 		/*
   3185 		 * Get mapping specific info from
   3186 		 * the driver, such as rnumber, roff, len, callbackops,
   3187 		 * accattrp and, if the mapping is for kernel memory,
   3188 		 * ddi_umem_cookie.
   3189 		 */
   3190 		if ((ret = cdev_devmap(dev, dhp, map_off,
   3191 		    resid_len, &map_len, get_udatamodel())) != 0) {
   3192 			free_devmap_handle(dhp_head);
   3193 			return (ENXIO);
   3194 		}
   3195 
   3196 		if (map_len & PAGEOFFSET) {
   3197 			free_devmap_handle(dhp_head);
   3198 			return (EINVAL);
   3199 		}
   3200 
   3201 		callbackops = &dhp->dh_callbackops;
   3202 
   3203 		if ((callbackops->devmap_access == NULL) ||
   3204 		    (callbackops->devmap_access == nulldev) ||
   3205 		    (callbackops->devmap_access == nodev)) {
   3206 			/*
   3207 			 * Normally devmap does not support MAP_PRIVATE unless
   3208 			 * the drivers provide a valid devmap_access routine.
   3209 			 */
   3210 			if ((flags & MAP_PRIVATE) != 0) {
   3211 				free_devmap_handle(dhp_head);
   3212 				return (EINVAL);
   3213 			}
   3214 		} else {
   3215 			/*
   3216 			 * Initialize dhp_softlock and dh_ctx if the drivers
   3217 			 * provide devmap_access.
   3218 			 */
   3219 			dhp->dh_softlock = devmap_softlock_init(dev,
   3220 			    (ulong_t)callbackops->devmap_access);
   3221 			dhp->dh_ctx = devmap_ctxinit(dev,
   3222 			    (ulong_t)callbackops->devmap_access);
   3223 
   3224 			/*
   3225 			 * segdev_fault can only work when all
   3226 			 * dh_softlock in a multi-dhp mapping
   3227 			 * are same. see comments in segdev_fault
   3228 			 * This code keeps track of the first
   3229 			 * dh_softlock allocated in slock and
   3230 			 * compares all later allocations and if
   3231 			 * not similar, returns an error.
   3232 			 */
   3233 			if (slock == NULL)
   3234 				slock = dhp->dh_softlock;
   3235 			if (slock != dhp->dh_softlock) {
   3236 				free_devmap_handle(dhp_head);
   3237 				return (ENOTSUP);
   3238 			}
   3239 		}
   3240 
   3241 		map_off += map_len;
   3242 		resid_len -= map_len;
   3243 	}
   3244 
   3245 	/*
   3246 	 * get the user virtual address and establish the mapping between
   3247 	 * uvaddr and device physical address.
   3248 	 */
   3249 	if ((ret = devmap_device(dhp_head, as, addrp, off, len, flags))
   3250 	    != 0) {
   3251 		/*
   3252 		 * free devmap handles if error during the mapping.
   3253 		 */
   3254 		free_devmap_handle(dhp_head);
   3255 
   3256 		return (ret);
   3257 	}
   3258 
   3259 	/*
   3260 	 * call the driver's devmap_map callback to do more after the mapping,
   3261 	 * such as to allocate driver private data for context management.
   3262 	 */
   3263 	dhp = dhp_head;
   3264 	map_off = off;
   3265 	addr = *addrp;
   3266 	while (dhp != NULL) {
   3267 		callbackops = &dhp->dh_callbackops;
   3268 		dhp->dh_uvaddr = addr;
   3269 		dhp_curr = dhp;
   3270 		if (callbackops->devmap_map != NULL) {
   3271 			ret = (*callbackops->devmap_map)((devmap_cookie_t)dhp,
   3272 			    dev, flags, map_off,
   3273 			    dhp->dh_len, &dhp->dh_pvtp);
   3274 			if (ret != 0) {
   3275 				struct segdev_data *sdp;
   3276 
   3277 				/*
   3278 				 * call driver's devmap_unmap entry point
   3279 				 * to free driver resources.
   3280 				 */
   3281 				dhp = dhp_head;
   3282 				map_off = off;
   3283 				while (dhp != dhp_curr) {
   3284 					callbackops = &dhp->dh_callbackops;
   3285 					if (callbackops->devmap_unmap != NULL) {
   3286 						(*callbackops->devmap_unmap)(
   3287 						    dhp, dhp->dh_pvtp,
   3288 						    map_off, dhp->dh_len,
   3289 						    NULL, NULL, NULL, NULL);
   3290 					}
   3291 					map_off += dhp->dh_len;
   3292 					dhp = dhp->dh_next;
   3293 				}
   3294 				sdp = dhp_head->dh_seg->s_data;
   3295 				sdp->devmap_data = NULL;
   3296 				free_devmap_handle(dhp_head);
   3297 				return (ENXIO);
   3298 			}
   3299 		}
   3300 		map_off += dhp->dh_len;
   3301 		addr += dhp->dh_len;
   3302 		dhp = dhp->dh_next;
   3303 	}
   3304 
   3305 	return (0);
   3306 }
   3307 
   3308 int
   3309 ddi_devmap_segmap(dev_t dev, off_t off, ddi_as_handle_t as, caddr_t *addrp,
   3310     off_t len, uint_t prot, uint_t maxprot, uint_t flags, struct cred *cred)
   3311 {
   3312 	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_SEGMAP,
   3313 	    "devmap_segmap:start");
   3314 	return (devmap_setup(dev, (offset_t)off, (struct as *)as, addrp,
   3315 	    (size_t)len, prot, maxprot, flags, cred));
   3316 }
   3317 
   3318 /*
   3319  * Called from devmap_devmem_setup/remap to see if can use large pages for
   3320  * this device mapping.
   3321  * Also calculate the max. page size for this mapping.
   3322  * this page size will be used in fault routine for
   3323  * optimal page size calculations.
   3324  */
   3325 static void
   3326 devmap_devmem_large_page_setup(devmap_handle_t *dhp)
   3327 {
   3328 	ASSERT(dhp_is_devmem(dhp));
   3329 	dhp->dh_mmulevel = 0;
   3330 
   3331 	/*
   3332 	 * use large page size only if:
   3333 	 *  1. device memory.
   3334 	 *  2. mmu supports multiple page sizes,
   3335 	 *  3. Driver did not disallow it
   3336 	 *  4. dhp length is at least as big as the large pagesize
   3337 	 *  5. the uvaddr and pfn are large pagesize aligned
   3338 	 */
   3339 	if (page_num_pagesizes() > 1 &&
   3340 	    !(dhp->dh_flags & (DEVMAP_USE_PAGESIZE | DEVMAP_MAPPING_INVALID))) {
   3341 		ulong_t base;
   3342 		int level;
   3343 
   3344 		base = (ulong_t)ptob(dhp->dh_pfn);
   3345 		for (level = 1; level < page_num_pagesizes(); level++) {
   3346 			size_t pgsize = page_get_pagesize(level);
   3347 			if ((dhp->dh_len < pgsize) ||
   3348 			    (!VA_PA_PGSIZE_ALIGNED((uintptr_t)dhp->dh_uvaddr,
   3349 			    base, pgsize))) {
   3350 				break;
   3351 			}
   3352 		}
   3353 		dhp->dh_mmulevel = level - 1;
   3354 	}
   3355 	if (dhp->dh_mmulevel > 0) {
   3356 		dhp->dh_flags |= DEVMAP_FLAG_LARGE;
   3357 	} else {
   3358 		dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
   3359 	}
   3360 }
   3361 
   3362 /*
   3363  * Called by driver devmap routine to pass device specific info to
   3364  * the framework.    used for device memory mapping only.
   3365  */
   3366 int
   3367 devmap_devmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
   3368     struct devmap_callback_ctl *callbackops, uint_t rnumber, offset_t roff,
   3369     size_t len, uint_t maxprot, uint_t flags, ddi_device_acc_attr_t *accattrp)
   3370 {
   3371 	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
   3372 	ddi_acc_handle_t handle;
   3373 	ddi_map_req_t mr;
   3374 	ddi_acc_hdl_t *hp;
   3375 	int err;
   3376 
   3377 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVMEM_SETUP,
   3378 	    "devmap_devmem_setup:start dhp=%p offset=%llx rnum=%d len=%lx",
   3379 	    (void *)dhp, roff, rnumber, (uint_t)len);
   3380 	DEBUGF(2, (CE_CONT, "devmap_devmem_setup: dhp %p offset %llx "
   3381 	    "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len));
   3382 
   3383 	/*
   3384 	 * First to check if this function has been called for this dhp.
   3385 	 */
   3386 	if (dhp->dh_flags & DEVMAP_SETUP_DONE)
   3387 		return (DDI_FAILURE);
   3388 
   3389 	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
   3390 		return (DDI_FAILURE);
   3391 
   3392 	if (flags & DEVMAP_MAPPING_INVALID) {
   3393 		/*
   3394 		 * Don't go up the tree to get pfn if the driver specifies
   3395 		 * DEVMAP_MAPPING_INVALID in flags.
   3396 		 *
   3397 		 * If DEVMAP_MAPPING_INVALID is specified, we have to grant
   3398 		 * remap permission.
   3399 		 */
   3400 		if (!(flags & DEVMAP_ALLOW_REMAP)) {
   3401 			return (DDI_FAILURE);
   3402 		}
   3403 		dhp->dh_pfn = PFN_INVALID;
   3404 	} else {
   3405 		handle = impl_acc_hdl_alloc(KM_SLEEP, NULL);
   3406 		if (handle == NULL)
   3407 			return (DDI_FAILURE);
   3408 
   3409 		hp = impl_acc_hdl_get(handle);
   3410 		hp->ah_vers = VERS_ACCHDL;
   3411 		hp->ah_dip = dip;
   3412 		hp->ah_rnumber = rnumber;
   3413 		hp->ah_offset = roff;
   3414 		hp->ah_len = len;
   3415 		if (accattrp != NULL)
   3416 			hp->ah_acc = *accattrp;
   3417 
   3418 		mr.map_op = DDI_MO_MAP_LOCKED;
   3419 		mr.map_type = DDI_MT_RNUMBER;
   3420 		mr.map_obj.rnumber = rnumber;
   3421 		mr.map_prot = maxprot & dhp->dh_orig_maxprot;
   3422 		mr.map_flags = DDI_MF_DEVICE_MAPPING;
   3423 		mr.map_handlep = hp;
   3424 		mr.map_vers = DDI_MAP_VERSION;
   3425 
   3426 		/*
   3427 		 * up the device tree to get pfn.
   3428 		 * The rootnex_map_regspec() routine in nexus drivers has been
   3429 		 * modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING.
   3430 		 */
   3431 		err = ddi_map(dip, &mr, roff, len, (caddr_t *)&dhp->dh_pfn);
   3432 		dhp->dh_hat_attr = hp->ah_hat_flags;
   3433 		impl_acc_hdl_free(handle);
   3434 
   3435 		if (err)
   3436 			return (DDI_FAILURE);
   3437 	}
   3438 	/* Should not be using devmem setup for memory pages */
   3439 	ASSERT(!pf_is_memory(dhp->dh_pfn));
   3440 
   3441 	/* Only some of the flags bits are settable by the driver */
   3442 	dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
   3443 	dhp->dh_len = ptob(btopr(len));
   3444 
   3445 	dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
   3446 	dhp->dh_roff = ptob(btop(roff));
   3447 
   3448 	/* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
   3449 	devmap_devmem_large_page_setup(dhp);
   3450 	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
   3451 	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
   3452 
   3453 
   3454 	if (callbackops != NULL) {
   3455 		bcopy(callbackops, &dhp->dh_callbackops,
   3456 		    sizeof (struct devmap_callback_ctl));
   3457 	}
   3458 
   3459 	/*
   3460 	 * Initialize dh_lock if we want to do remap.
   3461 	 */
   3462 	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
   3463 		mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
   3464 		dhp->dh_flags |= DEVMAP_LOCK_INITED;
   3465 	}
   3466 
   3467 	dhp->dh_flags |= DEVMAP_SETUP_DONE;
   3468 
   3469 	return (DDI_SUCCESS);
   3470 }
   3471 
   3472 int
   3473 devmap_devmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
   3474     uint_t rnumber, offset_t roff, size_t len, uint_t maxprot,
   3475     uint_t flags, ddi_device_acc_attr_t *accattrp)
   3476 {
   3477 	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
   3478 	ddi_acc_handle_t handle;
   3479 	ddi_map_req_t mr;
   3480 	ddi_acc_hdl_t *hp;
   3481 	pfn_t	pfn;
   3482 	uint_t	hat_flags;
   3483 	int	err;
   3484 
   3485 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_DEVMEM_REMAP,
   3486 	    "devmap_devmem_setup:start dhp=%p offset=%llx rnum=%d len=%lx",
   3487 	    (void *)dhp, roff, rnumber, (uint_t)len);
   3488 	DEBUGF(2, (CE_CONT, "devmap_devmem_remap: dhp %p offset %llx "
   3489 	    "rnum %d len %lx\n", (void *)dhp, roff, rnumber, len));
   3490 
   3491 	/*
   3492 	 * Return failure if setup has not been done or no remap permission
   3493 	 * has been granted during the setup.
   3494 	 */
   3495 	if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
   3496 	    (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
   3497 		return (DDI_FAILURE);
   3498 
   3499 	/* Only DEVMAP_MAPPING_INVALID flag supported for remap */
   3500 	if ((flags != 0) && (flags != DEVMAP_MAPPING_INVALID))
   3501 		return (DDI_FAILURE);
   3502 
   3503 	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
   3504 		return (DDI_FAILURE);
   3505 
   3506 	if (!(flags & DEVMAP_MAPPING_INVALID)) {
   3507 		handle = impl_acc_hdl_alloc(KM_SLEEP, NULL);
   3508 		if (handle == NULL)
   3509 			return (DDI_FAILURE);
   3510 	}
   3511 
   3512 	HOLD_DHP_LOCK(dhp);
   3513 
   3514 	/*
   3515 	 * Unload the old mapping, so next fault will setup the new mappings
   3516 	 * Do this while holding the dhp lock so other faults dont reestablish
   3517 	 * the mappings
   3518 	 */
   3519 	hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
   3520 	    dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
   3521 
   3522 	if (flags & DEVMAP_MAPPING_INVALID) {
   3523 		dhp->dh_flags |= DEVMAP_MAPPING_INVALID;
   3524 		dhp->dh_pfn = PFN_INVALID;
   3525 	} else {
   3526 		/* clear any prior DEVMAP_MAPPING_INVALID flag */
   3527 		dhp->dh_flags &= ~DEVMAP_MAPPING_INVALID;
   3528 		hp = impl_acc_hdl_get(handle);
   3529 		hp->ah_vers = VERS_ACCHDL;
   3530 		hp->ah_dip = dip;
   3531 		hp->ah_rnumber = rnumber;
   3532 		hp->ah_offset = roff;
   3533 		hp->ah_len = len;
   3534 		if (accattrp != NULL)
   3535 			hp->ah_acc = *accattrp;
   3536 
   3537 		mr.map_op = DDI_MO_MAP_LOCKED;
   3538 		mr.map_type = DDI_MT_RNUMBER;
   3539 		mr.map_obj.rnumber = rnumber;
   3540 		mr.map_prot = maxprot & dhp->dh_orig_maxprot;
   3541 		mr.map_flags = DDI_MF_DEVICE_MAPPING;
   3542 		mr.map_handlep = hp;
   3543 		mr.map_vers = DDI_MAP_VERSION;
   3544 
   3545 		/*
   3546 		 * up the device tree to get pfn.
   3547 		 * The rootnex_map_regspec() routine in nexus drivers has been
   3548 		 * modified to return pfn if map_flags is DDI_MF_DEVICE_MAPPING.
   3549 		 */
   3550 		err = ddi_map(dip, &mr, roff, len, (caddr_t *)&pfn);
   3551 		hat_flags = hp->ah_hat_flags;
   3552 		impl_acc_hdl_free(handle);
   3553 		if (err) {
   3554 			RELE_DHP_LOCK(dhp);
   3555 			return (DDI_FAILURE);
   3556 		}
   3557 		/*
   3558 		 * Store result of ddi_map first in local variables, as we do
   3559 		 * not want to overwrite the existing dhp with wrong data.
   3560 		 */
   3561 		dhp->dh_pfn = pfn;
   3562 		dhp->dh_hat_attr = hat_flags;
   3563 	}
   3564 
   3565 	/* clear the large page size flag */
   3566 	dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
   3567 
   3568 	dhp->dh_cookie = DEVMAP_DEVMEM_COOKIE;
   3569 	dhp->dh_roff = ptob(btop(roff));
   3570 
   3571 	/* setup the dh_mmulevel and DEVMAP_FLAG_LARGE */
   3572 	devmap_devmem_large_page_setup(dhp);
   3573 	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
   3574 	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
   3575 
   3576 	RELE_DHP_LOCK(dhp);
   3577 	return (DDI_SUCCESS);
   3578 }
   3579 
   3580 /*
   3581  * called by driver devmap routine to pass kernel virtual address  mapping
   3582  * info to the framework.    used only for kernel memory
   3583  * allocated from ddi_umem_alloc().
   3584  */
   3585 int
   3586 devmap_umem_setup(devmap_cookie_t dhc, dev_info_t *dip,
   3587     struct devmap_callback_ctl *callbackops, ddi_umem_cookie_t cookie,
   3588     offset_t off, size_t len, uint_t maxprot, uint_t flags,
   3589     ddi_device_acc_attr_t *accattrp)
   3590 {
   3591 	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
   3592 	struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
   3593 
   3594 #ifdef lint
   3595 	dip = dip;
   3596 #endif
   3597 
   3598 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_SETUP,
   3599 	    "devmap_umem_setup:start dhp=%p offset=%llx cookie=%p len=%lx",
   3600 	    (void *)dhp, off, cookie, len);
   3601 	DEBUGF(2, (CE_CONT, "devmap_umem_setup: dhp %p offset %llx "
   3602 	    "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
   3603 
   3604 	if (cookie == NULL)
   3605 		return (DDI_FAILURE);
   3606 
   3607 	/* For UMEM_TRASH, this restriction is not needed */
   3608 	if ((off + len) > cp->size)
   3609 		return (DDI_FAILURE);
   3610 
   3611 	/* check if the cache attributes are supported */
   3612 	if (i_ddi_check_cache_attr(flags) == B_FALSE)
   3613 		return (DDI_FAILURE);
   3614 
   3615 	/*
   3616 	 * First to check if this function has been called for this dhp.
   3617 	 */
   3618 	if (dhp->dh_flags & DEVMAP_SETUP_DONE)
   3619 		return (DDI_FAILURE);
   3620 
   3621 	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
   3622 		return (DDI_FAILURE);
   3623 
   3624 	if (flags & DEVMAP_MAPPING_INVALID) {
   3625 		/*
   3626 		 * If DEVMAP_MAPPING_INVALID is specified, we have to grant
   3627 		 * remap permission.
   3628 		 */
   3629 		if (!(flags & DEVMAP_ALLOW_REMAP)) {
   3630 			return (DDI_FAILURE);
   3631 		}
   3632 	} else {
   3633 		dhp->dh_cookie = cookie;
   3634 		dhp->dh_roff = ptob(btop(off));
   3635 		dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
   3636 		/* set HAT cache attributes */
   3637 		i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
   3638 		/* set HAT endianess attributes */
   3639 		i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
   3640 	}
   3641 
   3642 	/*
   3643 	 * The default is _not_ to pass HAT_LOAD_NOCONSIST to hat_devload();
   3644 	 * we pass HAT_LOAD_NOCONSIST _only_ in cases where hat tries to
   3645 	 * create consistent mappings but our intention was to create
   3646 	 * non-consistent mappings.
   3647 	 *
   3648 	 * DEVMEM: hat figures it out it's DEVMEM and creates non-consistent
   3649 	 * mappings.
   3650 	 *
   3651 	 * kernel exported memory: hat figures it out it's memory and always
   3652 	 * creates consistent mappings.
   3653 	 *
   3654 	 * /dev/mem: non-consistent mappings. See comments in common/io/mem.c
   3655 	 *
   3656 	 * /dev/kmem: consistent mappings are created unless they are
   3657 	 * MAP_FIXED. We _explicitly_ tell hat to create non-consistent
   3658 	 * mappings by passing HAT_LOAD_NOCONSIST in case of MAP_FIXED
   3659 	 * mappings of /dev/kmem. See common/io/mem.c
   3660 	 */
   3661 
   3662 	/* Only some of the flags bits are settable by the driver */
   3663 	dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
   3664 
   3665 	dhp->dh_len = ptob(btopr(len));
   3666 	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
   3667 	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
   3668 
   3669 	if (callbackops != NULL) {
   3670 		bcopy(callbackops, &dhp->dh_callbackops,
   3671 		    sizeof (struct devmap_callback_ctl));
   3672 	}
   3673 	/*
   3674 	 * Initialize dh_lock if we want to do remap.
   3675 	 */
   3676 	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
   3677 		mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
   3678 		dhp->dh_flags |= DEVMAP_LOCK_INITED;
   3679 	}
   3680 
   3681 	dhp->dh_flags |= DEVMAP_SETUP_DONE;
   3682 
   3683 	return (DDI_SUCCESS);
   3684 }
   3685 
   3686 int
   3687 devmap_umem_remap(devmap_cookie_t dhc, dev_info_t *dip,
   3688     ddi_umem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
   3689     uint_t flags, ddi_device_acc_attr_t *accattrp)
   3690 {
   3691 	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
   3692 	struct ddi_umem_cookie *cp = (struct ddi_umem_cookie *)cookie;
   3693 
   3694 	TRACE_4(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_REMAP,
   3695 	    "devmap_umem_remap:start dhp=%p offset=%llx cookie=%p len=%lx",
   3696 	    (void *)dhp, off, cookie, len);
   3697 	DEBUGF(2, (CE_CONT, "devmap_umem_remap: dhp %p offset %llx "
   3698 	    "cookie %p len %lx\n", (void *)dhp, off, (void *)cookie, len));
   3699 
   3700 #ifdef lint
   3701 	dip = dip;
   3702 	accattrp = accattrp;
   3703 #endif
   3704 	/*
   3705 	 * Reture failure if setup has not been done or no remap permission
   3706 	 * has been granted during the setup.
   3707 	 */
   3708 	if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
   3709 	    (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
   3710 		return (DDI_FAILURE);
   3711 
   3712 	/* No flags supported for remap yet */
   3713 	if (flags != 0)
   3714 		return (DDI_FAILURE);
   3715 
   3716 	/* check if the cache attributes are supported */
   3717 	if (i_ddi_check_cache_attr(flags) == B_FALSE)
   3718 		return (DDI_FAILURE);
   3719 
   3720 	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
   3721 		return (DDI_FAILURE);
   3722 
   3723 	/* For UMEM_TRASH, this restriction is not needed */
   3724 	if ((off + len) > cp->size)
   3725 		return (DDI_FAILURE);
   3726 
   3727 	HOLD_DHP_LOCK(dhp);
   3728 	/*
   3729 	 * Unload the old mapping, so next fault will setup the new mappings
   3730 	 * Do this while holding the dhp lock so other faults dont reestablish
   3731 	 * the mappings
   3732 	 */
   3733 	hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
   3734 	    dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);
   3735 
   3736 	dhp->dh_cookie = cookie;
   3737 	dhp->dh_roff = ptob(btop(off));
   3738 	dhp->dh_cvaddr = cp->cvaddr + dhp->dh_roff;
   3739 	/* set HAT cache attributes */
   3740 	i_ddi_cacheattr_to_hatacc(flags, &dhp->dh_hat_attr);
   3741 	/* set HAT endianess attributes */
   3742 	i_ddi_devacc_to_hatacc(accattrp, &dhp->dh_hat_attr);
   3743 
   3744 	/* clear the large page size flag */
   3745 	dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;
   3746 
   3747 	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
   3748 	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
   3749 	RELE_DHP_LOCK(dhp);
   3750 	return (DDI_SUCCESS);
   3751 }
   3752 
   3753 /*
   3754  * to set timeout value for the driver's context management callback, e.g.
   3755  * devmap_access().
   3756  */
   3757 void
   3758 devmap_set_ctx_timeout(devmap_cookie_t dhc, clock_t ticks)
   3759 {
   3760 	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
   3761 
   3762 	TRACE_2(TR_FAC_DEVMAP, TR_DEVMAP_SET_CTX_TIMEOUT,
   3763 	    "devmap_set_ctx_timeout:start dhp=%p ticks=%x",
   3764 	    (void *)dhp, ticks);
   3765 	dhp->dh_timeout_length = ticks;
   3766 }
   3767 
   3768 int
   3769 devmap_default_access(devmap_cookie_t dhp, void *pvtp, offset_t off,
   3770     size_t len, uint_t type, uint_t rw)
   3771 {
   3772 #ifdef lint
   3773 	pvtp = pvtp;
   3774 #endif
   3775 
   3776 	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_DEFAULT_ACCESS,
   3777 	    "devmap_default_access:start");
   3778 	return (devmap_load(dhp, off, len, type, rw));
   3779 }
   3780 
   3781 /*
   3782  * segkmem_alloc() wrapper to allocate memory which is both
   3783  * non-relocatable (for DR) and sharelocked, since the rest
   3784  * of this segment driver requires it.
   3785  */
   3786 static void *
   3787 devmap_alloc_pages(vmem_t *vmp, size_t size, int vmflag)
   3788 {
   3789 	ASSERT(vmp != NULL);
   3790 	ASSERT(kvseg.s_base != NULL);
   3791 	vmflag |= (VM_NORELOC | SEGKMEM_SHARELOCKED);
   3792 	return (segkmem_alloc(vmp, size, vmflag));
   3793 }
   3794 
   3795 /*
   3796  * This is where things are a bit incestuous with seg_kmem: unlike
   3797  * seg_kp, seg_kmem does not keep its pages long-term sharelocked, so
   3798  * we need to do a bit of a dance around that to prevent duplication of
   3799  * code until we decide to bite the bullet and implement a new kernel
   3800  * segment for driver-allocated memory that is exported to user space.
   3801  */
   3802 static void
   3803 devmap_free_pages(vmem_t *vmp, void *inaddr, size_t size)
   3804 {
   3805 	page_t *pp;
   3806 	caddr_t addr = inaddr;
   3807 	caddr_t eaddr;
   3808 	pgcnt_t npages = btopr(size);
   3809 
   3810 	ASSERT(vmp != NULL);
   3811 	ASSERT(kvseg.s_base != NULL);
   3812 	ASSERT(((uintptr_t)addr & PAGEOFFSET) == 0);
   3813 
   3814 	hat_unload(kas.a_hat, addr, size, HAT_UNLOAD_UNLOCK);
   3815 
   3816 	for (eaddr = addr + size; addr < eaddr; addr += PAGESIZE) {
   3817 		/*
   3818 		 * Use page_find() instead of page_lookup() to find the page
   3819 		 * since we know that it is hashed and has a shared lock.
   3820 		 */
   3821 		pp = page_find(&kvp, (u_offset_t)(uintptr_t)addr);
   3822 
   3823 		if (pp == NULL)
   3824 			panic("devmap_free_pages: page not found");
   3825 		if (!page_tryupgrade(pp)) {
   3826 			page_unlock(pp);
   3827 			pp = page_lookup(&kvp, (u_offset_t)(uintptr_t)addr,
   3828 			    SE_EXCL);
   3829 			if (pp == NULL)
   3830 				panic("devmap_free_pages: page already freed");
   3831 		}
   3832 		/* Clear p_lckcnt so page_destroy() doesn't update availrmem */
   3833 		pp->p_lckcnt = 0;
   3834 		page_destroy(pp, 0);
   3835 	}
   3836 	page_unresv(npages);
   3837 
   3838 	if (vmp != NULL)
   3839 		vmem_free(vmp, inaddr, size);
   3840 }
   3841 
   3842 /*
   3843  * devmap_umem_alloc_np() replaces kmem_zalloc() as the method for
   3844  * allocating non-pageable kmem in response to a ddi_umem_alloc()
   3845  * default request. For now we allocate our own pages and we keep
   3846  * them long-term sharelocked, since: A) the fault routines expect the
   3847  * memory to already be locked; B) pageable umem is already long-term
   3848  * locked; C) it's a lot of work to make it otherwise, particularly
   3849  * since the nexus layer expects the pages to never fault. An RFE is to
   3850  * not keep the pages long-term locked, but instead to be able to
   3851  * take faults on them and simply look them up in kvp in case we
   3852  * fault on them. Even then, we must take care not to let pageout
   3853  * steal them from us since the data must remain resident; if we
   3854  * do this we must come up with some way to pin the pages to prevent
   3855  * faults while a driver is doing DMA to/from them.
   3856  */
   3857 static void *
   3858 devmap_umem_alloc_np(size_t size, size_t flags)
   3859 {
   3860 	void *buf;
   3861 	int vmflags = (flags & DDI_UMEM_NOSLEEP)? VM_NOSLEEP : VM_SLEEP;
   3862 
   3863 	buf = vmem_alloc(umem_np_arena, size, vmflags);
   3864 	if (buf != NULL)
   3865 		bzero(buf, size);
   3866 	return (buf);
   3867 }
   3868 
   3869 static void
   3870 devmap_umem_free_np(void *addr, size_t size)
   3871 {
   3872 	vmem_free(umem_np_arena, addr, size);
   3873 }
   3874 
   3875 /*
   3876  * allocate page aligned kernel memory for exporting to user land.
   3877  * The devmap framework will use the cookie allocated by ddi_umem_alloc()
   3878  * to find a user virtual address that is in same color as the address
   3879  * allocated here.
   3880  */
   3881 void *
   3882 ddi_umem_alloc(size_t size, int flags, ddi_umem_cookie_t *cookie)
   3883 {
   3884 	register size_t len = ptob(btopr(size));
   3885 	void *buf = NULL;
   3886 	struct ddi_umem_cookie *cp;
   3887 	int iflags = 0;
   3888 
   3889 	*cookie = NULL;
   3890 
   3891 	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_ALLOC,
   3892 	    "devmap_umem_alloc:start");
   3893 	if (len == 0)
   3894 		return ((void *)NULL);
   3895 
   3896 	/*
   3897 	 * allocate cookie
   3898 	 */
   3899 	if ((cp = kmem_zalloc(sizeof (struct ddi_umem_cookie),
   3900 	    flags & DDI_UMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP)) == NULL) {
   3901 		ASSERT(flags & DDI_UMEM_NOSLEEP);
   3902 		return ((void *)NULL);
   3903 	}
   3904 
   3905 	if (flags & DDI_UMEM_PAGEABLE) {
   3906 		/* Only one of the flags is allowed */
   3907 		ASSERT(!(flags & DDI_UMEM_TRASH));
   3908 		/* initialize resource with 0 */
   3909 		iflags = KPD_ZERO;
   3910 
   3911 		/*
   3912 		 * to allocate unlocked pageable memory, use segkp_get() to
   3913 		 * create a segkp segment.  Since segkp can only service kas,
   3914 		 * other segment drivers such as segdev have to do
   3915 		 * as_fault(segkp, SOFTLOCK) in its fault routine,
   3916 		 */
   3917 		if (flags & DDI_UMEM_NOSLEEP)
   3918 			iflags |= KPD_NOWAIT;
   3919 
   3920 		if ((buf = segkp_get(segkp, len, iflags)) == NULL) {
   3921 			kmem_free(cp, sizeof (struct ddi_umem_cookie));
   3922 			return ((void *)NULL);
   3923 		}
   3924 		cp->type = KMEM_PAGEABLE;
   3925 		mutex_init(&cp->lock, NULL, MUTEX_DEFAULT, NULL);
   3926 		cp->locked = 0;
   3927 	} else if (flags & DDI_UMEM_TRASH) {
   3928 		/* Only one of the flags is allowed */
   3929 		ASSERT(!(flags & DDI_UMEM_PAGEABLE));
   3930 		cp->type = UMEM_TRASH;
   3931 		buf = NULL;
   3932 	} else {
   3933 		if ((buf = devmap_umem_alloc_np(len, flags)) == NULL) {
   3934 			kmem_free(cp, sizeof (struct ddi_umem_cookie));
   3935 			return ((void *)NULL);
   3936 		}
   3937 
   3938 		cp->type = KMEM_NON_PAGEABLE;
   3939 	}
   3940 
   3941 	/*
   3942 	 * need to save size here.  size will be used when
   3943 	 * we do kmem_free.
   3944 	 */
   3945 	cp->size = len;
   3946 	cp->cvaddr = (caddr_t)buf;
   3947 
   3948 	*cookie =  (void *)cp;
   3949 	return (buf);
   3950 }
   3951 
   3952 void
   3953 ddi_umem_free(ddi_umem_cookie_t cookie)
   3954 {
   3955 	struct ddi_umem_cookie *cp;
   3956 
   3957 	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_UMEM_FREE,
   3958 	    "devmap_umem_free:start");
   3959 
   3960 	/*
   3961 	 * if cookie is NULL, no effects on the system
   3962 	 */
   3963 	if (cookie == NULL)
   3964 		return;
   3965 
   3966 	cp = (struct ddi_umem_cookie *)cookie;
   3967 
   3968 	switch (cp->type) {
   3969 	case KMEM_PAGEABLE :
   3970 		ASSERT(cp->cvaddr != NULL && cp->size != 0);
   3971 		/*
   3972 		 * Check if there are still any pending faults on the cookie
   3973 		 * while the driver is deleting it,
   3974 		 * XXX - could change to an ASSERT but wont catch errant drivers
   3975 		 */
   3976 		mutex_enter(&cp->lock);
   3977 		if (cp->locked) {
   3978 			mutex_exit(&cp->lock);
   3979 			panic("ddi_umem_free for cookie with pending faults %p",
   3980 			    (void *)cp);
   3981 			return;
   3982 		}
   3983 
   3984 		segkp_release(segkp, cp->cvaddr);
   3985 
   3986 		/*
   3987 		 * release mutex associated with this cookie.
   3988 		 */
   3989 		mutex_destroy(&cp->lock);
   3990 		break;
   3991 	case KMEM_NON_PAGEABLE :
   3992 		ASSERT(cp->cvaddr != NULL && cp->size != 0);
   3993 		devmap_umem_free_np(cp->cvaddr, cp->size);
   3994 		break;
   3995 	case UMEM_TRASH :
   3996 		break;
   3997 	case UMEM_LOCKED :
   3998 		/* Callers should use ddi_umem_unlock for this type */
   3999 		ddi_umem_unlock(cookie);
   4000 		/* Frees the cookie too */
   4001 		return;
   4002 	default:
   4003 		/* panic so we can diagnose the underlying cause */
   4004 		panic("ddi_umem_free: illegal cookie type 0x%x\n",
   4005 		    cp->type);
   4006 	}
   4007 
   4008 	kmem_free(cookie, sizeof (struct ddi_umem_cookie));
   4009 }
   4010 
   4011 
   4012 static int
   4013 segdev_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
   4014 {
   4015 	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   4016 
   4017 	/*
   4018 	 * It looks as if it is always mapped shared
   4019 	 */
   4020 	TRACE_0(TR_FAC_DEVMAP, TR_DEVMAP_GETMEMID,
   4021 	    "segdev_getmemid:start");
   4022 	memidp->val[0] = (uintptr_t)VTOCVP(sdp->vp);
   4023 	memidp->val[1] = sdp->offset + (uintptr_t)(addr - seg->s_base);
   4024 	return (0);
   4025 }
   4026 
   4027 /*ARGSUSED*/
   4028 static lgrp_mem_policy_info_t *
   4029 segdev_getpolicy(struct seg *seg, caddr_t addr)
   4030 {
   4031 	return (NULL);
   4032 }
   4033 
   4034 /*ARGSUSED*/
   4035 static int
   4036 segdev_capable(struct seg *seg, segcapability_t capability)
   4037 {
   4038 	return (0);
   4039 }
   4040 
/*
 * ddi_umem_alloc() non-pageable quantum cache max size.
 * This value is only a rough guess, not a measured or tuned figure.
 */
   4045 #define	DEVMAP_UMEM_QUANTUM	(8*PAGESIZE)
   4046 
   4047 /*
   4048  * Initialize seg_dev from boot. This routine sets up the trash page
   4049  * and creates the umem_np_arena used to back non-pageable memory
   4050  * requests.
   4051  */
   4052 void
   4053 segdev_init(void)
   4054 {
   4055 	struct seg kseg;
   4056 
   4057 	umem_np_arena = vmem_create("umem_np", NULL, 0, PAGESIZE,
   4058 	    devmap_alloc_pages, devmap_free_pages, heap_arena,
   4059 	    DEVMAP_UMEM_QUANTUM, VM_SLEEP);
   4060 
   4061 	kseg.s_as = &kas;
   4062 	trashpp = page_create_va(&trashvp, 0, PAGESIZE,
   4063 	    PG_NORELOC | PG_EXCL | PG_WAIT, &kseg, NULL);
   4064 	if (trashpp == NULL)
   4065 		panic("segdev_init: failed to create trash page");
   4066 	pagezero(trashpp, 0, PAGESIZE);
   4067 	page_downgrade(trashpp);
   4068 }
   4069 
   4070 /*
   4071  * Invoke platform-dependent support routines so that /proc can have
   4072  * the platform code deal with curious hardware.
   4073  */
   4074 int
   4075 segdev_copyfrom(struct seg *seg,
   4076     caddr_t uaddr, const void *devaddr, void *kaddr, size_t len)
   4077 {
   4078 	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   4079 	struct snode *sp = VTOS(VTOCVP(sdp->vp));
   4080 
   4081 	return (e_ddi_copyfromdev(sp->s_dip,
   4082 	    (off_t)(uaddr - seg->s_base), devaddr, kaddr, len));
   4083 }
   4084 
   4085 int
   4086 segdev_copyto(struct seg *seg,
   4087     caddr_t uaddr, const void *kaddr, void *devaddr, size_t len)
   4088 {
   4089 	struct segdev_data *sdp = (struct segdev_data *)seg->s_data;
   4090 	struct snode *sp = VTOS(VTOCVP(sdp->vp));
   4091 
   4092 	return (e_ddi_copytodev(sp->s_dip,
   4093 	    (off_t)(uaddr - seg->s_base), kaddr, devaddr, len));
   4094 }
   4095