Home | History | Annotate | Download | only in px
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 /*
     27  * PCI Express nexus DVMA and DMA core routines:
     28  *	dma_map/dma_bind_handle implementation
     29  *	bypass and peer-to-peer support
     30  *	fast track DVMA space allocation
     31  *	runtime DVMA debug
     32  */
     33 #include <sys/types.h>
     34 #include <sys/kmem.h>
     35 #include <sys/async.h>
     36 #include <sys/sysmacros.h>
     37 #include <sys/sunddi.h>
     38 #include <sys/ddi_impldefs.h>
     39 #include "px_obj.h"
     40 
     41 /*LINTLIBRARY*/
     42 
     43 /*
     44  * px_dma_allocmp - Allocate a pci dma implementation structure
     45  *
     46  * An extra ddi_dma_attr structure is bundled with the usual ddi_dma_impl
     47  * to hold unmodified device limits. The ddi_dma_attr inside the
     48  * ddi_dma_impl structure is augumented with system limits to enhance
     49  * DVMA performance at runtime. The unaugumented device limits saved
     50  * right after (accessed through (ddi_dma_attr_t *)(mp + 1)) is used
     51  * strictly for peer-to-peer transfers which do not obey system limits.
     52  *
     53  * return: DDI_SUCCESS DDI_DMA_NORESOURCES
     54  */
     55 ddi_dma_impl_t *
     56 px_dma_allocmp(dev_info_t *dip, dev_info_t *rdip, int (*waitfp)(caddr_t),
     57 	caddr_t arg)
     58 {
     59 	register ddi_dma_impl_t *mp;
     60 	int sleep = (waitfp == DDI_DMA_SLEEP) ? KM_SLEEP : KM_NOSLEEP;
     61 
     62 	/* Caution: we don't use zalloc to enhance performance! */
     63 	if ((mp = kmem_alloc(sizeof (px_dma_hdl_t), sleep)) == 0) {
     64 		DBG(DBG_DMA_MAP, dip, "can't alloc dma_handle\n");
     65 		if (waitfp != DDI_DMA_DONTWAIT) {
     66 			DBG(DBG_DMA_MAP, dip, "alloc_mp kmem cb\n");
     67 			ddi_set_callback(waitfp, arg, &px_kmem_clid);
     68 		}
     69 		return (mp);
     70 	}
     71 
     72 	mp->dmai_rdip = rdip;
     73 	mp->dmai_flags = 0;
     74 	mp->dmai_pfnlst = NULL;
     75 	mp->dmai_winlst = NULL;
     76 
     77 	/*
     78 	 * kmem_alloc debug: the following fields are not zero-ed
     79 	 * mp->dmai_mapping = 0;
     80 	 * mp->dmai_size = 0;
     81 	 * mp->dmai_offset = 0;
     82 	 * mp->dmai_minxfer = 0;
     83 	 * mp->dmai_burstsizes = 0;
     84 	 * mp->dmai_ndvmapages = 0;
     85 	 * mp->dmai_pool/roffset = 0;
     86 	 * mp->dmai_rflags = 0;
     87 	 * mp->dmai_inuse/flags
     88 	 * mp->dmai_nwin = 0;
     89 	 * mp->dmai_winsize = 0;
     90 	 * mp->dmai_nexus_private/tte = 0;
     91 	 * mp->dmai_iopte/pfnlst
     92 	 * mp->dmai_sbi/pfn0 = 0;
     93 	 * mp->dmai_minfo/winlst/fdvma
     94 	 * mp->dmai_rdip
     95 	 * bzero(&mp->dmai_object, sizeof (ddi_dma_obj_t));
     96 	 * bzero(&mp->dmai_attr, sizeof (ddi_dma_attr_t));
     97 	 * mp->dmai_cookie = 0;
     98 	 */
     99 
    100 	mp->dmai_attr.dma_attr_version = (uint_t)DMA_ATTR_VERSION;
    101 	mp->dmai_attr.dma_attr_flags = (uint_t)0;
    102 	mp->dmai_fault = 0;
    103 	mp->dmai_fault_check = NULL;
    104 	mp->dmai_fault_notify = NULL;
    105 
    106 	mp->dmai_error.err_ena = 0;
    107 	mp->dmai_error.err_status = DDI_FM_OK;
    108 	mp->dmai_error.err_expected = DDI_FM_ERR_UNEXPECTED;
    109 	mp->dmai_error.err_ontrap = NULL;
    110 	mp->dmai_error.err_fep = NULL;
    111 	mp->dmai_error.err_cf = NULL;
    112 
    113 	/*
    114 	 * The bdf protection value is set to immediate child
    115 	 * at first. It gets modified by switch/bridge drivers
    116 	 * as the code traverses down the fabric topology.
    117 	 *
    118 	 * XXX No IOMMU protection for broken devices.
    119 	 */
    120 	ASSERT((intptr_t)ddi_get_parent_data(rdip) >> 1 == 0);
    121 	mp->dmai_bdf = ((intptr_t)ddi_get_parent_data(rdip) == 1) ?
    122 	    PCIE_INVALID_BDF : pcie_get_bdf_for_dma_xfer(dip, rdip);
    123 
    124 	return (mp);
    125 }
    126 
    127 void
    128 px_dma_freemp(ddi_dma_impl_t *mp)
    129 {
    130 	if (mp->dmai_ndvmapages > 1)
    131 		px_dma_freepfn(mp);
    132 	if (mp->dmai_winlst)
    133 		px_dma_freewin(mp);
    134 	kmem_free(mp, sizeof (px_dma_hdl_t));
    135 }
    136 
    137 void
    138 px_dma_freepfn(ddi_dma_impl_t *mp)
    139 {
    140 	void *addr = mp->dmai_pfnlst;
    141 	if (addr) {
    142 		size_t npages = mp->dmai_ndvmapages;
    143 		if (npages > 1)
    144 			kmem_free(addr, npages * sizeof (px_iopfn_t));
    145 		mp->dmai_pfnlst = NULL;
    146 	}
    147 	mp->dmai_ndvmapages = 0;
    148 }
    149 
    150 /*
    151  * px_dma_lmts2hdl - alloate a ddi_dma_impl_t, validate practical limits
    152  *			and convert dmareq->dmar_limits to mp->dmai_attr
    153  *
    154  * ddi_dma_impl_t member modified     input
    155  * ------------------------------------------------------------------------
    156  * mp->dmai_minxfer		    - dev
    157  * mp->dmai_burstsizes		    - dev
    158  * mp->dmai_flags		    - no limit? peer-to-peer only?
    159  *
    160  * ddi_dma_attr member modified       input
    161  * ------------------------------------------------------------------------
    162  * mp->dmai_attr.dma_attr_addr_lo   - dev lo, sys lo
    163  * mp->dmai_attr.dma_attr_addr_hi   - dev hi, sys hi
    164  * mp->dmai_attr.dma_attr_count_max - dev count max, dev/sys lo/hi delta
    165  * mp->dmai_attr.dma_attr_seg       - 0         (no nocross   restriction)
    166  * mp->dmai_attr.dma_attr_align     - 1         (no alignment restriction)
    167  *
    168  * The dlim_dmaspeed member of dmareq->dmar_limits is ignored.
    169  */
    170 ddi_dma_impl_t *
    171 px_dma_lmts2hdl(dev_info_t *dip, dev_info_t *rdip, px_mmu_t *mmu_p,
    172 	ddi_dma_req_t *dmareq)
    173 {
    174 	ddi_dma_impl_t *mp;
    175 	ddi_dma_attr_t *attr_p;
    176 	uint64_t syslo		= mmu_p->mmu_dvma_base;
    177 	uint64_t syshi		= mmu_p->mmu_dvma_end;
    178 	uint64_t fasthi		= mmu_p->mmu_dvma_fast_end;
    179 	ddi_dma_lim_t *lim_p	= dmareq->dmar_limits;
    180 	uint32_t count_max	= lim_p->dlim_cntr_max;
    181 	uint64_t lo		= lim_p->dlim_addr_lo;
    182 	uint64_t hi		= lim_p->dlim_addr_hi;
    183 	if (hi <= lo) {
    184 		DBG(DBG_DMA_MAP, dip, "Bad limits\n");
    185 		return ((ddi_dma_impl_t *)DDI_DMA_NOMAPPING);
    186 	}
    187 	if (!count_max)
    188 		count_max--;
    189 
    190 	if (!(mp = px_dma_allocmp(dip, rdip, dmareq->dmar_fp,
    191 	    dmareq->dmar_arg)))
    192 		return (NULL);
    193 
    194 	/* store original dev input at the 2nd ddi_dma_attr */
    195 	attr_p = PX_DEV_ATTR(mp);
    196 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
    197 	SET_DMAALIGN(attr_p, 1);
    198 
    199 	lo = MAX(lo, syslo);
    200 	hi = MIN(hi, syshi);
    201 	if (hi <= lo)
    202 		mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
    203 	count_max = MIN(count_max, hi - lo);
    204 
    205 	if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, fasthi, 1))
    206 		mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT |
    207 		    PX_DMAI_FLAGS_NOSYSLIMIT;
    208 	else {
    209 		if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, 1))
    210 			mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
    211 	}
    212 	if (PX_DMA_NOCTX(rdip))
    213 		mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
    214 
    215 	/* store augumented dev input to mp->dmai_attr */
    216 	mp->dmai_burstsizes	= lim_p->dlim_burstsizes;
    217 	attr_p = &mp->dmai_attr;
    218 	SET_DMAATTR(attr_p, lo, hi, -1, count_max);
    219 	SET_DMAALIGN(attr_p, 1);
    220 	return (mp);
    221 }
    222 
    223 /*
    224  * Called from px_attach to check for bypass dma support and set
    225  * flags accordingly.
    226  */
    227 int
    228 px_dma_attach(px_t *px_p)
    229 {
    230 	uint64_t baddr;
    231 
    232 	if (px_lib_iommu_getbypass(px_p->px_dip, 0ull,
    233 	    PCI_MAP_ATTR_WRITE|PCI_MAP_ATTR_READ,
    234 	    &baddr) != DDI_ENOTSUP)
    235 		/* ignore all other errors */
    236 		px_p->px_dev_caps |= PX_BYPASS_DMA_ALLOWED;
    237 
    238 	px_p->px_dma_sync_opt = ddi_prop_get_int(DDI_DEV_T_ANY,
    239 	    px_p->px_dip, DDI_PROP_DONTPASS, "dma-sync-options", 0);
    240 
    241 	if (px_p->px_dma_sync_opt != 0)
    242 		px_p->px_dev_caps |= PX_DMA_SYNC_REQUIRED;
    243 
    244 	return (DDI_SUCCESS);
    245 }
    246 
    247 /*
    248  * px_dma_attr2hdl
    249  *
    250  * This routine is called from the alloc handle entry point to sanity check the
    251  * dma attribute structure.
    252  *
    253  * use by: px_dma_allochdl()
    254  *
    255  * return value:
    256  *
    257  *	DDI_SUCCESS		- on success
    258  *	DDI_DMA_BADATTR		- attribute has invalid version number
    259  *				  or address limits exclude dvma space
    260  */
    261 int
    262 px_dma_attr2hdl(px_t *px_p, ddi_dma_impl_t *mp)
    263 {
    264 	px_mmu_t *mmu_p = px_p->px_mmu_p;
    265 	uint64_t syslo, syshi;
    266 	int	ret;
    267 	ddi_dma_attr_t *attrp		= PX_DEV_ATTR(mp);
    268 	uint64_t hi			= attrp->dma_attr_addr_hi;
    269 	uint64_t lo			= attrp->dma_attr_addr_lo;
    270 	uint64_t align			= attrp->dma_attr_align;
    271 	uint64_t nocross		= attrp->dma_attr_seg;
    272 	uint64_t count_max		= attrp->dma_attr_count_max;
    273 
    274 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "attrp=%p cntr_max=%x.%08x\n",
    275 	    attrp, HI32(count_max), LO32(count_max));
    276 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x lo=%x.%08x\n",
    277 	    HI32(hi), LO32(hi), HI32(lo), LO32(lo));
    278 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "seg=%x.%08x align=%x.%08x\n",
    279 	    HI32(nocross), LO32(nocross), HI32(align), LO32(align));
    280 
    281 	if (!nocross)
    282 		nocross--;
    283 	if (attrp->dma_attr_flags & DDI_DMA_FORCE_PHYSICAL) { /* BYPASS */
    284 
    285 		DBG(DBG_DMA_ALLOCH, px_p->px_dip, "bypass mode\n");
    286 		/*
    287 		 * If Bypass DMA is not supported, return error so that
    288 		 * target driver can fall back to dvma mode of operation
    289 		 */
    290 		if (!(px_p->px_dev_caps & PX_BYPASS_DMA_ALLOWED))
    291 			return (DDI_DMA_BADATTR);
    292 		mp->dmai_flags |= PX_DMAI_FLAGS_BYPASSREQ;
    293 		if (nocross != UINT64_MAX)
    294 			return (DDI_DMA_BADATTR);
    295 		if (align && (align > MMU_PAGE_SIZE))
    296 			return (DDI_DMA_BADATTR);
    297 		align = 1; /* align on 1 page boundary */
    298 
    299 		/* do a range check and get the limits */
    300 		ret = px_lib_dma_bypass_rngchk(px_p->px_dip, attrp,
    301 		    &syslo, &syshi);
    302 		if (ret != DDI_SUCCESS)
    303 			return (ret);
    304 	} else { /* MMU_XLATE or PEER_TO_PEER */
    305 		align = MAX(align, MMU_PAGE_SIZE) - 1;
    306 		if ((align & nocross) != align) {
    307 			dev_info_t *rdip = mp->dmai_rdip;
    308 			cmn_err(CE_WARN, "%s%d dma_attr_seg not aligned",
    309 			    NAMEINST(rdip));
    310 			return (DDI_DMA_BADATTR);
    311 		}
    312 		align = MMU_BTOP(align + 1);
    313 		syslo = mmu_p->mmu_dvma_base;
    314 		syshi = mmu_p->mmu_dvma_end;
    315 	}
    316 	if (hi <= lo) {
    317 		dev_info_t *rdip = mp->dmai_rdip;
    318 		cmn_err(CE_WARN, "%s%d limits out of range", NAMEINST(rdip));
    319 		return (DDI_DMA_BADATTR);
    320 	}
    321 	lo = MAX(lo, syslo);
    322 	hi = MIN(hi, syshi);
    323 	if (!count_max)
    324 		count_max--;
    325 
    326 	DBG(DBG_DMA_ALLOCH, px_p->px_dip, "hi=%x.%08x, lo=%x.%08x\n",
    327 	    HI32(hi), LO32(hi), HI32(lo), LO32(lo));
    328 	if (hi <= lo) {
    329 		/*
    330 		 * If this is an IOMMU bypass access, the caller can't use
    331 		 * the required addresses, so fail it.  Otherwise, it's
    332 		 * peer-to-peer; ensure that the caller has no alignment or
    333 		 * segment size restrictions.
    334 		 */
    335 		if ((mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ||
    336 		    (nocross < UINT32_MAX) || (align > 1))
    337 			return (DDI_DMA_BADATTR);
    338 
    339 		mp->dmai_flags |= PX_DMAI_FLAGS_PEER_ONLY;
    340 	} else /* set practical counter_max value */
    341 		count_max = MIN(count_max, hi - lo);
    342 
    343 	if (PX_DEV_NOSYSLIMIT(lo, hi, syslo, syshi, align))
    344 		mp->dmai_flags |= PX_DMAI_FLAGS_NOSYSLIMIT |
    345 		    PX_DMAI_FLAGS_NOFASTLIMIT;
    346 	else {
    347 		syshi = mmu_p->mmu_dvma_fast_end;
    348 		if (PX_DEV_NOFASTLIMIT(lo, hi, syslo, syshi, align))
    349 			mp->dmai_flags |= PX_DMAI_FLAGS_NOFASTLIMIT;
    350 	}
    351 	if (PX_DMA_NOCTX(mp->dmai_rdip))
    352 		mp->dmai_flags |= PX_DMAI_FLAGS_NOCTX;
    353 
    354 	mp->dmai_burstsizes	= attrp->dma_attr_burstsizes;
    355 	attrp = &mp->dmai_attr;
    356 	SET_DMAATTR(attrp, lo, hi, nocross, count_max);
    357 	return (DDI_SUCCESS);
    358 }
    359 
    360 #define	TGT_PFN_INBETWEEN(pfn, bgn, end) ((pfn >= bgn) && (pfn <= end))
    361 
    362 /*
    363  * px_dma_type - determine which of the three types DMA (peer-to-peer,
    364  *		mmu bypass, or mmu translate) we are asked to do.
    365  *		Also checks pfn0 and rejects any non-peer-to-peer
    366  *		requests for peer-only devices.
    367  *
    368  *	return values:
    369  *		DDI_DMA_NOMAPPING - can't get valid pfn0, or bad dma type
    370  *		DDI_SUCCESS
    371  *
    372  *	dma handle members affected (set on exit):
    373  *	mp->dmai_object		- dmareq->dmar_object
    374  *	mp->dmai_rflags		- consistent?, nosync?, dmareq->dmar_flags
    375  *	mp->dmai_flags   	- DMA type
    376  *	mp->dmai_pfn0   	- 1st page pfn (if va/size pair and not shadow)
    377  *	mp->dmai_roffset 	- initialized to starting MMU page offset
    378  *	mp->dmai_ndvmapages	- # of total MMU pages of entire object
    379  */
    380 int
    381 px_dma_type(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
    382 {
    383 	dev_info_t *dip = px_p->px_dip;
    384 	ddi_dma_obj_t *dobj_p = &dmareq->dmar_object;
    385 	px_pec_t *pec_p = px_p->px_pec_p;
    386 	uint32_t offset;
    387 	pfn_t pfn0;
    388 	uint_t redzone;
    389 
    390 	mp->dmai_rflags = dmareq->dmar_flags & DMP_DDIFLAGS;
    391 
    392 	if (!(px_p->px_dev_caps & PX_DMA_SYNC_REQUIRED))
    393 		mp->dmai_rflags |= DMP_NOSYNC;
    394 
    395 	switch (dobj_p->dmao_type) {
    396 	case DMA_OTYP_BUFVADDR:
    397 	case DMA_OTYP_VADDR: {
    398 		page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
    399 		caddr_t vaddr = dobj_p->dmao_obj.virt_obj.v_addr;
    400 
    401 		DBG(DBG_DMA_MAP, dip, "vaddr=%p pplist=%p\n", vaddr, pplist);
    402 		offset = (ulong_t)vaddr & MMU_PAGE_OFFSET;
    403 		if (pplist) {				/* shadow list */
    404 			mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
    405 			pfn0 = page_pptonum(*pplist);
    406 		} else {
    407 			struct as *as_p = dobj_p->dmao_obj.virt_obj.v_as;
    408 			struct hat *hat_p = as_p ? as_p->a_hat : kas.a_hat;
    409 			pfn0 = hat_getpfnum(hat_p, vaddr);
    410 		}
    411 		}
    412 		break;
    413 
    414 	case DMA_OTYP_PAGES:
    415 		offset = dobj_p->dmao_obj.pp_obj.pp_offset;
    416 		mp->dmai_flags |= PX_DMAI_FLAGS_PGPFN;
    417 		pfn0 = page_pptonum(dobj_p->dmao_obj.pp_obj.pp_pp);
    418 		break;
    419 
    420 	case DMA_OTYP_PADDR:
    421 	default:
    422 		cmn_err(CE_WARN, "%s%d requested unsupported dma type %x",
    423 		    NAMEINST(mp->dmai_rdip), dobj_p->dmao_type);
    424 		return (DDI_DMA_NOMAPPING);
    425 	}
    426 	if (pfn0 == PFN_INVALID) {
    427 		cmn_err(CE_WARN, "%s%d: invalid pfn0 for DMA object %p",
    428 		    NAMEINST(dip), dobj_p);
    429 		return (DDI_DMA_NOMAPPING);
    430 	}
    431 	if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base32_pfn,
    432 	    pec_p->pec_last32_pfn)) {
    433 		mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP32;
    434 		goto done;	/* leave bypass and dvma flag as 0 */
    435 	} else if (TGT_PFN_INBETWEEN(pfn0, pec_p->pec_base64_pfn,
    436 	    pec_p->pec_last64_pfn)) {
    437 		mp->dmai_flags |= PX_DMAI_FLAGS_PTP|PX_DMAI_FLAGS_PTP64;
    438 		goto done;	/* leave bypass and dvma flag as 0 */
    439 	}
    440 	if (PX_DMA_ISPEERONLY(mp)) {
    441 		dev_info_t *rdip = mp->dmai_rdip;
    442 		cmn_err(CE_WARN, "Bad peer-to-peer req %s%d", NAMEINST(rdip));
    443 		return (DDI_DMA_NOMAPPING);
    444 	}
    445 
    446 	redzone = (mp->dmai_rflags & DDI_DMA_REDZONE) ||
    447 	    (mp->dmai_flags & PX_DMAI_FLAGS_MAP_BUFZONE) ?
    448 	    PX_DMAI_FLAGS_REDZONE : 0;
    449 
    450 	mp->dmai_flags |= (mp->dmai_flags & PX_DMAI_FLAGS_BYPASSREQ) ?
    451 	    PX_DMAI_FLAGS_BYPASS : (PX_DMAI_FLAGS_DVMA | redzone);
    452 done:
    453 	mp->dmai_object	 = *dobj_p;			/* whole object    */
    454 	mp->dmai_pfn0	 = (void *)pfn0;		/* cache pfn0	   */
    455 	mp->dmai_roffset = offset;			/* win0 pg0 offset */
    456 	mp->dmai_ndvmapages = MMU_BTOPR(offset + mp->dmai_object.dmao_size);
    457 	return (DDI_SUCCESS);
    458 }
    459 
    460 /*
    461  * px_dma_pgpfn - set up pfnlst array according to pages
    462  *	VA/size pair: <shadow IO, bypass, peer-to-peer>, or OTYP_PAGES
    463  */
    464 /*ARGSUSED*/
    465 static int
    466 px_dma_pgpfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
    467 {
    468 	int i;
    469 	dev_info_t *dip = px_p->px_dip;
    470 
    471 	switch (mp->dmai_object.dmao_type) {
    472 	case DMA_OTYP_BUFVADDR:
    473 	case DMA_OTYP_VADDR: {
    474 		page_t **pplist = mp->dmai_object.dmao_obj.virt_obj.v_priv;
    475 		DBG(DBG_DMA_MAP, dip, "shadow pplist=%p, %x pages, pfns=",
    476 		    pplist, npages);
    477 		for (i = 1; i < npages; i++) {
    478 			px_iopfn_t pfn = page_pptonum(pplist[i]);
    479 			PX_SET_MP_PFN1(mp, i, pfn);
    480 			DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
    481 		}
    482 		DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
    483 		}
    484 		break;
    485 
    486 	case DMA_OTYP_PAGES: {
    487 		page_t *pp = mp->dmai_object.dmao_obj.pp_obj.pp_pp->p_next;
    488 		DBG(DBG_DMA_MAP, dip, "pp=%p pfns=", pp);
    489 		for (i = 1; i < npages; i++, pp = pp->p_next) {
    490 			px_iopfn_t pfn = page_pptonum(pp);
    491 			PX_SET_MP_PFN1(mp, i, pfn);
    492 			DBG(DBG_DMA_MAP|DBG_CONT, dip, "%x ", pfn);
    493 		}
    494 		DBG(DBG_DMA_MAP|DBG_CONT, dip, "\n");
    495 		}
    496 		break;
    497 
    498 	default:	/* check is already done by px_dma_type */
    499 		ASSERT(0);
    500 		break;
    501 	}
    502 	return (DDI_SUCCESS);
    503 }
    504 
    505 /*
    506  * px_dma_vapfn - set up pfnlst array according to VA
    507  *	VA/size pair: <normal, bypass, peer-to-peer>
    508  *	pfn0 is skipped as it is already done.
    509  *	In this case, the cached pfn0 is used to fill pfnlst[0]
    510  */
    511 static int
    512 px_dma_vapfn(px_t *px_p, ddi_dma_impl_t *mp, uint_t npages)
    513 {
    514 	dev_info_t *dip = px_p->px_dip;
    515 	int i;
    516 	caddr_t vaddr = (caddr_t)mp->dmai_object.dmao_obj.virt_obj.v_as;
    517 	struct hat *hat_p = vaddr ? ((struct as *)vaddr)->a_hat : kas.a_hat;
    518 
    519 	vaddr = mp->dmai_object.dmao_obj.virt_obj.v_addr + MMU_PAGE_SIZE;
    520 	for (i = 1; i < npages; i++, vaddr += MMU_PAGE_SIZE) {
    521 		px_iopfn_t pfn = hat_getpfnum(hat_p, vaddr);
    522 		if (pfn == PFN_INVALID)
    523 			goto err_badpfn;
    524 		PX_SET_MP_PFN1(mp, i, pfn);
    525 		DBG(DBG_DMA_BINDH, dip, "px_dma_vapfn: mp=%p pfnlst[%x]=%x\n",
    526 		    mp, i, pfn);
    527 	}
    528 	return (DDI_SUCCESS);
    529 err_badpfn:
    530 	cmn_err(CE_WARN, "%s%d: bad page frame vaddr=%p", NAMEINST(dip), vaddr);
    531 	return (DDI_DMA_NOMAPPING);
    532 }
    533 
    534 /*
    535  * px_dma_pfn - Fills pfn list for all pages being DMA-ed.
    536  *
    537  * dependencies:
    538  *	mp->dmai_ndvmapages	- set to total # of dma pages
    539  *
    540  * return value:
    541  *	DDI_SUCCESS
    542  *	DDI_DMA_NOMAPPING
    543  */
    544 int
    545 px_dma_pfn(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
    546 {
    547 	uint32_t npages = mp->dmai_ndvmapages;
    548 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
    549 	int i, ret, peer = PX_DMA_ISPTP(mp);
    550 	int peer32 = PX_DMA_ISPTP32(mp);
    551 	dev_info_t *dip = px_p->px_dip;
    552 
    553 	px_pec_t *pec_p = px_p->px_pec_p;
    554 	px_iopfn_t pfn_base = peer32 ? pec_p->pec_base32_pfn :
    555 	    pec_p->pec_base64_pfn;
    556 	px_iopfn_t pfn_last = peer32 ? pec_p->pec_last32_pfn :
    557 	    pec_p->pec_last64_pfn;
    558 	px_iopfn_t pfn_adj = peer ? pfn_base : 0;
    559 
    560 	DBG(DBG_DMA_BINDH, dip, "px_dma_pfn: mp=%p pfn0=%x\n",
    561 	    mp, PX_MP_PFN0(mp) - pfn_adj);
    562 	/* 1 page: no array alloc/fill, no mixed mode check */
    563 	if (npages == 1) {
    564 		PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);
    565 		return (DDI_SUCCESS);
    566 	}
    567 	/* allocate pfn array */
    568 	if (!(mp->dmai_pfnlst = kmem_alloc(npages * sizeof (px_iopfn_t),
    569 	    waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP))) {
    570 		if (waitfp != DDI_DMA_DONTWAIT)
    571 			ddi_set_callback(waitfp, dmareq->dmar_arg,
    572 			    &px_kmem_clid);
    573 		return (DDI_DMA_NORESOURCES);
    574 	}
    575 	/* fill pfn array */
    576 	PX_SET_MP_PFN(mp, 0, PX_MP_PFN0(mp) - pfn_adj);	/* pfnlst[0] */
    577 	if ((ret = PX_DMA_ISPGPFN(mp) ? px_dma_pgpfn(px_p, mp, npages) :
    578 	    px_dma_vapfn(px_p, mp, npages)) != DDI_SUCCESS)
    579 		goto err;
    580 
    581 	/* skip pfn0, check mixed mode and adjust peer to peer pfn */
    582 	for (i = 1; i < npages; i++) {
    583 		px_iopfn_t pfn = PX_GET_MP_PFN1(mp, i);
    584 		if (peer ^ TGT_PFN_INBETWEEN(pfn, pfn_base, pfn_last)) {
    585 			cmn_err(CE_WARN, "%s%d mixed mode DMA %lx %lx",
    586 			    NAMEINST(mp->dmai_rdip), PX_MP_PFN0(mp), pfn);
    587 			ret = DDI_DMA_NOMAPPING;	/* mixed mode */
    588 			goto err;
    589 		}
    590 		DBG(DBG_DMA_MAP, dip,
    591 		    "px_dma_pfn: pfnlst[%x]=%x-%x\n", i, pfn, pfn_adj);
    592 		if (pfn_adj)
    593 			PX_SET_MP_PFN1(mp, i, pfn - pfn_adj);
    594 	}
    595 	return (DDI_SUCCESS);
    596 err:
    597 	px_dma_freepfn(mp);
    598 	return (ret);
    599 }
    600 
    601 /*
    602  * px_dvma_win() - trim requested DVMA size down to window size
    603  *	The 1st window starts from offset and ends at page-aligned boundary.
    604  *	From the 2nd window on, each window starts and ends at page-aligned
    605  *	boundary except the last window ends at wherever requested.
    606  *
    607  *	accesses the following mp-> members:
    608  *	mp->dmai_attr.dma_attr_count_max
    609  *	mp->dmai_attr.dma_attr_seg
    610  *	mp->dmai_roffset   - start offset of 1st window
    611  *	mp->dmai_rflags (redzone)
    612  *	mp->dmai_ndvmapages (for 1 page fast path)
    613  *
    614  *	sets the following mp-> members:
    615  *	mp->dmai_size	   - xfer size, != winsize if 1st/last win  (not fixed)
    616  *	mp->dmai_winsize   - window size (no redzone), n * page size    (fixed)
    617  *	mp->dmai_nwin	   - # of DMA windows of entire object		(fixed)
    618  *	mp->dmai_rflags	   - remove partial flag if nwin == 1		(fixed)
    619  *	mp->dmai_winlst	   - NULL, window objects not used for DVMA	(fixed)
    620  *
    621  *	fixed - not changed across different DMA windows
    622  */
    623 /*ARGSUSED*/
    624 int
    625 px_dvma_win(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
    626 {
    627 	uint32_t redzone_sz	= PX_HAS_REDZONE(mp) ? MMU_PAGE_SIZE : 0;
    628 	size_t obj_sz		= mp->dmai_object.dmao_size;
    629 	size_t xfer_sz;
    630 	ulong_t pg_off;
    631 
    632 	if ((mp->dmai_ndvmapages == 1) && !redzone_sz) {
    633 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
    634 		mp->dmai_size = obj_sz;
    635 		mp->dmai_winsize = MMU_PAGE_SIZE;
    636 		mp->dmai_nwin = 1;
    637 		goto done;
    638 	}
    639 
    640 	pg_off	= mp->dmai_roffset;
    641 	xfer_sz	= obj_sz + redzone_sz;
    642 
    643 	/* include redzone in nocross check */	{
    644 		uint64_t nocross = mp->dmai_attr.dma_attr_seg;
    645 		if (xfer_sz + pg_off - 1 > nocross)
    646 			xfer_sz = nocross - pg_off + 1;
    647 		if (redzone_sz && (xfer_sz <= redzone_sz)) {
    648 			DBG(DBG_DMA_MAP, px_p->px_dip,
    649 			    "nocross too small: "
    650 			    "%lx(%lx)+%lx+%lx < %llx\n",
    651 			    xfer_sz, obj_sz, pg_off, redzone_sz, nocross);
    652 			return (DDI_DMA_TOOBIG);
    653 		}
    654 	}
    655 	xfer_sz -= redzone_sz;		/* restore transfer size  */
    656 	/* check counter max */	{
    657 		uint32_t count_max = mp->dmai_attr.dma_attr_count_max;
    658 		if (xfer_sz - 1 > count_max)
    659 			xfer_sz = count_max + 1;
    660 	}
    661 	if (xfer_sz >= obj_sz) {
    662 		mp->dmai_rflags &= ~DDI_DMA_PARTIAL;
    663 		mp->dmai_size = xfer_sz;
    664 		mp->dmai_winsize = P2ROUNDUP(xfer_sz + pg_off, MMU_PAGE_SIZE);
    665 		mp->dmai_nwin = 1;
    666 		goto done;
    667 	}
    668 	if (!(dmareq->dmar_flags & DDI_DMA_PARTIAL)) {
    669 		DBG(DBG_DMA_MAP, px_p->px_dip, "too big: %lx+%lx+%lx > %lx\n",
    670 		    obj_sz, pg_off, redzone_sz, xfer_sz);
    671 		return (DDI_DMA_TOOBIG);
    672 	}
    673 
    674 	xfer_sz = MMU_PTOB(MMU_BTOP(xfer_sz + pg_off)); /* page align */
    675 	mp->dmai_size = xfer_sz - pg_off;	/* 1st window xferrable size */
    676 	mp->dmai_winsize = xfer_sz;		/* redzone not in winsize */
    677 	mp->dmai_nwin = (obj_sz + pg_off + xfer_sz - 1) / xfer_sz;
    678 done:
    679 	mp->dmai_winlst = NULL;
    680 	px_dump_dma_handle(DBG_DMA_MAP, px_p->px_dip, mp);
    681 	return (DDI_SUCCESS);
    682 }
    683 
    684 /*
    685  * fast track cache entry to mmu context, inserts 3 0 bits between
    686  * upper 6-bits and lower 3-bits of the 9-bit cache entry
    687  */
    688 #define	MMU_FCE_TO_CTX(i)	(((i) << 3) | ((i) & 0x7) | 0x38)
    689 
    690 /*
    691  * px_dvma_map_fast - attempts to map fast trackable DVMA
    692  */
    693 /*ARGSUSED*/
    694 int
    695 px_dvma_map_fast(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
    696 {
    697 	uint_t clustsz = px_dvma_page_cache_clustsz;
    698 	uint_t entries = px_dvma_page_cache_entries;
    699 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
    700 	    mp->dmai_attr.dma_attr_flags);
    701 	int i = mmu_p->mmu_dvma_addr_scan_start;
    702 	uint8_t *lock_addr = mmu_p->mmu_dvma_cache_locks + i;
    703 	px_dvma_addr_t dvma_pg;
    704 	size_t npages = MMU_BTOP(mp->dmai_winsize);
    705 	dev_info_t *dip = mmu_p->mmu_px_p->px_dip;
    706 
    707 	extern uint8_t ldstub(uint8_t *);
    708 	ASSERT(MMU_PTOB(npages) == mp->dmai_winsize);
    709 	ASSERT(npages + PX_HAS_REDZONE(mp) <= clustsz);
    710 
    711 	for (; i < entries && ldstub(lock_addr); i++, lock_addr++)
    712 		;
    713 	if (i >= entries) {
    714 		lock_addr = mmu_p->mmu_dvma_cache_locks;
    715 		i = 0;
    716 		for (; i < entries && ldstub(lock_addr); i++, lock_addr++)
    717 			;
    718 		if (i >= entries) {
    719 #ifdef	PX_DMA_PROF
    720 			px_dvmaft_exhaust++;
    721 #endif	/* PX_DMA_PROF */
    722 			return (DDI_DMA_NORESOURCES);
    723 		}
    724 	}
    725 	mmu_p->mmu_dvma_addr_scan_start = (i + 1) & (entries - 1);
    726 
    727 	i *= clustsz;
    728 	dvma_pg = mmu_p->dvma_base_pg + i;
    729 
    730 	if (px_lib_iommu_map(dip, PCI_TSBID(0, i), npages,
    731 	    PX_ADD_ATTR_EXTNS(attr, mp->dmai_bdf), (void *)mp, 0,
    732 	    MMU_MAP_PFN) != DDI_SUCCESS) {
    733 		DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
    734 		    "px_lib_iommu_map failed\n");
    735 		return (DDI_FAILURE);
    736 	}
    737 
    738 	if (!PX_MAP_BUFZONE(mp))
    739 		goto done;
    740 
    741 	DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: redzone pg=%x\n", i + npages);
    742 
    743 	ASSERT(PX_HAS_REDZONE(mp));
    744 
    745 	if (px_lib_iommu_map(dip, PCI_TSBID(0, i + npages), 1,
    746 	    PX_ADD_ATTR_EXTNS(attr, mp->dmai_bdf), (void *)mp, npages - 1,
    747 	    MMU_MAP_PFN) != DDI_SUCCESS) {
    748 		DBG(DBG_MAP_WIN, dip, "px_dvma_map_fast: "
    749 		    "mapping REDZONE page failed\n");
    750 
    751 		(void) px_lib_iommu_demap(dip, PCI_TSBID(0, i), npages);
    752 		return (DDI_FAILURE);
    753 	}
    754 
    755 done:
    756 #ifdef PX_DMA_PROF
    757 	px_dvmaft_success++;
    758 #endif
    759 	mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
    760 	mp->dmai_offset = 0;
    761 	mp->dmai_flags |= PX_DMAI_FLAGS_FASTTRACK;
    762 	PX_SAVE_MP_TTE(mp, attr);	/* save TTE template for unmapping */
    763 	if (PX_DVMA_DBG_ON(mmu_p))
    764 		px_dvma_alloc_debug(mmu_p, (char *)mp->dmai_mapping,
    765 		    mp->dmai_size, mp);
    766 	return (DDI_SUCCESS);
    767 }
    768 
    769 /*
    770  * px_dvma_map: map non-fasttrack DMA
    771  *		Use quantum cache if single page DMA.
    772  */
    773 int
    774 px_dvma_map(ddi_dma_impl_t *mp, ddi_dma_req_t *dmareq, px_mmu_t *mmu_p)
    775 {
    776 	uint_t npages = PX_DMA_WINNPGS(mp);
    777 	px_dvma_addr_t dvma_pg, dvma_pg_index;
    778 	void *dvma_addr;
    779 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
    780 	    mp->dmai_attr.dma_attr_flags);
    781 	int sleep = dmareq->dmar_fp == DDI_DMA_SLEEP ? VM_SLEEP : VM_NOSLEEP;
    782 	dev_info_t *dip = mp->dmai_rdip;
    783 	int	ret = DDI_SUCCESS;
    784 
    785 	/*
    786 	 * allocate dvma space resource and map in the first window.
    787 	 * (vmem_t *vmp, size_t size,
    788 	 *	size_t align, size_t phase, size_t nocross,
    789 	 *	void *minaddr, void *maxaddr, int vmflag)
    790 	 */
    791 	if ((npages == 1) && !PX_HAS_REDZONE(mp) && PX_HAS_NOSYSLIMIT(mp)) {
    792 		dvma_addr = vmem_alloc(mmu_p->mmu_dvma_map,
    793 		    MMU_PAGE_SIZE, sleep);
    794 		mp->dmai_flags |= PX_DMAI_FLAGS_VMEMCACHE;
    795 #ifdef	PX_DMA_PROF
    796 		px_dvma_vmem_alloc++;
    797 #endif	/* PX_DMA_PROF */
    798 	} else {
    799 		dvma_addr = vmem_xalloc(mmu_p->mmu_dvma_map,
    800 		    MMU_PTOB(npages + PX_HAS_REDZONE(mp)),
    801 		    MAX(mp->dmai_attr.dma_attr_align, MMU_PAGE_SIZE),
    802 		    0,
    803 		    mp->dmai_attr.dma_attr_seg + 1,
    804 		    (void *)mp->dmai_attr.dma_attr_addr_lo,
    805 		    (void *)(mp->dmai_attr.dma_attr_addr_hi + 1),
    806 		    sleep);
    807 #ifdef	PX_DMA_PROF
    808 		px_dvma_vmem_xalloc++;
    809 #endif	/* PX_DMA_PROF */
    810 	}
    811 	dvma_pg = MMU_BTOP((ulong_t)dvma_addr);
    812 	dvma_pg_index = dvma_pg - mmu_p->dvma_base_pg;
    813 	DBG(DBG_DMA_MAP, dip, "fallback dvma_pages: dvma_pg=%x index=%x\n",
    814 	    dvma_pg, dvma_pg_index);
    815 	if (dvma_pg == 0)
    816 		goto noresource;
    817 
    818 	mp->dmai_mapping = mp->dmai_roffset | MMU_PTOB(dvma_pg);
    819 	mp->dmai_offset = 0;
    820 	PX_SAVE_MP_TTE(mp, attr);	/* mp->dmai_tte = tte */
    821 
    822 	if ((ret = px_mmu_map_pages(mmu_p,
    823 	    mp, dvma_pg, npages, 0)) != DDI_SUCCESS) {
    824 		if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
    825 			vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
    826 			    MMU_PAGE_SIZE);
    827 #ifdef PX_DMA_PROF
    828 			px_dvma_vmem_free++;
    829 #endif /* PX_DMA_PROF */
    830 		} else {
    831 			vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
    832 			    MMU_PTOB(npages + PX_HAS_REDZONE(mp)));
    833 #ifdef PX_DMA_PROF
    834 			px_dvma_vmem_xfree++;
    835 #endif /* PX_DMA_PROF */
    836 		}
    837 	}
    838 
    839 	return (ret);
    840 noresource:
    841 	if (dmareq->dmar_fp != DDI_DMA_DONTWAIT) {
    842 		DBG(DBG_DMA_MAP, dip, "dvma_pg 0 - set callback\n");
    843 		ddi_set_callback(dmareq->dmar_fp, dmareq->dmar_arg,
    844 		    &mmu_p->mmu_dvma_clid);
    845 	}
    846 	DBG(DBG_DMA_MAP, dip, "vmem_xalloc - DDI_DMA_NORESOURCES\n");
    847 	return (DDI_DMA_NORESOURCES);
    848 }
    849 
    850 void
    851 px_dvma_unmap(px_mmu_t *mmu_p, ddi_dma_impl_t *mp)
    852 {
    853 	px_dvma_addr_t dvma_addr = (px_dvma_addr_t)mp->dmai_mapping;
    854 	px_dvma_addr_t dvma_pg = MMU_BTOP(dvma_addr);
    855 	dvma_addr = MMU_PTOB(dvma_pg);
    856 
    857 	if (mp->dmai_flags & PX_DMAI_FLAGS_FASTTRACK) {
    858 		px_iopfn_t index = dvma_pg - mmu_p->dvma_base_pg;
    859 		ASSERT(index % px_dvma_page_cache_clustsz == 0);
    860 		index /= px_dvma_page_cache_clustsz;
    861 		ASSERT(index < px_dvma_page_cache_entries);
    862 		mmu_p->mmu_dvma_cache_locks[index] = 0;
    863 #ifdef	PX_DMA_PROF
    864 		px_dvmaft_free++;
    865 #endif	/* PX_DMA_PROF */
    866 		return;
    867 	}
    868 
    869 	if (mp->dmai_flags & PX_DMAI_FLAGS_VMEMCACHE) {
    870 		vmem_free(mmu_p->mmu_dvma_map, (void *)dvma_addr,
    871 		    MMU_PAGE_SIZE);
    872 #ifdef PX_DMA_PROF
    873 		px_dvma_vmem_free++;
    874 #endif /* PX_DMA_PROF */
    875 	} else {
    876 		size_t npages = MMU_BTOP(mp->dmai_winsize) + PX_HAS_REDZONE(mp);
    877 		vmem_xfree(mmu_p->mmu_dvma_map, (void *)dvma_addr,
    878 		    MMU_PTOB(npages));
    879 #ifdef PX_DMA_PROF
    880 		px_dvma_vmem_xfree++;
    881 #endif /* PX_DMA_PROF */
    882 	}
    883 }
    884 
    885 /*
    886  * DVMA mappings may have multiple windows, but each window always have
    887  * one segment.
    888  */
    889 int
    890 px_dvma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
    891 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
    892 	uint_t cache_flags)
    893 {
    894 	switch (cmd) {
    895 	case DDI_DMA_SYNC:
    896 		return (px_lib_dma_sync(dip, rdip, (ddi_dma_handle_t)mp,
    897 		    *offp, *lenp, cache_flags));
    898 
    899 	case DDI_DMA_HTOC: {
    900 		int ret;
    901 		off_t wo_off, off = *offp;	/* wo_off: wnd's obj offset */
    902 		uint_t win_size = mp->dmai_winsize;
    903 		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)objp;
    904 
    905 		if (off >= mp->dmai_object.dmao_size) {
    906 			cmn_err(CE_WARN, "%s%d invalid dma_htoc offset %lx",
    907 			    NAMEINST(mp->dmai_rdip), off);
    908 			return (DDI_FAILURE);
    909 		}
    910 		off += mp->dmai_roffset;
    911 		ret = px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
    912 		    off / win_size, &wo_off, NULL, cp, NULL); /* lenp == NULL */
    913 		if (ret)
    914 			return (ret);
    915 		DBG(DBG_DMA_CTL, dip, "HTOC:cookie=%x+%lx off=%lx,%lx\n",
    916 		    cp->dmac_address, cp->dmac_size, off, *offp);
    917 
    918 		/* adjust cookie addr/len if we are not on window boundary */
    919 		ASSERT((off % win_size) == (off -
    920 		    (PX_DMA_CURWIN(mp) ? mp->dmai_roffset : 0) - wo_off));
    921 		off = PX_DMA_CURWIN(mp) ? off % win_size : *offp;
    922 		ASSERT(cp->dmac_size > off);
    923 		cp->dmac_laddress += off;
    924 		cp->dmac_size -= off;
    925 		DBG(DBG_DMA_CTL, dip, "HTOC:mp=%p cookie=%x+%lx off=%lx,%lx\n",
    926 		    mp, cp->dmac_address, cp->dmac_size, off, wo_off);
    927 		}
    928 		return (DDI_SUCCESS);
    929 
    930 	case DDI_DMA_REPWIN:
    931 		*offp = mp->dmai_offset;
    932 		*lenp = mp->dmai_size;
    933 		return (DDI_SUCCESS);
    934 
    935 	case DDI_DMA_MOVWIN: {
    936 		off_t off = *offp;
    937 		if (off >= mp->dmai_object.dmao_size)
    938 			return (DDI_FAILURE);
    939 		off += mp->dmai_roffset;
    940 		return (px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
    941 		    off / mp->dmai_winsize, offp, lenp,
    942 		    (ddi_dma_cookie_t *)objp, NULL));
    943 		}
    944 
    945 	case DDI_DMA_NEXTWIN: {
    946 		px_window_t win = PX_DMA_CURWIN(mp);
    947 		if (offp) {
    948 			if (*(px_window_t *)offp != win) {
    949 				/* window not active */
    950 				*(px_window_t *)objp = win; /* return cur win */
    951 				return (DDI_DMA_STALE);
    952 			}
    953 			win++;
    954 		} else	/* map win 0 */
    955 			win = 0;
    956 		if (win >= mp->dmai_nwin) {
    957 			*(px_window_t *)objp = win - 1;
    958 			return (DDI_DMA_DONE);
    959 		}
    960 		if (px_dma_win(dip, rdip, (ddi_dma_handle_t)mp,
    961 		    win, 0, 0, 0, 0)) {
    962 			*(px_window_t *)objp = win - 1;
    963 			return (DDI_FAILURE);
    964 		}
    965 		*(px_window_t *)objp = win;
    966 		}
    967 		return (DDI_SUCCESS);
    968 
    969 	case DDI_DMA_NEXTSEG:
    970 		if (*(px_window_t *)offp != PX_DMA_CURWIN(mp))
    971 			return (DDI_DMA_STALE);
    972 		if (lenp)				/* only 1 seg allowed */
    973 			return (DDI_DMA_DONE);
    974 
    975 		/* return mp as seg 0 */
    976 		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
    977 		return (DDI_SUCCESS);
    978 
    979 	case DDI_DMA_SEGTOC:
    980 		MAKE_DMA_COOKIE((ddi_dma_cookie_t *)objp, mp->dmai_mapping,
    981 		    mp->dmai_size);
    982 		*offp = mp->dmai_offset;
    983 		*lenp = mp->dmai_size;
    984 		return (DDI_SUCCESS);
    985 
    986 	case DDI_DMA_COFF: {
    987 		ddi_dma_cookie_t *cp = (ddi_dma_cookie_t *)offp;
    988 		if (cp->dmac_address < mp->dmai_mapping ||
    989 		    (cp->dmac_address + cp->dmac_size) >
    990 		    (mp->dmai_mapping + mp->dmai_size))
    991 			return (DDI_FAILURE);
    992 		*objp = (caddr_t)(cp->dmac_address - mp->dmai_mapping +
    993 		    mp->dmai_offset);
    994 		}
    995 		return (DDI_SUCCESS);
    996 	default:
    997 		DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
    998 		    cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
    999 		break;
   1000 	}
   1001 	return (DDI_FAILURE);
   1002 }
   1003 
   1004 void
   1005 px_dma_freewin(ddi_dma_impl_t *mp)
   1006 {
   1007 	px_dma_win_t *win_p = mp->dmai_winlst, *win2_p;
   1008 	for (win2_p = win_p; win_p; win2_p = win_p) {
   1009 		win_p = win2_p->win_next;
   1010 		kmem_free(win2_p, sizeof (px_dma_win_t) +
   1011 		    sizeof (ddi_dma_cookie_t) * win2_p->win_ncookies);
   1012 	}
   1013 	mp->dmai_nwin = 0;
   1014 	mp->dmai_winlst = NULL;
   1015 }
   1016 
   1017 /*
   1018  * px_dma_newwin - create a dma window object and cookies
   1019  *
   1020  *	After the initial scan in px_dma_physwin(), which identifies
   1021  *	a portion of the pfn array that belongs to a dma window,
   1022  *	we are called to allocate and initialize representing memory
   1023  *	resources. We know from the 1st scan the number of cookies
   1024  *	or dma segment in this window so we can allocate a contiguous
   1025  *	memory array for the dma cookies (The implementation of
   1026  *	ddi_dma_nextcookie(9f) dictates dma cookies be contiguous).
   1027  *
   1028  *	A second round scan is done on the pfn array to identify
   1029  *	each dma segment and initialize its corresponding dma cookie.
   1030  *	We don't need to do all the safety checking and we know they
   1031  *	all belong to the same dma window.
   1032  *
   1033  *	Input:	cookie_no - # of cookies identified by the 1st scan
   1034  *		start_idx - subscript of the pfn array for the starting pfn
   1035  *		end_idx   - subscript of the last pfn in dma window
   1036  *		win_pp    - pointer to win_next member of previous window
   1037  *	Return:	DDI_SUCCESS - with **win_pp as newly created window object
   1038  *		DDI_DMA_NORESROUCE - caller frees all previous window objs
   1039  *	Note:	Each cookie and window size are all initialized on page
   1040  *		boundary. This is not true for the 1st cookie of the 1st
   1041  *		window and the last cookie of the last window.
   1042  *		We fix that later in upper layer which has access to size
   1043  *		and offset info.
   1044  *
   1045  */
   1046 /*ARGSUSED*/
   1047 static int
   1048 px_dma_newwin(dev_info_t *dip, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp,
   1049 	uint32_t cookie_no, uint32_t start_idx, uint32_t end_idx,
   1050 	px_dma_win_t **win_pp, uint64_t count_max, uint64_t bypass)
   1051 {
   1052 	int (*waitfp)(caddr_t) = dmareq->dmar_fp;
   1053 	ddi_dma_cookie_t *cookie_p;
   1054 	uint32_t pfn_no = 1;
   1055 	px_iopfn_t pfn = PX_GET_MP_PFN(mp, start_idx);
   1056 	px_iopfn_t prev_pfn = pfn;
   1057 	uint64_t baddr, seg_pfn0 = pfn;
   1058 	size_t sz = cookie_no * sizeof (ddi_dma_cookie_t);
   1059 	px_dma_win_t *win_p = kmem_zalloc(sizeof (px_dma_win_t) + sz,
   1060 	    waitfp == DDI_DMA_SLEEP ? KM_SLEEP : KM_NOSLEEP);
   1061 	io_attributes_t	attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
   1062 	    mp->dmai_attr.dma_attr_flags);
   1063 
   1064 	if (!win_p)
   1065 		goto noresource;
   1066 
   1067 	win_p->win_next = NULL;
   1068 	win_p->win_ncookies = cookie_no;
   1069 	win_p->win_curseg = 0;	/* start from segment 0 */
   1070 	win_p->win_size = MMU_PTOB(end_idx - start_idx + 1);
   1071 	/* win_p->win_offset is left uninitialized */
   1072 
   1073 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
   1074 	start_idx++;
   1075 	for (; start_idx <= end_idx; start_idx++, prev_pfn = pfn, pfn_no++) {
   1076 		pfn = PX_GET_MP_PFN1(mp, start_idx);
   1077 		if ((pfn == prev_pfn + 1) &&
   1078 		    (MMU_PTOB(pfn_no + 1) - 1 <= count_max))
   1079 			continue;
   1080 
   1081 		/* close up the cookie up to (including) prev_pfn */
   1082 		baddr = MMU_PTOB(seg_pfn0);
   1083 		if (bypass) {
   1084 			if (px_lib_iommu_getbypass(dip, baddr, attr, &baddr)
   1085 			    == DDI_SUCCESS)
   1086 				baddr = px_lib_ro_bypass(dip, attr, baddr);
   1087 			else
   1088 				return (DDI_FAILURE);
   1089 		}
   1090 
   1091 		MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
   1092 		DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages)\n",
   1093 		    MMU_PTOB(seg_pfn0), pfn_no);
   1094 
   1095 		cookie_p++;	/* advance to next available cookie cell */
   1096 		pfn_no = 0;
   1097 		seg_pfn0 = pfn;	/* start a new segment from current pfn */
   1098 	}
   1099 
   1100 	baddr = MMU_PTOB(seg_pfn0);
   1101 	if (bypass) {
   1102 		if (px_lib_iommu_getbypass(dip, baddr, attr, &baddr)
   1103 		    == DDI_SUCCESS)
   1104 			baddr = px_lib_ro_bypass(dip, attr, baddr);
   1105 		else
   1106 			return (DDI_FAILURE);
   1107 	}
   1108 
   1109 	MAKE_DMA_COOKIE(cookie_p, baddr, MMU_PTOB(pfn_no));
   1110 	DBG(DBG_BYPASS, mp->dmai_rdip, "cookie %p (%x pages) of total %x\n",
   1111 	    MMU_PTOB(seg_pfn0), pfn_no, cookie_no);
   1112 #ifdef	DEBUG
   1113 	cookie_p++;
   1114 	ASSERT((cookie_p - (ddi_dma_cookie_t *)(win_p + 1)) == cookie_no);
   1115 #endif	/* DEBUG */
   1116 	*win_pp = win_p;
   1117 	return (DDI_SUCCESS);
   1118 noresource:
   1119 	if (waitfp != DDI_DMA_DONTWAIT)
   1120 		ddi_set_callback(waitfp, dmareq->dmar_arg, &px_kmem_clid);
   1121 	return (DDI_DMA_NORESOURCES);
   1122 }
   1123 
   1124 /*
   1125  * px_dma_adjust - adjust 1st and last cookie and window sizes
   1126  *	remove initial dma page offset from 1st cookie and window size
   1127  *	remove last dma page remainder from last cookie and window size
   1128  *	fill win_offset of each dma window according to just fixed up
   1129  *		each window sizes
   1130  *	px_dma_win_t members modified:
   1131  *	win_p->win_offset - this window's offset within entire DMA object
   1132  *	win_p->win_size	  - xferrable size (in bytes) for this window
   1133  *
   1134  *	ddi_dma_impl_t members modified:
   1135  *	mp->dmai_size	  - 1st window xferrable size
   1136  *	mp->dmai_offset   - 0, which is the dma offset of the 1st window
   1137  *
   1138  *	ddi_dma_cookie_t members modified:
   1139  *	cookie_p->dmac_size - 1st and last cookie remove offset or remainder
   1140  *	cookie_p->dmac_laddress - 1st cookie add page offset
   1141  */
   1142 static void
   1143 px_dma_adjust(ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp, px_dma_win_t *win_p)
   1144 {
   1145 	ddi_dma_cookie_t *cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
   1146 	size_t pg_offset = mp->dmai_roffset;
   1147 	size_t win_offset = 0;
   1148 
   1149 	cookie_p->dmac_size -= pg_offset;
   1150 	cookie_p->dmac_laddress |= pg_offset;
   1151 	win_p->win_size -= pg_offset;
   1152 	DBG(DBG_BYPASS, mp->dmai_rdip, "pg0 adjust %lx\n", pg_offset);
   1153 
   1154 	mp->dmai_size = win_p->win_size;
   1155 	mp->dmai_offset = 0;
   1156 
   1157 	pg_offset += mp->dmai_object.dmao_size;
   1158 	pg_offset &= MMU_PAGE_OFFSET;
   1159 	if (pg_offset)
   1160 		pg_offset = MMU_PAGE_SIZE - pg_offset;
   1161 	DBG(DBG_BYPASS, mp->dmai_rdip, "last pg adjust %lx\n", pg_offset);
   1162 
   1163 	for (; win_p->win_next; win_p = win_p->win_next) {
   1164 		DBG(DBG_BYPASS, mp->dmai_rdip, "win off %p\n", win_offset);
   1165 		win_p->win_offset = win_offset;
   1166 		win_offset += win_p->win_size;
   1167 	}
   1168 	/* last window */
   1169 	win_p->win_offset = win_offset;
   1170 	cookie_p = (ddi_dma_cookie_t *)(win_p + 1);
   1171 	cookie_p[win_p->win_ncookies - 1].dmac_size -= pg_offset;
   1172 	win_p->win_size -= pg_offset;
   1173 	ASSERT((win_offset + win_p->win_size) == mp->dmai_object.dmao_size);
   1174 }
   1175 
   1176 /*
   1177  * px_dma_physwin() - carve up dma windows using physical addresses.
   1178  *	Called to handle mmu bypass and pci peer-to-peer transfers.
   1179  *	Calls px_dma_newwin() to allocate window objects.
   1180  *
   1181  * Dependency: mp->dmai_pfnlst points to an array of pfns
   1182  *
   1183  * 1. Each dma window is represented by a px_dma_win_t object.
    1184  *	The object will be cast to ddi_dma_win_t and returned
   1185  *	to leaf driver through the DDI interface.
   1186  * 2. Each dma window can have several dma segments with each
   1187  *	segment representing a physically contiguous either memory
   1188  *	space (if we are doing an mmu bypass transfer) or pci address
   1189  *	space (if we are doing a peer-to-peer transfer).
   1190  * 3. Each segment has a DMA cookie to program the DMA engine.
   1191  *	The cookies within each DMA window must be located in a
   1192  *	contiguous array per ddi_dma_nextcookie(9f).
   1193  * 4. The number of DMA segments within each DMA window cannot exceed
   1194  *	mp->dmai_attr.dma_attr_sgllen. If the transfer size is
   1195  *	too large to fit in the sgllen, the rest needs to be
   1196  *	relocated to the next dma window.
   1197  * 5. Peer-to-peer DMA segment follows device hi, lo, count_max,
   1198  *	and nocross restrictions while bypass DMA follows the set of
   1199  *	restrictions with system limits factored in.
   1200  *
   1201  * Return:
   1202  *	mp->dmai_winlst	 - points to a link list of px_dma_win_t objects.
   1203  *		Each px_dma_win_t object on the link list contains
    1204  *		information such as its window size (# of pages),
   1205  *		starting offset (also see Restriction), an array of
   1206  *		DMA cookies, and # of cookies in the array.
   1207  *	mp->dmai_pfnlst	 - NULL, the pfn list is freed to conserve memory.
   1208  *	mp->dmai_nwin	 - # of total DMA windows on mp->dmai_winlst.
   1209  *	mp->dmai_mapping - starting cookie address
   1210  *	mp->dmai_rflags	 - consistent, nosync, no redzone
   1211  *	mp->dmai_cookie	 - start of cookie table of the 1st DMA window
   1212  *
   1213  * Restriction:
    1214  *	Each px_dma_win_t object can theoretically start from any offset
    1215  *	since the mmu is not involved. However, this implementation
    1216  *	always makes windows start from a page aligned offset (except
   1217  *	the 1st window, which follows the requested offset) due to the
   1218  *	fact that we are handed a pfn list. This does require device's
   1219  *	count_max and attr_seg to be at least MMU_PAGE_SIZE aligned.
   1220  */
   1221 int
   1222 px_dma_physwin(px_t *px_p, ddi_dma_req_t *dmareq, ddi_dma_impl_t *mp)
   1223 {
   1224 	uint_t npages = mp->dmai_ndvmapages;
   1225 	int ret, sgllen = mp->dmai_attr.dma_attr_sgllen;
   1226 	px_iopfn_t pfn_lo, pfn_hi, prev_pfn;
   1227 	px_iopfn_t pfn = PX_GET_MP_PFN(mp, 0);
   1228 	uint32_t i, win_no = 0, pfn_no = 1, win_pfn0_index = 0, cookie_no = 0;
   1229 	uint64_t count_max, bypass_addr = 0;
   1230 	px_dma_win_t **win_pp = (px_dma_win_t **)&mp->dmai_winlst;
   1231 	ddi_dma_cookie_t *cookie0_p;
   1232 	io_attributes_t attr = PX_GET_TTE_ATTR(mp->dmai_rflags,
   1233 	    mp->dmai_attr.dma_attr_flags);
   1234 	dev_info_t *dip = px_p->px_dip;
   1235 
   1236 	ASSERT(PX_DMA_ISPTP(mp) || PX_DMA_ISBYPASS(mp));
   1237 	if (PX_DMA_ISPTP(mp)) { /* ignore sys limits for peer-to-peer */
   1238 		ddi_dma_attr_t *dev_attr_p = PX_DEV_ATTR(mp);
   1239 		uint64_t nocross = dev_attr_p->dma_attr_seg;
   1240 		px_pec_t *pec_p = px_p->px_pec_p;
   1241 		px_iopfn_t pfn_last = PX_DMA_ISPTP32(mp) ?
   1242 		    pec_p->pec_last32_pfn - pec_p->pec_base32_pfn :
   1243 		    pec_p->pec_last64_pfn - pec_p->pec_base64_pfn;
   1244 
   1245 		if (nocross && (nocross < UINT32_MAX))
   1246 			return (DDI_DMA_NOMAPPING);
   1247 		if (dev_attr_p->dma_attr_align > MMU_PAGE_SIZE)
   1248 			return (DDI_DMA_NOMAPPING);
   1249 		pfn_lo = MMU_BTOP(dev_attr_p->dma_attr_addr_lo);
   1250 		pfn_hi = MMU_BTOP(dev_attr_p->dma_attr_addr_hi);
   1251 		pfn_hi = MIN(pfn_hi, pfn_last);
   1252 		if ((pfn_lo > pfn_hi) || (pfn < pfn_lo))
   1253 			return (DDI_DMA_NOMAPPING);
   1254 
   1255 		count_max = dev_attr_p->dma_attr_count_max;
   1256 		count_max = MIN(count_max, nocross);
   1257 		/*
   1258 		 * the following count_max trim is not done because we are
   1259 		 * making sure pfn_lo <= pfn <= pfn_hi inside the loop
   1260 		 * count_max=MIN(count_max, MMU_PTOB(pfn_hi - pfn_lo + 1)-1);
   1261 		 */
   1262 	} else { /* bypass hi/lo/count_max have been processed by attr2hdl() */
   1263 		count_max = mp->dmai_attr.dma_attr_count_max;
   1264 		pfn_lo = MMU_BTOP(mp->dmai_attr.dma_attr_addr_lo);
   1265 		pfn_hi = MMU_BTOP(mp->dmai_attr.dma_attr_addr_hi);
   1266 
   1267 		if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn),
   1268 		    attr, &bypass_addr) != DDI_SUCCESS) {
   1269 			DBG(DBG_BYPASS, mp->dmai_rdip,
   1270 			    "bypass cookie failure %lx\n", pfn);
   1271 			return (DDI_DMA_NOMAPPING);
   1272 		}
   1273 		pfn = MMU_BTOP(bypass_addr);
   1274 	}
   1275 
   1276 	/* pfn: absolute (bypass mode) or relative (p2p mode) */
   1277 	for (prev_pfn = pfn, i = 1; i < npages;
   1278 	    i++, prev_pfn = pfn, pfn_no++) {
   1279 		pfn = PX_GET_MP_PFN1(mp, i);
   1280 		if (bypass_addr) {
   1281 			if (px_lib_iommu_getbypass(dip, MMU_PTOB(pfn), attr,
   1282 			    &bypass_addr) != DDI_SUCCESS) {
   1283 				ret = DDI_DMA_NOMAPPING;
   1284 				goto err;
   1285 			}
   1286 			pfn = MMU_BTOP(bypass_addr);
   1287 		}
   1288 		if ((pfn == prev_pfn + 1) &&
   1289 		    (MMU_PTOB(pfn_no + 1) - 1 <= count_max))
   1290 			continue;
   1291 		if ((pfn < pfn_lo) || (prev_pfn > pfn_hi)) {
   1292 			ret = DDI_DMA_NOMAPPING;
   1293 			goto err;
   1294 		}
   1295 		cookie_no++;
   1296 		pfn_no = 0;
   1297 		if (cookie_no < sgllen)
   1298 			continue;
   1299 
   1300 		DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
   1301 		    win_pfn0_index, i - 1, cookie_no);
   1302 		if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no,
   1303 		    win_pfn0_index, i - 1, win_pp, count_max, bypass_addr))
   1304 			goto err;
   1305 
   1306 		win_pp = &(*win_pp)->win_next;	/* win_pp = *(win_pp) */
   1307 		win_no++;
   1308 		win_pfn0_index = i;
   1309 		cookie_no = 0;
   1310 	}
   1311 	if (pfn > pfn_hi) {
   1312 		ret = DDI_DMA_NOMAPPING;
   1313 		goto err;
   1314 	}
   1315 	cookie_no++;
   1316 	DBG(DBG_BYPASS, mp->dmai_rdip, "newwin pfn[%x-%x] %x cks\n",
   1317 	    win_pfn0_index, i - 1, cookie_no);
   1318 	if (ret = px_dma_newwin(dip, dmareq, mp, cookie_no, win_pfn0_index,
   1319 	    i - 1, win_pp, count_max, bypass_addr))
   1320 		goto err;
   1321 	win_no++;
   1322 	px_dma_adjust(dmareq, mp, mp->dmai_winlst);
   1323 	mp->dmai_nwin = win_no;
   1324 	mp->dmai_rflags |= DDI_DMA_CONSISTENT | DMP_NOSYNC;
   1325 	mp->dmai_rflags &= ~DDI_DMA_REDZONE;
   1326 	mp->dmai_flags |= PX_DMAI_FLAGS_NOSYNC;
   1327 	cookie0_p = (ddi_dma_cookie_t *)(PX_WINLST(mp) + 1);
   1328 	mp->dmai_cookie = PX_WINLST(mp)->win_ncookies > 1 ? cookie0_p + 1 : 0;
   1329 	mp->dmai_mapping = cookie0_p->dmac_laddress;
   1330 
   1331 	px_dma_freepfn(mp);
   1332 	return (DDI_DMA_MAPPED);
   1333 err:
   1334 	px_dma_freewin(mp);
   1335 	return (ret);
   1336 }
   1337 
   1338 int
   1339 px_dma_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
   1340 	enum ddi_dma_ctlops cmd, off_t *offp, size_t *lenp, caddr_t *objp,
   1341 	uint_t cache_flags)
   1342 {
   1343 	switch (cmd) {
   1344 	case DDI_DMA_SYNC:
   1345 		return (DDI_SUCCESS);
   1346 
   1347 	case DDI_DMA_HTOC: {
   1348 		off_t off = *offp;
   1349 		ddi_dma_cookie_t *loop_cp, *cp;
   1350 		px_dma_win_t *win_p = mp->dmai_winlst;
   1351 
   1352 		if (off >= mp->dmai_object.dmao_size)
   1353 			return (DDI_FAILURE);
   1354 
   1355 		/* locate window */
   1356 		while (win_p->win_offset + win_p->win_size <= off)
   1357 			win_p = win_p->win_next;
   1358 
   1359 		loop_cp = cp = (ddi_dma_cookie_t *)(win_p + 1);
   1360 		mp->dmai_offset = win_p->win_offset;
   1361 		mp->dmai_size   = win_p->win_size;
   1362 		mp->dmai_mapping = cp->dmac_laddress; /* cookie0 start addr */
   1363 
   1364 		/* adjust cookie addr/len if we are not on cookie boundary */
   1365 		off -= win_p->win_offset;	   /* offset within window */
   1366 		for (; off >= loop_cp->dmac_size; loop_cp++)
   1367 			off -= loop_cp->dmac_size; /* offset within cookie */
   1368 
   1369 		mp->dmai_cookie = loop_cp + 1;
   1370 		win_p->win_curseg = loop_cp - cp;
   1371 		cp = (ddi_dma_cookie_t *)objp;
   1372 		MAKE_DMA_COOKIE(cp, loop_cp->dmac_laddress + off,
   1373 		    loop_cp->dmac_size - off);
   1374 
   1375 		DBG(DBG_DMA_CTL, dip,
   1376 		    "HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
   1377 		    cp->dmac_laddress, cp->dmac_size);
   1378 		}
   1379 		return (DDI_SUCCESS);
   1380 
   1381 	case DDI_DMA_REPWIN:
   1382 		*offp = mp->dmai_offset;
   1383 		*lenp = mp->dmai_size;
   1384 		return (DDI_SUCCESS);
   1385 
   1386 	case DDI_DMA_MOVWIN: {
   1387 		off_t off = *offp;
   1388 		ddi_dma_cookie_t *cp;
   1389 		px_dma_win_t *win_p = mp->dmai_winlst;
   1390 
   1391 		if (off >= mp->dmai_object.dmao_size)
   1392 			return (DDI_FAILURE);
   1393 
   1394 		/* locate window */
   1395 		while (win_p->win_offset + win_p->win_size <= off)
   1396 			win_p = win_p->win_next;
   1397 
   1398 		cp = (ddi_dma_cookie_t *)(win_p + 1);
   1399 		mp->dmai_offset = win_p->win_offset;
   1400 		mp->dmai_size   = win_p->win_size;
   1401 		mp->dmai_mapping = cp->dmac_laddress;	/* cookie0 star addr */
   1402 		mp->dmai_cookie = cp + 1;
   1403 		win_p->win_curseg = 0;
   1404 
   1405 		*(ddi_dma_cookie_t *)objp = *cp;
   1406 		*offp = win_p->win_offset;
   1407 		*lenp = win_p->win_size;
   1408 		DBG(DBG_DMA_CTL, dip,
   1409 		    "HTOC: cookie - dmac_laddress=%p dmac_size=%x\n",
   1410 		    cp->dmac_laddress, cp->dmac_size);
   1411 		}
   1412 		return (DDI_SUCCESS);
   1413 
   1414 	case DDI_DMA_NEXTWIN: {
   1415 		px_dma_win_t *win_p = *(px_dma_win_t **)offp;
   1416 		px_dma_win_t **nw_pp = (px_dma_win_t **)objp;
   1417 		ddi_dma_cookie_t *cp;
   1418 		if (!win_p) {
   1419 			*nw_pp = mp->dmai_winlst;
   1420 			return (DDI_SUCCESS);
   1421 		}
   1422 
   1423 		if (win_p->win_offset != mp->dmai_offset)
   1424 			return (DDI_DMA_STALE);
   1425 		if (!win_p->win_next)
   1426 			return (DDI_DMA_DONE);
   1427 		win_p = win_p->win_next;
   1428 		cp = (ddi_dma_cookie_t *)(win_p + 1);
   1429 		mp->dmai_offset = win_p->win_offset;
   1430 		mp->dmai_size   = win_p->win_size;
   1431 		mp->dmai_mapping = cp->dmac_laddress;   /* cookie0 star addr */
   1432 		mp->dmai_cookie = cp + 1;
   1433 		win_p->win_curseg = 0;
   1434 		*nw_pp = win_p;
   1435 		}
   1436 		return (DDI_SUCCESS);
   1437 
   1438 	case DDI_DMA_NEXTSEG: {
   1439 		px_dma_win_t *w_p = *(px_dma_win_t **)offp;
   1440 		if (w_p->win_offset != mp->dmai_offset)
   1441 			return (DDI_DMA_STALE);
   1442 		if (w_p->win_curseg + 1 >= w_p->win_ncookies)
   1443 			return (DDI_DMA_DONE);
   1444 		w_p->win_curseg++;
   1445 		}
   1446 		*(ddi_dma_seg_t *)objp = (ddi_dma_seg_t)mp;
   1447 		return (DDI_SUCCESS);
   1448 
   1449 	case DDI_DMA_SEGTOC: {
   1450 		px_dma_win_t *win_p = mp->dmai_winlst;
   1451 		off_t off = mp->dmai_offset;
   1452 		ddi_dma_cookie_t *cp;
   1453 		int i;
   1454 
   1455 		/* locate active window */
   1456 		for (; win_p->win_offset != off; win_p = win_p->win_next)
   1457 			;
   1458 		cp = (ddi_dma_cookie_t *)(win_p + 1);
   1459 		for (i = 0; i < win_p->win_curseg; i++, cp++)
   1460 			off += cp->dmac_size;
   1461 		*offp = off;
   1462 		*lenp = cp->dmac_size;
   1463 		*(ddi_dma_cookie_t *)objp = *cp;	/* copy cookie */
   1464 		}
   1465 		return (DDI_SUCCESS);
   1466 
   1467 	case DDI_DMA_COFF: {
   1468 		px_dma_win_t *win_p;
   1469 		ddi_dma_cookie_t *cp;
   1470 		uint64_t addr, key = ((ddi_dma_cookie_t *)offp)->dmac_laddress;
   1471 		size_t win_off;
   1472 
   1473 		for (win_p = mp->dmai_winlst; win_p; win_p = win_p->win_next) {
   1474 			int i;
   1475 			win_off = 0;
   1476 			cp = (ddi_dma_cookie_t *)(win_p + 1);
   1477 			for (i = 0; i < win_p->win_ncookies; i++, cp++) {
   1478 				size_t sz = cp->dmac_size;
   1479 
   1480 				addr = cp->dmac_laddress;
   1481 				if ((addr <= key) && (addr + sz >= key))
   1482 					goto found;
   1483 				win_off += sz;
   1484 			}
   1485 		}
   1486 		return (DDI_FAILURE);
   1487 found:
   1488 		*objp = (caddr_t)(win_p->win_offset + win_off + (key - addr));
   1489 		return (DDI_SUCCESS);
   1490 		}
   1491 	default:
   1492 		DBG(DBG_DMA_CTL, dip, "unknown command (%x): rdip=%s%d\n",
   1493 		    cmd, ddi_driver_name(rdip), ddi_get_instance(rdip));
   1494 		break;
   1495 	}
   1496 	return (DDI_FAILURE);
   1497 }
   1498 
   1499 static void
   1500 px_dvma_debug_init(px_mmu_t *mmu_p)
   1501 {
   1502 	size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
   1503 	ASSERT(MUTEX_HELD(&mmu_p->dvma_debug_lock));
   1504 	cmn_err(CE_NOTE, "PCI Express DVMA %p stat ON", mmu_p);
   1505 
   1506 	mmu_p->dvma_alloc_rec = kmem_alloc(sz, KM_SLEEP);
   1507 	mmu_p->dvma_free_rec = kmem_alloc(sz, KM_SLEEP);
   1508 
   1509 	mmu_p->dvma_active_list = NULL;
   1510 	mmu_p->dvma_alloc_rec_index = 0;
   1511 	mmu_p->dvma_free_rec_index = 0;
   1512 	mmu_p->dvma_active_count = 0;
   1513 }
   1514 
   1515 void
   1516 px_dvma_debug_fini(px_mmu_t *mmu_p)
   1517 {
   1518 	struct px_dvma_rec *prev, *ptr;
   1519 	size_t sz = sizeof (struct px_dvma_rec) * px_dvma_debug_rec;
   1520 	uint64_t mask = ~(1ull << mmu_p->mmu_inst);
   1521 	cmn_err(CE_NOTE, "PCI Express DVMA %p stat OFF", mmu_p);
   1522 
   1523 	if (mmu_p->dvma_alloc_rec) {
   1524 		kmem_free(mmu_p->dvma_alloc_rec, sz);
   1525 		mmu_p->dvma_alloc_rec = NULL;
   1526 	}
   1527 	if (mmu_p->dvma_free_rec) {
   1528 		kmem_free(mmu_p->dvma_free_rec, sz);
   1529 		mmu_p->dvma_free_rec = NULL;
   1530 	}
   1531 
   1532 	prev = mmu_p->dvma_active_list;
   1533 	if (!prev)
   1534 		return;
   1535 	for (ptr = prev->next; ptr; prev = ptr, ptr = ptr->next)
   1536 		kmem_free(prev, sizeof (struct px_dvma_rec));
   1537 	kmem_free(prev, sizeof (struct px_dvma_rec));
   1538 
   1539 	mmu_p->dvma_active_list = NULL;
   1540 	mmu_p->dvma_alloc_rec_index = 0;
   1541 	mmu_p->dvma_free_rec_index = 0;
   1542 	mmu_p->dvma_active_count = 0;
   1543 
   1544 	px_dvma_debug_off &= mask;
   1545 	px_dvma_debug_on &= mask;
   1546 }
   1547 
   1548 void
   1549 px_dvma_alloc_debug(px_mmu_t *mmu_p, char *address, uint_t len,
   1550 	ddi_dma_impl_t *mp)
   1551 {
   1552 	struct px_dvma_rec *ptr;
   1553 	mutex_enter(&mmu_p->dvma_debug_lock);
   1554 
   1555 	if (!mmu_p->dvma_alloc_rec)
   1556 		px_dvma_debug_init(mmu_p);
   1557 	if (PX_DVMA_DBG_OFF(mmu_p)) {
   1558 		px_dvma_debug_fini(mmu_p);
   1559 		goto done;
   1560 	}
   1561 
   1562 	ptr = &mmu_p->dvma_alloc_rec[mmu_p->dvma_alloc_rec_index];
   1563 	ptr->dvma_addr = address;
   1564 	ptr->len = len;
   1565 	ptr->mp = mp;
   1566 	if (++mmu_p->dvma_alloc_rec_index == px_dvma_debug_rec)
   1567 		mmu_p->dvma_alloc_rec_index = 0;
   1568 
   1569 	ptr = kmem_alloc(sizeof (struct px_dvma_rec), KM_SLEEP);
   1570 	ptr->dvma_addr = address;
   1571 	ptr->len = len;
   1572 	ptr->mp = mp;
   1573 
   1574 	ptr->next = mmu_p->dvma_active_list;
   1575 	mmu_p->dvma_active_list = ptr;
   1576 	mmu_p->dvma_active_count++;
   1577 done:
   1578 	mutex_exit(&mmu_p->dvma_debug_lock);
   1579 }
   1580 
   1581 void
   1582 px_dvma_free_debug(px_mmu_t *mmu_p, char *address, uint_t len,
   1583     ddi_dma_impl_t *mp)
   1584 {
   1585 	struct px_dvma_rec *ptr, *ptr_save;
   1586 	mutex_enter(&mmu_p->dvma_debug_lock);
   1587 
   1588 	if (!mmu_p->dvma_alloc_rec)
   1589 		px_dvma_debug_init(mmu_p);
   1590 	if (PX_DVMA_DBG_OFF(mmu_p)) {
   1591 		px_dvma_debug_fini(mmu_p);
   1592 		goto done;
   1593 	}
   1594 
   1595 	ptr = &mmu_p->dvma_free_rec[mmu_p->dvma_free_rec_index];
   1596 	ptr->dvma_addr = address;
   1597 	ptr->len = len;
   1598 	ptr->mp = mp;
   1599 	if (++mmu_p->dvma_free_rec_index == px_dvma_debug_rec)
   1600 		mmu_p->dvma_free_rec_index = 0;
   1601 
   1602 	ptr_save = mmu_p->dvma_active_list;
   1603 	for (ptr = ptr_save; ptr; ptr = ptr->next) {
   1604 		if ((ptr->dvma_addr == address) && (ptr->len = len))
   1605 			break;
   1606 		ptr_save = ptr;
   1607 	}
   1608 	if (!ptr) {
   1609 		cmn_err(CE_WARN, "bad dvma free addr=%lx len=%x",
   1610 		    (long)address, len);
   1611 		goto done;
   1612 	}
   1613 	if (ptr == mmu_p->dvma_active_list)
   1614 		mmu_p->dvma_active_list = ptr->next;
   1615 	else
   1616 		ptr_save->next = ptr->next;
   1617 	kmem_free(ptr, sizeof (struct px_dvma_rec));
   1618 	mmu_p->dvma_active_count--;
   1619 done:
   1620 	mutex_exit(&mmu_p->dvma_debug_lock);
   1621 }
   1622 
   1623 #ifdef	DEBUG
   1624 void
   1625 px_dump_dma_handle(uint64_t flag, dev_info_t *dip, ddi_dma_impl_t *hp)
   1626 {
   1627 	DBG(flag, dip, "mp(%p): flags=%x mapping=%lx xfer_size=%x\n",
   1628 	    hp, hp->dmai_inuse, hp->dmai_mapping, hp->dmai_size);
   1629 	DBG(flag|DBG_CONT, dip, "\tnpages=%x roffset=%x rflags=%x nwin=%x\n",
   1630 	    hp->dmai_ndvmapages, hp->dmai_roffset, hp->dmai_rflags,
   1631 	    hp->dmai_nwin);
   1632 	DBG(flag|DBG_CONT, dip, "\twinsize=%x tte=%p pfnlst=%p pfn0=%p\n",
   1633 	    hp->dmai_winsize, hp->dmai_tte, hp->dmai_pfnlst, hp->dmai_pfn0);
   1634 	DBG(flag|DBG_CONT, dip, "\twinlst=%x obj=%p attr=%p ckp=%p\n",
   1635 	    hp->dmai_winlst, &hp->dmai_object, &hp->dmai_attr,
   1636 	    hp->dmai_cookie);
   1637 }
   1638 #endif	/* DEBUG */
   1639