Home | History | Annotate | Download | only in ioat
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * Copyright (c) 2009, Intel Corporation.
     29  * All rights reserved.
     30  */
     31 
     32 #include <sys/errno.h>
     33 #include <sys/types.h>
     34 #include <sys/conf.h>
     35 #include <sys/kmem.h>
     36 #include <sys/ddi.h>
     37 #include <sys/stat.h>
     38 #include <sys/sunddi.h>
     39 #include <sys/file.h>
     40 #include <sys/open.h>
     41 #include <sys/modctl.h>
     42 #include <sys/ddi_impldefs.h>
     43 #include <sys/sysmacros.h>
     44 #include <vm/hat.h>
     45 #include <vm/as.h>
     46 #include <sys/mach_mmu.h>
     47 #ifdef __xpv
     48 #include <sys/hypervisor.h>
     49 #endif
     50 
     51 #include <sys/ioat.h>
     52 
     53 
     54 extern ddi_device_acc_attr_t ioat_acc_attr;
     55 
     56 /* dma attr for the descriptor rings */
     57 ddi_dma_attr_t ioat_desc_dma_attr = {
     58 	DMA_ATTR_V0,		/* dma_attr_version */
     59 	0x0,			/* dma_attr_addr_lo */
     60 	0xffffffffffffffff,	/* dma_attr_addr_hi */
     61 	0xffffffff,		/* dma_attr_count_max */
     62 	0x1000,			/* dma_attr_align */
     63 	0x1,			/* dma_attr_burstsizes */
     64 	0x1,			/* dma_attr_minxfer */
     65 	0xffffffff,		/* dma_attr_maxxfer */
     66 	0xffffffff,		/* dma_attr_seg */
     67 	0x1,			/* dma_attr_sgllen */
     68 	0x1,			/* dma_attr_granular */
     69 	0x0,			/* dma_attr_flags */
     70 };
     71 
     72 /* dma attr for the completion buffers */
     73 ddi_dma_attr_t ioat_cmpl_dma_attr = {
     74 	DMA_ATTR_V0,		/* dma_attr_version */
     75 	0x0,			/* dma_attr_addr_lo */
     76 	0xffffffffffffffff,	/* dma_attr_addr_hi */
     77 	0xffffffff,		/* dma_attr_count_max */
     78 	0x40,			/* dma_attr_align */
     79 	0x1,			/* dma_attr_burstsizes */
     80 	0x1,			/* dma_attr_minxfer */
     81 	0xffffffff,		/* dma_attr_maxxfer */
     82 	0xffffffff,		/* dma_attr_seg */
     83 	0x1,			/* dma_attr_sgllen */
     84 	0x1,			/* dma_attr_granular */
     85 	0x0,			/* dma_attr_flags */
     86 };
     87 
     88 static int ioat_completion_alloc(ioat_channel_t channel);
     89 static void ioat_completion_free(ioat_channel_t channel);
     90 static void ioat_channel_start(ioat_channel_t channel);
     91 static void ioat_channel_reset(ioat_channel_t channel);
     92 
     93 int ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt);
     94 void ioat_ring_free(ioat_channel_t channel);
     95 void ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *desc);
     96 int ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring,
     97     dcopy_cmd_t cmd);
     98 
     99 static void ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr,
    100     uint64_t dest_addr, uint32_t size, uint32_t ctrl);
    101 static void ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id);
    102 
    103 
    104 /*
    105  * ioat_channel_init()
    106  */
    107 int
    108 ioat_channel_init(ioat_state_t *state)
    109 {
    110 	int i;
    111 
    112 	/*
    113 	 * initialize each dma channel's state which doesn't change across
    114 	 * channel alloc/free.
    115 	 */
    116 	state->is_chansize = sizeof (struct ioat_channel_s) *
    117 	    state->is_num_channels;
    118 	state->is_channel = kmem_zalloc(state->is_chansize, KM_SLEEP);
    119 	for (i = 0; i < state->is_num_channels; i++) {
    120 		state->is_channel[i].ic_state = state;
    121 		state->is_channel[i].ic_regs = (uint8_t *)
    122 		    ((uintptr_t)state->is_genregs +
    123 		    (uintptr_t)(IOAT_CHANNELREG_OFFSET * (i + 1)));
    124 	}
    125 
    126 	/* initial the allocator (from 0 to state->is_num_channels) */
    127 	ioat_rs_init(state, 0, state->is_num_channels, &state->is_channel_rs);
    128 
    129 	return (DDI_SUCCESS);
    130 }
    131 
    132 
    133 /*
    134  * ioat_channel_fini()
    135  */
    136 void
    137 ioat_channel_fini(ioat_state_t *state)
    138 {
    139 	ioat_rs_fini(&state->is_channel_rs);
    140 	kmem_free(state->is_channel, state->is_chansize);
    141 }
    142 
    143 
    144 /*
    145  * ioat_channel_alloc()
    146  *   NOTE: We intentionaly don't handle DCOPY_SLEEP (if no channels are
    147  *	available)
    148  */
    149 /*ARGSUSED*/
    150 int
    151 ioat_channel_alloc(void *device_private, dcopy_handle_t handle, int flags,
    152     uint_t size, dcopy_query_channel_t *info, void *channel_private)
    153 {
    154 #define	CHANSTRSIZE	20
    155 	struct ioat_channel_s *channel;
    156 	char chanstr[CHANSTRSIZE];
    157 	ioat_channel_t *chan;
    158 	ioat_state_t *state;
    159 	size_t cmd_size;
    160 	uint_t chan_num;
    161 	uint32_t estat;
    162 	int e;
    163 
    164 
    165 	state = (ioat_state_t *)device_private;
    166 	chan = (ioat_channel_t *)channel_private;
    167 
    168 	/* allocate a H/W channel */
    169 	e = ioat_rs_alloc(state->is_channel_rs, &chan_num);
    170 	if (e != DDI_SUCCESS) {
    171 		return (DCOPY_NORESOURCES);
    172 	}
    173 
    174 	channel = &state->is_channel[chan_num];
    175 	channel->ic_inuse = B_TRUE;
    176 	channel->ic_chan_num = chan_num;
    177 	channel->ic_ver = state->is_ver;
    178 	channel->ic_dca_active = B_FALSE;
    179 	channel->ic_channel_state = IOAT_CHANNEL_OK;
    180 	channel->ic_dcopy_handle = handle;
    181 
    182 #ifdef	DEBUG
    183 	{
    184 		/* if we're cbv2, verify that the V2 compatibility bit is set */
    185 		uint16_t reg;
    186 		if (channel->ic_ver == IOAT_CBv2) {
    187 			reg = ddi_get16(state->is_reg_handle,
    188 			    (uint16_t *)&channel->ic_regs[IOAT_CHAN_COMP]);
    189 			ASSERT(reg & 0x2);
    190 		}
    191 	}
    192 #endif
    193 
    194 	/*
    195 	 * Configure DMA channel
    196 	 *   Channel In Use
    197 	 *   Error Interrupt Enable
    198 	 *   Any Error Abort Enable
    199 	 *   Error Completion Enable
    200 	 */
    201 	ddi_put16(state->is_reg_handle,
    202 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C);
    203 
    204 	/* check channel error register, clear any errors */
    205 	estat = ddi_get32(state->is_reg_handle,
    206 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
    207 	if (estat != 0) {
    208 #ifdef	DEBUG
    209 		cmn_err(CE_CONT, "cleared errors (0x%x) before channel (%d) "
    210 		    "enable\n", estat, channel->ic_chan_num);
    211 #endif
    212 		ddi_put32(state->is_reg_handle,
    213 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR], estat);
    214 	}
    215 
    216 	/* allocate and initialize the descriptor buf */
    217 	e = ioat_ring_alloc(channel, size);
    218 	if (e != DDI_SUCCESS) {
    219 		goto chinitfail_desc_alloc;
    220 	}
    221 
    222 	/* allocate and initialize the completion space */
    223 	e = ioat_completion_alloc(channel);
    224 	if (e != DDI_SUCCESS) {
    225 		goto chinitfail_completion_alloc;
    226 	}
    227 
    228 	/* setup kmem_cache for commands */
    229 	cmd_size = sizeof (struct dcopy_cmd_s) +
    230 	    sizeof (struct dcopy_cmd_priv_s) +
    231 	    sizeof (struct ioat_cmd_private_s);
    232 	(void) snprintf(chanstr, CHANSTRSIZE, "ioat%dchan%dcmd",
    233 	    state->is_instance, channel->ic_chan_num);
    234 	channel->ic_cmd_cache = kmem_cache_create(chanstr, cmd_size, 64,
    235 	    NULL, NULL, NULL, NULL, NULL, 0);
    236 	if (channel->ic_cmd_cache == NULL) {
    237 		goto chinitfail_kmem_cache;
    238 	}
    239 
    240 	/* start-up the channel */
    241 	ioat_channel_start(channel);
    242 
    243 	/* fill in the channel info returned to dcopy */
    244 	info->qc_version = DCOPY_QUERY_CHANNEL_V0;
    245 	info->qc_id = state->is_deviceinfo.di_id;
    246 	info->qc_capabilities = (uint64_t)state->is_capabilities;
    247 	info->qc_channel_size = (uint64_t)size;
    248 	info->qc_chan_num = (uint64_t)channel->ic_chan_num;
    249 	if (channel->ic_ver == IOAT_CBv1) {
    250 		info->qc_dca_supported = B_FALSE;
    251 	} else {
    252 		if (info->qc_capabilities & IOAT_DMACAP_DCA) {
    253 			info->qc_dca_supported = B_TRUE;
    254 		} else {
    255 			info->qc_dca_supported = B_FALSE;
    256 		}
    257 	}
    258 
    259 	*chan = channel;
    260 
    261 	return (DCOPY_SUCCESS);
    262 
    263 chinitfail_kmem_cache:
    264 	ioat_completion_free(channel);
    265 chinitfail_completion_alloc:
    266 	ioat_ring_free(channel);
    267 chinitfail_desc_alloc:
    268 	return (DCOPY_FAILURE);
    269 }
    270 
    271 
    272 /*
    273  * ioat_channel_suspend()
    274  */
    275 /*ARGSUSED*/
    276 void
    277 ioat_channel_suspend(ioat_state_t *state)
    278 {
    279 	/*
    280 	 * normally you would disable interrupts and reset the H/W here. But
    281 	 * since the suspend framework doesn't know who is using us, it may
    282 	 * not suspend their I/O before us.  Since we won't actively be doing
    283 	 * any DMA or interrupts unless someone asks us to, it's safe to not
    284 	 * do anything here.
    285 	 */
    286 }
    287 
    288 
    289 /*
    290  * ioat_channel_resume()
    291  */
    292 int
    293 ioat_channel_resume(ioat_state_t *state)
    294 {
    295 	ioat_channel_ring_t *ring;
    296 	ioat_channel_t channel;
    297 	uint32_t estat;
    298 	int i;
    299 
    300 
    301 	for (i = 0; i < state->is_num_channels; i++) {
    302 		channel = &state->is_channel[i];
    303 		ring = channel->ic_ring;
    304 
    305 		if (!channel->ic_inuse) {
    306 			continue;
    307 		}
    308 
    309 		/*
    310 		 * Configure DMA channel
    311 		 *   Channel In Use
    312 		 *   Error Interrupt Enable
    313 		 *   Any Error Abort Enable
    314 		 *   Error Completion Enable
    315 		 */
    316 		ddi_put16(state->is_reg_handle,
    317 		    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x011C);
    318 
    319 		/* check channel error register, clear any errors */
    320 		estat = ddi_get32(state->is_reg_handle,
    321 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
    322 		if (estat != 0) {
    323 #ifdef	DEBUG
    324 			cmn_err(CE_CONT, "cleared errors (0x%x) before channel"
    325 			    " (%d) enable\n", estat, channel->ic_chan_num);
    326 #endif
    327 			ddi_put32(state->is_reg_handle,
    328 			    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR],
    329 			    estat);
    330 		}
    331 
    332 		/* Re-initialize the ring */
    333 		bzero(ring->cr_desc, channel->ic_desc_alloc_size);
    334 		/* write the physical address into the chain address register */
    335 		if (channel->ic_ver == IOAT_CBv1) {
    336 			ddi_put32(state->is_reg_handle,
    337 			    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO],
    338 			    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
    339 			ddi_put32(state->is_reg_handle,
    340 			    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI],
    341 			    (uint32_t)(ring->cr_phys_desc >> 32));
    342 		} else {
    343 			ASSERT(channel->ic_ver == IOAT_CBv2);
    344 			ddi_put32(state->is_reg_handle,
    345 			    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO],
    346 			    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
    347 			ddi_put32(state->is_reg_handle,
    348 			    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI],
    349 			    (uint32_t)(ring->cr_phys_desc >> 32));
    350 		}
    351 
    352 		/* re-initialize the completion buffer */
    353 		bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size);
    354 		/* write the phys addr into the completion address register */
    355 		ddi_put32(state->is_reg_handle,
    356 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO],
    357 		    (uint32_t)(channel->ic_phys_cmpl & 0xffffffff));
    358 		ddi_put32(state->is_reg_handle,
    359 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI],
    360 		    (uint32_t)(channel->ic_phys_cmpl >> 32));
    361 
    362 		/* start-up the channel */
    363 		ioat_channel_start(channel);
    364 
    365 	}
    366 
    367 	return (DDI_SUCCESS);
    368 }
    369 
    370 /*
    371  * quiesce(9E) entry point.
    372  *
    373  * This function is called when the system is single-threaded at high
    374  * PIL with preemption disabled. Therefore, this function must not be
    375  * blocked.
    376  *
    377  * This function returns DDI_SUCCESS on success, or DDI_FAILURE on failure.
    378  * DDI_FAILURE indicates an error condition and should almost never happen.
    379  */
    380 void
    381 ioat_channel_quiesce(ioat_state_t *state)
    382 {
    383 	int i;
    384 
    385 	/*
    386 	 * Walk through all channels and quiesce
    387 	 */
    388 	for (i = 0; i < state->is_num_channels; i++) {
    389 
    390 		ioat_channel_t	channel = state->is_channel + i;
    391 
    392 		if (!channel->ic_inuse)
    393 			continue;
    394 
    395 		/* disable the interrupts */
    396 		ddi_put16(state->is_reg_handle,
    397 		    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL],
    398 		    0x0);
    399 
    400 		ioat_channel_reset(channel);
    401 	}
    402 }
    403 
    404 
    405 /*
    406  * ioat_channel_free()
    407  */
    408 void
    409 ioat_channel_free(void *channel_private)
    410 {
    411 	struct ioat_channel_s *channel;
    412 	ioat_channel_t *chan;
    413 	ioat_state_t *state;
    414 	uint_t chan_num;
    415 
    416 
    417 	chan = (ioat_channel_t *)channel_private;
    418 	channel = *chan;
    419 
    420 	state = channel->ic_state;
    421 	chan_num = channel->ic_chan_num;
    422 
    423 	/* disable the interrupts */
    424 	ddi_put16(state->is_reg_handle,
    425 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], 0x0);
    426 
    427 	ioat_channel_reset(channel);
    428 
    429 	/* cleanup command cache */
    430 	kmem_cache_destroy(channel->ic_cmd_cache);
    431 
    432 	/* clean-up/free-up the completion space and descriptors */
    433 	ioat_completion_free(channel);
    434 	ioat_ring_free(channel);
    435 
    436 	channel->ic_inuse = B_FALSE;
    437 
    438 	/* free the H/W DMA engine */
    439 	ioat_rs_free(state->is_channel_rs, chan_num);
    440 
    441 	*chan = NULL;
    442 }
    443 
    444 
    445 /*
    446  * ioat_channel_intr()
    447  */
    448 void
    449 ioat_channel_intr(ioat_channel_t channel)
    450 {
    451 	ioat_state_t *state;
    452 	uint16_t chanctrl;
    453 	uint32_t chanerr;
    454 	uint32_t status;
    455 
    456 
    457 	state = channel->ic_state;
    458 
    459 	if (channel->ic_ver == IOAT_CBv1) {
    460 		status = ddi_get32(state->is_reg_handle,
    461 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_STS_LO]);
    462 	} else {
    463 		ASSERT(channel->ic_ver == IOAT_CBv2);
    464 		status = ddi_get32(state->is_reg_handle,
    465 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_STS_LO]);
    466 	}
    467 
    468 	/* if that status isn't ACTIVE or IDLE, the channel has failed */
    469 	if (status & IOAT_CHAN_STS_FAIL_MASK) {
    470 		chanerr = ddi_get32(state->is_reg_handle,
    471 		    (uint32_t *)&channel->ic_regs[IOAT_CHAN_ERR]);
    472 		cmn_err(CE_WARN, "channel(%d) fatal failure! "
    473 		    "chanstat_lo=0x%X; chanerr=0x%X\n",
    474 		    channel->ic_chan_num, status, chanerr);
    475 		channel->ic_channel_state = IOAT_CHANNEL_IN_FAILURE;
    476 		ioat_channel_reset(channel);
    477 
    478 		return;
    479 	}
    480 
    481 	/*
    482 	 * clear interrupt disable bit if set (it's a RW1C). Read it back to
    483 	 * ensure the write completes.
    484 	 */
    485 	chanctrl = ddi_get16(state->is_reg_handle,
    486 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]);
    487 	ddi_put16(state->is_reg_handle,
    488 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL], chanctrl);
    489 	(void) ddi_get16(state->is_reg_handle,
    490 	    (uint16_t *)&channel->ic_regs[IOAT_CHAN_CTL]);
    491 
    492 	/* tell dcopy we have seen a completion on this channel */
    493 	dcopy_device_channel_notify(channel->ic_dcopy_handle, DCOPY_COMPLETION);
    494 }
    495 
    496 
    497 /*
    498  * ioat_channel_start()
    499  */
    500 void
    501 ioat_channel_start(ioat_channel_t channel)
    502 {
    503 	ioat_chan_dma_desc_t desc;
    504 
    505 	/* set the first descriptor up as a NULL descriptor */
    506 	bzero(&desc, sizeof (desc));
    507 	desc.dd_size = 0;
    508 	desc.dd_ctrl = IOAT_DESC_CTRL_OP_DMA | IOAT_DESC_DMACTRL_NULL |
    509 	    IOAT_DESC_CTRL_CMPL;
    510 	desc.dd_next_desc = 0x0;
    511 
    512 	/* setup the very first descriptor */
    513 	ioat_ring_seed(channel, &desc);
    514 }
    515 
    516 
    517 /*
    518  * ioat_channel_reset()
    519  */
    520 void
    521 ioat_channel_reset(ioat_channel_t channel)
    522 {
    523 	ioat_state_t *state;
    524 
    525 	state = channel->ic_state;
    526 
    527 	/* hit the reset bit */
    528 	if (channel->ic_ver == IOAT_CBv1) {
    529 		ddi_put8(state->is_reg_handle,
    530 		    &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x20);
    531 	} else {
    532 		ASSERT(channel->ic_ver == IOAT_CBv2);
    533 		ddi_put8(state->is_reg_handle,
    534 		    &channel->ic_regs[IOAT_V2_CHAN_CMD], 0x20);
    535 	}
    536 }
    537 
    538 
    539 /*
    540  * ioat_completion_alloc()
    541  */
    542 int
    543 ioat_completion_alloc(ioat_channel_t channel)
    544 {
    545 	ioat_state_t *state;
    546 	size_t real_length;
    547 	uint_t cookie_cnt;
    548 	int e;
    549 
    550 
    551 	state = channel->ic_state;
    552 
    553 	/*
    554 	 * allocate memory for the completion status, zero it out, and get
    555 	 * the paddr. We'll allocate a physically contiguous cache line.
    556 	 */
    557 	e = ddi_dma_alloc_handle(state->is_dip, &ioat_cmpl_dma_attr,
    558 	    DDI_DMA_SLEEP, NULL, &channel->ic_cmpl_dma_handle);
    559 	if (e != DDI_SUCCESS) {
    560 		goto cmplallocfail_alloc_handle;
    561 	}
    562 	channel->ic_cmpl_alloc_size = 64;
    563 	e = ddi_dma_mem_alloc(channel->ic_cmpl_dma_handle,
    564 	    channel->ic_cmpl_alloc_size, &ioat_acc_attr,
    565 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
    566 	    (caddr_t *)&channel->ic_cmpl, &real_length,
    567 	    &channel->ic_cmpl_handle);
    568 	if (e != DDI_SUCCESS) {
    569 		goto cmplallocfail_mem_alloc;
    570 	}
    571 	bzero((void *)channel->ic_cmpl, channel->ic_cmpl_alloc_size);
    572 	e = ddi_dma_addr_bind_handle(channel->ic_cmpl_dma_handle, NULL,
    573 	    (caddr_t)channel->ic_cmpl, channel->ic_cmpl_alloc_size,
    574 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
    575 	    &channel->ic_cmpl_cookie, &cookie_cnt);
    576 	if (e != DDI_SUCCESS) {
    577 		goto cmplallocfail_addr_bind;
    578 	}
    579 	ASSERT(cookie_cnt == 1);
    580 	ASSERT(channel->ic_cmpl_cookie.dmac_size ==
    581 	    channel->ic_cmpl_alloc_size);
    582 	channel->ic_phys_cmpl = channel->ic_cmpl_cookie.dmac_laddress;
    583 
    584 	/* write the physical address into the completion address register */
    585 	ddi_put32(state->is_reg_handle,
    586 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO],
    587 	    (uint32_t)(channel->ic_phys_cmpl & 0xffffffff));
    588 	ddi_put32(state->is_reg_handle,
    589 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI],
    590 	    (uint32_t)(channel->ic_phys_cmpl >> 32));
    591 
    592 	return (DDI_SUCCESS);
    593 
    594 cmplallocfail_addr_bind:
    595 	ddi_dma_mem_free(&channel->ic_desc_handle);
    596 cmplallocfail_mem_alloc:
    597 	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
    598 cmplallocfail_alloc_handle:
    599 	return (DDI_FAILURE);
    600 }
    601 
    602 
    603 /*
    604  * ioat_completion_free()
    605  */
    606 void
    607 ioat_completion_free(ioat_channel_t channel)
    608 {
    609 	ioat_state_t *state;
    610 
    611 	state = channel->ic_state;
    612 
    613 	/* reset the completion address register */
    614 	ddi_put32(state->is_reg_handle,
    615 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_LO], 0x0);
    616 	ddi_put32(state->is_reg_handle,
    617 	    (uint32_t *)&channel->ic_regs[IOAT_CHAN_CMPL_HI], 0x0);
    618 
    619 	/* unbind, then free up the memory, dma handle */
    620 	(void) ddi_dma_unbind_handle(channel->ic_cmpl_dma_handle);
    621 	ddi_dma_mem_free(&channel->ic_cmpl_handle);
    622 	ddi_dma_free_handle(&channel->ic_cmpl_dma_handle);
    623 }
    624 
    625 /*
    626  * ioat_ring_alloc()
    627  */
    628 int
    629 ioat_ring_alloc(ioat_channel_t channel, uint_t desc_cnt)
    630 {
    631 	ioat_channel_ring_t *ring;
    632 	ioat_state_t *state;
    633 	size_t real_length;
    634 	uint_t cookie_cnt;
    635 	int e;
    636 
    637 
    638 	state = channel->ic_state;
    639 
    640 	ring = kmem_zalloc(sizeof (ioat_channel_ring_t), KM_SLEEP);
    641 	channel->ic_ring = ring;
    642 	ring->cr_chan = channel;
    643 	ring->cr_post_cnt = 0;
    644 
    645 	mutex_init(&ring->cr_cmpl_mutex, NULL, MUTEX_DRIVER,
    646 	    channel->ic_state->is_iblock_cookie);
    647 	mutex_init(&ring->cr_desc_mutex, NULL, MUTEX_DRIVER,
    648 	    channel->ic_state->is_iblock_cookie);
    649 
    650 	/*
    651 	 * allocate memory for the ring, zero it out, and get the paddr.
    652 	 * We'll allocate a physically contiguous chunck of memory  which
    653 	 * simplifies the completion logic.
    654 	 */
    655 	e = ddi_dma_alloc_handle(state->is_dip, &ioat_desc_dma_attr,
    656 	    DDI_DMA_SLEEP, NULL, &channel->ic_desc_dma_handle);
    657 	if (e != DDI_SUCCESS) {
    658 		goto ringallocfail_alloc_handle;
    659 	}
    660 	/*
    661 	 * allocate one extra descriptor so we can simplify the empty/full
    662 	 * logic. Then round that number up to a whole multiple of 4.
    663 	 */
    664 	channel->ic_chan_desc_cnt = ((desc_cnt + 1) + 3) & ~0x3;
    665 	ring->cr_desc_last = channel->ic_chan_desc_cnt - 1;
    666 	channel->ic_desc_alloc_size = channel->ic_chan_desc_cnt *
    667 	    sizeof (ioat_chan_desc_t);
    668 	e = ddi_dma_mem_alloc(channel->ic_desc_dma_handle,
    669 	    channel->ic_desc_alloc_size, &ioat_acc_attr,
    670 	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
    671 	    (caddr_t *)&ring->cr_desc, &real_length, &channel->ic_desc_handle);
    672 	if (e != DDI_SUCCESS) {
    673 		goto ringallocfail_mem_alloc;
    674 	}
    675 	bzero(ring->cr_desc, channel->ic_desc_alloc_size);
    676 	e = ddi_dma_addr_bind_handle(channel->ic_desc_dma_handle, NULL,
    677 	    (caddr_t)ring->cr_desc, channel->ic_desc_alloc_size,
    678 	    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL,
    679 	    &channel->ic_desc_cookies, &cookie_cnt);
    680 	if (e != DDI_SUCCESS) {
    681 		goto ringallocfail_addr_bind;
    682 	}
    683 	ASSERT(cookie_cnt == 1);
    684 	ASSERT(channel->ic_desc_cookies.dmac_size ==
    685 	    channel->ic_desc_alloc_size);
    686 	ring->cr_phys_desc = channel->ic_desc_cookies.dmac_laddress;
    687 
    688 	/* write the physical address into the chain address register */
    689 	if (channel->ic_ver == IOAT_CBv1) {
    690 		ddi_put32(state->is_reg_handle,
    691 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO],
    692 		    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
    693 		ddi_put32(state->is_reg_handle,
    694 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI],
    695 		    (uint32_t)(ring->cr_phys_desc >> 32));
    696 	} else {
    697 		ASSERT(channel->ic_ver == IOAT_CBv2);
    698 		ddi_put32(state->is_reg_handle,
    699 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO],
    700 		    (uint32_t)(ring->cr_phys_desc & 0xffffffff));
    701 		ddi_put32(state->is_reg_handle,
    702 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI],
    703 		    (uint32_t)(ring->cr_phys_desc >> 32));
    704 	}
    705 
    706 	return (DCOPY_SUCCESS);
    707 
    708 ringallocfail_addr_bind:
    709 	ddi_dma_mem_free(&channel->ic_desc_handle);
    710 ringallocfail_mem_alloc:
    711 	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
    712 ringallocfail_alloc_handle:
    713 	mutex_destroy(&ring->cr_desc_mutex);
    714 	mutex_destroy(&ring->cr_cmpl_mutex);
    715 	kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t));
    716 
    717 	return (DCOPY_FAILURE);
    718 }
    719 
    720 
    721 /*
    722  * ioat_ring_free()
    723  */
    724 void
    725 ioat_ring_free(ioat_channel_t channel)
    726 {
    727 	ioat_state_t *state;
    728 
    729 
    730 	state = channel->ic_state;
    731 
    732 	/* reset the chain address register */
    733 	if (channel->ic_ver == IOAT_CBv1) {
    734 		ddi_put32(state->is_reg_handle,
    735 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_LO], 0x0);
    736 		ddi_put32(state->is_reg_handle,
    737 		    (uint32_t *)&channel->ic_regs[IOAT_V1_CHAN_ADDR_HI], 0x0);
    738 	} else {
    739 		ASSERT(channel->ic_ver == IOAT_CBv2);
    740 		ddi_put32(state->is_reg_handle,
    741 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_LO], 0x0);
    742 		ddi_put32(state->is_reg_handle,
    743 		    (uint32_t *)&channel->ic_regs[IOAT_V2_CHAN_ADDR_HI], 0x0);
    744 	}
    745 
    746 	/* unbind, then free up the memory, dma handle */
    747 	(void) ddi_dma_unbind_handle(channel->ic_desc_dma_handle);
    748 	ddi_dma_mem_free(&channel->ic_desc_handle);
    749 	ddi_dma_free_handle(&channel->ic_desc_dma_handle);
    750 
    751 	mutex_destroy(&channel->ic_ring->cr_desc_mutex);
    752 	mutex_destroy(&channel->ic_ring->cr_cmpl_mutex);
    753 	kmem_free(channel->ic_ring, sizeof (ioat_channel_ring_t));
    754 
    755 }
    756 
    757 
    758 /*
    759  * ioat_ring_seed()
    760  *    write the first descriptor in the ring.
    761  */
    762 void
    763 ioat_ring_seed(ioat_channel_t channel, ioat_chan_dma_desc_t *in_desc)
    764 {
    765 	ioat_channel_ring_t *ring;
    766 	ioat_chan_dma_desc_t *desc;
    767 	ioat_chan_dma_desc_t *prev;
    768 	ioat_state_t *state;
    769 
    770 
    771 	state = channel->ic_state;
    772 	ring = channel->ic_ring;
    773 
    774 	/* init the completion state */
    775 	ring->cr_cmpl_gen = 0x0;
    776 	ring->cr_cmpl_last = 0x0;
    777 
    778 	/* write in the descriptor and init the descriptor state */
    779 	ring->cr_post_cnt++;
    780 	channel->ic_ring->cr_desc[0] = *(ioat_chan_desc_t *)in_desc;
    781 	ring->cr_desc_gen = 0;
    782 	ring->cr_desc_prev = 0;
    783 	ring->cr_desc_next = 1;
    784 
    785 	if (channel->ic_ver == IOAT_CBv1) {
    786 		/* hit the start bit */
    787 		ddi_put8(state->is_reg_handle,
    788 		    &channel->ic_regs[IOAT_V1_CHAN_CMD], 0x1);
    789 	} else {
    790 		/*
    791 		 * if this is CBv2, link the descriptor to an empty
    792 		 * descriptor
    793 		 */
    794 		ASSERT(ring->cr_chan->ic_ver == IOAT_CBv2);
    795 		desc = (ioat_chan_dma_desc_t *)
    796 		    &ring->cr_desc[ring->cr_desc_next];
    797 		prev = (ioat_chan_dma_desc_t *)
    798 		    &ring->cr_desc[ring->cr_desc_prev];
    799 
    800 		desc->dd_ctrl = 0;
    801 		desc->dd_next_desc = 0x0;
    802 
    803 		prev->dd_next_desc = ring->cr_phys_desc +
    804 		    (ring->cr_desc_next << 6);
    805 
    806 		ddi_put16(state->is_reg_handle,
    807 		    (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT],
    808 		    (uint16_t)1);
    809 	}
    810 
    811 }
    812 
    813 /*
    814  * ioat_ring_loop()
    815  * Make the ring loop for CB v1
    816  * This function assume we are in the ring->cr_desc_mutex mutex context
    817  */
    818 int
    819 ioat_ring_loop(ioat_channel_ring_t *ring, dcopy_cmd_t cmd)
    820 {
    821 	uint64_t count;
    822 	ioat_channel_t channel;
    823 	ioat_chan_dma_desc_t *curr;
    824 	ioat_cmd_private_t *prevpriv;
    825 	ioat_cmd_private_t *currpriv;
    826 
    827 	channel = ring->cr_chan;
    828 	ASSERT(channel->ic_ver == IOAT_CBv1);
    829 
    830 	/*
    831 	 * For each cmd in the command queue, check whether they are continuous
    832 	 * in descriptor ring. Return error if not continuous.
    833 	 */
    834 	for (count = 0, prevpriv = NULL;
    835 	    cmd != NULL && count <= channel->ic_chan_desc_cnt;
    836 	    prevpriv = currpriv) {
    837 		currpriv = cmd->dp_private->pr_device_cmd_private;
    838 		if (prevpriv != NULL &&
    839 		    currpriv->ip_index + 1 != prevpriv->ip_start &&
    840 		    currpriv->ip_index + 1 != prevpriv->ip_start +
    841 		    channel->ic_chan_desc_cnt) {
    842 			/* Non-continuous, other commands get interleaved */
    843 			return (DCOPY_FAILURE);
    844 		}
    845 		if (currpriv->ip_index < currpriv->ip_start) {
    846 			count += channel->ic_chan_desc_cnt
    847 			    + currpriv->ip_index - currpriv->ip_start + 1;
    848 		} else {
    849 			count += currpriv->ip_index - currpriv->ip_start + 1;
    850 		}
    851 		cmd = currpriv->ip_next;
    852 	}
    853 	/*
    854 	 * Check for too many descriptors which would cause wrap around in
    855 	 * descriptor ring. And make sure there is space for cancel operation.
    856 	 */
    857 	if (count >= channel->ic_chan_desc_cnt) {
    858 		return (DCOPY_FAILURE);
    859 	}
    860 
    861 	/* Point next descriptor to header of chain. */
    862 	curr = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
    863 	curr->dd_next_desc = ring->cr_phys_desc + (currpriv->ip_start << 6);
    864 
    865 	/* sync the last desc */
    866 	(void) ddi_dma_sync(channel->ic_desc_dma_handle,
    867 	    ring->cr_desc_prev << 6, 64, DDI_DMA_SYNC_FORDEV);
    868 
    869 	return (DCOPY_SUCCESS);
    870 }
    871 
    872 
    873 /*
    874  * ioat_cmd_alloc()
    875  */
    876 int
    877 ioat_cmd_alloc(void *private, int flags, dcopy_cmd_t *cmd)
    878 {
    879 	ioat_cmd_private_t *priv;
    880 	ioat_channel_t channel;
    881 	dcopy_cmd_t oldcmd;
    882 	int kmflag;
    883 
    884 
    885 	channel = (ioat_channel_t)private;
    886 
    887 	if (flags & DCOPY_NOSLEEP) {
    888 		kmflag = KM_NOSLEEP;
    889 	} else {
    890 		kmflag = KM_SLEEP;
    891 	}
    892 
    893 	/* save the command passed incase DCOPY_ALLOC_LINK is set */
    894 	oldcmd = *cmd;
    895 
    896 	*cmd = kmem_cache_alloc(channel->ic_cmd_cache, kmflag);
    897 	if (*cmd == NULL) {
    898 		return (DCOPY_NORESOURCES);
    899 	}
    900 
    901 	/* setup the dcopy and ioat private state pointers */
    902 	(*cmd)->dp_version = DCOPY_CMD_V0;
    903 	(*cmd)->dp_cmd = 0;
    904 	(*cmd)->dp_private = (struct dcopy_cmd_priv_s *)
    905 	    ((uintptr_t)(*cmd) + sizeof (struct dcopy_cmd_s));
    906 	(*cmd)->dp_private->pr_device_cmd_private =
    907 	    (struct ioat_cmd_private_s *)((uintptr_t)(*cmd)->dp_private +
    908 	    sizeof (struct dcopy_cmd_priv_s));
    909 
    910 	/*
    911 	 * if DCOPY_ALLOC_LINK is set, link the old command to the new one
    912 	 * just allocated.
    913 	 */
    914 	priv = (*cmd)->dp_private->pr_device_cmd_private;
    915 	if (flags & DCOPY_ALLOC_LINK) {
    916 		priv->ip_next = oldcmd;
    917 	} else {
    918 		priv->ip_next = NULL;
    919 	}
    920 
    921 	return (DCOPY_SUCCESS);
    922 }
    923 
    924 
    925 /*
    926  * ioat_cmd_free()
    927  */
    928 void
    929 ioat_cmd_free(void *private, dcopy_cmd_t *cmdp)
    930 {
    931 	ioat_cmd_private_t *priv;
    932 	ioat_channel_t channel;
    933 	dcopy_cmd_t next;
    934 	dcopy_cmd_t cmd;
    935 
    936 
    937 	channel = (ioat_channel_t)private;
    938 	cmd = *(cmdp);
    939 
    940 	/*
    941 	 * free all the commands in the chain (see DCOPY_ALLOC_LINK in
    942 	 * ioat_cmd_alloc() for more info).
    943 	 */
    944 	while (cmd != NULL) {
    945 		priv = cmd->dp_private->pr_device_cmd_private;
    946 		next = priv->ip_next;
    947 		kmem_cache_free(channel->ic_cmd_cache, cmd);
    948 		cmd = next;
    949 	}
    950 	*cmdp = NULL;
    951 }
    952 
    953 
    954 /*
    955  * ioat_cmd_post()
    956  */
    957 int
    958 ioat_cmd_post(void *private, dcopy_cmd_t cmd)
    959 {
    960 	ioat_channel_ring_t *ring;
    961 	ioat_cmd_private_t *priv;
    962 	ioat_channel_t channel;
    963 	ioat_state_t *state;
    964 	uint64_t dest_paddr;
    965 	uint64_t src_paddr;
    966 	uint64_t dest_addr;
    967 	uint32_t dest_size;
    968 	uint64_t src_addr;
    969 	uint32_t src_size;
    970 	size_t xfer_size;
    971 	uint32_t ctrl;
    972 	size_t size;
    973 	int e;
    974 
    975 
    976 	channel = (ioat_channel_t)private;
    977 	priv = cmd->dp_private->pr_device_cmd_private;
    978 
    979 	state = channel->ic_state;
    980 	ring = channel->ic_ring;
    981 
    982 	/*
    983 	 * Special support for DCOPY_CMD_LOOP option, only supported on CBv1.
    984 	 * DCOPY_CMD_QUEUE should also be set if DCOPY_CMD_LOOP is set.
    985 	 */
    986 	if ((cmd->dp_flags & DCOPY_CMD_LOOP) &&
    987 	    (channel->ic_ver != IOAT_CBv1 ||
    988 	    (cmd->dp_flags & DCOPY_CMD_QUEUE))) {
    989 		return (DCOPY_FAILURE);
    990 	}
    991 
    992 	if ((cmd->dp_flags & DCOPY_CMD_NOWAIT) == 0) {
    993 		mutex_enter(&ring->cr_desc_mutex);
    994 
    995 	/*
    996 	 * Try to acquire mutex if NOWAIT flag is set.
    997 	 * Return failure if failed to acquire mutex.
    998 	 */
    999 	} else if (mutex_tryenter(&ring->cr_desc_mutex) == 0) {
   1000 		return (DCOPY_FAILURE);
   1001 	}
   1002 
   1003 	/* if the channel has had a fatal failure, return failure */
   1004 	if (channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) {
   1005 		mutex_exit(&ring->cr_desc_mutex);
   1006 		return (DCOPY_FAILURE);
   1007 	}
   1008 
   1009 	/* make sure we have space for the descriptors */
   1010 	e = ioat_ring_reserve(channel, ring, cmd);
   1011 	if (e != DCOPY_SUCCESS) {
   1012 		mutex_exit(&ring->cr_desc_mutex);
   1013 		return (DCOPY_NORESOURCES);
   1014 	}
   1015 
   1016 	/* if we support DCA, and the DCA flag is set, post a DCA desc */
   1017 	if ((channel->ic_ver == IOAT_CBv2) &&
   1018 	    (cmd->dp_flags & DCOPY_CMD_DCA)) {
   1019 		ioat_cmd_post_dca(ring, cmd->dp_dca_id);
   1020 	}
   1021 
   1022 	/*
   1023 	 * the dma copy may have to be broken up into multiple descriptors
   1024 	 * since we can't cross a page boundary.
   1025 	 */
   1026 	ASSERT(cmd->dp_version == DCOPY_CMD_V0);
   1027 	ASSERT(cmd->dp_cmd == DCOPY_CMD_COPY);
   1028 	src_addr = cmd->dp.copy.cc_source;
   1029 	dest_addr = cmd->dp.copy.cc_dest;
   1030 	size = cmd->dp.copy.cc_size;
   1031 	priv->ip_start = ring->cr_desc_next;
   1032 	while (size > 0) {
   1033 		src_paddr = pa_to_ma(src_addr);
   1034 		dest_paddr = pa_to_ma(dest_addr);
   1035 
   1036 		/* adjust for any offset into the page */
   1037 		if ((src_addr & PAGEOFFSET) == 0) {
   1038 			src_size = PAGESIZE;
   1039 		} else {
   1040 			src_size = PAGESIZE - (src_addr & PAGEOFFSET);
   1041 		}
   1042 		if ((dest_addr & PAGEOFFSET) == 0) {
   1043 			dest_size = PAGESIZE;
   1044 		} else {
   1045 			dest_size = PAGESIZE - (dest_addr & PAGEOFFSET);
   1046 		}
   1047 
   1048 		/* take the smallest of the three */
   1049 		xfer_size = MIN(src_size, dest_size);
   1050 		xfer_size = MIN(xfer_size, size);
   1051 
   1052 		/*
   1053 		 * if this is the last descriptor, and we are supposed to
   1054 		 * generate a completion, generate a completion. same logic
   1055 		 * for interrupt.
   1056 		 */
   1057 		ctrl = 0;
   1058 		if (cmd->dp_flags & DCOPY_CMD_NOSRCSNP) {
   1059 			ctrl |= IOAT_DESC_CTRL_NOSRCSNP;
   1060 		}
   1061 		if (cmd->dp_flags & DCOPY_CMD_NODSTSNP) {
   1062 			ctrl |= IOAT_DESC_CTRL_NODSTSNP;
   1063 		}
   1064 		if (xfer_size == size) {
   1065 			if (!(cmd->dp_flags & DCOPY_CMD_NOSTAT)) {
   1066 				ctrl |= IOAT_DESC_CTRL_CMPL;
   1067 			}
   1068 			if ((cmd->dp_flags & DCOPY_CMD_INTR)) {
   1069 				ctrl |= IOAT_DESC_CTRL_INTR;
   1070 			}
   1071 		}
   1072 
   1073 		ioat_cmd_post_copy(ring, src_paddr, dest_paddr, xfer_size,
   1074 		    ctrl);
   1075 
   1076 		/* go to the next page */
   1077 		src_addr += xfer_size;
   1078 		dest_addr += xfer_size;
   1079 		size -= xfer_size;
   1080 	}
   1081 
   1082 	/* save away the state so we can poll on it. */
   1083 	priv->ip_generation = ring->cr_desc_gen_prev;
   1084 	priv->ip_index = ring->cr_desc_prev;
   1085 
   1086 	/* if queue not defined, tell the DMA engine about it */
   1087 	if (!(cmd->dp_flags & DCOPY_CMD_QUEUE)) {
   1088 		/*
   1089 		 * Link the ring to a loop (currently only for FIPE).
   1090 		 */
   1091 		if (cmd->dp_flags & DCOPY_CMD_LOOP) {
   1092 			e = ioat_ring_loop(ring, cmd);
   1093 			if (e != DCOPY_SUCCESS) {
   1094 				mutex_exit(&ring->cr_desc_mutex);
   1095 				return (DCOPY_FAILURE);
   1096 			}
   1097 		}
   1098 
   1099 		if (channel->ic_ver == IOAT_CBv1) {
   1100 			ddi_put8(state->is_reg_handle,
   1101 			    (uint8_t *)&channel->ic_regs[IOAT_V1_CHAN_CMD],
   1102 			    0x2);
   1103 		} else {
   1104 			ASSERT(channel->ic_ver == IOAT_CBv2);
   1105 			ddi_put16(state->is_reg_handle,
   1106 			    (uint16_t *)&channel->ic_regs[IOAT_V2_CHAN_CNT],
   1107 			    (uint16_t)(ring->cr_post_cnt & 0xFFFF));
   1108 		}
   1109 	}
   1110 
   1111 	mutex_exit(&ring->cr_desc_mutex);
   1112 
   1113 	return (DCOPY_SUCCESS);
   1114 }
   1115 
   1116 
   1117 /*
   1118  * ioat_cmd_post_dca()
   1119  */
   1120 static void
   1121 ioat_cmd_post_dca(ioat_channel_ring_t *ring, uint32_t dca_id)
   1122 {
   1123 	ioat_chan_dca_desc_t *saved_prev;
   1124 	ioat_chan_dca_desc_t *desc;
   1125 	ioat_chan_dca_desc_t *prev;
   1126 	ioat_channel_t channel;
   1127 	uint64_t next_desc_phys;
   1128 	off_t prev_offset;
   1129 	off_t next_offset;
   1130 
   1131 
   1132 	channel = ring->cr_chan;
   1133 	desc = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_next];
   1134 	prev = (ioat_chan_dca_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
   1135 
   1136 	/* keep track of the number of descs posted for cbv2 */
   1137 	ring->cr_post_cnt++;
   1138 
   1139 	/*
   1140 	 * post a context change desriptor. If dca has never been used on
   1141 	 * this channel, or if the id doesn't match the last id used on this
   1142 	 * channel, set CONTEXT_CHANGE bit and dca id, set dca state to active,
   1143 	 * and save away the id we're using.
   1144 	 */
   1145 	desc->dd_ctrl = IOAT_DESC_CTRL_OP_CNTX;
   1146 	desc->dd_next_desc = 0x0;
   1147 	if (!channel->ic_dca_active || (channel->ic_dca_current != dca_id)) {
   1148 		channel->ic_dca_active = B_TRUE;
   1149 		channel->ic_dca_current = dca_id;
   1150 		desc->dd_ctrl |= IOAT_DESC_CTRL_CNTX_CHNG;
   1151 		desc->dd_cntx = dca_id;
   1152 	}
   1153 
   1154 	/*
   1155 	 * save next desc and prev offset for when we link the two
   1156 	 * descriptors together.
   1157 	 */
   1158 	saved_prev = prev;
   1159 	prev_offset = ring->cr_desc_prev << 6;
   1160 	next_offset = ring->cr_desc_next << 6;
   1161 	next_desc_phys = ring->cr_phys_desc + next_offset;
   1162 
   1163 	/* save the current desc_next and desc_last for the completion */
   1164 	ring->cr_desc_prev = ring->cr_desc_next;
   1165 	ring->cr_desc_gen_prev = ring->cr_desc_gen;
   1166 
   1167 	/* increment next/gen so it points to the next free desc */
   1168 	ring->cr_desc_next++;
   1169 	if (ring->cr_desc_next > ring->cr_desc_last) {
   1170 		ring->cr_desc_next = 0;
   1171 		ring->cr_desc_gen++;
   1172 	}
   1173 
   1174 	/*
   1175 	 * if this is CBv2, link the descriptor to an empty descriptor. Since
   1176 	 * we always leave on desc empty to detect full, this works out.
   1177 	 */
   1178 	if (ring->cr_chan->ic_ver == IOAT_CBv2) {
   1179 		desc = (ioat_chan_dca_desc_t *)
   1180 		    &ring->cr_desc[ring->cr_desc_next];
   1181 		prev = (ioat_chan_dca_desc_t *)
   1182 		    &ring->cr_desc[ring->cr_desc_prev];
   1183 		desc->dd_ctrl = 0;
   1184 		desc->dd_next_desc = 0x0;
   1185 		(void) ddi_dma_sync(channel->ic_desc_dma_handle,
   1186 		    ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV);
   1187 		prev->dd_next_desc = ring->cr_phys_desc +
   1188 		    (ring->cr_desc_next << 6);
   1189 	}
   1190 
   1191 	/* Put the descriptors physical address in the previous descriptor */
   1192 	/*LINTED:E_TRUE_LOGICAL_EXPR*/
   1193 	ASSERT(sizeof (ioat_chan_dca_desc_t) == 64);
   1194 
   1195 	/* sync the current desc */
   1196 	(void) ddi_dma_sync(channel->ic_desc_dma_handle, next_offset, 64,
   1197 	    DDI_DMA_SYNC_FORDEV);
   1198 
   1199 	/* update the previous desc and sync it too */
   1200 	saved_prev->dd_next_desc = next_desc_phys;
   1201 	(void) ddi_dma_sync(channel->ic_desc_dma_handle, prev_offset, 64,
   1202 	    DDI_DMA_SYNC_FORDEV);
   1203 }
   1204 
   1205 
   1206 /*
   1207  * ioat_cmd_post_copy()
   1208  *
   1209  */
   1210 static void
   1211 ioat_cmd_post_copy(ioat_channel_ring_t *ring, uint64_t src_addr,
   1212     uint64_t dest_addr, uint32_t size, uint32_t ctrl)
   1213 {
   1214 	ioat_chan_dma_desc_t *saved_prev;
   1215 	ioat_chan_dma_desc_t *desc;
   1216 	ioat_chan_dma_desc_t *prev;
   1217 	ioat_channel_t channel;
   1218 	uint64_t next_desc_phy;
   1219 	off_t prev_offset;
   1220 	off_t next_offset;
   1221 
   1222 
   1223 	channel = ring->cr_chan;
   1224 	desc = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_next];
   1225 	prev = (ioat_chan_dma_desc_t *)&ring->cr_desc[ring->cr_desc_prev];
   1226 
   1227 	/* keep track of the number of descs posted for cbv2 */
   1228 	ring->cr_post_cnt++;
   1229 
   1230 	/* write in the DMA desc */
   1231 	desc->dd_ctrl = IOAT_DESC_CTRL_OP_DMA | ctrl;
   1232 	desc->dd_size = size;
   1233 	desc->dd_src_paddr = src_addr;
   1234 	desc->dd_dest_paddr = dest_addr;
   1235 	desc->dd_next_desc = 0x0;
   1236 
   1237 	/*
   1238 	 * save next desc and prev offset for when we link the two
   1239 	 * descriptors together.
   1240 	 */
   1241 	saved_prev = prev;
   1242 	prev_offset = ring->cr_desc_prev << 6;
   1243 	next_offset = ring->cr_desc_next << 6;
   1244 	next_desc_phy = ring->cr_phys_desc + next_offset;
   1245 
   1246 	/* increment next/gen so it points to the next free desc */
   1247 	ring->cr_desc_prev = ring->cr_desc_next;
   1248 	ring->cr_desc_gen_prev = ring->cr_desc_gen;
   1249 
   1250 	/* increment next/gen so it points to the next free desc */
   1251 	ring->cr_desc_next++;
   1252 	if (ring->cr_desc_next > ring->cr_desc_last) {
   1253 		ring->cr_desc_next = 0;
   1254 		ring->cr_desc_gen++;
   1255 	}
   1256 
   1257 	/*
   1258 	 * if this is CBv2, link the descriptor to an empty descriptor. Since
   1259 	 * we always leave on desc empty to detect full, this works out.
   1260 	 */
   1261 	if (ring->cr_chan->ic_ver == IOAT_CBv2) {
   1262 		desc = (ioat_chan_dma_desc_t *)
   1263 		    &ring->cr_desc[ring->cr_desc_next];
   1264 		prev = (ioat_chan_dma_desc_t *)
   1265 		    &ring->cr_desc[ring->cr_desc_prev];
   1266 		desc->dd_size = 0;
   1267 		desc->dd_ctrl = 0;
   1268 		desc->dd_next_desc = 0x0;
   1269 		(void) ddi_dma_sync(channel->ic_desc_dma_handle,
   1270 		    ring->cr_desc_next << 6, 64, DDI_DMA_SYNC_FORDEV);
   1271 		prev->dd_next_desc = ring->cr_phys_desc +
   1272 		    (ring->cr_desc_next << 6);
   1273 	}
   1274 
   1275 	/* Put the descriptors physical address in the previous descriptor */
   1276 	/*LINTED:E_TRUE_LOGICAL_EXPR*/
   1277 	ASSERT(sizeof (ioat_chan_dma_desc_t) == 64);
   1278 
   1279 	/* sync the current desc */
   1280 	(void) ddi_dma_sync(channel->ic_desc_dma_handle, next_offset, 64,
   1281 	    DDI_DMA_SYNC_FORDEV);
   1282 
   1283 	/* update the previous desc and sync it too */
   1284 	saved_prev->dd_next_desc = next_desc_phy;
   1285 	(void) ddi_dma_sync(channel->ic_desc_dma_handle, prev_offset, 64,
   1286 	    DDI_DMA_SYNC_FORDEV);
   1287 }
   1288 
   1289 
   1290 /*
   1291  * ioat_cmd_poll()
   1292  */
   1293 int
   1294 ioat_cmd_poll(void *private, dcopy_cmd_t cmd)
   1295 {
   1296 	ioat_channel_ring_t *ring;
   1297 	ioat_cmd_private_t *priv;
   1298 	ioat_channel_t channel;
   1299 	uint64_t generation;
   1300 	uint64_t last_cmpl;
   1301 
   1302 	ASSERT(cmd != NULL);
   1303 	channel = (ioat_channel_t)private;
   1304 	priv = cmd->dp_private->pr_device_cmd_private;
   1305 
   1306 	ring = channel->ic_ring;
   1307 	ASSERT(ring != NULL);
   1308 
   1309 	if ((cmd->dp_flags & DCOPY_CMD_NOWAIT) == 0) {
   1310 		mutex_enter(&ring->cr_cmpl_mutex);
   1311 
   1312 	/*
   1313 	 * Try to acquire mutex if NOWAIT flag is set.
   1314 	 * Return failure if failed to acquire mutex.
   1315 	 */
   1316 	} else if (mutex_tryenter(&ring->cr_cmpl_mutex) == 0) {
   1317 		return (DCOPY_FAILURE);
   1318 	}
   1319 
   1320 	/* if the channel had a fatal failure, fail all polls */
   1321 	if ((channel->ic_channel_state == IOAT_CHANNEL_IN_FAILURE) ||
   1322 	    IOAT_CMPL_FAILED(channel)) {
   1323 		mutex_exit(&ring->cr_cmpl_mutex);
   1324 		return (DCOPY_FAILURE);
   1325 	}
   1326 
   1327 	/*
   1328 	 * if the current completion is the same as the last time we read one,
   1329 	 * post is still pending, nothing further to do. We track completions
   1330 	 * as indexes into the ring since post uses VAs and the H/W returns
   1331 	 * PAs. We grab a snapshot of generation and last_cmpl in the mutex.
   1332 	 */
   1333 	(void) ddi_dma_sync(channel->ic_cmpl_dma_handle, 0, 0,
   1334 	    DDI_DMA_SYNC_FORCPU);
   1335 	last_cmpl = IOAT_CMPL_INDEX(channel);
   1336 	if (last_cmpl != ring->cr_cmpl_last) {
   1337 		/*
   1338 		 * if we wrapped the ring, increment the generation. Store
   1339 		 * the last cmpl. This logic assumes a physically contiguous
   1340 		 * ring.
   1341 		 */
   1342 		if (last_cmpl < ring->cr_cmpl_last) {
   1343 			ring->cr_cmpl_gen++;
   1344 		}
   1345 		ring->cr_cmpl_last = last_cmpl;
   1346 		generation = ring->cr_cmpl_gen;
   1347 
   1348 	} else {
   1349 		generation = ring->cr_cmpl_gen;
   1350 	}
   1351 
   1352 	mutex_exit(&ring->cr_cmpl_mutex);
   1353 
   1354 	/*
   1355 	 * if cmd isn't passed in, well return.  Useful for updating the
   1356 	 * consumer pointer (ring->cr_cmpl_last).
   1357 	 */
   1358 	if (cmd->dp_flags & DCOPY_CMD_SYNC) {
   1359 		return (DCOPY_PENDING);
   1360 	}
   1361 
   1362 	/*
   1363 	 * if the post's generation is old, this post has completed. No reason
   1364 	 * to go check the last completion. if the generation is the same
   1365 	 * and if the post is before or = to the last completion processed,
   1366 	 * the post has completed.
   1367 	 */
   1368 	if (priv->ip_generation < generation) {
   1369 		return (DCOPY_COMPLETED);
   1370 	} else if ((priv->ip_generation == generation) &&
   1371 	    (priv->ip_index <= last_cmpl)) {
   1372 		return (DCOPY_COMPLETED);
   1373 	}
   1374 
   1375 	return (DCOPY_PENDING);
   1376 }
   1377 
   1378 
   1379 /*
   1380  * ioat_ring_reserve()
   1381  */
   1382 int
   1383 ioat_ring_reserve(ioat_channel_t channel, ioat_channel_ring_t *ring,
   1384     dcopy_cmd_t cmd)
   1385 {
   1386 	uint64_t dest_addr;
   1387 	uint32_t dest_size;
   1388 	uint64_t src_addr;
   1389 	uint32_t src_size;
   1390 	size_t xfer_size;
   1391 	uint64_t desc;
   1392 	int num_desc;
   1393 	size_t size;
   1394 	int i;
   1395 
   1396 
   1397 	/*
   1398 	 * figure out how many descriptors we need. This can include a dca
   1399 	 * desc and multiple desc for a dma copy.
   1400 	 */
   1401 	num_desc = 0;
   1402 	if ((channel->ic_ver == IOAT_CBv2) &&
   1403 	    (cmd->dp_flags & DCOPY_CMD_DCA)) {
   1404 		num_desc++;
   1405 	}
   1406 	src_addr = cmd->dp.copy.cc_source;
   1407 	dest_addr = cmd->dp.copy.cc_dest;
   1408 	size = cmd->dp.copy.cc_size;
   1409 	while (size > 0) {
   1410 		num_desc++;
   1411 
   1412 		/* adjust for any offset into the page */
   1413 		if ((src_addr & PAGEOFFSET) == 0) {
   1414 			src_size = PAGESIZE;
   1415 		} else {
   1416 			src_size = PAGESIZE - (src_addr & PAGEOFFSET);
   1417 		}
   1418 		if ((dest_addr & PAGEOFFSET) == 0) {
   1419 			dest_size = PAGESIZE;
   1420 		} else {
   1421 			dest_size = PAGESIZE - (dest_addr & PAGEOFFSET);
   1422 		}
   1423 
   1424 		/* take the smallest of the three */
   1425 		xfer_size = MIN(src_size, dest_size);
   1426 		xfer_size = MIN(xfer_size, size);
   1427 
   1428 		/* go to the next page */
   1429 		src_addr += xfer_size;
   1430 		dest_addr += xfer_size;
   1431 		size -= xfer_size;
   1432 	}
   1433 
   1434 	/* Make sure we have space for these descriptors */
   1435 	desc = ring->cr_desc_next;
   1436 	for (i = 0; i < num_desc; i++) {
   1437 
   1438 		/*
   1439 		 * if this is the last descriptor in the ring, see if the
   1440 		 * last completed descriptor is #0.
   1441 		 */
   1442 		if (desc == ring->cr_desc_last) {
   1443 			if (ring->cr_cmpl_last == 0) {
   1444 				/*
   1445 				 * if we think the ring is full, update where
   1446 				 * the H/W really is and check for full again.
   1447 				 */
   1448 				cmd->dp_flags |= DCOPY_CMD_SYNC;
   1449 				(void) ioat_cmd_poll(channel, cmd);
   1450 				cmd->dp_flags &= ~DCOPY_CMD_SYNC;
   1451 				if (ring->cr_cmpl_last == 0) {
   1452 					return (DCOPY_NORESOURCES);
   1453 				}
   1454 			}
   1455 
   1456 			/*
   1457 			 * go to the next descriptor which is zero in this
   1458 			 * case.
   1459 			 */
   1460 			desc = 0;
   1461 
   1462 		/*
   1463 		 * if this is not the last descriptor in the ring, see if
   1464 		 * the last completion we saw was the next descriptor.
   1465 		 */
   1466 		} else {
   1467 			if ((desc + 1) == ring->cr_cmpl_last) {
   1468 				/*
   1469 				 * if we think the ring is full, update where
   1470 				 * the H/W really is and check for full again.
   1471 				 */
   1472 				cmd->dp_flags |= DCOPY_CMD_SYNC;
   1473 				(void) ioat_cmd_poll(channel, cmd);
   1474 				cmd->dp_flags &= ~DCOPY_CMD_SYNC;
   1475 				if ((desc + 1) == ring->cr_cmpl_last) {
   1476 					return (DCOPY_NORESOURCES);
   1477 				}
   1478 			}
   1479 
   1480 			/* go to the next descriptor */
   1481 			desc++;
   1482 		}
   1483 	}
   1484 
   1485 	return (DCOPY_SUCCESS);
   1486 }
   1487