Home | History | Annotate | Download | only in nxge
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/mac_provider.h>
     27 #include <sys/nxge/nxge_impl.h>
     28 #include <sys/nxge/nxge_hio.h>
     29 #include <npi_tx_wr64.h>
     30 
     31 /* Software LSO required header files */
     32 #include <netinet/tcp.h>
     33 #include <inet/ip_impl.h>
     34 #include <inet/tcp.h>
     35 
     36 extern uint64_t mac_pkt_hash(uint_t, mblk_t *mp, uint8_t policy,
     37     boolean_t is_outbound);
     38 
     39 static mblk_t *nxge_lso_eliminate(mblk_t *);
     40 static mblk_t *nxge_do_softlso(mblk_t *mp, uint32_t mss);
     41 static void nxge_lso_info_get(mblk_t *, uint32_t *, uint32_t *);
     42 static void nxge_hcksum_retrieve(mblk_t *,
     43     uint32_t *, uint32_t *, uint32_t *,
     44     uint32_t *, uint32_t *);
     45 static uint32_t nxge_csgen(uint16_t *, int);
     46 
     /*
      * Transmit-path tunables.  Everything except nxge_lso_kick_cnt is
      * declared extern here, i.e. defined in some other file.
      */
     /* nxge_start() attempts a reclaim once pending descriptors reach this. */
     47 extern uint32_t		nxge_reclaim_pending;
     /*
      * Basis of the copy-vs-bind length threshold in nxge_start() when
      * nxge_tx_use_bcopy is set; also the per-descriptor stride used for
      * the ddi_dma_sync() offsets of the premapped buffers.
      */
     48 extern uint32_t 	nxge_bcopy_thresh;
     49 extern uint32_t 	nxge_dvma_thresh;
     /*
      * DMA binds shorter than this use DDI_DMA_CONSISTENT, others use
      * DDI_DMA_STREAMING.
      */
     50 extern uint32_t 	nxge_dma_stream_thresh;
     /*
      * Free-descriptor low-water mark: drives mark_mode in nxge_start() and
      * is part of the count passed to nxge_txdma_reclaim().
      */
     51 extern uint32_t		nxge_tx_minfree;
     52 extern uint32_t		nxge_tx_intr_thres;
     /* nxge_start() gives up on a packet needing more gathers than this. */
     53 extern uint32_t		nxge_tx_max_gathers;
     /*
      * When nonzero, nxge_start() may pack a small fragment into the
      * previous descriptor's bcopy buffer instead of using a new one.
      */
     54 extern uint32_t		nxge_tx_tiny_pack;
     /*
      * When nonzero (and the device is not an N2_NIU), nxge_bcopy_thresh
      * rather than TX_BCOPY_SIZE sets the copy threshold.
      */
     55 extern uint32_t		nxge_tx_use_bcopy;
     56 extern nxge_tx_mode_t	nxge_tx_scheme;
     /*
      * Defined here, initial value 2.
      * NOTE(review): presumably the number of soft-LSO segments queued
      * between hardware kicks -- confirm against its users.
      */
     57 uint32_t		nxge_lso_kick_cnt = 2;
     58 
     59 
     60 void
     61 nxge_tx_ring_task(void *arg)
     62 {
     63 	p_tx_ring_t	ring = (p_tx_ring_t)arg;
     64 
     65 	MUTEX_ENTER(&ring->lock);
     66 	(void) nxge_txdma_reclaim(ring->nxgep, ring, 0);
     67 	MUTEX_EXIT(&ring->lock);
     68 
     69 	if (!ring->tx_ring_offline) {
     70 		mac_tx_ring_update(ring->nxgep->mach, ring->tx_ring_handle);
     71 	}
     72 }
     73 
     74 static void
     75 nxge_tx_ring_dispatch(p_tx_ring_t ring)
     76 {
     77 	/*
     78 	 * Kick the ring task to reclaim some buffers.
     79 	 */
     80 	(void) ddi_taskq_dispatch(ring->taskq,
     81 	    nxge_tx_ring_task, (void *)ring, DDI_SLEEP);
     82 }
     83 
     84 mblk_t *
     85 nxge_tx_ring_send(void *arg, mblk_t *mp)
     86 {
     87 	p_nxge_ring_handle_t	nrhp = (p_nxge_ring_handle_t)arg;
     88 	p_nxge_t		nxgep;
     89 	p_tx_ring_t		tx_ring_p;
     90 	int			status, channel;
     91 
     92 	ASSERT(nrhp != NULL);
     93 	nxgep = nrhp->nxgep;
     94 	channel = nxgep->pt_config.hw_config.tdc.start + nrhp->index;
     95 	tx_ring_p = nxgep->tx_rings->rings[channel];
     96 
     97 	/*
     98 	 * We may be in a transition from offlined DMA to onlined
     99 	 * DMA.
    100 	 */
    101 	if (tx_ring_p == NULL) {
    102 		ASSERT(tx_ring_p != NULL);
    103 		freemsg(mp);
    104 		return ((mblk_t *)NULL);
    105 	}
    106 
    107 	/*
    108 	 * Valid DMA?
    109 	 */
    110 	ASSERT(nxgep == tx_ring_p->nxgep);
    111 
    112 	/*
    113 	 * Make sure DMA is not offlined.
    114 	 */
    115 	if (isLDOMservice(nxgep) && tx_ring_p->tx_ring_offline) {
    116 		ASSERT(!tx_ring_p->tx_ring_offline);
    117 		freemsg(mp);
    118 		return ((mblk_t *)NULL);
    119 	}
    120 
    121 	/*
    122 	 * Transmit the packet.
    123 	 */
    124 	status = nxge_start(nxgep, tx_ring_p, mp);
    125 	if (status) {
    126 		nxge_tx_ring_dispatch(tx_ring_p);
    127 		return (mp);
    128 	}
    129 
    130 	return ((mblk_t *)NULL);
    131 }
    132 
    133 int
    134 nxge_start(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp)
    135 {
    136 	int 			dma_status, status = 0;
    137 	p_tx_desc_t 		tx_desc_ring_vp;
    138 	npi_handle_t		npi_desc_handle;
    139 	nxge_os_dma_handle_t 	tx_desc_dma_handle;
    140 	p_tx_desc_t 		tx_desc_p;
    141 	p_tx_msg_t 		tx_msg_ring;
    142 	p_tx_msg_t 		tx_msg_p;
    143 	tx_desc_t		tx_desc, *tmp_desc_p;
    144 	tx_desc_t		sop_tx_desc, *sop_tx_desc_p;
    145 	p_tx_pkt_header_t	hdrp;
    146 	tx_pkt_hdr_all_t	tmp_hdrp;
    147 	p_tx_pkt_hdr_all_t	pkthdrp;
    148 	uint8_t			npads = 0;
    149 	uint64_t 		dma_ioaddr;
    150 	uint32_t		dma_flags;
    151 	int			last_bidx;
    152 	uint8_t 		*b_rptr;
    153 	caddr_t 		kaddr;
    154 	uint32_t		nmblks;
    155 	uint32_t		ngathers;
    156 	uint32_t		clen;
    157 	int 			len;
    158 	uint32_t		pkt_len, pack_len, min_len;
    159 	uint32_t		bcopy_thresh;
    160 	int 			i, cur_index, sop_index;
    161 	uint16_t		tail_index;
    162 	boolean_t		tail_wrap = B_FALSE;
    163 	nxge_dma_common_t	desc_area;
    164 	nxge_os_dma_handle_t 	dma_handle;
    165 	ddi_dma_cookie_t 	dma_cookie;
    166 	npi_handle_t		npi_handle;
    167 	p_mblk_t 		nmp;
    168 	p_mblk_t		t_mp;
    169 	uint32_t 		ncookies;
    170 	boolean_t 		good_packet;
    171 	boolean_t 		mark_mode = B_FALSE;
    172 	p_nxge_stats_t 		statsp;
    173 	p_nxge_tx_ring_stats_t tdc_stats;
    174 	t_uscalar_t 		start_offset = 0;
    175 	t_uscalar_t 		stuff_offset = 0;
    176 	t_uscalar_t 		end_offset = 0;
    177 	t_uscalar_t 		value = 0;
    178 	t_uscalar_t 		cksum_flags = 0;
    179 	boolean_t		cksum_on = B_FALSE;
    180 	uint32_t		boff = 0;
    181 	uint64_t		tot_xfer_len = 0;
    182 	boolean_t		header_set = B_FALSE;
    183 #ifdef NXGE_DEBUG
    184 	p_tx_desc_t 		tx_desc_ring_pp;
    185 	p_tx_desc_t 		tx_desc_pp;
    186 	tx_desc_t		*save_desc_p;
    187 	int			dump_len;
    188 	int			sad_len;
    189 	uint64_t		sad;
    190 	int			xfer_len;
    191 	uint32_t		msgsize;
    192 #endif
    193 	p_mblk_t 		mp_chain = NULL;
    194 	boolean_t		is_lso = B_FALSE;
    195 	boolean_t		lso_again;
    196 	int			cur_index_lso;
    197 	p_mblk_t 		nmp_lso_save;
    198 	uint32_t		lso_ngathers;
    199 	boolean_t		lso_tail_wrap = B_FALSE;
    200 
    201 	NXGE_DEBUG_MSG((nxgep, TX_CTL,
    202 	    "==> nxge_start: tx dma channel %d", tx_ring_p->tdc));
    203 	NXGE_DEBUG_MSG((nxgep, TX_CTL,
    204 	    "==> nxge_start: Starting tdc %d desc pending %d",
    205 	    tx_ring_p->tdc, tx_ring_p->descs_pending));
    206 
    207 	statsp = nxgep->statsp;
    208 
    209 	if (!isLDOMguest(nxgep)) {
    210 		switch (nxgep->mac.portmode) {
    211 		default:
    212 			if (nxgep->statsp->port_stats.lb_mode ==
    213 			    nxge_lb_normal) {
    214 				if (!statsp->mac_stats.link_up) {
    215 					freemsg(mp);
    216 					NXGE_DEBUG_MSG((nxgep, TX_CTL,
    217 					    "==> nxge_start: "
    218 					    "link not up"));
    219 					goto nxge_start_fail1;
    220 				}
    221 			}
    222 			break;
    223 		case PORT_10G_FIBER:
    224 			/*
    225 			 * For the following modes, check the link status
    226 			 * before sending the packet out:
    227 			 * nxge_lb_normal,
    228 			 * nxge_lb_ext10g,
    229 			 * nxge_lb_ext1000,
    230 			 * nxge_lb_ext100,
    231 			 * nxge_lb_ext10.
    232 			 */
    233 			if (nxgep->statsp->port_stats.lb_mode <
    234 			    nxge_lb_phy10g) {
    235 				if (!statsp->mac_stats.link_up) {
    236 					freemsg(mp);
    237 					NXGE_DEBUG_MSG((nxgep, TX_CTL,
    238 					    "==> nxge_start: "
    239 					    "link not up"));
    240 					goto nxge_start_fail1;
    241 				}
    242 			}
    243 			break;
    244 		}
    245 	}
    246 
    247 	if ((!(nxgep->drv_state & STATE_HW_INITIALIZED)) ||
    248 	    (nxgep->nxge_mac_state != NXGE_MAC_STARTED)) {
    249 		NXGE_DEBUG_MSG((nxgep, TX_CTL,
    250 		    "==> nxge_start: hardware not initialized or stopped"));
    251 		freemsg(mp);
    252 		goto nxge_start_fail1;
    253 	}
    254 
    255 	if (nxgep->soft_lso_enable) {
    256 		mp_chain = nxge_lso_eliminate(mp);
    257 		NXGE_DEBUG_MSG((nxgep, TX_CTL,
    258 		    "==> nxge_start(0): LSO mp $%p mp_chain $%p",
    259 		    mp, mp_chain));
    260 		if (mp_chain == NULL) {
    261 			NXGE_ERROR_MSG((nxgep, TX_CTL,
    262 			    "==> nxge_send(0): NULL mp_chain $%p != mp $%p",
    263 			    mp_chain, mp));
    264 			goto nxge_start_fail1;
    265 		}
    266 		if (mp_chain != mp) {
    267 			NXGE_DEBUG_MSG((nxgep, TX_CTL,
    268 			    "==> nxge_send(1): IS LSO mp_chain $%p != mp $%p",
    269 			    mp_chain, mp));
    270 			is_lso = B_TRUE;
    271 			mp = mp_chain;
    272 			mp_chain = mp_chain->b_next;
    273 			mp->b_next = NULL;
    274 		}
    275 	}
    276 
    277 	hcksum_retrieve(mp, NULL, NULL, &start_offset,
    278 	    &stuff_offset, &end_offset, &value, &cksum_flags);
    279 	if (!NXGE_IS_VLAN_PACKET(mp->b_rptr)) {
    280 		start_offset += sizeof (ether_header_t);
    281 		stuff_offset += sizeof (ether_header_t);
    282 	} else {
    283 		start_offset += sizeof (struct ether_vlan_header);
    284 		stuff_offset += sizeof (struct ether_vlan_header);
    285 	}
    286 
    287 	if (cksum_flags & HCK_PARTIALCKSUM) {
    288 		NXGE_DEBUG_MSG((nxgep, TX_CTL,
    289 		    "==> nxge_start: mp $%p len %d "
    290 		    "cksum_flags 0x%x (partial checksum) ",
    291 		    mp, MBLKL(mp), cksum_flags));
    292 		cksum_on = B_TRUE;
    293 	}
    294 
    295 	pkthdrp = (p_tx_pkt_hdr_all_t)&tmp_hdrp;
    296 	pkthdrp->reserved = 0;
    297 	tmp_hdrp.pkthdr.value = 0;
    298 	nxge_fill_tx_hdr(mp, B_FALSE, cksum_on,
    299 	    0, 0, pkthdrp,
    300 	    start_offset, stuff_offset);
    301 
    302 	lso_again = B_FALSE;
    303 	lso_ngathers = 0;
    304 
    305 	MUTEX_ENTER(&tx_ring_p->lock);
    306 
    307 	if (isLDOMservice(nxgep)) {
    308 		tx_ring_p->tx_ring_busy = B_TRUE;
    309 		if (tx_ring_p->tx_ring_offline) {
    310 			freemsg(mp);
    311 			tx_ring_p->tx_ring_busy = B_FALSE;
    312 			(void) atomic_swap_32(&tx_ring_p->tx_ring_offline,
    313 			    NXGE_TX_RING_OFFLINED);
    314 			MUTEX_EXIT(&tx_ring_p->lock);
    315 			return (status);
    316 		}
    317 	}
    318 
    319 	cur_index_lso = tx_ring_p->wr_index;
    320 	lso_tail_wrap = tx_ring_p->wr_index_wrap;
    321 start_again:
    322 	ngathers = 0;
    323 	sop_index = tx_ring_p->wr_index;
    324 #ifdef	NXGE_DEBUG
    325 	if (tx_ring_p->descs_pending) {
    326 		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
    327 		    "desc pending %d ", tx_ring_p->descs_pending));
    328 	}
    329 
    330 	dump_len = (int)(MBLKL(mp));
    331 	dump_len = (dump_len > 128) ? 128: dump_len;
    332 
    333 	NXGE_DEBUG_MSG((nxgep, TX_CTL,
    334 	    "==> nxge_start: tdc %d: dumping ...: b_rptr $%p "
    335 	    "(Before header reserve: ORIGINAL LEN %d)",
    336 	    tx_ring_p->tdc,
    337 	    mp->b_rptr,
    338 	    dump_len));
    339 
    340 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: dump packets "
    341 	    "(IP ORIGINAL b_rptr $%p): %s", mp->b_rptr,
    342 	    nxge_dump_packet((char *)mp->b_rptr, dump_len)));
    343 #endif
    344 
    345 	tdc_stats = tx_ring_p->tdc_stats;
    346 	mark_mode = (tx_ring_p->descs_pending &&
    347 	    (((int)tx_ring_p->tx_ring_size - (int)tx_ring_p->descs_pending) <
    348 	    (int)nxge_tx_minfree));
    349 
    350 	NXGE_DEBUG_MSG((nxgep, TX_CTL,
    351 	    "TX Descriptor ring is channel %d mark mode %d",
    352 	    tx_ring_p->tdc, mark_mode));
    353 
    354 	if ((tx_ring_p->descs_pending + lso_ngathers) >= nxge_reclaim_pending) {
    355 		if (!nxge_txdma_reclaim(nxgep, tx_ring_p,
    356 		    (nxge_tx_minfree + lso_ngathers))) {
    357 			NXGE_DEBUG_MSG((nxgep, TX_CTL,
    358 			    "TX Descriptor ring is full: channel %d",
    359 			    tx_ring_p->tdc));
    360 			NXGE_DEBUG_MSG((nxgep, TX_CTL,
    361 			    "TX Descriptor ring is full: channel %d",
    362 			    tx_ring_p->tdc));
    363 			if (is_lso) {
    364 				/*
    365 				 * free the current mp and mp_chain if not FULL.
    366 				 */
    367 				tdc_stats->tx_no_desc++;
    368 				NXGE_DEBUG_MSG((nxgep, TX_CTL,
    369 				    "LSO packet: TX Descriptor ring is full: "
    370 				    "channel %d",
    371 				    tx_ring_p->tdc));
    372 				goto nxge_start_fail_lso;
    373 			} else {
    374 				(void) cas32((uint32_t *)&tx_ring_p->queueing,
    375 				    0, 1);
    376 				tdc_stats->tx_no_desc++;
    377 
    378 				if (isLDOMservice(nxgep)) {
    379 					tx_ring_p->tx_ring_busy = B_FALSE;
    380 					if (tx_ring_p->tx_ring_offline) {
    381 						(void) atomic_swap_32(
    382 						    &tx_ring_p->tx_ring_offline,
    383 						    NXGE_TX_RING_OFFLINED);
    384 					}
    385 				}
    386 
    387 				MUTEX_EXIT(&tx_ring_p->lock);
    388 				status = 1;
    389 				goto nxge_start_fail1;
    390 			}
    391 		}
    392 	}
    393 
    394 	nmp = mp;
    395 	i = sop_index = tx_ring_p->wr_index;
    396 	nmblks = 0;
    397 	ngathers = 0;
    398 	pkt_len = 0;
    399 	pack_len = 0;
    400 	clen = 0;
    401 	last_bidx = -1;
    402 	good_packet = B_TRUE;
    403 
    404 	desc_area = tx_ring_p->tdc_desc;
    405 	npi_handle = desc_area.npi_handle;
    406 	npi_desc_handle.regh = (nxge_os_acc_handle_t)
    407 	    DMA_COMMON_ACC_HANDLE(desc_area);
    408 	tx_desc_ring_vp = (p_tx_desc_t)DMA_COMMON_VPTR(desc_area);
    409 	tx_desc_dma_handle = (nxge_os_dma_handle_t)
    410 	    DMA_COMMON_HANDLE(desc_area);
    411 	tx_msg_ring = tx_ring_p->tx_msg_ring;
    412 
    413 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: wr_index %d i %d",
    414 	    sop_index, i));
    415 
    416 #ifdef	NXGE_DEBUG
    417 	msgsize = msgdsize(nmp);
    418 	NXGE_DEBUG_MSG((nxgep, TX_CTL,
    419 	    "==> nxge_start(1): wr_index %d i %d msgdsize %d",
    420 	    sop_index, i, msgsize));
    421 #endif
    422 	/*
    423 	 * The first 16 bytes of the premapped buffer are reserved
    424 	 * for header. No padding will be used.
    425 	 */
    426 	pkt_len = pack_len = boff = TX_PKT_HEADER_SIZE;
    427 	if (nxge_tx_use_bcopy && (nxgep->niu_type != N2_NIU)) {
    428 		bcopy_thresh = (nxge_bcopy_thresh - TX_PKT_HEADER_SIZE);
    429 	} else {
    430 		bcopy_thresh = (TX_BCOPY_SIZE - TX_PKT_HEADER_SIZE);
    431 	}
    432 	while (nmp) {
    433 		good_packet = B_TRUE;
    434 		b_rptr = nmp->b_rptr;
    435 		len = MBLKL(nmp);
    436 		if (len <= 0) {
    437 			nmp = nmp->b_cont;
    438 			continue;
    439 		}
    440 		nmblks++;
    441 
    442 		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(1): nmblks %d "
    443 		    "len %d pkt_len %d pack_len %d",
    444 		    nmblks, len, pkt_len, pack_len));
    445 		/*
    446 		 * Hardware limits the transfer length to 4K for NIU and
    447 		 * 4076 (TX_MAX_TRANSFER_LENGTH) for Neptune. But we just
    448 		 * use TX_MAX_TRANSFER_LENGTH as the limit for both.
    449 		 * If len is longer than the limit, then we break nmp into
    450 		 * two chunks: Make the first chunk equal to the limit and
    451 		 * the second chunk for the remaining data. If the second
    452 		 * chunk is still larger than the limit, then it will be
    453 		 * broken into two in the next pass.
    454 		 */
    455 		if (len > TX_MAX_TRANSFER_LENGTH - TX_PKT_HEADER_SIZE) {
    456 			if ((t_mp = dupb(nmp)) != NULL) {
    457 				nmp->b_wptr = nmp->b_rptr +
    458 				    (TX_MAX_TRANSFER_LENGTH
    459 				    - TX_PKT_HEADER_SIZE);
    460 				t_mp->b_rptr = nmp->b_wptr;
    461 				t_mp->b_cont = nmp->b_cont;
    462 				nmp->b_cont = t_mp;
    463 				len = MBLKL(nmp);
    464 			} else {
    465 				if (is_lso) {
    466 					NXGE_DEBUG_MSG((nxgep, TX_CTL,
    467 					    "LSO packet: dupb failed: "
    468 					    "channel %d",
    469 					    tx_ring_p->tdc));
    470 					mp = nmp;
    471 					goto nxge_start_fail_lso;
    472 				} else {
    473 					good_packet = B_FALSE;
    474 					goto nxge_start_fail2;
    475 				}
    476 			}
    477 		}
    478 		tx_desc.value = 0;
    479 		tx_desc_p = &tx_desc_ring_vp[i];
    480 #ifdef	NXGE_DEBUG
    481 		tx_desc_pp = &tx_desc_ring_pp[i];
    482 #endif
    483 		tx_msg_p = &tx_msg_ring[i];
    484 #if defined(__i386)
    485 		npi_desc_handle.regp = (uint32_t)tx_desc_p;
    486 #else
    487 		npi_desc_handle.regp = (uint64_t)tx_desc_p;
    488 #endif
    489 		if (!header_set &&
    490 		    ((!nxge_tx_use_bcopy && (len > TX_BCOPY_SIZE)) ||
    491 		    (len >= bcopy_thresh))) {
    492 			header_set = B_TRUE;
    493 			bcopy_thresh += TX_PKT_HEADER_SIZE;
    494 			boff = 0;
    495 			pack_len = 0;
    496 			kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
    497 			hdrp = (p_tx_pkt_header_t)kaddr;
    498 			clen = pkt_len;
    499 			dma_handle = tx_msg_p->buf_dma_handle;
    500 			dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma);
    501 			(void) ddi_dma_sync(dma_handle,
    502 			    i * nxge_bcopy_thresh, nxge_bcopy_thresh,
    503 			    DDI_DMA_SYNC_FORDEV);
    504 
    505 			tx_msg_p->flags.dma_type = USE_BCOPY;
    506 			goto nxge_start_control_header_only;
    507 		}
    508 
    509 		pkt_len += len;
    510 		pack_len += len;
    511 
    512 		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(3): "
    513 		    "desc entry %d "
    514 		    "DESC IOADDR $%p "
    515 		    "desc_vp $%p tx_desc_p $%p "
    516 		    "desc_pp $%p tx_desc_pp $%p "
    517 		    "len %d pkt_len %d pack_len %d",
    518 		    i,
    519 		    DMA_COMMON_IOADDR(desc_area),
    520 		    tx_desc_ring_vp, tx_desc_p,
    521 		    tx_desc_ring_pp, tx_desc_pp,
    522 		    len, pkt_len, pack_len));
    523 
    524 		if (len < bcopy_thresh) {
    525 			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(4): "
    526 			    "USE BCOPY: "));
    527 			if (nxge_tx_tiny_pack) {
    528 				uint32_t blst =
    529 				    TXDMA_DESC_NEXT_INDEX(i, -1,
    530 				    tx_ring_p->tx_wrap_mask);
    531 				NXGE_DEBUG_MSG((nxgep, TX_CTL,
    532 				    "==> nxge_start(5): pack"));
    533 				if ((pack_len <= bcopy_thresh) &&
    534 				    (last_bidx == blst)) {
    535 					NXGE_DEBUG_MSG((nxgep, TX_CTL,
    536 					    "==> nxge_start: pack(6) "
    537 					    "(pkt_len %d pack_len %d)",
    538 					    pkt_len, pack_len));
    539 					i = blst;
    540 					tx_desc_p = &tx_desc_ring_vp[i];
    541 #ifdef	NXGE_DEBUG
    542 					tx_desc_pp = &tx_desc_ring_pp[i];
    543 #endif
    544 					tx_msg_p = &tx_msg_ring[i];
    545 					boff = pack_len - len;
    546 					ngathers--;
    547 				} else if (pack_len > bcopy_thresh &&
    548 				    header_set) {
    549 					pack_len = len;
    550 					boff = 0;
    551 					bcopy_thresh = nxge_bcopy_thresh;
    552 					NXGE_DEBUG_MSG((nxgep, TX_CTL,
    553 					    "==> nxge_start(7): > max NEW "
    554 					    "bcopy thresh %d "
    555 					    "pkt_len %d pack_len %d(next)",
    556 					    bcopy_thresh,
    557 					    pkt_len, pack_len));
    558 				}
    559 				last_bidx = i;
    560 			}
    561 			kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
    562 			if ((boff == TX_PKT_HEADER_SIZE) && (nmblks == 1)) {
    563 				hdrp = (p_tx_pkt_header_t)kaddr;
    564 				header_set = B_TRUE;
    565 				NXGE_DEBUG_MSG((nxgep, TX_CTL,
    566 				    "==> nxge_start(7_x2): "
    567 				    "pkt_len %d pack_len %d (new hdrp $%p)",
    568 				    pkt_len, pack_len, hdrp));
    569 			}
    570 			tx_msg_p->flags.dma_type = USE_BCOPY;
    571 			kaddr += boff;
    572 			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(8): "
    573 			    "USE BCOPY: before bcopy "
    574 			    "DESC IOADDR $%p entry %d "
    575 			    "bcopy packets %d "
    576 			    "bcopy kaddr $%p "
    577 			    "bcopy ioaddr (SAD) $%p "
    578 			    "bcopy clen %d "
    579 			    "bcopy boff %d",
    580 			    DMA_COMMON_IOADDR(desc_area), i,
    581 			    tdc_stats->tx_hdr_pkts,
    582 			    kaddr,
    583 			    dma_ioaddr,
    584 			    clen,
    585 			    boff));
    586 			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
    587 			    "1USE BCOPY: "));
    588 			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
    589 			    "2USE BCOPY: "));
    590 			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: "
    591 			    "last USE BCOPY: copy from b_rptr $%p "
    592 			    "to KADDR $%p (len %d offset %d",
    593 			    b_rptr, kaddr, len, boff));
    594 
    595 			bcopy(b_rptr, kaddr, len);
    596 
    597 #ifdef	NXGE_DEBUG
    598 			dump_len = (len > 128) ? 128: len;
    599 			NXGE_DEBUG_MSG((nxgep, TX_CTL,
    600 			    "==> nxge_start: dump packets "
    601 			    "(After BCOPY len %d)"
    602 			    "(b_rptr $%p): %s", len, nmp->b_rptr,
    603 			    nxge_dump_packet((char *)nmp->b_rptr,
    604 			    dump_len)));
    605 #endif
    606 
    607 			dma_handle = tx_msg_p->buf_dma_handle;
    608 			dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma);
    609 			(void) ddi_dma_sync(dma_handle,
    610 			    i * nxge_bcopy_thresh, nxge_bcopy_thresh,
    611 			    DDI_DMA_SYNC_FORDEV);
    612 			clen = len + boff;
    613 			tdc_stats->tx_hdr_pkts++;
    614 			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(9): "
    615 			    "USE BCOPY: "
    616 			    "DESC IOADDR $%p entry %d "
    617 			    "bcopy packets %d "
    618 			    "bcopy kaddr $%p "
    619 			    "bcopy ioaddr (SAD) $%p "
    620 			    "bcopy clen %d "
    621 			    "bcopy boff %d",
    622 			    DMA_COMMON_IOADDR(desc_area),
    623 			    i,
    624 			    tdc_stats->tx_hdr_pkts,
    625 			    kaddr,
    626 			    dma_ioaddr,
    627 			    clen,
    628 			    boff));
    629 		} else {
    630 			NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(12): "
    631 			    "USE DVMA: len %d", len));
    632 			tx_msg_p->flags.dma_type = USE_DMA;
    633 			dma_flags = DDI_DMA_WRITE;
    634 			if (len < nxge_dma_stream_thresh) {
    635 				dma_flags |= DDI_DMA_CONSISTENT;
    636 			} else {
    637 				dma_flags |= DDI_DMA_STREAMING;
    638 			}
    639 
    640 			dma_handle = tx_msg_p->dma_handle;
    641 			dma_status = ddi_dma_addr_bind_handle(dma_handle, NULL,
    642 			    (caddr_t)b_rptr, len, dma_flags,
    643 			    DDI_DMA_DONTWAIT, NULL,
    644 			    &dma_cookie, &ncookies);
    645 			if (dma_status == DDI_DMA_MAPPED) {
    646 				dma_ioaddr = dma_cookie.dmac_laddress;
    647 				len = (int)dma_cookie.dmac_size;
    648 				clen = (uint32_t)dma_cookie.dmac_size;
    649 				NXGE_DEBUG_MSG((nxgep, TX_CTL,
    650 				    "==> nxge_start(12_1): "
    651 				    "USE DVMA: len %d clen %d "
    652 				    "ngathers %d",
    653 				    len, clen,
    654 				    ngathers));
    655 #if defined(__i386)
    656 				npi_desc_handle.regp = (uint32_t)tx_desc_p;
    657 #else
    658 				npi_desc_handle.regp = (uint64_t)tx_desc_p;
    659 #endif
    660 				while (ncookies > 1) {
    661 					ngathers++;
    662 					/*
    663 					 * this is the fix for multiple
    664 					 * cookies, which are basically
    665 					 * a descriptor entry, we don't set
    666 					 * SOP bit as well as related fields
    667 					 */
    668 
    669 					(void) npi_txdma_desc_gather_set(
    670 					    npi_desc_handle,
    671 					    &tx_desc,
    672 					    (ngathers -1),
    673 					    mark_mode,
    674 					    ngathers,
    675 					    dma_ioaddr,
    676 					    clen);
    677 
    678 					tx_msg_p->tx_msg_size = clen;
    679 					NXGE_DEBUG_MSG((nxgep, TX_CTL,
    680 					    "==> nxge_start:  DMA "
    681 					    "ncookie %d "
    682 					    "ngathers %d "
    683 					    "dma_ioaddr $%p len %d"
    684 					    "desc $%p descp $%p (%d)",
    685 					    ncookies,
    686 					    ngathers,
    687 					    dma_ioaddr, clen,
    688 					    *tx_desc_p, tx_desc_p, i));
    689 
    690 					ddi_dma_nextcookie(dma_handle,
    691 					    &dma_cookie);
    692 					dma_ioaddr =
    693 					    dma_cookie.dmac_laddress;
    694 
    695 					len = (int)dma_cookie.dmac_size;
    696 					clen = (uint32_t)dma_cookie.dmac_size;
    697 					NXGE_DEBUG_MSG((nxgep, TX_CTL,
    698 					    "==> nxge_start(12_2): "
    699 					    "USE DVMA: len %d clen %d ",
    700 					    len, clen));
    701 
    702 					i = TXDMA_DESC_NEXT_INDEX(i, 1,
    703 					    tx_ring_p->tx_wrap_mask);
    704 					tx_desc_p = &tx_desc_ring_vp[i];
    705 
    706 #if defined(__i386)
    707 					npi_desc_handle.regp =
    708 					    (uint32_t)tx_desc_p;
    709 #else
    710 					npi_desc_handle.regp =
    711 					    (uint64_t)tx_desc_p;
    712 #endif
    713 					tx_msg_p = &tx_msg_ring[i];
    714 					tx_msg_p->flags.dma_type = USE_NONE;
    715 					tx_desc.value = 0;
    716 
    717 					ncookies--;
    718 				}
    719 				tdc_stats->tx_ddi_pkts++;
    720 				NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start:"
    721 				    "DMA: ddi packets %d",
    722 				    tdc_stats->tx_ddi_pkts));
    723 			} else {
    724 				NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL,
    725 				    "dma mapping failed for %d "
    726 				    "bytes addr $%p flags %x (%d)",
    727 				    len, b_rptr, status, status));
    728 				good_packet = B_FALSE;
    729 				tdc_stats->tx_dma_bind_fail++;
    730 				tx_msg_p->flags.dma_type = USE_NONE;
    731 				if (is_lso) {
    732 					mp = nmp;
    733 					goto nxge_start_fail_lso;
    734 				} else {
    735 					status = 1;
    736 					goto nxge_start_fail2;
    737 				}
    738 			}
    739 		} /* ddi dvma */
    740 
    741 		if (is_lso) {
    742 			nmp_lso_save = nmp;
    743 		}
    744 		nmp = nmp->b_cont;
    745 nxge_start_control_header_only:
    746 #if defined(__i386)
    747 		npi_desc_handle.regp = (uint32_t)tx_desc_p;
    748 #else
    749 		npi_desc_handle.regp = (uint64_t)tx_desc_p;
    750 #endif
    751 		ngathers++;
    752 
    753 		if (ngathers == 1) {
    754 #ifdef	NXGE_DEBUG
    755 			save_desc_p = &sop_tx_desc;
    756 #endif
    757 			sop_tx_desc_p = &sop_tx_desc;
    758 			sop_tx_desc_p->value = 0;
    759 			sop_tx_desc_p->bits.hdw.tr_len = clen;
    760 			sop_tx_desc_p->bits.hdw.sad = dma_ioaddr >> 32;
    761 			sop_tx_desc_p->bits.ldw.sad = dma_ioaddr & 0xffffffff;
    762 		} else {
    763 #ifdef	NXGE_DEBUG
    764 			save_desc_p = &tx_desc;
    765 #endif
    766 			tmp_desc_p = &tx_desc;
    767 			tmp_desc_p->value = 0;
    768 			tmp_desc_p->bits.hdw.tr_len = clen;
    769 			tmp_desc_p->bits.hdw.sad = dma_ioaddr >> 32;
    770 			tmp_desc_p->bits.ldw.sad = dma_ioaddr & 0xffffffff;
    771 
    772 			tx_desc_p->value = tmp_desc_p->value;
    773 		}
    774 
    775 		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(13): "
    776 		    "Desc_entry %d ngathers %d "
    777 		    "desc_vp $%p tx_desc_p $%p "
    778 		    "len %d clen %d pkt_len %d pack_len %d nmblks %d "
    779 		    "dma_ioaddr (SAD) $%p mark %d",
    780 		    i, ngathers,
    781 		    tx_desc_ring_vp, tx_desc_p,
    782 		    len, clen, pkt_len, pack_len, nmblks,
    783 		    dma_ioaddr, mark_mode));
    784 
    785 #ifdef NXGE_DEBUG
    786 		npi_desc_handle.nxgep = nxgep;
    787 		npi_desc_handle.function.function = nxgep->function_num;
    788 		npi_desc_handle.function.instance = nxgep->instance;
    789 		sad = (save_desc_p->value & TX_PKT_DESC_SAD_MASK);
    790 		xfer_len = ((save_desc_p->value & TX_PKT_DESC_TR_LEN_MASK) >>
    791 		    TX_PKT_DESC_TR_LEN_SHIFT);
    792 
    793 
    794 		NXGE_DEBUG_MSG((nxgep, TX_CTL, "\n\t: value 0x%llx\n"
    795 		    "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\t"
    796 		    "mark %d sop %d\n",
    797 		    save_desc_p->value,
    798 		    sad,
    799 		    save_desc_p->bits.hdw.tr_len,
    800 		    xfer_len,
    801 		    save_desc_p->bits.hdw.num_ptr,
    802 		    save_desc_p->bits.hdw.mark,
    803 		    save_desc_p->bits.hdw.sop));
    804 
    805 		npi_txdma_dump_desc_one(npi_desc_handle, NULL, i);
    806 #endif
    807 
    808 		tx_msg_p->tx_msg_size = clen;
    809 		i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask);
    810 		if (ngathers > nxge_tx_max_gathers) {
    811 			good_packet = B_FALSE;
    812 			hcksum_retrieve(mp, NULL, NULL, &start_offset,
    813 			    &stuff_offset, &end_offset, &value,
    814 			    &cksum_flags);
    815 
    816 			NXGE_DEBUG_MSG((NULL, TX_CTL,
    817 			    "==> nxge_start(14): pull msg - "
    818 			    "len %d pkt_len %d ngathers %d",
    819 			    len, pkt_len, ngathers));
    820 
    821 			/*
    822 			 * Just give up on this packet.
    823 			 */
    824 			if (is_lso) {
    825 				mp = nmp_lso_save;
    826 				goto nxge_start_fail_lso;
    827 			}
    828 			status = 0;
    829 			goto nxge_start_fail2;
    830 		}
    831 	} /* while (nmp) */
    832 
    833 	tx_msg_p->tx_message = mp;
    834 	tx_desc_p = &tx_desc_ring_vp[sop_index];
    835 #if defined(__i386)
    836 	npi_desc_handle.regp = (uint32_t)tx_desc_p;
    837 #else
    838 	npi_desc_handle.regp = (uint64_t)tx_desc_p;
    839 #endif
    840 
    841 	pkthdrp = (p_tx_pkt_hdr_all_t)hdrp;
    842 	pkthdrp->reserved = 0;
    843 	hdrp->value = 0;
    844 	bcopy(&tmp_hdrp, hdrp, sizeof (tx_pkt_header_t));
    845 
    846 	if (pkt_len > NXGE_MTU_DEFAULT_MAX) {
    847 		tdc_stats->tx_jumbo_pkts++;
    848 	}
    849 
    850 	min_len = (ETHERMIN + TX_PKT_HEADER_SIZE + (npads * 2));
    851 	if (pkt_len < min_len) {
    852 		/* Assume we use bcopy to premapped buffers */
    853 		kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma);
    854 		NXGE_DEBUG_MSG((NULL, TX_CTL,
    855 		    "==> nxge_start(14-1): < (msg_min + 16)"
    856 		    "len %d pkt_len %d min_len %d bzero %d ngathers %d",
    857 		    len, pkt_len, min_len, (min_len - pkt_len), ngathers));
    858 		bzero((kaddr + pkt_len), (min_len - pkt_len));
    859 		pkt_len = tx_msg_p->tx_msg_size = min_len;
    860 
    861 		sop_tx_desc_p->bits.hdw.tr_len = min_len;
    862 
    863 		NXGE_MEM_PIO_WRITE64(npi_desc_handle, sop_tx_desc_p->value);
    864 		tx_desc_p->value = sop_tx_desc_p->value;
    865 
    866 		NXGE_DEBUG_MSG((NULL, TX_CTL,
    867 		    "==> nxge_start(14-2): < msg_min - "
    868 		    "len %d pkt_len %d min_len %d ngathers %d",
    869 		    len, pkt_len, min_len, ngathers));
    870 	}
    871 
    872 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: cksum_flags 0x%x ",
    873 	    cksum_flags));
    874 	{
    875 		uint64_t	tmp_len;
    876 
    877 		/* pkt_len already includes 16 + paddings!! */
    878 		/* Update the control header length */
    879 		tot_xfer_len = (pkt_len - TX_PKT_HEADER_SIZE);
    880 		tmp_len = hdrp->value |
    881 		    (tot_xfer_len << TX_PKT_HEADER_TOT_XFER_LEN_SHIFT);
    882 
    883 		NXGE_DEBUG_MSG((nxgep, TX_CTL,
    884 		    "==> nxge_start(15_x1): setting SOP "
    885 		    "tot_xfer_len 0x%llx (%d) pkt_len %d tmp_len "
    886 		    "0x%llx hdrp->value 0x%llx",
    887 		    tot_xfer_len, tot_xfer_len, pkt_len,
    888 		    tmp_len, hdrp->value));
    889 #if defined(_BIG_ENDIAN)
    890 		hdrp->value = ddi_swap64(tmp_len);
    891 #else
    892 		hdrp->value = tmp_len;
    893 #endif
    894 		NXGE_DEBUG_MSG((nxgep,
    895 		    TX_CTL, "==> nxge_start(15_x2): setting SOP "
    896 		    "after SWAP: tot_xfer_len 0x%llx pkt_len %d "
    897 		    "tmp_len 0x%llx hdrp->value 0x%llx",
    898 		    tot_xfer_len, pkt_len,
    899 		    tmp_len, hdrp->value));
    900 	}
    901 
    902 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(15): setting SOP "
    903 	    "wr_index %d "
    904 	    "tot_xfer_len (%d) pkt_len %d npads %d",
    905 	    sop_index,
    906 	    tot_xfer_len, pkt_len,
    907 	    npads));
    908 
    909 	sop_tx_desc_p->bits.hdw.sop = 1;
    910 	sop_tx_desc_p->bits.hdw.mark = mark_mode;
    911 	sop_tx_desc_p->bits.hdw.num_ptr = ngathers;
    912 
    913 	NXGE_MEM_PIO_WRITE64(npi_desc_handle, sop_tx_desc_p->value);
    914 
    915 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(16): set SOP done"));
    916 
    917 #ifdef NXGE_DEBUG
    918 	npi_desc_handle.nxgep = nxgep;
    919 	npi_desc_handle.function.function = nxgep->function_num;
    920 	npi_desc_handle.function.instance = nxgep->instance;
    921 
    922 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "\n\t: value 0x%llx\n"
    923 	    "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\tmark %d sop %d\n",
    924 	    save_desc_p->value,
    925 	    sad,
    926 	    save_desc_p->bits.hdw.tr_len,
    927 	    xfer_len,
    928 	    save_desc_p->bits.hdw.num_ptr,
    929 	    save_desc_p->bits.hdw.mark,
    930 	    save_desc_p->bits.hdw.sop));
    931 	(void) npi_txdma_dump_desc_one(npi_desc_handle, NULL, sop_index);
    932 
    933 	dump_len = (pkt_len > 128) ? 128: pkt_len;
    934 	NXGE_DEBUG_MSG((nxgep, TX_CTL,
    935 	    "==> nxge_start: dump packets(17) (after sop set, len "
    936 	    " (len/dump_len/pkt_len/tot_xfer_len) %d/%d/%d/%d):\n"
    937 	    "ptr $%p: %s", len, dump_len, pkt_len, tot_xfer_len,
    938 	    (char *)hdrp,
    939 	    nxge_dump_packet((char *)hdrp, dump_len)));
    940 	NXGE_DEBUG_MSG((nxgep, TX_CTL,
    941 	    "==> nxge_start(18): TX desc sync: sop_index %d",
    942 	    sop_index));
    943 #endif
    944 
    945 	if ((ngathers == 1) || tx_ring_p->wr_index < i) {
    946 		(void) ddi_dma_sync(tx_desc_dma_handle,
    947 		    sop_index * sizeof (tx_desc_t),
    948 		    ngathers * sizeof (tx_desc_t),
    949 		    DDI_DMA_SYNC_FORDEV);
    950 
    951 		NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(19): sync 1 "
    952 		    "cs_off = 0x%02X cs_s_off = 0x%02X "
    953 		    "pkt_len %d ngathers %d sop_index %d\n",
    954 		    stuff_offset, start_offset,
    955 		    pkt_len, ngathers, sop_index));
    956 	} else { /* more than one descriptor and wrap around */
    957 		uint32_t nsdescs = tx_ring_p->tx_ring_size - sop_index;
    958 		(void) ddi_dma_sync(tx_desc_dma_handle,
    959 		    sop_index * sizeof (tx_desc_t),
    960 		    nsdescs * sizeof (tx_desc_t),
    961 		    DDI_DMA_SYNC_FORDEV);
    962 		NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(20): sync 1 "
    963 		    "cs_off = 0x%02X cs_s_off = 0x%02X "
    964 		    "pkt_len %d ngathers %d sop_index %d\n",
    965 		    stuff_offset, start_offset,
    966 		    pkt_len, ngathers, sop_index));
    967 
    968 		(void) ddi_dma_sync(tx_desc_dma_handle,
    969 		    0,
    970 		    (ngathers - nsdescs) * sizeof (tx_desc_t),
    971 		    DDI_DMA_SYNC_FORDEV);
    972 		NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(21): sync 2 "
    973 		    "cs_off = 0x%02X cs_s_off = 0x%02X "
    974 		    "pkt_len %d ngathers %d sop_index %d\n",
    975 		    stuff_offset, start_offset,
    976 		    pkt_len, ngathers, sop_index));
    977 	}
    978 
    979 	tail_index = tx_ring_p->wr_index;
    980 	tail_wrap = tx_ring_p->wr_index_wrap;
    981 
    982 	tx_ring_p->wr_index = i;
    983 	if (tx_ring_p->wr_index <= tail_index) {
    984 		tx_ring_p->wr_index_wrap = ((tail_wrap == B_TRUE) ?
    985 		    B_FALSE : B_TRUE);
    986 	}
    987 
    988 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: TX kick: "
    989 	    "channel %d wr_index %d wrap %d ngathers %d desc_pend %d",
    990 	    tx_ring_p->tdc,
    991 	    tx_ring_p->wr_index,
    992 	    tx_ring_p->wr_index_wrap,
    993 	    ngathers,
    994 	    tx_ring_p->descs_pending));
    995 
    996 	if (is_lso) {
    997 		lso_ngathers += ngathers;
    998 		if (mp_chain != NULL) {
    999 			mp = mp_chain;
   1000 			mp_chain = mp_chain->b_next;
   1001 			mp->b_next = NULL;
   1002 			if (nxge_lso_kick_cnt == lso_ngathers) {
   1003 				tx_ring_p->descs_pending += lso_ngathers;
   1004 				{
   1005 					tx_ring_kick_t		kick;
   1006 
   1007 					kick.value = 0;
   1008 					kick.bits.ldw.wrap =
   1009 					    tx_ring_p->wr_index_wrap;
   1010 					kick.bits.ldw.tail =
   1011 					    (uint16_t)tx_ring_p->wr_index;
   1012 
   1013 					/* Kick the Transmit kick register */
   1014 					TXDMA_REG_WRITE64(
   1015 					    NXGE_DEV_NPI_HANDLE(nxgep),
   1016 					    TX_RING_KICK_REG,
   1017 					    (uint8_t)tx_ring_p->tdc,
   1018 					    kick.value);
   1019 					tdc_stats->tx_starts++;
   1020 
   1021 					NXGE_DEBUG_MSG((nxgep, TX_CTL,
   1022 					    "==> nxge_start: more LSO: "
   1023 					    "LSO_CNT %d",
   1024 					    lso_ngathers));
   1025 				}
   1026 				lso_ngathers = 0;
   1027 				ngathers = 0;
   1028 				cur_index_lso = sop_index = tx_ring_p->wr_index;
   1029 				lso_tail_wrap = tx_ring_p->wr_index_wrap;
   1030 			}
   1031 			NXGE_DEBUG_MSG((nxgep, TX_CTL,
   1032 			    "==> nxge_start: lso again: "
   1033 			    "lso_gathers %d ngathers %d cur_index_lso %d "
   1034 			    "wr_index %d sop_index %d",
   1035 			    lso_ngathers, ngathers, cur_index_lso,
   1036 			    tx_ring_p->wr_index, sop_index));
   1037 
   1038 			NXGE_DEBUG_MSG((nxgep, TX_CTL,
   1039 			    "==> nxge_start: next : count %d",
   1040 			    lso_ngathers));
   1041 			lso_again = B_TRUE;
   1042 			goto start_again;
   1043 		}
   1044 		ngathers = lso_ngathers;
   1045 	}
   1046 
   1047 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: TX KICKING: "));
   1048 
   1049 	{
   1050 		tx_ring_kick_t		kick;
   1051 
   1052 		kick.value = 0;
   1053 		kick.bits.ldw.wrap = tx_ring_p->wr_index_wrap;
   1054 		kick.bits.ldw.tail = (uint16_t)tx_ring_p->wr_index;
   1055 
   1056 		/* Kick start the Transmit kick register */
   1057 		TXDMA_REG_WRITE64(NXGE_DEV_NPI_HANDLE(nxgep),
   1058 		    TX_RING_KICK_REG,
   1059 		    (uint8_t)tx_ring_p->tdc,
   1060 		    kick.value);
   1061 	}
   1062 
   1063 	tx_ring_p->descs_pending += ngathers;
   1064 	tdc_stats->tx_starts++;
   1065 
   1066 	if (isLDOMservice(nxgep)) {
   1067 		tx_ring_p->tx_ring_busy = B_FALSE;
   1068 		if (tx_ring_p->tx_ring_offline) {
   1069 			(void) atomic_swap_32(&tx_ring_p->tx_ring_offline,
   1070 			    NXGE_TX_RING_OFFLINED);
   1071 		}
   1072 	}
   1073 
   1074 	MUTEX_EXIT(&tx_ring_p->lock);
   1075 
   1076 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_start"));
   1077 	return (status);
   1078 
   1079 nxge_start_fail_lso:
   1080 	status = 0;
   1081 	good_packet = B_FALSE;
   1082 	if (mp != NULL)
   1083 		freemsg(mp);
   1084 	if (mp_chain != NULL)
   1085 		freemsgchain(mp_chain);
   1086 
   1087 	if (!lso_again && !ngathers) {
   1088 		if (isLDOMservice(nxgep)) {
   1089 			tx_ring_p->tx_ring_busy = B_FALSE;
   1090 			if (tx_ring_p->tx_ring_offline) {
   1091 				(void) atomic_swap_32(
   1092 				    &tx_ring_p->tx_ring_offline,
   1093 				    NXGE_TX_RING_OFFLINED);
   1094 			}
   1095 		}
   1096 
   1097 		MUTEX_EXIT(&tx_ring_p->lock);
   1098 		NXGE_DEBUG_MSG((nxgep, TX_CTL,
   1099 		    "==> nxge_start: lso exit (nothing changed)"));
   1100 		goto nxge_start_fail1;
   1101 	}
   1102 
   1103 	NXGE_DEBUG_MSG((nxgep, TX_CTL,
   1104 	    "==> nxge_start (channel %d): before lso "
   1105 	    "lso_gathers %d ngathers %d cur_index_lso %d "
   1106 	    "wr_index %d sop_index %d lso_again %d",
   1107 	    tx_ring_p->tdc,
   1108 	    lso_ngathers, ngathers, cur_index_lso,
   1109 	    tx_ring_p->wr_index, sop_index, lso_again));
   1110 
   1111 	if (lso_again) {
   1112 		lso_ngathers += ngathers;
   1113 		ngathers = lso_ngathers;
   1114 		sop_index = cur_index_lso;
   1115 		tx_ring_p->wr_index = sop_index;
   1116 		tx_ring_p->wr_index_wrap = lso_tail_wrap;
   1117 	}
   1118 
   1119 	NXGE_DEBUG_MSG((nxgep, TX_CTL,
   1120 	    "==> nxge_start (channel %d): after lso "
   1121 	    "lso_gathers %d ngathers %d cur_index_lso %d "
   1122 	    "wr_index %d sop_index %d lso_again %d",
   1123 	    tx_ring_p->tdc,
   1124 	    lso_ngathers, ngathers, cur_index_lso,
   1125 	    tx_ring_p->wr_index, sop_index, lso_again));
   1126 
   1127 nxge_start_fail2:
   1128 	if (good_packet == B_FALSE) {
   1129 		cur_index = sop_index;
   1130 		NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: clean up"));
   1131 		for (i = 0; i < ngathers; i++) {
   1132 			tx_desc_p = &tx_desc_ring_vp[cur_index];
   1133 #if defined(__i386)
   1134 			npi_handle.regp = (uint32_t)tx_desc_p;
   1135 #else
   1136 			npi_handle.regp = (uint64_t)tx_desc_p;
   1137 #endif
   1138 			tx_msg_p = &tx_msg_ring[cur_index];
   1139 			(void) npi_txdma_desc_set_zero(npi_handle, 1);
   1140 			if (tx_msg_p->flags.dma_type == USE_DVMA) {
   1141 				NXGE_DEBUG_MSG((nxgep, TX_CTL,
   1142 				    "tx_desc_p = %X index = %d",
   1143 				    tx_desc_p, tx_ring_p->rd_index));
   1144 				(void) dvma_unload(tx_msg_p->dvma_handle,
   1145 				    0, -1);
   1146 				tx_msg_p->dvma_handle = NULL;
   1147 				if (tx_ring_p->dvma_wr_index ==
   1148 				    tx_ring_p->dvma_wrap_mask)
   1149 					tx_ring_p->dvma_wr_index = 0;
   1150 				else
   1151 					tx_ring_p->dvma_wr_index++;
   1152 				tx_ring_p->dvma_pending--;
   1153 			} else if (tx_msg_p->flags.dma_type == USE_DMA) {
   1154 				if (ddi_dma_unbind_handle(
   1155 				    tx_msg_p->dma_handle)) {
   1156 					cmn_err(CE_WARN, "!nxge_start: "
   1157 					    "ddi_dma_unbind_handle failed");
   1158 				}
   1159 			}
   1160 			tx_msg_p->flags.dma_type = USE_NONE;
   1161 			cur_index = TXDMA_DESC_NEXT_INDEX(cur_index, 1,
   1162 			    tx_ring_p->tx_wrap_mask);
   1163 
   1164 		}
   1165 	}
   1166 
   1167 	if (isLDOMservice(nxgep)) {
   1168 		tx_ring_p->tx_ring_busy = B_FALSE;
   1169 		if (tx_ring_p->tx_ring_offline) {
   1170 			(void) atomic_swap_32(&tx_ring_p->tx_ring_offline,
   1171 			    NXGE_TX_RING_OFFLINED);
   1172 		}
   1173 	}
   1174 
   1175 	MUTEX_EXIT(&tx_ring_p->lock);
   1176 
   1177 nxge_start_fail1:
   1178 	/* Add FMA to check the access handle nxge_hregh */
   1179 
   1180 	NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_start"));
   1181 	return (status);
   1182 }
   1183 
   1184 /* Software LSO starts here */
   1185 static void
   1186 nxge_hcksum_retrieve(mblk_t *mp,
   1187     uint32_t *start, uint32_t *stuff, uint32_t *end,
   1188     uint32_t *value, uint32_t *flags)
   1189 {
   1190 	if (mp->b_datap->db_type == M_DATA) {
   1191 		if (flags != NULL) {
   1192 			*flags = DB_CKSUMFLAGS(mp) & (HCK_IPV4_HDRCKSUM |
   1193 			    HCK_PARTIALCKSUM | HCK_FULLCKSUM |
   1194 			    HCK_FULLCKSUM_OK);
   1195 			if ((*flags & (HCK_PARTIALCKSUM |
   1196 			    HCK_FULLCKSUM)) != 0) {
   1197 				if (value != NULL)
   1198 					*value = (uint32_t)DB_CKSUM16(mp);
   1199 				if ((*flags & HCK_PARTIALCKSUM) != 0) {
   1200 					if (start != NULL)
   1201 						*start =
   1202 						    (uint32_t)DB_CKSUMSTART(mp);
   1203 					if (stuff != NULL)
   1204 						*stuff =
   1205 						    (uint32_t)DB_CKSUMSTUFF(mp);
   1206 					if (end != NULL)
   1207 						*end =
   1208 						    (uint32_t)DB_CKSUMEND(mp);
   1209 				}
   1210 			}
   1211 		}
   1212 	}
   1213 }
   1214 
   1215 static void
   1216 nxge_lso_info_get(mblk_t *mp, uint32_t *mss, uint32_t *flags)
   1217 {
   1218 	ASSERT(DB_TYPE(mp) == M_DATA);
   1219 
   1220 	*mss = 0;
   1221 	if (flags != NULL) {
   1222 		*flags = DB_CKSUMFLAGS(mp) & HW_LSO;
   1223 		if ((*flags != 0) && (mss != NULL)) {
   1224 			*mss = (uint32_t)DB_LSOMSS(mp);
   1225 		}
   1226 		NXGE_DEBUG_MSG((NULL, TX_CTL,
   1227 		    "==> nxge_lso_info_get(flag !=NULL): mss %d *flags 0x%x",
   1228 		    *mss, *flags));
   1229 	}
   1230 
   1231 	NXGE_DEBUG_MSG((NULL, TX_CTL,
   1232 	    "<== nxge_lso_info_get: mss %d", *mss));
   1233 }
   1234 
   1235 /*
   1236  * Do Soft LSO on the oversized packet.
   1237  *
   1238  * 1. Create a chain of message for headers.
   1239  * 2. Fill up header messages with proper information.
   1240  * 3. Copy Eithernet, IP, and TCP headers from the original message to
   1241  *    each new message with necessary adjustments.
   1242  *    * Unchange the ethernet header for DIX frames. (by default)
   1243  *    * IP Total Length field is updated to MSS or less(only for the last one).
   1244  *    * IP Identification value is incremented by one for each packet.
   1245  *    * TCP sequence Number is recalculated according to the payload length.
   1246  *    * Set FIN and/or PSH flags for the *last* packet if applied.
   1247  *    * TCP partial Checksum
   1248  * 4. Update LSO information in the first message header.
   1249  * 5. Release the original message header.
   1250  */
   1251 static mblk_t *
   1252 nxge_do_softlso(mblk_t *mp, uint32_t mss)
   1253 {
   1254 	uint32_t	hckflags;
   1255 	int		pktlen;
   1256 	int		hdrlen;
   1257 	int		segnum;
   1258 	int		i;
   1259 	struct ether_vlan_header *evh;
   1260 	int		ehlen, iphlen, tcphlen;
   1261 	struct ip	*oiph, *niph;
   1262 	struct tcphdr *otcph, *ntcph;
   1263 	int		available, len, left;
   1264 	uint16_t	ip_id;
   1265 	uint32_t	tcp_seq;
   1266 #ifdef __sparc
   1267 	uint32_t	tcp_seq_tmp;
   1268 #endif
   1269 	mblk_t		*datamp;
   1270 	uchar_t		*rptr;
   1271 	mblk_t		*nmp;
   1272 	mblk_t		*cmp;
   1273 	mblk_t		*mp_chain;
   1274 	boolean_t do_cleanup = B_FALSE;
   1275 	t_uscalar_t start_offset = 0;
   1276 	t_uscalar_t stuff_offset = 0;
   1277 	t_uscalar_t value = 0;
   1278 	uint16_t	l4_len;
   1279 	ipaddr_t	src, dst;
   1280 	uint32_t	cksum, sum, l4cksum;
   1281 
   1282 	NXGE_DEBUG_MSG((NULL, TX_CTL,
   1283 	    "==> nxge_do_softlso"));
   1284 	/*
   1285 	 * check the length of LSO packet payload and calculate the number of
   1286 	 * segments to be generated.
   1287 	 */
   1288 	pktlen = msgsize(mp);
   1289 	evh = (struct ether_vlan_header *)mp->b_rptr;
   1290 
   1291 	/* VLAN? */
   1292 	if (evh->ether_tpid == htons(ETHERTYPE_VLAN))
   1293 		ehlen = sizeof (struct ether_vlan_header);
   1294 	else
   1295 		ehlen = sizeof (struct ether_header);
   1296 	oiph = (struct ip *)(mp->b_rptr + ehlen);
   1297 	iphlen = oiph->ip_hl * 4;
   1298 	otcph = (struct tcphdr *)(mp->b_rptr + ehlen + iphlen);
   1299 	tcphlen = otcph->th_off * 4;
   1300 
   1301 	l4_len = pktlen - ehlen - iphlen;
   1302 
   1303 	NXGE_DEBUG_MSG((NULL, TX_CTL,
   1304 	    "==> nxge_do_softlso: mss %d oiph $%p "
   1305 	    "original ip_sum oiph->ip_sum 0x%x "
   1306 	    "original tcp_sum otcph->th_sum 0x%x "
   1307 	    "oiph->ip_len %d pktlen %d ehlen %d "
   1308 	    "l4_len %d (0x%x) ip_len - iphlen %d ",
   1309 	    mss,
   1310 	    oiph,
   1311 	    oiph->ip_sum,
   1312 	    otcph->th_sum,
   1313 	    ntohs(oiph->ip_len), pktlen,
   1314 	    ehlen,
   1315 	    l4_len,
   1316 	    l4_len,
   1317 	    ntohs(oiph->ip_len) - iphlen));
   1318 
   1319 	/* IPv4 + TCP */
   1320 	if (!(oiph->ip_v == IPV4_VERSION)) {
   1321 		NXGE_ERROR_MSG((NULL, NXGE_ERR_CTL,
   1322 		    "<== nxge_do_softlso: not IPV4 "
   1323 		    "oiph->ip_len %d pktlen %d ehlen %d tcphlen %d",
   1324 		    ntohs(oiph->ip_len), pktlen, ehlen,
   1325 		    tcphlen));
   1326 		freemsg(mp);
   1327 		return (NULL);
   1328 	}
   1329 
   1330 	if (!(oiph->ip_p == IPPROTO_TCP)) {
   1331 		NXGE_ERROR_MSG((NULL, NXGE_ERR_CTL,
   1332 		    "<== nxge_do_softlso: not TCP "
   1333 		    "oiph->ip_len %d pktlen %d ehlen %d tcphlen %d",
   1334 		    ntohs(oiph->ip_len), pktlen, ehlen,
   1335 		    tcphlen));
   1336 		freemsg(mp);
   1337 		return (NULL);
   1338 	}
   1339 
   1340 	if (!(ntohs(oiph->ip_len) == pktlen - ehlen)) {
   1341 		NXGE_ERROR_MSG((NULL, NXGE_ERR_CTL,
   1342 		    "<== nxge_do_softlso: len not matched  "
   1343 		    "oiph->ip_len %d pktlen %d ehlen %d tcphlen %d",
   1344 		    ntohs(oiph->ip_len), pktlen, ehlen,
   1345 		    tcphlen));
   1346 		freemsg(mp);
   1347 		return (NULL);
   1348 	}
   1349 
   1350 	otcph = (struct tcphdr *)(mp->b_rptr + ehlen + iphlen);
   1351 	tcphlen = otcph->th_off * 4;
   1352 
   1353 	/* TCP flags can not include URG, RST, or SYN */
   1354 	VERIFY((otcph->th_flags & (TH_SYN | TH_RST | TH_URG)) == 0);
   1355 
   1356 	hdrlen = ehlen + iphlen + tcphlen;
   1357 
   1358 	VERIFY(MBLKL(mp) >= hdrlen);
   1359 
   1360 	if (MBLKL(mp) > hdrlen) {
   1361 		datamp = mp;
   1362 		rptr = mp->b_rptr + hdrlen;
   1363 	} else { /* = */
   1364 		datamp = mp->b_cont;
   1365 		rptr = datamp->b_rptr;
   1366 	}
   1367 
   1368 	NXGE_DEBUG_MSG((NULL, TX_CTL,
   1369 	    "nxge_do_softlso: otcph $%p pktlen: %d, "
   1370 	    "hdrlen %d ehlen %d iphlen %d tcphlen %d "
   1371 	    "mblkl(mp): %d, mblkl(datamp): %d",
   1372 	    otcph,
   1373 	    pktlen, hdrlen, ehlen, iphlen, tcphlen,
   1374 	    (int)MBLKL(mp), (int)MBLKL(datamp)));
   1375 
   1376 	hckflags = 0;
   1377 	nxge_hcksum_retrieve(mp,
   1378 	    &start_offset, &stuff_offset, &value, NULL, &hckflags);
   1379 
   1380 	dst = oiph->ip_dst.s_addr;
   1381 	src = oiph->ip_src.s_addr;
   1382 
   1383 	cksum = (dst >> 16) + (dst & 0xFFFF) +
   1384 	    (src >> 16) + (src & 0xFFFF);
   1385 	l4cksum = cksum + IP_TCP_CSUM_COMP;
   1386 
   1387 	sum = l4_len + l4cksum;
   1388 	sum = (sum & 0xFFFF) + (sum >> 16);
   1389 
   1390 	NXGE_DEBUG_MSG((NULL, TX_CTL,
   1391 	    "==> nxge_do_softlso: dst 0x%x src 0x%x sum 0x%x ~new 0x%x "
   1392 	    "hckflags 0x%x start_offset %d stuff_offset %d "
   1393 	    "value (original) 0x%x th_sum 0x%x "
   1394 	    "pktlen %d l4_len %d (0x%x) "
   1395 	    "MBLKL(mp): %d, MBLKL(datamp): %d dump header %s",
   1396 	    dst, src,
   1397 	    (sum & 0xffff), (~sum & 0xffff),
   1398 	    hckflags, start_offset, stuff_offset,
   1399 	    value, otcph->th_sum,
   1400 	    pktlen,
   1401 	    l4_len,
   1402 	    l4_len,
   1403 	    ntohs(oiph->ip_len) - (int)MBLKL(mp),
   1404 	    (int)MBLKL(datamp),
   1405 	    nxge_dump_packet((char *)evh, 12)));
   1406 
   1407 	/*
   1408 	 * Start to process.
   1409 	 */
   1410 	available = pktlen - hdrlen;
   1411 	segnum = (available - 1) / mss + 1;
   1412 
   1413 	NXGE_DEBUG_MSG((NULL, TX_CTL,
   1414 	    "==> nxge_do_softlso: pktlen %d "
   1415 	    "MBLKL(mp): %d, MBLKL(datamp): %d "
   1416 	    "available %d mss %d segnum %d",
   1417 	    pktlen, (int)MBLKL(mp), (int)MBLKL(datamp),
   1418 	    available,
   1419 	    mss,
   1420 	    segnum));
   1421 
   1422 	VERIFY(segnum >= 2);
   1423 
   1424 	/*
   1425 	 * Try to pre-allocate all header messages
   1426 	 */
   1427 	mp_chain = NULL;
   1428 	for (i = 0; i < segnum; i++) {
   1429 		if ((nmp = allocb(hdrlen, 0)) == NULL) {
   1430 			/* Clean up the mp_chain */
   1431 			while (mp_chain != NULL) {
   1432 				nmp = mp_chain;
   1433 				mp_chain = mp_chain->b_next;
   1434 				freemsg(nmp);
   1435 			}
   1436 			NXGE_DEBUG_MSG((NULL, TX_CTL,
   1437 			    "<== nxge_do_softlso: "
   1438 			    "Could not allocate enough messages for headers!"));
   1439 			freemsg(mp);
   1440 			return (NULL);
   1441 		}
   1442 		nmp->b_next = mp_chain;
   1443 		mp_chain = nmp;
   1444 
   1445 		NXGE_DEBUG_MSG((NULL, TX_CTL,
   1446 		    "==> nxge_do_softlso: "
   1447 		    "mp $%p nmp $%p mp_chain $%p mp_chain->b_next $%p",
   1448 		    mp, nmp, mp_chain, mp_chain->b_next));
   1449 	}
   1450 
   1451 	NXGE_DEBUG_MSG((NULL, TX_CTL,
   1452 	    "==> nxge_do_softlso: mp $%p nmp $%p mp_chain $%p",
   1453 	    mp, nmp, mp_chain));
   1454 
   1455 	/*
   1456 	 * Associate payload with new packets
   1457 	 */
   1458 	cmp = mp_chain;
   1459 	left = available;
   1460 	while (cmp != NULL) {
   1461 		nmp = dupb(datamp);
   1462 		if (nmp == NULL) {
   1463 			do_cleanup = B_TRUE;
   1464 			NXGE_DEBUG_MSG((NULL, TX_CTL,
   1465 			    "==>nxge_do_softlso: "
   1466 			    "Can not dupb(datamp), have to do clean up"));
   1467 			goto cleanup_allocated_msgs;
   1468 		}
   1469 
   1470 		NXGE_DEBUG_MSG((NULL, TX_CTL,
   1471 		    "==> nxge_do_softlso: (loop) before mp $%p cmp $%p "
   1472 		    "dupb nmp $%p len %d left %d msd %d ",
   1473 		    mp, cmp, nmp, len, left, mss));
   1474 
   1475 		cmp->b_cont = nmp;
   1476 		nmp->b_rptr = rptr;
   1477 		len = (left < mss) ? left : mss;
   1478 		left -= len;
   1479 
   1480 		NXGE_DEBUG_MSG((NULL, TX_CTL,
   1481 		    "==> nxge_do_softlso: (loop) after mp $%p cmp $%p "
   1482 		    "dupb nmp $%p len %d left %d mss %d ",
   1483 		    mp, cmp, nmp, len, left, mss));
   1484 		NXGE_DEBUG_MSG((NULL, TX_CTL,
   1485 		    "nxge_do_softlso: before available: %d, "
   1486 		    "left: %d, len: %d, segnum: %d MBLK(nmp): %d",
   1487 		    available, left, len, segnum, (int)MBLKL(nmp)));
   1488 
   1489 		len -= MBLKL(nmp);
   1490 		NXGE_DEBUG_MSG((NULL, TX_CTL,
   1491 		    "nxge_do_softlso: after available: %d, "
   1492 		    "left: %d, len: %d, segnum: %d MBLK(nmp): %d",
   1493 		    available, left, len, segnum, (int)MBLKL(nmp)));
   1494 
   1495 		while (len > 0) {
   1496 			mblk_t *mmp = NULL;
   1497 
   1498 			NXGE_DEBUG_MSG((NULL, TX_CTL,
   1499 			    "nxge_do_softlso: (4) len > 0 available: %d, "
   1500 			    "left: %d, len: %d, segnum: %d MBLK(nmp): %d",
   1501 			    available, left, len, segnum, (int)MBLKL(nmp)));
   1502 
   1503 			if (datamp->b_cont != NULL) {
   1504 				datamp = datamp->b_cont;
   1505 				rptr = datamp->b_rptr;
   1506 				mmp = dupb(datamp);
   1507 				if (mmp == NULL) {
   1508 					do_cleanup = B_TRUE;
   1509 					NXGE_DEBUG_MSG((NULL, TX_CTL,
   1510 					    "==> nxge_do_softlso: "
   1511 					    "Can not dupb(datamp) (1), :"
   1512 					    "have to do clean up"));
   1513 					NXGE_DEBUG_MSG((NULL, TX_CTL,
   1514 					    "==> nxge_do_softlso: "
   1515 					    "available: %d, left: %d, "
   1516 					    "len: %d, MBLKL(nmp): %d",
   1517 					    available, left, len,
   1518 					    (int)MBLKL(nmp)));
   1519 					goto cleanup_allocated_msgs;
   1520 				}
   1521 			} else {
   1522 				NXGE_ERROR_MSG((NULL, NXGE_ERR_CTL,
   1523 				    "==> nxge_do_softlso: "
   1524 				    "(1)available: %d, left: %d, "
   1525 				    "len: %d, MBLKL(nmp): %d",
   1526 				    available, left, len,
   1527 				    (int)MBLKL(nmp)));
   1528 				cmn_err(CE_PANIC,
   1529 				    "==> nxge_do_softlso: "
   1530 				    "Pointers must have been corrupted!\n"
   1531 				    "datamp: $%p, nmp: $%p, rptr: $%p",
   1532 				    (void *)datamp,
   1533 				    (void *)nmp,
   1534 				    (void *)rptr);
   1535 			}
   1536 			nmp->b_cont = mmp;
   1537 			nmp = mmp;
   1538 			len -= MBLKL(nmp);
   1539 		}
   1540 		if (len < 0) {
   1541 			nmp->b_wptr += len;
   1542 			rptr = nmp->b_wptr;
   1543 			NXGE_DEBUG_MSG((NULL, TX_CTL,
   1544 			    "(5) len < 0 (less than 0)"
   1545 			    "available: %d, left: %d, len: %d, MBLKL(nmp): %d",
   1546 			    available, left, len, (int)MBLKL(nmp)));
   1547 
   1548 		} else if (len == 0) {
   1549 			if (datamp->b_cont != NULL) {
   1550 				NXGE_DEBUG_MSG((NULL, TX_CTL,
   1551 				    "(5) len == 0"
   1552 				    "available: %d, left: %d, len: %d, "
   1553 				    "MBLKL(nmp): %d",
   1554 				    available, left, len, (int)MBLKL(nmp)));
   1555 				datamp = datamp->b_cont;
   1556 				rptr = datamp->b_rptr;
   1557 			} else {
   1558 				NXGE_DEBUG_MSG((NULL, TX_CTL,
   1559 				    "(6)available b_cont == NULL : %d, "
   1560 				    "left: %d, len: %d, MBLKL(nmp): %d",
   1561 				    available, left, len, (int)MBLKL(nmp)));
   1562 
   1563 				VERIFY(cmp->b_next == NULL);
   1564 				VERIFY(left == 0);
   1565 				break; /* Done! */
   1566 			}
   1567 		}
   1568 		cmp = cmp->b_next;
   1569 
   1570 		NXGE_DEBUG_MSG((NULL, TX_CTL,
   1571 		    "(7) do_softlso: "
   1572 		    "next mp in mp_chain available len != 0 : %d, "
   1573 		    "left: %d, len: %d, MBLKL(nmp): %d",
   1574 		    available, left, len, (int)MBLKL(nmp)));
   1575 	}
   1576 
   1577 	/*
   1578 	 * From now, start to fill up all headers for the first message
   1579 	 * Hardware checksum flags need to be updated separately for FULLCKSUM
   1580 	 * and PARTIALCKSUM cases. For full checksum, copy the original flags
   1581 	 * into every new packet is enough. But for HCK_PARTIALCKSUM, all
   1582 	 * required fields need to be updated properly.
   1583 	 */
   1584 	nmp = mp_chain;
   1585 	bcopy(mp->b_rptr, nmp->b_rptr, hdrlen);
   1586 	nmp->b_wptr = nmp->b_rptr + hdrlen;
   1587 	niph = (struct ip *)(nmp->b_rptr + ehlen);
   1588 	niph->ip_len = htons(mss + iphlen + tcphlen);
   1589 	ip_id = ntohs(niph->ip_id);
   1590 	ntcph = (struct tcphdr *)(nmp->b_rptr + ehlen + iphlen);
   1591 #ifdef __sparc
   1592 	bcopy((char *)&ntcph->th_seq, &tcp_seq_tmp, 4);
   1593 	tcp_seq = ntohl(tcp_seq_tmp);
   1594 #else
   1595 	tcp_seq = ntohl(ntcph->th_seq);
   1596 #endif
   1597 
   1598 	ntcph->th_flags &= ~(TH_FIN | TH_PUSH | TH_RST);
   1599 
   1600 	DB_CKSUMFLAGS(nmp) = (uint16_t)hckflags;
   1601 	DB_CKSUMSTART(nmp) = start_offset;
   1602 	DB_CKSUMSTUFF(nmp) = stuff_offset;
   1603 
   1604 	/* calculate IP checksum and TCP pseudo header checksum */
   1605 	niph->ip_sum = 0;
   1606 	niph->ip_sum = (uint16_t)nxge_csgen((uint16_t *)niph, iphlen);
   1607 
   1608 	l4_len = mss + tcphlen;
   1609 	sum = htons(l4_len) + l4cksum;
   1610 	sum = (sum & 0xFFFF) + (sum >> 16);
   1611 	ntcph->th_sum = (sum & 0xffff);
   1612 
   1613 	NXGE_DEBUG_MSG((NULL, TX_CTL,
   1614 	    "==> nxge_do_softlso: first mp $%p (mp_chain $%p) "
   1615 	    "mss %d pktlen %d l4_len %d (0x%x) "
   1616 	    "MBLKL(mp): %d, MBLKL(datamp): %d "
   1617 	    "ip_sum 0x%x "
   1618 	    "th_sum 0x%x sum 0x%x ) "
   1619 	    "dump first ip->tcp %s",
   1620 	    nmp, mp_chain,
   1621 	    mss,
   1622 	    pktlen,
   1623 	    l4_len,
   1624 	    l4_len,
   1625 	    (int)MBLKL(mp), (int)MBLKL(datamp),
   1626 	    niph->ip_sum,
   1627 	    ntcph->th_sum,
   1628 	    sum,
   1629 	    nxge_dump_packet((char *)niph, 52)));
   1630 
   1631 	cmp = nmp;
   1632 	while ((nmp = nmp->b_next)->b_next != NULL) {
   1633 		NXGE_DEBUG_MSG((NULL, TX_CTL,
   1634 		    "==>nxge_do_softlso: middle l4_len %d ", l4_len));
   1635 		bcopy(cmp->b_rptr, nmp->b_rptr, hdrlen);
   1636 		nmp->b_wptr = nmp->b_rptr + hdrlen;
   1637 		niph = (struct ip *)(nmp->b_rptr + ehlen);
   1638 		niph->ip_id = htons(++ip_id);
   1639 		niph->ip_len = htons(mss + iphlen + tcphlen);
   1640 		ntcph = (struct tcphdr *)(nmp->b_rptr + ehlen + iphlen);
   1641 		tcp_seq += mss;
   1642 
   1643 		ntcph->th_flags &= ~(TH_FIN | TH_PUSH | TH_RST | TH_URG);
   1644 
   1645 #ifdef __sparc
   1646 		tcp_seq_tmp = htonl(tcp_seq);
   1647 		bcopy(&tcp_seq_tmp, (char *)&ntcph->th_seq, 4);
   1648 #else
   1649 		ntcph->th_seq = htonl(tcp_seq);
   1650 #endif
   1651 		DB_CKSUMFLAGS(nmp) = (uint16_t)hckflags;
   1652 		DB_CKSUMSTART(nmp) = start_offset;
   1653 		DB_CKSUMSTUFF(nmp) = stuff_offset;
   1654 
   1655 		/* calculate IP checksum and TCP pseudo header checksum */
   1656 		niph->ip_sum = 0;
   1657 		niph->ip_sum = (uint16_t)nxge_csgen((uint16_t *)niph, iphlen);
   1658 		ntcph->th_sum = (sum & 0xffff);
   1659 
   1660 		NXGE_DEBUG_MSG((NULL, TX_CTL,
   1661 		    "==> nxge_do_softlso: middle ip_sum 0x%x "
   1662 		    "th_sum 0x%x "
   1663 		    " mp $%p (mp_chain $%p) pktlen %d "
   1664 		    "MBLKL(mp): %d, MBLKL(datamp): %d ",
   1665 		    niph->ip_sum,
   1666 		    ntcph->th_sum,
   1667 		    nmp, mp_chain,
   1668 		    pktlen, (int)MBLKL(mp), (int)MBLKL(datamp)));
   1669 	}
   1670 
   1671 	/* Last segment */
   1672 	/*
   1673 	 * Set FIN and/or PSH flags if present only in the last packet.
   1674 	 * The ip_len could be different from prior packets.
   1675 	 */
   1676 	bcopy(cmp->b_rptr, nmp->b_rptr, hdrlen);
   1677 	nmp->b_wptr = nmp->b_rptr + hdrlen;
   1678 	niph = (struct ip *)(nmp->b_rptr + ehlen);
   1679 	niph->ip_id = htons(++ip_id);
   1680 	niph->ip_len = htons(msgsize(nmp->b_cont) + iphlen + tcphlen);
   1681 	ntcph = (struct tcphdr *)(nmp->b_rptr + ehlen + iphlen);
   1682 	tcp_seq += mss;
   1683 #ifdef __sparc
   1684 	tcp_seq_tmp = htonl(tcp_seq);
   1685 	bcopy(&tcp_seq_tmp, (char *)&ntcph->th_seq, 4);
   1686 #else
   1687 	ntcph->th_seq = htonl(tcp_seq);
   1688 #endif
   1689 	ntcph->th_flags = (otcph->th_flags & ~TH_URG);
   1690 
   1691 	DB_CKSUMFLAGS(nmp) = (uint16_t)hckflags;
   1692 	DB_CKSUMSTART(nmp) = start_offset;
   1693 	DB_CKSUMSTUFF(nmp) = stuff_offset;
   1694 
   1695 	/* calculate IP checksum and TCP pseudo header checksum */
   1696 	niph->ip_sum = 0;
   1697 	niph->ip_sum = (uint16_t)nxge_csgen((uint16_t *)niph, iphlen);
   1698 
   1699 	l4_len = ntohs(niph->ip_len) - iphlen;
   1700 	sum = htons(l4_len) + l4cksum;
   1701 	sum = (sum & 0xFFFF) + (sum >> 16);
   1702 	ntcph->th_sum = (sum & 0xffff);
   1703 
   1704 	NXGE_DEBUG_MSG((NULL, TX_CTL,
   1705 	    "==> nxge_do_softlso: last next "
   1706 	    "niph->ip_sum 0x%x "
   1707 	    "ntcph->th_sum 0x%x sum 0x%x "
   1708 	    "dump last ip->tcp %s "
   1709 	    "cmp $%p mp $%p (mp_chain $%p) pktlen %d (0x%x) "
   1710 	    "l4_len %d (0x%x) "
   1711 	    "MBLKL(mp): %d, MBLKL(datamp): %d ",
   1712 	    niph->ip_sum,
   1713 	    ntcph->th_sum, sum,
   1714 	    nxge_dump_packet((char *)niph, 52),
   1715 	    cmp, nmp, mp_chain,
   1716 	    pktlen, pktlen,
   1717 	    l4_len,
   1718 	    l4_len,
   1719 	    (int)MBLKL(mp), (int)MBLKL(datamp)));
   1720 
   1721 cleanup_allocated_msgs:
   1722 	if (do_cleanup) {
   1723 		NXGE_DEBUG_MSG((NULL, TX_CTL,
   1724 		    "==> nxge_do_softlso: "
   1725 		    "Failed allocating messages, "
   1726 		    "have to clean up and fail!"));
   1727 		while (mp_chain != NULL) {
   1728 			nmp = mp_chain;
   1729 			mp_chain = mp_chain->b_next;
   1730 			freemsg(nmp);
   1731 		}
   1732 	}
   1733 	/*
   1734 	 * We're done here, so just free the original message and return the
   1735 	 * new message chain, that could be NULL if failed, back to the caller.
   1736 	 */
   1737 	freemsg(mp);
   1738 
   1739 	NXGE_DEBUG_MSG((NULL, TX_CTL,
   1740 	    "<== nxge_do_softlso:mp_chain $%p", mp_chain));
   1741 	return (mp_chain);
   1742 }
   1743 
   1744 /*
   1745  * Will be called before NIC driver do further operation on the message.
   1746  * The input message may include LSO information, if so, go to softlso logic
   1747  * to eliminate the oversized LSO packet for the incapable underlying h/w.
   1748  * The return could be the same non-LSO message or a message chain for LSO case.
   1749  *
   1750  * The driver needs to call this function per packet and process the whole chain
   1751  * if applied.
   1752  */
   1753 static mblk_t *
   1754 nxge_lso_eliminate(mblk_t *mp)
   1755 {
   1756 	uint32_t lsoflags;
   1757 	uint32_t mss;
   1758 
   1759 	NXGE_DEBUG_MSG((NULL, TX_CTL,
   1760 	    "==>nxge_lso_eliminate:"));
   1761 	nxge_lso_info_get(mp, &mss, &lsoflags);
   1762 
   1763 	if (lsoflags & HW_LSO) {
   1764 		mblk_t *nmp;
   1765 
   1766 		NXGE_DEBUG_MSG((NULL, TX_CTL,
   1767 		    "==>nxge_lso_eliminate:"
   1768 		    "HW_LSO:mss %d mp $%p",
   1769 		    mss, mp));
   1770 		if ((nmp = nxge_do_softlso(mp, mss)) != NULL) {
   1771 			NXGE_DEBUG_MSG((NULL, TX_CTL,
   1772 			    "<== nxge_lso_eliminate: "
   1773 			    "LSO: nmp not NULL nmp $%p mss %d mp $%p",
   1774 			    nmp, mss, mp));
   1775 			return (nmp);
   1776 		} else {
   1777 			NXGE_DEBUG_MSG((NULL, TX_CTL,
   1778 			    "<== nxge_lso_eliminate_ "
   1779 			    "LSO: failed nmp NULL nmp $%p mss %d mp $%p",
   1780 			    nmp, mss, mp));
   1781 			return (NULL);
   1782 		}
   1783 	}
   1784 
   1785 	NXGE_DEBUG_MSG((NULL, TX_CTL,
   1786 	    "<== nxge_lso_eliminate"));
   1787 	return (mp);
   1788 }
   1789 
   1790 static uint32_t
   1791 nxge_csgen(uint16_t *adr, int len)
   1792 {
   1793 	int		i, odd;
   1794 	uint32_t	sum = 0;
   1795 	uint32_t	c = 0;
   1796 
   1797 	odd = len % 2;
   1798 	for (i = 0; i < (len / 2); i++) {
   1799 		sum += (adr[i] & 0xffff);
   1800 	}
   1801 	if (odd) {
   1802 		sum += adr[len / 2] & 0xff00;
   1803 	}
   1804 	while ((c = ((sum & 0xffff0000) >> 16)) != 0) {
   1805 		sum &= 0xffff;
   1806 		sum += c;
   1807 	}
   1808 	return (~sum & 0xffff);
   1809 }
   1810