Home | History | Annotate | Download | only in io
      1   5084   johnlev /*
      2   5084   johnlev  * CDDL HEADER START
      3   5084   johnlev  *
      4   5084   johnlev  * The contents of this file are subject to the terms of the
      5   5084   johnlev  * Common Development and Distribution License (the "License").
      6   5084   johnlev  * You may not use this file except in compliance with the License.
      7   5084   johnlev  *
      8   5084   johnlev  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9   5084   johnlev  * or http://www.opensolaris.org/os/licensing.
     10   5084   johnlev  * See the License for the specific language governing permissions
     11   5084   johnlev  * and limitations under the License.
     12   5084   johnlev  *
     13   5084   johnlev  * When distributing Covered Code, include this CDDL HEADER in each
     14   5084   johnlev  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15   5084   johnlev  * If applicable, add the following below this CDDL HEADER, with the
     16   5084   johnlev  * fields enclosed by brackets "[]" replaced with your own identifying
     17   5084   johnlev  * information: Portions Copyright [yyyy] [name of copyright owner]
     18   5084   johnlev  *
     19   5084   johnlev  * CDDL HEADER END
     20   5084   johnlev  */
     21   5084   johnlev 
     22   5084   johnlev /*
     23  10175    Stuart  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24   5084   johnlev  * Use is subject to license terms.
     25   5084   johnlev  */
     26   5084   johnlev 
     27   5084   johnlev /*
     28   5084   johnlev  *
     29   5084   johnlev  * Copyright (c) 2004 Christian Limpach.
     30   5084   johnlev  * All rights reserved.
     31   5084   johnlev  *
     32   5084   johnlev  * Redistribution and use in source and binary forms, with or without
     33   5084   johnlev  * modification, are permitted provided that the following conditions
     34   5084   johnlev  * are met:
     35   5084   johnlev  * 1. Redistributions of source code must retain the above copyright
     36   5084   johnlev  *    notice, this list of conditions and the following disclaimer.
     37   5084   johnlev  * 2. Redistributions in binary form must reproduce the above copyright
     38   5084   johnlev  *    notice, this list of conditions and the following disclaimer in the
     39   5084   johnlev  *    documentation and/or other materials provided with the distribution.
     40   5084   johnlev  * 3. This section intentionally left blank.
     41   5084   johnlev  * 4. The name of the author may not be used to endorse or promote products
     42   5084   johnlev  *    derived from this software without specific prior written permission.
     43   5084   johnlev  *
     44   5084   johnlev  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     45   5084   johnlev  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     46   5084   johnlev  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     47   5084   johnlev  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     48   5084   johnlev  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     49   5084   johnlev  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     50   5084   johnlev  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     51   5084   johnlev  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     52   5084   johnlev  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     53   5084   johnlev  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     54   5084   johnlev  */
     55   5084   johnlev /*
     56   5084   johnlev  * Section 3 of the above license was updated in response to bug 6379571.
     57   5084   johnlev  */
     58   5084   johnlev 
     59   5084   johnlev /*
     60  10958       dme  * xnf.c - GLDv3 network driver for domU.
     61  10958       dme  */
     62  10958       dme 
     63  10958       dme /*
     64  10958       dme  * This driver uses four per-instance locks:
     65  10958       dme  *
     66  10958       dme  * xnf_gref_lock:
     67  10958       dme  *
     68  10958       dme  *    Protects access to the grant reference list stored in
     69  10958       dme  *    xnf_gref_head. Grant references should be acquired and released
     70  10958       dme  *    using gref_get() and gref_put() respectively.
     71  10958       dme  *
     72  10958       dme  * xnf_schedlock:
     73  10958       dme  *
     74  10958       dme  *    Protects:
     75  10958       dme  *    xnf_need_sched - used to record that a previous transmit attempt
     76  10958       dme  *       failed (and consequently it will be necessary to call
     77  10958       dme  *       mac_tx_update() when transmit resources are available).
     78  10958       dme  *    xnf_pending_multicast - the number of multicast requests that
     79  10958       dme  *       have been submitted to the backend for which we have not
     80  10958       dme  *       processed responses.
     81  10958       dme  *
     82  10958       dme  * xnf_txlock:
     83  10958       dme  *
     84  10958       dme  *    Protects the transmit ring (xnf_tx_ring) and associated
     85  10958       dme  *    structures (notably xnf_tx_pkt_id and xnf_tx_pkt_id_head).
     86  10958       dme  *
     87  10958       dme  * xnf_rxlock:
     88  10958       dme  *
     89  10958       dme  *    Protects the receive ring (xnf_rx_ring) and associated
     90  10958       dme  *    structures (notably xnf_rx_pkt_info).
     91  10958       dme  *
     92  10958       dme  * If driver-global state that affects both the transmit and receive
     93  10958       dme  * rings is manipulated, both xnf_txlock and xnf_rxlock should be
     94  10958       dme  * held, in that order.
     95  10958       dme  *
     96  10958       dme  * xnf_schedlock is acquired both whilst holding xnf_txlock and
     97  10958       dme  * without. It should always be acquired after xnf_txlock if both are
     98  10958       dme  * held.
     99  10958       dme  *
    100  10958       dme  * Notes:
    101  10958       dme  * - atomic_add_64() is used to manipulate counters where we require
    102  10958       dme  *   accuracy. For counters intended only for observation by humans,
    103  10958       dme  *   post increment/decrement are used instead.
    104   5084   johnlev  */
    105   5084   johnlev 
    106   5084   johnlev #include <sys/types.h>
    107   5084   johnlev #include <sys/errno.h>
    108   5084   johnlev #include <sys/param.h>
    109   5084   johnlev #include <sys/sysmacros.h>
    110   5084   johnlev #include <sys/systm.h>
    111   5084   johnlev #include <sys/stream.h>
    112   5084   johnlev #include <sys/strsubr.h>
    113  10958       dme #include <sys/strsun.h>
    114   5084   johnlev #include <sys/conf.h>
    115   5084   johnlev #include <sys/ddi.h>
    116   5084   johnlev #include <sys/devops.h>
    117   5084   johnlev #include <sys/sunddi.h>
    118   5084   johnlev #include <sys/sunndi.h>
    119   5084   johnlev #include <sys/dlpi.h>
    120   5084   johnlev #include <sys/ethernet.h>
    121   5084   johnlev #include <sys/strsun.h>
    122   5084   johnlev #include <sys/pattr.h>
    123   5084   johnlev #include <inet/ip.h>
    124   7351       dme #include <inet/ip_impl.h>
    125   7351       dme #include <sys/gld.h>
    126   5084   johnlev #include <sys/modctl.h>
    127   8275      Eric #include <sys/mac_provider.h>
    128   5084   johnlev #include <sys/mac_ether.h>
    129   5084   johnlev #include <sys/bootinfo.h>
    130   5084   johnlev #include <sys/mach_mmu.h>
    131   5741       mrj #ifdef	XPV_HVM_DRIVER
    132   5741       mrj #include <sys/xpv_support.h>
    133   5741       mrj #include <sys/hypervisor.h>
    134   5741       mrj #else
    135   5741       mrj #include <sys/hypervisor.h>
    136   5084   johnlev #include <sys/evtchn_impl.h>
    137   5084   johnlev #include <sys/balloon_impl.h>
    138   5741       mrj #endif
    139   5741       mrj #include <xen/public/io/netif.h>
    140   5741       mrj #include <sys/gnttab.h>
    141   5084   johnlev #include <xen/sys/xendev.h>
    142   5741       mrj #include <sys/sdt.h>
    143  10958       dme #include <sys/note.h>
    144  10958       dme #include <sys/debug.h>
    145   5741       mrj 
    146   5741       mrj #include <io/xnf.h>
    147   5741       mrj 
    /*
     * XNF_DEBUG is switched on for DEBUG and lint builds. When it is
     * defined, two extra globals exist: xnf_debug (initially 0) and
     * xnf_debug_instance (initially NULL).
     *
     * NOTE(review): neither global is referenced in this part of the file;
     * presumably xnf_debug is a verbosity knob and xnf_debug_instance a
     * handle for use from a debugger -- confirm against their users.
     */
    148   5084   johnlev #if defined(DEBUG) || defined(__lint)
    149   5084   johnlev #define	XNF_DEBUG
    150  10958       dme #endif
    151  10958       dme 
    152  10958       dme #ifdef XNF_DEBUG
    153  10958       dme int xnf_debug = 0;
    154  10958       dme xnf_t *xnf_debug_instance = NULL;
    155   5084   johnlev #endif
    156   5084   johnlev 
    157   5084   johnlev /*
    158   5084   johnlev  * On a 32 bit PAE system physical and machine addresses are larger
    159   5084   johnlev  * than 32 bits.  ddi_btop() on such systems takes an unsigned long
    160   5084   johnlev  * argument, and so addresses above 4G are truncated before ddi_btop()
    161   5084   johnlev  * gets to see them.  To avoid this, code the shift operation here.
    162   5084   johnlev  */
    163   5084   johnlev #define	xnf_btop(addr)	((addr) >> PAGESHIFT)
    164   5084   johnlev 
    /*
     * Global (non-static) tunable, initially 1.
     *
     * NOTE(review): judging by the name this presumably bounds the number
     * of fragments used for a transmitted packet; its consumers are not in
     * this part of the file -- confirm before changing the default.
     */
    165  10958       dme unsigned int	xnf_max_tx_frags = 1;
    166   5741       mrj 
    167   5084   johnlev /*
    168  10958       dme  * Should we use the multicast control feature if the backend provides
    169  10958       dme  * it?
    170   5084   johnlev  */
    171  10958       dme boolean_t xnf_multicast_control = B_TRUE;
    172  10958       dme 
    173   5084   johnlev /*
    174  10958       dme  * Received packets below this size are copied to a new streams buffer
    175  10958       dme  * rather than being desballoc'ed.
    176  10958       dme  *
    177  10958       dme  * This value is chosen to accommodate traffic where there are a large
    178  10958       dme  * number of small packets. For data showing a typical distribution,
    179  10958       dme  * see:
    180  10958       dme  *
    181  10958       dme  * Sinha07a:
    182  10958       dme  *	Rishi Sinha, Christos Papadopoulos, and John
    183  10958       dme  *	Heidemann. Internet Packet Size Distributions: Some
    184  10958       dme  *	Observations. Technical Report ISI-TR-2007-643,
    185  10958       dme  *	USC/Information Sciences Institute, May, 2007. Originally
    186  10958       dme  *	released October 2005 as web page
    187  10958       dme  *	http://netweb.usc.edu/~sinha/pkt-sizes/.
    188  10958       dme  *	<http://www.isi.edu/~johnh/PAPERS/Sinha07a.html>.
    189   5084   johnlev  */
    190  10958       dme size_t xnf_rx_copy_limit = 64;
    191   5084   johnlev 
    /*
     * Sentinel values: each is the all-ones value (-1) converted to the
     * corresponding type, and is used to mean "no handle / no reference /
     * no id".
     */
    192  10958       dme #define	INVALID_GRANT_HANDLE	((grant_handle_t)-1)
    193  10958       dme #define	INVALID_GRANT_REF	((grant_ref_t)-1)
    194  10958       dme #define	INVALID_TX_ID		((uint16_t)-1)
    195  10958       dme 
    /*
     * TX_ID_TO_TXID(p, id): address of element `id' of p->xnf_tx_pkt_id,
     * i.e. a transmit id doubles as an index into that array.
     *
     * TX_ID_VALID(i): true if `i' is neither the INVALID_TX_ID sentinel
     * nor outside the range [0, NET_TX_RING_SIZE). Note that `i' is
     * evaluated twice, so it must not have side effects.
     */
    196  10958       dme #define	TX_ID_TO_TXID(p, id) (&((p)->xnf_tx_pkt_id[(id)]))
    197  10958       dme #define	TX_ID_VALID(i) (((i) != INVALID_TX_ID) && ((i) < NET_TX_RING_SIZE))
    198   5084   johnlev 
    199   5084   johnlev /* Required system entry points */
    200   5084   johnlev static int	xnf_attach(dev_info_t *, ddi_attach_cmd_t);
    201   5084   johnlev static int	xnf_detach(dev_info_t *, ddi_detach_cmd_t);
    202   5084   johnlev 
    203   5084   johnlev /* Required driver entry points for Nemo */
    204   5084   johnlev static int	xnf_start(void *);
    205   5084   johnlev static void	xnf_stop(void *);
    206   5084   johnlev static int	xnf_set_mac_addr(void *, const uint8_t *);
    207   5084   johnlev static int	xnf_set_multicast(void *, boolean_t, const uint8_t *);
    208   5084   johnlev static int	xnf_set_promiscuous(void *, boolean_t);
    209   5084   johnlev static mblk_t	*xnf_send(void *, mblk_t *);
    210   5084   johnlev static uint_t	xnf_intr(caddr_t);
    211   5084   johnlev static int	xnf_stat(void *, uint_t, uint64_t *);
    212   5084   johnlev static boolean_t xnf_getcapab(void *, mac_capab_t, void *);
    213   5084   johnlev 
    214   5084   johnlev /* Driver private functions */
    215   5084   johnlev static int xnf_alloc_dma_resources(xnf_t *);
    216   5084   johnlev static void xnf_release_dma_resources(xnf_t *);
    217   5084   johnlev static void xnf_release_mblks(xnf_t *);
    218  10958       dme 
    219  10958       dme static int xnf_buf_constructor(void *, void *, int);
    220  10958       dme static void xnf_buf_destructor(void *, void *);
    221  10958       dme static xnf_buf_t *xnf_buf_get(xnf_t *, int, boolean_t);
    222  10958       dme #pragma inline(xnf_buf_get)
    223  10958       dme static void xnf_buf_put(xnf_t *, xnf_buf_t *, boolean_t);
    224  10958       dme #pragma inline(xnf_buf_put)
    225  10958       dme static void xnf_buf_refresh(xnf_buf_t *);
    226  10958       dme #pragma inline(xnf_buf_refresh)
    227  10958       dme static void xnf_buf_recycle(xnf_buf_t *);
    228  10958       dme 
    229  10958       dme static int xnf_tx_buf_constructor(void *, void *, int);
    230  10958       dme static void xnf_tx_buf_destructor(void *, void *);
    231  10958       dme 
    232  10958       dme static grant_ref_t gref_get(xnf_t *);
    233  10958       dme #pragma inline(gref_get)
    234  10958       dme static void gref_put(xnf_t *, grant_ref_t);
    235  10958       dme #pragma inline(gref_put)
    236  10958       dme 
    237  10958       dme static xnf_txid_t *txid_get(xnf_t *);
    238  10958       dme #pragma inline(txid_get)
    239  10958       dme static void txid_put(xnf_t *, xnf_txid_t *);
    240  10958       dme #pragma inline(txid_put)
    241  10958       dme 
    242   5084   johnlev void xnf_send_driver_status(int, int);
    243  10958       dme static void xnf_rxbuf_hang(xnf_t *, xnf_buf_t *);
    244  10958       dme static int xnf_tx_clean_ring(xnf_t  *);
    245   5084   johnlev static void oe_state_change(dev_info_t *, ddi_eventcookie_t,
    246   5084   johnlev     void *, void *);
    247  10958       dme static boolean_t xnf_kstat_init(xnf_t *);
    248  10958       dme static void xnf_rx_collect(xnf_t *);
    249   5084   johnlev 
    /*
     * MAC callback table for this driver. The initializer is positional:
     * a flags word (MC_GETCAPAB) followed by the driver entry points
     * declared above, ending with xnf_getcapab.
     *
     * NOTE(review): one slot is deliberately NULL. Which optional callback
     * that is depends on the layout of mac_callbacks_t in
     * <sys/mac_provider.h>, which is not visible here -- confirm before
     * reordering or adding entries.
     */
    250   5084   johnlev static mac_callbacks_t xnf_callbacks = {
    251  10958       dme 	MC_GETCAPAB,
    252   5084   johnlev 	xnf_stat,
    253   5084   johnlev 	xnf_start,
    254   5084   johnlev 	xnf_stop,
    255   5084   johnlev 	xnf_set_promiscuous,
    256   5084   johnlev 	xnf_set_multicast,
    257   5084   johnlev 	xnf_set_mac_addr,
    258   5084   johnlev 	xnf_send,
    259  10958       dme 	NULL,
    260   5084   johnlev 	xnf_getcapab
    261   5084   johnlev };
    262   5084   johnlev 
    263   5084   johnlev /* DMA attributes for network ring buffer */
    /*
     * As the per-field comments state: any address in the full 64-bit
     * range is acceptable, the memory must be MMU_PAGESIZE aligned, and
     * the mapping must consist of exactly one segment. The values are the
     * same as those in buf_dma_attr.
     */
    264   5084   johnlev static ddi_dma_attr_t ringbuf_dma_attr = {
    265   5084   johnlev 	DMA_ATTR_V0,		/* version of this structure */
    266   5084   johnlev 	0,			/* lowest usable address */
    267   5084   johnlev 	0xffffffffffffffffULL,	/* highest usable address */
    268   5084   johnlev 	0x7fffffff,		/* maximum DMAable byte count */
    269   5084   johnlev 	MMU_PAGESIZE,		/* alignment in bytes */
    270   5084   johnlev 	0x7ff,			/* bitmap of burst sizes */
    271   5084   johnlev 	1,			/* minimum transfer */
    272   5084   johnlev 	0xffffffffU,		/* maximum transfer */
    273   5084   johnlev 	0xffffffffffffffffULL,	/* maximum segment length */
    274   5084   johnlev 	1,			/* maximum number of segments */
    275   5084   johnlev 	1,			/* granularity */
    276   5084   johnlev 	0,			/* flags (reserved) */
    277   5084   johnlev };
    278   5084   johnlev 
    279  10958       dme /* DMA attributes for transmit and receive data */
    /*
     * As the per-field comments state: any address in the full 64-bit
     * range is acceptable, the memory must be MMU_PAGESIZE aligned, and
     * the mapping must consist of exactly one segment. The values are the
     * same as those in ringbuf_dma_attr.
     */
    280  10958       dme static ddi_dma_attr_t buf_dma_attr = {
    281   5084   johnlev 	DMA_ATTR_V0,		/* version of this structure */
    282   5084   johnlev 	0,			/* lowest usable address */
    283   5084   johnlev 	0xffffffffffffffffULL,	/* highest usable address */
    284   5084   johnlev 	0x7fffffff,		/* maximum DMAable byte count */
    285   5084   johnlev 	MMU_PAGESIZE,		/* alignment in bytes */
    286   5084   johnlev 	0x7ff,			/* bitmap of burst sizes */
    287   5084   johnlev 	1,			/* minimum transfer */
    288   5084   johnlev 	0xffffffffU,		/* maximum transfer */
    289   5084   johnlev 	0xffffffffffffffffULL,	/* maximum segment length */
    290   5084   johnlev 	1,			/* maximum number of segments */
    291   5084   johnlev 	1,			/* granularity */
    292   5084   johnlev 	0,			/* flags (reserved) */
    293   5084   johnlev };
    294   5084   johnlev 
    295   5084   johnlev /* DMA access attributes for registers and descriptors */
    /*
     * Little-endian structure access with strict ordering. Differs from
     * data_accattr only in the endianness flag.
     */
    296   5084   johnlev static ddi_device_acc_attr_t accattr = {
    297   5084   johnlev 	DDI_DEVICE_ATTR_V0,
    298   5084   johnlev 	DDI_STRUCTURE_LE_ACC,	/* This is a little-endian device */
    299   5084   johnlev 	DDI_STRICTORDER_ACC
    300   5084   johnlev };
    301   5084   johnlev 
    302   5084   johnlev /* DMA access attributes for data: NOT to be byte swapped. */
    /*
     * Same version and ordering as accattr, but with DDI_NEVERSWAP_ACC so
     * that packet payload bytes are passed through untouched.
     */
    303   5084   johnlev static ddi_device_acc_attr_t data_accattr = {
    304   5084   johnlev 	DDI_DEVICE_ATTR_V0,
    305   5084   johnlev 	DDI_NEVERSWAP_ACC,
    306   5084   johnlev 	DDI_STRICTORDER_ACC
    307   5084   johnlev };
    308   5084   johnlev 
    /*
     * Module plumbing.
     *
     * xnf_dev_ops is generated by DDI_DEFINE_STREAM_OPS with xnf_attach
     * and xnf_detach as the attach/detach entry points and
     * ddi_quiesce_not_supported as the final argument. It is handed to
     * mac_init_ops()/mac_fini_ops() in _init().
     *
     * xnf_modldrv ties the description string and xnf_dev_ops to
     * mod_driverops; modlinkage (MODREV_1) is what _init() passes to
     * mod_install() and _info() passes to mod_info().
     */
    309   5084   johnlev DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach,
    310   7656    Sherry     nodev, NULL, D_MP, NULL, ddi_quiesce_not_supported);
    311   5084   johnlev 
    312   5084   johnlev static struct modldrv xnf_modldrv = {
    313   7351       dme 	&mod_driverops,
    314   7351       dme 	"Virtual Ethernet driver",
    315   7351       dme 	&xnf_dev_ops
    316   5084   johnlev };
    317   5084   johnlev 
    318   5084   johnlev static struct modlinkage modlinkage = {
    319   5084   johnlev 	MODREV_1, &xnf_modldrv, NULL
    320   5084   johnlev };
    321   5084   johnlev 
    322   5084   johnlev int
    323   5084   johnlev _init(void)
    324   5084   johnlev {
    325   5084   johnlev 	int r;
    326   5084   johnlev 
    327   5084   johnlev 	mac_init_ops(&xnf_dev_ops, "xnf");
    328   5084   johnlev 	r = mod_install(&modlinkage);
    329   5084   johnlev 	if (r != DDI_SUCCESS)
    330   5084   johnlev 		mac_fini_ops(&xnf_dev_ops);
    331   5084   johnlev 
    332   5084   johnlev 	return (r);
    333   5084   johnlev }
    334   5084   johnlev 
    335   5084   johnlev int
    336   5084   johnlev _fini(void)
    337   5084   johnlev {
    338  10958       dme 	return (EBUSY); /* XXPV should be removable */
    339   5084   johnlev }
    340   5084   johnlev 
    341   5084   johnlev int
    342   5084   johnlev _info(struct modinfo *modinfop)
    343   5084   johnlev {
    344   5084   johnlev 	return (mod_info(&modlinkage, modinfop));
    345   5084   johnlev }
    346   5084   johnlev 
    347  10958       dme /*
    348  10958       dme  * Acquire a grant reference.
    349  10958       dme  */
    350  10958       dme static grant_ref_t
    351  10958       dme gref_get(xnf_t *xnfp)
    352  10958       dme {
    353  10958       dme 	grant_ref_t gref;
    354  10958       dme 
    355  10958       dme 	mutex_enter(&xnfp->xnf_gref_lock);
    356  10958       dme 
    357  10958       dme 	do {
    358  10958       dme 		gref = gnttab_claim_grant_reference(&xnfp->xnf_gref_head);
    359  10958       dme 
    360  10958       dme 	} while ((gref == INVALID_GRANT_REF) &&
    361  10958       dme 	    (gnttab_alloc_grant_references(16, &xnfp->xnf_gref_head) == 0));
    362  10958       dme 
    363  10958       dme 	mutex_exit(&xnfp->xnf_gref_lock);
    364  10958       dme 
    365  10958       dme 	if (gref == INVALID_GRANT_REF) {
    366  10958       dme 		xnfp->xnf_stat_gref_failure++;
    367  10958       dme 	} else {
    368  10958       dme 		atomic_add_64(&xnfp->xnf_stat_gref_outstanding, 1);
    369  10958       dme 		if (xnfp->xnf_stat_gref_outstanding > xnfp->xnf_stat_gref_peak)
    370  10958       dme 			xnfp->xnf_stat_gref_peak =
    371  10958       dme 			    xnfp->xnf_stat_gref_outstanding;
    372  10958       dme 	}
    373  10958       dme 
    374  10958       dme 	return (gref);
    375  10958       dme }
    376  10958       dme 
    377  10958       dme /*
    378  10958       dme  * Release a grant reference.
    379  10958       dme  */
    380  10958       dme static void
    381  10958       dme gref_put(xnf_t *xnfp, grant_ref_t gref)
    382  10958       dme {
    383  10958       dme 	ASSERT(gref != INVALID_GRANT_REF);
    384  10958       dme 
    385  10958       dme 	mutex_enter(&xnfp->xnf_gref_lock);
    386  10958       dme 	gnttab_release_grant_reference(&xnfp->xnf_gref_head, gref);
    387  10958       dme 	mutex_exit(&xnfp->xnf_gref_lock);
    388  10958       dme 
    389  10958       dme 	atomic_add_64(&xnfp->xnf_stat_gref_outstanding, -1);
    390  10958       dme }
    391  10958       dme 
    392  10958       dme /*
    393  10958       dme  * Acquire a transmit id.
    394  10958       dme  */
    395  10958       dme static xnf_txid_t *
    396  10958       dme txid_get(xnf_t *xnfp)
    397  10958       dme {
    398  10958       dme 	xnf_txid_t *tidp;
    399  10958       dme 
    400  10958       dme 	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
    401  10958       dme 
    402  10958       dme 	if (xnfp->xnf_tx_pkt_id_head == INVALID_TX_ID)
    403  10958       dme 		return (NULL);
    404  10958       dme 
    405  10958       dme 	ASSERT(TX_ID_VALID(xnfp->xnf_tx_pkt_id_head));
    406  10958       dme 
    407  10958       dme 	tidp = TX_ID_TO_TXID(xnfp, xnfp->xnf_tx_pkt_id_head);
    408  10958       dme 	xnfp->xnf_tx_pkt_id_head = tidp->next;
    409  10958       dme 	tidp->next = INVALID_TX_ID;
    410  10958       dme 
    411  10958       dme 	ASSERT(tidp->txbuf == NULL);
    412  10958       dme 
    413  10958       dme 	return (tidp);
    414  10958       dme }
    415  10958       dme 
    416  10958       dme /*
    417  10958       dme  * Release a transmit id.
    418  10958       dme  */
    419  10958       dme static void
    420  10958       dme txid_put(xnf_t *xnfp, xnf_txid_t *tidp)
    421  10958       dme {
    422  10958       dme 	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
    423  10958       dme 	ASSERT(TX_ID_VALID(tidp->id));
    424  10958       dme 	ASSERT(tidp->next == INVALID_TX_ID);
    425  10958       dme 
    426  10958       dme 	tidp->txbuf = NULL;
    427  10958       dme 	tidp->next = xnfp->xnf_tx_pkt_id_head;
    428  10958       dme 	xnfp->xnf_tx_pkt_id_head = tidp->id;
    429  10958       dme }
    430  10958       dme 
    431  10958       dme /*
    432  10958       dme  * Get `wanted' slots in the transmit ring, waiting for at least that
    433  10958       dme  * number if `wait' is B_TRUE. Force the ring to be cleaned by setting
    434  10958       dme  * `wanted' to zero.
    435  10958       dme  *
    436  10958       dme  * Return the number of slots available.
    437  10958       dme  */
    438  10958       dme static int
    439  10958       dme tx_slots_get(xnf_t *xnfp, int wanted, boolean_t wait)
    440  10958       dme {
    441  10958       dme 	int slotsfree;
    442  10958       dme 	boolean_t forced_clean = (wanted == 0);
    443  10958       dme 
    444  10958       dme 	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
    445  10958       dme 
    446  10958       dme 	/* LINTED: constant in conditional context */
    447  10958       dme 	while (B_TRUE) {
    448  10958       dme 		slotsfree = RING_FREE_REQUESTS(&xnfp->xnf_tx_ring);
    449  10958       dme 
    450  10958       dme 		if ((slotsfree < wanted) || forced_clean)
    451  10958       dme 			slotsfree = xnf_tx_clean_ring(xnfp);
    452  10958       dme 
    453  10958       dme 		/*
    454  10958       dme 		 * If there are more than we need free, tell other
    455  10958       dme 		 * people to come looking again. We hold txlock, so we
    456  10958       dme 		 * are able to take our slots before anyone else runs.
    457  10958       dme 		 */
    458  10958       dme 		if (slotsfree > wanted)
    459  10958       dme 			cv_broadcast(&xnfp->xnf_cv_tx_slots);
    460  10958       dme 
    461  10958       dme 		if (slotsfree >= wanted)
    462  10958       dme 			break;
    463  10958       dme 
    464  10958       dme 		if (!wait)
    465  10958       dme 			break;
    466  10958       dme 
    467  10958       dme 		cv_wait(&xnfp->xnf_cv_tx_slots, &xnfp->xnf_txlock);
    468  10958       dme 	}
    469  10958       dme 
    470  10958       dme 	ASSERT(slotsfree <= RING_SIZE(&(xnfp->xnf_tx_ring)));
    471  10958       dme 
    472  10958       dme 	return (slotsfree);
    473  10958       dme }
    474  10958       dme 
    475   5084   johnlev static int
    476   5084   johnlev xnf_setup_rings(xnf_t *xnfp)
    477   5084   johnlev {
    478  10958       dme 	domid_t			oeid;
    479  10958       dme 	struct xenbus_device	*xsd;
    480   5084   johnlev 	RING_IDX		i;
    481  10958       dme 	int			err;
    482  10958       dme 	xnf_txid_t		*tidp;
    483  10958       dme 	xnf_buf_t **bdescp;
    484   5084   johnlev 
    485   5741       mrj 	oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
    486   5741       mrj 	xsd = xvdi_get_xsd(xnfp->xnf_devinfo);
    487   5084   johnlev 
    488  10958       dme 	if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF)
    489   5741       mrj 		gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);
    490   5084   johnlev 
    491   5084   johnlev 	err = gnttab_grant_foreign_access(oeid,
    492   5741       mrj 	    xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0);
    493   5084   johnlev 	if (err <= 0) {
    494   5084   johnlev 		err = -err;
    495   5084   johnlev 		xenbus_dev_error(xsd, err, "granting access to tx ring page");
    496   5084   johnlev 		goto out;
    497   5084   johnlev 	}
    498   5741       mrj 	xnfp->xnf_tx_ring_ref = (grant_ref_t)err;
    499   5084   johnlev 
    500  10958       dme 	if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF)
    501   5741       mrj 		gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);
    502   5084   johnlev 
    503   5084   johnlev 	err = gnttab_grant_foreign_access(oeid,
    504   5741       mrj 	    xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0);
    505   5084   johnlev 	if (err <= 0) {
    506   5084   johnlev 		err = -err;
    507   5084   johnlev 		xenbus_dev_error(xsd, err, "granting access to rx ring page");
    508   5084   johnlev 		goto out;
    509   5084   johnlev 	}
    510   5741       mrj 	xnfp->xnf_rx_ring_ref = (grant_ref_t)err;
    511   5084   johnlev 
    512  10958       dme 	mutex_enter(&xnfp->xnf_txlock);
    513   5084   johnlev 
    514   5084   johnlev 	/*
    515  10958       dme 	 * Setup/cleanup the TX ring.  Note that this can lose packets
    516  10958       dme 	 * after a resume, but we expect to stagger on.
    517   5084   johnlev 	 */
    518  10958       dme 	xnfp->xnf_tx_pkt_id_head = INVALID_TX_ID; /* I.e. emtpy list. */
    519  10958       dme 	for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0];
    520  10958       dme 	    i < NET_TX_RING_SIZE;
    521  10958       dme 	    i++, tidp++) {
    522  10958       dme 		xnf_txbuf_t *txp;
    523   5084   johnlev 
    524  10958       dme 		tidp->id = i;
    525   5084   johnlev 
    526  10958       dme 		txp = tidp->txbuf;
    527  10958       dme 		if (txp == NULL) {
    528  10958       dme 			tidp->next = INVALID_TX_ID; /* Appease txid_put(). */
    529  10958       dme 			txid_put(xnfp, tidp);
    530   5084   johnlev 			continue;
    531   5084   johnlev 		}
    532   5084   johnlev 
    533  10958       dme 		ASSERT(txp->tx_txreq.gref != INVALID_GRANT_REF);
    534  10958       dme 		ASSERT(txp->tx_mp != NULL);
    535   5084   johnlev 
    536  10958       dme 		switch (txp->tx_type) {
    537  10958       dme 		case TX_DATA:
    538  10958       dme 			VERIFY(gnttab_query_foreign_access(txp->tx_txreq.gref)
    539  10958       dme 			    == 0);
    540   5084   johnlev 
    541  10958       dme 			if (txp->tx_bdesc == NULL) {
    542  10958       dme 				(void) gnttab_end_foreign_access_ref(
    543  10958       dme 				    txp->tx_txreq.gref, 1);
    544  10958       dme 				gref_put(xnfp, txp->tx_txreq.gref);
    545  10958       dme 				(void) ddi_dma_unbind_handle(
    546  10958       dme 				    txp->tx_dma_handle);
    547  10958       dme 			} else {
    548  10958       dme 				xnf_buf_put(xnfp, txp->tx_bdesc, B_TRUE);
    549  10958       dme 			}
    550   5084   johnlev 
    551  10958       dme 			freemsg(txp->tx_mp);
    552  10958       dme 			txid_put(xnfp, tidp);
    553  10958       dme 			kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
    554  10958       dme 
    555  10958       dme 			break;
    556  10958       dme 
    557  10958       dme 		case TX_MCAST_REQ:
    558  10958       dme 			txp->tx_type = TX_MCAST_RSP;
    559  10958       dme 			txp->tx_status = NETIF_RSP_DROPPED;
    560  10958       dme 			cv_broadcast(&xnfp->xnf_cv_multicast);
    561  10958       dme 
    562  10958       dme 			/*
    563  10958       dme 			 * The request consumed two slots in the ring,
    564  10958       dme 			 * yet only a single xnf_txid_t is used. Step
    565  10958       dme 			 * over the empty slot.
    566  10958       dme 			 */
    567  10958       dme 			i++;
    568  10958       dme 			ASSERT(i < NET_TX_RING_SIZE);
    569  10958       dme 
    570  10958       dme 			break;
    571  10958       dme 
    572  10958       dme 		case TX_MCAST_RSP:
    573  10958       dme 			break;
    574   5084   johnlev 		}
    575   5084   johnlev 	}
    576   7351       dme 
    577   7351       dme 	/* LINTED: constant in conditional context */
    578   7351       dme 	SHARED_RING_INIT(xnfp->xnf_tx_ring.sring);
    579  10958       dme 	/* LINTED: constant in conditional context */
    580  10958       dme 	FRONT_RING_INIT(&xnfp->xnf_tx_ring,
    581  10958       dme 	    xnfp->xnf_tx_ring.sring, PAGESIZE);
    582   5084   johnlev 
    583   5741       mrj 	mutex_exit(&xnfp->xnf_txlock);
    584   5084   johnlev 
    585  10958       dme 	mutex_enter(&xnfp->xnf_rxlock);
    586  10958       dme 
    587   5084   johnlev 	/*
    588  10958       dme 	 * Clean out any buffers currently posted to the receive ring
    589  10958       dme 	 * before we reset it.
    590   5084   johnlev 	 */
    591  10958       dme 	for (i = 0, bdescp = &xnfp->xnf_rx_pkt_info[0];
    592  10958       dme 	    i < NET_RX_RING_SIZE;
    593  10958       dme 	    i++, bdescp++) {
    594  10958       dme 		if (*bdescp != NULL) {
    595  10958       dme 			xnf_buf_put(xnfp, *bdescp, B_FALSE);
    596  10958       dme 			*bdescp = NULL;
    597   5084   johnlev 		}
    598   5084   johnlev 	}
    599   5741       mrj 
    600   7351       dme 	/* LINTED: constant in conditional context */
    601   7351       dme 	SHARED_RING_INIT(xnfp->xnf_rx_ring.sring);
    602  10958       dme 	/* LINTED: constant in conditional context */
    603  10958       dme 	FRONT_RING_INIT(&xnfp->xnf_rx_ring,
    604  10958       dme 	    xnfp->xnf_rx_ring.sring, PAGESIZE);
    605   7351       dme 
    606  10958       dme 	/*
    607  10958       dme 	 * Fill the ring with buffers.
    608  10958       dme 	 */
    609   5084   johnlev 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
    610  10958       dme 		xnf_buf_t *bdesc;
    611  10958       dme 
    612  10958       dme 		bdesc = xnf_buf_get(xnfp, KM_SLEEP, B_FALSE);
    613  10958       dme 		VERIFY(bdesc != NULL);
    614  10958       dme 		xnf_rxbuf_hang(xnfp, bdesc);
    615   5084   johnlev 	}
    616  10958       dme 
    617   5084   johnlev 	/* LINTED: constant in conditional context */
    618   5741       mrj 	RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring);
    619   5084   johnlev 
    620  10958       dme 	mutex_exit(&xnfp->xnf_rxlock);
    621   5084   johnlev 
    622   5084   johnlev 	return (0);
    623   5084   johnlev 
    624   5084   johnlev out:
    625  10958       dme 	if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF)
    626   5741       mrj 		gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);
    627  10958       dme 	xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF;
    628   5084   johnlev 
    629  10958       dme 	if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF)
    630   5741       mrj 		gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);
    631  10958       dme 	xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF;
    632   5084   johnlev 
    633   5084   johnlev 	return (err);
    634   5084   johnlev }
    635   5084   johnlev 
    636   5084   johnlev /*
    637   5084   johnlev  * Connect driver to back end, called to set up communication with
    638   5084   johnlev  * back end driver both initially and on resume after restore/migrate.
    639   5084   johnlev  */
    640   5084   johnlev void
    641   5084   johnlev xnf_be_connect(xnf_t *xnfp)
    642   5084   johnlev {
    643   5084   johnlev 	const char	*message;
    644   5084   johnlev 	xenbus_transaction_t xbt;
    645   5741       mrj 	struct		xenbus_device *xsd;
    646   5084   johnlev 	char		*xsname;
    647   6899  cz147101 	int		err;
    648   5084   johnlev 
    649   5741       mrj 	ASSERT(!xnfp->xnf_connected);
    650   5084   johnlev 
    651   5741       mrj 	xsd = xvdi_get_xsd(xnfp->xnf_devinfo);
    652   5741       mrj 	xsname = xvdi_get_xsname(xnfp->xnf_devinfo);
    653   5084   johnlev 
    654   5084   johnlev 	err = xnf_setup_rings(xnfp);
    655   5084   johnlev 	if (err != 0) {
    656   5084   johnlev 		cmn_err(CE_WARN, "failed to set up tx/rx rings");
    657   5084   johnlev 		xenbus_dev_error(xsd, err, "setting up ring");
    658   5084   johnlev 		return;
    659   5084   johnlev 	}
    660   5084   johnlev 
    661   5084   johnlev again:
    662   5084   johnlev 	err = xenbus_transaction_start(&xbt);
    663   5084   johnlev 	if (err != 0) {
    664   5084   johnlev 		xenbus_dev_error(xsd, EIO, "starting transaction");
    665   5084   johnlev 		return;
    666   5084   johnlev 	}
    667   5084   johnlev 
    668   5084   johnlev 	err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u",
    669   5741       mrj 	    xnfp->xnf_tx_ring_ref);
    670   5084   johnlev 	if (err != 0) {
    671   5084   johnlev 		message = "writing tx ring-ref";
    672   5084   johnlev 		goto abort_transaction;
    673   5084   johnlev 	}
    674   5084   johnlev 
    675   5084   johnlev 	err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u",
    676   5741       mrj 	    xnfp->xnf_rx_ring_ref);
    677   5084   johnlev 	if (err != 0) {
    678   5084   johnlev 		message = "writing rx ring-ref";
    679   5084   johnlev 		goto abort_transaction;
    680   5084   johnlev 	}
    681   5084   johnlev 
    682   5741       mrj 	err = xenbus_printf(xbt, xsname, "event-channel", "%u",
    683   5741       mrj 	    xnfp->xnf_evtchn);
    684   5084   johnlev 	if (err != 0) {
    685   5084   johnlev 		message = "writing event-channel";
    686   5084   johnlev 		goto abort_transaction;
    687   5084   johnlev 	}
    688   5084   johnlev 
    689   5084   johnlev 	err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1);
    690   5084   johnlev 	if (err != 0) {
    691   5084   johnlev 		message = "writing feature-rx-notify";
    692   5084   johnlev 		goto abort_transaction;
    693   5084   johnlev 	}
    694   5084   johnlev 
    695  10958       dme 	err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 1);
    696   5741       mrj 	if (err != 0) {
    697   5741       mrj 		message = "writing request-rx-copy";
    698   5084   johnlev 		goto abort_transaction;
    699   5084   johnlev 	}
    700   5084   johnlev 
    701  10958       dme 	if (xnfp->xnf_be_mcast_control) {
    702  10958       dme 		err = xenbus_printf(xbt, xsname, "request-multicast-control",
    703  10958       dme 		    "%d", 1);
    704  10958       dme 		if (err != 0) {
    705  10958       dme 			message = "writing request-multicast-control";
    706  10958       dme 			goto abort_transaction;
    707  10958       dme 		}
    708  10958       dme 	}
    709  10958       dme 
    710  10958       dme 	err = xvdi_switch_state(xnfp->xnf_devinfo, xbt, XenbusStateConnected);
    711   5084   johnlev 	if (err != 0) {
    712  10958       dme 		message = "switching state to XenbusStateConnected";
    713   5084   johnlev 		goto abort_transaction;
    714   5084   johnlev 	}
    715   5084   johnlev 
    716   5084   johnlev 	err = xenbus_transaction_end(xbt, 0);
    717   5084   johnlev 	if (err != 0) {
    718   5084   johnlev 		if (err == EAGAIN)
    719   5084   johnlev 			goto again;
    720   5084   johnlev 		xenbus_dev_error(xsd, err, "completing transaction");
    721   5084   johnlev 	}
    722   5084   johnlev 
    723   5084   johnlev 	return;
    724   5084   johnlev 
    725   5084   johnlev abort_transaction:
    726   5084   johnlev 	(void) xenbus_transaction_end(xbt, 1);
    727   5084   johnlev 	xenbus_dev_error(xsd, err, "%s", message);
    728   6899  cz147101 }
    729   6899  cz147101 
    730   6899  cz147101 /*
    731  10958       dme  * Read configuration information from xenstore.
    732   6899  cz147101  */
    733   6899  cz147101 void
    734   6899  cz147101 xnf_read_config(xnf_t *xnfp)
    735   6899  cz147101 {
    736  10958       dme 	int err, be_cap;
    737  10958       dme 	char mac[ETHERADDRL * 3];
    738  10958       dme 	char *oename = xvdi_get_oename(xnfp->xnf_devinfo);
    739   6899  cz147101 
    740  10958       dme 	err = xenbus_scanf(XBT_NULL, oename, "mac",
    741   6899  cz147101 	    "%s", (char *)&mac[0]);
    742   6899  cz147101 	if (err != 0) {
    743   6899  cz147101 		/*
    744   6899  cz147101 		 * bad: we're supposed to be set up with a proper mac
    745   6899  cz147101 		 * addr. at this point
    746   6899  cz147101 		 */
    747   6899  cz147101 		cmn_err(CE_WARN, "%s%d: no mac address",
    748   6899  cz147101 		    ddi_driver_name(xnfp->xnf_devinfo),
    749   6899  cz147101 		    ddi_get_instance(xnfp->xnf_devinfo));
    750   6899  cz147101 			return;
    751   6899  cz147101 	}
    752   6899  cz147101 	if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) {
    753   6899  cz147101 		err = ENOENT;
    754   6899  cz147101 		xenbus_dev_error(xvdi_get_xsd(xnfp->xnf_devinfo), ENOENT,
    755   6899  cz147101 		    "parsing %s/mac", xvdi_get_xsname(xnfp->xnf_devinfo));
    756   6899  cz147101 		return;
    757   6899  cz147101 	}
    758   6899  cz147101 
    759  10958       dme 	err = xenbus_scanf(XBT_NULL, oename,
    760  10958       dme 	    "feature-rx-copy", "%d", &be_cap);
    761   6899  cz147101 	/*
    762   6899  cz147101 	 * If we fail to read the store we assume that the key is
    763   6899  cz147101 	 * absent, implying an older domain at the far end.  Older
    764  10958       dme 	 * domains cannot do HV copy.
    765   6899  cz147101 	 */
    766   6899  cz147101 	if (err != 0)
    767  10958       dme 		be_cap = 0;
    768  10958       dme 	xnfp->xnf_be_rx_copy = (be_cap != 0);
    769  10958       dme 
    770  10958       dme 	err = xenbus_scanf(XBT_NULL, oename,
    771  10958       dme 	    "feature-multicast-control", "%d", &be_cap);
    772   6899  cz147101 	/*
    773  10958       dme 	 * If we fail to read the store we assume that the key is
    774  10958       dme 	 * absent, implying an older domain at the far end.  Older
    775  10958       dme 	 * domains do not support multicast control.
    776   6899  cz147101 	 */
    777  10958       dme 	if (err != 0)
    778  10958       dme 		be_cap = 0;
    779  10958       dme 	xnfp->xnf_be_mcast_control = (be_cap != 0) && xnf_multicast_control;
    780   5084   johnlev }
    781   5084   johnlev 
    782   5084   johnlev /*
    783   5084   johnlev  *  attach(9E) -- Attach a device to the system
    784   5084   johnlev  */
    785   5084   johnlev static int
    786   5084   johnlev xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
    787   5084   johnlev {
    788   5084   johnlev 	mac_register_t *macp;
    789   5084   johnlev 	xnf_t *xnfp;
    790   5084   johnlev 	int err;
    791  10958       dme 	char cachename[32];
    792   5084   johnlev 
    793   5084   johnlev #ifdef XNF_DEBUG
    794  10958       dme 	if (xnf_debug & XNF_DEBUG_DDI)
    795   5084   johnlev 		printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo),
    796   5084   johnlev 		    (void *)devinfo);
    797   5084   johnlev #endif
    798   5084   johnlev 
    799   5084   johnlev 	switch (cmd) {
    800   5084   johnlev 	case DDI_RESUME:
    801   5084   johnlev 		xnfp = ddi_get_driver_private(devinfo);
    802  10958       dme 		xnfp->xnf_gen++;
    803   5084   johnlev 
    804   5084   johnlev 		(void) xvdi_resume(devinfo);
    805   5084   johnlev 		(void) xvdi_alloc_evtchn(devinfo);
    806   5741       mrj 		xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo);
    807   5741       mrj #ifdef XPV_HVM_DRIVER
    808   5741       mrj 		ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr,
    809   5741       mrj 		    xnfp);
    810   5741       mrj #else
    811   5084   johnlev 		(void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr,
    812   5084   johnlev 		    (caddr_t)xnfp);
    813   5741       mrj #endif
    814   5084   johnlev 		return (DDI_SUCCESS);
    815   5084   johnlev 
    816   5084   johnlev 	case DDI_ATTACH:
    817   5084   johnlev 		break;
    818   5084   johnlev 
    819   5084   johnlev 	default:
    820   5084   johnlev 		return (DDI_FAILURE);
    821   5084   johnlev 	}
    822   5084   johnlev 
    823   5084   johnlev 	/*
    824   5084   johnlev 	 *  Allocate gld_mac_info_t and xnf_instance structures
    825   5084   johnlev 	 */
    826   5084   johnlev 	macp = mac_alloc(MAC_VERSION);
    827   5084   johnlev 	if (macp == NULL)
    828   5084   johnlev 		return (DDI_FAILURE);
    829   5084   johnlev 	xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP);
    830   5084   johnlev 
    831   5084   johnlev 	macp->m_dip = devinfo;
    832   5084   johnlev 	macp->m_driver = xnfp;
    833   5741       mrj 	xnfp->xnf_devinfo = devinfo;
    834   5084   johnlev 
    835   5084   johnlev 	macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
    836   5741       mrj 	macp->m_src_addr = xnfp->xnf_mac_addr;
    837   5084   johnlev 	macp->m_callbacks = &xnf_callbacks;
    838   5084   johnlev 	macp->m_min_sdu = 0;
    839   5084   johnlev 	macp->m_max_sdu = XNF_MAXPKT;
    840   5084   johnlev 
    841   5741       mrj 	xnfp->xnf_running = B_FALSE;
    842   5741       mrj 	xnfp->xnf_connected = B_FALSE;
    843  10958       dme 	xnfp->xnf_be_rx_copy = B_FALSE;
    844  10958       dme 	xnfp->xnf_be_mcast_control = B_FALSE;
    845   7521       dme 	xnfp->xnf_need_sched = B_FALSE;
    846   5741       mrj 
    847  10958       dme 	xnfp->xnf_rx_head = NULL;
    848  10958       dme 	xnfp->xnf_rx_tail = NULL;
    849  10958       dme 	xnfp->xnf_rx_new_buffers_posted = B_FALSE;
    850  10958       dme 
    851   5741       mrj #ifdef XPV_HVM_DRIVER
    852   6450       rab 	/*
    853   6450       rab 	 * Report our version to dom0.
    854   6450       rab 	 */
    855  10175    Stuart 	if (xenbus_printf(XBT_NULL, "guest/xnf", "version", "%d",
    856   6450       rab 	    HVMPV_XNF_VERS))
    857   6450       rab 		cmn_err(CE_WARN, "xnf: couldn't write version\n");
    858   5741       mrj #endif
    859   5084   johnlev 
    860   5084   johnlev 	/*
    861   5084   johnlev 	 * Get the iblock cookie with which to initialize the mutexes.
    862   5084   johnlev 	 */
    863   5741       mrj 	if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie)
    864   5084   johnlev 	    != DDI_SUCCESS)
    865   5084   johnlev 		goto failure;
    866  10958       dme 
    867   5741       mrj 	mutex_init(&xnfp->xnf_txlock,
    868   5741       mrj 	    NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
    869  10958       dme 	mutex_init(&xnfp->xnf_rxlock,
    870   5741       mrj 	    NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
    871  10958       dme 	mutex_init(&xnfp->xnf_schedlock,
    872  10958       dme 	    NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
    873  10958       dme 	mutex_init(&xnfp->xnf_gref_lock,
    874  10958       dme 	    NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
    875   5084   johnlev 
    876  10958       dme 	cv_init(&xnfp->xnf_cv_state, NULL, CV_DEFAULT, NULL);
    877  10958       dme 	cv_init(&xnfp->xnf_cv_multicast, NULL, CV_DEFAULT, NULL);
    878  10958       dme 	cv_init(&xnfp->xnf_cv_tx_slots, NULL, CV_DEFAULT, NULL);
    879  10958       dme 
    880  10958       dme 	(void) sprintf(cachename, "xnf_buf_cache_%d",
    881  10958       dme 	    ddi_get_instance(devinfo));
    882  10958       dme 	xnfp->xnf_buf_cache = kmem_cache_create(cachename,
    883  10958       dme 	    sizeof (xnf_buf_t), 0,
    884  10958       dme 	    xnf_buf_constructor, xnf_buf_destructor,
    885  10958       dme 	    NULL, xnfp, NULL, 0);
    886  10958       dme 	if (xnfp->xnf_buf_cache == NULL)
    887  10958       dme 		goto failure_0;
    888  10958       dme 
    889  10958       dme 	(void) sprintf(cachename, "xnf_tx_buf_cache_%d",
    890  10958       dme 	    ddi_get_instance(devinfo));
    891  10958       dme 	xnfp->xnf_tx_buf_cache = kmem_cache_create(cachename,
    892  10958       dme 	    sizeof (xnf_txbuf_t), 0,
    893  10958       dme 	    xnf_tx_buf_constructor, xnf_tx_buf_destructor,
    894  10958       dme 	    NULL, xnfp, NULL, 0);
    895  10958       dme 	if (xnfp->xnf_tx_buf_cache == NULL)
    896   5741       mrj 		goto failure_1;
    897  10958       dme 
    898  10958       dme 	xnfp->xnf_gref_head = INVALID_GRANT_REF;
    899  10958       dme 
    900   5084   johnlev 	if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) {
    901   5084   johnlev 		cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize "
    902   5741       mrj 		    "driver data structures",
    903   5741       mrj 		    ddi_get_instance(xnfp->xnf_devinfo));
    904  10958       dme 		goto failure_2;
    905   5084   johnlev 	}
    906   5084   johnlev 
    907   5741       mrj 	xnfp->xnf_rx_ring.sring->rsp_event =
    908   5741       mrj 	    xnfp->xnf_tx_ring.sring->rsp_event = 1;
    909   5084   johnlev 
    910  10958       dme 	xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF;
    911  10958       dme 	xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF;
    912   5084   johnlev 
    913   5084   johnlev 	/* set driver private pointer now */
    914   5084   johnlev 	ddi_set_driver_private(devinfo, xnfp);
    915   5084   johnlev 
    916   5084   johnlev 	if (!xnf_kstat_init(xnfp))
    917  10958       dme 		goto failure_3;
    918   5084   johnlev 
    919   5084   johnlev 	/*
    920   5084   johnlev 	 * Allocate an event channel, add the interrupt handler and
    921   5084   johnlev 	 * bind it to the event channel.
    922   5084   johnlev 	 */
    923   5084   johnlev 	(void) xvdi_alloc_evtchn(devinfo);
    924   5741       mrj 	xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo);
    925   5741       mrj #ifdef XPV_HVM_DRIVER
    926   5741       mrj 	ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp);
    927   5741       mrj #else
    928   5084   johnlev 	(void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp);
    929   5741       mrj #endif
    930   5084   johnlev 
    931   5741       mrj 	err = mac_register(macp, &xnfp->xnf_mh);
    932   5084   johnlev 	mac_free(macp);
    933   5084   johnlev 	macp = NULL;
    934   5084   johnlev 	if (err != 0)
    935  10958       dme 		goto failure_4;
    936  10958       dme 
    937  10958       dme 	if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change, NULL)
    938  10958       dme 	    != DDI_SUCCESS)
    939  10958       dme 		goto failure_5;
    940   6873      fvdl 
    941   6873      fvdl #ifdef XPV_HVM_DRIVER
    942   6873      fvdl 	/*
    943   6873      fvdl 	 * In the HVM case, this driver essentially replaces a driver for
    944   6873      fvdl 	 * a 'real' PCI NIC. Without the "model" property set to
    945   6873      fvdl 	 * "Ethernet controller", like the PCI code does, netbooting does
    946   6873      fvdl 	 * not work correctly, as strplumb_get_netdev_path() will not find
    947   6873      fvdl 	 * this interface.
    948   6873      fvdl 	 */
    949   6873      fvdl 	(void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model",
    950   6873      fvdl 	    "Ethernet controller");
    951   6873      fvdl #endif
    952   5084   johnlev 
    953  10958       dme #ifdef XNF_DEBUG
    954  10958       dme 	if (xnf_debug_instance == NULL)
    955  10958       dme 		xnf_debug_instance = xnfp;
    956  10958       dme #endif
    957   6899  cz147101 
    958   5084   johnlev 	return (DDI_SUCCESS);
    959   5084   johnlev 
    960  10958       dme failure_5:
    961  10981       dme 	(void) mac_unregister(xnfp->xnf_mh);
    962  10958       dme 
    963  10958       dme failure_4:
    964   5741       mrj #ifdef XPV_HVM_DRIVER
    965   5741       mrj 	ec_unbind_evtchn(xnfp->xnf_evtchn);
    966   6431    smaybe 	xvdi_free_evtchn(devinfo);
    967   5741       mrj #else
    968   5741       mrj 	ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
    969   5741       mrj #endif
    970   5741       mrj 	xnfp->xnf_evtchn = INVALID_EVTCHN;
    971  10958       dme 	kstat_delete(xnfp->xnf_kstat_aux);
    972  10958       dme 
    973  10958       dme failure_3:
    974  10958       dme 	xnf_release_dma_resources(xnfp);
    975   5084   johnlev 
    976   6899  cz147101 failure_2:
    977  10958       dme 	kmem_cache_destroy(xnfp->xnf_tx_buf_cache);
    978   6899  cz147101 
    979   5741       mrj failure_1:
    980  10958       dme 	kmem_cache_destroy(xnfp->xnf_buf_cache);
    981  10958       dme 
    982  10958       dme failure_0:
    983  10958       dme 	cv_destroy(&xnfp->xnf_cv_tx_slots);
    984  10958       dme 	cv_destroy(&xnfp->xnf_cv_multicast);
    985  10958       dme 	cv_destroy(&xnfp->xnf_cv_state);
    986  10958       dme 
    987  10958       dme 	mutex_destroy(&xnfp->xnf_gref_lock);
    988  10958       dme 	mutex_destroy(&xnfp->xnf_schedlock);
    989  10958       dme 	mutex_destroy(&xnfp->xnf_rxlock);
    990   5741       mrj 	mutex_destroy(&xnfp->xnf_txlock);
    991   5084   johnlev 
    992   5084   johnlev failure:
    993   5084   johnlev 	kmem_free(xnfp, sizeof (*xnfp));
    994   5084   johnlev 	if (macp != NULL)
    995   5084   johnlev 		mac_free(macp);
    996   5084   johnlev 
    997   5084   johnlev 	return (DDI_FAILURE);
    998   5084   johnlev }
    999   5084   johnlev 
   1000   5084   johnlev /*  detach(9E) -- Detach a device from the system */
   1001   5084   johnlev static int
   1002   5084   johnlev xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
   1003   5084   johnlev {
   1004   5084   johnlev 	xnf_t *xnfp;		/* Our private device info */
   1005   5084   johnlev 
   1006   5084   johnlev #ifdef XNF_DEBUG
   1007  10958       dme 	if (xnf_debug & XNF_DEBUG_DDI)
   1008   5084   johnlev 		printf("xnf_detach(0x%p)\n", (void *)devinfo);
   1009   5084   johnlev #endif
   1010   5084   johnlev 
   1011   5084   johnlev 	xnfp = ddi_get_driver_private(devinfo);
   1012   5084   johnlev 
   1013   5084   johnlev 	switch (cmd) {
   1014   5084   johnlev 	case DDI_SUSPEND:
   1015   5741       mrj #ifdef XPV_HVM_DRIVER
   1016   5741       mrj 		ec_unbind_evtchn(xnfp->xnf_evtchn);
   1017   6431    smaybe 		xvdi_free_evtchn(devinfo);
   1018   5741       mrj #else
   1019   5741       mrj 		ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
   1020   5741       mrj #endif
   1021   5084   johnlev 
   1022   5084   johnlev 		xvdi_suspend(devinfo);
   1023   5084   johnlev 
   1024  10958       dme 		mutex_enter(&xnfp->xnf_rxlock);
   1025   5741       mrj 		mutex_enter(&xnfp->xnf_txlock);
   1026   5084   johnlev 
   1027   5741       mrj 		xnfp->xnf_evtchn = INVALID_EVTCHN;
   1028   5741       mrj 		xnfp->xnf_connected = B_FALSE;
   1029   5741       mrj 		mutex_exit(&xnfp->xnf_txlock);
   1030  10958       dme 		mutex_exit(&xnfp->xnf_rxlock);
   1031   7397       Max 
   1032   7397       Max 		/* claim link to be down after disconnect */
   1033   7397       Max 		mac_link_update(xnfp->xnf_mh, LINK_STATE_DOWN);
   1034   5084   johnlev 		return (DDI_SUCCESS);
   1035   5084   johnlev 
   1036   5084   johnlev 	case DDI_DETACH:
   1037   5084   johnlev 		break;
   1038   5084   johnlev 
   1039   5084   johnlev 	default:
   1040   5084   johnlev 		return (DDI_FAILURE);
   1041   5084   johnlev 	}
   1042   5084   johnlev 
   1043   5741       mrj 	if (xnfp->xnf_connected)
   1044   5084   johnlev 		return (DDI_FAILURE);
   1045   5084   johnlev 
   1046  10958       dme 	/*
   1047  10958       dme 	 * Cannot detach if we have xnf_buf_t outstanding.
   1048  10958       dme 	 */
   1049  10958       dme 	if (xnfp->xnf_stat_buf_allocated > 0)
   1050  10958       dme 		return (DDI_FAILURE);
   1051   5084   johnlev 
   1052   5741       mrj 	if (mac_unregister(xnfp->xnf_mh) != 0)
   1053   5084   johnlev 		return (DDI_FAILURE);
   1054   6899  cz147101 
   1055   6899  cz147101 	kstat_delete(xnfp->xnf_kstat_aux);
   1056   5084   johnlev 
   1057   5084   johnlev 	/* Stop the receiver */
   1058   5084   johnlev 	xnf_stop(xnfp);
   1059   5084   johnlev 
   1060   5084   johnlev 	xvdi_remove_event_handler(devinfo, XS_OE_STATE);
   1061   5084   johnlev 
   1062   5084   johnlev 	/* Remove the interrupt */
   1063   5741       mrj #ifdef XPV_HVM_DRIVER
   1064   5741       mrj 	ec_unbind_evtchn(xnfp->xnf_evtchn);
   1065   6431    smaybe 	xvdi_free_evtchn(devinfo);
   1066   5741       mrj #else
   1067   5741       mrj 	ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
   1068   5741       mrj #endif
   1069   5084   johnlev 
   1070   5084   johnlev 	/* Release any pending xmit mblks */
   1071   5084   johnlev 	xnf_release_mblks(xnfp);
   1072   5084   johnlev 
   1073   5084   johnlev 	/* Release all DMA resources */
   1074   5084   johnlev 	xnf_release_dma_resources(xnfp);
   1075   5084   johnlev 
   1076  10958       dme 	cv_destroy(&xnfp->xnf_cv_tx_slots);
   1077  10958       dme 	cv_destroy(&xnfp->xnf_cv_multicast);
   1078  10958       dme 	cv_destroy(&xnfp->xnf_cv_state);
   1079  10958       dme 
   1080  10958       dme 	kmem_cache_destroy(xnfp->xnf_tx_buf_cache);
   1081  10958       dme 	kmem_cache_destroy(xnfp->xnf_buf_cache);
   1082  10958       dme 
   1083  10958       dme 	mutex_destroy(&xnfp->xnf_gref_lock);
   1084  10958       dme 	mutex_destroy(&xnfp->xnf_schedlock);
   1085  10958       dme 	mutex_destroy(&xnfp->xnf_rxlock);
   1086   5741       mrj 	mutex_destroy(&xnfp->xnf_txlock);
   1087   5084   johnlev 
   1088   5084   johnlev 	kmem_free(xnfp, sizeof (*xnfp));
   1089   5084   johnlev 
   1090   5084   johnlev 	return (DDI_SUCCESS);
   1091   5084   johnlev }
   1092   5084   johnlev 
   1093   5084   johnlev /*
   1094   5084   johnlev  *  xnf_set_mac_addr() -- set the physical network address on the board.
   1095   5084   johnlev  */
   1096   5084   johnlev static int
   1097   5084   johnlev xnf_set_mac_addr(void *arg, const uint8_t *macaddr)
   1098   5084   johnlev {
   1099  10958       dme 	_NOTE(ARGUNUSED(arg, macaddr));
   1100   5084   johnlev 
   1101   5084   johnlev 	/*
   1102   5084   johnlev 	 * We can't set our macaddr.
   1103   5084   johnlev 	 */
   1104   5084   johnlev 	return (ENOTSUP);
   1105   5084   johnlev }
   1106   5084   johnlev 
   1107   5084   johnlev /*
   1108   5084   johnlev  *  xnf_set_multicast() -- set (enable) or disable a multicast address.
   1109   5084   johnlev  *
   1110   5084   johnlev  *  Program the hardware to enable/disable the multicast address
   1111  10958       dme  *  in "mca".  Enable if "add" is true, disable if false.
   1112   5084   johnlev  */
   1113   5084   johnlev static int
   1114   5084   johnlev xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca)
   1115   5084   johnlev {
   1116   5084   johnlev 	xnf_t *xnfp = arg;
   1117  10958       dme 	xnf_txbuf_t *txp;
   1118  10958       dme 	int n_slots;
   1119  10958       dme 	RING_IDX slot;
   1120  10958       dme 	xnf_txid_t *tidp;
   1121  10958       dme 	netif_tx_request_t *txrp;
   1122  10958       dme 	struct netif_extra_info *erp;
   1123  10958       dme 	boolean_t notify, result;
   1124   5084   johnlev 
   1125   5084   johnlev 	/*
   1126  10958       dme 	 * If the backend does not support multicast control then we
   1127  10958       dme 	 * must assume that the right packets will just arrive.
   1128  10958       dme 	 */
   1129  10958       dme 	if (!xnfp->xnf_be_mcast_control)
   1130  10958       dme 		return (0);
   1131  10958       dme 
   1132  10958       dme 	txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP);
   1133  10958       dme 	if (txp == NULL)
   1134  10958       dme 		return (1);
   1135  10958       dme 
   1136  10958       dme 	mutex_enter(&xnfp->xnf_txlock);
   1137  10958       dme 
   1138  10958       dme 	/*
   1139  10958       dme 	 * If we're not yet connected then claim success. This is
   1140  10958       dme 	 * acceptable because we refresh the entire set of multicast
   1141  10958       dme 	 * addresses when we get connected.
   1142   5084   johnlev 	 *
   1143  10958       dme 	 * We can't wait around here because the MAC layer expects
   1144  10958       dme 	 * this to be a non-blocking operation - waiting ends up
   1145  10958       dme 	 * causing a deadlock during resume.
   1146   5084   johnlev 	 */
   1147  10958       dme 	if (!xnfp->xnf_connected) {
   1148  10958       dme 		mutex_exit(&xnfp->xnf_txlock);
   1149  10958       dme 		return (0);
   1150  10958       dme 	}
   1151  10958       dme 
   1152  10958       dme 	/*
   1153  10958       dme 	 * 1. Acquire two slots in the ring.
   1154  10958       dme 	 * 2. Fill in the slots.
   1155  10958       dme 	 * 3. Request notification when the operation is done.
   1156  10958       dme 	 * 4. Kick the peer.
   1157  10958       dme 	 * 5. Wait for the response via xnf_tx_clean_ring().
   1158  10958       dme 	 */
   1159  10958       dme 
   1160  10958       dme 	n_slots = tx_slots_get(xnfp, 2, B_TRUE);
   1161  10958       dme 	ASSERT(n_slots >= 2);
   1162  10958       dme 
   1163  10958       dme 	slot = xnfp->xnf_tx_ring.req_prod_pvt;
   1164  10958       dme 	tidp = txid_get(xnfp);
   1165  10958       dme 	VERIFY(tidp != NULL);
   1166  10958       dme 
   1167  10958       dme 	txp->tx_type = TX_MCAST_REQ;
   1168  10958       dme 	txp->tx_slot = slot;
   1169  10958       dme 
   1170  10958       dme 	txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot);
   1171  10958       dme 	erp = (struct netif_extra_info *)
   1172  10958       dme 	    RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot + 1);
   1173  10958       dme 
   1174  10958       dme 	txrp->gref = 0;
   1175  10958       dme 	txrp->size = 0;
   1176  10958       dme 	txrp->offset = 0;
   1177  10958       dme 	/* Set tx_txreq.id to appease xnf_tx_clean_ring(). */
   1178  10958       dme 	txrp->id = txp->tx_txreq.id = tidp->id;
   1179  10958       dme 	txrp->flags = NETTXF_extra_info;
   1180  10958       dme 
   1181  10958       dme 	erp->type = add ? XEN_NETIF_EXTRA_TYPE_MCAST_ADD :
   1182  10958       dme 	    XEN_NETIF_EXTRA_TYPE_MCAST_DEL;
   1183  10958       dme 	bcopy((void *)mca, &erp->u.mcast.addr, ETHERADDRL);
   1184  10958       dme 
   1185  10958       dme 	tidp->txbuf = txp;
   1186  10958       dme 
   1187  10958       dme 	xnfp->xnf_tx_ring.req_prod_pvt = slot + 2;
   1188  10958       dme 
   1189  10958       dme 	mutex_enter(&xnfp->xnf_schedlock);
   1190  10958       dme 	xnfp->xnf_pending_multicast++;
   1191  10958       dme 	mutex_exit(&xnfp->xnf_schedlock);
   1192  10958       dme 
   1193  10958       dme 	/* LINTED: constant in conditional context */
   1194  10958       dme 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring,
   1195  10958       dme 	    notify);
   1196  10958       dme 	if (notify)
   1197  10958       dme 		ec_notify_via_evtchn(xnfp->xnf_evtchn);
   1198  10958       dme 
   1199  10958       dme 	while (txp->tx_type == TX_MCAST_REQ)
   1200  10958       dme 		cv_wait(&xnfp->xnf_cv_multicast,
   1201  10958       dme 		    &xnfp->xnf_txlock);
   1202  10958       dme 
   1203  10958       dme 	ASSERT(txp->tx_type == TX_MCAST_RSP);
   1204  10958       dme 
   1205  10958       dme 	mutex_enter(&xnfp->xnf_schedlock);
   1206  10958       dme 	xnfp->xnf_pending_multicast--;
   1207  10958       dme 	mutex_exit(&xnfp->xnf_schedlock);
   1208  10958       dme 
   1209  10958       dme 	result = (txp->tx_status == NETIF_RSP_OKAY);
   1210  10958       dme 
   1211  10958       dme 	txid_put(xnfp, tidp);
   1212  10958       dme 
   1213  10958       dme 	mutex_exit(&xnfp->xnf_txlock);
   1214  10958       dme 
   1215  10958       dme 	kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
   1216  10958       dme 
   1217  10958       dme 	return (result ? 0 : 1);
   1218   5084   johnlev }
   1219   5084   johnlev 
   1220   5084   johnlev /*
   1221   5084   johnlev  * xnf_set_promiscuous() -- set or reset promiscuous mode on the board
   1222   5084   johnlev  *
   1223   5084   johnlev  *  Program the hardware to enable/disable promiscuous mode.
   1224   5084   johnlev  */
   1225   5084   johnlev static int
   1226   5084   johnlev xnf_set_promiscuous(void *arg, boolean_t on)
   1227   5084   johnlev {
   1228  10958       dme 	_NOTE(ARGUNUSED(arg, on));
   1229   5084   johnlev 
   1230   5084   johnlev 	/*
   1231   5084   johnlev 	 * We can't really do this, but we pretend that we can in
   1232   5084   johnlev 	 * order that snoop will work.
   1233   5084   johnlev 	 */
   1234   5084   johnlev 	return (0);
   1235   5084   johnlev }
   1236   5084   johnlev 
   1237   5084   johnlev /*
   1238   5084   johnlev  * Clean buffers that we have responses for from the transmit ring.
   1239   5084   johnlev  */
   1240   5084   johnlev static int
   1241  10958       dme xnf_tx_clean_ring(xnf_t *xnfp)
   1242   5084   johnlev {
   1243  10958       dme 	boolean_t work_to_do;
   1244   5084   johnlev 
   1245   5741       mrj 	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
   1246   5084   johnlev 
   1247   6899  cz147101 loop:
   1248   5990  schuster 	while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) {
   1249  10958       dme 		RING_IDX cons, prod, i;
   1250  10958       dme 
   1251  10958       dme 		cons = xnfp->xnf_tx_ring.rsp_cons;
   1252  10958       dme 		prod = xnfp->xnf_tx_ring.sring->rsp_prod;
   1253   5084   johnlev 		membar_consumer();
   1254   5084   johnlev 		/*
   1255  10958       dme 		 * Clean tx requests from ring that we have responses
   1256  10958       dme 		 * for.
   1257   5084   johnlev 		 */
   1258  10958       dme 		DTRACE_PROBE2(xnf_tx_clean_range, int, cons, int, prod);
   1259  10958       dme 		for (i = cons; i != prod; i++) {
   1260  10958       dme 			netif_tx_response_t *trp;
   1261  10958       dme 			xnf_txid_t *tidp;
   1262  10958       dme 			xnf_txbuf_t *txp;
   1263  10958       dme 
   1264  10958       dme 			trp = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i);
   1265  10958       dme 			ASSERT(TX_ID_VALID(trp->id));
   1266  10958       dme 
   1267  10958       dme 			tidp = TX_ID_TO_TXID(xnfp, trp->id);
   1268  10958       dme 			ASSERT(tidp->id == trp->id);
   1269  10958       dme 			ASSERT(tidp->next == INVALID_TX_ID);
   1270  10958       dme 
   1271  10958       dme 			txp = tidp->txbuf;
   1272  10958       dme 			ASSERT(txp != NULL);
   1273  10958       dme 			ASSERT(txp->tx_txreq.id == trp->id);
   1274  10958       dme 
   1275  10958       dme 			switch (txp->tx_type) {
   1276  10958       dme 			case TX_DATA:
   1277  10958       dme 				if (gnttab_query_foreign_access(
   1278  10958       dme 				    txp->tx_txreq.gref) != 0)
   1279  10958       dme 					cmn_err(CE_PANIC,
   1280  10958       dme 					    "tx grant %d still in use by "
   1281  10958       dme 					    "backend domain",
   1282  10958       dme 					    txp->tx_txreq.gref);
   1283  10958       dme 
   1284  10958       dme 				if (txp->tx_bdesc == NULL) {
   1285  10958       dme 					(void) gnttab_end_foreign_access_ref(
   1286  10958       dme 					    txp->tx_txreq.gref, 1);
   1287  10958       dme 					gref_put(xnfp, txp->tx_txreq.gref);
   1288  10958       dme 					(void) ddi_dma_unbind_handle(
   1289  10958       dme 					    txp->tx_dma_handle);
   1290  10958       dme 				} else {
   1291  10958       dme 					xnf_buf_put(xnfp, txp->tx_bdesc,
   1292  10958       dme 					    B_TRUE);
   1293  10958       dme 				}
   1294  10958       dme 
   1295  10958       dme 				freemsg(txp->tx_mp);
   1296  10958       dme 				txid_put(xnfp, tidp);
   1297  10958       dme 				kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
   1298  10958       dme 
   1299  10958       dme 				break;
   1300  10958       dme 
   1301  10958       dme 			case TX_MCAST_REQ:
   1302  10958       dme 				txp->tx_type = TX_MCAST_RSP;
   1303  10958       dme 				txp->tx_status = trp->status;
   1304  10958       dme 				cv_broadcast(&xnfp->xnf_cv_multicast);
   1305  10958       dme 
   1306  10958       dme 				break;
   1307  10958       dme 
   1308  10958       dme 			case TX_MCAST_RSP:
   1309  10958       dme 				break;
   1310  10958       dme 
   1311  10958       dme 			default:
   1312  10958       dme 				cmn_err(CE_PANIC, "xnf_tx_clean_ring: "
   1313  10958       dme 				    "invalid xnf_txbuf_t type: %d",
   1314  10958       dme 				    txp->tx_type);
   1315  10958       dme 				break;
   1316  10958       dme 			}
   1317   5084   johnlev 		}
   1318  10958       dme 		/*
   1319  10958       dme 		 * Record the last response we dealt with so that we
   1320  10958       dme 		 * know where to start next time around.
   1321  10958       dme 		 */
   1322  10958       dme 		xnfp->xnf_tx_ring.rsp_cons = prod;
   1323   5084   johnlev 		membar_enter();
   1324   5990  schuster 	}
   1325   6899  cz147101 
   1326   6899  cz147101 	/* LINTED: constant in conditional context */
   1327   6899  cz147101 	RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_tx_ring, work_to_do);
   1328   6899  cz147101 	if (work_to_do)
   1329   6899  cz147101 		goto loop;
   1330   5990  schuster 
   1331   5990  schuster 	return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring));
   1332   5084   johnlev }
   1333   5084   johnlev 
   1334   5084   johnlev /*
   1335  10958       dme  * Allocate and fill in a look-aside buffer for the packet `mp'. Used
   1336  10958       dme  * to ensure that the packet is physically contiguous and contained
   1337  10958       dme  * within a single page.
   1338   5084   johnlev  */
   1339  10958       dme static xnf_buf_t *
   1340  10958       dme xnf_tx_pullup(xnf_t *xnfp, mblk_t *mp)
   1341   5084   johnlev {
   1342  10958       dme 	xnf_buf_t *bd;
   1343  10958       dme 	caddr_t bp;
   1344   5084   johnlev 
   1345  10958       dme 	bd = xnf_buf_get(xnfp, KM_SLEEP, B_TRUE);
   1346  10958       dme 	if (bd == NULL)
   1347  10958       dme 		return (NULL);
   1348  10958       dme 
   1349  10958       dme 	bp = bd->buf;
   1350  10958       dme 	while (mp != NULL) {
   1351  10958       dme 		size_t len = MBLKL(mp);
   1352  10958       dme 
   1353  10958       dme 		bcopy(mp->b_rptr, bp, len);
   1354  10958       dme 		bp += len;
   1355  10958       dme 
   1356  10958       dme 		mp = mp->b_cont;
   1357  10958       dme 	}
   1358  10958       dme 
   1359  10958       dme 	ASSERT((bp - bd->buf) <= PAGESIZE);
   1360  10958       dme 
   1361   5741       mrj 	xnfp->xnf_stat_tx_pullup++;
   1362  10958       dme 
   1363  10958       dme 	return (bd);
   1364   5084   johnlev }
   1365   5084   johnlev 
   1366  10958       dme /*
   1367  10958       dme  * Insert the pseudo-header checksum into the packet `buf'.
   1368  10958       dme  */
   1369   7351       dme void
   1370   7351       dme xnf_pseudo_cksum(caddr_t buf, int length)
   1371   7351       dme {
   1372   7351       dme 	struct ether_header *ehp;
   1373   7351       dme 	uint16_t sap, len, *stuff;
   1374   7351       dme 	uint32_t cksum;
   1375   7351       dme 	size_t offset;
   1376   7351       dme 	ipha_t *ipha;
   1377   7351       dme 	ipaddr_t src, dst;
   1378   7351       dme 
   1379   7351       dme 	ASSERT(length >= sizeof (*ehp));
   1380   7351       dme 	ehp = (struct ether_header *)buf;
   1381   7351       dme 
   1382   7351       dme 	if (ntohs(ehp->ether_type) == VLAN_TPID) {
   1383   7351       dme 		struct ether_vlan_header *evhp;
   1384   7351       dme 
   1385   7351       dme 		ASSERT(length >= sizeof (*evhp));
   1386   7351       dme 		evhp = (struct ether_vlan_header *)buf;
   1387   7351       dme 		sap = ntohs(evhp->ether_type);
   1388   7351       dme 		offset = sizeof (*evhp);
   1389   7351       dme 	} else {
   1390   7351       dme 		sap = ntohs(ehp->ether_type);
   1391   7351       dme 		offset = sizeof (*ehp);
   1392   7351       dme 	}
   1393   7351       dme 
   1394   7351       dme 	ASSERT(sap == ETHERTYPE_IP);
   1395   7351       dme 
   1396   7351       dme 	/* Packet should have been pulled up by the caller. */
   1397   7351       dme 	if ((offset + sizeof (ipha_t)) > length) {
   1398   7351       dme 		cmn_err(CE_WARN, "xnf_pseudo_cksum: no room for checksum");
   1399   7351       dme 		return;
   1400   7351       dme 	}
   1401   7351       dme 
   1402   7351       dme 	ipha = (ipha_t *)(buf + offset);
   1403   7351       dme 
   1404   7351       dme 	ASSERT(IPH_HDR_LENGTH(ipha) == IP_SIMPLE_HDR_LENGTH);
   1405   7351       dme 
   1406   7351       dme 	len = ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH;
   1407   7351       dme 
   1408   7351       dme 	switch (ipha->ipha_protocol) {
   1409   7351       dme 	case IPPROTO_TCP:
   1410   7351       dme 		stuff = IPH_TCPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
   1411   7351       dme 		cksum = IP_TCP_CSUM_COMP;
   1412   7351       dme 		break;
   1413   7351       dme 	case IPPROTO_UDP:
   1414   7351       dme 		stuff = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH);
   1415   7351       dme 		cksum = IP_UDP_CSUM_COMP;
   1416   7351       dme 		break;
   1417   7351       dme 	default:
   1418   7351       dme 		cmn_err(CE_WARN, "xnf_pseudo_cksum: unexpected protocol %d",
   1419   7351       dme 		    ipha->ipha_protocol);
   1420   7351       dme 		return;
   1421   7351       dme 	}
   1422   7351       dme 
   1423   7351       dme 	src = ipha->ipha_src;
   1424   7351       dme 	dst = ipha->ipha_dst;
   1425   7351       dme 
   1426   7351       dme 	cksum += (dst >> 16) + (dst & 0xFFFF);
   1427   7351       dme 	cksum += (src >> 16) + (src & 0xFFFF);
   1428   7351       dme 	cksum += htons(len);
   1429   7351       dme 
   1430   7351       dme 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
   1431   7351       dme 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
   1432   7351       dme 
   1433   7351       dme 	ASSERT(cksum <= 0xFFFF);
   1434   7351       dme 
   1435   7351       dme 	*stuff = (uint16_t)(cksum ? cksum : ~cksum);
   1436   7351       dme }
   1437   7351       dme 
   1438   5084   johnlev /*
   1439  10958       dme  * Push a list of prepared packets (`txp') into the transmit ring.
   1440   5084   johnlev  */
   1441  10958       dme static xnf_txbuf_t *
   1442  10958       dme tx_push_packets(xnf_t *xnfp, xnf_txbuf_t *txp)
   1443   5084   johnlev {
   1444  10958       dme 	int slots_free;
   1445  10958       dme 	RING_IDX slot;
   1446  10958       dme 	boolean_t notify;
   1447  10958       dme 
   1448  10958       dme 	mutex_enter(&xnfp->xnf_txlock);
   1449  10958       dme 
   1450  10958       dme 	ASSERT(xnfp->xnf_running);
   1451  10958       dme 
   1452  10958       dme 	/*
   1453  10958       dme 	 * Wait until we are connected to the backend.
   1454  10958       dme 	 */
   1455  10958       dme 	while (!xnfp->xnf_connected)
   1456  10958       dme 		cv_wait(&xnfp->xnf_cv_state, &xnfp->xnf_txlock);
   1457  10958       dme 
   1458  10958       dme 	slots_free = tx_slots_get(xnfp, 1, B_FALSE);
   1459  10958       dme 	DTRACE_PROBE1(xnf_send_slotsfree, int, slots_free);
   1460  10958       dme 
   1461  10958       dme 	slot = xnfp->xnf_tx_ring.req_prod_pvt;
   1462  10958       dme 
   1463  10958       dme 	while ((txp != NULL) && (slots_free > 0)) {
   1464  10958       dme 		xnf_txid_t *tidp;
   1465  10958       dme 		netif_tx_request_t *txrp;
   1466  10958       dme 
   1467  10958       dme 		tidp = txid_get(xnfp);
   1468  10958       dme 		VERIFY(tidp != NULL);
   1469  10958       dme 
   1470  10958       dme 		txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot);
   1471  10958       dme 
   1472  10958       dme 		txp->tx_slot = slot;
   1473  10958       dme 		txp->tx_txreq.id = tidp->id;
   1474  10958       dme 		*txrp = txp->tx_txreq;
   1475  10958       dme 
   1476  10958       dme 		tidp->txbuf = txp;
   1477  10958       dme 
   1478  10958       dme 		xnfp->xnf_stat_opackets++;
   1479  10958       dme 		xnfp->xnf_stat_obytes += txp->tx_txreq.size;
   1480  10958       dme 
   1481  10958       dme 		txp = txp->tx_next;
   1482  10958       dme 		slots_free--;
   1483  10958       dme 		slot++;
   1484  10958       dme 
   1485  10958       dme 	}
   1486  10958       dme 
   1487  10958       dme 	xnfp->xnf_tx_ring.req_prod_pvt = slot;
   1488  10958       dme 
   1489  10958       dme 	/*
   1490  10958       dme 	 * Tell the peer that we sent something, if it cares.
   1491  10958       dme 	 */
   1492  10958       dme 	/* LINTED: constant in conditional context */
   1493  10958       dme 	RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring,
   1494  10958       dme 	    notify);
   1495  10958       dme 	if (notify)
   1496  10958       dme 		ec_notify_via_evtchn(xnfp->xnf_evtchn);
   1497  10958       dme 
   1498  10958       dme 	mutex_exit(&xnfp->xnf_txlock);
   1499  10958       dme 
   1500  10958       dme 	return (txp);
   1501  10958       dme }
   1502  10958       dme 
   1503  10958       dme /*
   1504  10958       dme  * Send the chain of packets `mp'. Called by the MAC framework.
   1505  10958       dme  */
   1506  10958       dme static mblk_t *
   1507  10958       dme xnf_send(void *arg, mblk_t *mp)
   1508  10958       dme {
   1509  10958       dme 	xnf_t *xnfp = arg;
   1510  10958       dme 	domid_t oeid;
   1511  10958       dme 	xnf_txbuf_t *head, *tail;
   1512  10958       dme 	mblk_t *ml;
   1513  10958       dme 	int prepared;
   1514  10958       dme 
   1515  10958       dme 	oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
   1516  10958       dme 
   1517  10958       dme 	/*
   1518  10958       dme 	 * Prepare packets for transmission.
   1519  10958       dme 	 */
   1520  10958       dme 	head = tail = NULL;
   1521  10958       dme 	prepared = 0;
   1522  10958       dme 	while (mp != NULL) {
   1523  10958       dme 		xnf_txbuf_t *txp;
   1524  10958       dme 		int n_chunks, length;
   1525  10958       dme 		boolean_t page_oops;
   1526  10958       dme 		uint32_t pflags;
   1527  10958       dme 
   1528  10958       dme 		for (ml = mp, n_chunks = length = 0, page_oops = B_FALSE;
   1529  10958       dme 		    ml != NULL;
   1530  10958       dme 		    ml = ml->b_cont, n_chunks++) {
   1531  10958       dme 
   1532  10958       dme 			/*
   1533  10958       dme 			 * Test if this buffer includes a page
   1534  10958       dme 			 * boundary. The test assumes that the range
   1535  10958       dme 			 * b_rptr...b_wptr can include only a single
   1536  10958       dme 			 * boundary.
   1537  10958       dme 			 */
   1538  10958       dme 			if (xnf_btop((size_t)ml->b_rptr) !=
   1539  10958       dme 			    xnf_btop((size_t)ml->b_wptr)) {
   1540  10958       dme 				xnfp->xnf_stat_tx_pagebndry++;
   1541  10958       dme 				page_oops = B_TRUE;
   1542  10958       dme 			}
   1543  10958       dme 
   1544  10958       dme 			length += MBLKL(ml);
   1545  10958       dme 		}
   1546  10958       dme 		DTRACE_PROBE1(xnf_send_b_cont, int, n_chunks);
   1547  10958       dme 
   1548  10958       dme 		/*
   1549  10958       dme 		 * Make sure packet isn't too large.
   1550  10958       dme 		 */
   1551  10958       dme 		if (length > XNF_FRAMESIZE) {
   1552  10958       dme 			cmn_err(CE_WARN,
   1553  10958       dme 			    "xnf%d: oversized packet (%d bytes) dropped",
   1554  10958       dme 			    ddi_get_instance(xnfp->xnf_devinfo), length);
   1555  10958       dme 			freemsg(mp);
   1556  10958       dme 			continue;
   1557  10958       dme 		}
   1558  10958       dme 
   1559  10958       dme 		txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP);
   1560  10958       dme 		if (txp == NULL)
   1561  10958       dme 			break;
   1562  10958       dme 
   1563  10958       dme 		txp->tx_type = TX_DATA;
   1564  10958       dme 
   1565  10958       dme 		if ((n_chunks > xnf_max_tx_frags) || page_oops) {
   1566  10958       dme 			/*
   1567  10958       dme 			 * Loan a side buffer rather than the mblk
   1568  10958       dme 			 * itself.
   1569  10958       dme 			 */
   1570  10958       dme 			txp->tx_bdesc = xnf_tx_pullup(xnfp, mp);
   1571  10958       dme 			if (txp->tx_bdesc == NULL) {
   1572  10958       dme 				kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
   1573  10958       dme 				break;
   1574  10958       dme 			}
   1575  10958       dme 
   1576  10958       dme 			txp->tx_bufp = txp->tx_bdesc->buf;
   1577  10958       dme 			txp->tx_mfn = txp->tx_bdesc->buf_mfn;
   1578  10958       dme 			txp->tx_txreq.gref = txp->tx_bdesc->grant_ref;
   1579  10958       dme 
   1580  10958       dme 		} else {
   1581  10958       dme 			int rc;
   1582  10958       dme 			ddi_dma_cookie_t dma_cookie;
   1583  10958       dme 			uint_t ncookies;
   1584  10958       dme 
   1585  10958       dme 			rc = ddi_dma_addr_bind_handle(txp->tx_dma_handle,
   1586  10958       dme 			    NULL, (char *)mp->b_rptr, length,
   1587  10958       dme 			    DDI_DMA_WRITE | DDI_DMA_STREAMING,
   1588  10958       dme 			    DDI_DMA_DONTWAIT, 0, &dma_cookie,
   1589  10958       dme 			    &ncookies);
   1590  10958       dme 			if (rc != DDI_DMA_MAPPED) {
   1591  10958       dme 				ASSERT(rc != DDI_DMA_INUSE);
   1592  10958       dme 				ASSERT(rc != DDI_DMA_PARTIAL_MAP);
   1593   5084   johnlev 
   1594   5084   johnlev #ifdef XNF_DEBUG
   1595  10958       dme 				if (rc != DDI_DMA_NORESOURCES)
   1596  10958       dme 					cmn_err(CE_WARN,
   1597  10958       dme 					    "xnf%d: bind_handle failed (%x)",
   1598  10958       dme 					    ddi_get_instance(xnfp->xnf_devinfo),
   1599  10958       dme 					    rc);
   1600   5084   johnlev #endif
   1601  10958       dme 				kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
   1602  10958       dme 				break;
   1603  10958       dme 			}
   1604  10958       dme 			ASSERT(ncookies == 1);
   1605   5084   johnlev 
   1606  10958       dme 			txp->tx_bdesc = NULL;
   1607  10958       dme 			txp->tx_bufp = (caddr_t)mp->b_rptr;
   1608  10958       dme 			txp->tx_mfn =
   1609  10958       dme 			    xnf_btop(pa_to_ma(dma_cookie.dmac_laddress));
   1610  10958       dme 			txp->tx_txreq.gref = gref_get(xnfp);
   1611  10958       dme 			if (txp->tx_txreq.gref == INVALID_GRANT_REF) {
   1612  10958       dme 				(void) ddi_dma_unbind_handle(
   1613  10958       dme 				    txp->tx_dma_handle);
   1614  10958       dme 				kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
   1615  10958       dme 				break;
   1616  10958       dme 			}
   1617  10958       dme 			gnttab_grant_foreign_access_ref(txp->tx_txreq.gref,
   1618  10958       dme 			    oeid, txp->tx_mfn, 1);
   1619  10958       dme 		}
   1620   5084   johnlev 
   1621  10958       dme 		txp->tx_next = NULL;
   1622  10958       dme 		txp->tx_mp = mp;
   1623  10958       dme 		txp->tx_txreq.size = length;
   1624  10958       dme 		txp->tx_txreq.offset = (uintptr_t)txp->tx_bufp & PAGEOFFSET;
   1625  10958       dme 		txp->tx_txreq.flags = 0;
   1626  10958       dme 		hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL,
   1627  10958       dme 		    &pflags);
   1628  10958       dme 		if (pflags != 0) {
   1629  10958       dme 			/*
   1630  10958       dme 			 * If the local protocol stack requests checksum
   1631  10958       dme 			 * offload we set the 'checksum blank' flag,
   1632  10958       dme 			 * indicating to the peer that we need the checksum
   1633  10958       dme 			 * calculated for us.
   1634  10958       dme 			 *
   1635  10958       dme 			 * We _don't_ set the validated flag, because we haven't
   1636  10958       dme 			 * validated that the data and the checksum match.
   1637  10958       dme 			 */
   1638  10958       dme 			xnf_pseudo_cksum(txp->tx_bufp, length);
   1639  10958       dme 			txp->tx_txreq.flags |= NETTXF_csum_blank;
   1640   5084   johnlev 
   1641  10958       dme 			xnfp->xnf_stat_tx_cksum_deferred++;
   1642  10958       dme 		}
   1643  10958       dme 
   1644  10958       dme 		if (head == NULL) {
   1645  10958       dme 			ASSERT(tail == NULL);
   1646  10958       dme 
   1647  10958       dme 			head = txp;
   1648  10958       dme 		} else {
   1649  10958       dme 			ASSERT(tail != NULL);
   1650  10958       dme 
   1651  10958       dme 			tail->tx_next = txp;
   1652  10958       dme 		}
   1653  10958       dme 		tail = txp;
   1654  10958       dme 
   1655  10958       dme 		mp = mp->b_next;
   1656  10958       dme 		prepared++;
   1657  10958       dme 
   1658  10958       dme 		/*
   1659  10958       dme 		 * There is no point in preparing more than
   1660  10958       dme 		 * NET_TX_RING_SIZE, as we won't be able to push them
   1661  10958       dme 		 * into the ring in one go and would hence have to
   1662  10958       dme 		 * un-prepare the extra.
   1663  10958       dme 		 */
   1664  10958       dme 		if (prepared == NET_TX_RING_SIZE)
   1665  10958       dme 			break;
   1666   5084   johnlev 	}
   1667   5084   johnlev 
   1668  10958       dme 	DTRACE_PROBE1(xnf_send_prepared, int, prepared);
   1669   5084   johnlev 
   1670  10958       dme 	if (mp != NULL) {
   1671  10958       dme #ifdef XNF_DEBUG
   1672  10958       dme 		int notprepared = 0;
   1673  10958       dme 		mblk_t *l = mp;
   1674  10958       dme 
   1675  10958       dme 		while (l != NULL) {
   1676  10958       dme 			notprepared++;
   1677  10958       dme 			l = l->b_next;
   1678  10958       dme 		}
   1679  10958       dme 
   1680  10958       dme 		DTRACE_PROBE1(xnf_send_notprepared, int, notprepared);
   1681  10958       dme #else /* !XNF_DEBUG */
   1682  10958       dme 		DTRACE_PROBE1(xnf_send_notprepared, int, -1);
   1683  10958       dme #endif /* XNF_DEBUG */
   1684   5084   johnlev 	}
   1685   5084   johnlev 
   1686   5084   johnlev 	/*
   1687  10958       dme 	 * Push the packets we have prepared into the ring. They may
   1688  10958       dme 	 * not all go.
   1689   5084   johnlev 	 */
   1690  10958       dme 	if (head != NULL)
   1691  10958       dme 		head = tx_push_packets(xnfp, head);
   1692  10958       dme 
   1693   5084   johnlev 	/*
   1694  10958       dme 	 * If some packets that we prepared were not sent, unprepare
   1695  10958       dme 	 * them and add them back to the head of those we didn't
   1696  10958       dme 	 * prepare.
   1697   5084   johnlev 	 */
   1698  10958       dme 	{
   1699  10958       dme 		xnf_txbuf_t *loop;
   1700  10958       dme 		mblk_t *mp_head, *mp_tail;
   1701  10958       dme 		int unprepared = 0;
   1702  10958       dme 
   1703  10958       dme 		mp_head = mp_tail = NULL;
   1704  10958       dme 		loop = head;
   1705  10958       dme 
   1706  10958       dme 		while (loop != NULL) {
   1707  10958       dme 			xnf_txbuf_t *next = loop->tx_next;
   1708  10958       dme 
   1709  10958       dme 			if (loop->tx_bdesc == NULL) {
   1710  10958       dme 				(void) gnttab_end_foreign_access_ref(
   1711  10958       dme 				    loop->tx_txreq.gref, 1);
   1712  10958       dme 				gref_put(xnfp, loop->tx_txreq.gref);
   1713  10958       dme 				(void) ddi_dma_unbind_handle(
   1714  10958       dme 				    loop->tx_dma_handle);
   1715  10958       dme 			} else {
   1716  10958       dme 				xnf_buf_put(xnfp, loop->tx_bdesc, B_TRUE);
   1717  10958       dme 			}
   1718  10958       dme 
   1719  10958       dme 			ASSERT(loop->tx_mp != NULL);
   1720  10958       dme 			if (mp_head == NULL)
   1721  10958       dme 				mp_head = loop->tx_mp;
   1722  10958       dme 			mp_tail = loop->tx_mp;
   1723  10958       dme 
   1724  10958       dme 			kmem_cache_free(xnfp->xnf_tx_buf_cache, loop);
   1725  10958       dme 			loop = next;
   1726  10958       dme 			unprepared++;
   1727   5084   johnlev 		}
   1728  10958       dme 
   1729  10958       dme 		if (mp_tail == NULL) {
   1730  10958       dme 			ASSERT(mp_head == NULL);
   1731  10958       dme 		} else {
   1732  10958       dme 			ASSERT(mp_head != NULL);
   1733  10958       dme 
   1734  10958       dme 			mp_tail->b_next = mp;
   1735  10958       dme 			mp = mp_head;
   1736  10958       dme 		}
   1737  10958       dme 
   1738  10958       dme 		DTRACE_PROBE1(xnf_send_unprepared, int, unprepared);
   1739   5084   johnlev 	}
   1740   5084   johnlev 
   1741  10958       dme 	/*
   1742  10958       dme 	 * If any mblks are left then we have deferred for some reason
   1743  10958       dme 	 * and need to ask for a re-schedule later. This is typically
   1744  10958       dme 	 * due to the ring filling.
   1745  10958       dme 	 */
   1746  10958       dme 	if (mp != NULL) {
   1747  10958       dme 		mutex_enter(&xnfp->xnf_schedlock);
   1748  10958       dme 		xnfp->xnf_need_sched = B_TRUE;
   1749  10958       dme 		mutex_exit(&xnfp->xnf_schedlock);
   1750   5084   johnlev 
   1751  10958       dme 		xnfp->xnf_stat_tx_defer++;
   1752   5084   johnlev 	}
   1753   5084   johnlev 
   1754   5084   johnlev 	return (mp);
   1755   5084   johnlev }
   1756   5084   johnlev 
   1757   5084   johnlev /*
   1758  10958       dme  * Notification of RX packets. Currently no TX-complete interrupt is
   1759  10958       dme  * used, as we clean the TX ring lazily.
   1760   5084   johnlev  */
   1761   5084   johnlev static uint_t
   1762   5084   johnlev xnf_intr(caddr_t arg)
   1763   5084   johnlev {
   1764   5084   johnlev 	xnf_t *xnfp = (xnf_t *)arg;
   1765  10958       dme 	mblk_t *mp;
   1766  10958       dme 	boolean_t need_sched, clean_ring;
   1767   5084   johnlev 
   1768  10958       dme 	mutex_enter(&xnfp->xnf_rxlock);
   1769   5084   johnlev 
   1770  10958       dme 	/*
   1771  10958       dme 	 * Interrupts before we are connected are spurious.
   1772  10958       dme 	 */
   1773   6899  cz147101 	if (!xnfp->xnf_connected) {
   1774  10958       dme 		mutex_exit(&xnfp->xnf_rxlock);
   1775   5741       mrj 		xnfp->xnf_stat_unclaimed_interrupts++;
   1776   5084   johnlev 		return (DDI_INTR_UNCLAIMED);
   1777   5084   johnlev 	}
   1778   5084   johnlev 
   1779  10958       dme 	/*
   1780  10958       dme 	 * Receive side processing.
   1781  10958       dme 	 */
   1782  10958       dme 	do {
   1783  10958       dme 		/*
   1784  10958       dme 		 * Collect buffers from the ring.
   1785  10958       dme 		 */
   1786  10958       dme 		xnf_rx_collect(xnfp);
   1787   5084   johnlev 
   1788  10958       dme 		/*
   1789  10958       dme 		 * Interrupt me when the next receive buffer is consumed.
   1790  10958       dme 		 */
   1791  10958       dme 		xnfp->xnf_rx_ring.sring->rsp_event =
   1792  10958       dme 		    xnfp->xnf_rx_ring.rsp_cons + 1;
   1793  10958       dme 		xen_mb();
   1794   5741       mrj 
   1795  10958       dme 	} while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring));
   1796  10958       dme 
   1797  10958       dme 	if (xnfp->xnf_rx_new_buffers_posted) {
   1798  10958       dme 		boolean_t notify;
   1799  10958       dme 
   1800  10958       dme 		/*
   1801  10958       dme 		 * Indicate to the peer that we have re-filled the
   1802  10958       dme 		 * receive ring, if it cares.
   1803  10958       dme 		 */
   1804  10958       dme 		/* LINTED: constant in conditional context */
   1805  10958       dme 		RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify);
   1806  10958       dme 		if (notify)
   1807  10958       dme 			ec_notify_via_evtchn(xnfp->xnf_evtchn);
   1808  10958       dme 		xnfp->xnf_rx_new_buffers_posted = B_FALSE;
   1809   5084   johnlev 	}
   1810   5084   johnlev 
   1811  10958       dme 	mp = xnfp->xnf_rx_head;
   1812  10958       dme 	xnfp->xnf_rx_head = xnfp->xnf_rx_tail = NULL;
   1813  10958       dme 
   1814   7521       dme 	xnfp->xnf_stat_interrupts++;
   1815  10958       dme 	mutex_exit(&xnfp->xnf_rxlock);
   1816  10958       dme 
   1817  10958       dme 	if (mp != NULL)
   1818  10958       dme 		mac_rx(xnfp->xnf_mh, NULL, mp);
   1819   7521       dme 
   1820   5084   johnlev 	/*
   1821  10958       dme 	 * Transmit side processing.
   1822  10958       dme 	 *
   1823  10958       dme 	 * If a previous transmit attempt failed or we have pending
   1824  10958       dme 	 * multicast requests, clean the ring.
   1825  10958       dme 	 *
   1826  10958       dme 	 * If we previously stalled transmission and cleaning produces
   1827  10958       dme 	 * some free slots, tell upstream to attempt sending again.
   1828  10958       dme 	 *
   1829  10958       dme 	 * The odd style is to avoid acquiring xnf_txlock unless we
   1830  10958       dme 	 * will actually look inside the tx machinery.
   1831   5084   johnlev 	 */
   1832  10958       dme 	mutex_enter(&xnfp->xnf_schedlock);
   1833  10958       dme 	need_sched = xnfp->xnf_need_sched;
   1834  10958       dme 	clean_ring = need_sched || (xnfp->xnf_pending_multicast > 0);
   1835  10958       dme 	mutex_exit(&xnfp->xnf_schedlock);
   1836  10958       dme 
   1837  10958       dme 	if (clean_ring) {
   1838  10958       dme 		int free_slots;
   1839  10958       dme 
   1840  10958       dme 		mutex_enter(&xnfp->xnf_txlock);
   1841  10958       dme 		free_slots = tx_slots_get(xnfp, 0, B_FALSE);
   1842  10958       dme 
   1843  10958       dme 		if (need_sched && (free_slots > 0)) {
   1844  10958       dme 			mutex_enter(&xnfp->xnf_schedlock);
   1845  10958       dme 			xnfp->xnf_need_sched = B_FALSE;
   1846  10958       dme 			mutex_exit(&xnfp->xnf_schedlock);
   1847  10958       dme 
   1848  10958       dme 			mac_tx_update(xnfp->xnf_mh);
   1849  10958       dme 		}
   1850  10958       dme 		mutex_exit(&xnfp->xnf_txlock);
   1851   7521       dme 	}
   1852   5084   johnlev 
   1853   7521       dme 	return (DDI_INTR_CLAIMED);
   1854   5084   johnlev }
   1855   5084   johnlev 
   1856   5084   johnlev /*
   1857   5084   johnlev  *  xnf_start() -- start the board receiving and enable interrupts.
   1858   5084   johnlev  */
   1859   5084   johnlev static int
   1860   5084   johnlev xnf_start(void *arg)
   1861   5084   johnlev {
   1862   5084   johnlev 	xnf_t *xnfp = arg;
   1863   5084   johnlev 
   1864   5084   johnlev #ifdef XNF_DEBUG
   1865  10958       dme 	if (xnf_debug & XNF_DEBUG_TRACE)
   1866   5084   johnlev 		printf("xnf%d start(0x%p)\n",
   1867   5741       mrj 		    ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp);
   1868   5084   johnlev #endif
   1869   5084   johnlev 
   1870  10958       dme 	mutex_enter(&xnfp->xnf_rxlock);
   1871   5741       mrj 	mutex_enter(&xnfp->xnf_txlock);
   1872   5084   johnlev 
   1873   5084   johnlev 	/* Accept packets from above. */
   1874   5741       mrj 	xnfp->xnf_running = B_TRUE;
   1875   5084   johnlev 
   1876   5741       mrj 	mutex_exit(&xnfp->xnf_txlock);
   1877  10958       dme 	mutex_exit(&xnfp->xnf_rxlock);
   1878   5084   johnlev 
   1879   5084   johnlev 	return (0);
   1880   5084   johnlev }
   1881   5084   johnlev 
   1882   5084   johnlev /* xnf_stop() - disable hardware */
   1883   5084   johnlev static void
   1884   5084   johnlev xnf_stop(void *arg)
   1885   5084   johnlev {
   1886   5084   johnlev 	xnf_t *xnfp = arg;
   1887   5084   johnlev 
   1888   5084   johnlev #ifdef XNF_DEBUG
   1889  10958       dme 	if (xnf_debug & XNF_DEBUG_TRACE)
   1890   5084   johnlev 		printf("xnf%d stop(0x%p)\n",
   1891   5741       mrj 		    ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp);
   1892   5084   johnlev #endif
   1893   5084   johnlev 
   1894  10958       dme 	mutex_enter(&xnfp->xnf_rxlock);
   1895   5741       mrj 	mutex_enter(&xnfp->xnf_txlock);
   1896   5084   johnlev 
   1897   5741       mrj 	xnfp->xnf_running = B_FALSE;
   1898   5084   johnlev 
   1899   5741       mrj 	mutex_exit(&xnfp->xnf_txlock);
   1900  10958       dme 	mutex_exit(&xnfp->xnf_rxlock);
   1901   5084   johnlev }
   1902   5084   johnlev 
   1903   5084   johnlev /*
   1904  10958       dme  * Hang buffer `bdesc' on the RX ring.
   1905   5084   johnlev  */
   1906   5084   johnlev static void
   1907  10958       dme xnf_rxbuf_hang(xnf_t *xnfp, xnf_buf_t *bdesc)
   1908   5084   johnlev {
   1909  10958       dme 	netif_rx_request_t *reqp;
   1910  10958       dme 	RING_IDX hang_ix;
   1911   5084   johnlev 
   1912  10958       dme 	ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock));
   1913   5084   johnlev 
   1914   5741       mrj 	reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring,
   1915   5741       mrj 	    xnfp->xnf_rx_ring.req_prod_pvt);
   1916   5741       mrj 	hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0));
   1917  10958       dme 	ASSERT(xnfp->xnf_rx_pkt_info[hang_ix] == NULL);
   1918   5741       mrj 
   1919  10958       dme 	reqp->id = bdesc->id = hang_ix;
   1920   5084   johnlev 	reqp->gref = bdesc->grant_ref;
   1921  10958       dme 
   1922  10958       dme 	xnfp->xnf_rx_pkt_info[hang_ix] = bdesc;
   1923   5741       mrj 	xnfp->xnf_rx_ring.req_prod_pvt++;
   1924  10958       dme 
   1925  10958       dme 	xnfp->xnf_rx_new_buffers_posted = B_TRUE;
   1926   5084   johnlev }
   1927   5084   johnlev 
   1928  10958       dme /*
   1929  10958       dme  * Collect packets from the RX ring, storing them in `xnfp' for later
   1930  10958       dme  * use.
   1931  10958       dme  */
   1932  10958       dme static void
   1933  10958       dme xnf_rx_collect(xnf_t *xnfp)
   1934   5741       mrj {
   1935  10958       dme 	mblk_t *head, *tail;
   1936  10958       dme 
   1937  10958       dme 	ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock));
   1938   5741       mrj 
   1939   5741       mrj 	/*
   1940  10958       dme 	 * Loop over unconsumed responses:
   1941   5741       mrj 	 * 1. get a response
   1942   5741       mrj 	 * 2. take corresponding buffer off recv. ring
   1943   5741       mrj 	 * 3. indicate this by setting slot to NULL
   1944   5741       mrj 	 * 4. create a new message and
   1945   5741       mrj 	 * 5. copy data in, adjust ptr
   1946   5741       mrj 	 */
   1947   5741       mrj 
   1948   5741       mrj 	head = tail = NULL;
   1949   5741       mrj 
   1950   5741       mrj 	while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) {
   1951  10958       dme 		netif_rx_response_t *rxpkt;
   1952  10958       dme 		xnf_buf_t *bdesc;
   1953  10958       dme 		ssize_t len;
   1954  10958       dme 		size_t off;
   1955  10958       dme 		mblk_t *mp = NULL;
   1956  10958       dme 		boolean_t hwcsum = B_FALSE;
   1957  10958       dme 		grant_ref_t ref;
   1958   5741       mrj 
   1959   5741       mrj 		/* 1. */
   1960   5741       mrj 		rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring,
   1961   5741       mrj 		    xnfp->xnf_rx_ring.rsp_cons);
   1962   5741       mrj 
   1963  10958       dme 		DTRACE_PROBE4(xnf_rx_got_rsp, int, (int)rxpkt->id,
   1964  10958       dme 		    int, (int)rxpkt->offset,
   1965  10958       dme 		    int, (int)rxpkt->flags,
   1966  10958       dme 		    int, (int)rxpkt->status);
   1967   5741       mrj 
   1968   5741       mrj 		/*
   1969   5741       mrj 		 * 2.
   1970   5741       mrj 		 */
   1971  10958       dme 		bdesc = xnfp->xnf_rx_pkt_info[rxpkt->id];
   1972  10958       dme 
   1973  10958       dme 		/*
   1974  10958       dme 		 * 3.
   1975  10958       dme 		 */
   1976  10958       dme 		xnfp->xnf_rx_pkt_info[rxpkt->id] = NULL;
   1977   5741       mrj 		ASSERT(bdesc->id == rxpkt->id);
   1978  10958       dme 
   1979  10958       dme 		ref = bdesc->grant_ref;
   1980  10958       dme 		off = rxpkt->offset;
   1981  10958       dme 		len = rxpkt->status;
   1982  10958       dme 
   1983   6899  cz147101 		if (!xnfp->xnf_running) {
   1984  10958       dme 			DTRACE_PROBE4(xnf_rx_not_running,
   1985  10958       dme 			    int, rxpkt->status,
   1986   6899  cz147101 			    char *, bdesc->buf, int, rxpkt->offset,
   1987   6899  cz147101 			    char *, ((char *)bdesc->buf) + rxpkt->offset);
   1988  10958       dme 
   1989   6899  cz147101 			xnfp->xnf_stat_drop++;
   1990  10958       dme 
   1991  10958       dme 		} else if (len <= 0) {
   1992  10958       dme 			DTRACE_PROBE4(xnf_rx_pkt_status_negative,
   1993  10958       dme 			    int, rxpkt->status,
   1994   5741       mrj 			    char *, bdesc->buf, int, rxpkt->offset,
   1995   5741       mrj 			    char *, ((char *)bdesc->buf) + rxpkt->offset);
   1996  10958       dme 
   1997   5741       mrj 			xnfp->xnf_stat_errrx++;
   1998  10958       dme 
   1999  10958       dme 			switch (len) {
   2000  10958       dme 			case 0:
   2001   5741       mrj 				xnfp->xnf_stat_runt++;
   2002  10958       dme 				break;
   2003  10958       dme 			case NETIF_RSP_ERROR:
   2004   5741       mrj 				xnfp->xnf_stat_mac_rcv_error++;
   2005  10958       dme 				break;
   2006  10958       dme 			case NETIF_RSP_DROPPED:
   2007   5741       mrj 				xnfp->xnf_stat_norxbuf++;
   2008  10958       dme 				break;
   2009  10958       dme 			}
   2010  10958       dme 
   2011  10958       dme 		} else if (bdesc->grant_ref == INVALID_GRANT_REF) {
   2012  10958       dme 			cmn_err(CE_WARN, "Bad rx grant reference %d "
   2013  10958       dme 			    "from domain %d", ref,
   2014  10958       dme 			    xvdi_get_oeid(xnfp->xnf_devinfo));
   2015  10958       dme 
   2016  10958       dme 		} else if ((off + len) > PAGESIZE) {
   2017  10958       dme 			cmn_err(CE_WARN, "Rx packet overflows page "
   2018  10958       dme 			    "(offset %ld, length %ld) from domain %d",
   2019  10958       dme 			    off, len, xvdi_get_oeid(xnfp->xnf_devinfo));
   2020   5741       mrj 		} else {
   2021  10958       dme 			xnf_buf_t *nbuf = NULL;
   2022   5741       mrj 
   2023  10958       dme 			DTRACE_PROBE4(xnf_rx_packet, int, len,
   2024  10958       dme 			    char *, bdesc->buf, int, off,
   2025  10958       dme 			    char *, ((char *)bdesc->buf) + off);
   2026  10958       dme 
   2027   5741       mrj 			ASSERT(off + len <= PAGEOFFSET);
   2028  10958       dme 
   2029   5741       mrj 			if (rxpkt->flags & NETRXF_data_validated)
   2030   5741       mrj 				hwcsum = B_TRUE;
   2031   5741       mrj 
   2032   5741       mrj 			/*
   2033  10958       dme 			 * If the packet is below a pre-determined
   2034  10958       dme 			 * size we will copy data out rather than
   2035  10958       dme 			 * replace it.
   2036   5741       mrj 			 */
   2037  10958       dme 			if (len > xnf_rx_copy_limit)
   2038  10958       dme 				nbuf = xnf_buf_get(xnfp, KM_NOSLEEP, B_FALSE);
   2039  10958       dme 
   2040  10958       dme 			/*
   2041  10958       dme 			 * If we have a replacement buffer, attempt to
   2042  10958       dme 			 * wrap the existing one with an mblk_t in
   2043  10958       dme 			 * order that the upper layers of the stack
   2044  10958       dme 			 * might use it directly.
   2045  10958       dme 			 */
   2046  10958       dme 			if (nbuf != NULL) {
   2047  10958       dme 				mp = desballoc((unsigned char *)bdesc->buf,
   2048  10958       dme 				    bdesc->len, 0, &bdesc->free_rtn);
   2049  10958       dme 				if (mp == NULL) {
   2050  10958       dme 					xnfp->xnf_stat_rx_desballoc_fail++;
   2051  10958       dme 					xnfp->xnf_stat_norxbuf++;
   2052  10958       dme 
   2053  10958       dme 					xnf_buf_put(xnfp, nbuf, B_FALSE);
   2054  10958       dme 					nbuf = NULL;
   2055  10958       dme 				} else {
   2056  10958       dme 					mp->b_rptr = mp->b_rptr + off;
   2057  10958       dme 					mp->b_wptr = mp->b_rptr + len;
   2058  10958       dme 
   2059  10958       dme 					/*
   2060  10958       dme 					 * Release the grant reference
   2061  10958       dme 					 * associated with this buffer
   2062  10958       dme 					 * - they are scarce and the
   2063  10958       dme 					 * upper layers of the stack
   2064  10958       dme 					 * don't need it.
   2065  10958       dme 					 */
   2066  10958       dme 					(void) gnttab_end_foreign_access_ref(
   2067  10958       dme 					    bdesc->grant_ref, 0);
   2068  10958       dme 					gref_put(xnfp, bdesc->grant_ref);
   2069  10958       dme 					bdesc->grant_ref = INVALID_GRANT_REF;
   2070  10958       dme 
   2071  10958       dme 					bdesc = nbuf;
   2072  10958       dme 				}
   2073  10958       dme 			}
   2074  10958       dme 
   2075  10958       dme 			if (nbuf == NULL) {
   2076   5741       mrj 				/*
   2077  10958       dme 				 * No replacement buffer allocated -
   2078  10958       dme 				 * attempt to copy the data out and
   2079  10958       dme 				 * re-hang the existing buffer.
   2080   5741       mrj 				 */
   2081   5741       mrj 
   2082  10958       dme 				/* 4. */
   2083  10958       dme 				mp = allocb(len, BPRI_MED);
   2084  10958       dme 				if (mp == NULL) {
   2085  10958       dme 					xnfp->xnf_stat_rx_allocb_fail++;
   2086  10958       dme 					xnfp->xnf_stat_norxbuf++;
   2087  10958       dme 				} else {
   2088  10958       dme 					/* 5. */
   2089  10958       dme 					bcopy(bdesc->buf + off, mp->b_wptr,
   2090  10958       dme 					    len);
   2091  10958       dme 					mp->b_wptr += len;
   2092  10958       dme 				}
   2093   5741       mrj 			}
   2094   5741       mrj 		}
   2095   5741       mrj 
   2096  10958       dme 		/* Re-hang the buffer. */
   2097  10958       dme 		xnf_rxbuf_hang(xnfp, bdesc);
   2098   5741       mrj 
   2099  10958       dme 		if (mp != NULL) {
   2100   5084   johnlev 			if (hwcsum) {
   2101   5084   johnlev 				/*
   2102   5084   johnlev 				 * If the peer says that the data has
   2103   5084   johnlev 				 * been validated then we declare that
   2104   5084   johnlev 				 * the full checksum has been
   2105   5084   johnlev 				 * verified.
   2106   5084   johnlev 				 *
   2107   5084   johnlev 				 * We don't look at the "checksum
   2108   5084   johnlev 				 * blank" flag, and hence could have a
   2109   5084   johnlev 				 * packet here that we are asserting
   2110   5084   johnlev 				 * is good with a blank checksum.
   2111   5084   johnlev 				 *
   2112   5084   johnlev 				 * The hardware checksum offload
   2113   5084   johnlev 				 * specification says that we must
   2114   5084   johnlev 				 * provide the actual checksum as well
   2115   5084   johnlev 				 * as an assertion that it is valid,
   2116   5084   johnlev 				 * but the protocol stack doesn't
   2117   5084   johnlev 				 * actually use it and some other
   2118   5084   johnlev 				 * drivers don't bother, so we don't.
   2119   5084   johnlev 				 * If it was necessary we could grovel
   2120   5084   johnlev 				 * in the packet to find it.
   2121   5084   johnlev 				 */
   2122   5084   johnlev 				(void) hcksum_assoc(mp, NULL,
   2123   5084   johnlev 				    NULL, 0, 0, 0, 0,
   2124   5084   johnlev 				    HCK_FULLCKSUM |
   2125  10958       dme 				    HCK_FULLCKSUM_OK, 0);
   2126   5741       mrj 				xnfp->xnf_stat_rx_cksum_no_need++;
   2127   5084   johnlev 			}
   2128   5084   johnlev 			if (head == NULL) {
   2129  10958       dme 				ASSERT(tail == NULL);
   2130  10958       dme 
   2131  10958       dme 				head = mp;
   2132   5084   johnlev 			} else {
   2133  10958       dme 				ASSERT(tail != NULL);
   2134  10958       dme 
   2135   5084   johnlev 				tail->b_next = mp;
   2136   5084   johnlev 			}
   2137  10958       dme 			tail = mp;
   2138   5084   johnlev 
   2139   5084   johnlev 			ASSERT(mp->b_next == NULL);
   2140   5084   johnlev 
   2141   5741       mrj 			xnfp->xnf_stat_ipackets++;
   2142   5741       mrj 			xnfp->xnf_stat_rbytes += len;
   2143   5084   johnlev 		}
   2144   5084   johnlev 
   2145   5741       mrj 		xnfp->xnf_rx_ring.rsp_cons++;
   2146   5084   johnlev 	}
   2147   5084   johnlev 
   2148   5084   johnlev 	/*
   2149  10958       dme 	 * Store the mblks we have collected.
   2150   5084   johnlev 	 */
   2151  10958       dme 	if (head != NULL) {
   2152  10958       dme 		ASSERT(tail != NULL);
   2153   5084   johnlev 
   2154  10958       dme 		if (xnfp->xnf_rx_head == NULL) {
   2155  10958       dme 			ASSERT(xnfp->xnf_rx_tail == NULL);
   2156   5084   johnlev 
   2157  10958       dme 			xnfp->xnf_rx_head = head;
   2158  10958       dme 		} else {
   2159  10958       dme 			ASSERT(xnfp->xnf_rx_tail != NULL);
   2160   5084   johnlev 
   2161  10958       dme 			xnfp->xnf_rx_tail->b_next = head;
   2162   5084   johnlev 		}
   2163  10958       dme 		xnfp->xnf_rx_tail = tail;
   2164   5084   johnlev 	}
   2165   5084   johnlev }
   2166   5084   johnlev 
   2167   5084   johnlev /*
   2168   5084   johnlev  *  xnf_alloc_dma_resources() -- initialize the drivers structures
   2169   5084   johnlev  */
   2170   5084   johnlev static int
   2171   5084   johnlev xnf_alloc_dma_resources(xnf_t *xnfp)
   2172   5084   johnlev {
   2173   5741       mrj 	dev_info_t 		*devinfo = xnfp->xnf_devinfo;
   2174   5084   johnlev 	size_t			len;
   2175   5084   johnlev 	ddi_dma_cookie_t	dma_cookie;
   2176   5084   johnlev 	uint_t			ncookies;
   2177   5084   johnlev 	int			rc;
   2178   5084   johnlev 	caddr_t			rptr;
   2179   5084   johnlev 
   2180   5084   johnlev 	/*
   2181   5084   johnlev 	 * The code below allocates all the DMA data structures that
   2182   5084   johnlev 	 * need to be released when the driver is detached.
   2183   5084   johnlev 	 *
   2184   5084   johnlev 	 * Allocate page for the transmit descriptor ring.
   2185   5084   johnlev 	 */
   2186   5084   johnlev 	if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr,
   2187   5741       mrj 	    DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS)
   2188   5084   johnlev 		goto alloc_error;
   2189   5084   johnlev 
   2190   5741       mrj 	if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle,
   2191   5084   johnlev 	    PAGESIZE, &accattr, DDI_DMA_CONSISTENT,
   2192   5084   johnlev 	    DDI_DMA_SLEEP, 0, &rptr, &len,
   2193   5741       mrj 	    &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) {
   2194   5741       mrj 		ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
   2195   5741       mrj 		xnfp->xnf_tx_ring_dma_handle = NULL;
   2196   5084   johnlev 		goto alloc_error;
   2197   5084   johnlev 	}
   2198   5084   johnlev 
   2199   5741       mrj 	if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL,
   2200   5084   johnlev 	    rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
   2201   5084   johnlev 	    DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) {
   2202   5741       mrj 		ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
   2203   5741       mrj 		ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
   2204   5741       mrj 		xnfp->xnf_tx_ring_dma_handle = NULL;
   2205   5741       mrj 		xnfp->xnf_tx_ring_dma_acchandle = NULL;
   2206   5084   johnlev 		if (rc == DDI_DMA_NORESOURCES)
   2207   5084   johnlev 			goto alloc_error;
   2208   5084   johnlev 		else
   2209   5084   johnlev 			goto error;
   2210   5084   johnlev 	}
   2211   5084   johnlev 
   2212   5084   johnlev 	ASSERT(ncookies == 1);
   2213   5084   johnlev 	bzero(rptr, PAGESIZE);
   2214   5084   johnlev 	/* LINTED: constant in conditional context */
   2215   5084   johnlev 	SHARED_RING_INIT((netif_tx_sring_t *)rptr);
   2216   5084   johnlev 	/* LINTED: constant in conditional context */
   2217   5741       mrj 	FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE);
   2218   5741       mrj 	xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress;
   2219   5084   johnlev 
   2220   5084   johnlev 	/*
   2221   5084   johnlev 	 * Allocate page for the receive descriptor ring.
   2222   5084   johnlev 	 */
   2223   5084   johnlev 	if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr,
   2224   5741       mrj 	    DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS)
   2225   5084   johnlev 		goto alloc_error;
   2226   5084   johnlev 
   2227   5741       mrj 	if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle,
   2228   5084   johnlev 	    PAGESIZE, &accattr, DDI_DMA_CONSISTENT,
   2229   5084   johnlev 	    DDI_DMA_SLEEP, 0, &rptr, &len,
   2230   5741       mrj 	    &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) {
   2231   5741       mrj 		ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
   2232   5741       mrj 		xnfp->xnf_rx_ring_dma_handle = NULL;
   2233   5084   johnlev 		goto alloc_error;
   2234   5084   johnlev 	}
   2235   5084   johnlev 
   2236   5741       mrj 	if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL,
   2237   5084   johnlev 	    rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
   2238   5084   johnlev 	    DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) {
   2239   5741       mrj 		ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
   2240   5741       mrj 		ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
   2241   5741       mrj 		xnfp->xnf_rx_ring_dma_handle = NULL;
   2242   5741       mrj 		xnfp->xnf_rx_ring_dma_acchandle = NULL;
   2243   5084   johnlev 		if (rc == DDI_DMA_NORESOURCES)
   2244   5084   johnlev 			goto alloc_error;
   2245   5084   johnlev 		else
   2246   5084   johnlev 			goto error;
   2247   5084   johnlev 	}
   2248   5084   johnlev 
   2249   5084   johnlev 	ASSERT(ncookies == 1);
   2250   5084   johnlev 	bzero(rptr, PAGESIZE);
   2251   5084   johnlev 	/* LINTED: constant in conditional context */
   2252   5084   johnlev 	SHARED_RING_INIT((netif_rx_sring_t *)rptr);
   2253   5084   johnlev 	/* LINTED: constant in conditional context */
   2254   5741       mrj 	FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE);
   2255   5741       mrj 	xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress;
   2256   5084   johnlev 
   2257   5084   johnlev 	return (DDI_SUCCESS);
   2258   5084   johnlev 
   2259   5084   johnlev alloc_error:
   2260   5084   johnlev 	cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory",
   2261   5741       mrj 	    ddi_get_instance(xnfp->xnf_devinfo));
   2262   5084   johnlev error:
   2263   5084   johnlev 	xnf_release_dma_resources(xnfp);
   2264   5084   johnlev 	return (DDI_FAILURE);
   2265   5084   johnlev }
   2266   5084   johnlev 
   2267   5084   johnlev /*
   2268   5084   johnlev  * Release all DMA resources in the opposite order from acquisition
   2269   5084   johnlev  */
   2270   5084   johnlev static void
   2271   5084   johnlev xnf_release_dma_resources(xnf_t *xnfp)
   2272   5084   johnlev {
   2273   5084   johnlev 	int i;
   2274   5084   johnlev 
   2275   5084   johnlev 	/*
   2276   5084   johnlev 	 * Free receive buffers which are currently associated with
   2277  10958       dme 	 * descriptors.
   2278   5084   johnlev 	 */
   2279  10958       dme 	mutex_enter(&xnfp->xnf_rxlock);
   2280  10958       dme 	for (i = 0; i < NET_RX_RING_SIZE; i++) {
   2281  10958       dme 		xnf_buf_t *bp;
   2282   5084   johnlev 
   2283  10958       dme 		if ((bp = xnfp->xnf_rx_pkt_info[i]) == NULL)
   2284   5084   johnlev 			continue;
   2285  10958       dme 		xnfp->xnf_rx_pkt_info[i] = NULL;
   2286  10958       dme 		xnf_buf_put(xnfp, bp, B_FALSE);
   2287   5084   johnlev 	}
   2288  10958       dme 	mutex_exit(&xnfp->xnf_rxlock);
   2289   5084   johnlev 
   2290  10958       dme 	/* Free the receive ring buffer. */
   2291   5741       mrj 	if (xnfp->xnf_rx_ring_dma_acchandle != NULL) {
   2292   5741       mrj 		(void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle);
   2293   5741       mrj 		ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
   2294   5741       mrj 		ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
   2295   5741       mrj 		xnfp->xnf_rx_ring_dma_acchandle = NULL;
   2296   5084   johnlev 	}
   2297  10958       dme 	/* Free the transmit ring buffer. */
   2298   5741       mrj 	if (xnfp->xnf_tx_ring_dma_acchandle != NULL) {
   2299   5741       mrj 		(void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle);
   2300   5741       mrj 		ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
   2301   5741       mrj 		ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
   2302   5741       mrj 		xnfp->xnf_tx_ring_dma_acchandle = NULL;
   2303   5084   johnlev 	}
   2304   6899  cz147101 
   2305   5084   johnlev }
   2306   5084   johnlev 
   2307  10958       dme /*
   2308  10958       dme  * Release any packets and associated structures used by the TX ring.
   2309  10958       dme  */
   2310   5084   johnlev static void
   2311   5084   johnlev xnf_release_mblks(xnf_t *xnfp)
   2312   5084   johnlev {
   2313  10958       dme 	RING_IDX i;
   2314  10958       dme 	xnf_txid_t *tidp;
   2315   5084   johnlev 
   2316  10958       dme 	for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0];
   2317  10958       dme 	    i < NET_TX_RING_SIZE;
   2318  10958       dme 	    i++, tidp++) {
   2319  10958       dme 		xnf_txbuf_t *txp = tidp->txbuf;
   2320  10958       dme 
   2321  10958       dme 		if (txp != NULL) {
   2322  10958       dme 			ASSERT(txp->tx_mp != NULL);
   2323  10958       dme 			freemsg(txp->tx_mp);
   2324  10958       dme 
   2325  10958       dme 			txid_put(xnfp, tidp);
   2326  10958       dme 			kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
   2327  10958       dme 		}
   2328   5084   johnlev 	}
   2329   5084   johnlev }
   2330   5084   johnlev 
   2331  10958       dme static int
   2332  10958       dme xnf_buf_constructor(void *buf, void *arg, int kmflag)
   2333   5084   johnlev {
   2334  10958       dme 	int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP;
   2335  10958       dme 	xnf_buf_t *bdesc = buf;
   2336  10958       dme 	xnf_t *xnfp = arg;
   2337  10958       dme 	ddi_dma_cookie_t dma_cookie;
   2338  10958       dme 	uint_t ncookies;
   2339   5084   johnlev 	size_t len;
   2340   5084   johnlev 
   2341  10958       dme 	if (kmflag & KM_NOSLEEP)
   2342  10958       dme 		ddiflags = DDI_DMA_DONTWAIT;
   2343   5084   johnlev 
   2344  10958       dme 	/* Allocate a DMA access handle for the buffer. */
   2345  10958       dme 	if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &buf_dma_attr,
   2346  10958       dme 	    ddiflags, 0, &bdesc->dma_handle) != DDI_SUCCESS)
   2347   5084   johnlev 		goto failure;
   2348   5084   johnlev 
   2349  10958       dme 	/* Allocate DMA-able memory for buffer. */
   2350   5084   johnlev 	if (ddi_dma_mem_alloc(bdesc->dma_handle,
   2351  10958       dme 	    PAGESIZE, &data_accattr, DDI_DMA_STREAMING, ddiflags, 0,
   2352   5084   johnlev 	    &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
   2353   5741       mrj 		goto failure_1;
   2354   5084   johnlev 
   2355  10958       dme 	/* Bind to virtual address of buffer to get physical address. */
   2356   5084   johnlev 	if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL,
   2357  10958       dme 	    bdesc->buf, len, DDI_DMA_RDWR | DDI_DMA_STREAMING,
   2358  10958       dme 	    ddiflags, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED)
   2359   5741       mrj 		goto failure_2;
   2360   5084   johnlev 	ASSERT(ncookies == 1);
   2361   5084   johnlev 
   2362  10958       dme 	bdesc->free_rtn.free_func = xnf_buf_recycle;
   2363  10958       dme 	bdesc->free_rtn.free_arg = (caddr_t)bdesc;
   2364  10958       dme 	bdesc->xnfp = xnfp;
   2365  10958       dme 	bdesc->buf_phys = dma_cookie.dmac_laddress;
   2366  10958       dme 	bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys));
   2367  10958       dme 	bdesc->len = dma_cookie.dmac_size;
   2368  10958       dme 	bdesc->grant_ref = INVALID_GRANT_REF;
   2369  10958       dme 	bdesc->gen = xnfp->xnf_gen;
   2370   5741       mrj 
   2371  10958       dme 	atomic_add_64(&xnfp->xnf_stat_buf_allocated, 1);
   2372   5084   johnlev 
   2373  10958       dme 	return (0);
   2374   5084   johnlev 
   2375   5741       mrj failure_2:
   2376   5084   johnlev 	ddi_dma_mem_free(&bdesc->acc_handle);
   2377   5084   johnlev 
   2378   5741       mrj failure_1:
   2379   5084   johnlev 	ddi_dma_free_handle(&bdesc->dma_handle);
   2380   5084   johnlev 
   2381   5084   johnlev failure:
   2382  10958       dme 
   2383  10958       dme 	return (-1);
   2384  10958       dme }
   2385  10958       dme 
   2386  10958       dme static void
   2387  10958       dme xnf_buf_destructor(void *buf, void *arg)
   2388  10958       dme {
   2389  10958       dme 	xnf_buf_t *bdesc = buf;
   2390  10958       dme 	xnf_t *xnfp = arg;
   2391  10958       dme 
   2392  10958       dme 	(void) ddi_dma_unbind_handle(bdesc->dma_handle);
   2393  10958       dme 	ddi_dma_mem_free(&bdesc->acc_handle);
   2394  10958       dme 	ddi_dma_free_handle(&bdesc->dma_handle);
   2395  10958       dme 
   2396  10958       dme 	atomic_add_64(&xnfp->xnf_stat_buf_allocated, -1);
   2397  10958       dme }
   2398  10958       dme 
   2399  10958       dme static xnf_buf_t *
   2400  10958       dme xnf_buf_get(xnf_t *xnfp, int flags, boolean_t readonly)
   2401  10958       dme {
   2402  10958       dme 	grant_ref_t gref;
   2403  10958       dme 	xnf_buf_t *bufp;
   2404  10958       dme 
   2405  10958       dme 	/*
   2406  10958       dme 	 * Usually grant references are more scarce than memory, so we
   2407  10958       dme 	 * attempt to acquire a grant reference first.
   2408  10958       dme 	 */
   2409  10958       dme 	gref = gref_get(xnfp);
   2410  10958       dme 	if (gref == INVALID_GRANT_REF)
   2411  10958       dme 		return (NULL);
   2412  10958       dme 
   2413  10958       dme 	bufp = kmem_cache_alloc(xnfp->xnf_buf_cache, flags);
   2414  10958       dme 	if (bufp == NULL) {
   2415  10958       dme 		gref_put(xnfp, gref);
   2416  10958       dme 		return (NULL);
   2417  10958       dme 	}
   2418  10958       dme 
   2419  10958       dme 	ASSERT(bufp->grant_ref == INVALID_GRANT_REF);
   2420  10958       dme 
   2421  10958       dme 	bufp->grant_ref = gref;
   2422  10958       dme 
   2423  10958       dme 	if (bufp->gen != xnfp->xnf_gen)
   2424  10958       dme 		xnf_buf_refresh(bufp);
   2425  10958       dme 
   2426  10958       dme 	gnttab_grant_foreign_access_ref(bufp->grant_ref,
   2427  10958       dme 	    xvdi_get_oeid(bufp->xnfp->xnf_devinfo),
   2428  10958       dme 	    bufp->buf_mfn, readonly ? 1 : 0);
   2429  10958       dme 
   2430  10958       dme 	atomic_add_64(&xnfp->xnf_stat_buf_outstanding, 1);
   2431  10958       dme 
   2432  10958       dme 	return (bufp);
   2433  10958       dme }
   2434  10958       dme 
   2435  10958       dme static void
   2436  10958       dme xnf_buf_put(xnf_t *xnfp, xnf_buf_t *bufp, boolean_t readonly)
   2437  10958       dme {
   2438  10958       dme 	if (bufp->grant_ref != INVALID_GRANT_REF) {
   2439  10958       dme 		(void) gnttab_end_foreign_access_ref(
   2440  10958       dme 		    bufp->grant_ref, readonly ? 1 : 0);
   2441  10958       dme 		gref_put(xnfp, bufp->grant_ref);
   2442  10958       dme 		bufp->grant_ref = INVALID_GRANT_REF;
   2443  10958       dme 	}
   2444  10958       dme 
   2445  10958       dme 	kmem_cache_free(xnfp->xnf_buf_cache, bufp);
   2446  10958       dme 
   2447  10958       dme 	atomic_add_64(&xnfp->xnf_stat_buf_outstanding, -1);
   2448  10958       dme }
   2449  10958       dme 
   2450  10958       dme /*
   2451  10958       dme  * Refresh any cached data about a buffer after resume.
   2452  10958       dme  */
   2453  10958       dme static void
   2454  10958       dme xnf_buf_refresh(xnf_buf_t *bdesc)
   2455  10958       dme {
   2456  10958       dme 	bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys));
   2457  10958       dme 	bdesc->gen = bdesc->xnfp->xnf_gen;
   2458  10958       dme }
   2459  10958       dme 
   2460  10958       dme /*
   2461  10958       dme  * Streams `freeb' routine for `xnf_buf_t' when used as transmit
   2462  10958       dme  * look-aside buffers.
   2463  10958       dme  */
   2464  10958       dme static void
   2465  10958       dme xnf_buf_recycle(xnf_buf_t *bdesc)
   2466  10958       dme {
   2467  10958       dme 	xnf_t *xnfp = bdesc->xnfp;
   2468  10958       dme 
   2469  10958       dme 	xnf_buf_put(xnfp, bdesc, B_TRUE);
   2470  10958       dme }
   2471  10958       dme 
   2472  10958       dme static int
   2473  10958       dme xnf_tx_buf_constructor(void *buf, void *arg, int kmflag)
   2474  10958       dme {
   2475  10958       dme 	_NOTE(ARGUNUSED(kmflag));
   2476  10958       dme 	xnf_txbuf_t *txp = buf;
   2477  10958       dme 	xnf_t *xnfp = arg;
   2478  10958       dme 
   2479  10958       dme 	if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &buf_dma_attr,
   2480  10958       dme 	    0, 0, &txp->tx_dma_handle) != DDI_SUCCESS)
   2481  10958       dme 		return (-1);
   2482  10958       dme 
   2483  10958       dme 	return (0);
   2484  10958       dme }
   2485  10958       dme 
   2486  10958       dme static void
   2487  10958       dme xnf_tx_buf_destructor(void *buf, void *arg)
   2488  10958       dme {
   2489  10958       dme 	_NOTE(ARGUNUSED(arg));
   2490  10958       dme 	xnf_txbuf_t *txp = buf;
   2491  10958       dme 
   2492  10958       dme 	ddi_dma_free_handle(&txp->tx_dma_handle);
   2493   5084   johnlev }
   2494   5084   johnlev 
   2495   5741       mrj /*
   2496   5741       mrj  * Statistics.
   2497   5741       mrj  */
   2498   5741       mrj static char *xnf_aux_statistics[] = {
   2499   5741       mrj 	"tx_cksum_deferred",
   2500   5741       mrj 	"rx_cksum_no_need",
   2501   5741       mrj 	"interrupts",
   2502   5741       mrj 	"unclaimed_interrupts",
   2503   5741       mrj 	"tx_pullup",
   2504   5741       mrj 	"tx_pagebndry",
   2505   5741       mrj 	"tx_attempt",
   2506  10958       dme 	"buf_allocated",
   2507  10958       dme 	"buf_outstanding",
   2508  10958       dme 	"gref_outstanding",
   2509  10958       dme 	"gref_failure",
   2510  10958       dme 	"gref_peak",
   2511  10958       dme 	"rx_allocb_fail",
   2512  10958       dme 	"rx_desballoc_fail",
   2513   5741       mrj };
   2514   5741       mrj 
   2515   5741       mrj static int
   2516   5741       mrj xnf_kstat_aux_update(kstat_t *ksp, int flag)
   2517   5741       mrj {
   2518   5741       mrj 	xnf_t *xnfp;
   2519   5741       mrj 	kstat_named_t *knp;
   2520   5741       mrj 
   2521   5741       mrj 	if (flag != KSTAT_READ)
   2522   5741       mrj 		return (EACCES);
   2523   5741       mrj 
   2524   5741       mrj 	xnfp = ksp->ks_private;
   2525   5741       mrj 	knp = ksp->ks_data;
   2526   5741       mrj 
   2527   5741       mrj 	/*
   2528   5741       mrj 	 * Assignment order must match that of the names in
   2529   5741       mrj 	 * xnf_aux_statistics.
   2530   5741       mrj 	 */
   2531   5741       mrj 	(knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred;
   2532   5741       mrj 	(knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need;
   2533   5741       mrj 
   2534   5741       mrj 	(knp++)->value.ui64 = xnfp->xnf_stat_interrupts;
   2535   5741       mrj 	(knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts;
   2536   5741       mrj 	(knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup;
   2537   5741       mrj 	(knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry;
   2538   5741       mrj 	(knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt;
   2539   5741       mrj 
   2540  10958       dme 	(knp++)->value.ui64 = xnfp->xnf_stat_buf_allocated;
   2541  10958       dme 	(knp++)->value.ui64 = xnfp->xnf_stat_buf_outstanding;
   2542  10958       dme 	(knp++)->value.ui64 = xnfp->xnf_stat_gref_outstanding;
   2543  10958       dme 	(knp++)->value.ui64 = xnfp->xnf_stat_gref_failure;
   2544  10958       dme 	(knp++)->value.ui64 = xnfp->xnf_stat_gref_peak;
   2545  10958       dme 	(knp++)->value.ui64 = xnfp->xnf_stat_rx_allocb_fail;
   2546  10958       dme 	(knp++)->value.ui64 = xnfp->xnf_stat_rx_desballoc_fail;
   2547   5741       mrj 
   2548   5741       mrj 	return (0);
   2549   5741       mrj }
   2550   5741       mrj 
   2551   5741       mrj static boolean_t
   2552   5741       mrj xnf_kstat_init(xnf_t *xnfp)
   2553   5741       mrj {
   2554   5741       mrj 	int nstat = sizeof (xnf_aux_statistics) /
   2555   5741       mrj 	    sizeof (xnf_aux_statistics[0]);
   2556   5741       mrj 	char **cp = xnf_aux_statistics;
   2557   5741       mrj 	kstat_named_t *knp;
   2558   5741       mrj 
   2559   5741       mrj 	/*
   2560   5741       mrj 	 * Create and initialise kstats.
   2561   5741       mrj 	 */
   2562   5741       mrj 	if ((xnfp->xnf_kstat_aux = kstat_create("xnf",
   2563   5741       mrj 	    ddi_get_instance(xnfp->xnf_devinfo),
   2564   5741       mrj 	    "aux_statistics", "net", KSTAT_TYPE_NAMED,
   2565   5741       mrj 	    nstat, 0)) == NULL)
   2566   5741       mrj 		return (B_FALSE);
   2567   5741       mrj 
   2568   5741       mrj 	xnfp->xnf_kstat_aux->ks_private = xnfp;
   2569   5741       mrj 	xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update;
   2570   5741       mrj 
   2571   5741       mrj 	knp = xnfp->xnf_kstat_aux->ks_data;
   2572   5741       mrj 	while (nstat > 0) {
   2573   5741       mrj 		kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
   2574   5741       mrj 
   2575   5741       mrj 		knp++;
   2576   5741       mrj 		cp++;
   2577   5741       mrj 		nstat--;
   2578   5741       mrj 	}
   2579   5741       mrj 
   2580   5741       mrj 	kstat_install(xnfp->xnf_kstat_aux);
   2581   5741       mrj 
   2582   5741       mrj 	return (B_TRUE);
   2583   5741       mrj }
   2584   5741       mrj 
   2585   5084   johnlev static int
   2586   5084   johnlev xnf_stat(void *arg, uint_t stat, uint64_t *val)
   2587   5084   johnlev {
   2588   5084   johnlev 	xnf_t *xnfp = arg;
   2589   5084   johnlev 
   2590  10958       dme 	mutex_enter(&xnfp->xnf_rxlock);
   2591   5741       mrj 	mutex_enter(&xnfp->xnf_txlock);
   2592   5084   johnlev 
   2593   5741       mrj #define	mac_stat(q, r)				\
   2594   5084   johnlev 	case (MAC_STAT_##q):			\
   2595   5741       mrj 		*val = xnfp->xnf_stat_##r;	\
   2596   5741       mrj 		break
   2597   5741       mrj 
   2598   5741       mrj #define	ether_stat(q, r)			\
   2599   5741       mrj 	case (ETHER_STAT_##q):			\
   2600   5741       mrj 		*val = xnfp->xnf_stat_##r;	\
   2601   5084   johnlev 		break
   2602   5084   johnlev 
   2603   5084   johnlev 	switch (stat) {
   2604   5084   johnlev 
   2605   5741       mrj 	mac_stat(IPACKETS, ipackets);
   2606   5741       mrj 	mac_stat(OPACKETS, opackets);
   2607   5741       mrj 	mac_stat(RBYTES, rbytes);
   2608   5741       mrj 	mac_stat(OBYTES, obytes);
   2609   5741       mrj 	mac_stat(NORCVBUF, norxbuf);
   2610   5741       mrj 	mac_stat(IERRORS, errrx);
   2611   5741       mrj 	mac_stat(NOXMTBUF, tx_defer);
   2612   5741       mrj 
   2613   5741       mrj 	ether_stat(MACRCV_ERRORS, mac_rcv_error);
   2614   5741       mrj 	ether_stat(TOOSHORT_ERRORS, runt);
   2615   5084   johnlev 
   2616   7397       Max 	/* always claim to be in full duplex mode */
   2617   7397       Max 	case ETHER_STAT_LINK_DUPLEX:
   2618   7397       Max 		*val = LINK_DUPLEX_FULL;
   2619   7397       Max 		break;
   2620   7397       Max 
   2621   7397       Max 	/* always claim to be at 1Gb/s link speed */
   2622   7397       Max 	case MAC_STAT_IFSPEED:
   2623   7397       Max 		*val = 1000000000ull;
   2624   7397       Max 		break;
   2625   7397       Max 
   2626   5084   johnlev 	default:
   2627   5741       mrj 		mutex_exit(&xnfp->xnf_txlock);
   2628  10958       dme 		mutex_exit(&xnfp->xnf_rxlock);
   2629   5084   johnlev 
   2630   5084   johnlev 		return (ENOTSUP);
   2631   5084   johnlev 	}
   2632   5084   johnlev 
   2633   5741       mrj #undef mac_stat
   2634   5741       mrj #undef ether_stat
   2635   5084   johnlev 
   2636   5741       mrj 	mutex_exit(&xnfp->xnf_txlock);
   2637  10958       dme 	mutex_exit(&xnfp->xnf_rxlock);
   2638   5084   johnlev 
   2639   5084   johnlev 	return (0);
   2640   5084   johnlev }
   2641   5084   johnlev 
   2642   5084   johnlev static boolean_t
   2643   5084   johnlev xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data)
   2644   5084   johnlev {
   2645  10958       dme 	_NOTE(ARGUNUSED(arg));
   2646   5084   johnlev 
   2647   5084   johnlev 	switch (cap) {
   2648   5084   johnlev 	case MAC_CAPAB_HCKSUM: {
   2649   5084   johnlev 		uint32_t *capab = cap_data;
   2650   5084   johnlev 
   2651   5702       dme 		/*
   2652   7351       dme 		 * Whilst the flag used to communicate with the IO
   2653   7351       dme 		 * domain is called "NETTXF_csum_blank", the checksum
   2654   7351       dme 		 * in the packet must contain the pseudo-header
   2655   7351       dme 		 * checksum and not zero.
   2656   5702       dme 		 *
   2657   7351       dme 		 * To help out the IO domain, we might use
   2658   7351       dme 		 * HCKSUM_INET_PARTIAL. Unfortunately our stack will
   2659   7351       dme 		 * then use checksum offload for IPv6 packets, which
   2660   7351       dme 		 * the IO domain can't handle.
   2661   7351       dme 		 *
   2662   7351       dme 		 * As a result, we declare outselves capable of
   2663   7351       dme 		 * HCKSUM_INET_FULL_V4. This means that we receive
   2664   7351       dme 		 * IPv4 packets from the stack with a blank checksum
   2665   7351       dme 		 * field and must insert the pseudo-header checksum
   2666   7351       dme 		 * before passing the packet to the IO domain.
   2667   5702       dme 		 */
   2668  10958       dme 		*capab = HCKSUM_INET_FULL_V4;
   2669   5084   johnlev 		break;
   2670   5084   johnlev 	}
   2671   5084   johnlev 	default:
   2672   5084   johnlev 		return (B_FALSE);
   2673   5084   johnlev 	}
   2674   5084   johnlev 
   2675   5084   johnlev 	return (B_TRUE);
   2676   5084   johnlev }
   2677   5084   johnlev 
   2678  10958       dme /*
   2679  10958       dme  * The state of the peer has changed - react accordingly.
   2680  10958       dme  */
   2681   5084   johnlev static void
   2682   5084   johnlev oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
   2683   5084   johnlev     void *arg, void *impl_data)
   2684   5084   johnlev {
   2685  10958       dme 	_NOTE(ARGUNUSED(id, arg));
   2686   5084   johnlev 	xnf_t *xnfp = ddi_get_driver_private(dip);
   2687   5084   johnlev 	XenbusState new_state = *(XenbusState *)impl_data;
   2688   5084   johnlev 
   2689   5084   johnlev 	ASSERT(xnfp != NULL);
   2690   5084   johnlev 
   2691   5084   johnlev 	switch (new_state) {
   2692  10958       dme 	case XenbusStateUnknown:
   2693  10958       dme 	case XenbusStateInitialising:
   2694  10958       dme 	case XenbusStateInitialised:
   2695  10958       dme 	case XenbusStateClosing:
   2696  10958       dme 	case XenbusStateClosed:
   2697  10958       dme 	case XenbusStateReconfiguring:
   2698  10958       dme 	case XenbusStateReconfigured:
   2699  10958       dme 		break;
   2700  10958       dme 
   2701  10958       dme 	case XenbusStateInitWait:
   2702  10958       dme 		xnf_read_config(xnfp);
   2703  10958       dme 
   2704  10958       dme 		if (!xnfp->xnf_be_rx_copy) {
   2705  10958       dme 			cmn_err(CE_WARN,
   2706  10958       dme 			    "The xnf driver requires a dom0 that "
   2707  10958       dme 			    "supports 'feature-rx-copy'.");
   2708  10958       dme 			(void) xvdi_switch_state(xnfp->xnf_devinfo,
   2709  10958       dme 			    XBT_NULL, XenbusStateClosed);
   2710  10958       dme 			break;
   2711  10958       dme 		}
   2712  10958       dme 
   2713  10958       dme 		/*
   2714  10958       dme 		 * Connect to the backend.
   2715  10958       dme 		 */
   2716  10958       dme 		xnf_be_connect(xnfp);
   2717  10958       dme 
   2718  10958       dme 		/*
   2719  10958       dme 		 * Our MAC address as discovered by xnf_read_config().
   2720  10958       dme 		 */
   2721  10958       dme 		mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr);
   2722  10958       dme 
   2723  10958       dme 		break;
   2724  10958       dme 
   2725   5084   johnlev 	case XenbusStateConnected:
   2726  10958       dme 		mutex_enter(&xnfp->xnf_rxlock);
   2727   5741       mrj 		mutex_enter(&xnfp->xnf_txlock);
   2728   5084   johnlev 
   2729   5741       mrj 		xnfp->xnf_connected = B_TRUE;
   2730   6899  cz147101 		/*
   2731  10958       dme 		 * Wake up any threads waiting to send data to
   2732  10958       dme 		 * backend.
   2733   6899  cz147101 		 */
   2734  10958       dme 		cv_broadcast(&xnfp->xnf_cv_state);
   2735   5084   johnlev 
   2736   5741       mrj 		mutex_exit(&xnfp->xnf_txlock);
   2737  10958       dme 		mutex_exit(&xnfp->xnf_rxlock);
   2738   5084   johnlev 
   2739   6899  cz147101 		/*
   2740  10958       dme 		 * Kick the peer in case it missed any transmits
   2741  10958       dme 		 * request in the TX ring.
   2742   6899  cz147101 		 */
   2743   5741       mrj 		ec_notify_via_evtchn(xnfp->xnf_evtchn);
   2744   6899  cz147101 
   2745   6899  cz147101 		/*
   2746  10958       dme 		 * There may already be completed receive requests in
   2747  10958       dme 		 * the ring sent by backend after it gets connected
   2748  10958       dme 		 * but before we see its state change here, so we call
   2749  10958       dme 		 * xnf_intr() to handle them, if any.
   2750   6899  cz147101 		 */
   2751   6899  cz147101 		(void) xnf_intr((caddr_t)xnfp);
   2752   6899  cz147101 
   2753  10958       dme 		/*
   2754  10958       dme 		 * Mark the link up now that we are connected.
   2755  10958       dme 		 */
   2756   7397       Max 		mac_link_update(xnfp->xnf_mh, LINK_STATE_UP);
   2757  10958       dme 
   2758  10958       dme 		/*
   2759  10958       dme 		 * Tell the backend about the multicast addresses in
   2760  10958       dme 		 * which we are interested.
   2761  10958       dme 		 */
   2762  10958       dme 		mac_multicast_refresh(xnfp->xnf_mh, NULL, xnfp, B_TRUE);
   2763   7397       Max 
   2764   5084   johnlev 		break;
   2765   5084   johnlev 
   2766   5084   johnlev 	default:
   2767   5084   johnlev 		break;
   2768   5084   johnlev 	}
   2769   5084   johnlev }
   2770