1 5084 johnlev /* 2 5084 johnlev * CDDL HEADER START 3 5084 johnlev * 4 5084 johnlev * The contents of this file are subject to the terms of the 5 5084 johnlev * Common Development and Distribution License (the "License"). 6 5084 johnlev * You may not use this file except in compliance with the License. 7 5084 johnlev * 8 5084 johnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 5084 johnlev * or http://www.opensolaris.org/os/licensing. 10 5084 johnlev * See the License for the specific language governing permissions 11 5084 johnlev * and limitations under the License. 12 5084 johnlev * 13 5084 johnlev * When distributing Covered Code, include this CDDL HEADER in each 14 5084 johnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 5084 johnlev * If applicable, add the following below this CDDL HEADER, with the 16 5084 johnlev * fields enclosed by brackets "[]" replaced with your own identifying 17 5084 johnlev * information: Portions Copyright [yyyy] [name of copyright owner] 18 5084 johnlev * 19 5084 johnlev * CDDL HEADER END 20 5084 johnlev */ 21 5084 johnlev 22 5084 johnlev /* 23 10175 Stuart * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 5084 johnlev * Use is subject to license terms. 25 5084 johnlev */ 26 5084 johnlev 27 5084 johnlev /* 28 5084 johnlev * 29 5084 johnlev * Copyright (c) 2004 Christian Limpach. 30 5084 johnlev * All rights reserved. 31 5084 johnlev * 32 5084 johnlev * Redistribution and use in source and binary forms, with or without 33 5084 johnlev * modification, are permitted provided that the following conditions 34 5084 johnlev * are met: 35 5084 johnlev * 1. Redistributions of source code must retain the above copyright 36 5084 johnlev * notice, this list of conditions and the following disclaimer. 37 5084 johnlev * 2. 
Redistributions in binary form must reproduce the above copyright 38 5084 johnlev * notice, this list of conditions and the following disclaimer in the 39 5084 johnlev * documentation and/or other materials provided with the distribution. 40 5084 johnlev * 3. This section intentionally left blank. 41 5084 johnlev * 4. The name of the author may not be used to endorse or promote products 42 5084 johnlev * derived from this software without specific prior written permission. 43 5084 johnlev * 44 5084 johnlev * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 45 5084 johnlev * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 46 5084 johnlev * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 47 5084 johnlev * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 48 5084 johnlev * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 49 5084 johnlev * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 50 5084 johnlev * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 51 5084 johnlev * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 52 5084 johnlev * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 53 5084 johnlev * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 54 5084 johnlev */ 55 5084 johnlev /* 56 5084 johnlev * Section 3 of the above license was updated in response to bug 6379571. 57 5084 johnlev */ 58 5084 johnlev 59 5084 johnlev /* 60 10958 dme * xnf.c - GLDv3 network driver for domU. 61 10958 dme */ 62 10958 dme 63 10958 dme /* 64 10958 dme * This driver uses four per-instance locks: 65 10958 dme * 66 10958 dme * xnf_gref_lock: 67 10958 dme * 68 10958 dme * Protects access to the grant reference list stored in 69 10958 dme * xnf_gref_head. Grant references should be acquired and released 70 10958 dme * using gref_get() and gref_put() respectively. 
71 10958 dme * 72 10958 dme * xnf_schedlock: 73 10958 dme * 74 10958 dme * Protects: 75 10958 dme * xnf_need_sched - used to record that a previous transmit attempt 76 10958 dme * failed (and consequently it will be necessary to call 77 10958 dme * mac_tx_update() when transmit resources are available). 78 10958 dme * xnf_pending_multicast - the number of multicast requests that 79 10958 dme * have been submitted to the backend for which we have not 80 10958 dme * processed responses. 81 10958 dme * 82 10958 dme * xnf_txlock: 83 10958 dme * 84 10958 dme * Protects the transmit ring (xnf_tx_ring) and associated 85 10958 dme * structures (notably xnf_tx_pkt_id and xnf_tx_pkt_id_head). 86 10958 dme * 87 10958 dme * xnf_rxlock: 88 10958 dme * 89 10958 dme * Protects the receive ring (xnf_rx_ring) and associated 90 10958 dme * structures (notably xnf_rx_pkt_info). 91 10958 dme * 92 10958 dme * If driver-global state that affects both the transmit and receive 93 10958 dme * rings is manipulated, both xnf_txlock and xnf_rxlock should be 94 10958 dme * held, in that order. 95 10958 dme * 96 10958 dme * xnf_schedlock is acquired both whilst holding xnf_txlock and 97 10958 dme * without. It should always be acquired after xnf_txlock if both are 98 10958 dme * held. 99 10958 dme * 100 10958 dme * Notes: 101 10958 dme * - atomic_add_64() is used to manipulate counters where we require 102 10958 dme * accuracy. For counters intended only for observation by humans, 103 10958 dme * post increment/decrement are used instead. 
104 5084 johnlev */ 105 5084 johnlev 106 5084 johnlev #include <sys/types.h> 107 5084 johnlev #include <sys/errno.h> 108 5084 johnlev #include <sys/param.h> 109 5084 johnlev #include <sys/sysmacros.h> 110 5084 johnlev #include <sys/systm.h> 111 5084 johnlev #include <sys/stream.h> 112 5084 johnlev #include <sys/strsubr.h> 113 10958 dme #include <sys/strsun.h> 114 5084 johnlev #include <sys/conf.h> 115 5084 johnlev #include <sys/ddi.h> 116 5084 johnlev #include <sys/devops.h> 117 5084 johnlev #include <sys/sunddi.h> 118 5084 johnlev #include <sys/sunndi.h> 119 5084 johnlev #include <sys/dlpi.h> 120 5084 johnlev #include <sys/ethernet.h> 121 5084 johnlev #include <sys/strsun.h> 122 5084 johnlev #include <sys/pattr.h> 123 5084 johnlev #include <inet/ip.h> 124 7351 dme #include <inet/ip_impl.h> 125 7351 dme #include <sys/gld.h> 126 5084 johnlev #include <sys/modctl.h> 127 8275 Eric #include <sys/mac_provider.h> 128 5084 johnlev #include <sys/mac_ether.h> 129 5084 johnlev #include <sys/bootinfo.h> 130 5084 johnlev #include <sys/mach_mmu.h> 131 5741 mrj #ifdef XPV_HVM_DRIVER 132 5741 mrj #include <sys/xpv_support.h> 133 5741 mrj #include <sys/hypervisor.h> 134 5741 mrj #else 135 5741 mrj #include <sys/hypervisor.h> 136 5084 johnlev #include <sys/evtchn_impl.h> 137 5084 johnlev #include <sys/balloon_impl.h> 138 5741 mrj #endif 139 5741 mrj #include <xen/public/io/netif.h> 140 5741 mrj #include <sys/gnttab.h> 141 5084 johnlev #include <xen/sys/xendev.h> 142 5741 mrj #include <sys/sdt.h> 143 10958 dme #include <sys/note.h> 144 10958 dme #include <sys/debug.h> 145 5741 mrj 146 5741 mrj #include <io/xnf.h> 147 5741 mrj 148 5084 johnlev #if defined(DEBUG) || defined(__lint) 149 5084 johnlev #define XNF_DEBUG 150 10958 dme #endif 151 10958 dme 152 10958 dme #ifdef XNF_DEBUG 153 10958 dme int xnf_debug = 0; 154 10958 dme xnf_t *xnf_debug_instance = NULL; 155 5084 johnlev #endif 156 5084 johnlev 157 5084 johnlev /* 158 5084 johnlev * On a 32 bit PAE system physical and machine 
addresses are larger 159 5084 johnlev * than 32 bits. ddi_btop() on such systems takes an unsigned long 160 5084 johnlev * argument, and so addresses above 4G are truncated before ddi_btop() 161 5084 johnlev * gets to see them. To avoid this, code the shift operation here. 162 5084 johnlev */ 163 5084 johnlev #define xnf_btop(addr) ((addr) >> PAGESHIFT) 164 5084 johnlev 165 10958 dme unsigned int xnf_max_tx_frags = 1; 166 5741 mrj 167 5084 johnlev /* 168 10958 dme * Should we use the multicast control feature if the backend provides 169 10958 dme * it? 170 5084 johnlev */ 171 10958 dme boolean_t xnf_multicast_control = B_TRUE; 172 10958 dme 173 5084 johnlev /* 174 10958 dme * Received packets below this size are copied to a new streams buffer 175 10958 dme * rather than being desballoc'ed. 176 10958 dme * 177 10958 dme * This value is chosen to accommodate traffic where there are a large 178 10958 dme * number of small packets. For data showing a typical distribution, 179 10958 dme * see: 180 10958 dme * 181 10958 dme * Sinha07a: 182 10958 dme * Rishi Sinha, Christos Papadopoulos, and John 183 10958 dme * Heidemann. Internet Packet Size Distributions: Some 184 10958 dme * Observations. Technical Report ISI-TR-2007-643, 185 10958 dme * USC/Information Sciences Institute, May, 2007. Originally 186 10958 dme * released October 2005 as web page 187 10958 dme * http://netweb.usc.edu/~sinha/pkt-sizes/. 188 10958 dme * <http://www.isi.edu/~johnh/PAPERS/Sinha07a.html>. 
189 5084 johnlev */ 190 10958 dme size_t xnf_rx_copy_limit = 64; 191 5084 johnlev 192 10958 dme #define INVALID_GRANT_HANDLE ((grant_handle_t)-1) 193 10958 dme #define INVALID_GRANT_REF ((grant_ref_t)-1) 194 10958 dme #define INVALID_TX_ID ((uint16_t)-1) 195 10958 dme 196 10958 dme #define TX_ID_TO_TXID(p, id) (&((p)->xnf_tx_pkt_id[(id)])) 197 10958 dme #define TX_ID_VALID(i) (((i) != INVALID_TX_ID) && ((i) < NET_TX_RING_SIZE)) 198 5084 johnlev 199 5084 johnlev /* Required system entry points */ 200 5084 johnlev static int xnf_attach(dev_info_t *, ddi_attach_cmd_t); 201 5084 johnlev static int xnf_detach(dev_info_t *, ddi_detach_cmd_t); 202 5084 johnlev 203 5084 johnlev /* Required driver entry points for Nemo */ 204 5084 johnlev static int xnf_start(void *); 205 5084 johnlev static void xnf_stop(void *); 206 5084 johnlev static int xnf_set_mac_addr(void *, const uint8_t *); 207 5084 johnlev static int xnf_set_multicast(void *, boolean_t, const uint8_t *); 208 5084 johnlev static int xnf_set_promiscuous(void *, boolean_t); 209 5084 johnlev static mblk_t *xnf_send(void *, mblk_t *); 210 5084 johnlev static uint_t xnf_intr(caddr_t); 211 5084 johnlev static int xnf_stat(void *, uint_t, uint64_t *); 212 5084 johnlev static boolean_t xnf_getcapab(void *, mac_capab_t, void *); 213 5084 johnlev 214 5084 johnlev /* Driver private functions */ 215 5084 johnlev static int xnf_alloc_dma_resources(xnf_t *); 216 5084 johnlev static void xnf_release_dma_resources(xnf_t *); 217 5084 johnlev static void xnf_release_mblks(xnf_t *); 218 10958 dme 219 10958 dme static int xnf_buf_constructor(void *, void *, int); 220 10958 dme static void xnf_buf_destructor(void *, void *); 221 10958 dme static xnf_buf_t *xnf_buf_get(xnf_t *, int, boolean_t); 222 10958 dme #pragma inline(xnf_buf_get) 223 10958 dme static void xnf_buf_put(xnf_t *, xnf_buf_t *, boolean_t); 224 10958 dme #pragma inline(xnf_buf_put) 225 10958 dme static void xnf_buf_refresh(xnf_buf_t *); 226 10958 dme #pragma 
inline(xnf_buf_refresh) 227 10958 dme static void xnf_buf_recycle(xnf_buf_t *); 228 10958 dme 229 10958 dme static int xnf_tx_buf_constructor(void *, void *, int); 230 10958 dme static void xnf_tx_buf_destructor(void *, void *); 231 10958 dme 232 10958 dme static grant_ref_t gref_get(xnf_t *); 233 10958 dme #pragma inline(gref_get) 234 10958 dme static void gref_put(xnf_t *, grant_ref_t); 235 10958 dme #pragma inline(gref_put) 236 10958 dme 237 10958 dme static xnf_txid_t *txid_get(xnf_t *); 238 10958 dme #pragma inline(txid_get) 239 10958 dme static void txid_put(xnf_t *, xnf_txid_t *); 240 10958 dme #pragma inline(txid_put) 241 10958 dme 242 5084 johnlev void xnf_send_driver_status(int, int); 243 10958 dme static void xnf_rxbuf_hang(xnf_t *, xnf_buf_t *); 244 10958 dme static int xnf_tx_clean_ring(xnf_t *); 245 5084 johnlev static void oe_state_change(dev_info_t *, ddi_eventcookie_t, 246 5084 johnlev void *, void *); 247 10958 dme static boolean_t xnf_kstat_init(xnf_t *); 248 10958 dme static void xnf_rx_collect(xnf_t *); 249 5084 johnlev 250 5084 johnlev static mac_callbacks_t xnf_callbacks = { 251 10958 dme MC_GETCAPAB, 252 5084 johnlev xnf_stat, 253 5084 johnlev xnf_start, 254 5084 johnlev xnf_stop, 255 5084 johnlev xnf_set_promiscuous, 256 5084 johnlev xnf_set_multicast, 257 5084 johnlev xnf_set_mac_addr, 258 5084 johnlev xnf_send, 259 10958 dme NULL, 260 5084 johnlev xnf_getcapab 261 5084 johnlev }; 262 5084 johnlev 263 5084 johnlev /* DMA attributes for network ring buffer */ 264 5084 johnlev static ddi_dma_attr_t ringbuf_dma_attr = { 265 5084 johnlev DMA_ATTR_V0, /* version of this structure */ 266 5084 johnlev 0, /* lowest usable address */ 267 5084 johnlev 0xffffffffffffffffULL, /* highest usable address */ 268 5084 johnlev 0x7fffffff, /* maximum DMAable byte count */ 269 5084 johnlev MMU_PAGESIZE, /* alignment in bytes */ 270 5084 johnlev 0x7ff, /* bitmap of burst sizes */ 271 5084 johnlev 1, /* minimum transfer */ 272 5084 johnlev 0xffffffffU, /* 
maximum transfer */ 273 5084 johnlev 0xffffffffffffffffULL, /* maximum segment length */ 274 5084 johnlev 1, /* maximum number of segments */ 275 5084 johnlev 1, /* granularity */ 276 5084 johnlev 0, /* flags (reserved) */ 277 5084 johnlev }; 278 5084 johnlev 279 10958 dme /* DMA attributes for transmit and receive data */ 280 10958 dme static ddi_dma_attr_t buf_dma_attr = { 281 5084 johnlev DMA_ATTR_V0, /* version of this structure */ 282 5084 johnlev 0, /* lowest usable address */ 283 5084 johnlev 0xffffffffffffffffULL, /* highest usable address */ 284 5084 johnlev 0x7fffffff, /* maximum DMAable byte count */ 285 5084 johnlev MMU_PAGESIZE, /* alignment in bytes */ 286 5084 johnlev 0x7ff, /* bitmap of burst sizes */ 287 5084 johnlev 1, /* minimum transfer */ 288 5084 johnlev 0xffffffffU, /* maximum transfer */ 289 5084 johnlev 0xffffffffffffffffULL, /* maximum segment length */ 290 5084 johnlev 1, /* maximum number of segments */ 291 5084 johnlev 1, /* granularity */ 292 5084 johnlev 0, /* flags (reserved) */ 293 5084 johnlev }; 294 5084 johnlev 295 5084 johnlev /* DMA access attributes for registers and descriptors */ 296 5084 johnlev static ddi_device_acc_attr_t accattr = { 297 5084 johnlev DDI_DEVICE_ATTR_V0, 298 5084 johnlev DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */ 299 5084 johnlev DDI_STRICTORDER_ACC 300 5084 johnlev }; 301 5084 johnlev 302 5084 johnlev /* DMA access attributes for data: NOT to be byte swapped. 
*/ 303 5084 johnlev static ddi_device_acc_attr_t data_accattr = { 304 5084 johnlev DDI_DEVICE_ATTR_V0, 305 5084 johnlev DDI_NEVERSWAP_ACC, 306 5084 johnlev DDI_STRICTORDER_ACC 307 5084 johnlev }; 308 5084 johnlev 309 5084 johnlev DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach, 310 7656 Sherry nodev, NULL, D_MP, NULL, ddi_quiesce_not_supported); 311 5084 johnlev 312 5084 johnlev static struct modldrv xnf_modldrv = { 313 7351 dme &mod_driverops, 314 7351 dme "Virtual Ethernet driver", 315 7351 dme &xnf_dev_ops 316 5084 johnlev }; 317 5084 johnlev 318 5084 johnlev static struct modlinkage modlinkage = { 319 5084 johnlev MODREV_1, &xnf_modldrv, NULL 320 5084 johnlev }; 321 5084 johnlev 322 5084 johnlev int 323 5084 johnlev _init(void) 324 5084 johnlev { 325 5084 johnlev int r; 326 5084 johnlev 327 5084 johnlev mac_init_ops(&xnf_dev_ops, "xnf"); 328 5084 johnlev r = mod_install(&modlinkage); 329 5084 johnlev if (r != DDI_SUCCESS) 330 5084 johnlev mac_fini_ops(&xnf_dev_ops); 331 5084 johnlev 332 5084 johnlev return (r); 333 5084 johnlev } 334 5084 johnlev 335 5084 johnlev int 336 5084 johnlev _fini(void) 337 5084 johnlev { 338 10958 dme return (EBUSY); /* XXPV should be removable */ 339 5084 johnlev } 340 5084 johnlev 341 5084 johnlev int 342 5084 johnlev _info(struct modinfo *modinfop) 343 5084 johnlev { 344 5084 johnlev return (mod_info(&modlinkage, modinfop)); 345 5084 johnlev } 346 5084 johnlev 347 10958 dme /* 348 10958 dme * Acquire a grant reference. 
349 10958 dme */ 350 10958 dme static grant_ref_t 351 10958 dme gref_get(xnf_t *xnfp) 352 10958 dme { 353 10958 dme grant_ref_t gref; 354 10958 dme 355 10958 dme mutex_enter(&xnfp->xnf_gref_lock); 356 10958 dme 357 10958 dme do { 358 10958 dme gref = gnttab_claim_grant_reference(&xnfp->xnf_gref_head); 359 10958 dme 360 10958 dme } while ((gref == INVALID_GRANT_REF) && 361 10958 dme (gnttab_alloc_grant_references(16, &xnfp->xnf_gref_head) == 0)); 362 10958 dme 363 10958 dme mutex_exit(&xnfp->xnf_gref_lock); 364 10958 dme 365 10958 dme if (gref == INVALID_GRANT_REF) { 366 10958 dme xnfp->xnf_stat_gref_failure++; 367 10958 dme } else { 368 10958 dme atomic_add_64(&xnfp->xnf_stat_gref_outstanding, 1); 369 10958 dme if (xnfp->xnf_stat_gref_outstanding > xnfp->xnf_stat_gref_peak) 370 10958 dme xnfp->xnf_stat_gref_peak = 371 10958 dme xnfp->xnf_stat_gref_outstanding; 372 10958 dme } 373 10958 dme 374 10958 dme return (gref); 375 10958 dme } 376 10958 dme 377 10958 dme /* 378 10958 dme * Release a grant reference. 379 10958 dme */ 380 10958 dme static void 381 10958 dme gref_put(xnf_t *xnfp, grant_ref_t gref) 382 10958 dme { 383 10958 dme ASSERT(gref != INVALID_GRANT_REF); 384 10958 dme 385 10958 dme mutex_enter(&xnfp->xnf_gref_lock); 386 10958 dme gnttab_release_grant_reference(&xnfp->xnf_gref_head, gref); 387 10958 dme mutex_exit(&xnfp->xnf_gref_lock); 388 10958 dme 389 10958 dme atomic_add_64(&xnfp->xnf_stat_gref_outstanding, -1); 390 10958 dme } 391 10958 dme 392 10958 dme /* 393 10958 dme * Acquire a transmit id. 
394 10958 dme */ 395 10958 dme static xnf_txid_t * 396 10958 dme txid_get(xnf_t *xnfp) 397 10958 dme { 398 10958 dme xnf_txid_t *tidp; 399 10958 dme 400 10958 dme ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 401 10958 dme 402 10958 dme if (xnfp->xnf_tx_pkt_id_head == INVALID_TX_ID) 403 10958 dme return (NULL); 404 10958 dme 405 10958 dme ASSERT(TX_ID_VALID(xnfp->xnf_tx_pkt_id_head)); 406 10958 dme 407 10958 dme tidp = TX_ID_TO_TXID(xnfp, xnfp->xnf_tx_pkt_id_head); 408 10958 dme xnfp->xnf_tx_pkt_id_head = tidp->next; 409 10958 dme tidp->next = INVALID_TX_ID; 410 10958 dme 411 10958 dme ASSERT(tidp->txbuf == NULL); 412 10958 dme 413 10958 dme return (tidp); 414 10958 dme } 415 10958 dme 416 10958 dme /* 417 10958 dme * Release a transmit id. 418 10958 dme */ 419 10958 dme static void 420 10958 dme txid_put(xnf_t *xnfp, xnf_txid_t *tidp) 421 10958 dme { 422 10958 dme ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 423 10958 dme ASSERT(TX_ID_VALID(tidp->id)); 424 10958 dme ASSERT(tidp->next == INVALID_TX_ID); 425 10958 dme 426 10958 dme tidp->txbuf = NULL; 427 10958 dme tidp->next = xnfp->xnf_tx_pkt_id_head; 428 10958 dme xnfp->xnf_tx_pkt_id_head = tidp->id; 429 10958 dme } 430 10958 dme 431 10958 dme /* 432 10958 dme * Get `wanted' slots in the transmit ring, waiting for at least that 433 10958 dme * number if `wait' is B_TRUE. Force the ring to be cleaned by setting 434 10958 dme * `wanted' to zero. 435 10958 dme * 436 10958 dme * Return the number of slots available. 
437 10958 dme */ 438 10958 dme static int 439 10958 dme tx_slots_get(xnf_t *xnfp, int wanted, boolean_t wait) 440 10958 dme { 441 10958 dme int slotsfree; 442 10958 dme boolean_t forced_clean = (wanted == 0); 443 10958 dme 444 10958 dme ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 445 10958 dme 446 10958 dme /* LINTED: constant in conditional context */ 447 10958 dme while (B_TRUE) { 448 10958 dme slotsfree = RING_FREE_REQUESTS(&xnfp->xnf_tx_ring); 449 10958 dme 450 10958 dme if ((slotsfree < wanted) || forced_clean) 451 10958 dme slotsfree = xnf_tx_clean_ring(xnfp); 452 10958 dme 453 10958 dme /* 454 10958 dme * If there are more than we need free, tell other 455 10958 dme * people to come looking again. We hold txlock, so we 456 10958 dme * are able to take our slots before anyone else runs. 457 10958 dme */ 458 10958 dme if (slotsfree > wanted) 459 10958 dme cv_broadcast(&xnfp->xnf_cv_tx_slots); 460 10958 dme 461 10958 dme if (slotsfree >= wanted) 462 10958 dme break; 463 10958 dme 464 10958 dme if (!wait) 465 10958 dme break; 466 10958 dme 467 10958 dme cv_wait(&xnfp->xnf_cv_tx_slots, &xnfp->xnf_txlock); 468 10958 dme } 469 10958 dme 470 10958 dme ASSERT(slotsfree <= RING_SIZE(&(xnfp->xnf_tx_ring))); 471 10958 dme 472 10958 dme return (slotsfree); 473 10958 dme } 474 10958 dme 475 5084 johnlev static int 476 5084 johnlev xnf_setup_rings(xnf_t *xnfp) 477 5084 johnlev { 478 10958 dme domid_t oeid; 479 10958 dme struct xenbus_device *xsd; 480 5084 johnlev RING_IDX i; 481 10958 dme int err; 482 10958 dme xnf_txid_t *tidp; 483 10958 dme xnf_buf_t **bdescp; 484 5084 johnlev 485 5741 mrj oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 486 5741 mrj xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 487 5084 johnlev 488 10958 dme if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF) 489 5741 mrj gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 490 5084 johnlev 491 5084 johnlev err = gnttab_grant_foreign_access(oeid, 492 5741 mrj xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0); 493 5084 
johnlev if (err <= 0) { 494 5084 johnlev err = -err; 495 5084 johnlev xenbus_dev_error(xsd, err, "granting access to tx ring page"); 496 5084 johnlev goto out; 497 5084 johnlev } 498 5741 mrj xnfp->xnf_tx_ring_ref = (grant_ref_t)err; 499 5084 johnlev 500 10958 dme if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF) 501 5741 mrj gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 502 5084 johnlev 503 5084 johnlev err = gnttab_grant_foreign_access(oeid, 504 5741 mrj xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0); 505 5084 johnlev if (err <= 0) { 506 5084 johnlev err = -err; 507 5084 johnlev xenbus_dev_error(xsd, err, "granting access to rx ring page"); 508 5084 johnlev goto out; 509 5084 johnlev } 510 5741 mrj xnfp->xnf_rx_ring_ref = (grant_ref_t)err; 511 5084 johnlev 512 10958 dme mutex_enter(&xnfp->xnf_txlock); 513 5084 johnlev 514 5084 johnlev /* 515 10958 dme * Setup/cleanup the TX ring. Note that this can lose packets 516 10958 dme * after a resume, but we expect to stagger on. 517 5084 johnlev */ 518 10958 dme xnfp->xnf_tx_pkt_id_head = INVALID_TX_ID; /* I.e. emtpy list. */ 519 10958 dme for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0]; 520 10958 dme i < NET_TX_RING_SIZE; 521 10958 dme i++, tidp++) { 522 10958 dme xnf_txbuf_t *txp; 523 5084 johnlev 524 10958 dme tidp->id = i; 525 5084 johnlev 526 10958 dme txp = tidp->txbuf; 527 10958 dme if (txp == NULL) { 528 10958 dme tidp->next = INVALID_TX_ID; /* Appease txid_put(). 
*/ 529 10958 dme txid_put(xnfp, tidp); 530 5084 johnlev continue; 531 5084 johnlev } 532 5084 johnlev 533 10958 dme ASSERT(txp->tx_txreq.gref != INVALID_GRANT_REF); 534 10958 dme ASSERT(txp->tx_mp != NULL); 535 5084 johnlev 536 10958 dme switch (txp->tx_type) { 537 10958 dme case TX_DATA: 538 10958 dme VERIFY(gnttab_query_foreign_access(txp->tx_txreq.gref) 539 10958 dme == 0); 540 5084 johnlev 541 10958 dme if (txp->tx_bdesc == NULL) { 542 10958 dme (void) gnttab_end_foreign_access_ref( 543 10958 dme txp->tx_txreq.gref, 1); 544 10958 dme gref_put(xnfp, txp->tx_txreq.gref); 545 10958 dme (void) ddi_dma_unbind_handle( 546 10958 dme txp->tx_dma_handle); 547 10958 dme } else { 548 10958 dme xnf_buf_put(xnfp, txp->tx_bdesc, B_TRUE); 549 10958 dme } 550 5084 johnlev 551 10958 dme freemsg(txp->tx_mp); 552 10958 dme txid_put(xnfp, tidp); 553 10958 dme kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 554 10958 dme 555 10958 dme break; 556 10958 dme 557 10958 dme case TX_MCAST_REQ: 558 10958 dme txp->tx_type = TX_MCAST_RSP; 559 10958 dme txp->tx_status = NETIF_RSP_DROPPED; 560 10958 dme cv_broadcast(&xnfp->xnf_cv_multicast); 561 10958 dme 562 10958 dme /* 563 10958 dme * The request consumed two slots in the ring, 564 10958 dme * yet only a single xnf_txid_t is used. Step 565 10958 dme * over the empty slot. 
566 10958 dme */ 567 10958 dme i++; 568 10958 dme ASSERT(i < NET_TX_RING_SIZE); 569 10958 dme 570 10958 dme break; 571 10958 dme 572 10958 dme case TX_MCAST_RSP: 573 10958 dme break; 574 5084 johnlev } 575 5084 johnlev } 576 7351 dme 577 7351 dme /* LINTED: constant in conditional context */ 578 7351 dme SHARED_RING_INIT(xnfp->xnf_tx_ring.sring); 579 10958 dme /* LINTED: constant in conditional context */ 580 10958 dme FRONT_RING_INIT(&xnfp->xnf_tx_ring, 581 10958 dme xnfp->xnf_tx_ring.sring, PAGESIZE); 582 5084 johnlev 583 5741 mrj mutex_exit(&xnfp->xnf_txlock); 584 5084 johnlev 585 10958 dme mutex_enter(&xnfp->xnf_rxlock); 586 10958 dme 587 5084 johnlev /* 588 10958 dme * Clean out any buffers currently posted to the receive ring 589 10958 dme * before we reset it. 590 5084 johnlev */ 591 10958 dme for (i = 0, bdescp = &xnfp->xnf_rx_pkt_info[0]; 592 10958 dme i < NET_RX_RING_SIZE; 593 10958 dme i++, bdescp++) { 594 10958 dme if (*bdescp != NULL) { 595 10958 dme xnf_buf_put(xnfp, *bdescp, B_FALSE); 596 10958 dme *bdescp = NULL; 597 5084 johnlev } 598 5084 johnlev } 599 5741 mrj 600 7351 dme /* LINTED: constant in conditional context */ 601 7351 dme SHARED_RING_INIT(xnfp->xnf_rx_ring.sring); 602 10958 dme /* LINTED: constant in conditional context */ 603 10958 dme FRONT_RING_INIT(&xnfp->xnf_rx_ring, 604 10958 dme xnfp->xnf_rx_ring.sring, PAGESIZE); 605 7351 dme 606 10958 dme /* 607 10958 dme * Fill the ring with buffers. 
608 10958 dme */ 609 5084 johnlev for (i = 0; i < NET_RX_RING_SIZE; i++) { 610 10958 dme xnf_buf_t *bdesc; 611 10958 dme 612 10958 dme bdesc = xnf_buf_get(xnfp, KM_SLEEP, B_FALSE); 613 10958 dme VERIFY(bdesc != NULL); 614 10958 dme xnf_rxbuf_hang(xnfp, bdesc); 615 5084 johnlev } 616 10958 dme 617 5084 johnlev /* LINTED: constant in conditional context */ 618 5741 mrj RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring); 619 5084 johnlev 620 10958 dme mutex_exit(&xnfp->xnf_rxlock); 621 5084 johnlev 622 5084 johnlev return (0); 623 5084 johnlev 624 5084 johnlev out: 625 10958 dme if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF) 626 5741 mrj gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0); 627 10958 dme xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF; 628 5084 johnlev 629 10958 dme if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF) 630 5741 mrj gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0); 631 10958 dme xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF; 632 5084 johnlev 633 5084 johnlev return (err); 634 5084 johnlev } 635 5084 johnlev 636 5084 johnlev /* 637 5084 johnlev * Connect driver to back end, called to set up communication with 638 5084 johnlev * back end driver both initially and on resume after restore/migrate. 
639 5084 johnlev */ 640 5084 johnlev void 641 5084 johnlev xnf_be_connect(xnf_t *xnfp) 642 5084 johnlev { 643 5084 johnlev const char *message; 644 5084 johnlev xenbus_transaction_t xbt; 645 5741 mrj struct xenbus_device *xsd; 646 5084 johnlev char *xsname; 647 6899 cz147101 int err; 648 5084 johnlev 649 5741 mrj ASSERT(!xnfp->xnf_connected); 650 5084 johnlev 651 5741 mrj xsd = xvdi_get_xsd(xnfp->xnf_devinfo); 652 5741 mrj xsname = xvdi_get_xsname(xnfp->xnf_devinfo); 653 5084 johnlev 654 5084 johnlev err = xnf_setup_rings(xnfp); 655 5084 johnlev if (err != 0) { 656 5084 johnlev cmn_err(CE_WARN, "failed to set up tx/rx rings"); 657 5084 johnlev xenbus_dev_error(xsd, err, "setting up ring"); 658 5084 johnlev return; 659 5084 johnlev } 660 5084 johnlev 661 5084 johnlev again: 662 5084 johnlev err = xenbus_transaction_start(&xbt); 663 5084 johnlev if (err != 0) { 664 5084 johnlev xenbus_dev_error(xsd, EIO, "starting transaction"); 665 5084 johnlev return; 666 5084 johnlev } 667 5084 johnlev 668 5084 johnlev err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u", 669 5741 mrj xnfp->xnf_tx_ring_ref); 670 5084 johnlev if (err != 0) { 671 5084 johnlev message = "writing tx ring-ref"; 672 5084 johnlev goto abort_transaction; 673 5084 johnlev } 674 5084 johnlev 675 5084 johnlev err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u", 676 5741 mrj xnfp->xnf_rx_ring_ref); 677 5084 johnlev if (err != 0) { 678 5084 johnlev message = "writing rx ring-ref"; 679 5084 johnlev goto abort_transaction; 680 5084 johnlev } 681 5084 johnlev 682 5741 mrj err = xenbus_printf(xbt, xsname, "event-channel", "%u", 683 5741 mrj xnfp->xnf_evtchn); 684 5084 johnlev if (err != 0) { 685 5084 johnlev message = "writing event-channel"; 686 5084 johnlev goto abort_transaction; 687 5084 johnlev } 688 5084 johnlev 689 5084 johnlev err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1); 690 5084 johnlev if (err != 0) { 691 5084 johnlev message = "writing feature-rx-notify"; 692 5084 johnlev goto 
abort_transaction; 693 5084 johnlev } 694 5084 johnlev 695 10958 dme err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 1); 696 5741 mrj if (err != 0) { 697 5741 mrj message = "writing request-rx-copy"; 698 5084 johnlev goto abort_transaction; 699 5084 johnlev } 700 5084 johnlev 701 10958 dme if (xnfp->xnf_be_mcast_control) { 702 10958 dme err = xenbus_printf(xbt, xsname, "request-multicast-control", 703 10958 dme "%d", 1); 704 10958 dme if (err != 0) { 705 10958 dme message = "writing request-multicast-control"; 706 10958 dme goto abort_transaction; 707 10958 dme } 708 10958 dme } 709 10958 dme 710 10958 dme err = xvdi_switch_state(xnfp->xnf_devinfo, xbt, XenbusStateConnected); 711 5084 johnlev if (err != 0) { 712 10958 dme message = "switching state to XenbusStateConnected"; 713 5084 johnlev goto abort_transaction; 714 5084 johnlev } 715 5084 johnlev 716 5084 johnlev err = xenbus_transaction_end(xbt, 0); 717 5084 johnlev if (err != 0) { 718 5084 johnlev if (err == EAGAIN) 719 5084 johnlev goto again; 720 5084 johnlev xenbus_dev_error(xsd, err, "completing transaction"); 721 5084 johnlev } 722 5084 johnlev 723 5084 johnlev return; 724 5084 johnlev 725 5084 johnlev abort_transaction: 726 5084 johnlev (void) xenbus_transaction_end(xbt, 1); 727 5084 johnlev xenbus_dev_error(xsd, err, "%s", message); 728 6899 cz147101 } 729 6899 cz147101 730 6899 cz147101 /* 731 10958 dme * Read configuration information from xenstore. 732 6899 cz147101 */ 733 6899 cz147101 void 734 6899 cz147101 xnf_read_config(xnf_t *xnfp) 735 6899 cz147101 { 736 10958 dme int err, be_cap; 737 10958 dme char mac[ETHERADDRL * 3]; 738 10958 dme char *oename = xvdi_get_oename(xnfp->xnf_devinfo); 739 6899 cz147101 740 10958 dme err = xenbus_scanf(XBT_NULL, oename, "mac", 741 6899 cz147101 "%s", (char *)&mac[0]); 742 6899 cz147101 if (err != 0) { 743 6899 cz147101 /* 744 6899 cz147101 * bad: we're supposed to be set up with a proper mac 745 6899 cz147101 * addr. 
at this point 746 6899 cz147101 */ 747 6899 cz147101 cmn_err(CE_WARN, "%s%d: no mac address", 748 6899 cz147101 ddi_driver_name(xnfp->xnf_devinfo), 749 6899 cz147101 ddi_get_instance(xnfp->xnf_devinfo)); 750 6899 cz147101 return; 751 6899 cz147101 } 752 6899 cz147101 if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) { 753 6899 cz147101 err = ENOENT; 754 6899 cz147101 xenbus_dev_error(xvdi_get_xsd(xnfp->xnf_devinfo), ENOENT, 755 6899 cz147101 "parsing %s/mac", xvdi_get_xsname(xnfp->xnf_devinfo)); 756 6899 cz147101 return; 757 6899 cz147101 } 758 6899 cz147101 759 10958 dme err = xenbus_scanf(XBT_NULL, oename, 760 10958 dme "feature-rx-copy", "%d", &be_cap); 761 6899 cz147101 /* 762 6899 cz147101 * If we fail to read the store we assume that the key is 763 6899 cz147101 * absent, implying an older domain at the far end. Older 764 10958 dme * domains cannot do HV copy. 765 6899 cz147101 */ 766 6899 cz147101 if (err != 0) 767 10958 dme be_cap = 0; 768 10958 dme xnfp->xnf_be_rx_copy = (be_cap != 0); 769 10958 dme 770 10958 dme err = xenbus_scanf(XBT_NULL, oename, 771 10958 dme "feature-multicast-control", "%d", &be_cap); 772 6899 cz147101 /* 773 10958 dme * If we fail to read the store we assume that the key is 774 10958 dme * absent, implying an older domain at the far end. Older 775 10958 dme * domains do not support multicast control. 
776 6899 cz147101 */ 777 10958 dme if (err != 0) 778 10958 dme be_cap = 0; 779 10958 dme xnfp->xnf_be_mcast_control = (be_cap != 0) && xnf_multicast_control; 780 5084 johnlev } 781 5084 johnlev 782 5084 johnlev /* 783 5084 johnlev * attach(9E) -- Attach a device to the system 784 5084 johnlev */ 785 5084 johnlev static int 786 5084 johnlev xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd) 787 5084 johnlev { 788 5084 johnlev mac_register_t *macp; 789 5084 johnlev xnf_t *xnfp; 790 5084 johnlev int err; 791 10958 dme char cachename[32]; 792 5084 johnlev 793 5084 johnlev #ifdef XNF_DEBUG 794 10958 dme if (xnf_debug & XNF_DEBUG_DDI) 795 5084 johnlev printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo), 796 5084 johnlev (void *)devinfo); 797 5084 johnlev #endif 798 5084 johnlev 799 5084 johnlev switch (cmd) { 800 5084 johnlev case DDI_RESUME: 801 5084 johnlev xnfp = ddi_get_driver_private(devinfo); 802 10958 dme xnfp->xnf_gen++; 803 5084 johnlev 804 5084 johnlev (void) xvdi_resume(devinfo); 805 5084 johnlev (void) xvdi_alloc_evtchn(devinfo); 806 5741 mrj xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 807 5741 mrj #ifdef XPV_HVM_DRIVER 808 5741 mrj ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, 809 5741 mrj xnfp); 810 5741 mrj #else 811 5084 johnlev (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, 812 5084 johnlev (caddr_t)xnfp); 813 5741 mrj #endif 814 5084 johnlev return (DDI_SUCCESS); 815 5084 johnlev 816 5084 johnlev case DDI_ATTACH: 817 5084 johnlev break; 818 5084 johnlev 819 5084 johnlev default: 820 5084 johnlev return (DDI_FAILURE); 821 5084 johnlev } 822 5084 johnlev 823 5084 johnlev /* 824 5084 johnlev * Allocate gld_mac_info_t and xnf_instance structures 825 5084 johnlev */ 826 5084 johnlev macp = mac_alloc(MAC_VERSION); 827 5084 johnlev if (macp == NULL) 828 5084 johnlev return (DDI_FAILURE); 829 5084 johnlev xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP); 830 5084 johnlev 831 5084 johnlev macp->m_dip = devinfo; 832 5084 johnlev 
macp->m_driver = xnfp; 833 5741 mrj xnfp->xnf_devinfo = devinfo; 834 5084 johnlev 835 5084 johnlev macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER; 836 5741 mrj macp->m_src_addr = xnfp->xnf_mac_addr; 837 5084 johnlev macp->m_callbacks = &xnf_callbacks; 838 5084 johnlev macp->m_min_sdu = 0; 839 5084 johnlev macp->m_max_sdu = XNF_MAXPKT; 840 5084 johnlev 841 5741 mrj xnfp->xnf_running = B_FALSE; 842 5741 mrj xnfp->xnf_connected = B_FALSE; 843 10958 dme xnfp->xnf_be_rx_copy = B_FALSE; 844 10958 dme xnfp->xnf_be_mcast_control = B_FALSE; 845 7521 dme xnfp->xnf_need_sched = B_FALSE; 846 5741 mrj 847 10958 dme xnfp->xnf_rx_head = NULL; 848 10958 dme xnfp->xnf_rx_tail = NULL; 849 10958 dme xnfp->xnf_rx_new_buffers_posted = B_FALSE; 850 10958 dme 851 5741 mrj #ifdef XPV_HVM_DRIVER 852 6450 rab /* 853 6450 rab * Report our version to dom0. 854 6450 rab */ 855 10175 Stuart if (xenbus_printf(XBT_NULL, "guest/xnf", "version", "%d", 856 6450 rab HVMPV_XNF_VERS)) 857 6450 rab cmn_err(CE_WARN, "xnf: couldn't write version\n"); 858 5741 mrj #endif 859 5084 johnlev 860 5084 johnlev /* 861 5084 johnlev * Get the iblock cookie with which to initialize the mutexes. 
862 5084 johnlev */ 863 5741 mrj if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie) 864 5084 johnlev != DDI_SUCCESS) 865 5084 johnlev goto failure; 866 10958 dme 867 5741 mrj mutex_init(&xnfp->xnf_txlock, 868 5741 mrj NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 869 10958 dme mutex_init(&xnfp->xnf_rxlock, 870 5741 mrj NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 871 10958 dme mutex_init(&xnfp->xnf_schedlock, 872 10958 dme NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 873 10958 dme mutex_init(&xnfp->xnf_gref_lock, 874 10958 dme NULL, MUTEX_DRIVER, xnfp->xnf_icookie); 875 5084 johnlev 876 10958 dme cv_init(&xnfp->xnf_cv_state, NULL, CV_DEFAULT, NULL); 877 10958 dme cv_init(&xnfp->xnf_cv_multicast, NULL, CV_DEFAULT, NULL); 878 10958 dme cv_init(&xnfp->xnf_cv_tx_slots, NULL, CV_DEFAULT, NULL); 879 10958 dme 880 10958 dme (void) sprintf(cachename, "xnf_buf_cache_%d", 881 10958 dme ddi_get_instance(devinfo)); 882 10958 dme xnfp->xnf_buf_cache = kmem_cache_create(cachename, 883 10958 dme sizeof (xnf_buf_t), 0, 884 10958 dme xnf_buf_constructor, xnf_buf_destructor, 885 10958 dme NULL, xnfp, NULL, 0); 886 10958 dme if (xnfp->xnf_buf_cache == NULL) 887 10958 dme goto failure_0; 888 10958 dme 889 10958 dme (void) sprintf(cachename, "xnf_tx_buf_cache_%d", 890 10958 dme ddi_get_instance(devinfo)); 891 10958 dme xnfp->xnf_tx_buf_cache = kmem_cache_create(cachename, 892 10958 dme sizeof (xnf_txbuf_t), 0, 893 10958 dme xnf_tx_buf_constructor, xnf_tx_buf_destructor, 894 10958 dme NULL, xnfp, NULL, 0); 895 10958 dme if (xnfp->xnf_tx_buf_cache == NULL) 896 5741 mrj goto failure_1; 897 10958 dme 898 10958 dme xnfp->xnf_gref_head = INVALID_GRANT_REF; 899 10958 dme 900 5084 johnlev if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) { 901 5084 johnlev cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize " 902 5741 mrj "driver data structures", 903 5741 mrj ddi_get_instance(xnfp->xnf_devinfo)); 904 10958 dme goto failure_2; 905 5084 johnlev } 906 5084 johnlev 907 5741 mrj 
xnfp->xnf_rx_ring.sring->rsp_event = 908 5741 mrj xnfp->xnf_tx_ring.sring->rsp_event = 1; 909 5084 johnlev 910 10958 dme xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF; 911 10958 dme xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF; 912 5084 johnlev 913 5084 johnlev /* set driver private pointer now */ 914 5084 johnlev ddi_set_driver_private(devinfo, xnfp); 915 5084 johnlev 916 5084 johnlev if (!xnf_kstat_init(xnfp)) 917 10958 dme goto failure_3; 918 5084 johnlev 919 5084 johnlev /* 920 5084 johnlev * Allocate an event channel, add the interrupt handler and 921 5084 johnlev * bind it to the event channel. 922 5084 johnlev */ 923 5084 johnlev (void) xvdi_alloc_evtchn(devinfo); 924 5741 mrj xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo); 925 5741 mrj #ifdef XPV_HVM_DRIVER 926 5741 mrj ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp); 927 5741 mrj #else 928 5084 johnlev (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp); 929 5741 mrj #endif 930 5084 johnlev 931 5741 mrj err = mac_register(macp, &xnfp->xnf_mh); 932 5084 johnlev mac_free(macp); 933 5084 johnlev macp = NULL; 934 5084 johnlev if (err != 0) 935 10958 dme goto failure_4; 936 10958 dme 937 10958 dme if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change, NULL) 938 10958 dme != DDI_SUCCESS) 939 10958 dme goto failure_5; 940 6873 fvdl 941 6873 fvdl #ifdef XPV_HVM_DRIVER 942 6873 fvdl /* 943 6873 fvdl * In the HVM case, this driver essentially replaces a driver for 944 6873 fvdl * a 'real' PCI NIC. Without the "model" property set to 945 6873 fvdl * "Ethernet controller", like the PCI code does, netbooting does 946 6873 fvdl * not work correctly, as strplumb_get_netdev_path() will not find 947 6873 fvdl * this interface. 
948 6873 fvdl */ 949 6873 fvdl (void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model", 950 6873 fvdl "Ethernet controller"); 951 6873 fvdl #endif 952 5084 johnlev 953 10958 dme #ifdef XNF_DEBUG 954 10958 dme if (xnf_debug_instance == NULL) 955 10958 dme xnf_debug_instance = xnfp; 956 10958 dme #endif 957 6899 cz147101 958 5084 johnlev return (DDI_SUCCESS); 959 5084 johnlev 960 10958 dme failure_5: 961 10981 dme (void) mac_unregister(xnfp->xnf_mh); 962 10958 dme 963 10958 dme failure_4: 964 5741 mrj #ifdef XPV_HVM_DRIVER 965 5741 mrj ec_unbind_evtchn(xnfp->xnf_evtchn); 966 6431 smaybe xvdi_free_evtchn(devinfo); 967 5741 mrj #else 968 5741 mrj ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 969 5741 mrj #endif 970 5741 mrj xnfp->xnf_evtchn = INVALID_EVTCHN; 971 10958 dme kstat_delete(xnfp->xnf_kstat_aux); 972 10958 dme 973 10958 dme failure_3: 974 10958 dme xnf_release_dma_resources(xnfp); 975 5084 johnlev 976 6899 cz147101 failure_2: 977 10958 dme kmem_cache_destroy(xnfp->xnf_tx_buf_cache); 978 6899 cz147101 979 5741 mrj failure_1: 980 10958 dme kmem_cache_destroy(xnfp->xnf_buf_cache); 981 10958 dme 982 10958 dme failure_0: 983 10958 dme cv_destroy(&xnfp->xnf_cv_tx_slots); 984 10958 dme cv_destroy(&xnfp->xnf_cv_multicast); 985 10958 dme cv_destroy(&xnfp->xnf_cv_state); 986 10958 dme 987 10958 dme mutex_destroy(&xnfp->xnf_gref_lock); 988 10958 dme mutex_destroy(&xnfp->xnf_schedlock); 989 10958 dme mutex_destroy(&xnfp->xnf_rxlock); 990 5741 mrj mutex_destroy(&xnfp->xnf_txlock); 991 5084 johnlev 992 5084 johnlev failure: 993 5084 johnlev kmem_free(xnfp, sizeof (*xnfp)); 994 5084 johnlev if (macp != NULL) 995 5084 johnlev mac_free(macp); 996 5084 johnlev 997 5084 johnlev return (DDI_FAILURE); 998 5084 johnlev } 999 5084 johnlev 1000 5084 johnlev /* detach(9E) -- Detach a device from the system */ 1001 5084 johnlev static int 1002 5084 johnlev xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd) 1003 5084 johnlev { 1004 5084 johnlev xnf_t *xnfp; /* Our private 
device info */ 1005 5084 johnlev 1006 5084 johnlev #ifdef XNF_DEBUG 1007 10958 dme if (xnf_debug & XNF_DEBUG_DDI) 1008 5084 johnlev printf("xnf_detach(0x%p)\n", (void *)devinfo); 1009 5084 johnlev #endif 1010 5084 johnlev 1011 5084 johnlev xnfp = ddi_get_driver_private(devinfo); 1012 5084 johnlev 1013 5084 johnlev switch (cmd) { 1014 5084 johnlev case DDI_SUSPEND: 1015 5741 mrj #ifdef XPV_HVM_DRIVER 1016 5741 mrj ec_unbind_evtchn(xnfp->xnf_evtchn); 1017 6431 smaybe xvdi_free_evtchn(devinfo); 1018 5741 mrj #else 1019 5741 mrj ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 1020 5741 mrj #endif 1021 5084 johnlev 1022 5084 johnlev xvdi_suspend(devinfo); 1023 5084 johnlev 1024 10958 dme mutex_enter(&xnfp->xnf_rxlock); 1025 5741 mrj mutex_enter(&xnfp->xnf_txlock); 1026 5084 johnlev 1027 5741 mrj xnfp->xnf_evtchn = INVALID_EVTCHN; 1028 5741 mrj xnfp->xnf_connected = B_FALSE; 1029 5741 mrj mutex_exit(&xnfp->xnf_txlock); 1030 10958 dme mutex_exit(&xnfp->xnf_rxlock); 1031 7397 Max 1032 7397 Max /* claim link to be down after disconnect */ 1033 7397 Max mac_link_update(xnfp->xnf_mh, LINK_STATE_DOWN); 1034 5084 johnlev return (DDI_SUCCESS); 1035 5084 johnlev 1036 5084 johnlev case DDI_DETACH: 1037 5084 johnlev break; 1038 5084 johnlev 1039 5084 johnlev default: 1040 5084 johnlev return (DDI_FAILURE); 1041 5084 johnlev } 1042 5084 johnlev 1043 5741 mrj if (xnfp->xnf_connected) 1044 5084 johnlev return (DDI_FAILURE); 1045 5084 johnlev 1046 10958 dme /* 1047 10958 dme * Cannot detach if we have xnf_buf_t outstanding. 
1048 10958 dme */ 1049 10958 dme if (xnfp->xnf_stat_buf_allocated > 0) 1050 10958 dme return (DDI_FAILURE); 1051 5084 johnlev 1052 5741 mrj if (mac_unregister(xnfp->xnf_mh) != 0) 1053 5084 johnlev return (DDI_FAILURE); 1054 6899 cz147101 1055 6899 cz147101 kstat_delete(xnfp->xnf_kstat_aux); 1056 5084 johnlev 1057 5084 johnlev /* Stop the receiver */ 1058 5084 johnlev xnf_stop(xnfp); 1059 5084 johnlev 1060 5084 johnlev xvdi_remove_event_handler(devinfo, XS_OE_STATE); 1061 5084 johnlev 1062 5084 johnlev /* Remove the interrupt */ 1063 5741 mrj #ifdef XPV_HVM_DRIVER 1064 5741 mrj ec_unbind_evtchn(xnfp->xnf_evtchn); 1065 6431 smaybe xvdi_free_evtchn(devinfo); 1066 5741 mrj #else 1067 5741 mrj ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie); 1068 5741 mrj #endif 1069 5084 johnlev 1070 5084 johnlev /* Release any pending xmit mblks */ 1071 5084 johnlev xnf_release_mblks(xnfp); 1072 5084 johnlev 1073 5084 johnlev /* Release all DMA resources */ 1074 5084 johnlev xnf_release_dma_resources(xnfp); 1075 5084 johnlev 1076 10958 dme cv_destroy(&xnfp->xnf_cv_tx_slots); 1077 10958 dme cv_destroy(&xnfp->xnf_cv_multicast); 1078 10958 dme cv_destroy(&xnfp->xnf_cv_state); 1079 10958 dme 1080 10958 dme kmem_cache_destroy(xnfp->xnf_tx_buf_cache); 1081 10958 dme kmem_cache_destroy(xnfp->xnf_buf_cache); 1082 10958 dme 1083 10958 dme mutex_destroy(&xnfp->xnf_gref_lock); 1084 10958 dme mutex_destroy(&xnfp->xnf_schedlock); 1085 10958 dme mutex_destroy(&xnfp->xnf_rxlock); 1086 5741 mrj mutex_destroy(&xnfp->xnf_txlock); 1087 5084 johnlev 1088 5084 johnlev kmem_free(xnfp, sizeof (*xnfp)); 1089 5084 johnlev 1090 5084 johnlev return (DDI_SUCCESS); 1091 5084 johnlev } 1092 5084 johnlev 1093 5084 johnlev /* 1094 5084 johnlev * xnf_set_mac_addr() -- set the physical network address on the board. 
1095 5084 johnlev */ 1096 5084 johnlev static int 1097 5084 johnlev xnf_set_mac_addr(void *arg, const uint8_t *macaddr) 1098 5084 johnlev { 1099 10958 dme _NOTE(ARGUNUSED(arg, macaddr)); 1100 5084 johnlev 1101 5084 johnlev /* 1102 5084 johnlev * We can't set our macaddr. 1103 5084 johnlev */ 1104 5084 johnlev return (ENOTSUP); 1105 5084 johnlev } 1106 5084 johnlev 1107 5084 johnlev /* 1108 5084 johnlev * xnf_set_multicast() -- set (enable) or disable a multicast address. 1109 5084 johnlev * 1110 5084 johnlev * Program the hardware to enable/disable the multicast address 1111 10958 dme * in "mca". Enable if "add" is true, disable if false. 1112 5084 johnlev */ 1113 5084 johnlev static int 1114 5084 johnlev xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca) 1115 5084 johnlev { 1116 5084 johnlev xnf_t *xnfp = arg; 1117 10958 dme xnf_txbuf_t *txp; 1118 10958 dme int n_slots; 1119 10958 dme RING_IDX slot; 1120 10958 dme xnf_txid_t *tidp; 1121 10958 dme netif_tx_request_t *txrp; 1122 10958 dme struct netif_extra_info *erp; 1123 10958 dme boolean_t notify, result; 1124 5084 johnlev 1125 5084 johnlev /* 1126 10958 dme * If the backend does not support multicast control then we 1127 10958 dme * must assume that the right packets will just arrive. 1128 10958 dme */ 1129 10958 dme if (!xnfp->xnf_be_mcast_control) 1130 10958 dme return (0); 1131 10958 dme 1132 10958 dme txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP); 1133 10958 dme if (txp == NULL) 1134 10958 dme return (1); 1135 10958 dme 1136 10958 dme mutex_enter(&xnfp->xnf_txlock); 1137 10958 dme 1138 10958 dme /* 1139 10958 dme * If we're not yet connected then claim success. This is 1140 10958 dme * acceptable because we refresh the entire set of multicast 1141 10958 dme * addresses when we get connected. 
1142 5084 johnlev * 1143 10958 dme * We can't wait around here because the MAC layer expects 1144 10958 dme * this to be a non-blocking operation - waiting ends up 1145 10958 dme * causing a deadlock during resume. 1146 5084 johnlev */ 1147 10958 dme if (!xnfp->xnf_connected) { 1148 10958 dme mutex_exit(&xnfp->xnf_txlock); 1149 10958 dme return (0); 1150 10958 dme } 1151 10958 dme 1152 10958 dme /* 1153 10958 dme * 1. Acquire two slots in the ring. 1154 10958 dme * 2. Fill in the slots. 1155 10958 dme * 3. Request notification when the operation is done. 1156 10958 dme * 4. Kick the peer. 1157 10958 dme * 5. Wait for the response via xnf_tx_clean_ring(). 1158 10958 dme */ 1159 10958 dme 1160 10958 dme n_slots = tx_slots_get(xnfp, 2, B_TRUE); 1161 10958 dme ASSERT(n_slots >= 2); 1162 10958 dme 1163 10958 dme slot = xnfp->xnf_tx_ring.req_prod_pvt; 1164 10958 dme tidp = txid_get(xnfp); 1165 10958 dme VERIFY(tidp != NULL); 1166 10958 dme 1167 10958 dme txp->tx_type = TX_MCAST_REQ; 1168 10958 dme txp->tx_slot = slot; 1169 10958 dme 1170 10958 dme txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1171 10958 dme erp = (struct netif_extra_info *) 1172 10958 dme RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot + 1); 1173 10958 dme 1174 10958 dme txrp->gref = 0; 1175 10958 dme txrp->size = 0; 1176 10958 dme txrp->offset = 0; 1177 10958 dme /* Set tx_txreq.id to appease xnf_tx_clean_ring(). */ 1178 10958 dme txrp->id = txp->tx_txreq.id = tidp->id; 1179 10958 dme txrp->flags = NETTXF_extra_info; 1180 10958 dme 1181 10958 dme erp->type = add ? 
XEN_NETIF_EXTRA_TYPE_MCAST_ADD : 1182 10958 dme XEN_NETIF_EXTRA_TYPE_MCAST_DEL; 1183 10958 dme bcopy((void *)mca, &erp->u.mcast.addr, ETHERADDRL); 1184 10958 dme 1185 10958 dme tidp->txbuf = txp; 1186 10958 dme 1187 10958 dme xnfp->xnf_tx_ring.req_prod_pvt = slot + 2; 1188 10958 dme 1189 10958 dme mutex_enter(&xnfp->xnf_schedlock); 1190 10958 dme xnfp->xnf_pending_multicast++; 1191 10958 dme mutex_exit(&xnfp->xnf_schedlock); 1192 10958 dme 1193 10958 dme /* LINTED: constant in conditional context */ 1194 10958 dme RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, 1195 10958 dme notify); 1196 10958 dme if (notify) 1197 10958 dme ec_notify_via_evtchn(xnfp->xnf_evtchn); 1198 10958 dme 1199 10958 dme while (txp->tx_type == TX_MCAST_REQ) 1200 10958 dme cv_wait(&xnfp->xnf_cv_multicast, 1201 10958 dme &xnfp->xnf_txlock); 1202 10958 dme 1203 10958 dme ASSERT(txp->tx_type == TX_MCAST_RSP); 1204 10958 dme 1205 10958 dme mutex_enter(&xnfp->xnf_schedlock); 1206 10958 dme xnfp->xnf_pending_multicast--; 1207 10958 dme mutex_exit(&xnfp->xnf_schedlock); 1208 10958 dme 1209 10958 dme result = (txp->tx_status == NETIF_RSP_OKAY); 1210 10958 dme 1211 10958 dme txid_put(xnfp, tidp); 1212 10958 dme 1213 10958 dme mutex_exit(&xnfp->xnf_txlock); 1214 10958 dme 1215 10958 dme kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1216 10958 dme 1217 10958 dme return (result ? 0 : 1); 1218 5084 johnlev } 1219 5084 johnlev 1220 5084 johnlev /* 1221 5084 johnlev * xnf_set_promiscuous() -- set or reset promiscuous mode on the board 1222 5084 johnlev * 1223 5084 johnlev * Program the hardware to enable/disable promiscuous mode. 1224 5084 johnlev */ 1225 5084 johnlev static int 1226 5084 johnlev xnf_set_promiscuous(void *arg, boolean_t on) 1227 5084 johnlev { 1228 10958 dme _NOTE(ARGUNUSED(arg, on)); 1229 5084 johnlev 1230 5084 johnlev /* 1231 5084 johnlev * We can't really do this, but we pretend that we can in 1232 5084 johnlev * order that snoop will work. 
1233 5084 johnlev */ 1234 5084 johnlev return (0); 1235 5084 johnlev } 1236 5084 johnlev 1237 5084 johnlev /* 1238 5084 johnlev * Clean buffers that we have responses for from the transmit ring. 1239 5084 johnlev */ 1240 5084 johnlev static int 1241 10958 dme xnf_tx_clean_ring(xnf_t *xnfp) 1242 5084 johnlev { 1243 10958 dme boolean_t work_to_do; 1244 5084 johnlev 1245 5741 mrj ASSERT(MUTEX_HELD(&xnfp->xnf_txlock)); 1246 5084 johnlev 1247 6899 cz147101 loop: 1248 5990 schuster while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) { 1249 10958 dme RING_IDX cons, prod, i; 1250 10958 dme 1251 10958 dme cons = xnfp->xnf_tx_ring.rsp_cons; 1252 10958 dme prod = xnfp->xnf_tx_ring.sring->rsp_prod; 1253 5084 johnlev membar_consumer(); 1254 5084 johnlev /* 1255 10958 dme * Clean tx requests from ring that we have responses 1256 10958 dme * for. 1257 5084 johnlev */ 1258 10958 dme DTRACE_PROBE2(xnf_tx_clean_range, int, cons, int, prod); 1259 10958 dme for (i = cons; i != prod; i++) { 1260 10958 dme netif_tx_response_t *trp; 1261 10958 dme xnf_txid_t *tidp; 1262 10958 dme xnf_txbuf_t *txp; 1263 10958 dme 1264 10958 dme trp = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i); 1265 10958 dme ASSERT(TX_ID_VALID(trp->id)); 1266 10958 dme 1267 10958 dme tidp = TX_ID_TO_TXID(xnfp, trp->id); 1268 10958 dme ASSERT(tidp->id == trp->id); 1269 10958 dme ASSERT(tidp->next == INVALID_TX_ID); 1270 10958 dme 1271 10958 dme txp = tidp->txbuf; 1272 10958 dme ASSERT(txp != NULL); 1273 10958 dme ASSERT(txp->tx_txreq.id == trp->id); 1274 10958 dme 1275 10958 dme switch (txp->tx_type) { 1276 10958 dme case TX_DATA: 1277 10958 dme if (gnttab_query_foreign_access( 1278 10958 dme txp->tx_txreq.gref) != 0) 1279 10958 dme cmn_err(CE_PANIC, 1280 10958 dme "tx grant %d still in use by " 1281 10958 dme "backend domain", 1282 10958 dme txp->tx_txreq.gref); 1283 10958 dme 1284 10958 dme if (txp->tx_bdesc == NULL) { 1285 10958 dme (void) gnttab_end_foreign_access_ref( 1286 10958 dme txp->tx_txreq.gref, 1); 1287 
10958 dme gref_put(xnfp, txp->tx_txreq.gref); 1288 10958 dme (void) ddi_dma_unbind_handle( 1289 10958 dme txp->tx_dma_handle); 1290 10958 dme } else { 1291 10958 dme xnf_buf_put(xnfp, txp->tx_bdesc, 1292 10958 dme B_TRUE); 1293 10958 dme } 1294 10958 dme 1295 10958 dme freemsg(txp->tx_mp); 1296 10958 dme txid_put(xnfp, tidp); 1297 10958 dme kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1298 10958 dme 1299 10958 dme break; 1300 10958 dme 1301 10958 dme case TX_MCAST_REQ: 1302 10958 dme txp->tx_type = TX_MCAST_RSP; 1303 10958 dme txp->tx_status = trp->status; 1304 10958 dme cv_broadcast(&xnfp->xnf_cv_multicast); 1305 10958 dme 1306 10958 dme break; 1307 10958 dme 1308 10958 dme case TX_MCAST_RSP: 1309 10958 dme break; 1310 10958 dme 1311 10958 dme default: 1312 10958 dme cmn_err(CE_PANIC, "xnf_tx_clean_ring: " 1313 10958 dme "invalid xnf_txbuf_t type: %d", 1314 10958 dme txp->tx_type); 1315 10958 dme break; 1316 10958 dme } 1317 5084 johnlev } 1318 10958 dme /* 1319 10958 dme * Record the last response we dealt with so that we 1320 10958 dme * know where to start next time around. 1321 10958 dme */ 1322 10958 dme xnfp->xnf_tx_ring.rsp_cons = prod; 1323 5084 johnlev membar_enter(); 1324 5990 schuster } 1325 6899 cz147101 1326 6899 cz147101 /* LINTED: constant in conditional context */ 1327 6899 cz147101 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_tx_ring, work_to_do); 1328 6899 cz147101 if (work_to_do) 1329 6899 cz147101 goto loop; 1330 5990 schuster 1331 5990 schuster return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring)); 1332 5084 johnlev } 1333 5084 johnlev 1334 5084 johnlev /* 1335 10958 dme * Allocate and fill in a look-aside buffer for the packet `mp'. Used 1336 10958 dme * to ensure that the packet is physically contiguous and contained 1337 10958 dme * within a single page. 
1338 5084 johnlev */ 1339 10958 dme static xnf_buf_t * 1340 10958 dme xnf_tx_pullup(xnf_t *xnfp, mblk_t *mp) 1341 5084 johnlev { 1342 10958 dme xnf_buf_t *bd; 1343 10958 dme caddr_t bp; 1344 5084 johnlev 1345 10958 dme bd = xnf_buf_get(xnfp, KM_SLEEP, B_TRUE); 1346 10958 dme if (bd == NULL) 1347 10958 dme return (NULL); 1348 10958 dme 1349 10958 dme bp = bd->buf; 1350 10958 dme while (mp != NULL) { 1351 10958 dme size_t len = MBLKL(mp); 1352 10958 dme 1353 10958 dme bcopy(mp->b_rptr, bp, len); 1354 10958 dme bp += len; 1355 10958 dme 1356 10958 dme mp = mp->b_cont; 1357 10958 dme } 1358 10958 dme 1359 10958 dme ASSERT((bp - bd->buf) <= PAGESIZE); 1360 10958 dme 1361 5741 mrj xnfp->xnf_stat_tx_pullup++; 1362 10958 dme 1363 10958 dme return (bd); 1364 5084 johnlev } 1365 5084 johnlev 1366 10958 dme /* 1367 10958 dme * Insert the pseudo-header checksum into the packet `buf'. 1368 10958 dme */ 1369 7351 dme void 1370 7351 dme xnf_pseudo_cksum(caddr_t buf, int length) 1371 7351 dme { 1372 7351 dme struct ether_header *ehp; 1373 7351 dme uint16_t sap, len, *stuff; 1374 7351 dme uint32_t cksum; 1375 7351 dme size_t offset; 1376 7351 dme ipha_t *ipha; 1377 7351 dme ipaddr_t src, dst; 1378 7351 dme 1379 7351 dme ASSERT(length >= sizeof (*ehp)); 1380 7351 dme ehp = (struct ether_header *)buf; 1381 7351 dme 1382 7351 dme if (ntohs(ehp->ether_type) == VLAN_TPID) { 1383 7351 dme struct ether_vlan_header *evhp; 1384 7351 dme 1385 7351 dme ASSERT(length >= sizeof (*evhp)); 1386 7351 dme evhp = (struct ether_vlan_header *)buf; 1387 7351 dme sap = ntohs(evhp->ether_type); 1388 7351 dme offset = sizeof (*evhp); 1389 7351 dme } else { 1390 7351 dme sap = ntohs(ehp->ether_type); 1391 7351 dme offset = sizeof (*ehp); 1392 7351 dme } 1393 7351 dme 1394 7351 dme ASSERT(sap == ETHERTYPE_IP); 1395 7351 dme 1396 7351 dme /* Packet should have been pulled up by the caller. 
*/ 1397 7351 dme if ((offset + sizeof (ipha_t)) > length) { 1398 7351 dme cmn_err(CE_WARN, "xnf_pseudo_cksum: no room for checksum"); 1399 7351 dme return; 1400 7351 dme } 1401 7351 dme 1402 7351 dme ipha = (ipha_t *)(buf + offset); 1403 7351 dme 1404 7351 dme ASSERT(IPH_HDR_LENGTH(ipha) == IP_SIMPLE_HDR_LENGTH); 1405 7351 dme 1406 7351 dme len = ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH; 1407 7351 dme 1408 7351 dme switch (ipha->ipha_protocol) { 1409 7351 dme case IPPROTO_TCP: 1410 7351 dme stuff = IPH_TCPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1411 7351 dme cksum = IP_TCP_CSUM_COMP; 1412 7351 dme break; 1413 7351 dme case IPPROTO_UDP: 1414 7351 dme stuff = IPH_UDPH_CHECKSUMP(ipha, IP_SIMPLE_HDR_LENGTH); 1415 7351 dme cksum = IP_UDP_CSUM_COMP; 1416 7351 dme break; 1417 7351 dme default: 1418 7351 dme cmn_err(CE_WARN, "xnf_pseudo_cksum: unexpected protocol %d", 1419 7351 dme ipha->ipha_protocol); 1420 7351 dme return; 1421 7351 dme } 1422 7351 dme 1423 7351 dme src = ipha->ipha_src; 1424 7351 dme dst = ipha->ipha_dst; 1425 7351 dme 1426 7351 dme cksum += (dst >> 16) + (dst & 0xFFFF); 1427 7351 dme cksum += (src >> 16) + (src & 0xFFFF); 1428 7351 dme cksum += htons(len); 1429 7351 dme 1430 7351 dme cksum = (cksum >> 16) + (cksum & 0xFFFF); 1431 7351 dme cksum = (cksum >> 16) + (cksum & 0xFFFF); 1432 7351 dme 1433 7351 dme ASSERT(cksum <= 0xFFFF); 1434 7351 dme 1435 7351 dme *stuff = (uint16_t)(cksum ? cksum : ~cksum); 1436 7351 dme } 1437 7351 dme 1438 5084 johnlev /* 1439 10958 dme * Push a list of prepared packets (`txp') into the transmit ring. 
1440 5084 johnlev */ 1441 10958 dme static xnf_txbuf_t * 1442 10958 dme tx_push_packets(xnf_t *xnfp, xnf_txbuf_t *txp) 1443 5084 johnlev { 1444 10958 dme int slots_free; 1445 10958 dme RING_IDX slot; 1446 10958 dme boolean_t notify; 1447 10958 dme 1448 10958 dme mutex_enter(&xnfp->xnf_txlock); 1449 10958 dme 1450 10958 dme ASSERT(xnfp->xnf_running); 1451 10958 dme 1452 10958 dme /* 1453 10958 dme * Wait until we are connected to the backend. 1454 10958 dme */ 1455 10958 dme while (!xnfp->xnf_connected) 1456 10958 dme cv_wait(&xnfp->xnf_cv_state, &xnfp->xnf_txlock); 1457 10958 dme 1458 10958 dme slots_free = tx_slots_get(xnfp, 1, B_FALSE); 1459 10958 dme DTRACE_PROBE1(xnf_send_slotsfree, int, slots_free); 1460 10958 dme 1461 10958 dme slot = xnfp->xnf_tx_ring.req_prod_pvt; 1462 10958 dme 1463 10958 dme while ((txp != NULL) && (slots_free > 0)) { 1464 10958 dme xnf_txid_t *tidp; 1465 10958 dme netif_tx_request_t *txrp; 1466 10958 dme 1467 10958 dme tidp = txid_get(xnfp); 1468 10958 dme VERIFY(tidp != NULL); 1469 10958 dme 1470 10958 dme txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot); 1471 10958 dme 1472 10958 dme txp->tx_slot = slot; 1473 10958 dme txp->tx_txreq.id = tidp->id; 1474 10958 dme *txrp = txp->tx_txreq; 1475 10958 dme 1476 10958 dme tidp->txbuf = txp; 1477 10958 dme 1478 10958 dme xnfp->xnf_stat_opackets++; 1479 10958 dme xnfp->xnf_stat_obytes += txp->tx_txreq.size; 1480 10958 dme 1481 10958 dme txp = txp->tx_next; 1482 10958 dme slots_free--; 1483 10958 dme slot++; 1484 10958 dme 1485 10958 dme } 1486 10958 dme 1487 10958 dme xnfp->xnf_tx_ring.req_prod_pvt = slot; 1488 10958 dme 1489 10958 dme /* 1490 10958 dme * Tell the peer that we sent something, if it cares. 
1491 10958 dme */ 1492 10958 dme /* LINTED: constant in conditional context */ 1493 10958 dme RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, 1494 10958 dme notify); 1495 10958 dme if (notify) 1496 10958 dme ec_notify_via_evtchn(xnfp->xnf_evtchn); 1497 10958 dme 1498 10958 dme mutex_exit(&xnfp->xnf_txlock); 1499 10958 dme 1500 10958 dme return (txp); 1501 10958 dme } 1502 10958 dme 1503 10958 dme /* 1504 10958 dme * Send the chain of packets `mp'. Called by the MAC framework. 1505 10958 dme */ 1506 10958 dme static mblk_t * 1507 10958 dme xnf_send(void *arg, mblk_t *mp) 1508 10958 dme { 1509 10958 dme xnf_t *xnfp = arg; 1510 10958 dme domid_t oeid; 1511 10958 dme xnf_txbuf_t *head, *tail; 1512 10958 dme mblk_t *ml; 1513 10958 dme int prepared; 1514 10958 dme 1515 10958 dme oeid = xvdi_get_oeid(xnfp->xnf_devinfo); 1516 10958 dme 1517 10958 dme /* 1518 10958 dme * Prepare packets for transmission. 1519 10958 dme */ 1520 10958 dme head = tail = NULL; 1521 10958 dme prepared = 0; 1522 10958 dme while (mp != NULL) { 1523 10958 dme xnf_txbuf_t *txp; 1524 10958 dme int n_chunks, length; 1525 10958 dme boolean_t page_oops; 1526 10958 dme uint32_t pflags; 1527 10958 dme 1528 10958 dme for (ml = mp, n_chunks = length = 0, page_oops = B_FALSE; 1529 10958 dme ml != NULL; 1530 10958 dme ml = ml->b_cont, n_chunks++) { 1531 10958 dme 1532 10958 dme /* 1533 10958 dme * Test if this buffer includes a page 1534 10958 dme * boundary. The test assumes that the range 1535 10958 dme * b_rptr...b_wptr can include only a single 1536 10958 dme * boundary. 1537 10958 dme */ 1538 10958 dme if (xnf_btop((size_t)ml->b_rptr) != 1539 10958 dme xnf_btop((size_t)ml->b_wptr)) { 1540 10958 dme xnfp->xnf_stat_tx_pagebndry++; 1541 10958 dme page_oops = B_TRUE; 1542 10958 dme } 1543 10958 dme 1544 10958 dme length += MBLKL(ml); 1545 10958 dme } 1546 10958 dme DTRACE_PROBE1(xnf_send_b_cont, int, n_chunks); 1547 10958 dme 1548 10958 dme /* 1549 10958 dme * Make sure packet isn't too large. 
1550 10958 dme */ 1551 10958 dme if (length > XNF_FRAMESIZE) { 1552 10958 dme cmn_err(CE_WARN, 1553 10958 dme "xnf%d: oversized packet (%d bytes) dropped", 1554 10958 dme ddi_get_instance(xnfp->xnf_devinfo), length); 1555 10958 dme freemsg(mp); 1556 10958 dme continue; 1557 10958 dme } 1558 10958 dme 1559 10958 dme txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP); 1560 10958 dme if (txp == NULL) 1561 10958 dme break; 1562 10958 dme 1563 10958 dme txp->tx_type = TX_DATA; 1564 10958 dme 1565 10958 dme if ((n_chunks > xnf_max_tx_frags) || page_oops) { 1566 10958 dme /* 1567 10958 dme * Loan a side buffer rather than the mblk 1568 10958 dme * itself. 1569 10958 dme */ 1570 10958 dme txp->tx_bdesc = xnf_tx_pullup(xnfp, mp); 1571 10958 dme if (txp->tx_bdesc == NULL) { 1572 10958 dme kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1573 10958 dme break; 1574 10958 dme } 1575 10958 dme 1576 10958 dme txp->tx_bufp = txp->tx_bdesc->buf; 1577 10958 dme txp->tx_mfn = txp->tx_bdesc->buf_mfn; 1578 10958 dme txp->tx_txreq.gref = txp->tx_bdesc->grant_ref; 1579 10958 dme 1580 10958 dme } else { 1581 10958 dme int rc; 1582 10958 dme ddi_dma_cookie_t dma_cookie; 1583 10958 dme uint_t ncookies; 1584 10958 dme 1585 10958 dme rc = ddi_dma_addr_bind_handle(txp->tx_dma_handle, 1586 10958 dme NULL, (char *)mp->b_rptr, length, 1587 10958 dme DDI_DMA_WRITE | DDI_DMA_STREAMING, 1588 10958 dme DDI_DMA_DONTWAIT, 0, &dma_cookie, 1589 10958 dme &ncookies); 1590 10958 dme if (rc != DDI_DMA_MAPPED) { 1591 10958 dme ASSERT(rc != DDI_DMA_INUSE); 1592 10958 dme ASSERT(rc != DDI_DMA_PARTIAL_MAP); 1593 5084 johnlev 1594 5084 johnlev #ifdef XNF_DEBUG 1595 10958 dme if (rc != DDI_DMA_NORESOURCES) 1596 10958 dme cmn_err(CE_WARN, 1597 10958 dme "xnf%d: bind_handle failed (%x)", 1598 10958 dme ddi_get_instance(xnfp->xnf_devinfo), 1599 10958 dme rc); 1600 5084 johnlev #endif 1601 10958 dme kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1602 10958 dme break; 1603 10958 dme } 1604 10958 dme ASSERT(ncookies 
== 1); 1605 5084 johnlev 1606 10958 dme txp->tx_bdesc = NULL; 1607 10958 dme txp->tx_bufp = (caddr_t)mp->b_rptr; 1608 10958 dme txp->tx_mfn = 1609 10958 dme xnf_btop(pa_to_ma(dma_cookie.dmac_laddress)); 1610 10958 dme txp->tx_txreq.gref = gref_get(xnfp); 1611 10958 dme if (txp->tx_txreq.gref == INVALID_GRANT_REF) { 1612 10958 dme (void) ddi_dma_unbind_handle( 1613 10958 dme txp->tx_dma_handle); 1614 10958 dme kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 1615 10958 dme break; 1616 10958 dme } 1617 10958 dme gnttab_grant_foreign_access_ref(txp->tx_txreq.gref, 1618 10958 dme oeid, txp->tx_mfn, 1); 1619 10958 dme } 1620 5084 johnlev 1621 10958 dme txp->tx_next = NULL; 1622 10958 dme txp->tx_mp = mp; 1623 10958 dme txp->tx_txreq.size = length; 1624 10958 dme txp->tx_txreq.offset = (uintptr_t)txp->tx_bufp & PAGEOFFSET; 1625 10958 dme txp->tx_txreq.flags = 0; 1626 10958 dme hcksum_retrieve(mp, NULL, NULL, NULL, NULL, NULL, NULL, 1627 10958 dme &pflags); 1628 10958 dme if (pflags != 0) { 1629 10958 dme /* 1630 10958 dme * If the local protocol stack requests checksum 1631 10958 dme * offload we set the 'checksum blank' flag, 1632 10958 dme * indicating to the peer that we need the checksum 1633 10958 dme * calculated for us. 1634 10958 dme * 1635 10958 dme * We _don't_ set the validated flag, because we haven't 1636 10958 dme * validated that the data and the checksum match. 
1637 10958 dme */ 1638 10958 dme xnf_pseudo_cksum(txp->tx_bufp, length); 1639 10958 dme txp->tx_txreq.flags |= NETTXF_csum_blank; 1640 5084 johnlev 1641 10958 dme xnfp->xnf_stat_tx_cksum_deferred++; 1642 10958 dme } 1643 10958 dme 1644 10958 dme if (head == NULL) { 1645 10958 dme ASSERT(tail == NULL); 1646 10958 dme 1647 10958 dme head = txp; 1648 10958 dme } else { 1649 10958 dme ASSERT(tail != NULL); 1650 10958 dme 1651 10958 dme tail->tx_next = txp; 1652 10958 dme } 1653 10958 dme tail = txp; 1654 10958 dme 1655 10958 dme mp = mp->b_next; 1656 10958 dme prepared++; 1657 10958 dme 1658 10958 dme /* 1659 10958 dme * There is no point in preparing more than 1660 10958 dme * NET_TX_RING_SIZE, as we won't be able to push them 1661 10958 dme * into the ring in one go and would hence have to 1662 10958 dme * un-prepare the extra. 1663 10958 dme */ 1664 10958 dme if (prepared == NET_TX_RING_SIZE) 1665 10958 dme break; 1666 5084 johnlev } 1667 5084 johnlev 1668 10958 dme DTRACE_PROBE1(xnf_send_prepared, int, prepared); 1669 5084 johnlev 1670 10958 dme if (mp != NULL) { 1671 10958 dme #ifdef XNF_DEBUG 1672 10958 dme int notprepared = 0; 1673 10958 dme mblk_t *l = mp; 1674 10958 dme 1675 10958 dme while (l != NULL) { 1676 10958 dme notprepared++; 1677 10958 dme l = l->b_next; 1678 10958 dme } 1679 10958 dme 1680 10958 dme DTRACE_PROBE1(xnf_send_notprepared, int, notprepared); 1681 10958 dme #else /* !XNF_DEBUG */ 1682 10958 dme DTRACE_PROBE1(xnf_send_notprepared, int, -1); 1683 10958 dme #endif /* XNF_DEBUG */ 1684 5084 johnlev } 1685 5084 johnlev 1686 5084 johnlev /* 1687 10958 dme * Push the packets we have prepared into the ring. They may 1688 10958 dme * not all go. 
1689 5084 johnlev */ 1690 10958 dme if (head != NULL) 1691 10958 dme head = tx_push_packets(xnfp, head); 1692 10958 dme 1693 5084 johnlev /* 1694 10958 dme * If some packets that we prepared were not sent, unprepare 1695 10958 dme * them and add them back to the head of those we didn't 1696 10958 dme * prepare. 1697 5084 johnlev */ 1698 10958 dme { 1699 10958 dme xnf_txbuf_t *loop; 1700 10958 dme mblk_t *mp_head, *mp_tail; 1701 10958 dme int unprepared = 0; 1702 10958 dme 1703 10958 dme mp_head = mp_tail = NULL; 1704 10958 dme loop = head; 1705 10958 dme 1706 10958 dme while (loop != NULL) { 1707 10958 dme xnf_txbuf_t *next = loop->tx_next; 1708 10958 dme 1709 10958 dme if (loop->tx_bdesc == NULL) { 1710 10958 dme (void) gnttab_end_foreign_access_ref( 1711 10958 dme loop->tx_txreq.gref, 1); 1712 10958 dme gref_put(xnfp, loop->tx_txreq.gref); 1713 10958 dme (void) ddi_dma_unbind_handle( 1714 10958 dme loop->tx_dma_handle); 1715 10958 dme } else { 1716 10958 dme xnf_buf_put(xnfp, loop->tx_bdesc, B_TRUE); 1717 10958 dme } 1718 10958 dme 1719 10958 dme ASSERT(loop->tx_mp != NULL); 1720 10958 dme if (mp_head == NULL) 1721 10958 dme mp_head = loop->tx_mp; 1722 10958 dme mp_tail = loop->tx_mp; 1723 10958 dme 1724 10958 dme kmem_cache_free(xnfp->xnf_tx_buf_cache, loop); 1725 10958 dme loop = next; 1726 10958 dme unprepared++; 1727 5084 johnlev } 1728 10958 dme 1729 10958 dme if (mp_tail == NULL) { 1730 10958 dme ASSERT(mp_head == NULL); 1731 10958 dme } else { 1732 10958 dme ASSERT(mp_head != NULL); 1733 10958 dme 1734 10958 dme mp_tail->b_next = mp; 1735 10958 dme mp = mp_head; 1736 10958 dme } 1737 10958 dme 1738 10958 dme DTRACE_PROBE1(xnf_send_unprepared, int, unprepared); 1739 5084 johnlev } 1740 5084 johnlev 1741 10958 dme /* 1742 10958 dme * If any mblks are left then we have deferred for some reason 1743 10958 dme * and need to ask for a re-schedule later. This is typically 1744 10958 dme * due to the ring filling. 
1745 10958 dme */ 1746 10958 dme if (mp != NULL) { 1747 10958 dme mutex_enter(&xnfp->xnf_schedlock); 1748 10958 dme xnfp->xnf_need_sched = B_TRUE; 1749 10958 dme mutex_exit(&xnfp->xnf_schedlock); 1750 5084 johnlev 1751 10958 dme xnfp->xnf_stat_tx_defer++; 1752 5084 johnlev } 1753 5084 johnlev 1754 5084 johnlev return (mp); 1755 5084 johnlev } 1756 5084 johnlev 1757 5084 johnlev /* 1758 10958 dme * Notification of RX packets. Currently no TX-complete interrupt is 1759 10958 dme * used, as we clean the TX ring lazily. 1760 5084 johnlev */ /* xnf_intr(arg): arg is cast to the xnf_t of the instance. Returns DDI_INTR_UNCLAIMED when xnf_connected is false and DDI_INTR_CLAIMED otherwise. The receive work runs under xnf_rxlock, which is dropped before mac_rx() is called; the transmit work takes xnf_schedlock and, only when the ring needs cleaning, xnf_txlock. */ 1761 5084 johnlev static uint_t 1762 5084 johnlev xnf_intr(caddr_t arg) 1763 5084 johnlev { 1764 5084 johnlev xnf_t *xnfp = (xnf_t *)arg; 1765 10958 dme mblk_t *mp; 1766 10958 dme boolean_t need_sched, clean_ring; 1767 5084 johnlev 1768 10958 dme mutex_enter(&xnfp->xnf_rxlock); 1769 5084 johnlev 1770 10958 dme /* 1771 10958 dme * Interrupts before we are connected are spurious. 1772 10958 dme */ 1773 6899 cz147101 if (!xnfp->xnf_connected) { 1774 10958 dme mutex_exit(&xnfp->xnf_rxlock); 1775 5741 mrj xnfp->xnf_stat_unclaimed_interrupts++; /* NOTE(review): this counter is bumped after xnf_rxlock has been dropped, while xnf_stat_interrupts below is bumped with the lock held - confirm that is intended. */ 1776 5084 johnlev return (DDI_INTR_UNCLAIMED); 1777 5084 johnlev } 1778 5084 johnlev 1779 10958 dme /* 1780 10958 dme * Receive side processing. 1781 10958 dme */ 1782 10958 dme do { 1783 10958 dme /* 1784 10958 dme * Collect buffers from the ring. 1785 10958 dme */ 1786 10958 dme xnf_rx_collect(xnfp); 1787 5084 johnlev 1788 10958 dme /* 1789 10958 dme * Interrupt me when the next receive buffer is consumed. 1790 10958 dme */ 1791 10958 dme xnfp->xnf_rx_ring.sring->rsp_event = 1792 10958 dme xnfp->xnf_rx_ring.rsp_cons + 1; 1793 10958 dme xen_mb(); /* rsp_event is re-armed before the loop condition re-reads the ring; presumably this closes the window in which a response could arrive unnoticed - confirm against the Xen ring protocol. */ 1794 5741 mrj 1795 10958 dme } while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)); 1796 10958 dme 1797 10958 dme if (xnfp->xnf_rx_new_buffers_posted) { 1798 10958 dme boolean_t notify; 1799 10958 dme 1800 10958 dme /* 1801 10958 dme * Indicate to the peer that we have re-filled the 1802 10958 dme * receive ring, if it cares. 
1803 10958 dme */ 1804 10958 dme /* LINTED: constant in conditional context */ 1805 10958 dme RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify); 1806 10958 dme if (notify) 1807 10958 dme ec_notify_via_evtchn(xnfp->xnf_evtchn); 1808 10958 dme xnfp->xnf_rx_new_buffers_posted = B_FALSE; 1809 5084 johnlev } 1810 5084 johnlev /* Detach the chain of received mblks stored on the instance while xnf_rxlock is still held; it is passed to mac_rx() below only after the lock has been released. */ 1811 10958 dme mp = xnfp->xnf_rx_head; 1812 10958 dme xnfp->xnf_rx_head = xnfp->xnf_rx_tail = NULL; 1813 10958 dme 1814 7521 dme xnfp->xnf_stat_interrupts++; 1815 10958 dme mutex_exit(&xnfp->xnf_rxlock); 1816 10958 dme 1817 10958 dme if (mp != NULL) 1818 10958 dme mac_rx(xnfp->xnf_mh, NULL, mp); 1819 7521 dme 1820 5084 johnlev /* 1821 10958 dme * Transmit side processing. 1822 10958 dme * 1823 10958 dme * If a previous transmit attempt failed or we have pending 1824 10958 dme * multicast requests, clean the ring. 1825 10958 dme * 1826 10958 dme * If we previously stalled transmission and cleaning produces 1827 10958 dme * some free slots, tell upstream to attempt sending again. 1828 10958 dme * 1829 10958 dme * The odd style is to avoid acquiring xnf_txlock unless we 1830 10958 dme * will actually look inside the tx machinery. 
1831 5084 johnlev */ 1832 10958 dme mutex_enter(&xnfp->xnf_schedlock); 1833 10958 dme need_sched = xnfp->xnf_need_sched; 1834 10958 dme clean_ring = need_sched || (xnfp->xnf_pending_multicast > 0); 1835 10958 dme mutex_exit(&xnfp->xnf_schedlock); 1836 10958 dme 1837 10958 dme if (clean_ring) { 1838 10958 dme int free_slots; 1839 10958 dme 1840 10958 dme mutex_enter(&xnfp->xnf_txlock); /* tx_slots_get() is defined outside this section; going by the comment above it presumably cleans the ring and reports the number of free slots - confirm. */ 1841 10958 dme free_slots = tx_slots_get(xnfp, 0, B_FALSE); 1842 10958 dme 1843 10958 dme if (need_sched && (free_slots > 0)) { 1844 10958 dme mutex_enter(&xnfp->xnf_schedlock); 1845 10958 dme xnfp->xnf_need_sched = B_FALSE; 1846 10958 dme mutex_exit(&xnfp->xnf_schedlock); 1847 10958 dme 1848 10958 dme mac_tx_update(xnfp->xnf_mh); 1849 10958 dme } 1850 10958 dme mutex_exit(&xnfp->xnf_txlock); 1851 7521 dme } 1852 5084 johnlev 1853 7521 dme return (DDI_INTR_CLAIMED); 1854 5084 johnlev } 1855 5084 johnlev 1856 5084 johnlev /* 1857 5084 johnlev * xnf_start() -- start the board receiving and enable interrupts. 1858 5084 johnlev */ /* As written here, xnf_start() only sets xnf_running to B_TRUE with both xnf_rxlock and xnf_txlock held and returns 0; nothing in this function touches interrupts. */ 1859 5084 johnlev static int 1860 5084 johnlev xnf_start(void *arg) 1861 5084 johnlev { 1862 5084 johnlev xnf_t *xnfp = arg; 1863 5084 johnlev 1864 5084 johnlev #ifdef XNF_DEBUG 1865 10958 dme if (xnf_debug & XNF_DEBUG_TRACE) 1866 5084 johnlev printf("xnf%d start(0x%p)\n", 1867 5741 mrj ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1868 5084 johnlev #endif 1869 5084 johnlev 1870 10958 dme mutex_enter(&xnfp->xnf_rxlock); 1871 5741 mrj mutex_enter(&xnfp->xnf_txlock); 1872 5084 johnlev 1873 5084 johnlev /* Accept packets from above. 
*/ 1874 5741 mrj xnfp->xnf_running = B_TRUE; 1875 5084 johnlev 1876 5741 mrj mutex_exit(&xnfp->xnf_txlock); 1877 10958 dme mutex_exit(&xnfp->xnf_rxlock); 1878 5084 johnlev 1879 5084 johnlev return (0); 1880 5084 johnlev } 1881 5084 johnlev 1882 5084 johnlev /* xnf_stop() - disable hardware */ /* As written here, xnf_stop() only clears xnf_running under xnf_rxlock and xnf_txlock; xnf_rx_collect() counts responses that arrive while xnf_running is false as drops. */ 1883 5084 johnlev static void 1884 5084 johnlev xnf_stop(void *arg) 1885 5084 johnlev { 1886 5084 johnlev xnf_t *xnfp = arg; 1887 5084 johnlev 1888 5084 johnlev #ifdef XNF_DEBUG 1889 10958 dme if (xnf_debug & XNF_DEBUG_TRACE) 1890 5084 johnlev printf("xnf%d stop(0x%p)\n", 1891 5741 mrj ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp); 1892 5084 johnlev #endif 1893 5084 johnlev 1894 10958 dme mutex_enter(&xnfp->xnf_rxlock); 1895 5741 mrj mutex_enter(&xnfp->xnf_txlock); 1896 5084 johnlev 1897 5741 mrj xnfp->xnf_running = B_FALSE; 1898 5084 johnlev 1899 5741 mrj mutex_exit(&xnfp->xnf_txlock); 1900 10958 dme mutex_exit(&xnfp->xnf_rxlock); 1901 5084 johnlev } 1902 5084 johnlev 1903 5084 johnlev /* 1904 10958 dme * Hang buffer `bdesc' on the RX ring. 
1905 5084 johnlev */ 1906 5084 johnlev static void 1907 10958 dme xnf_rxbuf_hang(xnf_t *xnfp, xnf_buf_t *bdesc) 1908 5084 johnlev { 1909 10958 dme netif_rx_request_t *reqp; 1910 10958 dme RING_IDX hang_ix; 1911 5084 johnlev 1912 10958 dme ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock)); 1913 5084 johnlev 1914 5741 mrj reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring, 1915 5741 mrj xnfp->xnf_rx_ring.req_prod_pvt); 1916 5741 mrj hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0)); 1917 10958 dme ASSERT(xnfp->xnf_rx_pkt_info[hang_ix] == NULL); 1918 5741 mrj 1919 10958 dme reqp->id = bdesc->id = hang_ix; 1920 5084 johnlev reqp->gref = bdesc->grant_ref; 1921 10958 dme 1922 10958 dme xnfp->xnf_rx_pkt_info[hang_ix] = bdesc; 1923 5741 mrj xnfp->xnf_rx_ring.req_prod_pvt++; 1924 10958 dme 1925 10958 dme xnfp->xnf_rx_new_buffers_posted = B_TRUE; 1926 5084 johnlev } 1927 5084 johnlev 1928 10958 dme /* 1929 10958 dme * Collect packets from the RX ring, storing them in `xnfp' for later 1930 10958 dme * use. 1931 10958 dme */ 1932 10958 dme static void 1933 10958 dme xnf_rx_collect(xnf_t *xnfp) 1934 5741 mrj { 1935 10958 dme mblk_t *head, *tail; 1936 10958 dme 1937 10958 dme ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock)); 1938 5741 mrj 1939 5741 mrj /* 1940 10958 dme * Loop over unconsumed responses: 1941 5741 mrj * 1. get a response 1942 5741 mrj * 2. take corresponding buffer off recv. ring 1943 5741 mrj * 3. indicate this by setting slot to NULL 1944 5741 mrj * 4. create a new message and 1945 5741 mrj * 5. copy data in, adjust ptr 1946 5741 mrj */ 1947 5741 mrj 1948 5741 mrj head = tail = NULL; 1949 5741 mrj 1950 5741 mrj while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring)) { 1951 10958 dme netif_rx_response_t *rxpkt; 1952 10958 dme xnf_buf_t *bdesc; 1953 10958 dme ssize_t len; 1954 10958 dme size_t off; 1955 10958 dme mblk_t *mp = NULL; 1956 10958 dme boolean_t hwcsum = B_FALSE; 1957 10958 dme grant_ref_t ref; 1958 5741 mrj 1959 5741 mrj /* 1. 
*/ 1960 5741 mrj rxpkt = RING_GET_RESPONSE(&xnfp->xnf_rx_ring, 1961 5741 mrj xnfp->xnf_rx_ring.rsp_cons); 1962 5741 mrj 1963 10958 dme DTRACE_PROBE4(xnf_rx_got_rsp, int, (int)rxpkt->id, 1964 10958 dme int, (int)rxpkt->offset, 1965 10958 dme int, (int)rxpkt->flags, 1966 10958 dme int, (int)rxpkt->status); 1967 5741 mrj 1968 5741 mrj /* 1969 5741 mrj * 2. 1970 5741 mrj */ 1971 10958 dme bdesc = xnfp->xnf_rx_pkt_info[rxpkt->id]; 1972 10958 dme 1973 10958 dme /* 1974 10958 dme * 3. 1975 10958 dme */ 1976 10958 dme xnfp->xnf_rx_pkt_info[rxpkt->id] = NULL; 1977 5741 mrj ASSERT(bdesc->id == rxpkt->id); 1978 10958 dme 1979 10958 dme ref = bdesc->grant_ref; 1980 10958 dme off = rxpkt->offset; 1981 10958 dme len = rxpkt->status; 1982 10958 dme 1983 6899 cz147101 if (!xnfp->xnf_running) { 1984 10958 dme DTRACE_PROBE4(xnf_rx_not_running, 1985 10958 dme int, rxpkt->status, 1986 6899 cz147101 char *, bdesc->buf, int, rxpkt->offset, 1987 6899 cz147101 char *, ((char *)bdesc->buf) + rxpkt->offset); 1988 10958 dme 1989 6899 cz147101 xnfp->xnf_stat_drop++; 1990 10958 dme 1991 10958 dme } else if (len <= 0) { 1992 10958 dme DTRACE_PROBE4(xnf_rx_pkt_status_negative, 1993 10958 dme int, rxpkt->status, 1994 5741 mrj char *, bdesc->buf, int, rxpkt->offset, 1995 5741 mrj char *, ((char *)bdesc->buf) + rxpkt->offset); 1996 10958 dme 1997 5741 mrj xnfp->xnf_stat_errrx++; 1998 10958 dme 1999 10958 dme switch (len) { 2000 10958 dme case 0: 2001 5741 mrj xnfp->xnf_stat_runt++; 2002 10958 dme break; 2003 10958 dme case NETIF_RSP_ERROR: 2004 5741 mrj xnfp->xnf_stat_mac_rcv_error++; 2005 10958 dme break; 2006 10958 dme case NETIF_RSP_DROPPED: 2007 5741 mrj xnfp->xnf_stat_norxbuf++; 2008 10958 dme break; 2009 10958 dme } 2010 10958 dme 2011 10958 dme } else if (bdesc->grant_ref == INVALID_GRANT_REF) { 2012 10958 dme cmn_err(CE_WARN, "Bad rx grant reference %d " 2013 10958 dme "from domain %d", ref, 2014 10958 dme xvdi_get_oeid(xnfp->xnf_devinfo)); 2015 10958 dme 2016 10958 dme } else if ((off + 
len) > PAGESIZE) { 2017 10958 dme cmn_err(CE_WARN, "Rx packet overflows page " 2018 10958 dme "(offset %ld, length %ld) from domain %d", 2019 10958 dme off, len, xvdi_get_oeid(xnfp->xnf_devinfo)); 2020 5741 mrj } else { 2021 10958 dme xnf_buf_t *nbuf = NULL; 2022 5741 mrj 2023 10958 dme DTRACE_PROBE4(xnf_rx_packet, int, len, 2024 10958 dme char *, bdesc->buf, int, off, 2025 10958 dme char *, ((char *)bdesc->buf) + off); 2026 10958 dme 2027 5741 mrj ASSERT(off + len <= PAGEOFFSET); 2028 10958 dme 2029 5741 mrj if (rxpkt->flags & NETRXF_data_validated) 2030 5741 mrj hwcsum = B_TRUE; 2031 5741 mrj 2032 5741 mrj /* 2033 10958 dme * If the packet is below a pre-determined 2034 10958 dme * size we will copy data out rather than 2035 10958 dme * replace it. 2036 5741 mrj */ 2037 10958 dme if (len > xnf_rx_copy_limit) 2038 10958 dme nbuf = xnf_buf_get(xnfp, KM_NOSLEEP, B_FALSE); 2039 10958 dme 2040 10958 dme /* 2041 10958 dme * If we have a replacement buffer, attempt to 2042 10958 dme * wrap the existing one with an mblk_t in 2043 10958 dme * order that the upper layers of the stack 2044 10958 dme * might use it directly. 2045 10958 dme */ 2046 10958 dme if (nbuf != NULL) { 2047 10958 dme mp = desballoc((unsigned char *)bdesc->buf, 2048 10958 dme bdesc->len, 0, &bdesc->free_rtn); 2049 10958 dme if (mp == NULL) { 2050 10958 dme xnfp->xnf_stat_rx_desballoc_fail++; 2051 10958 dme xnfp->xnf_stat_norxbuf++; 2052 10958 dme 2053 10958 dme xnf_buf_put(xnfp, nbuf, B_FALSE); 2054 10958 dme nbuf = NULL; 2055 10958 dme } else { 2056 10958 dme mp->b_rptr = mp->b_rptr + off; 2057 10958 dme mp->b_wptr = mp->b_rptr + len; 2058 10958 dme 2059 10958 dme /* 2060 10958 dme * Release the grant reference 2061 10958 dme * associated with this buffer 2062 10958 dme * - they are scarce and the 2063 10958 dme * upper layers of the stack 2064 10958 dme * don't need it. 
2065 10958 dme */ 2066 10958 dme (void) gnttab_end_foreign_access_ref( 2067 10958 dme bdesc->grant_ref, 0); 2068 10958 dme gref_put(xnfp, bdesc->grant_ref); 2069 10958 dme bdesc->grant_ref = INVALID_GRANT_REF; 2070 10958 dme 2071 10958 dme bdesc = nbuf; 2072 10958 dme } 2073 10958 dme } 2074 10958 dme 2075 10958 dme if (nbuf == NULL) { 2076 5741 mrj /* 2077 10958 dme * No replacement buffer allocated - 2078 10958 dme * attempt to copy the data out and 2079 10958 dme * re-hang the existing buffer. 2080 5741 mrj */ 2081 5741 mrj 2082 10958 dme /* 4. */ 2083 10958 dme mp = allocb(len, BPRI_MED); 2084 10958 dme if (mp == NULL) { 2085 10958 dme xnfp->xnf_stat_rx_allocb_fail++; 2086 10958 dme xnfp->xnf_stat_norxbuf++; 2087 10958 dme } else { 2088 10958 dme /* 5. */ 2089 10958 dme bcopy(bdesc->buf + off, mp->b_wptr, 2090 10958 dme len); 2091 10958 dme mp->b_wptr += len; 2092 10958 dme } 2093 5741 mrj } 2094 5741 mrj } 2095 5741 mrj 2096 10958 dme /* Re-hang the buffer. */ 2097 10958 dme xnf_rxbuf_hang(xnfp, bdesc); 2098 5741 mrj 2099 10958 dme if (mp != NULL) { 2100 5084 johnlev if (hwcsum) { 2101 5084 johnlev /* 2102 5084 johnlev * If the peer says that the data has 2103 5084 johnlev * been validated then we declare that 2104 5084 johnlev * the full checksum has been 2105 5084 johnlev * verified. 2106 5084 johnlev * 2107 5084 johnlev * We don't look at the "checksum 2108 5084 johnlev * blank" flag, and hence could have a 2109 5084 johnlev * packet here that we are asserting 2110 5084 johnlev * is good with a blank checksum. 2111 5084 johnlev * 2112 5084 johnlev * The hardware checksum offload 2113 5084 johnlev * specification says that we must 2114 5084 johnlev * provide the actual checksum as well 2115 5084 johnlev * as an assertion that it is valid, 2116 5084 johnlev * but the protocol stack doesn't 2117 5084 johnlev * actually use it and some other 2118 5084 johnlev * drivers don't bother, so we don't. 
2119 5084 johnlev * If it was necessary we could grovel 2120 5084 johnlev * in the packet to find it. 2121 5084 johnlev */ 2122 5084 johnlev (void) hcksum_assoc(mp, NULL, 2123 5084 johnlev NULL, 0, 0, 0, 0, 2124 5084 johnlev HCK_FULLCKSUM | 2125 10958 dme HCK_FULLCKSUM_OK, 0); 2126 5741 mrj xnfp->xnf_stat_rx_cksum_no_need++; 2127 5084 johnlev } 2128 5084 johnlev if (head == NULL) { 2129 10958 dme ASSERT(tail == NULL); 2130 10958 dme 2131 10958 dme head = mp; 2132 5084 johnlev } else { 2133 10958 dme ASSERT(tail != NULL); 2134 10958 dme 2135 5084 johnlev tail->b_next = mp; 2136 5084 johnlev } 2137 10958 dme tail = mp; 2138 5084 johnlev 2139 5084 johnlev ASSERT(mp->b_next == NULL); 2140 5084 johnlev 2141 5741 mrj xnfp->xnf_stat_ipackets++; 2142 5741 mrj xnfp->xnf_stat_rbytes += len; 2143 5084 johnlev } 2144 5084 johnlev 2145 5741 mrj xnfp->xnf_rx_ring.rsp_cons++; 2146 5084 johnlev } 2147 5084 johnlev 2148 5084 johnlev /* 2149 10958 dme * Store the mblks we have collected. 2150 5084 johnlev */ 2151 10958 dme if (head != NULL) { 2152 10958 dme ASSERT(tail != NULL); 2153 5084 johnlev 2154 10958 dme if (xnfp->xnf_rx_head == NULL) { 2155 10958 dme ASSERT(xnfp->xnf_rx_tail == NULL); 2156 5084 johnlev 2157 10958 dme xnfp->xnf_rx_head = head; 2158 10958 dme } else { 2159 10958 dme ASSERT(xnfp->xnf_rx_tail != NULL); 2160 5084 johnlev 2161 10958 dme xnfp->xnf_rx_tail->b_next = head; 2162 5084 johnlev } 2163 10958 dme xnfp->xnf_rx_tail = tail; 2164 5084 johnlev } 2165 5084 johnlev } 2166 5084 johnlev 2167 5084 johnlev /* 2168 5084 johnlev * xnf_alloc_dma_resources() -- initialize the drivers structures 2169 5084 johnlev */ 2170 5084 johnlev static int 2171 5084 johnlev xnf_alloc_dma_resources(xnf_t *xnfp) 2172 5084 johnlev { 2173 5741 mrj dev_info_t *devinfo = xnfp->xnf_devinfo; 2174 5084 johnlev size_t len; 2175 5084 johnlev ddi_dma_cookie_t dma_cookie; 2176 5084 johnlev uint_t ncookies; 2177 5084 johnlev int rc; 2178 5084 johnlev caddr_t rptr; 2179 5084 johnlev 2180 5084 
johnlev /* 2181 5084 johnlev * The code below allocates all the DMA data structures that 2182 5084 johnlev * need to be released when the driver is detached. 2183 5084 johnlev * 2184 5084 johnlev * Allocate page for the transmit descriptor ring. 2185 5084 johnlev */ 2186 5084 johnlev if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2187 5741 mrj DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS) 2188 5084 johnlev goto alloc_error; 2189 5084 johnlev 2190 5741 mrj if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle, 2191 5084 johnlev PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2192 5084 johnlev DDI_DMA_SLEEP, 0, &rptr, &len, 2193 5741 mrj &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) { 2194 5741 mrj ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2195 5741 mrj xnfp->xnf_tx_ring_dma_handle = NULL; 2196 5084 johnlev goto alloc_error; 2197 5084 johnlev } 2198 5084 johnlev 2199 5741 mrj if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL, 2200 5084 johnlev rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2201 5084 johnlev DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2202 5741 mrj ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2203 5741 mrj ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2204 5741 mrj xnfp->xnf_tx_ring_dma_handle = NULL; 2205 5741 mrj xnfp->xnf_tx_ring_dma_acchandle = NULL; 2206 5084 johnlev if (rc == DDI_DMA_NORESOURCES) 2207 5084 johnlev goto alloc_error; 2208 5084 johnlev else 2209 5084 johnlev goto error; 2210 5084 johnlev } 2211 5084 johnlev 2212 5084 johnlev ASSERT(ncookies == 1); 2213 5084 johnlev bzero(rptr, PAGESIZE); 2214 5084 johnlev /* LINTED: constant in conditional context */ 2215 5084 johnlev SHARED_RING_INIT((netif_tx_sring_t *)rptr); 2216 5084 johnlev /* LINTED: constant in conditional context */ 2217 5741 mrj FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE); 2218 5741 mrj xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress; 2219 5084 johnlev 
2220 5084 johnlev /* 2221 5084 johnlev * Allocate page for the receive descriptor ring. 2222 5084 johnlev */ 2223 5084 johnlev if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr, 2224 5741 mrj DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS) 2225 5084 johnlev goto alloc_error; 2226 5084 johnlev 2227 5741 mrj if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle, 2228 5084 johnlev PAGESIZE, &accattr, DDI_DMA_CONSISTENT, 2229 5084 johnlev DDI_DMA_SLEEP, 0, &rptr, &len, 2230 5741 mrj &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) { 2231 5741 mrj ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2232 5741 mrj xnfp->xnf_rx_ring_dma_handle = NULL; 2233 5084 johnlev goto alloc_error; 2234 5084 johnlev } 2235 5084 johnlev 2236 5741 mrj if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL, 2237 5084 johnlev rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT, 2238 5084 johnlev DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) { 2239 5741 mrj ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2240 5741 mrj ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2241 5741 mrj xnfp->xnf_rx_ring_dma_handle = NULL; 2242 5741 mrj xnfp->xnf_rx_ring_dma_acchandle = NULL; 2243 5084 johnlev if (rc == DDI_DMA_NORESOURCES) 2244 5084 johnlev goto alloc_error; 2245 5084 johnlev else 2246 5084 johnlev goto error; 2247 5084 johnlev } 2248 5084 johnlev 2249 5084 johnlev ASSERT(ncookies == 1); 2250 5084 johnlev bzero(rptr, PAGESIZE); 2251 5084 johnlev /* LINTED: constant in conditional context */ 2252 5084 johnlev SHARED_RING_INIT((netif_rx_sring_t *)rptr); 2253 5084 johnlev /* LINTED: constant in conditional context */ 2254 5741 mrj FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE); 2255 5741 mrj xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress; 2256 5084 johnlev 2257 5084 johnlev return (DDI_SUCCESS); 2258 5084 johnlev 2259 5084 johnlev alloc_error: 2260 5084 johnlev cmn_err(CE_WARN, "xnf%d: could not allocate enough 
DMA memory", 2261 5741 mrj ddi_get_instance(xnfp->xnf_devinfo)); 2262 5084 johnlev error: 2263 5084 johnlev xnf_release_dma_resources(xnfp); 2264 5084 johnlev return (DDI_FAILURE); 2265 5084 johnlev } 2266 5084 johnlev 2267 5084 johnlev /* 2268 5084 johnlev * Release all DMA resources in the opposite order from acquisition 2269 5084 johnlev */ 2270 5084 johnlev static void 2271 5084 johnlev xnf_release_dma_resources(xnf_t *xnfp) 2272 5084 johnlev { 2273 5084 johnlev int i; 2274 5084 johnlev 2275 5084 johnlev /* 2276 5084 johnlev * Free receive buffers which are currently associated with 2277 10958 dme * descriptors. 2278 5084 johnlev */ 2279 10958 dme mutex_enter(&xnfp->xnf_rxlock); 2280 10958 dme for (i = 0; i < NET_RX_RING_SIZE; i++) { 2281 10958 dme xnf_buf_t *bp; 2282 5084 johnlev 2283 10958 dme if ((bp = xnfp->xnf_rx_pkt_info[i]) == NULL) 2284 5084 johnlev continue; 2285 10958 dme xnfp->xnf_rx_pkt_info[i] = NULL; 2286 10958 dme xnf_buf_put(xnfp, bp, B_FALSE); 2287 5084 johnlev } 2288 10958 dme mutex_exit(&xnfp->xnf_rxlock); 2289 5084 johnlev 2290 10958 dme /* Free the receive ring buffer. */ 2291 5741 mrj if (xnfp->xnf_rx_ring_dma_acchandle != NULL) { 2292 5741 mrj (void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle); 2293 5741 mrj ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle); 2294 5741 mrj ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle); 2295 5741 mrj xnfp->xnf_rx_ring_dma_acchandle = NULL; 2296 5084 johnlev } 2297 10958 dme /* Free the transmit ring buffer. 
*/ 2298 5741 mrj if (xnfp->xnf_tx_ring_dma_acchandle != NULL) { 2299 5741 mrj (void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle); 2300 5741 mrj ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle); 2301 5741 mrj ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle); 2302 5741 mrj xnfp->xnf_tx_ring_dma_acchandle = NULL; 2303 5084 johnlev } 2304 6899 cz147101 2305 5084 johnlev } 2306 5084 johnlev 2307 10958 dme /* 2308 10958 dme * Release any packets and associated structures used by the TX ring. 2309 10958 dme */ 2310 5084 johnlev static void 2311 5084 johnlev xnf_release_mblks(xnf_t *xnfp) 2312 5084 johnlev { 2313 10958 dme RING_IDX i; 2314 10958 dme xnf_txid_t *tidp; 2315 5084 johnlev 2316 10958 dme for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0]; 2317 10958 dme i < NET_TX_RING_SIZE; 2318 10958 dme i++, tidp++) { 2319 10958 dme xnf_txbuf_t *txp = tidp->txbuf; 2320 10958 dme 2321 10958 dme if (txp != NULL) { 2322 10958 dme ASSERT(txp->tx_mp != NULL); 2323 10958 dme freemsg(txp->tx_mp); 2324 10958 dme 2325 10958 dme txid_put(xnfp, tidp); 2326 10958 dme kmem_cache_free(xnfp->xnf_tx_buf_cache, txp); 2327 10958 dme } 2328 5084 johnlev } 2329 5084 johnlev } 2330 5084 johnlev 2331 10958 dme static int 2332 10958 dme xnf_buf_constructor(void *buf, void *arg, int kmflag) 2333 5084 johnlev { 2334 10958 dme int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP; 2335 10958 dme xnf_buf_t *bdesc = buf; 2336 10958 dme xnf_t *xnfp = arg; 2337 10958 dme ddi_dma_cookie_t dma_cookie; 2338 10958 dme uint_t ncookies; 2339 5084 johnlev size_t len; 2340 5084 johnlev 2341 10958 dme if (kmflag & KM_NOSLEEP) 2342 10958 dme ddiflags = DDI_DMA_DONTWAIT; 2343 5084 johnlev 2344 10958 dme /* Allocate a DMA access handle for the buffer. */ 2345 10958 dme if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &buf_dma_attr, 2346 10958 dme ddiflags, 0, &bdesc->dma_handle) != DDI_SUCCESS) 2347 5084 johnlev goto failure; 2348 5084 johnlev 2349 10958 dme /* Allocate DMA-able memory for buffer. 
*/ 2350 5084 johnlev if (ddi_dma_mem_alloc(bdesc->dma_handle, 2351 10958 dme PAGESIZE, &data_accattr, DDI_DMA_STREAMING, ddiflags, 0, 2352 5084 johnlev &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS) 2353 5741 mrj goto failure_1; 2354 5084 johnlev 2355 10958 dme /* Bind to virtual address of buffer to get physical address. */ 2356 5084 johnlev if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL, 2357 10958 dme bdesc->buf, len, DDI_DMA_RDWR | DDI_DMA_STREAMING, 2358 10958 dme ddiflags, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED) 2359 5741 mrj goto failure_2; 2360 5084 johnlev ASSERT(ncookies == 1); 2361 5084 johnlev 2362 10958 dme bdesc->free_rtn.free_func = xnf_buf_recycle; 2363 10958 dme bdesc->free_rtn.free_arg = (caddr_t)bdesc; 2364 10958 dme bdesc->xnfp = xnfp; 2365 10958 dme bdesc->buf_phys = dma_cookie.dmac_laddress; 2366 10958 dme bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys)); 2367 10958 dme bdesc->len = dma_cookie.dmac_size; 2368 10958 dme bdesc->grant_ref = INVALID_GRANT_REF; 2369 10958 dme bdesc->gen = xnfp->xnf_gen; 2370 5741 mrj 2371 10958 dme atomic_add_64(&xnfp->xnf_stat_buf_allocated, 1); 2372 5084 johnlev 2373 10958 dme return (0); 2374 5084 johnlev 2375 5741 mrj failure_2: 2376 5084 johnlev ddi_dma_mem_free(&bdesc->acc_handle); 2377 5084 johnlev 2378 5741 mrj failure_1: 2379 5084 johnlev ddi_dma_free_handle(&bdesc->dma_handle); 2380 5084 johnlev 2381 5084 johnlev failure: 2382 10958 dme 2383 10958 dme return (-1); 2384 10958 dme } 2385 10958 dme 2386 10958 dme static void 2387 10958 dme xnf_buf_destructor(void *buf, void *arg) 2388 10958 dme { 2389 10958 dme xnf_buf_t *bdesc = buf; 2390 10958 dme xnf_t *xnfp = arg; 2391 10958 dme 2392 10958 dme (void) ddi_dma_unbind_handle(bdesc->dma_handle); 2393 10958 dme ddi_dma_mem_free(&bdesc->acc_handle); 2394 10958 dme ddi_dma_free_handle(&bdesc->dma_handle); 2395 10958 dme 2396 10958 dme atomic_add_64(&xnfp->xnf_stat_buf_allocated, -1); 2397 10958 dme } 2398 10958 dme 2399 10958 dme 
static xnf_buf_t * 2400 10958 dme xnf_buf_get(xnf_t *xnfp, int flags, boolean_t readonly) 2401 10958 dme { 2402 10958 dme grant_ref_t gref; 2403 10958 dme xnf_buf_t *bufp; 2404 10958 dme 2405 10958 dme /* 2406 10958 dme * Usually grant references are more scarce than memory, so we 2407 10958 dme * attempt to acquire a grant reference first. 2408 10958 dme */ 2409 10958 dme gref = gref_get(xnfp); 2410 10958 dme if (gref == INVALID_GRANT_REF) 2411 10958 dme return (NULL); 2412 10958 dme 2413 10958 dme bufp = kmem_cache_alloc(xnfp->xnf_buf_cache, flags); 2414 10958 dme if (bufp == NULL) { 2415 10958 dme gref_put(xnfp, gref); 2416 10958 dme return (NULL); 2417 10958 dme } 2418 10958 dme 2419 10958 dme ASSERT(bufp->grant_ref == INVALID_GRANT_REF); 2420 10958 dme 2421 10958 dme bufp->grant_ref = gref; 2422 10958 dme 2423 10958 dme if (bufp->gen != xnfp->xnf_gen) 2424 10958 dme xnf_buf_refresh(bufp); 2425 10958 dme 2426 10958 dme gnttab_grant_foreign_access_ref(bufp->grant_ref, 2427 10958 dme xvdi_get_oeid(bufp->xnfp->xnf_devinfo), 2428 10958 dme bufp->buf_mfn, readonly ? 1 : 0); 2429 10958 dme 2430 10958 dme atomic_add_64(&xnfp->xnf_stat_buf_outstanding, 1); 2431 10958 dme 2432 10958 dme return (bufp); 2433 10958 dme } 2434 10958 dme 2435 10958 dme static void 2436 10958 dme xnf_buf_put(xnf_t *xnfp, xnf_buf_t *bufp, boolean_t readonly) 2437 10958 dme { 2438 10958 dme if (bufp->grant_ref != INVALID_GRANT_REF) { 2439 10958 dme (void) gnttab_end_foreign_access_ref( 2440 10958 dme bufp->grant_ref, readonly ? 1 : 0); 2441 10958 dme gref_put(xnfp, bufp->grant_ref); 2442 10958 dme bufp->grant_ref = INVALID_GRANT_REF; 2443 10958 dme } 2444 10958 dme 2445 10958 dme kmem_cache_free(xnfp->xnf_buf_cache, bufp); 2446 10958 dme 2447 10958 dme atomic_add_64(&xnfp->xnf_stat_buf_outstanding, -1); 2448 10958 dme } 2449 10958 dme 2450 10958 dme /* 2451 10958 dme * Refresh any cached data about a buffer after resume. 
2452 10958 dme */ 2453 10958 dme static void 2454 10958 dme xnf_buf_refresh(xnf_buf_t *bdesc) 2455 10958 dme { 2456 10958 dme bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys)); 2457 10958 dme bdesc->gen = bdesc->xnfp->xnf_gen; 2458 10958 dme } 2459 10958 dme 2460 10958 dme /* 2461 10958 dme * Streams `freeb' routine for `xnf_buf_t' when used as transmit 2462 10958 dme * look-aside buffers. 2463 10958 dme */ 2464 10958 dme static void 2465 10958 dme xnf_buf_recycle(xnf_buf_t *bdesc) 2466 10958 dme { 2467 10958 dme xnf_t *xnfp = bdesc->xnfp; 2468 10958 dme 2469 10958 dme xnf_buf_put(xnfp, bdesc, B_TRUE); 2470 10958 dme } 2471 10958 dme 2472 10958 dme static int 2473 10958 dme xnf_tx_buf_constructor(void *buf, void *arg, int kmflag) 2474 10958 dme { 2475 10958 dme _NOTE(ARGUNUSED(kmflag)); 2476 10958 dme xnf_txbuf_t *txp = buf; 2477 10958 dme xnf_t *xnfp = arg; 2478 10958 dme 2479 10958 dme if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &buf_dma_attr, 2480 10958 dme 0, 0, &txp->tx_dma_handle) != DDI_SUCCESS) 2481 10958 dme return (-1); 2482 10958 dme 2483 10958 dme return (0); 2484 10958 dme } 2485 10958 dme 2486 10958 dme static void 2487 10958 dme xnf_tx_buf_destructor(void *buf, void *arg) 2488 10958 dme { 2489 10958 dme _NOTE(ARGUNUSED(arg)); 2490 10958 dme xnf_txbuf_t *txp = buf; 2491 10958 dme 2492 10958 dme ddi_dma_free_handle(&txp->tx_dma_handle); 2493 5084 johnlev } 2494 5084 johnlev 2495 5741 mrj /* 2496 5741 mrj * Statistics. 
2497 5741 mrj */ /* Names of the auxiliary named kstats. xnf_kstat_init() sizes and names the kstat from this array and xnf_kstat_aux_update() fills the values positionally, so an entry added here needs a matching assignment there. */ 2498 5741 mrj static char *xnf_aux_statistics[] = { 2499 5741 mrj "tx_cksum_deferred", 2500 5741 mrj "rx_cksum_no_need", 2501 5741 mrj "interrupts", 2502 5741 mrj "unclaimed_interrupts", 2503 5741 mrj "tx_pullup", 2504 5741 mrj "tx_pagebndry", 2505 5741 mrj "tx_attempt", 2506 10958 dme "buf_allocated", 2507 10958 dme "buf_outstanding", 2508 10958 dme "gref_outstanding", 2509 10958 dme "gref_failure", 2510 10958 dme "gref_peak", 2511 10958 dme "rx_allocb_fail", 2512 10958 dme "rx_desballoc_fail", 2513 5741 mrj }; 2514 5741 mrj /* ks_update callback for the aux_statistics kstat: copies the driver counters into the named entries. Only KSTAT_READ is accepted; any other flag returns EACCES. Returns 0 on success. */ 2515 5741 mrj static int 2516 5741 mrj xnf_kstat_aux_update(kstat_t *ksp, int flag) 2517 5741 mrj { 2518 5741 mrj xnf_t *xnfp; 2519 5741 mrj kstat_named_t *knp; 2520 5741 mrj 2521 5741 mrj if (flag != KSTAT_READ) 2522 5741 mrj return (EACCES); 2523 5741 mrj 2524 5741 mrj xnfp = ksp->ks_private; 2525 5741 mrj knp = ksp->ks_data; 2526 5741 mrj 2527 5741 mrj /* 2528 5741 mrj * Assignment order must match that of the names in 2529 5741 mrj * xnf_aux_statistics. 
2530 5741 mrj */ 2531 5741 mrj (knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred; 2532 5741 mrj (knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need; 2533 5741 mrj 2534 5741 mrj (knp++)->value.ui64 = xnfp->xnf_stat_interrupts; 2535 5741 mrj (knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts; 2536 5741 mrj (knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup; 2537 5741 mrj (knp++)->value.ui64 = xnfp->xnf_stat_tx_pagebndry; 2538 5741 mrj (knp++)->value.ui64 = xnfp->xnf_stat_tx_attempt; 2539 5741 mrj 2540 10958 dme (knp++)->value.ui64 = xnfp->xnf_stat_buf_allocated; 2541 10958 dme (knp++)->value.ui64 = xnfp->xnf_stat_buf_outstanding; 2542 10958 dme (knp++)->value.ui64 = xnfp->xnf_stat_gref_outstanding; 2543 10958 dme (knp++)->value.ui64 = xnfp->xnf_stat_gref_failure; 2544 10958 dme (knp++)->value.ui64 = xnfp->xnf_stat_gref_peak; 2545 10958 dme (knp++)->value.ui64 = xnfp->xnf_stat_rx_allocb_fail; 2546 10958 dme (knp++)->value.ui64 = xnfp->xnf_stat_rx_desballoc_fail; 2547 5741 mrj 2548 5741 mrj return (0); 2549 5741 mrj } 2550 5741 mrj /* Create, name and install the aux_statistics kstat for this instance, with one KSTAT_DATA_UINT64 entry per name in xnf_aux_statistics. Returns B_FALSE if kstat_create() fails and B_TRUE otherwise. */ 2551 5741 mrj static boolean_t 2552 5741 mrj xnf_kstat_init(xnf_t *xnfp) 2553 5741 mrj { 2554 5741 mrj int nstat = sizeof (xnf_aux_statistics) / 2555 5741 mrj sizeof (xnf_aux_statistics[0]); 2556 5741 mrj char **cp = xnf_aux_statistics; 2557 5741 mrj kstat_named_t *knp; 2558 5741 mrj 2559 5741 mrj /* 2560 5741 mrj * Create and initialise kstats. 
2561 5741 mrj */ 2562 5741 mrj if ((xnfp->xnf_kstat_aux = kstat_create("xnf", 2563 5741 mrj ddi_get_instance(xnfp->xnf_devinfo), 2564 5741 mrj "aux_statistics", "net", KSTAT_TYPE_NAMED, 2565 5741 mrj nstat, 0)) == NULL) 2566 5741 mrj return (B_FALSE); 2567 5741 mrj 2568 5741 mrj xnfp->xnf_kstat_aux->ks_private = xnfp; 2569 5741 mrj xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update; 2570 5741 mrj 2571 5741 mrj knp = xnfp->xnf_kstat_aux->ks_data; 2572 5741 mrj while (nstat > 0) { 2573 5741 mrj kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 2574 5741 mrj 2575 5741 mrj knp++; 2576 5741 mrj cp++; 2577 5741 mrj nstat--; 2578 5741 mrj } 2579 5741 mrj 2580 5741 mrj kstat_install(xnfp->xnf_kstat_aux); 2581 5741 mrj 2582 5741 mrj return (B_TRUE); 2583 5741 mrj } 2584 5741 mrj /* Report a single MAC or Ethernet statistic. Stores the value through val and returns 0, or returns ENOTSUP for a statistic that is not handled. Both xnf_rxlock and xnf_txlock are held while the counter is read. Each use of the local helper macros expands to one case label that copies the named xnf_stat_ counter into *val. */ 2585 5084 johnlev static int 2586 5084 johnlev xnf_stat(void *arg, uint_t stat, uint64_t *val) 2587 5084 johnlev { 2588 5084 johnlev xnf_t *xnfp = arg; 2589 5084 johnlev 2590 10958 dme mutex_enter(&xnfp->xnf_rxlock); 2591 5741 mrj mutex_enter(&xnfp->xnf_txlock); 2592 5084 johnlev 2593 5741 mrj #define mac_stat(q, r) \ 2594 5084 johnlev case (MAC_STAT_##q): \ 2595 5741 mrj *val = xnfp->xnf_stat_##r; \ 2596 5741 mrj break 2597 5741 mrj 2598 5741 mrj #define ether_stat(q, r) \ 2599 5741 mrj case (ETHER_STAT_##q): \ 2600 5741 mrj *val = xnfp->xnf_stat_##r; \ 2601 5084 johnlev break 2602 5084 johnlev 2603 5084 johnlev switch (stat) { 2604 5084 johnlev 2605 5741 mrj mac_stat(IPACKETS, ipackets); 2606 5741 mrj mac_stat(OPACKETS, opackets); 2607 5741 mrj mac_stat(RBYTES, rbytes); 2608 5741 mrj mac_stat(OBYTES, obytes); 2609 5741 mrj mac_stat(NORCVBUF, norxbuf); 2610 5741 mrj mac_stat(IERRORS, errrx); 2611 5741 mrj mac_stat(NOXMTBUF, tx_defer); 2612 5741 mrj 2613 5741 mrj ether_stat(MACRCV_ERRORS, mac_rcv_error); 2614 5741 mrj ether_stat(TOOSHORT_ERRORS, runt); 2615 5084 johnlev 2616 7397 Max /* always claim to be in full duplex mode */ 2617 7397 Max case ETHER_STAT_LINK_DUPLEX: 2618 7397 Max *val = 
LINK_DUPLEX_FULL; 2619 7397 Max break; 2620 7397 Max 2621 7397 Max /* always claim to be at 1Gb/s link speed */ 2622 7397 Max case MAC_STAT_IFSPEED: 2623 7397 Max *val = 1000000000ull; 2624 7397 Max break; 2625 7397 Max /* The default case releases both locks itself because it returns without reaching the common exit below. */ 2626 5084 johnlev default: 2627 5741 mrj mutex_exit(&xnfp->xnf_txlock); 2628 10958 dme mutex_exit(&xnfp->xnf_rxlock); 2629 5084 johnlev 2630 5084 johnlev return (ENOTSUP); 2631 5084 johnlev } 2632 5084 johnlev 2633 5741 mrj #undef mac_stat 2634 5741 mrj #undef ether_stat 2635 5084 johnlev 2636 5741 mrj mutex_exit(&xnfp->xnf_txlock); 2637 10958 dme mutex_exit(&xnfp->xnf_rxlock); 2638 5084 johnlev 2639 5084 johnlev return (0); 2640 5084 johnlev } 2641 5084 johnlev /* Capability query. Only MAC_CAPAB_HCKSUM is recognised, for which HCKSUM_INET_FULL_V4 is stored through cap_data and B_TRUE is returned; every other capability returns B_FALSE. */ 2642 5084 johnlev static boolean_t 2643 5084 johnlev xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data) 2644 5084 johnlev { 2645 10958 dme _NOTE(ARGUNUSED(arg)); 2646 5084 johnlev 2647 5084 johnlev switch (cap) { 2648 5084 johnlev case MAC_CAPAB_HCKSUM: { 2649 5084 johnlev uint32_t *capab = cap_data; 2650 5084 johnlev 2651 5702 dme /* 2652 7351 dme * Whilst the flag used to communicate with the IO 2653 7351 dme * domain is called "NETTXF_csum_blank", the checksum 2654 7351 dme * in the packet must contain the pseudo-header 2655 7351 dme * checksum and not zero. 2656 5702 dme * 2657 7351 dme * To help out the IO domain, we might use 2658 7351 dme * HCKSUM_INET_PARTIAL. Unfortunately our stack will 2659 7351 dme * then use checksum offload for IPv6 packets, which 2660 7351 dme * the IO domain can't handle. 2661 7351 dme * 2662 7351 dme * As a result, we declare ourselves capable of 2663 7351 dme * HCKSUM_INET_FULL_V4. This means that we receive 2664 7351 dme * IPv4 packets from the stack with a blank checksum 2665 7351 dme * field and must insert the pseudo-header checksum 2666 7351 dme * before passing the packet to the IO domain. 
2667 5702 dme */ 2668 10958 dme *capab = HCKSUM_INET_FULL_V4; 2669 5084 johnlev break; 2670 5084 johnlev } 2671 5084 johnlev default: 2672 5084 johnlev return (B_FALSE); 2673 5084 johnlev } 2674 5084 johnlev 2675 5084 johnlev return (B_TRUE); 2676 5084 johnlev } 2677 5084 johnlev 2678 10958 dme /* 2679 10958 dme * The state of the peer has changed - react accordingly. 2680 10958 dme */ 2681 5084 johnlev static void 2682 5084 johnlev oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 2683 5084 johnlev void *arg, void *impl_data) 2684 5084 johnlev { 2685 10958 dme _NOTE(ARGUNUSED(id, arg)); 2686 5084 johnlev xnf_t *xnfp = ddi_get_driver_private(dip); 2687 5084 johnlev XenbusState new_state = *(XenbusState *)impl_data; 2688 5084 johnlev 2689 5084 johnlev ASSERT(xnfp != NULL); 2690 5084 johnlev 2691 5084 johnlev switch (new_state) { 2692 10958 dme case XenbusStateUnknown: 2693 10958 dme case XenbusStateInitialising: 2694 10958 dme case XenbusStateInitialised: 2695 10958 dme case XenbusStateClosing: 2696 10958 dme case XenbusStateClosed: 2697 10958 dme case XenbusStateReconfiguring: 2698 10958 dme case XenbusStateReconfigured: 2699 10958 dme break; 2700 10958 dme 2701 10958 dme case XenbusStateInitWait: 2702 10958 dme xnf_read_config(xnfp); 2703 10958 dme 2704 10958 dme if (!xnfp->xnf_be_rx_copy) { 2705 10958 dme cmn_err(CE_WARN, 2706 10958 dme "The xnf driver requires a dom0 that " 2707 10958 dme "supports 'feature-rx-copy'."); 2708 10958 dme (void) xvdi_switch_state(xnfp->xnf_devinfo, 2709 10958 dme XBT_NULL, XenbusStateClosed); 2710 10958 dme break; 2711 10958 dme } 2712 10958 dme 2713 10958 dme /* 2714 10958 dme * Connect to the backend. 2715 10958 dme */ 2716 10958 dme xnf_be_connect(xnfp); 2717 10958 dme 2718 10958 dme /* 2719 10958 dme * Our MAC address as discovered by xnf_read_config(). 
2720 10958 dme */ 2721 10958 dme mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr); 2722 10958 dme 2723 10958 dme break; 2724 10958 dme 2725 5084 johnlev case XenbusStateConnected: 2726 10958 dme mutex_enter(&xnfp->xnf_rxlock); 2727 5741 mrj mutex_enter(&xnfp->xnf_txlock); 2728 5084 johnlev 2729 5741 mrj xnfp->xnf_connected = B_TRUE; 2730 6899 cz147101 /* 2731 10958 dme * Wake up any threads waiting to send data to 2732 10958 dme * backend. 2733 6899 cz147101 */ 2734 10958 dme cv_broadcast(&xnfp->xnf_cv_state); 2735 5084 johnlev 2736 5741 mrj mutex_exit(&xnfp->xnf_txlock); 2737 10958 dme mutex_exit(&xnfp->xnf_rxlock); 2738 5084 johnlev 2739 6899 cz147101 /* 2740 10958 dme * Kick the peer in case it missed any transmits 2741 10958 dme * request in the TX ring. 2742 6899 cz147101 */ 2743 5741 mrj ec_notify_via_evtchn(xnfp->xnf_evtchn); 2744 6899 cz147101 2745 6899 cz147101 /* 2746 10958 dme * There may already be completed receive requests in 2747 10958 dme * the ring sent by backend after it gets connected 2748 10958 dme * but before we see its state change here, so we call 2749 10958 dme * xnf_intr() to handle them, if any. 2750 6899 cz147101 */ 2751 6899 cz147101 (void) xnf_intr((caddr_t)xnfp); 2752 6899 cz147101 2753 10958 dme /* 2754 10958 dme * Mark the link up now that we are connected. 2755 10958 dme */ 2756 7397 Max mac_link_update(xnfp->xnf_mh, LINK_STATE_UP); 2757 10958 dme 2758 10958 dme /* 2759 10958 dme * Tell the backend about the multicast addresses in 2760 10958 dme * which we are interested. 2761 10958 dme */ 2762 10958 dme mac_multicast_refresh(xnfp->xnf_mh, NULL, xnfp, B_TRUE); 2763 7397 Max 2764 5084 johnlev break; 2765 5084 johnlev 2766 5084 johnlev default: 2767 5084 johnlev break; 2768 5084 johnlev } 2769 5084 johnlev } 2770