1 5084 johnlev /* 2 5084 johnlev * CDDL HEADER START 3 5084 johnlev * 4 5084 johnlev * The contents of this file are subject to the terms of the 5 5084 johnlev * Common Development and Distribution License (the "License"). 6 5084 johnlev * You may not use this file except in compliance with the License. 7 5084 johnlev * 8 5084 johnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 5084 johnlev * or http://www.opensolaris.org/os/licensing. 10 5084 johnlev * See the License for the specific language governing permissions 11 5084 johnlev * and limitations under the License. 12 5084 johnlev * 13 5084 johnlev * When distributing Covered Code, include this CDDL HEADER in each 14 5084 johnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 5084 johnlev * If applicable, add the following below this CDDL HEADER, with the 16 5084 johnlev * fields enclosed by brackets "[]" replaced with your own identifying 17 5084 johnlev * information: Portions Copyright [yyyy] [name of copyright owner] 18 5084 johnlev * 19 5084 johnlev * CDDL HEADER END 20 5084 johnlev */ 21 5084 johnlev 22 5084 johnlev /* 23 8757 dme * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 5084 johnlev * Use is subject to license terms. 25 5084 johnlev */ 26 5084 johnlev 27 5084 johnlev #ifdef DEBUG 28 5084 johnlev #define XNB_DEBUG 1 29 5084 johnlev #endif /* DEBUG */ 30 5084 johnlev 31 5084 johnlev #include "xnb.h" 32 5084 johnlev 33 5084 johnlev #include <sys/sunddi.h> 34 5084 johnlev #include <sys/sunndi.h> 35 5084 johnlev #include <sys/modctl.h> 36 5084 johnlev #include <sys/conf.h> 37 5084 johnlev #include <sys/mac.h> 38 10958 dme #include <sys/mac_impl.h> /* For mac_fix_cksum(). */ 39 5084 johnlev #include <sys/dlpi.h> 40 5084 johnlev #include <sys/strsubr.h> 41 5084 johnlev #include <sys/strsun.h> 42 5741 mrj #include <sys/types.h> 43 5084 johnlev #include <sys/pattr.h> 44 5084 johnlev #include <vm/seg_kmem.h> 45 5084 johnlev #include <vm/hat_i86.h> 46 5084 johnlev #include <xen/sys/xenbus_impl.h> 47 5084 johnlev #include <xen/sys/xendev.h> 48 5084 johnlev #include <sys/balloon_impl.h> 49 5084 johnlev #include <sys/evtchn_impl.h> 50 5084 johnlev #include <sys/gnttab.h> 51 5262 rscott #include <vm/vm_dep.h> 52 10958 dme #include <sys/note.h> 53 5084 johnlev #include <sys/gld.h> 54 5084 johnlev #include <inet/ip.h> 55 5084 johnlev #include <inet/ip_impl.h> 56 5084 johnlev 57 5084 johnlev /* 58 7615 Max * The terms "transmit" and "receive" are used in alignment with domU, 59 7615 Max * which means that packets originating from the peer domU are "transmitted" 60 7615 Max * to other parts of the system and packets are "received" from them. 61 5084 johnlev */ 62 5084 johnlev 63 5084 johnlev /* 64 10958 dme * Should we allow guests to manipulate multicast group membership? 65 5084 johnlev */ 66 10958 dme static boolean_t xnb_multicast_control = B_TRUE; 67 5084 johnlev 68 5084 johnlev static boolean_t xnb_connect_rings(dev_info_t *); 69 5084 johnlev static void xnb_disconnect_rings(dev_info_t *); 70 5084 johnlev static void xnb_oe_state_change(dev_info_t *, ddi_eventcookie_t, 71 5084 johnlev void *, void *); 72 5084 johnlev static void xnb_hp_state_change(dev_info_t *, ddi_eventcookie_t, 73 5084 johnlev void *, void *); 74 5084 johnlev 75 7615 Max static int xnb_txbuf_constructor(void *, void *, int); 76 7615 Max static void xnb_txbuf_destructor(void *, void *); 77 10958 dme static void xnb_tx_notify_peer(xnb_t *, boolean_t); 78 7615 Max static void xnb_tx_mark_complete(xnb_t *, RING_IDX, int16_t); 79 10958 dme 80 10958 dme mblk_t *xnb_to_peer(xnb_t *, mblk_t *); 81 5741 mrj mblk_t *xnb_copy_to_peer(xnb_t *, mblk_t *); 82 5741 mrj 83 10958 dme static void setup_gop(xnb_t *, gnttab_copy_t *, uchar_t *, 84 10958 dme size_t, size_t, size_t, grant_ref_t); 85 10958 dme #pragma inline(setup_gop) 86 10958 dme static boolean_t is_foreign(void *); 87 10958 dme #pragma inline(is_foreign) 88 5741 mrj 89 5084 johnlev #define INVALID_GRANT_HANDLE ((grant_handle_t)-1) 90 5084 johnlev #define INVALID_GRANT_REF ((grant_ref_t)-1) 91 5084 johnlev 92 5084 johnlev static kmutex_t xnb_alloc_page_lock; 93 10958 dme 94 10958 dme /* 95 10958 dme * On a 32 bit PAE system physical and machine addresses are larger 96 10958 dme * than 32 bits. ddi_btop() on such systems take an unsigned long 97 10958 dme * argument, and so addresses above 4G are truncated before ddi_btop() 98 10958 dme * gets to see them. To avoid this, code the shift operation here. 99 10958 dme */ 100 10958 dme #define xnb_btop(addr) ((addr) >> PAGESHIFT) 101 10958 dme 102 10958 dme /* DMA attributes for transmit and receive data */ 103 10958 dme static ddi_dma_attr_t buf_dma_attr = { 104 10958 dme DMA_ATTR_V0, /* version of this structure */ 105 10958 dme 0, /* lowest usable address */ 106 10958 dme 0xffffffffffffffffULL, /* highest usable address */ 107 10958 dme 0x7fffffff, /* maximum DMAable byte count */ 108 10958 dme MMU_PAGESIZE, /* alignment in bytes */ 109 10958 dme 0x7ff, /* bitmap of burst sizes */ 110 10958 dme 1, /* minimum transfer */ 111 10958 dme 0xffffffffU, /* maximum transfer */ 112 10958 dme 0xffffffffffffffffULL, /* maximum segment length */ 113 10958 dme 1, /* maximum number of segments */ 114 10958 dme 1, /* granularity */ 115 10958 dme 0, /* flags (reserved) */ 116 10958 dme }; 117 10958 dme 118 10958 dme /* DMA access attributes for data: NOT to be byte swapped. */ 119 10958 dme static ddi_device_acc_attr_t data_accattr = { 120 10958 dme DDI_DEVICE_ATTR_V0, 121 10958 dme DDI_NEVERSWAP_ACC, 122 10958 dme DDI_STRICTORDER_ACC 123 10958 dme }; 124 5084 johnlev 125 5084 johnlev /* 126 5084 johnlev * Statistics. 127 5084 johnlev */ 128 5084 johnlev static char *aux_statistics[] = { 129 7615 Max "rx_cksum_deferred", 130 7615 Max "tx_cksum_no_need", 131 7615 Max "rx_rsp_notok", 132 5084 johnlev "tx_notify_deferred", 133 5084 johnlev "tx_notify_sent", 134 5084 johnlev "rx_notify_deferred", 135 5084 johnlev "rx_notify_sent", 136 5084 johnlev "tx_too_early", 137 5084 johnlev "rx_too_early", 138 5084 johnlev "rx_allocb_failed", 139 5741 mrj "tx_allocb_failed", 140 7615 Max "rx_foreign_page", 141 5084 johnlev "mac_full", 142 5084 johnlev "spurious_intr", 143 5084 johnlev "allocation_success", 144 5084 johnlev "allocation_failure", 145 5084 johnlev "small_allocation_success", 146 5084 johnlev "small_allocation_failure", 147 5741 mrj "other_allocation_failure", 148 7615 Max "rx_pageboundary_crossed", 149 7615 Max "rx_cpoparea_grown", 150 5084 johnlev "csum_hardware", 151 5084 johnlev "csum_software", 152 5084 johnlev }; 153 5084 johnlev 154 5084 johnlev static int 155 5084 johnlev xnb_ks_aux_update(kstat_t *ksp, int flag) 156 5084 johnlev { 157 5084 johnlev xnb_t *xnbp; 158 5084 johnlev kstat_named_t *knp; 159 5084 johnlev 160 5084 johnlev if (flag != KSTAT_READ) 161 5084 johnlev return (EACCES); 162 5084 johnlev 163 5084 johnlev xnbp = ksp->ks_private; 164 5084 johnlev knp = ksp->ks_data; 165 5084 johnlev 166 5084 johnlev /* 167 5084 johnlev * Assignment order should match that of the names in 168 5084 johnlev * aux_statistics. 169 5084 johnlev */ 170 7615 Max (knp++)->value.ui64 = xnbp->xnb_stat_rx_cksum_deferred; 171 7615 Max (knp++)->value.ui64 = xnbp->xnb_stat_tx_cksum_no_need; 172 7615 Max (knp++)->value.ui64 = xnbp->xnb_stat_rx_rsp_notok; 173 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_deferred; 174 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_tx_notify_sent; 175 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_deferred; 176 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_rx_notify_sent; 177 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_tx_too_early; 178 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_rx_too_early; 179 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_rx_allocb_failed; 180 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_tx_allocb_failed; 181 7615 Max (knp++)->value.ui64 = xnbp->xnb_stat_rx_foreign_page; 182 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_mac_full; 183 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_spurious_intr; 184 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_allocation_success; 185 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_allocation_failure; 186 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_success; 187 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_small_allocation_failure; 188 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_other_allocation_failure; 189 7615 Max (knp++)->value.ui64 = xnbp->xnb_stat_rx_pagebndry_crossed; 190 7615 Max (knp++)->value.ui64 = xnbp->xnb_stat_rx_cpoparea_grown; 191 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_csum_hardware; 192 5741 mrj (knp++)->value.ui64 = xnbp->xnb_stat_csum_software; 193 5084 johnlev 194 5084 johnlev return (0); 195 5084 johnlev } 196 5084 johnlev 197 5084 johnlev static boolean_t 198 5084 johnlev xnb_ks_init(xnb_t *xnbp) 199 5084 johnlev { 200 5084 johnlev int nstat = sizeof (aux_statistics) / 201 5084 johnlev sizeof (aux_statistics[0]); 202 5084 johnlev char **cp = aux_statistics; 203 5084 johnlev kstat_named_t *knp; 204 5084 johnlev 205 5084 johnlev /* 206 5084 johnlev * Create and initialise kstats. 207 5084 johnlev */ 208 5741 mrj xnbp->xnb_kstat_aux = kstat_create(ddi_driver_name(xnbp->xnb_devinfo), 209 5741 mrj ddi_get_instance(xnbp->xnb_devinfo), "aux_statistics", "net", 210 5084 johnlev KSTAT_TYPE_NAMED, nstat, 0); 211 5741 mrj if (xnbp->xnb_kstat_aux == NULL) 212 5084 johnlev return (B_FALSE); 213 5084 johnlev 214 5741 mrj xnbp->xnb_kstat_aux->ks_private = xnbp; 215 5741 mrj xnbp->xnb_kstat_aux->ks_update = xnb_ks_aux_update; 216 5084 johnlev 217 5741 mrj knp = xnbp->xnb_kstat_aux->ks_data; 218 5084 johnlev while (nstat > 0) { 219 5084 johnlev kstat_named_init(knp, *cp, KSTAT_DATA_UINT64); 220 5084 johnlev 221 5084 johnlev knp++; 222 5084 johnlev cp++; 223 5084 johnlev nstat--; 224 5084 johnlev } 225 5084 johnlev 226 5741 mrj kstat_install(xnbp->xnb_kstat_aux); 227 5084 johnlev 228 5084 johnlev return (B_TRUE); 229 5084 johnlev } 230 5084 johnlev 231 5084 johnlev static void 232 5084 johnlev xnb_ks_free(xnb_t *xnbp) 233 5084 johnlev { 234 5741 mrj kstat_delete(xnbp->xnb_kstat_aux); 235 5084 johnlev } 236 5084 johnlev 237 5084 johnlev /* 238 10958 dme * Calculate and insert the transport checksum for an arbitrary packet. 239 5084 johnlev */ 240 5084 johnlev static mblk_t * 241 5084 johnlev xnb_software_csum(xnb_t *xnbp, mblk_t *mp) 242 5084 johnlev { 243 10958 dme _NOTE(ARGUNUSED(xnbp)); 244 10958 dme 245 5084 johnlev /* 246 10958 dme * XXPV dme: shouldn't rely on mac_fix_cksum(), not least 247 5084 johnlev * because it doesn't cover all of the interesting cases :-( 248 5084 johnlev */ 249 5084 johnlev (void) hcksum_assoc(mp, NULL, NULL, 0, 0, 0, 0, 250 5084 johnlev HCK_FULLCKSUM, KM_NOSLEEP); 251 5084 johnlev 252 8275 Eric return (mac_fix_cksum(mp)); 253 5084 johnlev } 254 5084 johnlev 255 5084 johnlev mblk_t * 256 5084 johnlev xnb_process_cksum_flags(xnb_t *xnbp, mblk_t *mp, uint32_t capab) 257 5084 johnlev { 258 5084 johnlev struct ether_header *ehp; 259 5084 johnlev uint16_t sap; 260 5084 johnlev uint32_t offset; 261 5084 johnlev ipha_t *ipha; 262 5084 johnlev 263 5084 johnlev ASSERT(mp->b_next == NULL); 264 5084 johnlev 265 5084 johnlev /* 266 5084 johnlev * Check that the packet is contained in a single mblk. In 267 10958 dme * the "from peer" path this is true today, but may change 268 5084 johnlev * when scatter gather support is added. In the "to peer" 269 5084 johnlev * path we cannot be sure, but in most cases it will be true 270 5084 johnlev * (in the xnbo case the packet has come from a MAC device 271 5084 johnlev * which is unlikely to split packets). 272 5084 johnlev */ 273 5084 johnlev if (mp->b_cont != NULL) 274 5084 johnlev goto software; 275 5084 johnlev 276 5084 johnlev /* 277 5084 johnlev * If the MAC has no hardware capability don't do any further 278 5084 johnlev * checking. 279 5084 johnlev */ 280 5084 johnlev if (capab == 0) 281 5084 johnlev goto software; 282 5084 johnlev 283 5084 johnlev ASSERT(MBLKL(mp) >= sizeof (struct ether_header)); 284 5084 johnlev ehp = (struct ether_header *)mp->b_rptr; 285 5084 johnlev 286 5084 johnlev if (ntohs(ehp->ether_type) == VLAN_TPID) { 287 5084 johnlev struct ether_vlan_header *evhp; 288 5084 johnlev 289 5084 johnlev ASSERT(MBLKL(mp) >= sizeof (struct ether_vlan_header)); 290 5084 johnlev evhp = (struct ether_vlan_header *)mp->b_rptr; 291 5084 johnlev sap = ntohs(evhp->ether_type); 292 5084 johnlev offset = sizeof (struct ether_vlan_header); 293 5084 johnlev } else { 294 5084 johnlev sap = ntohs(ehp->ether_type); 295 5084 johnlev offset = sizeof (struct ether_header); 296 5084 johnlev } 297 5084 johnlev 298 5084 johnlev /* 299 5084 johnlev * We only attempt to do IPv4 packets in hardware. 300 5084 johnlev */ 301 5084 johnlev if (sap != ETHERTYPE_IP) 302 5084 johnlev goto software; 303 5084 johnlev 304 5084 johnlev /* 305 5084 johnlev * We know that this is an IPv4 packet. 306 5084 johnlev */ 307 5084 johnlev ipha = (ipha_t *)(mp->b_rptr + offset); 308 5084 johnlev 309 5084 johnlev switch (ipha->ipha_protocol) { 310 5084 johnlev case IPPROTO_TCP: 311 7351 dme case IPPROTO_UDP: { 312 7351 dme uint32_t start, length, stuff, cksum; 313 7351 dme uint16_t *stuffp; 314 7351 dme 315 5084 johnlev /* 316 7351 dme * This is a TCP/IPv4 or UDP/IPv4 packet, for which we 317 7351 dme * can use full IPv4 and partial checksum offload. 318 5084 johnlev */ 319 7351 dme if ((capab & (HCKSUM_INET_FULL_V4|HCKSUM_INET_PARTIAL)) == 0) 320 7351 dme break; 321 7351 dme 322 7351 dme start = IP_SIMPLE_HDR_LENGTH; 323 7351 dme length = ntohs(ipha->ipha_length); 324 7351 dme if (ipha->ipha_protocol == IPPROTO_TCP) { 325 7351 dme stuff = start + TCP_CHECKSUM_OFFSET; 326 7351 dme cksum = IP_TCP_CSUM_COMP; 327 7351 dme } else { 328 7351 dme stuff = start + UDP_CHECKSUM_OFFSET; 329 7351 dme cksum = IP_UDP_CSUM_COMP; 330 7351 dme } 331 7351 dme stuffp = (uint16_t *)(mp->b_rptr + offset + stuff); 332 7351 dme 333 7351 dme if (capab & HCKSUM_INET_FULL_V4) { 334 7351 dme /* 335 7351 dme * Some devices require that the checksum 336 7351 dme * field of the packet is zero for full 337 7351 dme * offload. 338 7351 dme */ 339 7351 dme *stuffp = 0; 340 7351 dme 341 5084 johnlev (void) hcksum_assoc(mp, NULL, NULL, 342 5084 johnlev 0, 0, 0, 0, 343 5084 johnlev HCK_FULLCKSUM, KM_NOSLEEP); 344 5084 johnlev 345 5741 mrj xnbp->xnb_stat_csum_hardware++; 346 5084 johnlev 347 5084 johnlev return (mp); 348 5084 johnlev } 349 5084 johnlev 350 7351 dme if (capab & HCKSUM_INET_PARTIAL) { 351 7351 dme if (*stuffp == 0) { 352 7351 dme ipaddr_t src, dst; 353 5084 johnlev 354 7351 dme /* 355 7351 dme * Older Solaris guests don't insert 356 7351 dme * the pseudo-header checksum, so we 357 7351 dme * calculate it here. 358 7351 dme */ 359 7351 dme src = ipha->ipha_src; 360 7351 dme dst = ipha->ipha_dst; 361 7351 dme 362 7351 dme cksum += (dst >> 16) + (dst & 0xFFFF); 363 7351 dme cksum += (src >> 16) + (src & 0xFFFF); 364 7351 dme cksum += length - IP_SIMPLE_HDR_LENGTH; 365 7351 dme 366 7351 dme cksum = (cksum >> 16) + (cksum & 0xFFFF); 367 7351 dme cksum = (cksum >> 16) + (cksum & 0xFFFF); 368 7351 dme 369 7351 dme ASSERT(cksum <= 0xFFFF); 370 7351 dme 371 7351 dme *stuffp = (uint16_t)(cksum ? cksum : ~cksum); 372 7351 dme } 373 7351 dme 374 7351 dme (void) hcksum_assoc(mp, NULL, NULL, 375 7351 dme start, stuff, length, 0, 376 7351 dme HCK_PARTIALCKSUM, KM_NOSLEEP); 377 7351 dme 378 7351 dme xnbp->xnb_stat_csum_hardware++; 379 7351 dme 380 7351 dme return (mp); 381 7351 dme } 382 7351 dme 383 7351 dme /* NOTREACHED */ 384 5084 johnlev break; 385 7351 dme } 386 5084 johnlev 387 5084 johnlev default: 388 5084 johnlev /* Use software. */ 389 5084 johnlev break; 390 5084 johnlev } 391 5084 johnlev 392 5084 johnlev software: 393 5084 johnlev /* 394 5084 johnlev * We are not able to use any offload so do the whole thing in 395 5084 johnlev * software. 396 5084 johnlev */ 397 5741 mrj xnbp->xnb_stat_csum_software++; 398 5084 johnlev 399 5084 johnlev return (xnb_software_csum(xnbp, mp)); 400 5084 johnlev } 401 5084 johnlev 402 5084 johnlev int 403 5084 johnlev xnb_attach(dev_info_t *dip, xnb_flavour_t *flavour, void *flavour_data) 404 5084 johnlev { 405 5084 johnlev xnb_t *xnbp; 406 10958 dme char *xsname; 407 10958 dme char cachename[32]; 408 5084 johnlev 409 5084 johnlev xnbp = kmem_zalloc(sizeof (*xnbp), KM_SLEEP); 410 5084 johnlev 411 5741 mrj xnbp->xnb_flavour = flavour; 412 5741 mrj xnbp->xnb_flavour_data = flavour_data; 413 5741 mrj xnbp->xnb_devinfo = dip; 414 5741 mrj xnbp->xnb_evtchn = INVALID_EVTCHN; 415 5741 mrj xnbp->xnb_irq = B_FALSE; 416 5741 mrj xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE; 417 5741 mrj xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE; 418 5741 mrj xnbp->xnb_connected = B_FALSE; 419 5741 mrj xnbp->xnb_hotplugged = B_FALSE; 420 5741 mrj xnbp->xnb_detachable = B_FALSE; 421 5741 mrj xnbp->xnb_peer = xvdi_get_oeid(dip); 422 10958 dme xnbp->xnb_be_status = XNB_STATE_INIT; 423 10958 dme xnbp->xnb_fe_status = XNB_STATE_INIT; 424 5084 johnlev 425 7615 Max xnbp->xnb_tx_buf_count = 0; 426 5084 johnlev 427 10958 dme xnbp->xnb_rx_hv_copy = B_FALSE; 428 10958 dme xnbp->xnb_multicast_control = B_FALSE; 429 5084 johnlev 430 7615 Max xnbp->xnb_rx_va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP); 431 7615 Max ASSERT(xnbp->xnb_rx_va != NULL); 432 5741 mrj 433 5741 mrj if (ddi_get_iblock_cookie(dip, 0, &xnbp->xnb_icookie) 434 5084 johnlev != DDI_SUCCESS) 435 5084 johnlev goto failure; 436 5084 johnlev 437 10958 dme /* Allocated on demand, when/if we enter xnb_copy_to_peer(). */ 438 7615 Max xnbp->xnb_rx_cpop = NULL; 439 10958 dme xnbp->xnb_rx_cpop_count = 0; 440 5741 mrj 441 5741 mrj mutex_init(&xnbp->xnb_tx_lock, NULL, MUTEX_DRIVER, 442 5741 mrj xnbp->xnb_icookie); 443 5741 mrj mutex_init(&xnbp->xnb_rx_lock, NULL, MUTEX_DRIVER, 444 5741 mrj xnbp->xnb_icookie); 445 10958 dme mutex_init(&xnbp->xnb_state_lock, NULL, MUTEX_DRIVER, 446 10958 dme xnbp->xnb_icookie); 447 5084 johnlev 448 10958 dme /* Set driver private pointer now. */ 449 5084 johnlev ddi_set_driver_private(dip, xnbp); 450 10958 dme 451 10958 dme (void) sprintf(cachename, "xnb_tx_buf_cache_%d", ddi_get_instance(dip)); 452 10958 dme xnbp->xnb_tx_buf_cache = kmem_cache_create(cachename, 453 10958 dme sizeof (xnb_txbuf_t), 0, 454 10958 dme xnb_txbuf_constructor, xnb_txbuf_destructor, 455 10958 dme NULL, xnbp, NULL, 0); 456 10958 dme if (xnbp->xnb_tx_buf_cache == NULL) 457 10958 dme goto failure_0; 458 5084 johnlev 459 5084 johnlev if (!xnb_ks_init(xnbp)) 460 5741 mrj goto failure_1; 461 5084 johnlev 462 5084 johnlev /* 463 5084 johnlev * Receive notification of changes in the state of the 464 5084 johnlev * driver in the guest domain. 465 5084 johnlev */ 466 7756 Mark if (xvdi_add_event_handler(dip, XS_OE_STATE, xnb_oe_state_change, 467 7756 Mark NULL) != DDI_SUCCESS) 468 5741 mrj goto failure_2; 469 5084 johnlev 470 5084 johnlev /* 471 5084 johnlev * Receive notification of hotplug events. 472 5084 johnlev */ 473 7756 Mark if (xvdi_add_event_handler(dip, XS_HP_STATE, xnb_hp_state_change, 474 7756 Mark NULL) != DDI_SUCCESS) 475 5741 mrj goto failure_2; 476 5084 johnlev 477 5084 johnlev xsname = xvdi_get_xsname(dip); 478 5084 johnlev 479 5084 johnlev if (xenbus_printf(XBT_NULL, xsname, 480 10958 dme "feature-multicast-control", "%d", 481 10958 dme xnb_multicast_control ? 1 : 0) != 0) 482 5741 mrj goto failure_3; 483 5741 mrj 484 5741 mrj if (xenbus_printf(XBT_NULL, xsname, 485 10958 dme "feature-rx-copy", "%d", 1) != 0) 486 5741 mrj goto failure_3; 487 5741 mrj /* 488 5741 mrj * Linux domUs seem to depend on "feature-rx-flip" being 0 489 5741 mrj * in addition to "feature-rx-copy" being 1. It seems strange 490 5741 mrj * to use four possible states to describe a binary decision, 491 5741 mrj * but we might as well play nice. 492 5741 mrj */ 493 5741 mrj if (xenbus_printf(XBT_NULL, xsname, 494 10958 dme "feature-rx-flip", "%d", 0) != 0) 495 5741 mrj goto failure_3; 496 5084 johnlev 497 5084 johnlev (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateInitWait); 498 5084 johnlev (void) xvdi_post_event(dip, XEN_HP_ADD); 499 5084 johnlev 500 5084 johnlev return (DDI_SUCCESS); 501 5084 johnlev 502 5741 mrj failure_3: 503 5084 johnlev xvdi_remove_event_handler(dip, NULL); 504 5084 johnlev 505 5741 mrj failure_2: 506 5084 johnlev xnb_ks_free(xnbp); 507 5084 johnlev 508 5741 mrj failure_1: 509 10958 dme kmem_cache_destroy(xnbp->xnb_tx_buf_cache); 510 10958 dme 511 10958 dme failure_0: 512 10958 dme mutex_destroy(&xnbp->xnb_state_lock); 513 5741 mrj mutex_destroy(&xnbp->xnb_rx_lock); 514 5741 mrj mutex_destroy(&xnbp->xnb_tx_lock); 515 5084 johnlev 516 5084 johnlev failure: 517 7615 Max vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE); 518 5084 johnlev kmem_free(xnbp, sizeof (*xnbp)); 519 5084 johnlev return (DDI_FAILURE); 520 5084 johnlev } 521 5084 johnlev 522 5084 johnlev void 523 5084 johnlev xnb_detach(dev_info_t *dip) 524 5084 johnlev { 525 5084 johnlev xnb_t *xnbp = ddi_get_driver_private(dip); 526 5084 johnlev 527 5084 johnlev ASSERT(xnbp != NULL); 528 5741 mrj ASSERT(!xnbp->xnb_connected); 529 7615 Max ASSERT(xnbp->xnb_tx_buf_count == 0); 530 5084 johnlev 531 5084 johnlev xnb_disconnect_rings(dip); 532 5084 johnlev 533 5084 johnlev xvdi_remove_event_handler(dip, NULL); 534 5084 johnlev 535 5084 johnlev xnb_ks_free(xnbp); 536 5084 johnlev 537 10958 dme kmem_cache_destroy(xnbp->xnb_tx_buf_cache); 538 10958 dme 539 5084 johnlev ddi_set_driver_private(dip, NULL); 540 5084 johnlev 541 10958 dme mutex_destroy(&xnbp->xnb_state_lock); 542 10958 dme mutex_destroy(&xnbp->xnb_rx_lock); 543 5741 mrj mutex_destroy(&xnbp->xnb_tx_lock); 544 5084 johnlev 545 10958 dme if (xnbp->xnb_rx_cpop_count > 0) 546 10958 dme kmem_free(xnbp->xnb_rx_cpop, sizeof (xnbp->xnb_rx_cpop[0]) 547 10958 dme * xnbp->xnb_rx_cpop_count); 548 5741 mrj 549 7615 Max ASSERT(xnbp->xnb_rx_va != NULL); 550 7615 Max vmem_free(heap_arena, xnbp->xnb_rx_va, PAGESIZE); 551 5084 johnlev 552 5084 johnlev kmem_free(xnbp, sizeof (*xnbp)); 553 5084 johnlev } 554 5084 johnlev 555 10958 dme /* 556 10958 dme * Allocate a page from the hypervisor to be flipped to the peer. 557 10958 dme * 558 10958 dme * Try to get pages in batches to reduce the overhead of calls into 559 10958 dme * the balloon driver. 560 10958 dme */ 561 5084 johnlev static mfn_t 562 5084 johnlev xnb_alloc_page(xnb_t *xnbp) 563 5084 johnlev { 564 5084 johnlev #define WARNING_RATE_LIMIT 100 565 5084 johnlev #define BATCH_SIZE 256 566 5084 johnlev static mfn_t mfns[BATCH_SIZE]; /* common across all instances */ 567 5084 johnlev static int nth = BATCH_SIZE; 568 5084 johnlev mfn_t mfn; 569 5084 johnlev 570 5084 johnlev mutex_enter(&xnb_alloc_page_lock); 571 5084 johnlev if (nth == BATCH_SIZE) { 572 5084 johnlev if (balloon_alloc_pages(BATCH_SIZE, mfns) != BATCH_SIZE) { 573 5741 mrj xnbp->xnb_stat_allocation_failure++; 574 5084 johnlev mutex_exit(&xnb_alloc_page_lock); 575 5084 johnlev 576 5084 johnlev /* 577 5084 johnlev * Try for a single page in low memory situations. 578 5084 johnlev */ 579 5084 johnlev if (balloon_alloc_pages(1, &mfn) != 1) { 580 5741 mrj if ((xnbp->xnb_stat_small_allocation_failure++ 581 5741 mrj % WARNING_RATE_LIMIT) == 0) 582 5084 johnlev cmn_err(CE_WARN, "xnb_alloc_page: " 583 5084 johnlev "Cannot allocate memory to " 584 5084 johnlev "transfer packets to peer."); 585 5084 johnlev return (0); 586 5084 johnlev } else { 587 5741 mrj xnbp->xnb_stat_small_allocation_success++; 588 5084 johnlev return (mfn); 589 5084 johnlev } 590 5084 johnlev } 591 5084 johnlev 592 5084 johnlev nth = 0; 593 5741 mrj xnbp->xnb_stat_allocation_success++; 594 5084 johnlev } 595 5084 johnlev 596 5084 johnlev mfn = mfns[nth++]; 597 5084 johnlev mutex_exit(&xnb_alloc_page_lock); 598 5084 johnlev 599 5084 johnlev ASSERT(mfn != 0); 600 5084 johnlev 601 5084 johnlev return (mfn); 602 5084 johnlev #undef BATCH_SIZE 603 5084 johnlev #undef WARNING_RATE_LIMIT 604 5084 johnlev } 605 5084 johnlev 606 10958 dme /* 607 10958 dme * Free a page back to the hypervisor. 608 10958 dme * 609 10958 dme * This happens only in the error path, so batching is not worth the 610 10958 dme * complication. 611 10958 dme */ 612 5084 johnlev static void 613 5084 johnlev xnb_free_page(xnb_t *xnbp, mfn_t mfn) 614 5084 johnlev { 615 10958 dme _NOTE(ARGUNUSED(xnbp)); 616 5084 johnlev int r; 617 5262 rscott pfn_t pfn; 618 5262 rscott 619 5262 rscott pfn = xen_assign_pfn(mfn); 620 5262 rscott pfnzero(pfn, 0, PAGESIZE); 621 5262 rscott xen_release_pfn(pfn); 622 5084 johnlev 623 5084 johnlev if ((r = balloon_free_pages(1, &mfn, NULL, NULL)) != 1) { 624 5084 johnlev cmn_err(CE_WARN, "free_page: cannot decrease memory " 625 5084 johnlev "reservation (%d): page kept but unusable (mfn = 0x%lx).", 626 5084 johnlev r, mfn); 627 5084 johnlev } 628 5084 johnlev } 629 5084 johnlev 630 5741 mrj /* 631 10958 dme * Similar to RING_HAS_UNCONSUMED_REQUESTS(&xnbp->rx_ring) but using 632 10958 dme * local variables. Used in both xnb_to_peer() and xnb_copy_to_peer(). 633 5741 mrj */ 634 5741 mrj #define XNB_RING_HAS_UNCONSUMED_REQUESTS(_r) \ 635 5741 mrj ((((_r)->sring->req_prod - loop) < \ 636 5741 mrj (RING_SIZE(_r) - (loop - prod))) ? \ 637 5741 mrj ((_r)->sring->req_prod - loop) : \ 638 5741 mrj (RING_SIZE(_r) - (loop - prod))) 639 5741 mrj 640 10958 dme /* 641 10958 dme * Pass packets to the peer using page flipping. 642 10958 dme */ 643 5084 johnlev mblk_t * 644 5084 johnlev xnb_to_peer(xnb_t *xnbp, mblk_t *mp) 645 5084 johnlev { 646 5084 johnlev mblk_t *free = mp, *prev = NULL; 647 5084 johnlev size_t len; 648 5084 johnlev gnttab_transfer_t *gop; 649 5084 johnlev boolean_t notify; 650 5084 johnlev RING_IDX loop, prod, end; 651 5084 johnlev 652 5084 johnlev /* 653 5084 johnlev * For each packet the sequence of operations is: 654 5084 johnlev * 655 5084 johnlev * 1. get a new page from the hypervisor. 656 5084 johnlev * 2. get a request slot from the ring. 657 5084 johnlev * 3. copy the data into the new page. 658 5084 johnlev * 4. transfer the page to the peer. 659 5084 johnlev * 5. update the request slot. 660 5084 johnlev * 6. kick the peer. 661 5084 johnlev * 7. free mp. 662 5084 johnlev * 663 5084 johnlev * In order to reduce the number of hypercalls, we prepare 664 5084 johnlev * several packets for the peer and perform a single hypercall 665 5084 johnlev * to transfer them. 666 5084 johnlev */ 667 5084 johnlev 668 7615 Max mutex_enter(&xnbp->xnb_rx_lock); 669 5084 johnlev 670 5084 johnlev /* 671 5084 johnlev * If we are not connected to the peer or have not yet 672 5084 johnlev * finished hotplug it is too early to pass packets to the 673 5084 johnlev * peer. 674 5084 johnlev */ 675 5741 mrj if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) { 676 7615 Max mutex_exit(&xnbp->xnb_rx_lock); 677 7615 Max DTRACE_PROBE(flip_rx_too_early); 678 7615 Max xnbp->xnb_stat_rx_too_early++; 679 5084 johnlev return (mp); 680 5084 johnlev } 681 5084 johnlev 682 5741 mrj loop = xnbp->xnb_rx_ring.req_cons; 683 5741 mrj prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 684 7615 Max gop = xnbp->xnb_rx_top; 685 5084 johnlev 686 5084 johnlev while ((mp != NULL) && 687 5741 mrj XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) { 688 5084 johnlev 689 5084 johnlev mfn_t mfn; 690 5084 johnlev pfn_t pfn; 691 5084 johnlev netif_rx_request_t *rxreq; 692 5084 johnlev netif_rx_response_t *rxresp; 693 5084 johnlev char *valoop; 694 5084 johnlev mblk_t *ml; 695 5084 johnlev uint16_t cksum_flags; 696 5084 johnlev 697 5084 johnlev /* 1 */ 698 5084 johnlev if ((mfn = xnb_alloc_page(xnbp)) == 0) { 699 7615 Max xnbp->xnb_stat_rx_defer++; 700 5084 johnlev break; 701 5084 johnlev } 702 5084 johnlev 703 5084 johnlev /* 2 */ 704 5741 mrj rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop); 705 5084 johnlev 706 5084 johnlev #ifdef XNB_DEBUG 707 5084 johnlev if (!(rxreq->id < NET_RX_RING_SIZE)) 708 5084 johnlev cmn_err(CE_PANIC, "xnb_to_peer: " 709 5084 johnlev "id %d out of range in request 0x%p", 710 5084 johnlev rxreq->id, (void *)rxreq); 711 5084 johnlev #endif /* XNB_DEBUG */ 712 5084 johnlev 713 5084 johnlev /* Assign a pfn and map the new page at the allocated va. */ 714 5084 johnlev pfn = xen_assign_pfn(mfn); 715 7615 Max hat_devload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE, 716 5084 johnlev pfn, PROT_READ | PROT_WRITE, HAT_LOAD); 717 5084 johnlev 718 5084 johnlev /* 3 */ 719 5084 johnlev len = 0; 720 8757 dme valoop = xnbp->xnb_rx_va; 721 5084 johnlev for (ml = mp; ml != NULL; ml = ml->b_cont) { 722 5084 johnlev size_t chunk = ml->b_wptr - ml->b_rptr; 723 5084 johnlev 724 5084 johnlev bcopy(ml->b_rptr, valoop, chunk); 725 5084 johnlev valoop += chunk; 726 5084 johnlev len += chunk; 727 5084 johnlev } 728 5084 johnlev 729 8757 dme ASSERT(len < PAGESIZE); 730 5084 johnlev 731 5084 johnlev /* Release the pfn. */ 732 7615 Max hat_unload(kas.a_hat, xnbp->xnb_rx_va, PAGESIZE, 733 5084 johnlev HAT_UNLOAD_UNMAP); 734 5084 johnlev xen_release_pfn(pfn); 735 5084 johnlev 736 5084 johnlev /* 4 */ 737 5084 johnlev gop->mfn = mfn; 738 5741 mrj gop->domid = xnbp->xnb_peer; 739 5084 johnlev gop->ref = rxreq->gref; 740 5084 johnlev 741 5084 johnlev /* 5.1 */ 742 5741 mrj rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod); 743 8757 dme rxresp->offset = 0; 744 5084 johnlev rxresp->flags = 0; 745 5084 johnlev 746 5741 mrj cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp); 747 5084 johnlev if (cksum_flags != 0) 748 7615 Max xnbp->xnb_stat_rx_cksum_deferred++; 749 5084 johnlev rxresp->flags |= cksum_flags; 750 5084 johnlev 751 5741 mrj rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id; 752 5084 johnlev rxresp->status = len; 753 5084 johnlev 754 5084 johnlev loop++; 755 5084 johnlev prod++; 756 5084 johnlev gop++; 757 5084 johnlev prev = mp; 758 5084 johnlev mp = mp->b_next; 759 5084 johnlev } 760 5084 johnlev 761 5084 johnlev /* 762 5084 johnlev * Did we actually do anything? 763 5084 johnlev */ 764 5741 mrj if (loop == xnbp->xnb_rx_ring.req_cons) { 765 7615 Max mutex_exit(&xnbp->xnb_rx_lock); 766 5084 johnlev return (mp); 767 5084 johnlev } 768 5084 johnlev 769 5084 johnlev end = loop; 770 5084 johnlev 771 5084 johnlev /* 772 5084 johnlev * Unlink the end of the 'done' list from the remainder. 773 5084 johnlev */ 774 5084 johnlev ASSERT(prev != NULL); 775 5084 johnlev prev->b_next = NULL; 776 5084 johnlev 777 7615 Max if (HYPERVISOR_grant_table_op(GNTTABOP_transfer, xnbp->xnb_rx_top, 778 5741 mrj loop - xnbp->xnb_rx_ring.req_cons) != 0) { 779 5084 johnlev cmn_err(CE_WARN, "xnb_to_peer: transfer operation failed"); 780 5084 johnlev } 781 5084 johnlev 782 5741 mrj loop = xnbp->xnb_rx_ring.req_cons; 783 5741 mrj prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 784 7615 Max gop = xnbp->xnb_rx_top; 785 5084 johnlev 786 5084 johnlev while (loop < end) { 787 5084 johnlev int16_t status = NETIF_RSP_OKAY; 788 5084 johnlev 789 5084 johnlev if (gop->status != 0) { 790 5084 johnlev status = NETIF_RSP_ERROR; 791 5084 johnlev 792 5084 johnlev /* 793 5084 johnlev * If the status is anything other than 794 5084 johnlev * GNTST_bad_page then we don't own the page 795 5084 johnlev * any more, so don't try to give it back. 796 5084 johnlev */ 797 5084 johnlev if (gop->status != GNTST_bad_page) 798 5084 johnlev gop->mfn = 0; 799 5084 johnlev } else { 800 5084 johnlev /* The page is no longer ours. */ 801 5084 johnlev gop->mfn = 0; 802 5084 johnlev } 803 5084 johnlev 804 5084 johnlev if (gop->mfn != 0) 805 5084 johnlev /* 806 5084 johnlev * Give back the page, as we won't be using 807 5084 johnlev * it. 808 5084 johnlev */ 809 5084 johnlev xnb_free_page(xnbp, gop->mfn); 810 5084 johnlev else 811 5084 johnlev /* 812 5084 johnlev * We gave away a page, update our accounting 813 5084 johnlev * now. 814 5084 johnlev */ 815 5084 johnlev balloon_drv_subtracted(1); 816 5084 johnlev 817 5084 johnlev /* 5.2 */ 818 5084 johnlev if (status != NETIF_RSP_OKAY) { 819 5741 mrj RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status = 820 5084 johnlev status; 821 5084 johnlev } else { 822 7615 Max xnbp->xnb_stat_ipackets++; 823 7615 Max xnbp->xnb_stat_rbytes += len; 824 5084 johnlev } 825 5084 johnlev 826 5084 johnlev loop++; 827 5084 johnlev prod++; 828 5084 johnlev gop++; 829 5084 johnlev } 830 5084 johnlev 831 5741 mrj xnbp->xnb_rx_ring.req_cons = loop; 832 5741 mrj xnbp->xnb_rx_ring.rsp_prod_pvt = prod; 833 5084 johnlev 834 5084 johnlev /* 6 */ 835 5741 mrj /* LINTED: constant in conditional context */ 836 5741 mrj RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify); 837 5084 johnlev if (notify) { 838 5741 mrj ec_notify_via_evtchn(xnbp->xnb_evtchn); 839 7615 Max xnbp->xnb_stat_rx_notify_sent++; 840 5084 johnlev } else { 841 7615 Max xnbp->xnb_stat_rx_notify_deferred++; 842 5084 johnlev } 843 5084 johnlev 844 5084 johnlev if (mp != NULL) 845 7615 Max xnbp->xnb_stat_rx_defer++; 846 5084 johnlev 847 7615 Max mutex_exit(&xnbp->xnb_rx_lock); 848 5084 johnlev 849 5084 johnlev /* Free mblk_t's that we consumed. */ 850 5741 mrj freemsgchain(free); 851 5741 mrj 852 5741 mrj return (mp); 853 5741 mrj } 854 5741 mrj 855 10958 dme /* Helper functions for xnb_copy_to_peer(). */ 856 5741 mrj 857 5741 mrj /* 858 5741 mrj * Grow the array of copy operation descriptors. 859 5741 mrj */ 860 10958 dme static boolean_t 861 10958 dme grow_cpop_area(xnb_t *xnbp) 862 5741 mrj { 863 10958 dme size_t count; 864 10958 dme gnttab_copy_t *new; 865 5741 mrj 866 7615 Max ASSERT(MUTEX_HELD(&xnbp->xnb_rx_lock)); 867 5741 mrj 868 10958 dme count = xnbp->xnb_rx_cpop_count + CPOP_DEFCNT; 869 5741 mrj 870 10958 dme if ((new = kmem_alloc(sizeof (new[0]) * count, KM_NOSLEEP)) == NULL) { 871 5741 mrj xnbp->xnb_stat_other_allocation_failure++; 872 10958 dme return (B_FALSE); 873 5741 mrj } 874 5741 mrj 875 10958 dme bcopy(xnbp->xnb_rx_cpop, new, 876 10958 dme sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count); 877 5741 mrj 878 10958 dme kmem_free(xnbp->xnb_rx_cpop, 879 10958 dme sizeof (xnbp->xnb_rx_cpop[0]) * xnbp->xnb_rx_cpop_count); 880 5741 mrj 881 10958 dme xnbp->xnb_rx_cpop = new; 882 10958 dme xnbp->xnb_rx_cpop_count = count; 883 5741 mrj 884 7615 Max xnbp->xnb_stat_rx_cpoparea_grown++; 885 5741 mrj 886 10958 dme return (B_TRUE); 887 5741 mrj } 888 5741 mrj 889 5741 mrj /* 890 5741 mrj * Check whether an address is on a page that's foreign to this domain. 891 5741 mrj */ 892 5741 mrj static boolean_t 893 5741 mrj is_foreign(void *addr) 894 5741 mrj { 895 10958 dme pfn_t pfn = hat_getpfnum(kas.a_hat, addr); 896 5741 mrj 897 10958 dme return ((pfn & PFN_IS_FOREIGN_MFN) == PFN_IS_FOREIGN_MFN); 898 5741 mrj } 899 5741 mrj 900 5741 mrj /* 901 5741 mrj * Insert a newly allocated mblk into a chain, replacing the old one. 902 5741 mrj */ 903 5741 mrj static mblk_t * 904 5741 mrj replace_msg(mblk_t *mp, size_t len, mblk_t *mp_prev, mblk_t *ml_prev) 905 5741 mrj { 906 5741 mrj uint32_t start, stuff, end, value, flags; 907 5741 mrj mblk_t *new_mp; 908 5741 mrj 909 5741 mrj new_mp = copyb(mp); 910 5741 mrj if (new_mp == NULL) 911 5741 mrj cmn_err(CE_PANIC, "replace_msg: cannot alloc new message" 912 5741 mrj "for %p, len %lu", (void *) mp, len); 913 5741 mrj 914 5741 mrj hcksum_retrieve(mp, NULL, NULL, &start, &stuff, &end, &value, &flags); 915 5741 mrj (void) hcksum_assoc(new_mp, NULL, NULL, start, stuff, end, value, 916 5741 mrj flags, KM_NOSLEEP); 917 5741 mrj 918 5741 mrj new_mp->b_next = mp->b_next; 919 5741 mrj new_mp->b_prev = mp->b_prev; 920 5741 mrj new_mp->b_cont = mp->b_cont; 921 5741 mrj 922 5741 mrj /* Make sure we only overwrite pointers to the mblk being replaced. */ 923 5741 mrj if (mp_prev != NULL && mp_prev->b_next == mp) 924 5741 mrj mp_prev->b_next = new_mp; 925 5741 mrj 926 5741 mrj if (ml_prev != NULL && ml_prev->b_cont == mp) 927 5741 mrj ml_prev->b_cont = new_mp; 928 5741 mrj 929 5741 mrj mp->b_next = mp->b_prev = mp->b_cont = NULL; 930 5741 mrj freemsg(mp); 931 5741 mrj 932 5741 mrj return (new_mp); 933 5741 mrj } 934 5741 mrj 935 5741 mrj /* 936 5741 mrj * Set all the fields in a gnttab_copy_t. 937 5741 mrj */ 938 5741 mrj static void 939 5741 mrj setup_gop(xnb_t *xnbp, gnttab_copy_t *gp, uchar_t *rptr, 940 5741 mrj size_t s_off, size_t d_off, size_t len, grant_ref_t d_ref) 941 5741 mrj { 942 5741 mrj ASSERT(xnbp != NULL && gp != NULL); 943 5741 mrj 944 5741 mrj gp->source.offset = s_off; 945 5741 mrj gp->source.u.gmfn = pfn_to_mfn(hat_getpfnum(kas.a_hat, (caddr_t)rptr)); 946 5741 mrj gp->source.domid = DOMID_SELF; 947 5741 mrj 948 5741 mrj gp->len = (uint16_t)len; 949 5741 mrj gp->flags = GNTCOPY_dest_gref; 950 5741 mrj gp->status = 0; 951 5741 mrj 952 5741 mrj gp->dest.u.ref = d_ref; 953 5741 mrj gp->dest.offset = d_off; 954 5741 mrj gp->dest.domid = xnbp->xnb_peer; 955 5741 mrj } 956 5741 mrj 957 10958 dme /* 958 10958 dme * Pass packets to the peer using hypervisor copy operations. 959 10958 dme */ 960 5741 mrj mblk_t * 961 5741 mrj xnb_copy_to_peer(xnb_t *xnbp, mblk_t *mp) 962 5741 mrj { 963 5741 mrj mblk_t *free = mp, *mp_prev = NULL, *saved_mp = mp; 964 5741 mrj mblk_t *ml, *ml_prev; 965 5741 mrj boolean_t notify; 966 5741 mrj RING_IDX loop, prod; 967 5741 mrj int i; 968 5741 mrj 969 10958 dme /* 970 10958 dme * If the peer does not pre-post buffers for received packets, 971 10958 dme * use page flipping to pass packets to it. 972 10958 dme */ 973 10958 dme if (!xnbp->xnb_rx_hv_copy) 974 5741 mrj return (xnb_to_peer(xnbp, mp)); 975 5741 mrj 976 5741 mrj /* 977 5741 mrj * For each packet the sequence of operations is: 978 5741 mrj * 979 5741 mrj * 1. get a request slot from the ring. 980 5741 mrj * 2. set up data for hypercall (see NOTE below) 981 5741 mrj * 3. have the hypervisore copy the data 982 5741 mrj * 4. update the request slot. 983 5741 mrj * 5. kick the peer. 984 5741 mrj * 985 5741 mrj * NOTE ad 2. 986 5741 mrj * In order to reduce the number of hypercalls, we prepare 987 10958 dme * several mblks (mp->b_cont != NULL) for the peer and 988 10958 dme * perform a single hypercall to transfer them. We also have 989 10958 dme * to set up a seperate copy operation for every page. 990 5741 mrj * 991 10958 dme * If we have more than one packet (mp->b_next != NULL), we do 992 10958 dme * this whole dance repeatedly. 993 5741 mrj */ 994 5741 mrj 995 7615 Max mutex_enter(&xnbp->xnb_rx_lock); 996 5741 mrj 997 5741 mrj if (!(xnbp->xnb_connected && xnbp->xnb_hotplugged)) { 998 7615 Max mutex_exit(&xnbp->xnb_rx_lock); 999 7615 Max DTRACE_PROBE(copy_rx_too_early); 1000 7615 Max xnbp->xnb_stat_rx_too_early++; 1001 5741 mrj return (mp); 1002 5741 mrj } 1003 5741 mrj 1004 5741 mrj loop = xnbp->xnb_rx_ring.req_cons; 1005 5741 mrj prod = xnbp->xnb_rx_ring.rsp_prod_pvt; 1006 5741 mrj 1007 5741 mrj while ((mp != NULL) && 1008 5741 mrj XNB_RING_HAS_UNCONSUMED_REQUESTS(&xnbp->xnb_rx_ring)) { 1009 5741 mrj netif_rx_request_t *rxreq; 1010 10958 dme size_t d_offset, len; 1011 10958 dme int item_count; 1012 10958 dme gnttab_copy_t *gop_cp; 1013 5741 mrj netif_rx_response_t *rxresp; 1014 5741 mrj uint16_t cksum_flags; 1015 5741 mrj int16_t status = NETIF_RSP_OKAY; 1016 5741 mrj 1017 5741 mrj /* 1 */ 1018 5741 mrj rxreq = RING_GET_REQUEST(&xnbp->xnb_rx_ring, loop); 1019 5741 mrj 1020 5741 mrj #ifdef XNB_DEBUG 1021 5741 mrj if (!(rxreq->id < NET_RX_RING_SIZE)) 1022 5741 mrj cmn_err(CE_PANIC, "xnb_copy_to_peer: " 1023 5741 mrj "id %d out of range in request 0x%p", 1024 5741 mrj rxreq->id, (void *)rxreq); 1025 5741 mrj #endif /* XNB_DEBUG */ 1026 5741 mrj 1027 5741 mrj /* 2 */ 1028 8757 dme d_offset = 0; 1029 5741 mrj len = 0; 1030 5741 mrj item_count = 0; 1031 5741 mrj 1032 7615 Max gop_cp = xnbp->xnb_rx_cpop; 1033 5741 mrj 1034 5741 mrj /* 1035 10958 dme * We walk the b_cont pointers and set up a 1036 10958 dme * gnttab_copy_t for each sub-page chunk in each data 1037 10958 dme * block. 1038 5741 mrj */ 1039 5741 mrj /* 2a */ 1040 5741 mrj for (ml = mp, ml_prev = NULL; ml != NULL; ml = ml->b_cont) { 1041 5741 mrj size_t chunk = ml->b_wptr - ml->b_rptr; 1042 5741 mrj uchar_t *r_tmp, *rpt_align; 1043 5741 mrj size_t r_offset; 1044 5741 mrj 1045 5741 mrj /* 1046 10958 dme * The hypervisor will not allow us to 1047 10958 dme * reference a foreign page (e.g. one 1048 10958 dme * belonging to another domain) by mfn in the 1049 10958 dme * copy operation. If the data in this mblk is 1050 10958 dme * on such a page we must copy the data into a 1051 10958 dme * local page before initiating the hypervisor 1052 10958 dme * copy operation. 1053 5741 mrj */ 1054 5741 mrj if (is_foreign(ml->b_rptr) || is_foreign(ml->b_wptr)) { 1055 5741 mrj mblk_t *ml_new = replace_msg(ml, chunk, 1056 5741 mrj mp_prev, ml_prev); 1057 5741 mrj 1058 5741 mrj /* We can still use old ml, but not *ml! */ 1059 5741 mrj if (free == ml) 1060 5741 mrj free = ml_new; 1061 5741 mrj if (mp == ml) 1062 5741 mrj mp = ml_new; 1063 5741 mrj ml = ml_new; 1064 5741 mrj 1065 7615 Max xnbp->xnb_stat_rx_foreign_page++; 1066 5741 mrj } 1067 5741 mrj 1068 5741 mrj rpt_align = (uchar_t *)ALIGN2PAGE(ml->b_rptr); 1069 5741 mrj r_offset = (uint16_t)(ml->b_rptr - rpt_align); 1070 5741 mrj r_tmp = ml->b_rptr; 1071 5741 mrj 1072 5741 mrj if (d_offset + chunk > PAGESIZE) 1073 5741 mrj cmn_err(CE_PANIC, "xnb_copy_to_peer: mp %p " 1074 5741 mrj "(svd: %p), ml %p,rpt_alg. %p, d_offset " 1075 5741 mrj "(%lu) + chunk (%lu) > PAGESIZE %d!", 1076 5741 mrj (void *)mp, (void *)saved_mp, (void *)ml, 1077 5741 mrj (void *)rpt_align, 1078 5741 mrj d_offset, chunk, (int)PAGESIZE); 1079 5741 mrj 1080 5741 mrj while (chunk > 0) { 1081 5741 mrj size_t part_len; 1082 5741 mrj 1083 10958 dme if (item_count == xnbp->xnb_rx_cpop_count) { 1084 10958 dme if (!grow_cpop_area(xnbp)) 1085 5741 mrj goto failure; 1086 10958 dme gop_cp = &xnbp->xnb_rx_cpop[item_count]; 1087 5741 mrj } 1088 5741 mrj /* 1089 5741 mrj * If our mblk crosses a page boundary, we need 1090 10958 dme * to do a seperate copy for each page. 1091 5741 mrj */ 1092 5741 mrj if (r_offset + chunk > PAGESIZE) { 1093 5741 mrj part_len = PAGESIZE - r_offset; 1094 5741 mrj 1095 5741 mrj DTRACE_PROBE3(mblk_page_crossed, 1096 5741 mrj (mblk_t *), ml, int, chunk, int, 1097 5741 mrj (int)r_offset); 1098 5741 mrj 1099 7615 Max xnbp->xnb_stat_rx_pagebndry_crossed++; 1100 5741 mrj } else { 1101 5741 mrj part_len = chunk; 1102 5741 mrj } 1103 5741 mrj 1104 5741 mrj setup_gop(xnbp, gop_cp, r_tmp, r_offset, 1105 5741 mrj d_offset, part_len, rxreq->gref); 1106 5741 mrj 1107 5741 mrj chunk -= part_len; 1108 5741 mrj 1109 5741 mrj len += part_len; 1110 5741 mrj d_offset += part_len; 1111 5741 mrj r_tmp += part_len; 1112 5741 mrj /* 1113 5741 mrj * The 2nd, 3rd ... last copies will always 1114 5741 mrj * start at r_tmp, therefore r_offset is 0. 1115 5741 mrj */ 1116 5741 mrj r_offset = 0; 1117 5741 mrj gop_cp++; 1118 10958 dme item_count++; 1119 5741 mrj } 1120 5741 mrj ml_prev = ml; 1121 10958 dme 1122 5741 mrj DTRACE_PROBE4(mblk_loop_end, (mblk_t *), ml, int, 1123 5741 mrj chunk, int, len, int, item_count); 1124 5741 mrj } 1125 5741 mrj /* 3 */ 1126 7615 Max if (HYPERVISOR_grant_table_op(GNTTABOP_copy, xnbp->xnb_rx_cpop, 1127 5741 mrj item_count) != 0) { 1128 5741 mrj cmn_err(CE_WARN, "xnb_copy_to_peer: copy op. failed"); 1129 5741 mrj DTRACE_PROBE(HV_granttableopfailed); 1130 5741 mrj } 1131 5741 mrj 1132 5741 mrj /* 4 */ 1133 5741 mrj rxresp = RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod); 1134 8757 dme rxresp->offset = 0; 1135 5741 mrj 1136 5741 mrj rxresp->flags = 0; 1137 5741 mrj 1138 5741 mrj DTRACE_PROBE4(got_RX_rsp, int, (int)rxresp->id, int, 1139 5741 mrj (int)rxresp->offset, int, (int)rxresp->flags, int, 1140 5741 mrj (int)rxresp->status); 1141 5741 mrj 1142 5741 mrj cksum_flags = xnbp->xnb_flavour->xf_cksum_to_peer(xnbp, mp); 1143 5741 mrj if (cksum_flags != 0) 1144 7615 Max xnbp->xnb_stat_rx_cksum_deferred++; 1145 5741 mrj rxresp->flags |= cksum_flags; 1146 5741 mrj 1147 5741 mrj rxresp->id = RING_GET_REQUEST(&xnbp->xnb_rx_ring, prod)->id; 1148 5741 mrj rxresp->status = len; 1149 5741 mrj 1150 5741 mrj DTRACE_PROBE4(RX_rsp_set, int, (int)rxresp->id, int, 1151 5741 mrj (int)rxresp->offset, int, (int)rxresp->flags, int, 1152 5741 mrj (int)rxresp->status); 1153 5741 mrj 1154 5741 mrj for (i = 0; i < item_count; i++) { 1155 7615 Max if (xnbp->xnb_rx_cpop[i].status != 0) { 1156 10958 dme DTRACE_PROBE2(cpop_status_nonnull, int, 1157 7615 Max (int)xnbp->xnb_rx_cpop[i].status, 1158 5741 mrj int, i); 1159 5741 mrj status = NETIF_RSP_ERROR; 1160 5741 mrj } 1161 5741 mrj } 1162 5741 mrj 1163 5741 mrj /* 5.2 */ 1164 5741 mrj if (status != NETIF_RSP_OKAY) { 1165 5741 mrj RING_GET_RESPONSE(&xnbp->xnb_rx_ring, prod)->status = 1166 5741 mrj status; 1167 7615 Max xnbp->xnb_stat_rx_rsp_notok++; 1168 5741 mrj } else { 1169 7615 Max xnbp->xnb_stat_ipackets++; 1170 7615 Max xnbp->xnb_stat_rbytes += len; 1171 5741 mrj } 1172 5741 mrj 1173 5741 mrj loop++; 1174 5741 mrj prod++; 1175 5741 mrj mp_prev = mp; 1176 5741 mrj mp = mp->b_next; 1177 5741 mrj } 1178 5741 mrj failure: 1179 5741 mrj /* 1180 5741 mrj * Did we actually do anything? 1181 5741 mrj */ 1182 5741 mrj if (loop == xnbp->xnb_rx_ring.req_cons) { 1183 7615 Max mutex_exit(&xnbp->xnb_rx_lock); 1184 5741 mrj return (mp); 1185 5741 mrj } 1186 5741 mrj 1187 5741 mrj /* 1188 5741 mrj * Unlink the end of the 'done' list from the remainder. 1189 5741 mrj */ 1190 5741 mrj ASSERT(mp_prev != NULL); 1191 5741 mrj mp_prev->b_next = NULL; 1192 5741 mrj 1193 5741 mrj xnbp->xnb_rx_ring.req_cons = loop; 1194 5741 mrj xnbp->xnb_rx_ring.rsp_prod_pvt = prod; 1195 5741 mrj 1196 5741 mrj /* 6 */ 1197 5741 mrj /* LINTED: constant in conditional context */ 1198 5741 mrj RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_rx_ring, notify); 1199 5741 mrj if (notify) { 1200 5741 mrj ec_notify_via_evtchn(xnbp->xnb_evtchn); 1201 7615 Max xnbp->xnb_stat_rx_notify_sent++; 1202 5741 mrj } else { 1203 7615 Max xnbp->xnb_stat_rx_notify_deferred++; 1204 5741 mrj } 1205 5741 mrj 1206 5741 mrj if (mp != NULL) 1207 7615 Max xnbp->xnb_stat_rx_defer++; 1208 5741 mrj 1209 7615 Max mutex_exit(&xnbp->xnb_rx_lock); 1210 5741 mrj 1211 5741 mrj /* Free mblk_t structs we have consumed. */ 1212 5084 johnlev freemsgchain(free); 1213 5084 johnlev 1214 5084 johnlev return (mp); 1215 5084 johnlev } 1216 5084 johnlev 1217 5084 johnlev 1218 5084 johnlev static void 1219 10958 dme xnb_tx_notify_peer(xnb_t *xnbp, boolean_t force) 1220 5084 johnlev { 1221 5084 johnlev boolean_t notify; 1222 5084 johnlev 1223 7615 Max ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1224 5084 johnlev 1225 5741 mrj /* LINTED: constant in conditional context */ 1226 5741 mrj RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&xnbp->xnb_tx_ring, notify); 1227 10958 dme if (notify || force) { 1228 5741 mrj ec_notify_via_evtchn(xnbp->xnb_evtchn); 1229 7615 Max xnbp->xnb_stat_tx_notify_sent++; 1230 5084 johnlev } else { 1231 7615 Max xnbp->xnb_stat_tx_notify_deferred++; 1232 5084 johnlev } 1233 5084 johnlev } 1234 5084 johnlev 1235 5084 johnlev static void 1236 7615 Max xnb_tx_mark_complete(xnb_t *xnbp, RING_IDX id, int16_t status) 1237 5084 johnlev { 1238 5084 johnlev RING_IDX i; 1239 5084 johnlev netif_tx_response_t *txresp; 1240 5084 johnlev 1241 7615 Max ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1242 5084 johnlev 1243 5741 mrj i = xnbp->xnb_tx_ring.rsp_prod_pvt; 1244 5084 johnlev 1245 5741 mrj txresp = RING_GET_RESPONSE(&xnbp->xnb_tx_ring, i); 1246 5084 johnlev txresp->id = id; 1247 5084 johnlev txresp->status = status; 1248 5084 johnlev 1249 5741 mrj xnbp->xnb_tx_ring.rsp_prod_pvt = i + 1; 1250 5084 johnlev 1251 5084 johnlev /* 1252 5084 johnlev * Note that we don't push the change to the peer here - that 1253 5084 johnlev * is the callers responsibility. 1254 5084 johnlev */ 1255 5084 johnlev } 1256 5084 johnlev 1257 5084 johnlev static void 1258 10958 dme xnb_txbuf_recycle(xnb_txbuf_t *txp) 1259 5084 johnlev { 1260 10958 dme xnb_t *xnbp = txp->xt_xnbp; 1261 5084 johnlev 1262 10958 dme kmem_cache_free(xnbp->xnb_tx_buf_cache, txp); 1263 5084 johnlev 1264 10958 dme xnbp->xnb_tx_buf_outstanding--; 1265 10958 dme } 1266 5084 johnlev 1267 10958 dme static int 1268 10958 dme xnb_txbuf_constructor(void *buf, void *arg, int kmflag) 1269 10958 dme { 1270 10958 dme _NOTE(ARGUNUSED(kmflag)); 1271 10958 dme xnb_txbuf_t *txp = buf; 1272 10958 dme xnb_t *xnbp = arg; 1273 10958 dme size_t len; 1274 10958 dme ddi_dma_cookie_t dma_cookie; 1275 10958 dme uint_t ncookies; 1276 5741 mrj 1277 10958 dme txp->xt_free_rtn.free_func = xnb_txbuf_recycle; 1278 10958 dme txp->xt_free_rtn.free_arg = (caddr_t)txp; 1279 10958 dme txp->xt_xnbp = xnbp; 1280 10958 dme txp->xt_next = NULL; 1281 5084 johnlev 1282 10958 dme if (ddi_dma_alloc_handle(xnbp->xnb_devinfo, &buf_dma_attr, 1283 10958 dme 0, 0, &txp->xt_dma_handle) != DDI_SUCCESS) 1284 10958 dme goto failure; 1285 5084 johnlev 1286 10958 dme if (ddi_dma_mem_alloc(txp->xt_dma_handle, PAGESIZE, &data_accattr, 1287 10958 dme DDI_DMA_STREAMING, 0, 0, &txp->xt_buf, &len, 1288 10958 dme &txp->xt_acc_handle) != DDI_SUCCESS) 1289 10958 dme goto failure_1; 1290 5741 mrj 1291 10958 dme if (ddi_dma_addr_bind_handle(txp->xt_dma_handle, NULL, txp->xt_buf, 1292 10958 dme len, DDI_DMA_RDWR | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT, 0, 1293 10958 dme &dma_cookie, &ncookies) 1294 10958 dme != DDI_DMA_MAPPED) 1295 10958 dme goto failure_2; 1296 10958 dme ASSERT(ncookies == 1); 1297 10958 dme 1298 10958 dme txp->xt_mfn = xnb_btop(dma_cookie.dmac_laddress); 1299 10958 dme txp->xt_buflen = dma_cookie.dmac_size; 1300 10958 dme 1301 10958 dme DTRACE_PROBE(txbuf_allocated); 1302 10958 dme 1303 10958 dme atomic_add_32(&xnbp->xnb_tx_buf_count, 1); 1304 10958 dme xnbp->xnb_tx_buf_outstanding++; 1305 10958 dme 1306 10958 dme return (0); 1307 10958 dme 1308 10958 dme failure_2: 1309 10958 dme ddi_dma_mem_free(&txp->xt_acc_handle); 1310 10958 dme 1311 10958 dme failure_1: 1312 10958 dme ddi_dma_free_handle(&txp->xt_dma_handle); 1313 10958 dme 1314 10958 dme failure: 1315 10958 dme 1316 10958 dme return (-1); 1317 10958 dme } 1318 10958 dme 1319 10958 dme static void 1320 10958 dme xnb_txbuf_destructor(void *buf, void *arg) 1321 10958 dme { 1322 10958 dme xnb_txbuf_t *txp = buf; 1323 10958 dme xnb_t *xnbp = arg; 1324 10958 dme 1325 10958 dme (void) ddi_dma_unbind_handle(txp->xt_dma_handle); 1326 10958 dme ddi_dma_mem_free(&txp->xt_acc_handle); 1327 10958 dme ddi_dma_free_handle(&txp->xt_dma_handle); 1328 10958 dme 1329 10958 dme atomic_add_32(&xnbp->xnb_tx_buf_count, -1); 1330 5084 johnlev } 1331 5084 johnlev 1332 5741 mrj /* 1333 10958 dme * Take packets from the peer and deliver them onward. 1334 5741 mrj */ 1335 5084 johnlev static mblk_t * 1336 7615 Max xnb_from_peer(xnb_t *xnbp) 1337 5084 johnlev { 1338 5084 johnlev RING_IDX start, end, loop; 1339 10958 dme gnttab_copy_t *cop; 1340 7615 Max xnb_txbuf_t **txpp; 1341 5084 johnlev netif_tx_request_t *txreq; 1342 10958 dme boolean_t work_to_do, need_notify = B_FALSE; 1343 5084 johnlev mblk_t *head, *tail; 1344 10958 dme int n_data_req, i; 1345 5084 johnlev 1346 10958 dme ASSERT(MUTEX_HELD(&xnbp->xnb_tx_lock)); 1347 5084 johnlev 1348 5084 johnlev head = tail = NULL; 1349 5084 johnlev around: 1350 5084 johnlev 1351 5741 mrj /* LINTED: constant in conditional context */ 1352 5741 mrj RING_FINAL_CHECK_FOR_REQUESTS(&xnbp->xnb_tx_ring, work_to_do); 1353 5084 johnlev if (!work_to_do) { 1354 5084 johnlev finished: 1355 10958 dme xnb_tx_notify_peer(xnbp, need_notify); 1356 10958 dme 1357 5084 johnlev return (head); 1358 5084 johnlev } 1359 5084 johnlev 1360 5741 mrj start = xnbp->xnb_tx_ring.req_cons; 1361 5741 mrj end = xnbp->xnb_tx_ring.sring->req_prod; 1362 5084 johnlev 1363 7676 dme if ((end - start) > NET_TX_RING_SIZE) { 1364 7676 dme /* 1365 7676 dme * This usually indicates that the frontend driver is 1366 7676 dme * misbehaving, as it's not possible to have more than 1367 7676 dme * NET_TX_RING_SIZE ring elements in play at any one 1368 7676 dme * time. 1369 7676 dme * 1370 7676 dme * We reset the ring pointers to the state declared by 1371 7676 dme * the frontend and try to carry on. 1372 7676 dme */ 1373 7676 dme cmn_err(CE_WARN, "xnb_from_peer: domain %d tried to give us %u " 1374 7676 dme "items in the ring, resetting and trying to recover.", 1375 7676 dme xnbp->xnb_peer, (end - start)); 1376 7676 dme 1377 7676 dme /* LINTED: constant in conditional context */ 1378 7676 dme BACK_RING_ATTACH(&xnbp->xnb_tx_ring, 1379 7676 dme (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE); 1380 7676 dme 1381 7676 dme goto around; 1382 7676 dme } 1383 7676 dme 1384 10958 dme loop = start; 1385 10958 dme cop = xnbp->xnb_tx_cop; 1386 10958 dme txpp = xnbp->xnb_tx_bufp; 1387 10958 dme n_data_req = 0; 1388 5084 johnlev 1389 10958 dme while (loop < end) { 1390 10958 dme txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop); 1391 5084 johnlev 1392 10958 dme if (txreq->flags & NETTXF_extra_info) { 1393 10958 dme struct netif_extra_info *erp; 1394 10958 dme boolean_t status; 1395 5084 johnlev 1396 10958 dme loop++; /* Consume another slot in the ring. */ 1397 10958 dme ASSERT(loop <= end); 1398 5084 johnlev 1399 10958 dme erp = (struct netif_extra_info *) 1400 10958 dme RING_GET_REQUEST(&xnbp->xnb_tx_ring, loop); 1401 10958 dme 1402 10958 dme switch (erp->type) { 1403 10958 dme case XEN_NETIF_EXTRA_TYPE_MCAST_ADD: 1404 10958 dme ASSERT(xnbp->xnb_multicast_control); 1405 10958 dme status = xnbp->xnb_flavour->xf_mcast_add(xnbp, 1406 10958 dme &erp->u.mcast.addr); 1407 10958 dme break; 1408 10958 dme case XEN_NETIF_EXTRA_TYPE_MCAST_DEL: 1409 10958 dme ASSERT(xnbp->xnb_multicast_control); 1410 10958 dme status = xnbp->xnb_flavour->xf_mcast_del(xnbp, 1411 10958 dme &erp->u.mcast.addr); 1412 10958 dme break; 1413 10958 dme default: 1414 10958 dme status = B_FALSE; 1415 10958 dme cmn_err(CE_WARN, "xnb_from_peer: " 1416 10958 dme "unknown extra type %d", erp->type); 1417 10958 dme break; 1418 10958 dme } 1419 10958 dme 1420 10958 dme xnb_tx_mark_complete(xnbp, txreq->id, 1421 10958 dme status ? NETIF_RSP_OKAY : NETIF_RSP_ERROR); 1422 10958 dme need_notify = B_TRUE; 1423 10958 dme } else { 1424 10958 dme xnb_txbuf_t *txp; 1425 10958 dme 1426 10958 dme txp = kmem_cache_alloc(xnbp->xnb_tx_buf_cache, 1427 10958 dme KM_NOSLEEP); 1428 10958 dme if (txp == NULL) 1429 10958 dme break; 1430 10958 dme 1431 10958 dme txp->xt_mblk = desballoc((unsigned char *)txp->xt_buf, 1432 10958 dme txp->xt_buflen, 0, &txp->xt_free_rtn); 1433 10958 dme if (txp->xt_mblk == NULL) { 1434 10958 dme kmem_cache_free(xnbp->xnb_tx_buf_cache, txp); 1435 10958 dme break; 1436 10958 dme } 1437 10958 dme 1438 10958 dme txp->xt_idx = loop; 1439 10958 dme txp->xt_id = txreq->id; 1440 10958 dme 1441 10958 dme cop->source.u.ref = txreq->gref; 1442 10958 dme cop->source.domid = xnbp->xnb_peer; 1443 10958 dme cop->source.offset = txreq->offset; 1444 10958 dme 1445 10958 dme cop->dest.u.gmfn = txp->xt_mfn; 1446 10958 dme cop->dest.domid = DOMID_SELF; 1447 10958 dme cop->dest.offset = 0; 1448 10958 dme 1449 10958 dme cop->len = txreq->size; 1450 10958 dme cop->flags = GNTCOPY_source_gref; 1451 10958 dme cop->status = 0; 1452 10958 dme 1453 10958 dme *txpp = txp; 1454 10958 dme 1455 10958 dme txpp++; 1456 10958 dme cop++; 1457 10958 dme n_data_req++; 1458 10958 dme 1459 10958 dme ASSERT(n_data_req <= NET_TX_RING_SIZE); 1460 10958 dme } 1461 10958 dme 1462 10958 dme loop++; 1463 5084 johnlev } 1464 5084 johnlev 1465 10958 dme xnbp->xnb_tx_ring.req_cons = loop; 1466 5084 johnlev 1467 10958 dme if (n_data_req == 0) 1468 10958 dme goto around; 1469 5084 johnlev 1470 10958 dme if (HYPERVISOR_grant_table_op(GNTTABOP_copy, 1471 10958 dme xnbp->xnb_tx_cop, n_data_req) != 0) { 1472 5084 johnlev 1473 10958 dme cmn_err(CE_WARN, "xnb_from_peer: copy operation failed"); 1474 5084 johnlev 1475 7615 Max txpp = xnbp->xnb_tx_bufp; 1476 10958 dme i = n_data_req; 1477 10958 dme while (i > 0) { 1478 10958 dme kmem_cache_free(xnbp->xnb_tx_buf_cache, *txpp); 1479 7615 Max txpp++; 1480 10958 dme i--; 1481 5084 johnlev } 1482 5084 johnlev 1483 5084 johnlev goto finished; 1484 5084 johnlev } 1485 5084 johnlev 1486 10958 dme txpp = xnbp->xnb_tx_bufp; 1487 10958 dme cop = xnbp->xnb_tx_cop; 1488 10958 dme i = n_data_req; 1489 10958 dme 1490 10958 dme while (i > 0) { 1491 7615 Max xnb_txbuf_t *txp = *txpp; 1492 5084 johnlev 1493 10958 dme txreq = RING_GET_REQUEST(&xnbp->xnb_tx_ring, txp->xt_idx); 1494 10958 dme 1495 10958 dme if (cop->status != 0) { 1496 10958 dme #ifdef XNB_DEBUG 1497 7615 Max cmn_err(CE_WARN, "xnb_from_peer: " 1498 10958 dme "txpp 0x%p failed (%d)", 1499 10958 dme (void *)*txpp, cop->status); 1500 10958 dme #endif /* XNB_DEBUG */ 1501 10958 dme xnb_tx_mark_complete(xnbp, txp->xt_id, cop->status); 1502 10958 dme freemsg(txp->xt_mblk); 1503 10958 dme } else { 1504 10958 dme mblk_t *mp; 1505 5084 johnlev 1506 10958 dme mp = txp->xt_mblk; 1507 10958 dme mp->b_rptr = mp->b_wptr = (unsigned char *)txp->xt_buf; 1508 10958 dme mp->b_wptr += txreq->size; 1509 10958 dme mp->b_next = NULL; 1510 5084 johnlev 1511 5084 johnlev /* 1512 10958 dme * If there are checksum flags, process them 1513 10958 dme * appropriately. 1514 5084 johnlev */ 1515 10958 dme if ((txreq->flags & 1516 5084 johnlev (NETTXF_csum_blank | NETTXF_data_validated)) 1517 10958 dme != 0) { 1518 5741 mrj mp = xnbp->xnb_flavour->xf_cksum_from_peer(xnbp, 1519 5084 johnlev mp, txreq->flags); 1520 7615 Max xnbp->xnb_stat_tx_cksum_no_need++; 1521 10958 dme 1522 10958 dme txp->xt_mblk = mp; 1523 5084 johnlev } 1524 5084 johnlev 1525 5084 johnlev if (head == NULL) { 1526 5084 johnlev ASSERT(tail == NULL); 1527 5084 johnlev head = mp; 1528 5084 johnlev } else { 1529 5084 johnlev ASSERT(tail != NULL); 1530 5084 johnlev tail->b_next = mp; 1531 5084 johnlev } 1532 5084 johnlev tail = mp; 1533 10958 dme 1534 10958 dme xnbp->xnb_stat_opackets++; 1535 10958 dme xnbp->xnb_stat_obytes += txreq->size; 1536 10958 dme 1537 10958 dme xnb_tx_mark_complete(xnbp, txp->xt_id, cop->status); 1538 5084 johnlev } 1539 10958 dme 1540 10958 dme txpp++; 1541 10958 dme cop++; 1542 10958 dme i--; 1543 5084 johnlev } 1544 5084 johnlev 1545 5084 johnlev goto around; 1546 5084 johnlev /* NOTREACHED */ 1547 5084 johnlev } 1548 5084 johnlev 1549 5084 johnlev static uint_t 1550 5084 johnlev xnb_intr(caddr_t arg) 1551 5084 johnlev { 1552 5084 johnlev xnb_t *xnbp = (xnb_t *)arg; 1553 5084 johnlev mblk_t *mp; 1554 5084 johnlev 1555 5741 mrj xnbp->xnb_stat_intr++; 1556 5084 johnlev 1557 7615 Max mutex_enter(&xnbp->xnb_tx_lock); 1558 5084 johnlev 1559 5741 mrj ASSERT(xnbp->xnb_connected); 1560 5084 johnlev 1561 7615 Max mp = xnb_from_peer(xnbp); 1562 5084 johnlev 1563 7615 Max mutex_exit(&xnbp->xnb_tx_lock); 1564 5084 johnlev 1565 5741 mrj if (!xnbp->xnb_hotplugged) { 1566 7615 Max xnbp->xnb_stat_tx_too_early++; 1567 5084 johnlev goto fail; 1568 5084 johnlev } 1569 5084 johnlev if (mp == NULL) { 1570 5741 mrj xnbp->xnb_stat_spurious_intr++; 1571 5084 johnlev goto fail; 1572 5084 johnlev } 1573 5084 johnlev 1574 7615 Max xnbp->xnb_flavour->xf_from_peer(xnbp, mp); 1575 5084 johnlev 1576 5084 johnlev return (DDI_INTR_CLAIMED); 1577 5084 johnlev 1578 5084 johnlev fail: 1579 5084 johnlev freemsgchain(mp); 1580 5084 johnlev return (DDI_INTR_CLAIMED); 1581 5084 johnlev } 1582 5084 johnlev 1583 10958 dme /* 1584 10958 dme * Read our configuration from xenstore. 1585 10958 dme */ 1586 10958 dme boolean_t 1587 10958 dme xnb_read_xs_config(xnb_t *xnbp) 1588 10958 dme { 1589 10958 dme char *xsname; 1590 10958 dme char mac[ETHERADDRL * 3]; 1591 10958 dme 1592 10958 dme xsname = xvdi_get_xsname(xnbp->xnb_devinfo); 1593 10958 dme 1594 10958 dme if (xenbus_scanf(XBT_NULL, xsname, 1595 10958 dme "mac", "%s", mac) != 0) { 1596 10958 dme cmn_err(CE_WARN, "xnb_attach: " 1597 10958 dme "cannot read mac address from %s", 1598 10958 dme xsname); 1599 10958 dme return (B_FALSE); 1600 10958 dme } 1601 10958 dme 1602 10958 dme if (ether_aton(mac, xnbp->xnb_mac_addr) != ETHERADDRL) { 1603 10958 dme cmn_err(CE_WARN, 1604 10958 dme "xnb_attach: cannot parse mac address %s", 1605 10958 dme mac); 1606 10958 dme return (B_FALSE); 1607 10958 dme } 1608 10958 dme 1609 10958 dme return (B_TRUE); 1610 10958 dme } 1611 10958 dme 1612 10958 dme /* 1613 10958 dme * Read the configuration of the peer from xenstore. 1614 10958 dme */ 1615 10958 dme boolean_t 1616 10958 dme xnb_read_oe_config(xnb_t *xnbp) 1617 10958 dme { 1618 10958 dme char *oename; 1619 10958 dme int i; 1620 10958 dme 1621 10958 dme oename = xvdi_get_oename(xnbp->xnb_devinfo); 1622 10958 dme 1623 10958 dme if (xenbus_gather(XBT_NULL, oename, 1624 10958 dme "event-channel", "%u", &xnbp->xnb_fe_evtchn, 1625 10958 dme "tx-ring-ref", "%lu", &xnbp->xnb_tx_ring_ref, 1626 10958 dme "rx-ring-ref", "%lu", &xnbp->xnb_rx_ring_ref, 1627 10958 dme NULL) != 0) { 1628 10958 dme cmn_err(CE_WARN, "xnb_read_oe_config: " 1629 10958 dme "cannot read other-end details from %s", 1630 10958 dme oename); 1631 10958 dme return (B_FALSE); 1632 10958 dme } 1633 10958 dme 1634 10958 dme /* 1635 10958 dme * Check whether our peer requests receive side hypervisor 1636 10958 dme * copy. 1637 10958 dme */ 1638 10958 dme if (xenbus_scanf(XBT_NULL, oename, 1639 10958 dme "request-rx-copy", "%d", &i) != 0) 1640 10958 dme i = 0; 1641 10958 dme if (i != 0) 1642 10958 dme xnbp->xnb_rx_hv_copy = B_TRUE; 1643 10958 dme 1644 10958 dme /* 1645 10958 dme * Check whether our peer requests multicast_control. 1646 10958 dme */ 1647 10958 dme if (xenbus_scanf(XBT_NULL, oename, 1648 10958 dme "request-multicast-control", "%d", &i) != 0) 1649 10958 dme i = 0; 1650 10958 dme if (i != 0) 1651 10958 dme xnbp->xnb_multicast_control = B_TRUE; 1652 10958 dme 1653 10958 dme /* 1654 10958 dme * The Linux backend driver here checks to see if the peer has 1655 10958 dme * set 'feature-no-csum-offload'. This is used to indicate 1656 10958 dme * that the guest cannot handle receiving packets without a 1657 10958 dme * valid checksum. We don't check here, because packets passed 1658 10958 dme * to the peer _always_ have a valid checksum. 1659 10958 dme * 1660 10958 dme * There are three cases: 1661 10958 dme * 1662 10958 dme * - the NIC is dedicated: packets from the wire should always 1663 10958 dme * have a valid checksum. If the hardware validates the 1664 10958 dme * checksum then the relevant bit will be set in the packet 1665 10958 dme * attributes and we will inform the peer. It can choose to 1666 10958 dme * ignore the hardware verification. 1667 10958 dme * 1668 10958 dme * - the NIC is shared (VNIC) and a packet originates from the 1669 10958 dme * wire: this is the same as the case above - the packets 1670 10958 dme * will have a valid checksum. 1671 10958 dme * 1672 10958 dme * - the NIC is shared (VNIC) and a packet originates from the 1673 10958 dme * host: the MAC layer ensures that all such packets have a 1674 10958 dme * valid checksum by calculating one if the stack did not. 1675 10958 dme */ 1676 10958 dme 1677 10958 dme return (B_TRUE); 1678 10958 dme } 1679 10958 dme 1680 10958 dme void 1681 10958 dme xnb_start_connect(xnb_t *xnbp) 1682 10958 dme { 1683 10958 dme dev_info_t *dip = xnbp->xnb_devinfo; 1684 10958 dme 1685 10958 dme if (!xnb_connect_rings(dip)) { 1686 10958 dme cmn_err(CE_WARN, "xnb_start_connect: " 1687 10958 dme "cannot connect rings"); 1688 10958 dme goto failed; 1689 10958 dme } 1690 10958 dme 1691 10958 dme if (!xnbp->xnb_flavour->xf_start_connect(xnbp)) { 1692 10958 dme cmn_err(CE_WARN, "xnb_start_connect: " 1693 10958 dme "flavour failed to connect"); 1694 10958 dme goto failed; 1695 10958 dme } 1696 10958 dme 1697 10958 dme (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateConnected); 1698 10958 dme return; 1699 10958 dme 1700 10958 dme failed: 1701 10958 dme xnbp->xnb_flavour->xf_peer_disconnected(xnbp); 1702 10958 dme xnb_disconnect_rings(dip); 1703 10958 dme (void) xvdi_switch_state(dip, XBT_NULL, 1704 10958 dme XenbusStateClosed); 1705 10958 dme (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1706 10958 dme } 1707 10958 dme 1708 5084 johnlev static boolean_t 1709 5084 johnlev xnb_connect_rings(dev_info_t *dip) 1710 5084 johnlev { 1711 5084 johnlev xnb_t *xnbp = ddi_get_driver_private(dip); 1712 5084 johnlev struct gnttab_map_grant_ref map_op; 1713 5084 johnlev 1714 5084 johnlev /* 1715 5084 johnlev * Cannot attempt to connect the rings if already connected. 1716 5084 johnlev */ 1717 5741 mrj ASSERT(!xnbp->xnb_connected); 1718 5084 johnlev 1719 5084 johnlev /* 1720 5084 johnlev * 1. allocate a vaddr for the tx page, one for the rx page. 1721 5084 johnlev * 2. call GNTTABOP_map_grant_ref to map the relevant pages 1722 5084 johnlev * into the allocated vaddr (one for tx, one for rx). 1723 5084 johnlev * 3. call EVTCHNOP_bind_interdomain to have the event channel 1724 5084 johnlev * bound to this domain. 1725 5084 johnlev * 4. associate the event channel with an interrupt. 1726 10958 dme * 5. enable the interrupt. 1727 5084 johnlev */ 1728 5084 johnlev 1729 5084 johnlev /* 1.tx */ 1730 5741 mrj xnbp->xnb_tx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE, 1731 5084 johnlev 0, 0, 0, 0, VM_SLEEP); 1732 5741 mrj ASSERT(xnbp->xnb_tx_ring_addr != NULL); 1733 5084 johnlev 1734 5084 johnlev /* 2.tx */ 1735 5741 mrj map_op.host_addr = (uint64_t)((long)xnbp->xnb_tx_ring_addr); 1736 5084 johnlev map_op.flags = GNTMAP_host_map; 1737 5741 mrj map_op.ref = xnbp->xnb_tx_ring_ref; 1738 5741 mrj map_op.dom = xnbp->xnb_peer; 1739 7756 Mark hat_prepare_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr, NULL); 1740 7756 Mark if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 || 1741 7756 Mark map_op.status != 0) { 1742 5084 johnlev cmn_err(CE_WARN, "xnb_connect_rings: cannot map tx-ring page."); 1743 5084 johnlev goto fail; 1744 5084 johnlev } 1745 5741 mrj xnbp->xnb_tx_ring_handle = map_op.handle; 1746 5084 johnlev 1747 5741 mrj /* LINTED: constant in conditional context */ 1748 5741 mrj BACK_RING_INIT(&xnbp->xnb_tx_ring, 1749 5741 mrj (netif_tx_sring_t *)xnbp->xnb_tx_ring_addr, PAGESIZE); 1750 5084 johnlev 1751 5084 johnlev /* 1.rx */ 1752 5741 mrj xnbp->xnb_rx_ring_addr = vmem_xalloc(heap_arena, PAGESIZE, PAGESIZE, 1753 5084 johnlev 0, 0, 0, 0, VM_SLEEP); 1754 5741 mrj ASSERT(xnbp->xnb_rx_ring_addr != NULL); 1755 5084 johnlev 1756 5084 johnlev /* 2.rx */ 1757 5741 mrj map_op.host_addr = (uint64_t)((long)xnbp->xnb_rx_ring_addr); 1758 5084 johnlev map_op.flags = GNTMAP_host_map; 1759 5741 mrj map_op.ref = xnbp->xnb_rx_ring_ref; 1760 5741 mrj map_op.dom = xnbp->xnb_peer; 1761 7756 Mark hat_prepare_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr, NULL); 1762 7756 Mark if (xen_map_gref(GNTTABOP_map_grant_ref, &map_op, 1, B_FALSE) != 0 || 1763 7756 Mark map_op.status != 0) { 1764 5084 johnlev cmn_err(CE_WARN, "xnb_connect_rings: cannot map rx-ring page."); 1765 5084 johnlev goto fail; 1766 5084 johnlev } 1767 5741 mrj xnbp->xnb_rx_ring_handle = map_op.handle; 1768 5084 johnlev 1769 5741 mrj /* LINTED: constant in conditional context */ 1770 5741 mrj BACK_RING_INIT(&xnbp->xnb_rx_ring, 1771 5741 mrj (netif_rx_sring_t *)xnbp->xnb_rx_ring_addr, PAGESIZE); 1772 5084 johnlev 1773 5084 johnlev /* 3 */ 1774 10958 dme if (xvdi_bind_evtchn(dip, xnbp->xnb_fe_evtchn) != DDI_SUCCESS) { 1775 5084 johnlev cmn_err(CE_WARN, "xnb_connect_rings: " 1776 5741 mrj "cannot bind event channel %d", xnbp->xnb_evtchn); 1777 5741 mrj xnbp->xnb_evtchn = INVALID_EVTCHN; 1778 5084 johnlev goto fail; 1779 5084 johnlev } 1780 5741 mrj xnbp->xnb_evtchn = xvdi_get_evtchn(dip); 1781 5084 johnlev 1782 5084 johnlev /* 1783 5084 johnlev * It would be good to set the state to XenbusStateConnected 1784 5084 johnlev * here as well, but then what if ddi_add_intr() failed? 1785 5084 johnlev * Changing the state in the store will be noticed by the peer 1786 5084 johnlev * and cannot be "taken back". 1787 5084 johnlev */ 1788 5741 mrj mutex_enter(&xnbp->xnb_tx_lock); 1789 5741 mrj mutex_enter(&xnbp->xnb_rx_lock); 1790 5084 johnlev 1791 5741 mrj xnbp->xnb_connected = B_TRUE; 1792 5084 johnlev 1793 5741 mrj mutex_exit(&xnbp->xnb_rx_lock); 1794 5741 mrj mutex_exit(&xnbp->xnb_tx_lock); 1795 5084 johnlev 1796 10958 dme /* 4, 5 */ 1797 5084 johnlev if (ddi_add_intr(dip, 0, NULL, NULL, xnb_intr, (caddr_t)xnbp) 1798 5084 johnlev != DDI_SUCCESS) { 1799 5084 johnlev cmn_err(CE_WARN, "xnb_connect_rings: cannot add interrupt"); 1800 5084 johnlev goto fail; 1801 5084 johnlev } 1802 5741 mrj xnbp->xnb_irq = B_TRUE; 1803 5084 johnlev 1804 5084 johnlev return (B_TRUE); 1805 5084 johnlev 1806 5084 johnlev fail: 1807 5741 mrj mutex_enter(&xnbp->xnb_tx_lock); 1808 5741 mrj mutex_enter(&xnbp->xnb_rx_lock); 1809 5084 johnlev 1810 5741 mrj xnbp->xnb_connected = B_FALSE; 1811 10958 dme 1812 5741 mrj mutex_exit(&xnbp->xnb_rx_lock); 1813 5741 mrj mutex_exit(&xnbp->xnb_tx_lock); 1814 5084 johnlev 1815 5084 johnlev return (B_FALSE); 1816 5084 johnlev } 1817 5084 johnlev 1818 5084 johnlev static void 1819 5084 johnlev xnb_disconnect_rings(dev_info_t *dip) 1820 5084 johnlev { 1821 5084 johnlev xnb_t *xnbp = ddi_get_driver_private(dip); 1822 5084 johnlev 1823 5741 mrj if (xnbp->xnb_irq) { 1824 5084 johnlev ddi_remove_intr(dip, 0, NULL); 1825 5741 mrj xnbp->xnb_irq = B_FALSE; 1826 5084 johnlev } 1827 5741 mrj 1828 5741 mrj if (xnbp->xnb_evtchn != INVALID_EVTCHN) { 1829 5084 johnlev xvdi_free_evtchn(dip); 1830 5741 mrj xnbp->xnb_evtchn = INVALID_EVTCHN; 1831 5084 johnlev } 1832 5084 johnlev 1833 5741 mrj if (xnbp->xnb_rx_ring_handle != INVALID_GRANT_HANDLE) { 1834 5084 johnlev struct gnttab_unmap_grant_ref unmap_op; 1835 5084 johnlev 1836 5741 mrj unmap_op.host_addr = (uint64_t)(uintptr_t) 1837 5741 mrj xnbp->xnb_rx_ring_addr; 1838 5084 johnlev unmap_op.dev_bus_addr = 0; 1839 5741 mrj unmap_op.handle = xnbp->xnb_rx_ring_handle; 1840 5084 johnlev if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1841 5084 johnlev &unmap_op, 1) != 0) 1842 5084 johnlev cmn_err(CE_WARN, "xnb_disconnect_rings: " 1843 5084 johnlev "cannot unmap rx-ring page (%d)", 1844 5084 johnlev unmap_op.status); 1845 5084 johnlev 1846 5741 mrj xnbp->xnb_rx_ring_handle = INVALID_GRANT_HANDLE; 1847 5084 johnlev } 1848 5084 johnlev 1849 5741 mrj if (xnbp->xnb_rx_ring_addr != NULL) { 1850 5741 mrj hat_release_mapping(kas.a_hat, xnbp->xnb_rx_ring_addr); 1851 5741 mrj vmem_free(heap_arena, xnbp->xnb_rx_ring_addr, PAGESIZE); 1852 5741 mrj xnbp->xnb_rx_ring_addr = NULL; 1853 5084 johnlev } 1854 5084 johnlev 1855 5741 mrj if (xnbp->xnb_tx_ring_handle != INVALID_GRANT_HANDLE) { 1856 5084 johnlev struct gnttab_unmap_grant_ref unmap_op; 1857 5084 johnlev 1858 5741 mrj unmap_op.host_addr = (uint64_t)(uintptr_t) 1859 5741 mrj xnbp->xnb_tx_ring_addr; 1860 5084 johnlev unmap_op.dev_bus_addr = 0; 1861 5741 mrj unmap_op.handle = xnbp->xnb_tx_ring_handle; 1862 5084 johnlev if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, 1863 5084 johnlev &unmap_op, 1) != 0) 1864 5084 johnlev cmn_err(CE_WARN, "xnb_disconnect_rings: " 1865 5084 johnlev "cannot unmap tx-ring page (%d)", 1866 5084 johnlev unmap_op.status); 1867 5084 johnlev 1868 5741 mrj xnbp->xnb_tx_ring_handle = INVALID_GRANT_HANDLE; 1869 5084 johnlev } 1870 5084 johnlev 1871 5741 mrj if (xnbp->xnb_tx_ring_addr != NULL) { 1872 5741 mrj hat_release_mapping(kas.a_hat, xnbp->xnb_tx_ring_addr); 1873 5741 mrj vmem_free(heap_arena, xnbp->xnb_tx_ring_addr, PAGESIZE); 1874 5741 mrj xnbp->xnb_tx_ring_addr = NULL; 1875 5084 johnlev } 1876 5084 johnlev } 1877 5084 johnlev 1878 5084 johnlev static void 1879 5084 johnlev xnb_oe_state_change(dev_info_t *dip, ddi_eventcookie_t id, 1880 5084 johnlev void *arg, void *impl_data) 1881 5084 johnlev { 1882 10958 dme _NOTE(ARGUNUSED(id, arg)); 1883 5084 johnlev xnb_t *xnbp = ddi_get_driver_private(dip); 1884 5084 johnlev XenbusState new_state = *(XenbusState *)impl_data; 1885 5084 johnlev 1886 5084 johnlev ASSERT(xnbp != NULL); 1887 5084 johnlev 1888 5084 johnlev switch (new_state) { 1889 5084 johnlev case XenbusStateConnected: 1890 7005 cz147101 /* spurious state change */ 1891 7005 cz147101 if (xnbp->xnb_connected) 1892 7005 cz147101 return; 1893 7005 cz147101 1894 10958 dme if (!xnb_read_oe_config(xnbp) || 1895 10958 dme !xnbp->xnb_flavour->xf_peer_connected(xnbp)) { 1896 10958 dme cmn_err(CE_WARN, "xnb_oe_state_change: " 1897 10958 dme "read otherend config error"); 1898 5084 johnlev (void) xvdi_switch_state(dip, XBT_NULL, 1899 5084 johnlev XenbusStateClosed); 1900 5084 johnlev (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1901 10958 dme 1902 10958 dme break; 1903 5084 johnlev } 1904 10958 dme 1905 10958 dme 1906 10958 dme mutex_enter(&xnbp->xnb_state_lock); 1907 10958 dme xnbp->xnb_fe_status = XNB_STATE_READY; 1908 10958 dme if (xnbp->xnb_be_status == XNB_STATE_READY) 1909 10958 dme xnb_start_connect(xnbp); 1910 10958 dme mutex_exit(&xnbp->xnb_state_lock); 1911 5084 johnlev 1912 5084 johnlev /* 1913 5084 johnlev * Now that we've attempted to connect it's reasonable 1914 5084 johnlev * to allow an attempt to detach. 1915 5084 johnlev */ 1916 5741 mrj xnbp->xnb_detachable = B_TRUE; 1917 5084 johnlev 1918 5084 johnlev break; 1919 5084 johnlev 1920 5084 johnlev case XenbusStateClosing: 1921 5084 johnlev (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosing); 1922 5084 johnlev 1923 5084 johnlev break; 1924 5084 johnlev 1925 5084 johnlev case XenbusStateClosed: 1926 5741 mrj xnbp->xnb_flavour->xf_peer_disconnected(xnbp); 1927 5084 johnlev 1928 5741 mrj mutex_enter(&xnbp->xnb_tx_lock); 1929 5741 mrj mutex_enter(&xnbp->xnb_rx_lock); 1930 5084 johnlev 1931 5084 johnlev xnb_disconnect_rings(dip); 1932 5741 mrj xnbp->xnb_connected = B_FALSE; 1933 5084 johnlev 1934 5741 mrj mutex_exit(&xnbp->xnb_rx_lock); 1935 5741 mrj mutex_exit(&xnbp->xnb_tx_lock); 1936 5084 johnlev 1937 5084 johnlev (void) xvdi_switch_state(dip, XBT_NULL, XenbusStateClosed); 1938 5084 johnlev (void) xvdi_post_event(dip, XEN_HP_REMOVE); 1939 5084 johnlev /* 1940 5084 johnlev * In all likelyhood this is already set (in the above 1941 5084 johnlev * case), but if the peer never attempted to connect 1942 5084 johnlev * and the domain is destroyed we get here without 1943 5084 johnlev * having been through the case above, so we set it to 1944 5084 johnlev * be sure. 1945 5084 johnlev */ 1946 5741 mrj xnbp->xnb_detachable = B_TRUE; 1947 5084 johnlev 1948 5084 johnlev break; 1949 5084 johnlev 1950 5084 johnlev default: 1951 5084 johnlev break; 1952 5084 johnlev } 1953 5084 johnlev } 1954 5084 johnlev 1955 5084 johnlev static void 1956 5084 johnlev xnb_hp_state_change(dev_info_t *dip, ddi_eventcookie_t id, 1957 5084 johnlev void *arg, void *impl_data) 1958 5084 johnlev { 1959 10958 dme _NOTE(ARGUNUSED(id, arg)); 1960 5084 johnlev xnb_t *xnbp = ddi_get_driver_private(dip); 1961 5084 johnlev xendev_hotplug_state_t state = *(xendev_hotplug_state_t *)impl_data; 1962 5084 johnlev 1963 5084 johnlev ASSERT(xnbp != NULL); 1964 5084 johnlev 1965 5084 johnlev switch (state) { 1966 5084 johnlev case Connected: 1967 7005 cz147101 /* spurious hotplug event */ 1968 7005 cz147101 if (xnbp->xnb_hotplugged) 1969 10958 dme break; 1970 7005 cz147101 1971 10958 dme if (!xnb_read_xs_config(xnbp)) 1972 10958 dme break; 1973 10958 dme 1974 10958 dme if (!xnbp->xnb_flavour->xf_hotplug_connected(xnbp)) 1975 10958 dme break; 1976 5084 johnlev 1977 5741 mrj mutex_enter(&xnbp->xnb_tx_lock); 1978 5741 mrj mutex_enter(&xnbp->xnb_rx_lock); 1979 5084 johnlev 1980 10958 dme xnbp->xnb_hotplugged = B_TRUE; 1981 5084 johnlev 1982 5741 mrj mutex_exit(&xnbp->xnb_rx_lock); 1983 5741 mrj mutex_exit(&xnbp->xnb_tx_lock); 1984 10958 dme 1985 10958 dme mutex_enter(&xnbp->xnb_state_lock); 1986 10958 dme xnbp->xnb_be_status = XNB_STATE_READY; 1987 10958 dme if (xnbp->xnb_fe_status == XNB_STATE_READY) 1988 10958 dme xnb_start_connect(xnbp); 1989 10958 dme mutex_exit(&xnbp->xnb_state_lock); 1990 10958 dme 1991 5084 johnlev break; 1992 5084 johnlev 1993 5084 johnlev default: 1994 5084 johnlev break; 1995 5084 johnlev } 1996 5084 johnlev } 1997 5084 johnlev 1998 5084 johnlev static struct modldrv modldrv = { 1999 7351 dme &mod_miscops, "xnb", 2000 5084 johnlev }; 2001 5084 johnlev 2002 5084 johnlev static struct modlinkage modlinkage = { 2003 5084 johnlev MODREV_1, &modldrv, NULL 2004 5084 johnlev }; 2005 5084 johnlev 2006 5084 johnlev int 2007 5084 johnlev _init(void) 2008 5084 johnlev { 2009 5084 johnlev int i; 2010 5084 johnlev 2011 5084 johnlev mutex_init(&xnb_alloc_page_lock, NULL, MUTEX_DRIVER, NULL); 2012 5084 johnlev 2013 10958 dme i = mod_install(&modlinkage); 2014 10958 dme if (i != DDI_SUCCESS) 2015 10958 dme mutex_destroy(&xnb_alloc_page_lock); 2016 5084 johnlev 2017 5084 johnlev return (i); 2018 5084 johnlev } 2019 5084 johnlev 2020 5084 johnlev int 2021 5084 johnlev _info(struct modinfo *modinfop) 2022 5084 johnlev { 2023 5084 johnlev return (mod_info(&modlinkage, modinfop)); 2024 5084 johnlev } 2025 5084 johnlev 2026 5084 johnlev int 2027 5084 johnlev _fini(void) 2028 5084 johnlev { 2029 5084 johnlev int i; 2030 5084 johnlev 2031 5084 johnlev i = mod_remove(&modlinkage); 2032 10958 dme if (i == DDI_SUCCESS) 2033 5084 johnlev mutex_destroy(&xnb_alloc_page_lock); 2034 10958 dme 2035 5084 johnlev return (i); 2036 5084 johnlev } 2037