Home | History | Annotate | Download | only in igb
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * Copyright(c) 2007-2009 Intel Corporation. All rights reserved.
      5  * The contents of this file are subject to the terms of the
      6  * Common Development and Distribution License (the "License").
      7  * You may not use this file except in compliance with the License.
      8  *
      9  * You can obtain a copy of the license at:
     10  *	http://www.opensolaris.org/os/licensing.
     11  * See the License for the specific language governing permissions
     12  * and limitations under the License.
     13  *
     14  * When using or redistributing this file, you may do so under the
     15  * License only. No other modification of this header is permitted.
     16  *
     17  * If applicable, add the following below this CDDL HEADER, with the
     18  * fields enclosed by brackets "[]" replaced with your own identifying
     19  * information: Portions Copyright [yyyy] [name of copyright owner]
     20  *
     21  * CDDL HEADER END
     22  */
     23 
     24 /*
     25  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     26  * Use is subject to license terms.
     27  */
     28 
     29 #include "igb_sw.h"
     30 
/* Description string published through igb_modldrv below. */
static char ident[] = "Intel 1Gb Ethernet";
/* Driver version string; igb_attach() logs it after a successful attach. */
static char igb_version[] = "igb 1.1.13";
     33 
     34 /*
     35  * Local function prototypes
     36  */
/*
 * Forward declarations for helpers that are static to this file.  They are
 * declared up front so that the tables and entry points below can refer to
 * them regardless of definition order.
 */
static int igb_register_mac(igb_t *);
static int igb_identify_hardware(igb_t *);
static int igb_regs_map(igb_t *);
static void igb_init_properties(igb_t *);
static int igb_init_driver_settings(igb_t *);
static void igb_init_locks(igb_t *);
static void igb_destroy_locks(igb_t *);
static int igb_init_mac_address(igb_t *);
static int igb_init(igb_t *);
static int igb_init_adapter(igb_t *);
static void igb_stop_adapter(igb_t *);
static int igb_reset(igb_t *);
static void igb_tx_clean(igb_t *);
static boolean_t igb_tx_drain(igb_t *);
static boolean_t igb_rx_drain(igb_t *);
static int igb_alloc_rings(igb_t *);
static int igb_alloc_rx_data(igb_t *);
static void igb_free_rx_data(igb_t *);
static void igb_free_rings(igb_t *);
static void igb_setup_rings(igb_t *);
static void igb_setup_rx(igb_t *);
static void igb_setup_tx(igb_t *);
static void igb_setup_rx_ring(igb_rx_ring_t *);
static void igb_setup_tx_ring(igb_tx_ring_t *);
static void igb_setup_rss(igb_t *);
static void igb_setup_mac_rss_classify(igb_t *);
static void igb_setup_mac_classify(igb_t *);
static void igb_init_unicst(igb_t *);
static void igb_setup_multicst(igb_t *);
static void igb_get_phy_state(igb_t *);
static void igb_param_sync(igb_t *);
static void igb_get_conf(igb_t *);
/* igb_attach() calls this as (igb, property name, min, max, default) */
static int igb_get_prop(igb_t *, char *, int, int, int);
static boolean_t igb_is_link_up(igb_t *);
static boolean_t igb_link_check(igb_t *);
static void igb_local_timer(void *);
static void igb_link_timer(void *);
static void igb_arm_watchdog_timer(igb_t *);
static void igb_start_watchdog_timer(igb_t *);
static void igb_restart_watchdog_timer(igb_t *);
static void igb_stop_watchdog_timer(igb_t *);
static void igb_start_link_timer(igb_t *);
static void igb_stop_link_timer(igb_t *);
static void igb_disable_adapter_interrupts(igb_t *);
/* Per-chipset routines referenced from the adapter_info_t tables below */
static void igb_enable_adapter_interrupts_82575(igb_t *);
static void igb_enable_adapter_interrupts_82576(igb_t *);
static void igb_enable_adapter_interrupts_82580(igb_t *);
static boolean_t is_valid_mac_addr(uint8_t *);
static boolean_t igb_stall_check(igb_t *);
static boolean_t igb_set_loopback_mode(igb_t *, uint32_t);
static void igb_set_external_loopback(igb_t *);
static void igb_set_internal_phy_loopback(igb_t *);
static void igb_set_internal_serdes_loopback(igb_t *);
static boolean_t igb_find_mac_address(igb_t *);
static int igb_alloc_intrs(igb_t *);
static int igb_alloc_intr_handles(igb_t *, int);
static int igb_add_intr_handlers(igb_t *);
static void igb_rem_intr_handlers(igb_t *);
static void igb_rem_intrs(igb_t *);
static int igb_enable_intrs(igb_t *);
static int igb_disable_intrs(igb_t *);
/* Per-chipset routines referenced from the adapter_info_t tables below */
static void igb_setup_msix_82575(igb_t *);
static void igb_setup_msix_82576(igb_t *);
static void igb_setup_msix_82580(igb_t *);
static uint_t igb_intr_legacy(void *, void *);
static uint_t igb_intr_msi(void *, void *);
static uint_t igb_intr_rx(void *, void *);
static uint_t igb_intr_tx(void *, void *);
static uint_t igb_intr_tx_other(void *, void *);
static void igb_intr_rx_work(igb_rx_ring_t *);
static void igb_intr_tx_work(igb_tx_ring_t *);
static void igb_intr_link_work(igb_t *);
static void igb_get_driver_control(struct e1000_hw *);
static void igb_release_driver_control(struct e1000_hw *);
    111 
/*
 * Device entry points and their helpers.  igb_attach(), igb_detach() and
 * igb_quiesce() are installed in igb_dev_ops below; igb_resume() and
 * igb_suspend() are reached through the DDI_RESUME / DDI_SUSPEND commands
 * of attach and detach.
 */
static int igb_attach(dev_info_t *, ddi_attach_cmd_t);
static int igb_detach(dev_info_t *, ddi_detach_cmd_t);
static int igb_resume(dev_info_t *);
static int igb_suspend(dev_info_t *);
static int igb_quiesce(dev_info_t *);
/* Teardown shared by the attach failure path and detach */
static void igb_unconfigure(dev_info_t *, igb_t *);
static int igb_fm_error_cb(dev_info_t *, ddi_fm_error_t *,
    const void *);
static void igb_fm_init(igb_t *);
static void igb_fm_fini(igb_t *);
static void igb_release_multicast(igb_t *);
    123 
/*
 * Driver-private properties.  igb_register_mac() hands this table to the
 * MAC layer through m_priv_props / m_priv_prop_count.  Each entry pairs a
 * property name with its permission: the tuning thresholds are read-write,
 * the two pause capability properties are read-only.
 */
mac_priv_prop_t igb_priv_props[] = {
	{"_tx_copy_thresh", MAC_PROP_PERM_RW},
	{"_tx_recycle_thresh", MAC_PROP_PERM_RW},
	{"_tx_overload_thresh", MAC_PROP_PERM_RW},
	{"_tx_resched_thresh", MAC_PROP_PERM_RW},
	{"_rx_copy_thresh", MAC_PROP_PERM_RW},
	{"_rx_limit_per_intr", MAC_PROP_PERM_RW},
	{"_intr_throttling", MAC_PROP_PERM_RW},
	{"_adv_pause_cap", MAC_PROP_PERM_READ},
	{"_adv_asym_pause_cap", MAC_PROP_PERM_READ}
};

/* Number of entries in igb_priv_props[], computed at compile time */
#define	IGB_MAX_PRIV_PROPS \
	(sizeof (igb_priv_props) / sizeof (mac_priv_prop_t))
    138 
/*
 * Character/block entry points, referenced from igb_dev_ops.devo_cb_ops.
 * The table is positional.  open and close are nulldev, property lookups
 * go to ddi_prop_op, and every other operation is nodev or nochpoll.
 */
static struct cb_ops igb_cb_ops = {
	nulldev,		/* cb_open */
	nulldev,		/* cb_close */
	nodev,			/* cb_strategy */
	nodev,			/* cb_print */
	nodev,			/* cb_dump */
	nodev,			/* cb_read */
	nodev,			/* cb_write */
	nodev,			/* cb_ioctl */
	nodev,			/* cb_devmap */
	nodev,			/* cb_mmap */
	nodev,			/* cb_segmap */
	nochpoll,		/* cb_chpoll */
	ddi_prop_op,		/* cb_prop_op */
	NULL,			/* cb_stream */
	D_MP | D_HOTPLUG,	/* cb_flag */
	CB_REV,			/* cb_rev */
	nodev,			/* cb_aread */
	nodev			/* cb_awrite */
};
    159 
/*
 * Device operations for the driver, published through igb_modldrv.  The
 * table is positional.  _init() passes it to mac_init_ops() before the
 * module is installed, and _init()/_fini() pass it to mac_fini_ops() on
 * install failure and on successful removal.
 */
static struct dev_ops igb_dev_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* devo_refcnt */
	NULL,			/* devo_getinfo */
	nulldev,		/* devo_identify */
	nulldev,		/* devo_probe */
	igb_attach,		/* devo_attach */
	igb_detach,		/* devo_detach */
	nodev,			/* devo_reset */
	&igb_cb_ops,		/* devo_cb_ops */
	NULL,			/* devo_bus_ops */
	ddi_power,		/* devo_power */
	igb_quiesce,		/* devo_quiesce */
};
    174 
/* Loadable-driver description; linked into igb_modlinkage below. */
static struct modldrv igb_modldrv = {
	&mod_driverops,		/* Type of module.  This one is a driver */
	ident,			/* Description string */
	&igb_dev_ops,		/* driver ops */
};
    180 
/*
 * Module linkage.  This is the object passed to mod_install(),
 * mod_remove() and mod_info() by _init(), _fini() and _info().
 */
static struct modlinkage igb_modlinkage = {
	MODREV_1, &igb_modldrv, NULL
};
    184 
/*
 * Access attributes for register mapping.  igb_regs_map() passes this to
 * ddi_regs_map_setup() when mapping the adapter register set.
 */
ddi_device_acc_attr_t igb_regs_acc_attr = {
	DDI_DEVICE_ATTR_V1,	/* attribute structure version */
	DDI_STRUCTURE_LE_ACC,	/* little-endian structure access */
	DDI_STRICTORDER_ACC,	/* strictly ordered accesses */
	DDI_FLAGERR_ACC		/* NOTE(review): presumably FMA flag-error */
				/* access checking; confirm in DDI docs */
};
    192 
/* Flags for the optional callbacks that igb_m_callbacks provides */
#define	IGB_M_CALLBACK_FLAGS \
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_GETPROP)

/*
 * MAC layer callback table, registered by igb_register_mac() through
 * m_callbacks.  The initializer is positional.
 *
 * NOTE(review): the NULL entries are slots this driver leaves unset.
 * Which mac_callbacks_t members they correspond to is not visible in this
 * file; confirm against the mac_callbacks_t definition.
 */
static mac_callbacks_t igb_m_callbacks = {
	IGB_M_CALLBACK_FLAGS,
	igb_m_stat,
	igb_m_start,
	igb_m_stop,
	igb_m_promisc,
	igb_m_multicst,
	NULL,
	NULL,
	igb_m_ioctl,
	igb_m_getcapab,
	NULL,
	NULL,
	igb_m_setprop,
	igb_m_getprop
};
    212 
/*
 * Initialize capabilities of each supported adapter type
 *
 * Each adapter_info_t below is a positional table of queue and interrupt
 * throttle limits, chipset-specific function pointers, capability flags
 * and an RXDCTL register mask.  igb_identify_hardware() stores the address
 * of the matching table in igb->capab according to hw->mac.type.
 */

/* Capabilities installed when hw->mac.type is e1000_82575 */
static adapter_info_t igb_82575_cap = {
	/* limits */
	4,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	4,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82575,
	igb_setup_msix_82575,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL),

	0xffc00000		/* mask for RXDCTL register */
};
    238 
/*
 * Capabilities installed by igb_identify_hardware() when hw->mac.type is
 * e1000_82576.  Compared with the 82575 table, this one allows up to 16
 * rx/tx queues, adds IGB_FLAG_NEED_CTX_IDX and uses a different RXDCTL
 * mask.
 */
static adapter_info_t igb_82576_cap = {
	/* limits */
	16,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	16,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82576,
	igb_setup_msix_82576,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL |
	IGB_FLAG_NEED_CTX_IDX),

	0xffe00000		/* mask for RXDCTL register */
};
    262 
/*
 * Capabilities installed by igb_identify_hardware() when hw->mac.type is
 * e1000_82580.  The flags and RXDCTL mask are the same as in the 82576
 * table; the queue maximum is 8 rather than 16.
 */
static adapter_info_t igb_82580_cap = {
	/* limits */
	8,		/* maximum number of rx queues */
	1,		/* minimum number of rx queues */
	4,		/* default number of rx queues */
	8,		/* maximum number of tx queues */
	1,		/* minimum number of tx queues */
	4,		/* default number of tx queues */
	65535,		/* maximum interrupt throttle rate */
	0,		/* minimum interrupt throttle rate */
	200,		/* default interrupt throttle rate */

	/* function pointers */
	igb_enable_adapter_interrupts_82580,
	igb_setup_msix_82580,

	/* capabilities */
	(IGB_FLAG_HAS_DCA |	/* capability flags */
	IGB_FLAG_VMDQ_POOL |
	IGB_FLAG_NEED_CTX_IDX),

	0xffe00000		/* mask for RXDCTL register */
};
    286 
    287 /*
    288  * Module Initialization Functions
    289  */
    290 
    291 int
    292 _init(void)
    293 {
    294 	int status;
    295 
    296 	mac_init_ops(&igb_dev_ops, MODULE_NAME);
    297 
    298 	status = mod_install(&igb_modlinkage);
    299 
    300 	if (status != DDI_SUCCESS) {
    301 		mac_fini_ops(&igb_dev_ops);
    302 	}
    303 
    304 	return (status);
    305 }
    306 
    307 int
    308 _fini(void)
    309 {
    310 	int status;
    311 
    312 	status = mod_remove(&igb_modlinkage);
    313 
    314 	if (status == DDI_SUCCESS) {
    315 		mac_fini_ops(&igb_dev_ops);
    316 	}
    317 
    318 	return (status);
    319 
    320 }
    321 
    322 int
    323 _info(struct modinfo *modinfop)
    324 {
    325 	int status;
    326 
    327 	status = mod_info(&igb_modlinkage, modinfop);
    328 
    329 	return (status);
    330 }
    331 
    332 /*
    333  * igb_attach - driver attach
    334  *
    335  * This function is the device specific initialization entry
    336  * point. This entry point is required and must be written.
    337  * The DDI_ATTACH command must be provided in the attach entry
    338  * point. When attach() is called with cmd set to DDI_ATTACH,
    339  * all normal kernel services (such as kmem_alloc(9F)) are
    340  * available for use by the driver.
    341  *
    342  * The attach() function will be called once for each instance
    343  * of  the  device  on  the  system with cmd set to DDI_ATTACH.
    344  * Until attach() succeeds, the only driver entry points which
    345  * may be called are open(9E) and getinfo(9E).
    346  */
    347 static int
    348 igb_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
    349 {
    350 	igb_t *igb;
    351 	struct igb_osdep *osdep;
    352 	struct e1000_hw *hw;
    353 	int instance;
    354 
    355 	/*
    356 	 * Check the command and perform corresponding operations
    357 	 */
    358 	switch (cmd) {
    359 	default:
    360 		return (DDI_FAILURE);
    361 
    362 	case DDI_RESUME:
    363 		return (igb_resume(devinfo));
    364 
    365 	case DDI_ATTACH:
    366 		break;
    367 	}
    368 
    369 	/* Get the device instance */
    370 	instance = ddi_get_instance(devinfo);
    371 
    372 	/* Allocate memory for the instance data structure */
    373 	igb = kmem_zalloc(sizeof (igb_t), KM_SLEEP);
    374 
    375 	igb->dip = devinfo;
    376 	igb->instance = instance;
    377 
    378 	hw = &igb->hw;
    379 	osdep = &igb->osdep;
    380 	hw->back = osdep;
    381 	osdep->igb = igb;
    382 
    383 	/* Attach the instance pointer to the dev_info data structure */
    384 	ddi_set_driver_private(devinfo, igb);
    385 
    386 
    387 	/* Initialize for fma support */
    388 	igb->fm_capabilities = igb_get_prop(igb, "fm-capable",
    389 	    0, 0x0f,
    390 	    DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
    391 	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE);
    392 	igb_fm_init(igb);
    393 	igb->attach_progress |= ATTACH_PROGRESS_FMINIT;
    394 
    395 	/*
    396 	 * Map PCI config space registers
    397 	 */
    398 	if (pci_config_setup(devinfo, &osdep->cfg_handle) != DDI_SUCCESS) {
    399 		igb_error(igb, "Failed to map PCI configurations");
    400 		goto attach_fail;
    401 	}
    402 	igb->attach_progress |= ATTACH_PROGRESS_PCI_CONFIG;
    403 
    404 	/*
    405 	 * Identify the chipset family
    406 	 */
    407 	if (igb_identify_hardware(igb) != IGB_SUCCESS) {
    408 		igb_error(igb, "Failed to identify hardware");
    409 		goto attach_fail;
    410 	}
    411 
    412 	/*
    413 	 * Map device registers
    414 	 */
    415 	if (igb_regs_map(igb) != IGB_SUCCESS) {
    416 		igb_error(igb, "Failed to map device registers");
    417 		goto attach_fail;
    418 	}
    419 	igb->attach_progress |= ATTACH_PROGRESS_REGS_MAP;
    420 
    421 	/*
    422 	 * Initialize driver parameters
    423 	 */
    424 	igb_init_properties(igb);
    425 	igb->attach_progress |= ATTACH_PROGRESS_PROPS;
    426 
    427 	/*
    428 	 * Allocate interrupts
    429 	 */
    430 	if (igb_alloc_intrs(igb) != IGB_SUCCESS) {
    431 		igb_error(igb, "Failed to allocate interrupts");
    432 		goto attach_fail;
    433 	}
    434 	igb->attach_progress |= ATTACH_PROGRESS_ALLOC_INTR;
    435 
    436 	/*
    437 	 * Allocate rx/tx rings based on the ring numbers.
    438 	 * The actual numbers of rx/tx rings are decided by the number of
    439 	 * allocated interrupt vectors, so we should allocate the rings after
    440 	 * interrupts are allocated.
    441 	 */
    442 	if (igb_alloc_rings(igb) != IGB_SUCCESS) {
    443 		igb_error(igb, "Failed to allocate rx/tx rings or groups");
    444 		goto attach_fail;
    445 	}
    446 	igb->attach_progress |= ATTACH_PROGRESS_ALLOC_RINGS;
    447 
    448 	/*
    449 	 * Add interrupt handlers
    450 	 */
    451 	if (igb_add_intr_handlers(igb) != IGB_SUCCESS) {
    452 		igb_error(igb, "Failed to add interrupt handlers");
    453 		goto attach_fail;
    454 	}
    455 	igb->attach_progress |= ATTACH_PROGRESS_ADD_INTR;
    456 
    457 	/*
    458 	 * Initialize driver parameters
    459 	 */
    460 	if (igb_init_driver_settings(igb) != IGB_SUCCESS) {
    461 		igb_error(igb, "Failed to initialize driver settings");
    462 		goto attach_fail;
    463 	}
    464 
    465 	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK) {
    466 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
    467 		goto attach_fail;
    468 	}
    469 
    470 	/*
    471 	 * Initialize mutexes for this device.
    472 	 * Do this before enabling the interrupt handler and
    473 	 * register the softint to avoid the condition where
    474 	 * interrupt handler can try using uninitialized mutex
    475 	 */
    476 	igb_init_locks(igb);
    477 	igb->attach_progress |= ATTACH_PROGRESS_LOCKS;
    478 
    479 	/*
    480 	 * Initialize the adapter
    481 	 */
    482 	if (igb_init(igb) != IGB_SUCCESS) {
    483 		igb_error(igb, "Failed to initialize adapter");
    484 		goto attach_fail;
    485 	}
    486 	igb->attach_progress |= ATTACH_PROGRESS_INIT_ADAPTER;
    487 
    488 	/*
    489 	 * Initialize statistics
    490 	 */
    491 	if (igb_init_stats(igb) != IGB_SUCCESS) {
    492 		igb_error(igb, "Failed to initialize statistics");
    493 		goto attach_fail;
    494 	}
    495 	igb->attach_progress |= ATTACH_PROGRESS_STATS;
    496 
    497 	/*
    498 	 * Register the driver to the MAC
    499 	 */
    500 	if (igb_register_mac(igb) != IGB_SUCCESS) {
    501 		igb_error(igb, "Failed to register MAC");
    502 		goto attach_fail;
    503 	}
    504 	igb->attach_progress |= ATTACH_PROGRESS_MAC;
    505 
    506 	/*
    507 	 * Now that mutex locks are initialized, and the chip is also
    508 	 * initialized, enable interrupts.
    509 	 */
    510 	if (igb_enable_intrs(igb) != IGB_SUCCESS) {
    511 		igb_error(igb, "Failed to enable DDI interrupts");
    512 		goto attach_fail;
    513 	}
    514 	igb->attach_progress |= ATTACH_PROGRESS_ENABLE_INTR;
    515 
    516 	igb_log(igb, "%s", igb_version);
    517 	atomic_or_32(&igb->igb_state, IGB_INITIALIZED);
    518 
    519 	return (DDI_SUCCESS);
    520 
    521 attach_fail:
    522 	igb_unconfigure(devinfo, igb);
    523 	return (DDI_FAILURE);
    524 }
    525 
    526 /*
    527  * igb_detach - driver detach
    528  *
    529  * The detach() function is the complement of the attach routine.
    530  * If cmd is set to DDI_DETACH, detach() is used to remove  the
    531  * state  associated  with  a  given  instance of a device node
    532  * prior to the removal of that instance from the system.
    533  *
    534  * The detach() function will be called once for each  instance
    535  * of the device for which there has been a successful attach()
    536  * once there are no longer  any  opens  on  the  device.
    537  *
    538  * Interrupts routine are disabled, All memory allocated by this
    539  * driver are freed.
    540  */
    541 static int
    542 igb_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
    543 {
    544 	igb_t *igb;
    545 
    546 	/*
    547 	 * Check detach command
    548 	 */
    549 	switch (cmd) {
    550 	default:
    551 		return (DDI_FAILURE);
    552 
    553 	case DDI_SUSPEND:
    554 		return (igb_suspend(devinfo));
    555 
    556 	case DDI_DETACH:
    557 		break;
    558 	}
    559 
    560 
    561 	/*
    562 	 * Get the pointer to the driver private data structure
    563 	 */
    564 	igb = (igb_t *)ddi_get_driver_private(devinfo);
    565 	if (igb == NULL)
    566 		return (DDI_FAILURE);
    567 
    568 	/*
    569 	 * Unregister MAC. If failed, we have to fail the detach
    570 	 */
    571 	if (mac_unregister(igb->mac_hdl) != 0) {
    572 		igb_error(igb, "Failed to unregister MAC");
    573 		return (DDI_FAILURE);
    574 	}
    575 	igb->attach_progress &= ~ATTACH_PROGRESS_MAC;
    576 
    577 	/*
    578 	 * If the device is still running, it needs to be stopped first.
    579 	 * This check is necessary because under some specific circumstances,
    580 	 * the detach routine can be called without stopping the interface
    581 	 * first.
    582 	 */
    583 	mutex_enter(&igb->gen_lock);
    584 	if (igb->igb_state & IGB_STARTED) {
    585 		atomic_and_32(&igb->igb_state, ~IGB_STARTED);
    586 		igb_stop(igb, B_TRUE);
    587 		mutex_exit(&igb->gen_lock);
    588 		/* Disable and stop the watchdog timer */
    589 		igb_disable_watchdog_timer(igb);
    590 	} else
    591 		mutex_exit(&igb->gen_lock);
    592 
    593 	/*
    594 	 * Check if there are still rx buffers held by the upper layer.
    595 	 * If so, fail the detach.
    596 	 */
    597 	if (!igb_rx_drain(igb))
    598 		return (DDI_FAILURE);
    599 
    600 	/*
    601 	 * Do the remaining unconfigure routines
    602 	 */
    603 	igb_unconfigure(devinfo, igb);
    604 
    605 	return (DDI_SUCCESS);
    606 }
    607 
    608 /*
    609  * quiesce(9E) entry point.
    610  *
    611  * This function is called when the system is single-threaded at high
    612  * PIL with preemption disabled. Therefore, this function must not be
    613  * blocked.
    614  *
    615  * This function returns DDI_SUCCESS on success, or DDI_FAILURE on failure.
    616  * DDI_FAILURE indicates an error condition and should almost never happen.
    617  */
    618 static int
    619 igb_quiesce(dev_info_t *devinfo)
    620 {
    621 	igb_t *igb;
    622 	struct e1000_hw *hw;
    623 
    624 	igb = (igb_t *)ddi_get_driver_private(devinfo);
    625 
    626 	if (igb == NULL)
    627 		return (DDI_FAILURE);
    628 
    629 	hw = &igb->hw;
    630 
    631 	/*
    632 	 * Disable the adapter interrupts
    633 	 */
    634 	igb_disable_adapter_interrupts(igb);
    635 
    636 	/* Tell firmware driver is no longer in control */
    637 	igb_release_driver_control(hw);
    638 
    639 	/*
    640 	 * Reset the chipset
    641 	 */
    642 	(void) e1000_reset_hw(hw);
    643 
    644 	/*
    645 	 * Reset PHY if possible
    646 	 */
    647 	if (e1000_check_reset_block(hw) == E1000_SUCCESS)
    648 		(void) e1000_phy_hw_reset(hw);
    649 
    650 	return (DDI_SUCCESS);
    651 }
    652 
    653 /*
    654  * igb_unconfigure - release all resources held by this instance
    655  */
    656 static void
    657 igb_unconfigure(dev_info_t *devinfo, igb_t *igb)
    658 {
    659 	/*
    660 	 * Disable interrupt
    661 	 */
    662 	if (igb->attach_progress & ATTACH_PROGRESS_ENABLE_INTR) {
    663 		(void) igb_disable_intrs(igb);
    664 	}
    665 
    666 	/*
    667 	 * Unregister MAC
    668 	 */
    669 	if (igb->attach_progress & ATTACH_PROGRESS_MAC) {
    670 		(void) mac_unregister(igb->mac_hdl);
    671 	}
    672 
    673 	/*
    674 	 * Free statistics
    675 	 */
    676 	if (igb->attach_progress & ATTACH_PROGRESS_STATS) {
    677 		kstat_delete((kstat_t *)igb->igb_ks);
    678 	}
    679 
    680 	/*
    681 	 * Remove interrupt handlers
    682 	 */
    683 	if (igb->attach_progress & ATTACH_PROGRESS_ADD_INTR) {
    684 		igb_rem_intr_handlers(igb);
    685 	}
    686 
    687 	/*
    688 	 * Remove interrupts
    689 	 */
    690 	if (igb->attach_progress & ATTACH_PROGRESS_ALLOC_INTR) {
    691 		igb_rem_intrs(igb);
    692 	}
    693 
    694 	/*
    695 	 * Remove driver properties
    696 	 */
    697 	if (igb->attach_progress & ATTACH_PROGRESS_PROPS) {
    698 		(void) ddi_prop_remove_all(devinfo);
    699 	}
    700 
    701 	/*
    702 	 * Stop the adapter
    703 	 */
    704 	if (igb->attach_progress & ATTACH_PROGRESS_INIT_ADAPTER) {
    705 		mutex_enter(&igb->gen_lock);
    706 		igb_stop_adapter(igb);
    707 		mutex_exit(&igb->gen_lock);
    708 		if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
    709 			ddi_fm_service_impact(igb->dip, DDI_SERVICE_UNAFFECTED);
    710 	}
    711 
    712 	/*
    713 	 * Free multicast table
    714 	 */
    715 	igb_release_multicast(igb);
    716 
    717 	/*
    718 	 * Free register handle
    719 	 */
    720 	if (igb->attach_progress & ATTACH_PROGRESS_REGS_MAP) {
    721 		if (igb->osdep.reg_handle != NULL)
    722 			ddi_regs_map_free(&igb->osdep.reg_handle);
    723 	}
    724 
    725 	/*
    726 	 * Free PCI config handle
    727 	 */
    728 	if (igb->attach_progress & ATTACH_PROGRESS_PCI_CONFIG) {
    729 		if (igb->osdep.cfg_handle != NULL)
    730 			pci_config_teardown(&igb->osdep.cfg_handle);
    731 	}
    732 
    733 	/*
    734 	 * Free locks
    735 	 */
    736 	if (igb->attach_progress & ATTACH_PROGRESS_LOCKS) {
    737 		igb_destroy_locks(igb);
    738 	}
    739 
    740 	/*
    741 	 * Free the rx/tx rings
    742 	 */
    743 	if (igb->attach_progress & ATTACH_PROGRESS_ALLOC_RINGS) {
    744 		igb_free_rings(igb);
    745 	}
    746 
    747 	/*
    748 	 * Remove FMA
    749 	 */
    750 	if (igb->attach_progress & ATTACH_PROGRESS_FMINIT) {
    751 		igb_fm_fini(igb);
    752 	}
    753 
    754 	/*
    755 	 * Free the driver data structure
    756 	 */
    757 	kmem_free(igb, sizeof (igb_t));
    758 
    759 	ddi_set_driver_private(devinfo, NULL);
    760 }
    761 
    762 /*
    763  * igb_register_mac - Register the driver and its function pointers with
    764  * the GLD interface
    765  */
    766 static int
    767 igb_register_mac(igb_t *igb)
    768 {
    769 	struct e1000_hw *hw = &igb->hw;
    770 	mac_register_t *mac;
    771 	int status;
    772 
    773 	if ((mac = mac_alloc(MAC_VERSION)) == NULL)
    774 		return (IGB_FAILURE);
    775 
    776 	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
    777 	mac->m_driver = igb;
    778 	mac->m_dip = igb->dip;
    779 	mac->m_src_addr = hw->mac.addr;
    780 	mac->m_callbacks = &igb_m_callbacks;
    781 	mac->m_min_sdu = 0;
    782 	mac->m_max_sdu = igb->max_frame_size -
    783 	    sizeof (struct ether_vlan_header) - ETHERFCSL;
    784 	mac->m_margin = VLAN_TAGSZ;
    785 	mac->m_priv_props = igb_priv_props;
    786 	mac->m_priv_prop_count = IGB_MAX_PRIV_PROPS;
    787 	mac->m_v12n = MAC_VIRT_LEVEL1;
    788 
    789 	status = mac_register(mac, &igb->mac_hdl);
    790 
    791 	mac_free(mac);
    792 
    793 	return ((status == 0) ? IGB_SUCCESS : IGB_FAILURE);
    794 }
    795 
    796 /*
    797  * igb_identify_hardware - Identify the type of the chipset
    798  */
    799 static int
    800 igb_identify_hardware(igb_t *igb)
    801 {
    802 	struct e1000_hw *hw = &igb->hw;
    803 	struct igb_osdep *osdep = &igb->osdep;
    804 
    805 	/*
    806 	 * Get the device id
    807 	 */
    808 	hw->vendor_id =
    809 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_VENID);
    810 	hw->device_id =
    811 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_DEVID);
    812 	hw->revision_id =
    813 	    pci_config_get8(osdep->cfg_handle, PCI_CONF_REVID);
    814 	hw->subsystem_device_id =
    815 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_SUBSYSID);
    816 	hw->subsystem_vendor_id =
    817 	    pci_config_get16(osdep->cfg_handle, PCI_CONF_SUBVENID);
    818 
    819 	/*
    820 	 * Set the mac type of the adapter based on the device id
    821 	 */
    822 	if (e1000_set_mac_type(hw) != E1000_SUCCESS) {
    823 		return (IGB_FAILURE);
    824 	}
    825 
    826 	/*
    827 	 * Install adapter capabilities based on mac type
    828 	 */
    829 	switch (hw->mac.type) {
    830 	case e1000_82575:
    831 		igb->capab = &igb_82575_cap;
    832 		break;
    833 	case e1000_82576:
    834 		igb->capab = &igb_82576_cap;
    835 		break;
    836 	case e1000_82580:
    837 		igb->capab = &igb_82580_cap;
    838 		break;
    839 	default:
    840 		return (IGB_FAILURE);
    841 	}
    842 
    843 	return (IGB_SUCCESS);
    844 }
    845 
    846 /*
    847  * igb_regs_map - Map the device registers
    848  */
    849 static int
    850 igb_regs_map(igb_t *igb)
    851 {
    852 	dev_info_t *devinfo = igb->dip;
    853 	struct e1000_hw *hw = &igb->hw;
    854 	struct igb_osdep *osdep = &igb->osdep;
    855 	off_t mem_size;
    856 
    857 	/*
    858 	 * First get the size of device registers to be mapped.
    859 	 */
    860 	if (ddi_dev_regsize(devinfo, IGB_ADAPTER_REGSET, &mem_size) !=
    861 	    DDI_SUCCESS) {
    862 		return (IGB_FAILURE);
    863 	}
    864 
    865 	/*
    866 	 * Call ddi_regs_map_setup() to map registers
    867 	 */
    868 	if ((ddi_regs_map_setup(devinfo, IGB_ADAPTER_REGSET,
    869 	    (caddr_t *)&hw->hw_addr, 0,
    870 	    mem_size, &igb_regs_acc_attr,
    871 	    &osdep->reg_handle)) != DDI_SUCCESS) {
    872 		return (IGB_FAILURE);
    873 	}
    874 
    875 	return (IGB_SUCCESS);
    876 }
    877 
    878 /*
    879  * igb_init_properties - Initialize driver properties
    880  */
    881 static void
    882 igb_init_properties(igb_t *igb)
    883 {
    884 	/*
    885 	 * Get conf file properties, including link settings
    886 	 * jumbo frames, ring number, descriptor number, etc.
    887 	 */
    888 	igb_get_conf(igb);
    889 }
    890 
    891 /*
    892  * igb_init_driver_settings - Initialize driver settings
    893  *
    894  * The settings include hardware function pointers, bus information,
    895  * rx/tx rings settings, link state, and any other parameters that
    896  * need to be setup during driver initialization.
    897  */
    898 static int
    899 igb_init_driver_settings(igb_t *igb)
    900 {
    901 	struct e1000_hw *hw = &igb->hw;
    902 	igb_rx_ring_t *rx_ring;
    903 	igb_tx_ring_t *tx_ring;
    904 	uint32_t rx_size;
    905 	uint32_t tx_size;
    906 	int i;
    907 
    908 	/*
    909 	 * Initialize chipset specific hardware function pointers
    910 	 */
    911 	if (e1000_setup_init_funcs(hw, B_TRUE) != E1000_SUCCESS) {
    912 		return (IGB_FAILURE);
    913 	}
    914 
    915 	/*
    916 	 * Get bus information
    917 	 */
    918 	if (e1000_get_bus_info(hw) != E1000_SUCCESS) {
    919 		return (IGB_FAILURE);
    920 	}
    921 
    922 	/*
    923 	 * Get the system page size
    924 	 */
    925 	igb->page_size = ddi_ptob(igb->dip, (ulong_t)1);
    926 
    927 	/*
    928 	 * Set rx buffer size
    929 	 * The IP header alignment room is counted in the calculation.
    930 	 * The rx buffer size is in unit of 1K that is required by the
    931 	 * chipset hardware.
    932 	 */
    933 	rx_size = igb->max_frame_size + IPHDR_ALIGN_ROOM;
    934 	igb->rx_buf_size = ((rx_size >> 10) +
    935 	    ((rx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
    936 
    937 	/*
    938 	 * Set tx buffer size
    939 	 */
    940 	tx_size = igb->max_frame_size;
    941 	igb->tx_buf_size = ((tx_size >> 10) +
    942 	    ((tx_size & (((uint32_t)1 << 10) - 1)) > 0 ? 1 : 0)) << 10;
    943 
    944 	/*
    945 	 * Initialize rx/tx rings parameters
    946 	 */
    947 	for (i = 0; i < igb->num_rx_rings; i++) {
    948 		rx_ring = &igb->rx_rings[i];
    949 		rx_ring->index = i;
    950 		rx_ring->igb = igb;
    951 	}
    952 
    953 	for (i = 0; i < igb->num_tx_rings; i++) {
    954 		tx_ring = &igb->tx_rings[i];
    955 		tx_ring->index = i;
    956 		tx_ring->igb = igb;
    957 		if (igb->tx_head_wb_enable)
    958 			tx_ring->tx_recycle = igb_tx_recycle_head_wb;
    959 		else
    960 			tx_ring->tx_recycle = igb_tx_recycle_legacy;
    961 
    962 		tx_ring->ring_size = igb->tx_ring_size;
    963 		tx_ring->free_list_size = igb->tx_ring_size +
    964 		    (igb->tx_ring_size >> 1);
    965 	}
    966 
    967 	/*
    968 	 * Initialize values of interrupt throttling rates
    969 	 */
    970 	for (i = 1; i < MAX_NUM_EITR; i++)
    971 		igb->intr_throttling[i] = igb->intr_throttling[0];
    972 
    973 	/*
    974 	 * The initial link state should be "unknown"
    975 	 */
    976 	igb->link_state = LINK_STATE_UNKNOWN;
    977 
    978 	return (IGB_SUCCESS);
    979 }
    980 
    981 /*
    982  * igb_init_locks - Initialize locks
    983  */
    984 static void
    985 igb_init_locks(igb_t *igb)
    986 {
    987 	igb_rx_ring_t *rx_ring;
    988 	igb_tx_ring_t *tx_ring;
    989 	int i;
    990 
    991 	for (i = 0; i < igb->num_rx_rings; i++) {
    992 		rx_ring = &igb->rx_rings[i];
    993 		mutex_init(&rx_ring->rx_lock, NULL,
    994 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
    995 	}
    996 
    997 	for (i = 0; i < igb->num_tx_rings; i++) {
    998 		tx_ring = &igb->tx_rings[i];
    999 		mutex_init(&tx_ring->tx_lock, NULL,
   1000 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
   1001 		mutex_init(&tx_ring->recycle_lock, NULL,
   1002 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
   1003 		mutex_init(&tx_ring->tcb_head_lock, NULL,
   1004 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
   1005 		mutex_init(&tx_ring->tcb_tail_lock, NULL,
   1006 		    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
   1007 	}
   1008 
   1009 	mutex_init(&igb->gen_lock, NULL,
   1010 	    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
   1011 
   1012 	mutex_init(&igb->watchdog_lock, NULL,
   1013 	    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
   1014 
   1015 	mutex_init(&igb->link_lock, NULL,
   1016 	    MUTEX_DRIVER, DDI_INTR_PRI(igb->intr_pri));
   1017 }
   1018 
   1019 /*
   1020  * igb_destroy_locks - Destroy locks
   1021  */
   1022 static void
   1023 igb_destroy_locks(igb_t *igb)
   1024 {
   1025 	igb_rx_ring_t *rx_ring;
   1026 	igb_tx_ring_t *tx_ring;
   1027 	int i;
   1028 
   1029 	for (i = 0; i < igb->num_rx_rings; i++) {
   1030 		rx_ring = &igb->rx_rings[i];
   1031 		mutex_destroy(&rx_ring->rx_lock);
   1032 	}
   1033 
   1034 	for (i = 0; i < igb->num_tx_rings; i++) {
   1035 		tx_ring = &igb->tx_rings[i];
   1036 		mutex_destroy(&tx_ring->tx_lock);
   1037 		mutex_destroy(&tx_ring->recycle_lock);
   1038 		mutex_destroy(&tx_ring->tcb_head_lock);
   1039 		mutex_destroy(&tx_ring->tcb_tail_lock);
   1040 	}
   1041 
   1042 	mutex_destroy(&igb->gen_lock);
   1043 	mutex_destroy(&igb->watchdog_lock);
   1044 	mutex_destroy(&igb->link_lock);
   1045 }
   1046 
   1047 static int
   1048 igb_resume(dev_info_t *devinfo)
   1049 {
   1050 	igb_t *igb;
   1051 
   1052 	igb = (igb_t *)ddi_get_driver_private(devinfo);
   1053 	if (igb == NULL)
   1054 		return (DDI_FAILURE);
   1055 
   1056 	mutex_enter(&igb->gen_lock);
   1057 
   1058 	if (igb->igb_state & IGB_STARTED) {
   1059 		if (igb_start(igb, B_FALSE) != IGB_SUCCESS) {
   1060 			mutex_exit(&igb->gen_lock);
   1061 			return (DDI_FAILURE);
   1062 		}
   1063 
   1064 		/*
   1065 		 * Enable and start the watchdog timer
   1066 		 */
   1067 		igb_enable_watchdog_timer(igb);
   1068 	}
   1069 
   1070 	atomic_and_32(&igb->igb_state, ~IGB_SUSPENDED);
   1071 
   1072 	mutex_exit(&igb->gen_lock);
   1073 
   1074 	return (DDI_SUCCESS);
   1075 }
   1076 
   1077 static int
   1078 igb_suspend(dev_info_t *devinfo)
   1079 {
   1080 	igb_t *igb;
   1081 
   1082 	igb = (igb_t *)ddi_get_driver_private(devinfo);
   1083 	if (igb == NULL)
   1084 		return (DDI_FAILURE);
   1085 
   1086 	mutex_enter(&igb->gen_lock);
   1087 
   1088 	atomic_or_32(&igb->igb_state, IGB_SUSPENDED);
   1089 
   1090 	if (!(igb->igb_state & IGB_STARTED)) {
   1091 		mutex_exit(&igb->gen_lock);
   1092 		return (DDI_SUCCESS);
   1093 	}
   1094 
   1095 	igb_stop(igb, B_FALSE);
   1096 
   1097 	mutex_exit(&igb->gen_lock);
   1098 
   1099 	/*
   1100 	 * Disable and stop the watchdog timer
   1101 	 */
   1102 	igb_disable_watchdog_timer(igb);
   1103 
   1104 	return (DDI_SUCCESS);
   1105 }
   1106 
   1107 static int
   1108 igb_init(igb_t *igb)
   1109 {
   1110 	mutex_enter(&igb->gen_lock);
   1111 
   1112 	/*
   1113 	 * Initilize the adapter
   1114 	 */
   1115 	if (igb_init_adapter(igb) != IGB_SUCCESS) {
   1116 		mutex_exit(&igb->gen_lock);
   1117 		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
   1118 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
   1119 		return (IGB_FAILURE);
   1120 	}
   1121 
   1122 	mutex_exit(&igb->gen_lock);
   1123 
   1124 	return (IGB_SUCCESS);
   1125 }
   1126 
   1127 /*
   1128  * igb_init_mac_address - Initialize the default MAC address
   1129  *
   1130  * On success, the MAC address is entered in the igb->hw.mac.addr
   1131  * and hw->mac.perm_addr fields and the adapter's RAR(0) receive
   1132  * address register.
   1133  *
   1134  * Important side effects:
   1135  * 1. adapter is reset - this is required to put it in a known state.
   1136  * 2. all of non-volatile memory (NVM) is read & checksummed - NVM is where
   1137  * MAC address and all default settings are stored, so a valid checksum
   1138  * is required.
   1139  */
   1140 static int
   1141 igb_init_mac_address(igb_t *igb)
   1142 {
   1143 	struct e1000_hw *hw = &igb->hw;
   1144 
   1145 	ASSERT(mutex_owned(&igb->gen_lock));
   1146 
   1147 	/*
   1148 	 * Reset chipset to put the hardware in a known state
   1149 	 * before we try to get MAC address from NVM.
   1150 	 */
   1151 	if (e1000_reset_hw(hw) != E1000_SUCCESS) {
   1152 		igb_error(igb, "Adapter reset failed.");
   1153 		goto init_mac_fail;
   1154 	}
   1155 
   1156 	/*
   1157 	 * NVM validation
   1158 	 */
   1159 	if (e1000_validate_nvm_checksum(hw) < 0) {
   1160 		/*
   1161 		 * Some PCI-E parts fail the first check due to
   1162 		 * the link being in sleep state.  Call it again,
   1163 		 * if it fails a second time its a real issue.
   1164 		 */
   1165 		if (e1000_validate_nvm_checksum(hw) < 0) {
   1166 			igb_error(igb,
   1167 			    "Invalid NVM checksum. Please contact "
   1168 			    "the vendor to update the NVM.");
   1169 			goto init_mac_fail;
   1170 		}
   1171 	}
   1172 
   1173 	/*
   1174 	 * Get the mac address
   1175 	 * This function should handle SPARC case correctly.
   1176 	 */
   1177 	if (!igb_find_mac_address(igb)) {
   1178 		igb_error(igb, "Failed to get the mac address");
   1179 		goto init_mac_fail;
   1180 	}
   1181 
   1182 	/* Validate mac address */
   1183 	if (!is_valid_mac_addr(hw->mac.addr)) {
   1184 		igb_error(igb, "Invalid mac address");
   1185 		goto init_mac_fail;
   1186 	}
   1187 
   1188 	return (IGB_SUCCESS);
   1189 
   1190 init_mac_fail:
   1191 	return (IGB_FAILURE);
   1192 }
   1193 
   1194 /*
   1195  * igb_init_adapter - Initialize the adapter
   1196  */
   1197 static int
   1198 igb_init_adapter(igb_t *igb)
   1199 {
   1200 	struct e1000_hw *hw = &igb->hw;
   1201 	uint32_t pba;
   1202 	uint32_t high_water;
   1203 	int i;
   1204 
   1205 	ASSERT(mutex_owned(&igb->gen_lock));
   1206 
   1207 	/*
   1208 	 * In order to obtain the default MAC address, this will reset the
   1209 	 * adapter and validate the NVM that the address and many other
   1210 	 * default settings come from.
   1211 	 */
   1212 	if (igb_init_mac_address(igb) != IGB_SUCCESS) {
   1213 		igb_error(igb, "Failed to initialize MAC address");
   1214 		goto init_adapter_fail;
   1215 	}
   1216 
   1217 	/*
   1218 	 * Setup flow control
   1219 	 *
   1220 	 * These parameters set thresholds for the adapter's generation(Tx)
   1221 	 * and response(Rx) to Ethernet PAUSE frames.  These are just threshold
   1222 	 * settings.  Flow control is enabled or disabled in the configuration
   1223 	 * file.
   1224 	 * High-water mark is set down from the top of the rx fifo (not
   1225 	 * sensitive to max_frame_size) and low-water is set just below
   1226 	 * high-water mark.
   1227 	 * The high water mark must be low enough to fit one full frame above
   1228 	 * it in the rx FIFO.  Should be the lower of:
   1229 	 * 90% of the Rx FIFO size, or the full Rx FIFO size minus one full
   1230 	 * frame.
   1231 	 */
   1232 	/*
   1233 	 * The default setting of PBA is correct for 82575 and other supported
   1234 	 * adapters do not have the E1000_PBA register, so PBA value is only
   1235 	 * used for calculation here and is never written to the adapter.
   1236 	 */
   1237 	if (hw->mac.type == e1000_82575) {
   1238 		pba = E1000_PBA_34K;
   1239 	} else {
   1240 		pba = E1000_PBA_64K;
   1241 	}
   1242 
   1243 	high_water = min(((pba << 10) * 9 / 10),
   1244 	    ((pba << 10) - igb->max_frame_size));
   1245 
   1246 	if (hw->mac.type == e1000_82575) {
   1247 		/* 8-byte granularity */
   1248 		hw->fc.high_water = high_water & 0xFFF8;
   1249 		hw->fc.low_water = hw->fc.high_water - 8;
   1250 	} else {
   1251 		/* 16-byte granularity */
   1252 		hw->fc.high_water = high_water & 0xFFF0;
   1253 		hw->fc.low_water = hw->fc.high_water - 16;
   1254 	}
   1255 
   1256 	hw->fc.pause_time = E1000_FC_PAUSE_TIME;
   1257 	hw->fc.send_xon = B_TRUE;
   1258 
   1259 	(void) e1000_validate_mdi_setting(hw);
   1260 
   1261 	/*
   1262 	 * Reset the chipset hardware the second time to put PBA settings
   1263 	 * into effect.
   1264 	 */
   1265 	if (e1000_reset_hw(hw) != E1000_SUCCESS) {
   1266 		igb_error(igb, "Second reset failed");
   1267 		goto init_adapter_fail;
   1268 	}
   1269 
   1270 	/*
   1271 	 * Don't wait for auto-negotiation to complete
   1272 	 */
   1273 	hw->phy.autoneg_wait_to_complete = B_FALSE;
   1274 
   1275 	/*
   1276 	 * Copper options
   1277 	 */
   1278 	if (hw->phy.media_type == e1000_media_type_copper) {
   1279 		hw->phy.mdix = 0;	/* AUTO_ALL_MODES */
   1280 		hw->phy.disable_polarity_correction = B_FALSE;
   1281 		hw->phy.ms_type = e1000_ms_hw_default; /* E1000_MASTER_SLAVE */
   1282 	}
   1283 
   1284 	/*
   1285 	 * Initialize link settings
   1286 	 */
   1287 	(void) igb_setup_link(igb, B_FALSE);
   1288 
   1289 	/*
   1290 	 * Configure/Initialize hardware
   1291 	 */
   1292 	if (e1000_init_hw(hw) != E1000_SUCCESS) {
   1293 		igb_error(igb, "Failed to initialize hardware");
   1294 		goto init_adapter_fail;
   1295 	}
   1296 
   1297 	/*
   1298 	 *  Start the link setup timer
   1299 	 */
   1300 	igb_start_link_timer(igb);
   1301 
   1302 	/*
   1303 	 * Disable wakeup control by default
   1304 	 */
   1305 	E1000_WRITE_REG(hw, E1000_WUC, 0);
   1306 
   1307 	/*
   1308 	 * Record phy info in hw struct
   1309 	 */
   1310 	(void) e1000_get_phy_info(hw);
   1311 
   1312 	/*
   1313 	 * Make sure driver has control
   1314 	 */
   1315 	igb_get_driver_control(hw);
   1316 
   1317 	/*
   1318 	 * Restore LED settings to the default from EEPROM
   1319 	 * to meet the standard for Sun platforms.
   1320 	 */
   1321 	(void) e1000_cleanup_led(hw);
   1322 
   1323 	/*
   1324 	 * Setup MSI-X interrupts
   1325 	 */
   1326 	if (igb->intr_type == DDI_INTR_TYPE_MSIX)
   1327 		igb->capab->setup_msix(igb);
   1328 
   1329 	/*
   1330 	 * Initialize unicast addresses.
   1331 	 */
   1332 	igb_init_unicst(igb);
   1333 
   1334 	/*
   1335 	 * Setup and initialize the mctable structures.
   1336 	 */
   1337 	igb_setup_multicst(igb);
   1338 
   1339 	/*
   1340 	 * Set interrupt throttling rate
   1341 	 */
   1342 	for (i = 0; i < igb->intr_cnt; i++)
   1343 		E1000_WRITE_REG(hw, E1000_EITR(i), igb->intr_throttling[i]);
   1344 
   1345 	/*
   1346 	 * Save the state of the phy
   1347 	 */
   1348 	igb_get_phy_state(igb);
   1349 
   1350 	igb_param_sync(igb);
   1351 
   1352 	return (IGB_SUCCESS);
   1353 
   1354 init_adapter_fail:
   1355 	/*
   1356 	 * Reset PHY if possible
   1357 	 */
   1358 	if (e1000_check_reset_block(hw) == E1000_SUCCESS)
   1359 		(void) e1000_phy_hw_reset(hw);
   1360 
   1361 	return (IGB_FAILURE);
   1362 }
   1363 
   1364 /*
   1365  * igb_stop_adapter - Stop the adapter
   1366  */
   1367 static void
   1368 igb_stop_adapter(igb_t *igb)
   1369 {
   1370 	struct e1000_hw *hw = &igb->hw;
   1371 
   1372 	ASSERT(mutex_owned(&igb->gen_lock));
   1373 
   1374 	/* Stop the link setup timer */
   1375 	igb_stop_link_timer(igb);
   1376 
   1377 	/* Tell firmware driver is no longer in control */
   1378 	igb_release_driver_control(hw);
   1379 
   1380 	/*
   1381 	 * Reset the chipset
   1382 	 */
   1383 	if (e1000_reset_hw(hw) != E1000_SUCCESS) {
   1384 		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
   1385 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
   1386 	}
   1387 
   1388 	/*
   1389 	 * e1000_phy_hw_reset is not needed here, MAC reset above is sufficient
   1390 	 */
   1391 }
   1392 
   1393 /*
   1394  * igb_reset - Reset the chipset and restart the driver.
   1395  *
   1396  * It involves stopping and re-starting the chipset,
   1397  * and re-configuring the rx/tx rings.
   1398  */
   1399 static int
   1400 igb_reset(igb_t *igb)
   1401 {
   1402 	int i;
   1403 
   1404 	mutex_enter(&igb->gen_lock);
   1405 
   1406 	ASSERT(igb->igb_state & IGB_STARTED);
   1407 	atomic_and_32(&igb->igb_state, ~IGB_STARTED);
   1408 
   1409 	/*
   1410 	 * Disable the adapter interrupts to stop any rx/tx activities
   1411 	 * before draining pending data and resetting hardware.
   1412 	 */
   1413 	igb_disable_adapter_interrupts(igb);
   1414 
   1415 	/*
   1416 	 * Drain the pending transmit packets
   1417 	 */
   1418 	(void) igb_tx_drain(igb);
   1419 
   1420 	for (i = 0; i < igb->num_rx_rings; i++)
   1421 		mutex_enter(&igb->rx_rings[i].rx_lock);
   1422 	for (i = 0; i < igb->num_tx_rings; i++)
   1423 		mutex_enter(&igb->tx_rings[i].tx_lock);
   1424 
   1425 	/*
   1426 	 * Stop the adapter
   1427 	 */
   1428 	igb_stop_adapter(igb);
   1429 
   1430 	/*
   1431 	 * Clean the pending tx data/resources
   1432 	 */
   1433 	igb_tx_clean(igb);
   1434 
   1435 	/*
   1436 	 * Start the adapter
   1437 	 */
   1438 	if (igb_init_adapter(igb) != IGB_SUCCESS) {
   1439 		igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
   1440 		goto reset_failure;
   1441 	}
   1442 
   1443 	/*
   1444 	 * Setup the rx/tx rings
   1445 	 */
   1446 	igb->tx_ring_init = B_FALSE;
   1447 	igb_setup_rings(igb);
   1448 
   1449 	atomic_and_32(&igb->igb_state, ~(IGB_ERROR | IGB_STALL));
   1450 
   1451 	/*
   1452 	 * Enable adapter interrupts
   1453 	 * The interrupts must be enabled after the driver state is START
   1454 	 */
   1455 	igb->capab->enable_intr(igb);
   1456 
   1457 	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK)
   1458 		goto reset_failure;
   1459 
   1460 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
   1461 		goto reset_failure;
   1462 
   1463 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
   1464 		mutex_exit(&igb->tx_rings[i].tx_lock);
   1465 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
   1466 		mutex_exit(&igb->rx_rings[i].rx_lock);
   1467 
   1468 	atomic_or_32(&igb->igb_state, IGB_STARTED);
   1469 
   1470 	mutex_exit(&igb->gen_lock);
   1471 
   1472 	return (IGB_SUCCESS);
   1473 
   1474 reset_failure:
   1475 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
   1476 		mutex_exit(&igb->tx_rings[i].tx_lock);
   1477 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
   1478 		mutex_exit(&igb->rx_rings[i].rx_lock);
   1479 
   1480 	mutex_exit(&igb->gen_lock);
   1481 
   1482 	ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
   1483 
   1484 	return (IGB_FAILURE);
   1485 }
   1486 
   1487 /*
   1488  * igb_tx_clean - Clean the pending transmit packets and DMA resources
   1489  */
   1490 static void
   1491 igb_tx_clean(igb_t *igb)
   1492 {
   1493 	igb_tx_ring_t *tx_ring;
   1494 	tx_control_block_t *tcb;
   1495 	link_list_t pending_list;
   1496 	uint32_t desc_num;
   1497 	int i, j;
   1498 
   1499 	LINK_LIST_INIT(&pending_list);
   1500 
   1501 	for (i = 0; i < igb->num_tx_rings; i++) {
   1502 		tx_ring = &igb->tx_rings[i];
   1503 
   1504 		mutex_enter(&tx_ring->recycle_lock);
   1505 
   1506 		/*
   1507 		 * Clean the pending tx data - the pending packets in the
   1508 		 * work_list that have no chances to be transmitted again.
   1509 		 *
   1510 		 * We must ensure the chipset is stopped or the link is down
   1511 		 * before cleaning the transmit packets.
   1512 		 */
   1513 		desc_num = 0;
   1514 		for (j = 0; j < tx_ring->ring_size; j++) {
   1515 			tcb = tx_ring->work_list[j];
   1516 			if (tcb != NULL) {
   1517 				desc_num += tcb->desc_num;
   1518 
   1519 				tx_ring->work_list[j] = NULL;
   1520 
   1521 				igb_free_tcb(tcb);
   1522 
   1523 				LIST_PUSH_TAIL(&pending_list, &tcb->link);
   1524 			}
   1525 		}
   1526 
   1527 		if (desc_num > 0) {
   1528 			atomic_add_32(&tx_ring->tbd_free, desc_num);
   1529 			ASSERT(tx_ring->tbd_free == tx_ring->ring_size);
   1530 
   1531 			/*
   1532 			 * Reset the head and tail pointers of the tbd ring;
   1533 			 * Reset the head write-back if it is enabled.
   1534 			 */
   1535 			tx_ring->tbd_head = 0;
   1536 			tx_ring->tbd_tail = 0;
   1537 			if (igb->tx_head_wb_enable)
   1538 				*tx_ring->tbd_head_wb = 0;
   1539 
   1540 			E1000_WRITE_REG(&igb->hw, E1000_TDH(tx_ring->index), 0);
   1541 			E1000_WRITE_REG(&igb->hw, E1000_TDT(tx_ring->index), 0);
   1542 		}
   1543 
   1544 		mutex_exit(&tx_ring->recycle_lock);
   1545 
   1546 		/*
   1547 		 * Add the tx control blocks in the pending list to
   1548 		 * the free list.
   1549 		 */
   1550 		igb_put_free_list(tx_ring, &pending_list);
   1551 	}
   1552 }
   1553 
   1554 /*
   1555  * igb_tx_drain - Drain the tx rings to allow pending packets to be transmitted
   1556  */
   1557 static boolean_t
   1558 igb_tx_drain(igb_t *igb)
   1559 {
   1560 	igb_tx_ring_t *tx_ring;
   1561 	boolean_t done;
   1562 	int i, j;
   1563 
   1564 	/*
   1565 	 * Wait for a specific time to allow pending tx packets
   1566 	 * to be transmitted.
   1567 	 *
   1568 	 * Check the counter tbd_free to see if transmission is done.
   1569 	 * No lock protection is needed here.
   1570 	 *
   1571 	 * Return B_TRUE if all pending packets have been transmitted;
   1572 	 * Otherwise return B_FALSE;
   1573 	 */
   1574 	for (i = 0; i < TX_DRAIN_TIME; i++) {
   1575 
   1576 		done = B_TRUE;
   1577 		for (j = 0; j < igb->num_tx_rings; j++) {
   1578 			tx_ring = &igb->tx_rings[j];
   1579 			done = done &&
   1580 			    (tx_ring->tbd_free == tx_ring->ring_size);
   1581 		}
   1582 
   1583 		if (done)
   1584 			break;
   1585 
   1586 		msec_delay(1);
   1587 	}
   1588 
   1589 	return (done);
   1590 }
   1591 
   1592 /*
   1593  * igb_rx_drain - Wait for all rx buffers to be released by upper layer
   1594  */
   1595 static boolean_t
   1596 igb_rx_drain(igb_t *igb)
   1597 {
   1598 	boolean_t done;
   1599 	int i;
   1600 
   1601 	/*
   1602 	 * Polling the rx free list to check if those rx buffers held by
   1603 	 * the upper layer are released.
   1604 	 *
   1605 	 * Check the counter rcb_free to see if all pending buffers are
   1606 	 * released. No lock protection is needed here.
   1607 	 *
   1608 	 * Return B_TRUE if all pending buffers have been released;
   1609 	 * Otherwise return B_FALSE;
   1610 	 */
   1611 	for (i = 0; i < RX_DRAIN_TIME; i++) {
   1612 		done = (igb->rcb_pending == 0);
   1613 
   1614 		if (done)
   1615 			break;
   1616 
   1617 		msec_delay(1);
   1618 	}
   1619 
   1620 	return (done);
   1621 }
   1622 
   1623 /*
   1624  * igb_start - Start the driver/chipset
   1625  */
   1626 int
   1627 igb_start(igb_t *igb, boolean_t alloc_buffer)
   1628 {
   1629 	int i;
   1630 
   1631 	ASSERT(mutex_owned(&igb->gen_lock));
   1632 
   1633 	if (alloc_buffer) {
   1634 		if (igb_alloc_rx_data(igb) != IGB_SUCCESS) {
   1635 			igb_error(igb,
   1636 			    "Failed to allocate software receive rings");
   1637 			return (IGB_FAILURE);
   1638 		}
   1639 
   1640 		/* Allocate buffers for all the rx/tx rings */
   1641 		if (igb_alloc_dma(igb) != IGB_SUCCESS) {
   1642 			igb_error(igb, "Failed to allocate DMA resource");
   1643 			return (IGB_FAILURE);
   1644 		}
   1645 
   1646 		igb->tx_ring_init = B_TRUE;
   1647 	} else {
   1648 		igb->tx_ring_init = B_FALSE;
   1649 	}
   1650 
   1651 	for (i = 0; i < igb->num_rx_rings; i++)
   1652 		mutex_enter(&igb->rx_rings[i].rx_lock);
   1653 	for (i = 0; i < igb->num_tx_rings; i++)
   1654 		mutex_enter(&igb->tx_rings[i].tx_lock);
   1655 
   1656 	/*
   1657 	 * Start the adapter
   1658 	 */
   1659 	if ((igb->attach_progress & ATTACH_PROGRESS_INIT_ADAPTER) == 0) {
   1660 		if (igb_init_adapter(igb) != IGB_SUCCESS) {
   1661 			igb_fm_ereport(igb, DDI_FM_DEVICE_INVAL_STATE);
   1662 			goto start_failure;
   1663 		}
   1664 		igb->attach_progress |= ATTACH_PROGRESS_INIT_ADAPTER;
   1665 	}
   1666 
   1667 	/*
   1668 	 * Setup the rx/tx rings
   1669 	 */
   1670 	igb_setup_rings(igb);
   1671 
   1672 	/*
   1673 	 * Enable adapter interrupts
   1674 	 * The interrupts must be enabled after the driver state is START
   1675 	 */
   1676 	igb->capab->enable_intr(igb);
   1677 
   1678 	if (igb_check_acc_handle(igb->osdep.cfg_handle) != DDI_FM_OK)
   1679 		goto start_failure;
   1680 
   1681 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
   1682 		goto start_failure;
   1683 
   1684 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
   1685 		mutex_exit(&igb->tx_rings[i].tx_lock);
   1686 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
   1687 		mutex_exit(&igb->rx_rings[i].rx_lock);
   1688 
   1689 	return (IGB_SUCCESS);
   1690 
   1691 start_failure:
   1692 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
   1693 		mutex_exit(&igb->tx_rings[i].tx_lock);
   1694 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
   1695 		mutex_exit(&igb->rx_rings[i].rx_lock);
   1696 
   1697 	ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
   1698 
   1699 	return (IGB_FAILURE);
   1700 }
   1701 
   1702 /*
   1703  * igb_stop - Stop the driver/chipset
   1704  */
   1705 void
   1706 igb_stop(igb_t *igb, boolean_t free_buffer)
   1707 {
   1708 	int i;
   1709 
   1710 	ASSERT(mutex_owned(&igb->gen_lock));
   1711 
   1712 	igb->attach_progress &= ~ATTACH_PROGRESS_INIT_ADAPTER;
   1713 
   1714 	/*
   1715 	 * Disable the adapter interrupts
   1716 	 */
   1717 	igb_disable_adapter_interrupts(igb);
   1718 
   1719 	/*
   1720 	 * Drain the pending tx packets
   1721 	 */
   1722 	(void) igb_tx_drain(igb);
   1723 
   1724 	for (i = 0; i < igb->num_rx_rings; i++)
   1725 		mutex_enter(&igb->rx_rings[i].rx_lock);
   1726 	for (i = 0; i < igb->num_tx_rings; i++)
   1727 		mutex_enter(&igb->tx_rings[i].tx_lock);
   1728 
   1729 	/*
   1730 	 * Stop the adapter
   1731 	 */
   1732 	igb_stop_adapter(igb);
   1733 
   1734 	/*
   1735 	 * Clean the pending tx data/resources
   1736 	 */
   1737 	igb_tx_clean(igb);
   1738 
   1739 	for (i = igb->num_tx_rings - 1; i >= 0; i--)
   1740 		mutex_exit(&igb->tx_rings[i].tx_lock);
   1741 	for (i = igb->num_rx_rings - 1; i >= 0; i--)
   1742 		mutex_exit(&igb->rx_rings[i].rx_lock);
   1743 
   1744 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK)
   1745 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
   1746 
   1747 	if (igb->link_state == LINK_STATE_UP) {
   1748 		igb->link_state = LINK_STATE_UNKNOWN;
   1749 		mac_link_update(igb->mac_hdl, igb->link_state);
   1750 	}
   1751 
   1752 	if (free_buffer) {
   1753 		/*
   1754 		 * Release the DMA/memory resources of rx/tx rings
   1755 		 */
   1756 		igb_free_dma(igb);
   1757 		igb_free_rx_data(igb);
   1758 	}
   1759 }
   1760 
   1761 /*
   1762  * igb_alloc_rings - Allocate memory space for rx/tx rings
   1763  */
   1764 static int
   1765 igb_alloc_rings(igb_t *igb)
   1766 {
   1767 	/*
   1768 	 * Allocate memory space for rx rings
   1769 	 */
   1770 	igb->rx_rings = kmem_zalloc(
   1771 	    sizeof (igb_rx_ring_t) * igb->num_rx_rings,
   1772 	    KM_NOSLEEP);
   1773 
   1774 	if (igb->rx_rings == NULL) {
   1775 		return (IGB_FAILURE);
   1776 	}
   1777 
   1778 	/*
   1779 	 * Allocate memory space for tx rings
   1780 	 */
   1781 	igb->tx_rings = kmem_zalloc(
   1782 	    sizeof (igb_tx_ring_t) * igb->num_tx_rings,
   1783 	    KM_NOSLEEP);
   1784 
   1785 	if (igb->tx_rings == NULL) {
   1786 		kmem_free(igb->rx_rings,
   1787 		    sizeof (igb_rx_ring_t) * igb->num_rx_rings);
   1788 		igb->rx_rings = NULL;
   1789 		return (IGB_FAILURE);
   1790 	}
   1791 
   1792 	/*
   1793 	 * Allocate memory space for rx ring groups
   1794 	 */
   1795 	igb->rx_groups = kmem_zalloc(
   1796 	    sizeof (igb_rx_group_t) * igb->num_rx_groups,
   1797 	    KM_NOSLEEP);
   1798 
   1799 	if (igb->rx_groups == NULL) {
   1800 		kmem_free(igb->rx_rings,
   1801 		    sizeof (igb_rx_ring_t) * igb->num_rx_rings);
   1802 		kmem_free(igb->tx_rings,
   1803 		    sizeof (igb_tx_ring_t) * igb->num_tx_rings);
   1804 		igb->rx_rings = NULL;
   1805 		igb->tx_rings = NULL;
   1806 		return (IGB_FAILURE);
   1807 	}
   1808 
   1809 	return (IGB_SUCCESS);
   1810 }
   1811 
   1812 /*
   1813  * igb_free_rings - Free the memory space of rx/tx rings.
   1814  */
   1815 static void
   1816 igb_free_rings(igb_t *igb)
   1817 {
   1818 	if (igb->rx_rings != NULL) {
   1819 		kmem_free(igb->rx_rings,
   1820 		    sizeof (igb_rx_ring_t) * igb->num_rx_rings);
   1821 		igb->rx_rings = NULL;
   1822 	}
   1823 
   1824 	if (igb->tx_rings != NULL) {
   1825 		kmem_free(igb->tx_rings,
   1826 		    sizeof (igb_tx_ring_t) * igb->num_tx_rings);
   1827 		igb->tx_rings = NULL;
   1828 	}
   1829 
   1830 	if (igb->rx_groups != NULL) {
   1831 		kmem_free(igb->rx_groups,
   1832 		    sizeof (igb_rx_group_t) * igb->num_rx_groups);
   1833 		igb->rx_groups = NULL;
   1834 	}
   1835 }
   1836 
   1837 static int
   1838 igb_alloc_rx_data(igb_t *igb)
   1839 {
   1840 	igb_rx_ring_t *rx_ring;
   1841 	int i;
   1842 
   1843 	for (i = 0; i < igb->num_rx_rings; i++) {
   1844 		rx_ring = &igb->rx_rings[i];
   1845 		if (igb_alloc_rx_ring_data(rx_ring) != IGB_SUCCESS)
   1846 			goto alloc_rx_rings_failure;
   1847 	}
   1848 	return (IGB_SUCCESS);
   1849 
   1850 alloc_rx_rings_failure:
   1851 	igb_free_rx_data(igb);
   1852 	return (IGB_FAILURE);
   1853 }
   1854 
   1855 static void
   1856 igb_free_rx_data(igb_t *igb)
   1857 {
   1858 	igb_rx_ring_t *rx_ring;
   1859 	igb_rx_data_t *rx_data;
   1860 	int i;
   1861 
   1862 	for (i = 0; i < igb->num_rx_rings; i++) {
   1863 		rx_ring = &igb->rx_rings[i];
   1864 
   1865 		mutex_enter(&igb->rx_pending_lock);
   1866 		rx_data = rx_ring->rx_data;
   1867 
   1868 		if (rx_data != NULL) {
   1869 			rx_data->flag |= IGB_RX_STOPPED;
   1870 
   1871 			if (rx_data->rcb_pending == 0) {
   1872 				igb_free_rx_ring_data(rx_data);
   1873 				rx_ring->rx_data = NULL;
   1874 			}
   1875 		}
   1876 
   1877 		mutex_exit(&igb->rx_pending_lock);
   1878 	}
   1879 }
   1880 
   1881 /*
   1882  * igb_setup_rings - Setup rx/tx rings
   1883  */
   1884 static void
   1885 igb_setup_rings(igb_t *igb)
   1886 {
   1887 	/*
   1888 	 * Setup the rx/tx rings, including the following:
   1889 	 *
   1890 	 * 1. Setup the descriptor ring and the control block buffers;
   1891 	 * 2. Initialize necessary registers for receive/transmit;
   1892 	 * 3. Initialize software pointers/parameters for receive/transmit;
   1893 	 */
   1894 	igb_setup_rx(igb);
   1895 
   1896 	igb_setup_tx(igb);
   1897 }
   1898 
   1899 static void
   1900 igb_setup_rx_ring(igb_rx_ring_t *rx_ring)
   1901 {
   1902 	igb_t *igb = rx_ring->igb;
   1903 	igb_rx_data_t *rx_data = rx_ring->rx_data;
   1904 	struct e1000_hw *hw = &igb->hw;
   1905 	rx_control_block_t *rcb;
   1906 	union e1000_adv_rx_desc	*rbd;
   1907 	uint32_t size;
   1908 	uint32_t buf_low;
   1909 	uint32_t buf_high;
   1910 	uint32_t rxdctl;
   1911 	int i;
   1912 
   1913 	ASSERT(mutex_owned(&rx_ring->rx_lock));
   1914 	ASSERT(mutex_owned(&igb->gen_lock));
   1915 
   1916 	/*
   1917 	 * Initialize descriptor ring with buffer addresses
   1918 	 */
   1919 	for (i = 0; i < igb->rx_ring_size; i++) {
   1920 		rcb = rx_data->work_list[i];
   1921 		rbd = &rx_data->rbd_ring[i];
   1922 
   1923 		rbd->read.pkt_addr = rcb->rx_buf.dma_address;
   1924 		rbd->read.hdr_addr = NULL;
   1925 	}
   1926 
   1927 	/*
   1928 	 * Initialize the base address registers
   1929 	 */
   1930 	buf_low = (uint32_t)rx_data->rbd_area.dma_address;
   1931 	buf_high = (uint32_t)(rx_data->rbd_area.dma_address >> 32);
   1932 	E1000_WRITE_REG(hw, E1000_RDBAH(rx_ring->index), buf_high);
   1933 	E1000_WRITE_REG(hw, E1000_RDBAL(rx_ring->index), buf_low);
   1934 
   1935 	/*
   1936 	 * Initialize the length register
   1937 	 */
   1938 	size = rx_data->ring_size * sizeof (union e1000_adv_rx_desc);
   1939 	E1000_WRITE_REG(hw, E1000_RDLEN(rx_ring->index), size);
   1940 
   1941 	/*
   1942 	 * Initialize buffer size & descriptor type
   1943 	 */
   1944 	E1000_WRITE_REG(hw, E1000_SRRCTL(rx_ring->index),
   1945 	    ((igb->rx_buf_size >> E1000_SRRCTL_BSIZEPKT_SHIFT) |
   1946 	    E1000_SRRCTL_DESCTYPE_ADV_ONEBUF));
   1947 
   1948 	/*
   1949 	 * Setup the Receive Descriptor Control Register (RXDCTL)
   1950 	 */
   1951 	rxdctl = E1000_READ_REG(hw, E1000_RXDCTL(rx_ring->index));
   1952 	rxdctl &= igb->capab->rxdctl_mask;
   1953 	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
   1954 	rxdctl |= 16;		/* pthresh */
   1955 	rxdctl |= 8 << 8;	/* hthresh */
   1956 	rxdctl |= 1 << 16;	/* wthresh */
   1957 	E1000_WRITE_REG(hw, E1000_RXDCTL(rx_ring->index), rxdctl);
   1958 
   1959 	rx_data->rbd_next = 0;
   1960 }
   1961 
   1962 static void
   1963 igb_setup_rx(igb_t *igb)
   1964 {
   1965 	igb_rx_ring_t *rx_ring;
   1966 	igb_rx_data_t *rx_data;
   1967 	igb_rx_group_t *rx_group;
   1968 	struct e1000_hw *hw = &igb->hw;
   1969 	uint32_t rctl, rxcsum;
   1970 	uint32_t ring_per_group;
   1971 	int i;
   1972 
   1973 	/*
   1974 	 * Setup the Receive Control Register (RCTL), and enable the
   1975 	 * receiver. The initial configuration is to: enable the receiver,
   1976 	 * accept broadcasts, discard bad packets, accept long packets,
   1977 	 * disable VLAN filter checking, and set receive buffer size to
   1978 	 * 2k.  For 82575, also set the receive descriptor minimum
   1979 	 * threshold size to 1/2 the ring.
   1980 	 */
   1981 	rctl = E1000_READ_REG(hw, E1000_RCTL);
   1982 
   1983 	/*
   1984 	 * Clear the field used for wakeup control.  This driver doesn't do
   1985 	 * wakeup but leave this here for completeness.
   1986 	 */
   1987 	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
   1988 	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
   1989 
   1990 	rctl |= (E1000_RCTL_EN |	/* Enable Receive Unit */
   1991 	    E1000_RCTL_BAM |		/* Accept Broadcast Packets */
   1992 	    E1000_RCTL_LPE |		/* Large Packet Enable */
   1993 					/* Multicast filter offset */
   1994 	    (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT) |
   1995 	    E1000_RCTL_RDMTS_HALF |	/* rx descriptor threshold */
   1996 	    E1000_RCTL_SECRC);		/* Strip Ethernet CRC */
   1997 
   1998 	for (i = 0; i < igb->num_rx_groups; i++) {
   1999 		rx_group = &igb->rx_groups[i];
   2000 		rx_group->index = i;
   2001 		rx_group->igb = igb;
   2002 	}
   2003 
   2004 	/*
   2005 	 * Set up all rx descriptor rings - must be called before receive unit
   2006 	 * enabled.
   2007 	 */
   2008 	ring_per_group = igb->num_rx_rings / igb->num_rx_groups;
   2009 	for (i = 0; i < igb->num_rx_rings; i++) {
   2010 		rx_ring = &igb->rx_rings[i];
   2011 		igb_setup_rx_ring(rx_ring);
   2012 
   2013 		/*
   2014 		 * Map a ring to a group by assigning a group index
   2015 		 */
   2016 		rx_ring->group_index = i / ring_per_group;
   2017 	}
   2018 
   2019 	/*
   2020 	 * Setup the Rx Long Packet Max Length register
   2021 	 */
   2022 	E1000_WRITE_REG(hw, E1000_RLPML, igb->max_frame_size);
   2023 
   2024 	/*
   2025 	 * Hardware checksum settings
   2026 	 */
   2027 	if (igb->rx_hcksum_enable) {
   2028 		rxcsum =
   2029 		    E1000_RXCSUM_TUOFL |	/* TCP/UDP checksum */
   2030 		    E1000_RXCSUM_IPOFL;		/* IP checksum */
   2031 
   2032 		E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
   2033 	}
   2034 
   2035 	/*
   2036 	 * Setup classify and RSS for multiple receive queues
   2037 	 */
   2038 	switch (igb->vmdq_mode) {
   2039 	case E1000_VMDQ_OFF:
   2040 		/*
   2041 		 * One ring group, only RSS is needed when more than
   2042 		 * one ring enabled.
   2043 		 */
   2044 		if (igb->num_rx_rings > 1)
   2045 			igb_setup_rss(igb);
   2046 		break;
   2047 	case E1000_VMDQ_MAC:
   2048 		/*
   2049 		 * Multiple groups, each group has one ring,
   2050 		 * only the MAC classification is needed.
   2051 		 */
   2052 		igb_setup_mac_classify(igb);
   2053 		break;
   2054 	case E1000_VMDQ_MAC_RSS:
   2055 		/*
   2056 		 * Multiple groups and multiple rings, both
   2057 		 * MAC classification and RSS are needed.
   2058 		 */
   2059 		igb_setup_mac_rss_classify(igb);
   2060 		break;
   2061 	}
   2062 
   2063 	/*
   2064 	 * Enable the receive unit - must be done after all
   2065 	 * the rx setup above.
   2066 	 */
   2067 	E1000_WRITE_REG(hw, E1000_RCTL, rctl);
   2068 
   2069 	/*
   2070 	 * Initialize all adapter ring head & tail pointers - must
   2071 	 * be done after receive unit is enabled
   2072 	 */
   2073 	for (i = 0; i < igb->num_rx_rings; i++) {
   2074 		rx_ring = &igb->rx_rings[i];
   2075 		rx_data = rx_ring->rx_data;
   2076 		E1000_WRITE_REG(hw, E1000_RDH(i), 0);
   2077 		E1000_WRITE_REG(hw, E1000_RDT(i), rx_data->ring_size - 1);
   2078 	}
   2079 
   2080 	/*
   2081 	 * 82575 with manageability enabled needs a special flush to make
   2082 	 * sure the fifos start clean.
   2083 	 */
   2084 	if ((hw->mac.type == e1000_82575) &&
   2085 	    (E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN)) {
   2086 		e1000_rx_fifo_flush_82575(hw);
   2087 	}
   2088 }
   2089 
   2090 static void
   2091 igb_setup_tx_ring(igb_tx_ring_t *tx_ring)
   2092 {
   2093 	igb_t *igb = tx_ring->igb;
   2094 	struct e1000_hw *hw = &igb->hw;
   2095 	uint32_t size;
   2096 	uint32_t buf_low;
   2097 	uint32_t buf_high;
   2098 	uint32_t reg_val;
   2099 
   2100 	ASSERT(mutex_owned(&tx_ring->tx_lock));
   2101 	ASSERT(mutex_owned(&igb->gen_lock));
   2102 
   2103 
   2104 	/*
   2105 	 * Initialize the length register
   2106 	 */
   2107 	size = tx_ring->ring_size * sizeof (union e1000_adv_tx_desc);
   2108 	E1000_WRITE_REG(hw, E1000_TDLEN(tx_ring->index), size);
   2109 
   2110 	/*
   2111 	 * Initialize the base address registers
   2112 	 */
   2113 	buf_low = (uint32_t)tx_ring->tbd_area.dma_address;
   2114 	buf_high = (uint32_t)(tx_ring->tbd_area.dma_address >> 32);
   2115 	E1000_WRITE_REG(hw, E1000_TDBAL(tx_ring->index), buf_low);
   2116 	E1000_WRITE_REG(hw, E1000_TDBAH(tx_ring->index), buf_high);
   2117 
   2118 	/*
   2119 	 * Setup head & tail pointers
   2120 	 */
   2121 	E1000_WRITE_REG(hw, E1000_TDH(tx_ring->index), 0);
   2122 	E1000_WRITE_REG(hw, E1000_TDT(tx_ring->index), 0);
   2123 
   2124 	/*
   2125 	 * Setup head write-back
   2126 	 */
   2127 	if (igb->tx_head_wb_enable) {
   2128 		/*
   2129 		 * The memory of the head write-back is allocated using
   2130 		 * the extra tbd beyond the tail of the tbd ring.
   2131 		 */
   2132 		tx_ring->tbd_head_wb = (uint32_t *)
   2133 		    ((uintptr_t)tx_ring->tbd_area.address + size);
   2134 		*tx_ring->tbd_head_wb = 0;
   2135 
   2136 		buf_low = (uint32_t)
   2137 		    (tx_ring->tbd_area.dma_address + size);
   2138 		buf_high = (uint32_t)
   2139 		    ((tx_ring->tbd_area.dma_address + size) >> 32);
   2140 
   2141 		/* Set the head write-back enable bit */
   2142 		buf_low |= E1000_TX_HEAD_WB_ENABLE;
   2143 
   2144 		E1000_WRITE_REG(hw, E1000_TDWBAL(tx_ring->index), buf_low);
   2145 		E1000_WRITE_REG(hw, E1000_TDWBAH(tx_ring->index), buf_high);
   2146 
   2147 		/*
   2148 		 * Turn off relaxed ordering for head write back or it will
   2149 		 * cause problems with the tx recycling
   2150 		 */
   2151 		reg_val = E1000_READ_REG(hw,
   2152 		    E1000_DCA_TXCTRL(tx_ring->index));
   2153 		reg_val &= ~E1000_DCA_TXCTRL_TX_WB_RO_EN;
   2154 		E1000_WRITE_REG(hw,
   2155 		    E1000_DCA_TXCTRL(tx_ring->index), reg_val);
   2156 	} else {
   2157 		tx_ring->tbd_head_wb = NULL;
   2158 	}
   2159 
   2160 	tx_ring->tbd_head = 0;
   2161 	tx_ring->tbd_tail = 0;
   2162 	tx_ring->tbd_free = tx_ring->ring_size;
   2163 
   2164 	if (igb->tx_ring_init == B_TRUE) {
   2165 		tx_ring->tcb_head = 0;
   2166 		tx_ring->tcb_tail = 0;
   2167 		tx_ring->tcb_free = tx_ring->free_list_size;
   2168 	}
   2169 
   2170 	/*
   2171 	 * Enable TXDCTL per queue
   2172 	 */
   2173 	reg_val = E1000_READ_REG(hw, E1000_TXDCTL(tx_ring->index));
   2174 	reg_val |= E1000_TXDCTL_QUEUE_ENABLE;
   2175 	E1000_WRITE_REG(hw, E1000_TXDCTL(tx_ring->index), reg_val);
   2176 
   2177 	/*
   2178 	 * Initialize hardware checksum offload settings
   2179 	 */
   2180 	bzero(&tx_ring->tx_context, sizeof (tx_context_t));
   2181 }
   2182 
   2183 static void
   2184 igb_setup_tx(igb_t *igb)
   2185 {
   2186 	igb_tx_ring_t *tx_ring;
   2187 	struct e1000_hw *hw = &igb->hw;
   2188 	uint32_t reg_val;
   2189 	int i;
   2190 
   2191 	for (i = 0; i < igb->num_tx_rings; i++) {
   2192 		tx_ring = &igb->tx_rings[i];
   2193 		igb_setup_tx_ring(tx_ring);
   2194 	}
   2195 
   2196 	/*
   2197 	 * Setup the Transmit Control Register (TCTL)
   2198 	 */
   2199 	reg_val = E1000_READ_REG(hw, E1000_TCTL);
   2200 	reg_val &= ~E1000_TCTL_CT;
   2201 	reg_val |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
   2202 	    (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
   2203 
   2204 	/* Enable transmits */
   2205 	reg_val |= E1000_TCTL_EN;
   2206 
   2207 	E1000_WRITE_REG(hw, E1000_TCTL, reg_val);
   2208 }
   2209 
   2210 /*
   2211  * igb_setup_rss - Setup receive-side scaling feature
   2212  */
   2213 static void
   2214 igb_setup_rss(igb_t *igb)
   2215 {
   2216 	struct e1000_hw *hw = &igb->hw;
   2217 	uint32_t i, mrqc, rxcsum;
   2218 	int shift = 0;
   2219 	uint32_t random;
   2220 	union e1000_reta {
   2221 		uint32_t	dword;
   2222 		uint8_t		bytes[4];
   2223 	} reta;
   2224 
   2225 	/* Setup the Redirection Table */
   2226 	if (hw->mac.type == e1000_82576) {
   2227 		shift = 3;
   2228 	} else if (hw->mac.type == e1000_82575) {
   2229 		shift = 6;
   2230 	}
   2231 	for (i = 0; i < (32 * 4); i++) {
   2232 		reta.bytes[i & 3] = (i % igb->num_rx_rings) << shift;
   2233 		if ((i & 3) == 3) {
   2234 			E1000_WRITE_REG(hw,
   2235 			    (E1000_RETA(0) + (i & ~3)), reta.dword);
   2236 		}
   2237 	}
   2238 
   2239 	/* Fill out hash function seeds */
   2240 	for (i = 0; i < 10; i++) {
   2241 		(void) random_get_pseudo_bytes((uint8_t *)&random,
   2242 		    sizeof (uint32_t));
   2243 		E1000_WRITE_REG(hw, E1000_RSSRK(i), random);
   2244 	}
   2245 
   2246 	/* Setup the Multiple Receive Queue Control register */
   2247 	mrqc = E1000_MRQC_ENABLE_RSS_4Q;
   2248 	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
   2249 	    E1000_MRQC_RSS_FIELD_IPV4_TCP |
   2250 	    E1000_MRQC_RSS_FIELD_IPV6 |
   2251 	    E1000_MRQC_RSS_FIELD_IPV6_TCP |
   2252 	    E1000_MRQC_RSS_FIELD_IPV4_UDP |
   2253 	    E1000_MRQC_RSS_FIELD_IPV6_UDP |
   2254 	    E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
   2255 	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
   2256 
   2257 	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
   2258 
   2259 	/*
   2260 	 * Disable Packet Checksum to enable RSS for multiple receive queues.
   2261 	 *
   2262 	 * The Packet Checksum is not ethernet CRC. It is another kind of
   2263 	 * checksum offloading provided by the 82575 chipset besides the IP
   2264 	 * header checksum offloading and the TCP/UDP checksum offloading.
   2265 	 * The Packet Checksum is by default computed over the entire packet
   2266 	 * from the first byte of the DA through the last byte of the CRC,
   2267 	 * including the Ethernet and IP headers.
   2268 	 *
   2269 	 * It is a hardware limitation that Packet Checksum is mutually
   2270 	 * exclusive with RSS.
   2271 	 */
   2272 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
   2273 	rxcsum |= E1000_RXCSUM_PCSD;
   2274 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
   2275 }
   2276 
   2277 /*
   2278  * igb_setup_mac_rss_classify - Setup MAC classification and rss
   2279  */
   2280 static void
   2281 igb_setup_mac_rss_classify(igb_t *igb)
   2282 {
   2283 	struct e1000_hw *hw = &igb->hw;
   2284 	uint32_t i, mrqc, vmdctl, rxcsum;
   2285 	uint32_t ring_per_group;
   2286 	int shift_group0, shift_group1;
   2287 	uint32_t random;
   2288 	union e1000_reta {
   2289 		uint32_t	dword;
   2290 		uint8_t		bytes[4];
   2291 	} reta;
   2292 
   2293 	ring_per_group = igb->num_rx_rings / igb->num_rx_groups;
   2294 
   2295 	/* Setup the Redirection Table, it is shared between two groups */
   2296 	shift_group0 = 2;
   2297 	shift_group1 = 6;
   2298 	for (i = 0; i < (32 * 4); i++) {
   2299 		reta.bytes[i & 3] = ((i % ring_per_group) << shift_group0) |
   2300 		    ((ring_per_group + (i % ring_per_group)) << shift_group1);
   2301 		if ((i & 3) == 3) {
   2302 			E1000_WRITE_REG(hw,
   2303 			    (E1000_RETA(0) + (i & ~3)), reta.dword);
   2304 		}
   2305 	}
   2306 
   2307 	/* Fill out hash function seeds */
   2308 	for (i = 0; i < 10; i++) {
   2309 		(void) random_get_pseudo_bytes((uint8_t *)&random,
   2310 		    sizeof (uint32_t));
   2311 		E1000_WRITE_REG(hw, E1000_RSSRK(i), random);
   2312 	}
   2313 
   2314 	/*
   2315 	 * Setup the Multiple Receive Queue Control register,
   2316 	 * enable VMDq based on packet destination MAC address and RSS.
   2317 	 */
   2318 	mrqc = E1000_MRQC_ENABLE_VMDQ_MAC_RSS_GROUP;
   2319 	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
   2320 	    E1000_MRQC_RSS_FIELD_IPV4_TCP |
   2321 	    E1000_MRQC_RSS_FIELD_IPV6 |
   2322 	    E1000_MRQC_RSS_FIELD_IPV6_TCP |
   2323 	    E1000_MRQC_RSS_FIELD_IPV4_UDP |
   2324 	    E1000_MRQC_RSS_FIELD_IPV6_UDP |
   2325 	    E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
   2326 	    E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
   2327 
   2328 	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
   2329 
   2330 
   2331 	/* Define the default group and default queues */
   2332 	vmdctl = E1000_VMDQ_MAC_GROUP_DEFAULT_QUEUE;
   2333 	E1000_WRITE_REG(hw, E1000_VT_CTL, vmdctl);
   2334 
   2335 	/*
   2336 	 * Disable Packet Checksum to enable RSS for multiple receive queues.
   2337 	 *
   2338 	 * The Packet Checksum is not ethernet CRC. It is another kind of
   2339 	 * checksum offloading provided by the 82575 chipset besides the IP
   2340 	 * header checksum offloading and the TCP/UDP checksum offloading.
   2341 	 * The Packet Checksum is by default computed over the entire packet
   2342 	 * from the first byte of the DA through the last byte of the CRC,
   2343 	 * including the Ethernet and IP headers.
   2344 	 *
   2345 	 * It is a hardware limitation that Packet Checksum is mutually
   2346 	 * exclusive with RSS.
   2347 	 */
   2348 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
   2349 	rxcsum |= E1000_RXCSUM_PCSD;
   2350 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
   2351 }
   2352 
   2353 /*
   2354  * igb_setup_mac_classify - Setup MAC classification feature
   2355  */
   2356 static void
   2357 igb_setup_mac_classify(igb_t *igb)
   2358 {
   2359 	struct e1000_hw *hw = &igb->hw;
   2360 	uint32_t mrqc, rxcsum;
   2361 
   2362 	/*
   2363 	 * Setup the Multiple Receive Queue Control register,
   2364 	 * enable VMDq based on packet destination MAC address.
   2365 	 */
   2366 	mrqc = E1000_MRQC_ENABLE_VMDQ_MAC_GROUP;
   2367 	E1000_WRITE_REG(hw, E1000_MRQC, mrqc);
   2368 
   2369 	/*
   2370 	 * Disable Packet Checksum to enable RSS for multiple receive queues.
   2371 	 *
   2372 	 * The Packet Checksum is not ethernet CRC. It is another kind of
   2373 	 * checksum offloading provided by the 82575 chipset besides the IP
   2374 	 * header checksum offloading and the TCP/UDP checksum offloading.
   2375 	 * The Packet Checksum is by default computed over the entire packet
   2376 	 * from the first byte of the DA through the last byte of the CRC,
   2377 	 * including the Ethernet and IP headers.
   2378 	 *
   2379 	 * It is a hardware limitation that Packet Checksum is mutually
   2380 	 * exclusive with RSS.
   2381 	 */
   2382 	rxcsum = E1000_READ_REG(hw, E1000_RXCSUM);
   2383 	rxcsum |= E1000_RXCSUM_PCSD;
   2384 	E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum);
   2385 
   2386 }
   2387 
   2388 /*
   2389  * igb_init_unicst - Initialize the unicast addresses
   2390  */
   2391 static void
   2392 igb_init_unicst(igb_t *igb)
   2393 {
   2394 	struct e1000_hw *hw = &igb->hw;
   2395 	int slot;
   2396 
   2397 	/*
   2398 	 * Here we should consider two situations:
   2399 	 *
   2400 	 * 1. Chipset is initialized the first time
   2401 	 *    Initialize the multiple unicast addresses, and
   2402 	 *    save the default MAC address.
   2403 	 *
   2404 	 * 2. Chipset is reset
   2405 	 *    Recover the multiple unicast addresses from the
   2406 	 *    software data structure to the RAR registers.
   2407 	 */
   2408 
   2409 	/*
   2410 	 * Clear the default MAC address in the RAR0 rgister,
   2411 	 * which is loaded from EEPROM when system boot or chipreset,
   2412 	 * this will cause the conficts with add_mac/rem_mac entry
   2413 	 * points when VMDq is enabled. For this reason, the RAR0
   2414 	 * must be cleared for both cases mentioned above.
   2415 	 */
   2416 	e1000_rar_clear(hw, 0);
   2417 
   2418 	if (!igb->unicst_init) {
   2419 
   2420 		/* Initialize the multiple unicast addresses */
   2421 		igb->unicst_total = MAX_NUM_UNICAST_ADDRESSES;
   2422 		igb->unicst_avail = igb->unicst_total;
   2423 
   2424 		for (slot = 0; slot < igb->unicst_total; slot++)
   2425 			igb->unicst_addr[slot].mac.set = 0;
   2426 
   2427 		igb->unicst_init = B_TRUE;
   2428 	} else {
   2429 		/* Re-configure the RAR registers */
   2430 		for (slot = 0; slot < igb->unicst_total; slot++) {
   2431 			e1000_rar_set_vmdq(hw, igb->unicst_addr[slot].mac.addr,
   2432 			    slot, igb->vmdq_mode,
   2433 			    igb->unicst_addr[slot].mac.group_index);
   2434 		}
   2435 	}
   2436 }
   2437 
   2438 /*
   2439  * igb_unicst_find - Find the slot for the specified unicast address
   2440  */
   2441 int
   2442 igb_unicst_find(igb_t *igb, const uint8_t *mac_addr)
   2443 {
   2444 	int slot;
   2445 
   2446 	ASSERT(mutex_owned(&igb->gen_lock));
   2447 
   2448 	for (slot = 0; slot < igb->unicst_total; slot++) {
   2449 		if (bcmp(igb->unicst_addr[slot].mac.addr,
   2450 		    mac_addr, ETHERADDRL) == 0)
   2451 			return (slot);
   2452 	}
   2453 
   2454 	return (-1);
   2455 }
   2456 
   2457 /*
   2458  * igb_unicst_set - Set the unicast address to the specified slot
   2459  */
   2460 int
   2461 igb_unicst_set(igb_t *igb, const uint8_t *mac_addr,
   2462     int slot)
   2463 {
   2464 	struct e1000_hw *hw = &igb->hw;
   2465 
   2466 	ASSERT(mutex_owned(&igb->gen_lock));
   2467 
   2468 	/*
   2469 	 * Save the unicast address in the software data structure
   2470 	 */
   2471 	bcopy(mac_addr, igb->unicst_addr[slot].mac.addr, ETHERADDRL);
   2472 
   2473 	/*
   2474 	 * Set the unicast address to the RAR register
   2475 	 */
   2476 	e1000_rar_set(hw, (uint8_t *)mac_addr, slot);
   2477 
   2478 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
   2479 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
   2480 		return (EIO);
   2481 	}
   2482 
   2483 	return (0);
   2484 }
   2485 
   2486 /*
   2487  * igb_multicst_add - Add a multicst address
   2488  */
   2489 int
   2490 igb_multicst_add(igb_t *igb, const uint8_t *multiaddr)
   2491 {
   2492 	struct ether_addr *new_table;
   2493 	size_t new_len;
   2494 	size_t old_len;
   2495 
   2496 	ASSERT(mutex_owned(&igb->gen_lock));
   2497 
   2498 	if ((multiaddr[0] & 01) == 0) {
   2499 		igb_error(igb, "Illegal multicast address");
   2500 		return (EINVAL);
   2501 	}
   2502 
   2503 	if (igb->mcast_count >= igb->mcast_max_num) {
   2504 		igb_error(igb, "Adapter requested more than %d mcast addresses",
   2505 		    igb->mcast_max_num);
   2506 		return (ENOENT);
   2507 	}
   2508 
   2509 	if (igb->mcast_count == igb->mcast_alloc_count) {
   2510 		old_len = igb->mcast_alloc_count *
   2511 		    sizeof (struct ether_addr);
   2512 		new_len = (igb->mcast_alloc_count + MCAST_ALLOC_COUNT) *
   2513 		    sizeof (struct ether_addr);
   2514 
   2515 		new_table = kmem_alloc(new_len, KM_NOSLEEP);
   2516 		if (new_table == NULL) {
   2517 			igb_error(igb,
   2518 			    "Not enough memory to alloc mcast table");
   2519 			return (ENOMEM);
   2520 		}
   2521 
   2522 		if (igb->mcast_table != NULL) {
   2523 			bcopy(igb->mcast_table, new_table, old_len);
   2524 			kmem_free(igb->mcast_table, old_len);
   2525 		}
   2526 		igb->mcast_alloc_count += MCAST_ALLOC_COUNT;
   2527 		igb->mcast_table = new_table;
   2528 	}
   2529 
   2530 	bcopy(multiaddr,
   2531 	    &igb->mcast_table[igb->mcast_count], ETHERADDRL);
   2532 	igb->mcast_count++;
   2533 
   2534 	/*
   2535 	 * Update the multicast table in the hardware
   2536 	 */
   2537 	igb_setup_multicst(igb);
   2538 
   2539 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
   2540 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
   2541 		return (EIO);
   2542 	}
   2543 
   2544 	return (0);
   2545 }
   2546 
   2547 /*
   2548  * igb_multicst_remove - Remove a multicst address
   2549  */
   2550 int
   2551 igb_multicst_remove(igb_t *igb, const uint8_t *multiaddr)
   2552 {
   2553 	struct ether_addr *new_table;
   2554 	size_t new_len;
   2555 	size_t old_len;
   2556 	int i;
   2557 
   2558 	ASSERT(mutex_owned(&igb->gen_lock));
   2559 
   2560 	for (i = 0; i < igb->mcast_count; i++) {
   2561 		if (bcmp(multiaddr, &igb->mcast_table[i],
   2562 		    ETHERADDRL) == 0) {
   2563 			for (i++; i < igb->mcast_count; i++) {
   2564 				igb->mcast_table[i - 1] =
   2565 				    igb->mcast_table[i];
   2566 			}
   2567 			igb->mcast_count--;
   2568 			break;
   2569 		}
   2570 	}
   2571 
   2572 	if ((igb->mcast_alloc_count - igb->mcast_count) >
   2573 	    MCAST_ALLOC_COUNT) {
   2574 		old_len = igb->mcast_alloc_count *
   2575 		    sizeof (struct ether_addr);
   2576 		new_len = (igb->mcast_alloc_count - MCAST_ALLOC_COUNT) *
   2577 		    sizeof (struct ether_addr);
   2578 
   2579 		new_table = kmem_alloc(new_len, KM_NOSLEEP);
   2580 		if (new_table != NULL) {
   2581 			bcopy(igb->mcast_table, new_table, new_len);
   2582 			kmem_free(igb->mcast_table, old_len);
   2583 			igb->mcast_alloc_count -= MCAST_ALLOC_COUNT;
   2584 			igb->mcast_table = new_table;
   2585 		}
   2586 	}
   2587 
   2588 	/*
   2589 	 * Update the multicast table in the hardware
   2590 	 */
   2591 	igb_setup_multicst(igb);
   2592 
   2593 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
   2594 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
   2595 		return (EIO);
   2596 	}
   2597 
   2598 	return (0);
   2599 }
   2600 
   2601 static void
   2602 igb_release_multicast(igb_t *igb)
   2603 {
   2604 	if (igb->mcast_table != NULL) {
   2605 		kmem_free(igb->mcast_table,
   2606 		    igb->mcast_alloc_count * sizeof (struct ether_addr));
   2607 		igb->mcast_table = NULL;
   2608 	}
   2609 }
   2610 
   2611 /*
   2612  * igb_setup_multicast - setup multicast data structures
   2613  *
   2614  * This routine initializes all of the multicast related structures
   2615  * and save them in the hardware registers.
   2616  */
   2617 static void
   2618 igb_setup_multicst(igb_t *igb)
   2619 {
   2620 	uint8_t *mc_addr_list;
   2621 	uint32_t mc_addr_count;
   2622 	struct e1000_hw *hw = &igb->hw;
   2623 
   2624 	ASSERT(mutex_owned(&igb->gen_lock));
   2625 	ASSERT(igb->mcast_count <= igb->mcast_max_num);
   2626 
   2627 	mc_addr_list = (uint8_t *)igb->mcast_table;
   2628 	mc_addr_count = igb->mcast_count;
   2629 
   2630 	/*
   2631 	 * Update the multicase addresses to the MTA registers
   2632 	 */
   2633 	e1000_update_mc_addr_list(hw, mc_addr_list, mc_addr_count);
   2634 }
   2635 
   2636 /*
   2637  * igb_get_conf - Get driver configurations set in driver.conf
   2638  *
   2639  * This routine gets user-configured values out of the configuration
   2640  * file igb.conf.
   2641  *
   2642  * For each configurable value, there is a minimum, a maximum, and a
   2643  * default.
   2644  * If user does not configure a value, use the default.
   2645  * If user configures below the minimum, use the minumum.
   2646  * If user configures above the maximum, use the maxumum.
   2647  */
   2648 static void
   2649 igb_get_conf(igb_t *igb)
   2650 {
   2651 	struct e1000_hw *hw = &igb->hw;
   2652 	uint32_t default_mtu;
   2653 	uint32_t flow_control;
   2654 	uint32_t ring_per_group;
   2655 	int i;
   2656 
   2657 	/*
   2658 	 * igb driver supports the following user configurations:
   2659 	 *
   2660 	 * Link configurations:
   2661 	 *    adv_autoneg_cap
   2662 	 *    adv_1000fdx_cap
   2663 	 *    adv_100fdx_cap
   2664 	 *    adv_100hdx_cap
   2665 	 *    adv_10fdx_cap
   2666 	 *    adv_10hdx_cap
   2667 	 * Note: 1000hdx is not supported.
   2668 	 *
   2669 	 * Jumbo frame configuration:
   2670 	 *    default_mtu
   2671 	 *
   2672 	 * Ethernet flow control configuration:
   2673 	 *    flow_control
   2674 	 *
   2675 	 * Multiple rings configurations:
   2676 	 *    tx_queue_number
   2677 	 *    tx_ring_size
   2678 	 *    rx_queue_number
   2679 	 *    rx_ring_size
   2680 	 *
   2681 	 * Call igb_get_prop() to get the value for a specific
   2682 	 * configuration parameter.
   2683 	 */
   2684 
   2685 	/*
   2686 	 * Link configurations
   2687 	 */
   2688 	igb->param_adv_autoneg_cap = igb_get_prop(igb,
   2689 	    PROP_ADV_AUTONEG_CAP, 0, 1, 1);
   2690 	igb->param_adv_1000fdx_cap = igb_get_prop(igb,
   2691 	    PROP_ADV_1000FDX_CAP, 0, 1, 1);
   2692 	igb->param_adv_100fdx_cap = igb_get_prop(igb,
   2693 	    PROP_ADV_100FDX_CAP, 0, 1, 1);
   2694 	igb->param_adv_100hdx_cap = igb_get_prop(igb,
   2695 	    PROP_ADV_100HDX_CAP, 0, 1, 1);
   2696 	igb->param_adv_10fdx_cap = igb_get_prop(igb,
   2697 	    PROP_ADV_10FDX_CAP, 0, 1, 1);
   2698 	igb->param_adv_10hdx_cap = igb_get_prop(igb,
   2699 	    PROP_ADV_10HDX_CAP, 0, 1, 1);
   2700 
   2701 	/*
   2702 	 * Jumbo frame configurations
   2703 	 */
   2704 	default_mtu = igb_get_prop(igb, PROP_DEFAULT_MTU,
   2705 	    MIN_MTU, MAX_MTU, DEFAULT_MTU);
   2706 
   2707 	igb->max_frame_size = default_mtu +
   2708 	    sizeof (struct ether_vlan_header) + ETHERFCSL;
   2709 
   2710 	/*
   2711 	 * Ethernet flow control configuration
   2712 	 */
   2713 	flow_control = igb_get_prop(igb, PROP_FLOW_CONTROL,
   2714 	    e1000_fc_none, 4, e1000_fc_full);
   2715 	if (flow_control == 4)
   2716 		flow_control = e1000_fc_default;
   2717 
   2718 	hw->fc.requested_mode = flow_control;
   2719 
   2720 	/*
   2721 	 * Multiple rings configurations
   2722 	 */
   2723 	igb->tx_ring_size = igb_get_prop(igb, PROP_TX_RING_SIZE,
   2724 	    MIN_TX_RING_SIZE, MAX_TX_RING_SIZE, DEFAULT_TX_RING_SIZE);
   2725 	igb->rx_ring_size = igb_get_prop(igb, PROP_RX_RING_SIZE,
   2726 	    MIN_RX_RING_SIZE, MAX_RX_RING_SIZE, DEFAULT_RX_RING_SIZE);
   2727 
   2728 	igb->mr_enable = igb_get_prop(igb, PROP_MR_ENABLE, 0, 1, 0);
   2729 	igb->num_rx_groups = igb_get_prop(igb, PROP_RX_GROUP_NUM,
   2730 	    MIN_RX_GROUP_NUM, MAX_RX_GROUP_NUM, DEFAULT_RX_GROUP_NUM);
   2731 	/*
   2732 	 * Currently we do not support VMDq for 82576 and 82580.
   2733 	 * If it is e1000_82576, set num_rx_groups to 1.
   2734 	 */
   2735 	if (hw->mac.type >= e1000_82576)
   2736 		igb->num_rx_groups = 1;
   2737 
   2738 	if (igb->mr_enable) {
   2739 		igb->num_tx_rings = igb->capab->def_tx_que_num;
   2740 		igb->num_rx_rings = igb->capab->def_rx_que_num;
   2741 	} else {
   2742 		igb->num_tx_rings = 1;
   2743 		igb->num_rx_rings = 1;
   2744 
   2745 		if (igb->num_rx_groups > 1) {
   2746 			igb_error(igb,
   2747 			    "Invalid rx groups number. Please enable multiple "
   2748 			    "rings first");
   2749 			igb->num_rx_groups = 1;
   2750 		}
   2751 	}
   2752 
   2753 	/*
   2754 	 * Check the divisibility between rx rings and rx groups.
   2755 	 */
   2756 	for (i = igb->num_rx_groups; i > 0; i--) {
   2757 		if ((igb->num_rx_rings % i) == 0)
   2758 			break;
   2759 	}
   2760 	if (i != igb->num_rx_groups) {
   2761 		igb_error(igb,
   2762 		    "Invalid rx groups number. Downgrade the rx group "
   2763 		    "number to %d.", i);
   2764 		igb->num_rx_groups = i;
   2765 	}
   2766 
   2767 	/*
   2768 	 * Get the ring number per group.
   2769 	 */
   2770 	ring_per_group = igb->num_rx_rings / igb->num_rx_groups;
   2771 
   2772 	if (igb->num_rx_groups == 1) {
   2773 		/*
   2774 		 * One rx ring group, the rx ring number is num_rx_rings.
   2775 		 */
   2776 		igb->vmdq_mode = E1000_VMDQ_OFF;
   2777 	} else if (ring_per_group == 1) {
   2778 		/*
   2779 		 * Multiple rx groups, each group has one rx ring.
   2780 		 */
   2781 		igb->vmdq_mode = E1000_VMDQ_MAC;
   2782 	} else {
   2783 		/*
   2784 		 * Multiple groups and multiple rings.
   2785 		 */
   2786 		igb->vmdq_mode = E1000_VMDQ_MAC_RSS;
   2787 	}
   2788 
   2789 	/*
   2790 	 * Tunable used to force an interrupt type. The only use is
   2791 	 * for testing of the lesser interrupt types.
   2792 	 * 0 = don't force interrupt type
   2793 	 * 1 = force interrupt type MSIX
   2794 	 * 2 = force interrupt type MSI
   2795 	 * 3 = force interrupt type Legacy
   2796 	 */
   2797 	igb->intr_force = igb_get_prop(igb, PROP_INTR_FORCE,
   2798 	    IGB_INTR_NONE, IGB_INTR_LEGACY, IGB_INTR_NONE);
   2799 
   2800 	igb->tx_hcksum_enable = igb_get_prop(igb, PROP_TX_HCKSUM_ENABLE,
   2801 	    0, 1, 1);
   2802 	igb->rx_hcksum_enable = igb_get_prop(igb, PROP_RX_HCKSUM_ENABLE,
   2803 	    0, 1, 1);
   2804 	igb->lso_enable = igb_get_prop(igb, PROP_LSO_ENABLE,
   2805 	    0, 1, 1);
   2806 	igb->tx_head_wb_enable = igb_get_prop(igb, PROP_TX_HEAD_WB_ENABLE,
   2807 	    0, 1, 1);
   2808 
   2809 	/*
   2810 	 * igb LSO needs the tx h/w checksum support.
   2811 	 * Here LSO will be disabled if tx h/w checksum has been disabled.
   2812 	 */
   2813 	if (igb->tx_hcksum_enable == B_FALSE)
   2814 		igb->lso_enable = B_FALSE;
   2815 
   2816 	igb->tx_copy_thresh = igb_get_prop(igb, PROP_TX_COPY_THRESHOLD,
   2817 	    MIN_TX_COPY_THRESHOLD, MAX_TX_COPY_THRESHOLD,
   2818 	    DEFAULT_TX_COPY_THRESHOLD);
   2819 	igb->tx_recycle_thresh = igb_get_prop(igb, PROP_TX_RECYCLE_THRESHOLD,
   2820 	    MIN_TX_RECYCLE_THRESHOLD, MAX_TX_RECYCLE_THRESHOLD,
   2821 	    DEFAULT_TX_RECYCLE_THRESHOLD);
   2822 	igb->tx_overload_thresh = igb_get_prop(igb, PROP_TX_OVERLOAD_THRESHOLD,
   2823 	    MIN_TX_OVERLOAD_THRESHOLD, MAX_TX_OVERLOAD_THRESHOLD,
   2824 	    DEFAULT_TX_OVERLOAD_THRESHOLD);
   2825 	igb->tx_resched_thresh = igb_get_prop(igb, PROP_TX_RESCHED_THRESHOLD,
   2826 	    MIN_TX_RESCHED_THRESHOLD, MAX_TX_RESCHED_THRESHOLD,
   2827 	    DEFAULT_TX_RESCHED_THRESHOLD);
   2828 
   2829 	igb->rx_copy_thresh = igb_get_prop(igb, PROP_RX_COPY_THRESHOLD,
   2830 	    MIN_RX_COPY_THRESHOLD, MAX_RX_COPY_THRESHOLD,
   2831 	    DEFAULT_RX_COPY_THRESHOLD);
   2832 	igb->rx_limit_per_intr = igb_get_prop(igb, PROP_RX_LIMIT_PER_INTR,
   2833 	    MIN_RX_LIMIT_PER_INTR, MAX_RX_LIMIT_PER_INTR,
   2834 	    DEFAULT_RX_LIMIT_PER_INTR);
   2835 
   2836 	igb->intr_throttling[0] = igb_get_prop(igb, PROP_INTR_THROTTLING,
   2837 	    igb->capab->min_intr_throttle,
   2838 	    igb->capab->max_intr_throttle,
   2839 	    igb->capab->def_intr_throttle);
   2840 
   2841 	/*
   2842 	 * Max number of multicast addresses
   2843 	 */
   2844 	igb->mcast_max_num =
   2845 	    igb_get_prop(igb, PROP_MCAST_MAX_NUM,
   2846 	    MIN_MCAST_NUM, MAX_MCAST_NUM, DEFAULT_MCAST_NUM);
   2847 }
   2848 
   2849 /*
   2850  * igb_get_prop - Get a property value out of the configuration file igb.conf
   2851  *
   2852  * Caller provides the name of the property, a default value, a minimum
   2853  * value, and a maximum value.
   2854  *
   2855  * Return configured value of the property, with default, minimum and
   2856  * maximum properly applied.
   2857  */
   2858 static int
   2859 igb_get_prop(igb_t *igb,
   2860     char *propname,	/* name of the property */
   2861     int minval,		/* minimum acceptable value */
   2862     int maxval,		/* maximim acceptable value */
   2863     int defval)		/* default value */
   2864 {
   2865 	int value;
   2866 
   2867 	/*
   2868 	 * Call ddi_prop_get_int() to read the conf settings
   2869 	 */
   2870 	value = ddi_prop_get_int(DDI_DEV_T_ANY, igb->dip,
   2871 	    DDI_PROP_DONTPASS, propname, defval);
   2872 
   2873 	if (value > maxval)
   2874 		value = maxval;
   2875 
   2876 	if (value < minval)
   2877 		value = minval;
   2878 
   2879 	return (value);
   2880 }
   2881 
   2882 /*
   2883  * igb_setup_link - Using the link properties to setup the link
   2884  */
   2885 int
   2886 igb_setup_link(igb_t *igb, boolean_t setup_hw)
   2887 {
   2888 	struct e1000_mac_info *mac;
   2889 	struct e1000_phy_info *phy;
   2890 	boolean_t invalid;
   2891 
   2892 	mac = &igb->hw.mac;
   2893 	phy = &igb->hw.phy;
   2894 	invalid = B_FALSE;
   2895 
   2896 	if (igb->param_adv_autoneg_cap == 1) {
   2897 		mac->autoneg = B_TRUE;
   2898 		phy->autoneg_advertised = 0;
   2899 
   2900 		/*
   2901 		 * 1000hdx is not supported for autonegotiation
   2902 		 */
   2903 		if (igb->param_adv_1000fdx_cap == 1)
   2904 			phy->autoneg_advertised |= ADVERTISE_1000_FULL;
   2905 
   2906 		if (igb->param_adv_100fdx_cap == 1)
   2907 			phy->autoneg_advertised |= ADVERTISE_100_FULL;
   2908 
   2909 		if (igb->param_adv_100hdx_cap == 1)
   2910 			phy->autoneg_advertised |= ADVERTISE_100_HALF;
   2911 
   2912 		if (igb->param_adv_10fdx_cap == 1)
   2913 			phy->autoneg_advertised |= ADVERTISE_10_FULL;
   2914 
   2915 		if (igb->param_adv_10hdx_cap == 1)
   2916 			phy->autoneg_advertised |= ADVERTISE_10_HALF;
   2917 
   2918 		if (phy->autoneg_advertised == 0)
   2919 			invalid = B_TRUE;
   2920 	} else {
   2921 		mac->autoneg = B_FALSE;
   2922 
   2923 		/*
   2924 		 * 1000fdx and 1000hdx are not supported for forced link
   2925 		 */
   2926 		if (igb->param_adv_100fdx_cap == 1)
   2927 			mac->forced_speed_duplex = ADVERTISE_100_FULL;
   2928 		else if (igb->param_adv_100hdx_cap == 1)
   2929 			mac->forced_speed_duplex = ADVERTISE_100_HALF;
   2930 		else if (igb->param_adv_10fdx_cap == 1)
   2931 			mac->forced_speed_duplex = ADVERTISE_10_FULL;
   2932 		else if (igb->param_adv_10hdx_cap == 1)
   2933 			mac->forced_speed_duplex = ADVERTISE_10_HALF;
   2934 		else
   2935 			invalid = B_TRUE;
   2936 	}
   2937 
   2938 	if (invalid) {
   2939 		igb_notice(igb, "Invalid link settings. Setup link to "
   2940 		    "autonegotiation with full link capabilities.");
   2941 		mac->autoneg = B_TRUE;
   2942 		phy->autoneg_advertised = ADVERTISE_1000_FULL |
   2943 		    ADVERTISE_100_FULL | ADVERTISE_100_HALF |
   2944 		    ADVERTISE_10_FULL | ADVERTISE_10_HALF;
   2945 	}
   2946 
   2947 	if (setup_hw) {
   2948 		if (e1000_setup_link(&igb->hw) != E1000_SUCCESS)
   2949 			return (IGB_FAILURE);
   2950 	}
   2951 
   2952 	return (IGB_SUCCESS);
   2953 }
   2954 
   2955 
   2956 /*
   2957  * igb_is_link_up - Check if the link is up
   2958  */
   2959 static boolean_t
   2960 igb_is_link_up(igb_t *igb)
   2961 {
   2962 	struct e1000_hw *hw = &igb->hw;
   2963 	boolean_t link_up = B_FALSE;
   2964 
   2965 	ASSERT(mutex_owned(&igb->gen_lock));
   2966 
   2967 	/*
   2968 	 * get_link_status is set in the interrupt handler on link-status-change
   2969 	 * or rx sequence error interrupt.  get_link_status will stay
   2970 	 * false until the e1000_check_for_link establishes link only
   2971 	 * for copper adapters.
   2972 	 */
   2973 	switch (hw->phy.media_type) {
   2974 	case e1000_media_type_copper:
   2975 		if (hw->mac.get_link_status) {
   2976 			(void) e1000_check_for_link(hw);
   2977 			link_up = !hw->mac.get_link_status;
   2978 		} else {
   2979 			link_up = B_TRUE;
   2980 		}
   2981 		break;
   2982 	case e1000_media_type_fiber:
   2983 		(void) e1000_check_for_link(hw);
   2984 		link_up = (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU);
   2985 		break;
   2986 	case e1000_media_type_internal_serdes:
   2987 		(void) e1000_check_for_link(hw);
   2988 		link_up = hw->mac.serdes_has_link;
   2989 		break;
   2990 	}
   2991 
   2992 	return (link_up);
   2993 }
   2994 
   2995 /*
   2996  * igb_link_check - Link status processing
   2997  */
   2998 static boolean_t
   2999 igb_link_check(igb_t *igb)
   3000 {
   3001 	struct e1000_hw *hw = &igb->hw;
   3002 	uint16_t speed = 0, duplex = 0;
   3003 	boolean_t link_changed = B_FALSE;
   3004 
   3005 	ASSERT(mutex_owned(&igb->gen_lock));
   3006 
   3007 	if (igb_is_link_up(igb)) {
   3008 		/*
   3009 		 * The Link is up, check whether it was marked as down earlier
   3010 		 */
   3011 		if (igb->link_state != LINK_STATE_UP) {
   3012 			(void) e1000_get_speed_and_duplex(hw, &speed, &duplex);
   3013 			igb->link_speed = speed;
   3014 			igb->link_duplex = duplex;
   3015 			igb->link_state = LINK_STATE_UP;
   3016 			igb->link_down_timeout = 0;
   3017 			link_changed = B_TRUE;
   3018 			if (!igb->link_complete)
   3019 				igb_stop_link_timer(igb);
   3020 		}
   3021 	} else if (igb->link_complete) {
   3022 		if (igb->link_state != LINK_STATE_DOWN) {
   3023 			igb->link_speed = 0;
   3024 			igb->link_duplex = 0;
   3025 			igb->link_state = LINK_STATE_DOWN;
   3026 			link_changed = B_TRUE;
   3027 		}
   3028 
   3029 		if (igb->igb_state & IGB_STARTED) {
   3030 			if (igb->link_down_timeout < MAX_LINK_DOWN_TIMEOUT) {
   3031 				igb->link_down_timeout++;
   3032 			} else if (igb->link_down_timeout ==
   3033 			    MAX_LINK_DOWN_TIMEOUT) {
   3034 				igb_tx_clean(igb);
   3035 				igb->link_down_timeout++;
   3036 			}
   3037 		}
   3038 	}
   3039 
   3040 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
   3041 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
   3042 		return (B_FALSE);
   3043 	}
   3044 
   3045 	return (link_changed);
   3046 }
   3047 
   3048 /*
   3049  * igb_local_timer - driver watchdog function
   3050  *
   3051  * This function will handle the hardware stall check, link status
   3052  * check and other routines.
   3053  */
   3054 static void
   3055 igb_local_timer(void *arg)
   3056 {
   3057 	igb_t *igb = (igb_t *)arg;
   3058 	boolean_t link_changed = B_FALSE;
   3059 
   3060 	if (igb->igb_state & IGB_ERROR) {
   3061 		igb->reset_count++;
   3062 		if (igb_reset(igb) == IGB_SUCCESS)
   3063 			ddi_fm_service_impact(igb->dip, DDI_SERVICE_RESTORED);
   3064 
   3065 		igb_restart_watchdog_timer(igb);
   3066 		return;
   3067 	}
   3068 
   3069 	if (igb_stall_check(igb) || (igb->igb_state & IGB_STALL)) {
   3070 		igb_fm_ereport(igb, DDI_FM_DEVICE_STALL);
   3071 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_LOST);
   3072 		igb->reset_count++;
   3073 		if (igb_reset(igb) == IGB_SUCCESS)
   3074 			ddi_fm_service_impact(igb->dip, DDI_SERVICE_RESTORED);
   3075 
   3076 		igb_restart_watchdog_timer(igb);
   3077 		return;
   3078 	}
   3079 
   3080 	mutex_enter(&igb->gen_lock);
   3081 	if (!(igb->igb_state & IGB_SUSPENDED) && (igb->igb_state & IGB_STARTED))
   3082 		link_changed = igb_link_check(igb);
   3083 	mutex_exit(&igb->gen_lock);
   3084 
   3085 	if (link_changed)
   3086 		mac_link_update(igb->mac_hdl, igb->link_state);
   3087 
   3088 	igb_restart_watchdog_timer(igb);
   3089 }
   3090 
   3091 /*
   3092  * igb_link_timer - link setup timer function
   3093  *
   3094  * It is called when the timer for link setup is expired, which indicates
   3095  * the completion of the link setup. The link state will not be updated
   3096  * until the link setup is completed. And the link state will not be sent
   3097  * to the upper layer through mac_link_update() in this function. It will
   3098  * be updated in the local timer routine or the interrupts service routine
   3099  * after the interface is started (plumbed).
   3100  */
   3101 static void
   3102 igb_link_timer(void *arg)
   3103 {
   3104 	igb_t *igb = (igb_t *)arg;
   3105 
   3106 	mutex_enter(&igb->link_lock);
   3107 	igb->link_complete = B_TRUE;
   3108 	igb->link_tid = 0;
   3109 	mutex_exit(&igb->link_lock);
   3110 }
   3111 /*
   3112  * igb_stall_check - check for transmit stall
   3113  *
   3114  * This function checks if the adapter is stalled (in transmit).
   3115  *
   3116  * It is called each time the watchdog timeout is invoked.
   3117  * If the transmit descriptor reclaim continuously fails,
   3118  * the watchdog value will increment by 1. If the watchdog
   3119  * value exceeds the threshold, the igb is assumed to
   3120  * have stalled and need to be reset.
   3121  */
   3122 static boolean_t
   3123 igb_stall_check(igb_t *igb)
   3124 {
   3125 	igb_tx_ring_t *tx_ring;
   3126 	struct e1000_hw *hw = &igb->hw;
   3127 	boolean_t result;
   3128 	int i;
   3129 
   3130 	if (igb->link_state != LINK_STATE_UP)
   3131 		return (B_FALSE);
   3132 
   3133 	/*
   3134 	 * If any tx ring is stalled, we'll reset the chipset
   3135 	 */
   3136 	result = B_FALSE;
   3137 	for (i = 0; i < igb->num_tx_rings; i++) {
   3138 		tx_ring = &igb->tx_rings[i];
   3139 
   3140 		if (tx_ring->recycle_fail > 0)
   3141 			tx_ring->stall_watchdog++;
   3142 		else
   3143 			tx_ring->stall_watchdog = 0;
   3144 
   3145 		if (tx_ring->stall_watchdog >= STALL_WATCHDOG_TIMEOUT) {
   3146 			result = B_TRUE;
   3147 			if (hw->mac.type == e1000_82580) {
   3148 				hw->dev_spec._82575.global_device_reset
   3149 				    = B_TRUE;
   3150 			}
   3151 			break;
   3152 		}
   3153 	}
   3154 
   3155 	if (result) {
   3156 		tx_ring->stall_watchdog = 0;
   3157 		tx_ring->recycle_fail = 0;
   3158 	}
   3159 
   3160 	return (result);
   3161 }
   3162 
   3163 
   3164 /*
   3165  * is_valid_mac_addr - Check if the mac address is valid
   3166  */
   3167 static boolean_t
   3168 is_valid_mac_addr(uint8_t *mac_addr)
   3169 {
   3170 	const uint8_t addr_test1[6] = { 0, 0, 0, 0, 0, 0 };
   3171 	const uint8_t addr_test2[6] =
   3172 	    { 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF };
   3173 
   3174 	if (!(bcmp(addr_test1, mac_addr, ETHERADDRL)) ||
   3175 	    !(bcmp(addr_test2, mac_addr, ETHERADDRL)))
   3176 		return (B_FALSE);
   3177 
   3178 	return (B_TRUE);
   3179 }
   3180 
   3181 static boolean_t
   3182 igb_find_mac_address(igb_t *igb)
   3183 {
   3184 	struct e1000_hw *hw = &igb->hw;
   3185 #ifdef __sparc
   3186 	uchar_t *bytes;
   3187 	struct ether_addr sysaddr;
   3188 	uint_t nelts;
   3189 	int err;
   3190 	boolean_t found = B_FALSE;
   3191 
   3192 	/*
   3193 	 * The "vendor's factory-set address" may already have
   3194 	 * been extracted from the chip, but if the property
   3195 	 * "local-mac-address" is set we use that instead.
   3196 	 *
   3197 	 * We check whether it looks like an array of 6
   3198 	 * bytes (which it should, if OBP set it).  If we can't
   3199 	 * make sense of it this way, we'll ignore it.
   3200 	 */
   3201 	err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip,
   3202 	    DDI_PROP_DONTPASS, "local-mac-address", &bytes, &nelts);
   3203 	if (err == DDI_PROP_SUCCESS) {
   3204 		if (nelts == ETHERADDRL) {
   3205 			while (nelts--)
   3206 				hw->mac.addr[nelts] = bytes[nelts];
   3207 			found = B_TRUE;
   3208 		}
   3209 		ddi_prop_free(bytes);
   3210 	}
   3211 
   3212 	/*
   3213 	 * Look up the OBP property "local-mac-address?". If the user has set
   3214 	 * 'local-mac-address? = false', use "the system address" instead.
   3215 	 */
   3216 	if (ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip, 0,
   3217 	    "local-mac-address?", &bytes, &nelts) == DDI_PROP_SUCCESS) {
   3218 		if (strncmp("false", (caddr_t)bytes, (size_t)nelts) == 0) {
   3219 			if (localetheraddr(NULL, &sysaddr) != 0) {
   3220 				bcopy(&sysaddr, hw->mac.addr, ETHERADDRL);
   3221 				found = B_TRUE;
   3222 			}
   3223 		}
   3224 		ddi_prop_free(bytes);
   3225 	}
   3226 
   3227 	/*
   3228 	 * Finally(!), if there's a valid "mac-address" property (created
   3229 	 * if we netbooted from this interface), we must use this instead
   3230 	 * of any of the above to ensure that the NFS/install server doesn't
   3231 	 * get confused by the address changing as Solaris takes over!
   3232 	 */
   3233 	err = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, igb->dip,
   3234 	    DDI_PROP_DONTPASS, "mac-address", &bytes, &nelts);
   3235 	if (err == DDI_PROP_SUCCESS) {
   3236 		if (nelts == ETHERADDRL) {
   3237 			while (nelts--)
   3238 				hw->mac.addr[nelts] = bytes[nelts];
   3239 			found = B_TRUE;
   3240 		}
   3241 		ddi_prop_free(bytes);
   3242 	}
   3243 
   3244 	if (found) {
   3245 		bcopy(hw->mac.addr, hw->mac.perm_addr, ETHERADDRL);
   3246 		return (B_TRUE);
   3247 	}
   3248 #endif
   3249 
   3250 	/*
   3251 	 * Read the device MAC address from the EEPROM
   3252 	 */
   3253 	if (e1000_read_mac_addr(hw) != E1000_SUCCESS)
   3254 		return (B_FALSE);
   3255 
   3256 	return (B_TRUE);
   3257 }
   3258 
   3259 #pragma inline(igb_arm_watchdog_timer)
   3260 
   3261 static void
   3262 igb_arm_watchdog_timer(igb_t *igb)
   3263 {
   3264 	/*
   3265 	 * Fire a watchdog timer
   3266 	 */
   3267 	igb->watchdog_tid =
   3268 	    timeout(igb_local_timer,
   3269 	    (void *)igb, 1 * drv_usectohz(1000000));
   3270 
   3271 }
   3272 
   3273 /*
   3274  * igb_enable_watchdog_timer - Enable and start the driver watchdog timer
   3275  */
   3276 void
   3277 igb_enable_watchdog_timer(igb_t *igb)
   3278 {
   3279 	mutex_enter(&igb->watchdog_lock);
   3280 
   3281 	if (!igb->watchdog_enable) {
   3282 		igb->watchdog_enable = B_TRUE;
   3283 		igb->watchdog_start = B_TRUE;
   3284 		igb_arm_watchdog_timer(igb);
   3285 	}
   3286 
   3287 	mutex_exit(&igb->watchdog_lock);
   3288 
   3289 }
   3290 
   3291 /*
   3292  * igb_disable_watchdog_timer - Disable and stop the driver watchdog timer
   3293  */
   3294 void
   3295 igb_disable_watchdog_timer(igb_t *igb)
   3296 {
   3297 	timeout_id_t tid;
   3298 
   3299 	mutex_enter(&igb->watchdog_lock);
   3300 
   3301 	igb->watchdog_enable = B_FALSE;
   3302 	igb->watchdog_start = B_FALSE;
   3303 	tid = igb->watchdog_tid;
   3304 	igb->watchdog_tid = 0;
   3305 
   3306 	mutex_exit(&igb->watchdog_lock);
   3307 
   3308 	if (tid != 0)
   3309 		(void) untimeout(tid);
   3310 
   3311 }
   3312 
   3313 /*
   3314  * igb_start_watchdog_timer - Start the driver watchdog timer
   3315  */
   3316 static void
   3317 igb_start_watchdog_timer(igb_t *igb)
   3318 {
   3319 	mutex_enter(&igb->watchdog_lock);
   3320 
   3321 	if (igb->watchdog_enable) {
   3322 		if (!igb->watchdog_start) {
   3323 			igb->watchdog_start = B_TRUE;
   3324 			igb_arm_watchdog_timer(igb);
   3325 		}
   3326 	}
   3327 
   3328 	mutex_exit(&igb->watchdog_lock);
   3329 }
   3330 
   3331 /*
   3332  * igb_restart_watchdog_timer - Restart the driver watchdog timer
   3333  */
   3334 static void
   3335 igb_restart_watchdog_timer(igb_t *igb)
   3336 {
   3337 	mutex_enter(&igb->watchdog_lock);
   3338 
   3339 	if (igb->watchdog_start)
   3340 		igb_arm_watchdog_timer(igb);
   3341 
   3342 	mutex_exit(&igb->watchdog_lock);
   3343 }
   3344 
   3345 /*
   3346  * igb_stop_watchdog_timer - Stop the driver watchdog timer
   3347  */
   3348 static void
   3349 igb_stop_watchdog_timer(igb_t *igb)
   3350 {
   3351 	timeout_id_t tid;
   3352 
   3353 	mutex_enter(&igb->watchdog_lock);
   3354 
   3355 	igb->watchdog_start = B_FALSE;
   3356 	tid = igb->watchdog_tid;
   3357 	igb->watchdog_tid = 0;
   3358 
   3359 	mutex_exit(&igb->watchdog_lock);
   3360 
   3361 	if (tid != 0)
   3362 		(void) untimeout(tid);
   3363 }
   3364 
   3365 /*
   3366  * igb_start_link_timer - Start the link setup timer
   3367  */
   3368 static void
   3369 igb_start_link_timer(struct igb *igb)
   3370 {
   3371 	struct e1000_hw *hw = &igb->hw;
   3372 	clock_t link_timeout;
   3373 
   3374 	if (hw->mac.autoneg)
   3375 		link_timeout = PHY_AUTO_NEG_LIMIT *
   3376 		    drv_usectohz(100000);
   3377 	else
   3378 		link_timeout = PHY_FORCE_LIMIT * drv_usectohz(100000);
   3379 
   3380 	mutex_enter(&igb->link_lock);
   3381 	if (hw->phy.autoneg_wait_to_complete) {
   3382 		igb->link_complete = B_TRUE;
   3383 	} else {
   3384 		igb->link_complete = B_FALSE;
   3385 		igb->link_tid = timeout(igb_link_timer, (void *)igb,
   3386 		    link_timeout);
   3387 	}
   3388 	mutex_exit(&igb->link_lock);
   3389 }
   3390 
   3391 /*
   3392  * igb_stop_link_timer - Stop the link setup timer
   3393  */
   3394 static void
   3395 igb_stop_link_timer(struct igb *igb)
   3396 {
   3397 	timeout_id_t tid;
   3398 
   3399 	mutex_enter(&igb->link_lock);
   3400 	igb->link_complete = B_TRUE;
   3401 	tid = igb->link_tid;
   3402 	igb->link_tid = 0;
   3403 	mutex_exit(&igb->link_lock);
   3404 
   3405 	if (tid != 0)
   3406 		(void) untimeout(tid);
   3407 }
   3408 
   3409 /*
   3410  * igb_disable_adapter_interrupts - Clear/disable all hardware interrupts
   3411  */
   3412 static void
   3413 igb_disable_adapter_interrupts(igb_t *igb)
   3414 {
   3415 	struct e1000_hw *hw = &igb->hw;
   3416 
   3417 	/*
   3418 	 * Set the IMC register to mask all the interrupts,
   3419 	 * including the tx interrupts.
   3420 	 */
   3421 	E1000_WRITE_REG(hw, E1000_IMC, ~0);
   3422 	E1000_WRITE_REG(hw, E1000_IAM, 0);
   3423 
   3424 	/*
   3425 	 * Additional disabling for MSI-X
   3426 	 */
   3427 	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
   3428 		E1000_WRITE_REG(hw, E1000_EIMC, ~0);
   3429 		E1000_WRITE_REG(hw, E1000_EIAC, 0);
   3430 		E1000_WRITE_REG(hw, E1000_EIAM, 0);
   3431 	}
   3432 
   3433 	E1000_WRITE_FLUSH(hw);
   3434 }
   3435 
   3436 /*
   3437  * igb_enable_adapter_interrupts_82580 - Enable NIC interrupts for 82580
   3438  */
   3439 static void
   3440 igb_enable_adapter_interrupts_82580(igb_t *igb)
   3441 {
   3442 	struct e1000_hw *hw = &igb->hw;
   3443 
   3444 	/* Clear any pending interrupts */
   3445 	(void) E1000_READ_REG(hw, E1000_ICR);
   3446 	igb->ims_mask |= E1000_IMS_DRSTA;
   3447 
   3448 	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
   3449 
   3450 		/* Interrupt enabling for MSI-X */
   3451 		E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
   3452 		E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
   3453 		igb->ims_mask = (E1000_IMS_LSC | E1000_IMS_DRSTA);
   3454 		E1000_WRITE_REG(hw, E1000_IMS, igb->ims_mask);
   3455 	} else { /* Interrupt enabling for MSI and legacy */
   3456 		E1000_WRITE_REG(hw, E1000_IVAR0, E1000_IVAR_VALID);
   3457 		igb->ims_mask = IMS_ENABLE_MASK | E1000_IMS_TXQE;
   3458 		igb->ims_mask |= E1000_IMS_DRSTA;
   3459 		E1000_WRITE_REG(hw, E1000_IMS, igb->ims_mask);
   3460 	}
   3461 
   3462 	/* Disable auto-mask for ICR interrupt bits */
   3463 	E1000_WRITE_REG(hw, E1000_IAM, 0);
   3464 
   3465 	E1000_WRITE_FLUSH(hw);
   3466 }
   3467 
   3468 /*
   3469  * igb_enable_adapter_interrupts_82576 - Enable NIC interrupts for 82576
   3470  */
   3471 static void
   3472 igb_enable_adapter_interrupts_82576(igb_t *igb)
   3473 {
   3474 	struct e1000_hw *hw = &igb->hw;
   3475 
   3476 	/* Clear any pending interrupts */
   3477 	(void) E1000_READ_REG(hw, E1000_ICR);
   3478 
   3479 	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
   3480 
   3481 		/* Interrupt enabling for MSI-X */
   3482 		E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
   3483 		E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
   3484 		igb->ims_mask = E1000_IMS_LSC;
   3485 		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);
   3486 	} else {
   3487 		/* Interrupt enabling for MSI and legacy */
   3488 		E1000_WRITE_REG(hw, E1000_IVAR0, E1000_IVAR_VALID);
   3489 		igb->ims_mask = IMS_ENABLE_MASK | E1000_IMS_TXQE;
   3490 		E1000_WRITE_REG(hw, E1000_IMS,
   3491 		    (IMS_ENABLE_MASK | E1000_IMS_TXQE));
   3492 	}
   3493 
   3494 	/* Disable auto-mask for ICR interrupt bits */
   3495 	E1000_WRITE_REG(hw, E1000_IAM, 0);
   3496 
   3497 	E1000_WRITE_FLUSH(hw);
   3498 }
   3499 
   3500 /*
   3501  * igb_enable_adapter_interrupts_82575 - Enable NIC interrupts for 82575
   3502  */
   3503 static void
   3504 igb_enable_adapter_interrupts_82575(igb_t *igb)
   3505 {
   3506 	struct e1000_hw *hw = &igb->hw;
   3507 	uint32_t reg;
   3508 
   3509 	/* Clear any pending interrupts */
   3510 	(void) E1000_READ_REG(hw, E1000_ICR);
   3511 
   3512 	if (igb->intr_type == DDI_INTR_TYPE_MSIX) {
   3513 		/* Interrupt enabling for MSI-X */
   3514 		E1000_WRITE_REG(hw, E1000_EIMS, igb->eims_mask);
   3515 		E1000_WRITE_REG(hw, E1000_EIAC, igb->eims_mask);
   3516 		igb->ims_mask = E1000_IMS_LSC;
   3517 		E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_LSC);
   3518 
   3519 		/* Enable MSI-X PBA support */
   3520 		reg = E1000_READ_REG(hw, E1000_CTRL_EXT);
   3521 		reg |= E1000_CTRL_EXT_PBA_CLR;
   3522 
   3523 		/* Non-selective interrupt clear-on-read */
   3524 		reg |= E1000_CTRL_EXT_IRCA;	/* Called NSICR in the EAS */
   3525 
   3526 		E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg);
   3527 	} else {
   3528 		/* Interrupt enabling for MSI and legacy */
   3529 		igb->ims_mask = IMS_ENABLE_MASK;
   3530 		E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK);
   3531 	}
   3532 
   3533 	E1000_WRITE_FLUSH(hw);
   3534 }
   3535 
   3536 /*
   3537  * Loopback Support
   3538  */
   3539 static lb_property_t lb_normal =
   3540 	{ normal,	"normal",	IGB_LB_NONE		};
   3541 static lb_property_t lb_external =
   3542 	{ external,	"External",	IGB_LB_EXTERNAL		};
   3543 static lb_property_t lb_phy =
   3544 	{ internal,	"PHY",		IGB_LB_INTERNAL_PHY	};
   3545 static lb_property_t lb_serdes =
   3546 	{ internal,	"SerDes",	IGB_LB_INTERNAL_SERDES	};
   3547 
   3548 enum ioc_reply
   3549 igb_loopback_ioctl(igb_t *igb, struct iocblk *iocp, mblk_t *mp)
   3550 {
   3551 	lb_info_sz_t *lbsp;
   3552 	lb_property_t *lbpp;
   3553 	struct e1000_hw *hw;
   3554 	uint32_t *lbmp;
   3555 	uint32_t size;
   3556 	uint32_t value;
   3557 
   3558 	hw = &igb->hw;
   3559 
   3560 	if (mp->b_cont == NULL)
   3561 		return (IOC_INVAL);
   3562 
   3563 	switch (iocp->ioc_cmd) {
   3564 	default:
   3565 		return (IOC_INVAL);
   3566 
   3567 	case LB_GET_INFO_SIZE:
   3568 		size = sizeof (lb_info_sz_t);
   3569 		if (iocp->ioc_count != size)
   3570 			return (IOC_INVAL);
   3571 
   3572 		value = sizeof (lb_normal);
   3573 		if (hw->phy.media_type == e1000_media_type_copper)
   3574 			value += sizeof (lb_phy);
   3575 		else
   3576 			value += sizeof (lb_serdes);
   3577 		value += sizeof (lb_external);
   3578 
   3579 		lbsp = (lb_info_sz_t *)(uintptr_t)mp->b_cont->b_rptr;
   3580 		*lbsp = value;
   3581 		break;
   3582 
   3583 	case LB_GET_INFO:
   3584 		value = sizeof (lb_normal);
   3585 		if (hw->phy.media_type == e1000_media_type_copper)
   3586 			value += sizeof (lb_phy);
   3587 		else
   3588 			value += sizeof (lb_serdes);
   3589 		value += sizeof (lb_external);
   3590 
   3591 		size = value;
   3592 		if (iocp->ioc_count != size)
   3593 			return (IOC_INVAL);
   3594 
   3595 		value = 0;
   3596 		lbpp = (lb_property_t *)(uintptr_t)mp->b_cont->b_rptr;
   3597 
   3598 		lbpp[value++] = lb_normal;
   3599 		if (hw->phy.media_type == e1000_media_type_copper)
   3600 			lbpp[value++] = lb_phy;
   3601 		else
   3602 			lbpp[value++] = lb_serdes;
   3603 		lbpp[value++] = lb_external;
   3604 		break;
   3605 
   3606 	case LB_GET_MODE:
   3607 		size = sizeof (uint32_t);
   3608 		if (iocp->ioc_count != size)
   3609 			return (IOC_INVAL);
   3610 
   3611 		lbmp = (uint32_t *)(uintptr_t)mp->b_cont->b_rptr;
   3612 		*lbmp = igb->loopback_mode;
   3613 		break;
   3614 
   3615 	case LB_SET_MODE:
   3616 		size = 0;
   3617 		if (iocp->ioc_count != sizeof (uint32_t))
   3618 			return (IOC_INVAL);
   3619 
   3620 		lbmp = (uint32_t *)(uintptr_t)mp->b_cont->b_rptr;
   3621 		if (!igb_set_loopback_mode(igb, *lbmp))
   3622 			return (IOC_INVAL);
   3623 		break;
   3624 	}
   3625 
   3626 	iocp->ioc_count = size;
   3627 	iocp->ioc_error = 0;
   3628 
   3629 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
   3630 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
   3631 		return (IOC_INVAL);
   3632 	}
   3633 
   3634 	return (IOC_REPLY);
   3635 }
   3636 
   3637 /*
   3638  * igb_set_loopback_mode - Setup loopback based on the loopback mode
   3639  */
   3640 static boolean_t
   3641 igb_set_loopback_mode(igb_t *igb, uint32_t mode)
   3642 {
   3643 	struct e1000_hw *hw;
   3644 	int i;
   3645 
   3646 	if (mode == igb->loopback_mode)
   3647 		return (B_TRUE);
   3648 
   3649 	hw = &igb->hw;
   3650 
   3651 	igb->loopback_mode = mode;
   3652 
   3653 	if (mode == IGB_LB_NONE) {
   3654 		/* Reset the chip */
   3655 		hw->phy.autoneg_wait_to_complete = B_TRUE;
   3656 		(void) igb_reset(igb);
   3657 		hw->phy.autoneg_wait_to_complete = B_FALSE;
   3658 		return (B_TRUE);
   3659 	}
   3660 
   3661 	mutex_enter(&igb->gen_lock);
   3662 
   3663 	switch (mode) {
   3664 	default:
   3665 		mutex_exit(&igb->gen_lock);
   3666 		return (B_FALSE);
   3667 
   3668 	case IGB_LB_EXTERNAL:
   3669 		igb_set_external_loopback(igb);
   3670 		break;
   3671 
   3672 	case IGB_LB_INTERNAL_PHY:
   3673 		igb_set_internal_phy_loopback(igb);
   3674 		break;
   3675 
   3676 	case IGB_LB_INTERNAL_SERDES:
   3677 		igb_set_internal_serdes_loopback(igb);
   3678 		break;
   3679 	}
   3680 
   3681 	mutex_exit(&igb->gen_lock);
   3682 
   3683 	/*
   3684 	 * When external loopback is set, wait up to 1000ms to get the link up.
   3685 	 * According to test, 1000ms can work and it's an experimental value.
   3686 	 */
   3687 	if (mode == IGB_LB_EXTERNAL) {
   3688 		for (i = 0; i <= 10; i++) {
   3689 			mutex_enter(&igb->gen_lock);
   3690 			(void) igb_link_check(igb);
   3691 			mutex_exit(&igb->gen_lock);
   3692 
   3693 			if (igb->link_state == LINK_STATE_UP)
   3694 				break;
   3695 
   3696 			msec_delay(100);
   3697 		}
   3698 
   3699 		if (igb->link_state != LINK_STATE_UP) {
   3700 			/*
   3701 			 * Does not support external loopback.
   3702 			 * Reset driver to loopback none.
   3703 			 */
   3704 			igb->loopback_mode = IGB_LB_NONE;
   3705 
   3706 			/* Reset the chip */
   3707 			hw->phy.autoneg_wait_to_complete = B_TRUE;
   3708 			(void) igb_reset(igb);
   3709 			hw->phy.autoneg_wait_to_complete = B_FALSE;
   3710 
   3711 			IGB_DEBUGLOG_0(igb, "Set external loopback failed, "
   3712 			    "reset to loopback none.");
   3713 
   3714 			return (B_FALSE);
   3715 		}
   3716 	}
   3717 
   3718 	return (B_TRUE);
   3719 }
   3720 
   3721 /*
   3722  * igb_set_external_loopback - Set the external loopback mode
   3723  */
   3724 static void
   3725 igb_set_external_loopback(igb_t *igb)
   3726 {
   3727 	struct e1000_hw *hw;
   3728 
   3729 	hw = &igb->hw;
   3730 
   3731 	/* Set phy to known state */
   3732 	(void) e1000_phy_hw_reset(hw);
   3733 
   3734 	(void) e1000_write_phy_reg(hw, 0x0, 0x0140);
   3735 	(void) e1000_write_phy_reg(hw, 0x9, 0x1b00);
   3736 	(void) e1000_write_phy_reg(hw, 0x12, 0x1610);
   3737 	(void) e1000_write_phy_reg(hw, 0x1f37, 0x3f1c);
   3738 }
   3739 
   3740 /*
   3741  * igb_set_internal_phy_loopback - Set the internal PHY loopback mode
   3742  */
   3743 static void
   3744 igb_set_internal_phy_loopback(igb_t *igb)
   3745 {
   3746 	struct e1000_hw *hw;
   3747 	uint32_t ctrl_ext;
   3748 	uint16_t phy_ctrl;
   3749 	uint16_t phy_pconf;
   3750 
   3751 	hw = &igb->hw;
   3752 
   3753 	/* Set link mode to PHY (00b) in the Extended Control register */
   3754 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
   3755 	ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK;
   3756 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
   3757 
   3758 	/*
   3759 	 * Set PHY control register (0x4140):
   3760 	 *    Set full duplex mode
   3761 	 *    Set loopback bit
   3762 	 *    Clear auto-neg enable bit
   3763 	 *    Set PHY speed
   3764 	 */
   3765 	phy_ctrl = MII_CR_FULL_DUPLEX | MII_CR_SPEED_1000 | MII_CR_LOOPBACK;
   3766 	(void) e1000_write_phy_reg(hw, PHY_CONTROL, phy_ctrl);
   3767 
   3768 	/* Set the link disable bit in the Port Configuration register */
   3769 	(void) e1000_read_phy_reg(hw, 0x10, &phy_pconf);
   3770 	phy_pconf |= (uint16_t)1 << 14;
   3771 	(void) e1000_write_phy_reg(hw, 0x10, phy_pconf);
   3772 }
   3773 
   3774 /*
   3775  * igb_set_internal_serdes_loopback - Set the internal SerDes loopback mode
   3776  */
   3777 static void
   3778 igb_set_internal_serdes_loopback(igb_t *igb)
   3779 {
   3780 	struct e1000_hw *hw;
   3781 	uint32_t ctrl_ext;
   3782 	uint32_t ctrl;
   3783 	uint32_t pcs_lctl;
   3784 	uint32_t connsw;
   3785 
   3786 	hw = &igb->hw;
   3787 
   3788 	/* Set link mode to SerDes (11b) in the Extended Control register */
   3789 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
   3790 	ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES;
   3791 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
   3792 
   3793 	/* Configure the SerDes to loopback */
   3794 	E1000_WRITE_REG(hw, E1000_SCTL, 0x410);
   3795 
   3796 	/* Set Device Control register */
   3797 	ctrl = E1000_READ_REG(hw, E1000_CTRL);
   3798 	ctrl |= (E1000_CTRL_FD |	/* Force full duplex */
   3799 	    E1000_CTRL_SLU);		/* Force link up */
   3800 	ctrl &= ~(E1000_CTRL_RFCE |	/* Disable receive flow control */
   3801 	    E1000_CTRL_TFCE |		/* Disable transmit flow control */
   3802 	    E1000_CTRL_LRST);		/* Clear link reset */
   3803 	E1000_WRITE_REG(hw, E1000_CTRL, ctrl);
   3804 
   3805 	/* Set PCS Link Control register */
   3806 	pcs_lctl = E1000_READ_REG(hw, E1000_PCS_LCTL);
   3807 	pcs_lctl |= (E1000_PCS_LCTL_FORCE_LINK |
   3808 	    E1000_PCS_LCTL_FSD |
   3809 	    E1000_PCS_LCTL_FDV_FULL |
   3810 	    E1000_PCS_LCTL_FLV_LINK_UP);
   3811 	pcs_lctl &= ~E1000_PCS_LCTL_AN_ENABLE;
   3812 	E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_lctl);
   3813 
   3814 	/* Set the Copper/Fiber Switch Control - CONNSW register */
   3815 	connsw = E1000_READ_REG(hw, E1000_CONNSW);
   3816 	connsw &= ~E1000_CONNSW_ENRGSRC;
   3817 	E1000_WRITE_REG(hw, E1000_CONNSW, connsw);
   3818 }
   3819 
   3820 #pragma inline(igb_intr_rx_work)
   3821 /*
   3822  * igb_intr_rx_work - rx processing of ISR
   3823  */
   3824 static void
   3825 igb_intr_rx_work(igb_rx_ring_t *rx_ring)
   3826 {
   3827 	mblk_t *mp;
   3828 
   3829 	mutex_enter(&rx_ring->rx_lock);
   3830 	mp = igb_rx(rx_ring, IGB_NO_POLL);
   3831 	mutex_exit(&rx_ring->rx_lock);
   3832 
   3833 	if (mp != NULL)
   3834 		mac_rx_ring(rx_ring->igb->mac_hdl, rx_ring->ring_handle, mp,
   3835 		    rx_ring->ring_gen_num);
   3836 }
   3837 
   3838 #pragma inline(igb_intr_tx_work)
   3839 /*
   3840  * igb_intr_tx_work - tx processing of ISR
   3841  */
   3842 static void
   3843 igb_intr_tx_work(igb_tx_ring_t *tx_ring)
   3844 {
   3845 	igb_t *igb = tx_ring->igb;
   3846 
   3847 	/* Recycle the tx descriptors */
   3848 	tx_ring->tx_recycle(tx_ring);
   3849 
   3850 	/* Schedule the re-transmit */
   3851 	if (tx_ring->reschedule &&
   3852 	    (tx_ring->tbd_free >= igb->tx_resched_thresh)) {
   3853 		tx_ring->reschedule = B_FALSE;
   3854 		mac_tx_ring_update(tx_ring->igb->mac_hdl, tx_ring->ring_handle);
   3855 		IGB_DEBUG_STAT(tx_ring->stat_reschedule);
   3856 	}
   3857 }
   3858 
   3859 #pragma inline(igb_intr_link_work)
   3860 /*
   3861  * igb_intr_link_work - link-status-change processing of ISR
   3862  */
   3863 static void
   3864 igb_intr_link_work(igb_t *igb)
   3865 {
   3866 	boolean_t link_changed;
   3867 
   3868 	igb_stop_watchdog_timer(igb);
   3869 
   3870 	mutex_enter(&igb->gen_lock);
   3871 
   3872 	/*
   3873 	 * Because we got a link-status-change interrupt, force
   3874 	 * e1000_check_for_link() to look at phy
   3875 	 */
   3876 	igb->hw.mac.get_link_status = B_TRUE;
   3877 
   3878 	/* igb_link_check takes care of link status change */
   3879 	link_changed = igb_link_check(igb);
   3880 
   3881 	/* Get new phy state */
   3882 	igb_get_phy_state(igb);
   3883 
   3884 	mutex_exit(&igb->gen_lock);
   3885 
   3886 	if (link_changed)
   3887 		mac_link_update(igb->mac_hdl, igb->link_state);
   3888 
   3889 	igb_start_watchdog_timer(igb);
   3890 }
   3891 
   3892 /*
   3893  * igb_intr_legacy - Interrupt handler for legacy interrupts
   3894  */
   3895 static uint_t
   3896 igb_intr_legacy(void *arg1, void *arg2)
   3897 {
   3898 	igb_t *igb = (igb_t *)arg1;
   3899 	igb_tx_ring_t *tx_ring;
   3900 	uint32_t icr;
   3901 	mblk_t *mp;
   3902 	boolean_t tx_reschedule;
   3903 	boolean_t link_changed;
   3904 	uint_t result;
   3905 
   3906 	_NOTE(ARGUNUSED(arg2));
   3907 
   3908 	mutex_enter(&igb->gen_lock);
   3909 
   3910 	if (igb->igb_state & IGB_SUSPENDED) {
   3911 		mutex_exit(&igb->gen_lock);
   3912 		return (DDI_INTR_UNCLAIMED);
   3913 	}
   3914 
   3915 	mp = NULL;
   3916 	tx_reschedule = B_FALSE;
   3917 	link_changed = B_FALSE;
   3918 	icr = E1000_READ_REG(&igb->hw, E1000_ICR);
   3919 
   3920 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
   3921 		mutex_exit(&igb->gen_lock);
   3922 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
   3923 		atomic_or_32(&igb->igb_state, IGB_ERROR);
   3924 		return (DDI_INTR_UNCLAIMED);
   3925 	}
   3926 
   3927 	if (icr & E1000_ICR_INT_ASSERTED) {
   3928 		/*
   3929 		 * E1000_ICR_INT_ASSERTED bit was set:
   3930 		 * Read(Clear) the ICR, claim this interrupt,
   3931 		 * look for work to do.
   3932 		 */
   3933 		ASSERT(igb->num_rx_rings == 1);
   3934 		ASSERT(igb->num_tx_rings == 1);
   3935 
   3936 		/* Make sure all interrupt causes cleared */
   3937 		(void) E1000_READ_REG(&igb->hw, E1000_EICR);
   3938 
   3939 		if (icr & E1000_ICR_RXT0) {
   3940 			mp = igb_rx(&igb->rx_rings[0], IGB_NO_POLL);
   3941 		}
   3942 
   3943 		if (icr & E1000_ICR_TXDW) {
   3944 			tx_ring = &igb->tx_rings[0];
   3945 
   3946 			/* Recycle the tx descriptors */
   3947 			tx_ring->tx_recycle(tx_ring);
   3948 
   3949 			/* Schedule the re-transmit */
   3950 			tx_reschedule = (tx_ring->reschedule &&
   3951 			    (tx_ring->tbd_free >= igb->tx_resched_thresh));
   3952 		}
   3953 
   3954 		if (icr & E1000_ICR_LSC) {
   3955 			/*
   3956 			 * Because we got a link-status-change interrupt, force
   3957 			 * e1000_check_for_link() to look at phy
   3958 			 */
   3959 			igb->hw.mac.get_link_status = B_TRUE;
   3960 
   3961 			/* igb_link_check takes care of link status change */
   3962 			link_changed = igb_link_check(igb);
   3963 
   3964 			/* Get new phy state */
   3965 			igb_get_phy_state(igb);
   3966 		}
   3967 
   3968 		if (icr & E1000_ICR_DRSTA) {
   3969 			/* 82580 Full Device Reset needed */
   3970 			atomic_or_32(&igb->igb_state, IGB_STALL);
   3971 		}
   3972 
   3973 		result = DDI_INTR_CLAIMED;
   3974 	} else {
   3975 		/*
   3976 		 * E1000_ICR_INT_ASSERTED bit was not set:
   3977 		 * Don't claim this interrupt.
   3978 		 */
   3979 		result = DDI_INTR_UNCLAIMED;
   3980 	}
   3981 
   3982 	mutex_exit(&igb->gen_lock);
   3983 
   3984 	/*
   3985 	 * Do the following work outside of the gen_lock
   3986 	 */
   3987 	if (mp != NULL)
   3988 		mac_rx(igb->mac_hdl, NULL, mp);
   3989 
   3990 	if (tx_reschedule)  {
   3991 		tx_ring->reschedule = B_FALSE;
   3992 		mac_tx_ring_update(igb->mac_hdl, tx_ring->ring_handle);
   3993 		IGB_DEBUG_STAT(tx_ring->stat_reschedule);
   3994 	}
   3995 
   3996 	if (link_changed)
   3997 		mac_link_update(igb->mac_hdl, igb->link_state);
   3998 
   3999 	return (result);
   4000 }
   4001 
   4002 /*
   4003  * igb_intr_msi - Interrupt handler for MSI
   4004  */
   4005 static uint_t
   4006 igb_intr_msi(void *arg1, void *arg2)
   4007 {
   4008 	igb_t *igb = (igb_t *)arg1;
   4009 	uint32_t icr;
   4010 
   4011 	_NOTE(ARGUNUSED(arg2));
   4012 
   4013 	icr = E1000_READ_REG(&igb->hw, E1000_ICR);
   4014 
   4015 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
   4016 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
   4017 		atomic_or_32(&igb->igb_state, IGB_ERROR);
   4018 		return (DDI_INTR_CLAIMED);
   4019 	}
   4020 
   4021 	/* Make sure all interrupt causes cleared */
   4022 	(void) E1000_READ_REG(&igb->hw, E1000_EICR);
   4023 
   4024 	/*
   4025 	 * For MSI interrupt, we have only one vector,
   4026 	 * so we have only one rx ring and one tx ring enabled.
   4027 	 */
   4028 	ASSERT(igb->num_rx_rings == 1);
   4029 	ASSERT(igb->num_tx_rings == 1);
   4030 
   4031 	if (icr & E1000_ICR_RXT0) {
   4032 		igb_intr_rx_work(&igb->rx_rings[0]);
   4033 	}
   4034 
   4035 	if (icr & E1000_ICR_TXDW) {
   4036 		igb_intr_tx_work(&igb->tx_rings[0]);
   4037 	}
   4038 
   4039 	if (icr & E1000_ICR_LSC) {
   4040 		igb_intr_link_work(igb);
   4041 	}
   4042 
   4043 	if (icr & E1000_ICR_DRSTA) {
   4044 		/* 82580 Full Device Reset needed */
   4045 		atomic_or_32(&igb->igb_state, IGB_STALL);
   4046 	}
   4047 
   4048 	return (DDI_INTR_CLAIMED);
   4049 }
   4050 
   4051 /*
   4052  * igb_intr_rx - Interrupt handler for rx
   4053  */
   4054 static uint_t
   4055 igb_intr_rx(void *arg1, void *arg2)
   4056 {
   4057 	igb_rx_ring_t *rx_ring = (igb_rx_ring_t *)arg1;
   4058 
   4059 	_NOTE(ARGUNUSED(arg2));
   4060 
   4061 	/*
   4062 	 * Only used via MSI-X vector so don't check cause bits
   4063 	 * and only clean the given ring.
   4064 	 */
   4065 	igb_intr_rx_work(rx_ring);
   4066 
   4067 	return (DDI_INTR_CLAIMED);
   4068 }
   4069 
   4070 /*
   4071  * igb_intr_tx - Interrupt handler for tx
   4072  */
   4073 static uint_t
   4074 igb_intr_tx(void *arg1, void *arg2)
   4075 {
   4076 	igb_tx_ring_t *tx_ring = (igb_tx_ring_t *)arg1;
   4077 
   4078 	_NOTE(ARGUNUSED(arg2));
   4079 
   4080 	/*
   4081 	 * Only used via MSI-X vector so don't check cause bits
   4082 	 * and only clean the given ring.
   4083 	 */
   4084 	igb_intr_tx_work(tx_ring);
   4085 
   4086 	return (DDI_INTR_CLAIMED);
   4087 }
   4088 
   4089 /*
   4090  * igb_intr_tx_other - Interrupt handler for both tx and other
   4091  *
   4092  */
   4093 static uint_t
   4094 igb_intr_tx_other(void *arg1, void *arg2)
   4095 {
   4096 	igb_t *igb = (igb_t *)arg1;
   4097 	uint32_t icr;
   4098 
   4099 	_NOTE(ARGUNUSED(arg2));
   4100 
   4101 	icr = E1000_READ_REG(&igb->hw, E1000_ICR);
   4102 
   4103 	if (igb_check_acc_handle(igb->osdep.reg_handle) != DDI_FM_OK) {
   4104 		ddi_fm_service_impact(igb->dip, DDI_SERVICE_DEGRADED);
   4105 		atomic_or_32(&igb->igb_state, IGB_ERROR);
   4106 		return (DDI_INTR_CLAIMED);
   4107 	}
   4108 
   4109 	/*
   4110 	 * Look for tx reclaiming work first. Remember, in the
   4111 	 * case of only interrupt sharing, only one tx ring is
   4112 	 * used
   4113 	 */
   4114 	igb_intr_tx_work(&igb->tx_rings[0]);
   4115 
   4116 	/*
   4117 	 * Check for "other" causes.
   4118 	 */
   4119 	if (icr & E1000_ICR_LSC) {
   4120 		igb_intr_link_work(igb);
   4121 	}
   4122 
   4123 	/*
   4124 	 * The DOUTSYNC bit indicates a tx packet dropped because
   4125 	 * DMA engine gets "out of sync". There isn't a real fix
   4126 	 * for this. The Intel recommendation is to count the number
   4127 	 * of occurrences so user can detect when it is happening.
   4128 	 * The issue is non-fatal and there's no recovery action
   4129 	 * available.
   4130 	 */
   4131 	if (icr & E1000_ICR_DOUTSYNC) {
   4132 		IGB_STAT(igb->dout_sync);
   4133 	}
   4134 
   4135 	if (icr & E1000_ICR_DRSTA) {
   4136 		/* 82580 Full Device Reset needed */
   4137 		atomic_or_32(&igb->igb_state, IGB_STALL);
   4138 	}
   4139 
   4140 	return (DDI_INTR_CLAIMED);
   4141 }
   4142 
   4143 /*
   4144  * igb_alloc_intrs - Allocate interrupts for the driver
   4145  *
   4146  * Normal sequence is to try MSI-X; if not sucessful, try MSI;
   4147  * if not successful, try Legacy.
   4148  * igb->intr_force can be used to force sequence to start with
   4149  * any of the 3 types.
   4150  * If MSI-X is not used, number of tx/rx rings is forced to 1.
   4151  */
   4152 static int
   4153 igb_alloc_intrs(igb_t *igb)
   4154 {
   4155 	dev_info_t *devinfo;
   4156 	int intr_types;
   4157 	int rc;
   4158 
   4159 	devinfo = igb->dip;
   4160 
   4161 	/* Get supported interrupt types */
   4162 	rc = ddi_intr_get_supported_types(devinfo, &intr_types);
   4163 
   4164 	if (rc != DDI_SUCCESS) {
   4165 		igb_log(igb,
   4166 		    "Get supported interrupt types failed: %d", rc);
   4167 		return (IGB_FAILURE);
   4168 	}
   4169 	IGB_DEBUGLOG_1(igb, "Supported interrupt types: %x", intr_types);
   4170 
   4171 	igb->intr_type = 0;
   4172 
   4173 	/* Install MSI-X interrupts */
   4174 	if ((intr_types & DDI_INTR_TYPE_MSIX) &&
   4175 	    (igb->intr_force <= IGB_INTR_MSIX)) {
   4176 		rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_MSIX);
   4177 
   4178 		if (rc == IGB_SUCCESS)
   4179 			return (IGB_SUCCESS);
   4180 
   4181 		igb_log(igb,
   4182 		    "Allocate MSI-X failed, trying MSI interrupts...");
   4183 	}
   4184 
   4185 	/* MSI-X not used, force rings to 1 */
   4186 	igb->num_rx_rings = 1;
   4187 	igb->num_tx_rings = 1;
   4188 	igb_log(igb,
   4189 	    "MSI-X not used, force rx and tx queue number to 1");
   4190 
   4191 	/* Install MSI interrupts */
   4192 	if ((intr_types & DDI_INTR_TYPE_MSI) &&
   4193 	    (igb->intr_force <= IGB_INTR_MSI)) {
   4194 		rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_MSI);
   4195 
   4196 		if (rc == IGB_SUCCESS)
   4197 			return (IGB_SUCCESS);
   4198 
   4199 		igb_log(igb,
   4200 		    "Allocate MSI failed, trying Legacy interrupts...");
   4201 	}
   4202 
   4203 	/* Install legacy interrupts */
   4204 	if (intr_types & DDI_INTR_TYPE_FIXED) {
   4205 		rc = igb_alloc_intr_handles(igb, DDI_INTR_TYPE_FIXED);
   4206 
   4207 		if (rc == IGB_SUCCESS)
   4208 			return (IGB_SUCCESS);
   4209 
   4210 		igb_log(igb,
   4211 		    "Allocate Legacy interrupts failed");
   4212 	}
   4213 
   4214 	/* If none of the 3 types succeeded, return failure */
   4215 	return (IGB_FAILURE);
   4216 }
   4217 
   4218 /*
   4219  * igb_alloc_intr_handles - Allocate interrupt handles.
   4220  *
   4221  * For legacy and MSI, only 1 handle is needed.  For MSI-X,
   4222  * if fewer than 2 handles are available, return failure.
   4223  * Upon success, this sets the number of Rx rings to a number that
   4224  * matches the handles available for Rx interrupts.
   4225  */
   4226 static int
   4227 igb_alloc_intr_handles(igb_t *igb, int intr_type)
   4228 {
   4229 	dev_info_t *devinfo;
   4230 	int orig, request, count, avail, actual;
   4231 	int diff, minimum;
   4232 	int rc;
   4233 
   4234 	devinfo = igb->dip;
   4235 
   4236 	switch (intr_type) {
   4237 	case DDI_INTR_TYPE_FIXED:
   4238 		request = 1;	/* Request 1 legacy interrupt handle */
   4239 		minimum = 1;
   4240 		IGB_DEBUGLOG_0(igb, "interrupt type: legacy");
   4241 		break;
   4242 
   4243 	case DDI_INTR_TYPE_MSI:
   4244 		request = 1;	/* Request 1 MSI interrupt handle */
   4245 		minimum = 1;
   4246 		IGB_DEBUGLOG_0(igb, "interrupt type: MSI");
   4247 		break;
   4248 
   4249 	case DDI_INTR_TYPE_MSIX:
   4250 		/*
   4251 		 * Number of vectors for the adapter is
   4252 		 * # rx rings + # tx rings
   4253 		 * One of tx vectors is for tx & other
   4254 		 */
   4255 		request = igb->num_rx_rings + igb->num_tx_rings;
   4256 		orig = request;
   4257 		minimum = 2;
   4258 		IGB_DEBUGLOG_0(igb, "interrupt type: MSI-X");
   4259 		break;
   4260 
   4261 	default:
   4262 		igb_log(igb,
   4263 		    "invalid call to igb_alloc_intr_handles(): %d\n",
   4264 		    intr_type);
   4265 		return (IGB_FAILURE);
   4266 	}
   4267 	IGB_DEBUGLOG_2(igb, "interrupt handles requested: %d  minimum: %d",
   4268 	    request, minimum);
   4269 
   4270 	/*
   4271 	 * Get number of supported interrupts
   4272 	 */
   4273 	rc = ddi_intr_get_nintrs(devinfo, intr_type, &count);
   4274 	if ((rc != DDI_SUCCESS) || (count < minimum)) {
   4275 		igb_log(igb,
   4276 		    "Get supported interrupt number failed. "
   4277 		    "Return: %d, count: %d", rc, count);
   4278 		return (IGB_FAILURE);
   4279 	}
   4280 	IGB_DEBUGLOG_1(igb, "interrupts supported: %d", count);
   4281 
   4282 	/*
   4283 	 * Get number of available interrupts
   4284 	 */
   4285 	rc = ddi_intr_get_navail(devinfo, intr_type, &avail);
   4286 	if ((rc != DDI_SUCCESS) || (avail < minimum)) {
   4287 		igb_log(igb,
   4288 		    "Get available interrupt number failed. "
   4289 		    "Return: %d, available: %d", rc, avail);
   4290 		return (IGB_FAILURE);
   4291 	}
   4292 	IGB_DEBUGLOG_1(igb, "interrupts available: %d", avail);
   4293 
   4294 	if (avail < request) {
   4295 		igb_log(igb, "Request %d handles, %d available",
   4296 		    request, avail);
   4297 		request = avail;
   4298 	}
   4299 
   4300 	actual = 0;
   4301 	igb->intr_cnt = 0;
   4302 
   4303 	/*
   4304 	 * Allocate an array of interrupt handles
   4305 	 */
   4306 	igb->intr_size = request * sizeof (ddi_intr_handle_t);
   4307 	igb->htable = kmem_alloc(igb->intr_size, KM_SLEEP);
   4308 
   4309 	rc = ddi_intr_alloc(devinfo, igb->htable, intr_type, 0,
   4310 	    request, &actual, DDI_INTR_ALLOC_NORMAL);
   4311 	if (rc != DDI_SUCCESS) {
   4312 		igb_log(igb, "Allocate interrupts failed. "
   4313 		    "return: %d, request: %d, actual: %d",
   4314 		    rc, request, actual);
   4315 		goto alloc_handle_fail;
   4316 	}
   4317 	IGB_DEBUGLOG_1(igb, "interrupts actually allocated: %d", actual);
   4318 
   4319 	igb->intr_cnt = actual;
   4320 
   4321 	if (actual < minimum) {
   4322 		igb_log(igb, "Insufficient interrupt handles allocated: %d",
   4323 		    actual);
   4324 		goto alloc_handle_fail;
   4325 	}
   4326 
   4327 	/*
   4328 	 * For MSI-X, actual might force us to reduce number of tx & rx rings
   4329 	 */
   4330 	if ((intr_type == DDI_INTR_TYPE_MSIX) && (orig > actual)) {
   4331 		diff = orig - actual;
   4332 		if (diff < igb->num_tx_rings) {
   4333 			igb_log(igb,
   4334 			    "MSI-X vectors force Tx queue number to %d",
   4335 			    igb->num_tx_rings - diff);
   4336 			igb->num_tx_rings -= diff;
   4337 		} else {
   4338 			igb_log(igb,
   4339 			    "MSI-X vectors force Tx queue number to 1");
   4340 			igb->num_tx_rings = 1;
   4341 
   4342 			igb_log(igb,
   4343 			    "MSI-X vectors force Rx queue number to %d",
   4344 			    actual - 1);
   4345 			igb->num_rx_rings = actual - 1;
   4346 		}
   4347 	}
   4348 
   4349 	/*
   4350 	 * Get priority for first vector, assume remaining are all the same
   4351 	 */
   4352 	rc = ddi_intr_get_pri(igb->htable[0], &igb->intr_pri);
   4353 	if (rc != DDI_SUCCESS) {
   4354 		igb_log(igb,
   4355 		    "Get interrupt priority failed: %d", rc);
   4356 		goto alloc_handle_fail;
   4357 	}
   4358 
   4359 	rc = ddi_intr_get_cap(igb->htable[0], &igb->intr_cap);
   4360 	if (rc != DDI_SUCCESS) {
   4361 		igb_log(igb,
   4362 		    "Get interrupt cap failed: %d", rc);
   4363 		goto alloc_handle_fail;
   4364 	}
   4365 
   4366 	igb->intr_type = intr_type;
   4367 
   4368 	return (IGB_SUCCESS);
   4369 
   4370 alloc_handle_fail:
   4371 	igb_rem_intrs(igb);
   4372 
   4373 	return (IGB_FAILURE);
   4374 }
   4375 
   4376 /*
   4377  * igb_add_intr_handlers - Add interrupt handlers based on the interrupt type
   4378  *
   4379  * Before adding the interrupt handlers, the interrupt vectors have
   4380  * been allocated, and the rx/tx rings have also been allocated.
   4381  */
   4382 static int
   4383 igb_add_intr_handlers(igb_t *igb)
   4384 {
   4385 	igb_rx_ring_t *rx_ring;
   4386 	igb_tx_ring_t *tx_ring;
   4387 	int vector;
   4388 	int rc;
   4389 	int i;
   4390 
   4391 	vector = 0;
   4392 
   4393 	switch (igb->intr_type) {
   4394 	case DDI_INTR_TYPE_MSIX:
   4395 		/* Add interrupt handler for tx + other */
   4396 		tx_ring = &igb->tx_rings[0];
   4397 		rc = ddi_intr_add_handler(igb->htable[vector],
   4398 		    (ddi_intr_handler_t *)igb_intr_tx_other,
   4399 		    (void *)igb, NULL);
   4400 
   4401 		if (rc != DDI_SUCCESS) {
   4402 			igb_log(igb,
   4403 			    "Add tx/other interrupt handler failed: %d", rc);
   4404 			return (IGB_FAILURE);
   4405 		}
   4406 		tx_ring->intr_vector = vector;
   4407 		vector++;
   4408 
   4409 		/* Add interrupt handler for each rx ring */
   4410 		for (i = 0; i < igb->num_rx_rings; i++) {
   4411 			rx_ring = &igb->rx_rings[i];
   4412 
   4413 			rc = ddi_intr_add_handler(igb->htable[vector],
   4414 			    (ddi_intr_handler_t *)igb_intr_rx,
   4415 			    (void *)rx_ring, NULL);
   4416 
   4417 			if (rc != DDI_SUCCESS) {
   4418 				igb_log(igb,
   4419 				    "Add rx interrupt handler failed. "
   4420 				    "return: %d, rx ring: %d", rc, i);
   4421 				for (vector--; vector >= 0; vector--) {
   4422 					(void) ddi_intr_remove_handler(
   4423 					    igb->htable[vector]);
   4424 				}
   4425 				return (IGB_FAILURE);
   4426 			}
   4427 
   4428 			rx_ring->intr_vector = vector;
   4429 
   4430 			vector++;
   4431 		}
   4432 
   4433 		/* Add interrupt handler for each tx ring from 2nd ring */
   4434 		for (i = 1; i < igb->num_tx_rings; i++) {
   4435 			tx_ring = &igb->tx_rings[i];
   4436 
   4437 			rc = ddi_intr_add_handler(igb->htable[vector],
   4438 			    (ddi_intr_handler_t *)igb_intr_tx,
   4439 			    (void *)tx_ring, NULL);
   4440 
   4441 			if (rc != DDI_SUCCESS) {
   4442 				igb_log(igb,
   4443 				    "Add tx interrupt handler failed. "
   4444 				    "return: %d, tx ring: %d", rc, i);
   4445 				for (vector--; vector >= 0; vector--) {
   4446 					(void) ddi_intr_remove_handler(
   4447 					    igb->htable[vector]);
   4448 				}
   4449 				return (IGB_FAILURE);
   4450 			}
   4451 
   4452 			tx_ring->intr_vector = vector;
   4453 
   4454 			vector++;
   4455 		}
   4456 
   4457 		break;
   4458 
   4459 	case DDI_INTR_TYPE_MSI:
   4460 		/* Add interrupt handlers for the only vector */
   4461 		rc = ddi_intr_add_handler(igb->htable[vector],
   4462 		    (ddi_intr_handler_t *)igb_intr_msi,
   4463 		    (void *)igb, NULL);
   4464 
   4465 		if (rc != DDI_SUCCESS) {
   4466 			igb_log(igb,
   4467 			    "Add MSI interrupt handler failed: %d", rc);
   4468 			return (IGB_FAILURE);
   4469 		}
   4470 
   4471 		rx_ring = &igb->rx_rings[0];
   4472 		rx_ring->intr_vector = vector;
   4473 
   4474 		vector++;
   4475 		break;
   4476 
   4477 	case DDI_INTR_TYPE_FIXED:
   4478 		/* Add interrupt handlers for the only vector */
   4479 		rc = ddi_intr_add_handler(igb->htable[vector],
   4480 		    (ddi_intr_handler_t *)igb_intr_legacy,
   4481 		    (void *)igb, NULL);
   4482 
   4483 		if (rc != DDI_SUCCESS) {
   4484 			igb_log(igb,
   4485 			    "Add legacy interrupt handler failed: %d", rc);
   4486 			return (IGB_FAILURE);
   4487 		}
   4488 
   4489 		rx_ring = &igb->rx_rings[0];
   4490 		rx_ring->intr_vector = vector;
   4491 
   4492 		vector++;
   4493 		break;
   4494 
   4495 	default:
   4496 		return (IGB_FAILURE);
   4497 	}
   4498 
   4499 	ASSERT(vector == igb->intr_cnt);
   4500 
   4501 	return (IGB_SUCCESS);
   4502 }
   4503 
   4504 /*
   4505  * igb_setup_msix_82575 - setup 82575 adapter to use MSI-X interrupts
   4506  *
   4507  * For each vector enabled on the adapter, Set the MSIXBM register accordingly
   4508  */
   4509 static void
   4510 igb_setup_msix_82575(igb_t *igb)
   4511 {
   4512 	uint32_t eims = 0;
   4513 	int i, vector;
   4514 	struct e1000_hw *hw = &igb->hw;
   4515 
   4516 	/*
   4517 	 * Set vector for tx ring 0 and other causes.
   4518 	 * NOTE assumption that it is vector 0.
   4519 	 */
   4520 	vector = 0;
   4521 
   4522 	igb->eims_mask = E1000_EICR_TX_QUEUE0 | E1000_EICR_OTHER;
   4523 	E1000_WRITE_REG(hw, E1000_MSIXBM(vector), igb->eims_mask);
   4524 	vector++;
   4525 
   4526 	for (i = 0; i < igb->num_rx_rings; i++) {
   4527 		/*
   4528 		 * Set vector for each rx ring
   4529 		 */
   4530 		eims = (E1000_EICR_RX_QUEUE0 << i);
   4531 		E1000_WRITE_REG(hw, E1000_MSIXBM(vector), eims);
   4532 
   4533 		/*
   4534 		 * Accumulate bits to enable in
   4535 		 * igb_enable_adapter_interrupts_82575()
   4536 		 */
   4537 		igb->eims_mask |= eims;
   4538 
   4539 		vector++;
   4540 	}
   4541 
   4542 	for (i = 1; i < igb->num_tx_rings; i++) {
   4543 		/*
   4544 		 * Set vector for each tx ring from 2nd tx ring
   4545 		 */
   4546 		eims = (E1000_EICR_TX_QUEUE0 << i);
   4547 		E1000_WRITE_REG(hw, E1000_MSIXBM(vector), eims);
   4548 
   4549 		/*
   4550 		 * Accumulate bits to enable in
   4551 		 * igb_enable_adapter_interrupts_82575()
   4552 		 */
   4553 		igb->eims_mask |= eims;
   4554 
   4555 		vector++;
   4556 	}
   4557 
   4558 	ASSERT(vector == igb->intr_cnt);
   4559 
   4560 	/*
   4561 	 * Disable IAM for ICR interrupt bits
   4562 	 */
   4563 	E1000_WRITE_REG(hw, E1000_IAM, 0);
   4564 	E1000_WRITE_FLUSH(hw);
   4565 }
   4566 
   4567 /*
   4568  * igb_setup_msix_82576 - setup 82576 adapter to use MSI-X interrupts
   4569  *
   4570  * 82576 uses a table based method for assigning vectors.  Each queue has a
   4571  * single entry in the table to which we write a vector number along with a
   4572  * "valid" bit.  The entry is a single byte in a 4-byte register.  Vectors
   4573  * take a different position in the 4-byte register depending on whether
   4574  * they are numbered above or below 8.
   4575  */
   4576 static void
   4577 igb_setup_msix_82576(igb_t *igb)
   4578 {
   4579 	struct e1000_hw *hw = &igb->hw;
   4580 	uint32_t ivar, index, vector;
   4581 	int i;
   4582 
   4583 	/* must enable msi-x capability before IVAR settings */
   4584 	E1000_WRITE_REG(hw, E1000_GPIE,
   4585 	    (E1000_GPIE_MSIX_MODE | E1000_GPIE_PBA | E1000_GPIE_NSICR));
   4586 
   4587 	/*
   4588 	 * Set vector for tx ring 0 and other causes.
   4589 	 * NOTE assumption that it is vector 0.
   4590 	 * This is also interdependent with installation of interrupt service
   4591 	 * routines in igb_add_intr_handlers().
   4592 	 */
   4593 
   4594 	/* assign "other" causes to vector 0 */
   4595 	vector = 0;
   4596 	ivar = ((vector | E1000_IVAR_VALID) << 8);
   4597 	E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
   4598 
   4599 	/* assign tx ring 0 to vector 0 */
   4600 	ivar = ((vector | E1000_IVAR_VALID) << 8);
   4601 	E1000_WRITE_REG(hw, E1000_IVAR0, ivar);
   4602 
   4603 	/* prepare to enable tx & other interrupt causes */
   4604 	igb->eims_mask = (1 << vector);
   4605 
   4606 	vector ++;
   4607 	for (i = 0; i < igb->num_rx_rings; i++) {
   4608 		/*
   4609 		 * Set vector for each rx ring
   4610 		 */
   4611 		index = (i & 0x7);
   4612 		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
   4613 
   4614 		if (i < 8) {
   4615 			/* vector goes into low byte of register */
   4616 			ivar = ivar & 0xFFFFFF00;
   4617 			ivar |= (vector | E1000_IVAR_VALID);
   4618 		} else {
   4619 			/* vector goes into third byte of register */
   4620 			ivar = ivar & 0xFF00FFFF;
   4621 			ivar |= ((vector | E1000_IVAR_VALID) << 16);
   4622 		}
   4623 		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
   4624 
   4625 		/* Accumulate interrupt-cause bits to enable */
   4626 		igb->eims_mask |= (1 << vector);
   4627 
   4628 		vector ++;
   4629 	}
   4630 
   4631 	for (i = 1; i < igb->num_tx_rings; i++) {
   4632 		/*
   4633 		 * Set vector for each tx ring from 2nd tx ring.
   4634 		 * Note assumption that tx vectors numericall follow rx vectors.
   4635 		 */
   4636 		index = (i & 0x7);
   4637 		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
   4638 
   4639 		if (i < 8) {
   4640 			/* vector goes into second byte of register */
   4641 			ivar = ivar & 0xFFFF00FF;
   4642 			ivar |= ((vector | E1000_IVAR_VALID) << 8);
   4643 		} else {
   4644 			/* vector goes into fourth byte of register */
   4645 			ivar = ivar & 0x00FFFFFF;
   4646 			ivar |= (vector | E1000_IVAR_VALID) << 24;
   4647 		}
   4648 		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
   4649 
   4650 		/* Accumulate interrupt-cause bits to enable */
   4651 		igb->eims_mask |= (1 << vector);
   4652 
   4653 		vector ++;
   4654 	}
   4655 
   4656 	ASSERT(vector == igb->intr_cnt);
   4657 }
   4658 
   4659 /*
   4660  * igb_setup_msix_82580 - setup 82580 adapter to use MSI-X interrupts
   4661  *
   4662  * 82580 uses same table approach at 82576 but has fewer entries.  Each
   4663  * queue has a single entry in the table to which we write a vector number
   4664  * along with a "valid" bit.  Vectors take a different position in the
   4665  * register depending on * whether * they are numbered above or below 4.
   4666  */
   4667 static void
   4668 igb_setup_msix_82580(igb_t *igb)
   4669 {
   4670 	struct e1000_hw *hw = &igb->hw;
   4671 	uint32_t ivar, index, vector;
   4672 	int i;
   4673 
   4674 	/* must enable msi-x capability before IVAR settings */
   4675 	E1000_WRITE_REG(hw, E1000_GPIE, (E1000_GPIE_MSIX_MODE |
   4676 	    E1000_GPIE_PBA | E1000_GPIE_NSICR | E1000_GPIE_EIAME));
   4677 	/*
   4678 	 * Set vector for tx ring 0 and other causes.
   4679 	 * NOTE assumption that it is vector 0.
   4680 	 * This is also interdependent with installation of interrupt service
   4681 	 * routines in igb_add_intr_handlers().
   4682 	 */
   4683 
   4684 	/* assign "other" causes to vector 0 */
   4685 	vector = 0;
   4686 	ivar = ((vector | E1000_IVAR_VALID) << 8);
   4687 	E1000_WRITE_REG(hw, E1000_IVAR_MISC, ivar);
   4688 
   4689 	/* assign tx ring 0 to vector 0 */
   4690 	ivar = ((vector | E1000_IVAR_VALID) << 8);
   4691 	E1000_WRITE_REG(hw, E1000_IVAR0, ivar);
   4692 
   4693 	/* prepare to enable tx & other interrupt causes */
   4694 	igb->eims_mask = (1 << vector);
   4695 
   4696 	vector ++;
   4697 
   4698 	for (i = 0; i < igb->num_rx_rings; i++) {
   4699 		/*
   4700 		 * Set vector for each rx ring
   4701 		 */
   4702 		index = (i >> 1);
   4703 		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
   4704 
   4705 		if (i & 1) {
   4706 			/* vector goes into third byte of register */
   4707 			ivar = ivar & 0xFF00FFFF;
   4708 			ivar |= ((vector | E1000_IVAR_VALID) << 16);
   4709 		} else {
   4710 			/* vector goes into low byte of register */
   4711 			ivar = ivar & 0xFFFFFF00;
   4712 			ivar |= (vector | E1000_IVAR_VALID);
   4713 		}
   4714 		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
   4715 
   4716 		/* Accumulate interrupt-cause bits to enable */
   4717 		igb->eims_mask |= (1 << vector);
   4718 
   4719 		vector ++;
   4720 	}
   4721 
   4722 	for (i = 1; i < igb->num_tx_rings; i++) {
   4723 		/*
   4724 		 * Set vector for each tx ring from 2nd tx ring.
   4725 		 * Note assumption that tx vectors numericall follow rx vectors.
   4726 		 */
   4727 		index = (i >> 1);
   4728 		ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index);
   4729 
   4730 		if (i & 1) {
   4731 			/* vector goes into high byte of register */
   4732 			ivar = ivar & 0x00FFFFFF;
   4733 			ivar |= ((vector | E1000_IVAR_VALID) << 24);
   4734 		} else {
   4735 			/* vector goes into second byte of register */
   4736 			ivar = ivar & 0xFFFF00FF;
   4737 			ivar |= (vector | E1000_IVAR_VALID) << 8;
   4738 		}
   4739 		E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar);
   4740 
   4741 		/* Accumulate interrupt-cause bits to enable */
   4742 		igb->eims_mask |= (1 << vector);
   4743 
   4744 		vector ++;
   4745 	}
   4746 	ASSERT(vector == igb->intr_cnt);
   4747 }
   4748 
   4749 /*
   4750  * igb_rem_intr_handlers - remove the interrupt handlers
   4751  */
   4752 static void
   4753 igb_rem_intr_handlers(igb_t *igb)
   4754 {
   4755 	int i;
   4756 	int rc;
   4757 
   4758 	for (i = 0; i < igb->intr_cnt; i++) {
   4759 		rc = ddi_intr_remove_handler(igb->htable[i]);
   4760 		if (rc != DDI_SUCCESS) {
   4761 			IGB_DEBUGLOG_1(igb,
   4762 			    "Remove intr handler failed: %d", rc);
   4763 		}
   4764 	}
   4765 }
   4766 
   4767 /*
   4768  * igb_rem_intrs - remove the allocated interrupts
   4769  */
   4770 static void
   4771 igb_rem_intrs(igb_t *igb)
   4772 {
   4773 	int i;
   4774 	int rc;
   4775 
   4776 	for (i = 0; i < igb->intr_cnt; i++) {
   4777 		rc = ddi_intr_free(igb->htable[i]);
   4778 		if (rc != DDI_SUCCESS) {
   4779 			IGB_DEBUGLOG_1(igb,
   4780 			    "Free intr failed: %d", rc);
   4781 		}
   4782 	}
   4783 
   4784 	kmem_free(igb->htable, igb->intr_size);
   4785 	igb->htable = NULL;
   4786 }
   4787 
   4788 /*
   4789  * igb_enable_intrs - enable all the ddi interrupts
   4790  */
   4791 static int
   4792 igb_enable_intrs(igb_t *igb)
   4793 {
   4794 	int i;
   4795 	int rc;
   4796 
   4797 	/* Enable interrupts */
   4798 	if (igb->intr_cap & DDI_INTR_FLAG_BLOCK) {
   4799 		/* Call ddi_intr_block_enable() for MSI */
   4800 		rc = ddi_intr_block_enable(igb->htable, igb->intr_cnt);
   4801 		if (rc != DDI_SUCCESS) {
   4802 			igb_log(igb,
   4803 			    "Enable block intr failed: %d", rc);
   4804 			return (IGB_FAILURE);
   4805 		}
   4806 	} else {
   4807 		/* Call ddi_intr_enable() for Legacy/MSI non block enable */
   4808 		for (i = 0; i < igb->intr_cnt; i++) {
   4809 			rc = ddi_intr_enable(igb->htable[i]);
   4810 			if (rc != DDI_SUCCESS) {
   4811 				igb_log(igb,
   4812 				    "Enable intr failed: %d", rc);
   4813 				return (IGB_FAILURE);
   4814 			}
   4815 		}
   4816 	}
   4817 
   4818 	return (IGB_SUCCESS);
   4819 }
   4820 
   4821 /*
   4822  * igb_disable_intrs - disable all the ddi interrupts
   4823  */
   4824 static int
   4825 igb_disable_intrs(igb_t *igb)
   4826 {
   4827 	int i;
   4828 	int rc;
   4829 
   4830 	/* Disable all interrupts */
   4831 	if (igb->intr_cap & DDI_INTR_FLAG_BLOCK) {
   4832 		rc = ddi_intr_block_disable(igb->htable, igb->intr_cnt);
   4833 		if (rc != DDI_SUCCESS) {
   4834 			igb_log(igb,
   4835 			    "Disable block intr failed: %d", rc);
   4836 			return (IGB_FAILURE);
   4837 		}
   4838 	} else {
   4839 		for (i = 0; i < igb->intr_cnt; i++) {
   4840 			rc = ddi_intr_disable(igb->htable[i]);
   4841 			if (rc != DDI_SUCCESS) {
   4842 				igb_log(igb,
   4843 				    "Disable intr failed: %d", rc);
   4844 				return (IGB_FAILURE);
   4845 			}
   4846 		}
   4847 	}
   4848 
   4849 	return (IGB_SUCCESS);
   4850 }
   4851 
   4852 /*
   4853  * igb_get_phy_state - Get and save the parameters read from PHY registers
   4854  */
   4855 static void
   4856 igb_get_phy_state(igb_t *igb)
   4857 {
   4858 	struct e1000_hw *hw = &igb->hw;
   4859 	uint16_t phy_ctrl;
   4860 	uint16_t phy_status;
   4861 	uint16_t phy_an_adv;
   4862 	uint16_t phy_an_exp;
   4863 	uint16_t phy_ext_status;
   4864 	uint16_t phy_1000t_ctrl;
   4865 	uint16_t phy_1000t_status;
   4866 	uint16_t phy_lp_able;
   4867 
   4868 	ASSERT(mutex_owned(&igb->gen_lock));
   4869 
   4870 	if (hw->phy.media_type == e1000_media_type_copper) {
   4871 		(void) e1000_read_phy_reg(hw, PHY_CONTROL, &phy_ctrl);
   4872 		(void) e1000_read_phy_reg(hw, PHY_STATUS, &phy_status);
   4873 		(void) e1000_read_phy_reg(hw, PHY_AUTONEG_ADV, &phy_an_adv);
   4874 		(void) e1000_read_phy_reg(hw, PHY_AUTONEG_EXP, &phy_an_exp);
   4875 		(void) e1000_read_phy_reg(hw, PHY_EXT_STATUS, &phy_ext_status);
   4876 		(void) e1000_read_phy_reg(hw, PHY_1000T_CTRL, &phy_1000t_ctrl);
   4877 		(void) e1000_read_phy_reg(hw,
   4878 		    PHY_1000T_STATUS, &phy_1000t_status);
   4879 		(void) e1000_read_phy_reg(hw, PHY_LP_ABILITY, &phy_lp_able);
   4880 
   4881 		igb->param_autoneg_cap =
   4882 		    (phy_status & MII_SR_AUTONEG_CAPS) ? 1 : 0;
   4883 		igb->param_pause_cap =
   4884 		    (phy_an_adv & NWAY_AR_PAUSE) ? 1 : 0;
   4885 		igb->param_asym_pause_cap =
   4886 		    (phy_an_adv & NWAY_AR_ASM_DIR) ? 1 : 0;
   4887 		igb->param_1000fdx_cap =
   4888 		    ((phy_ext_status & IEEE_ESR_1000T_FD_CAPS) ||
   4889 		    (phy_ext_status & IEEE_ESR_1000X_FD_CAPS)) ? 1 : 0;
   4890 		igb->param_1000hdx_cap =
   4891 		    ((phy_ext_status & IEEE_ESR_1000T_HD_CAPS) ||
   4892 		    (phy_ext_status & IEEE_ESR_1000X_HD_CAPS)) ? 1 : 0;
   4893 		igb->param_100t4_cap =
   4894 		    (phy_status & MII_SR_100T4_CAPS) ? 1 : 0;
   4895 		igb->param_100fdx_cap = ((phy_status & MII_SR_100X_FD_CAPS) ||
   4896 		    (phy_status & MII_SR_100T2_FD_CAPS)) ? 1 : 0;
   4897 		igb->param_100hdx_cap = ((phy_status & MII_SR_100X_HD_CAPS) ||
   4898 		    (phy_status & MII_SR_100T2_HD_CAPS)) ? 1 : 0;
   4899 		igb->param_10fdx_cap =
   4900 		    (phy_status & MII_SR_10T_FD_CAPS) ? 1 : 0;
   4901 		igb->param_10hdx_cap =
   4902 		    (phy_status & MII_SR_10T_HD_CAPS) ? 1 : 0;
   4903 		igb->param_rem_fault =
   4904 		    (phy_status & MII_SR_REMOTE_FAULT) ? 1 : 0;
   4905 
   4906 		igb->param_adv_autoneg_cap = hw->mac.autoneg;
   4907 		igb->param_adv_pause_cap =
   4908 		    (phy_an_adv & NWAY_AR_PAUSE) ? 1 : 0;
   4909 		igb->param_adv_asym_pause_cap =
   4910 		    (phy_an_adv & NWAY_AR_ASM_DIR) ? 1 : 0;
   4911 		igb->param_adv_1000hdx_cap =
   4912 		    (phy_1000t_ctrl & CR_1000T_HD_CAPS) ? 1 : 0;
   4913 		igb->param_adv_100t4_cap =
   4914 		    (phy_an_adv & NWAY_AR_100T4_CAPS) ? 1 : 0;
   4915 		igb->param_adv_rem_fault =
   4916 		    (phy_an_adv & NWAY_AR_REMOTE_FAULT) ? 1 : 0;
   4917 		if (igb->param_adv_autoneg_cap == 1) {
   4918 			igb->param_adv_1000fdx_cap =
   4919 			    (phy_1000t_ctrl & CR_1000T_FD_CAPS) ? 1 : 0;
   4920 			igb->param_adv_100fdx_cap =
   4921 			    (phy_an_adv & NWAY_AR_100TX_FD_CAPS) ? 1 : 0;
   4922 			igb->param_adv_100hdx_cap =
   4923 			    (phy_an_adv & NWAY_AR_100TX_HD_CAPS) ? 1 : 0;
   4924 			igb->param_adv_10fdx_cap =
   4925 			    (phy_an_adv & NWAY_AR_10T_FD_CAPS) ? 1 : 0;
   4926 			igb->param_adv_10hdx_cap =
   4927 			    (phy_an_adv & NWAY_AR_10T_HD_CAPS) ? 1 : 0;
   4928 		}
   4929 
   4930 		igb->param_lp_autoneg_cap =
   4931 		    (phy_an_exp & NWAY_ER_LP_NWAY_CAPS) ? 1 : 0;
   4932 		igb->param_lp_pause_cap =
   4933 		    (phy_lp_able & NWAY_LPAR_PAUSE) ? 1 : 0;
   4934 		igb->param_lp_asym_pause_cap =
   4935 		    (phy_lp_able & NWAY_LPAR_ASM_DIR) ? 1 : 0;
   4936 		igb->param_lp_1000fdx_cap =
   4937 		    (phy_1000t_status & SR_1000T_LP_FD_CAPS) ? 1 : 0;
   4938 		igb->param_lp_1000hdx_cap =
   4939 		    (phy_1000t_status & SR_1000T_LP_HD_CAPS) ? 1 : 0;
   4940 		igb->param_lp_100t4_cap =
   4941 		    (phy_lp_able & NWAY_LPAR_100T4_CAPS) ? 1 : 0;
   4942 		igb->param_lp_100fdx_cap =
   4943 		    (phy_lp_able & NWAY_LPAR_100TX_FD_CAPS) ? 1 : 0;
   4944 		igb->param_lp_100hdx_cap =
   4945 		    (phy_lp_able & NWAY_LPAR_100TX_HD_CAPS) ? 1 : 0;
   4946 		igb->param_lp_10fdx_cap =
   4947 		    (phy_lp_able & NWAY_LPAR_10T_FD_CAPS) ? 1 : 0;
   4948 		igb->param_lp_10hdx_cap =
   4949 		    (phy_lp_able & NWAY_LPAR_10T_HD_CAPS) ? 1 : 0;
   4950 		igb->param_lp_rem_fault =
   4951 		    (phy_lp_able & NWAY_LPAR_REMOTE_FAULT) ? 1 : 0;
   4952 	} else {
   4953 		/*
   4954 		 * 1Gig Fiber adapter only offers 1Gig Full Duplex.
   4955 		 */
   4956 		igb->param_autoneg_cap = 0;
   4957 		igb->param_pause_cap = 1;
   4958 		igb->param_asym_pause_cap = 1;
   4959 		igb->param_1000fdx_cap = 1;
   4960 		igb->param_1000hdx_cap = 0;
   4961 		igb->param_100t4_cap = 0;
   4962 		igb->param_100fdx_cap = 0;
   4963 		igb->param_100hdx_cap = 0;
   4964 		igb->param_10fdx_cap = 0;
   4965 		igb->param_10hdx_cap = 0;
   4966 
   4967 		igb->param_adv_autoneg_cap = 0;
   4968 		igb->param_adv_pause_cap = 1;
   4969 		igb->param_adv_asym_pause_cap = 1;
   4970 		igb->param_adv_1000fdx_cap = 1;
   4971 		igb->param_adv_1000hdx_cap = 0;
   4972 		igb->param_adv_100t4_cap = 0;
   4973 		igb->param_adv_100fdx_cap = 0;
   4974 		igb->param_adv_100hdx_cap = 0;
   4975 		igb->param_adv_10fdx_cap = 0;
   4976 		igb->param_adv_10hdx_cap = 0;
   4977 
   4978 		igb->param_lp_autoneg_cap = 0;
   4979 		igb->param_lp_pause_cap = 0;
   4980 		igb->param_lp_asym_pause_cap = 0;
   4981 		igb->param_lp_1000fdx_cap = 0;
   4982 		igb->param_lp_1000hdx_cap = 0;
   4983 		igb->param_lp_100t4_cap = 0;
   4984 		igb->param_lp_100fdx_cap = 0;
   4985 		igb->param_lp_100hdx_cap = 0;
   4986 		igb->param_lp_10fdx_cap = 0;
   4987 		igb->param_lp_10hdx_cap = 0;
   4988 		igb->param_lp_rem_fault = 0;
   4989 	}
   4990 }
   4991 
   4992 /*
   4993  * synchronize the adv* and en* parameters.
   4994  *
   4995  * See comments in <sys/dld.h> for details of the *_en_*
   4996  * parameters. The usage of ndd for setting adv parameters will
   4997  * synchronize all the en parameters with the e1000g parameters,
   4998  * implicitly disabling any settings made via dladm.
   4999  */
   5000 static void
   5001 igb_param_sync(igb_t *igb)
   5002 {
   5003 	igb->param_en_1000fdx_cap = igb->param_adv_1000fdx_cap;
   5004 	igb->param_en_1000hdx_cap = igb->param_adv_1000hdx_cap;
   5005 	igb->param_en_100t4_cap = igb->param_adv_100t4_cap;
   5006 	igb->param_en_100fdx_cap = igb->param_adv_100fdx_cap;
   5007 	igb->param_en_100hdx_cap = igb->param_adv_100hdx_cap;
   5008 	igb->param_en_10fdx_cap = igb->param_adv_10fdx_cap;
   5009 	igb->param_en_10hdx_cap = igb->param_adv_10hdx_cap;
   5010 }
   5011 
   5012 /*
   5013  * igb_get_driver_control
   5014  */
   5015 static void
   5016 igb_get_driver_control(struct e1000_hw *hw)
   5017 {
   5018 	uint32_t ctrl_ext;
   5019 
   5020 	/* Notify firmware that driver is in control of device */
   5021 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
   5022 	ctrl_ext |= E1000_CTRL_EXT_DRV_LOAD;
   5023 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
   5024 }
   5025 
   5026 /*
   5027  * igb_release_driver_control
   5028  */
   5029 static void
   5030 igb_release_driver_control(struct e1000_hw *hw)
   5031 {
   5032 	uint32_t ctrl_ext;
   5033 
   5034 	/* Notify firmware that driver is no longer in control of device */
   5035 	ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT);
   5036 	ctrl_ext &= ~E1000_CTRL_EXT_DRV_LOAD;
   5037 	E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext);
   5038 }
   5039 
   5040 /*
   5041  * igb_atomic_reserve - Atomic decrease operation
   5042  */
   5043 int
   5044 igb_atomic_reserve(uint32_t *count_p, uint32_t n)
   5045 {
   5046 	uint32_t oldval;
   5047 	uint32_t newval;
   5048 
   5049 	/* ATOMICALLY */
   5050 	do {
   5051 		oldval = *count_p;
   5052 		if (oldval < n)
   5053 			return (-1);
   5054 		newval = oldval - n;
   5055 	} while (atomic_cas_32(count_p, oldval, newval) != oldval);
   5056 
   5057 	return (newval);
   5058 }
   5059 
   5060 /*
   5061  * FMA support
   5062  */
   5063 
   5064 int
   5065 igb_check_acc_handle(ddi_acc_handle_t handle)
   5066 {
   5067 	ddi_fm_error_t de;
   5068 
   5069 	ddi_fm_acc_err_get(handle, &de, DDI_FME_VERSION);
   5070 	ddi_fm_acc_err_clear(handle, DDI_FME_VERSION);
   5071 	return (de.fme_status);
   5072 }
   5073 
   5074 int
   5075 igb_check_dma_handle(ddi_dma_handle_t handle)
   5076 {
   5077 	ddi_fm_error_t de;
   5078 
   5079 	ddi_fm_dma_err_get(handle, &de, DDI_FME_VERSION);
   5080 	return (de.fme_status);
   5081 }
   5082 
   5083 /*
   5084  * The IO fault service error handling callback function
   5085  */
   5086 /*ARGSUSED*/
   5087 static int
   5088 igb_fm_error_cb(dev_info_t *dip, ddi_fm_error_t *err, const void *impl_data)
   5089 {
   5090 	/*
   5091 	 * as the driver can always deal with an error in any dma or
   5092 	 * access handle, we can just return the fme_status value.
   5093 	 */
   5094 	pci_ereport_post(dip, err, NULL);
   5095 	return (err->fme_status);
   5096 }
   5097 
   5098 static void
   5099 igb_fm_init(igb_t *igb)
   5100 {
   5101 	ddi_iblock_cookie_t iblk;
   5102 	int fma_dma_flag;
   5103 
   5104 	/* Only register with IO Fault Services if we have some capability */
   5105 	if (igb->fm_capabilities & DDI_FM_ACCCHK_CAPABLE) {
   5106 		igb_regs_acc_attr.devacc_attr_access = DDI_FLAGERR_ACC;
   5107 	} else {
   5108 		igb_regs_acc_attr.devacc_attr_access = DDI_DEFAULT_ACC;
   5109 	}
   5110 
   5111 	if (igb->fm_capabilities & DDI_FM_DMACHK_CAPABLE) {
   5112 		fma_dma_flag = 1;
   5113 	} else {
   5114 		fma_dma_flag = 0;
   5115 	}
   5116 
   5117 	(void) igb_set_fma_flags(fma_dma_flag);
   5118 
   5119 	if (igb->fm_capabilities) {
   5120 
   5121 		/* Register capabilities with IO Fault Services */
   5122 		ddi_fm_init(igb->dip, &igb->fm_capabilities, &iblk);
   5123 
   5124 		/*
   5125 		 * Initialize pci ereport capabilities if ereport capable
   5126 		 */
   5127 		if (DDI_FM_EREPORT_CAP(igb->fm_capabilities) ||
   5128 		    DDI_FM_ERRCB_CAP(igb->fm_capabilities))
   5129 			pci_ereport_setup(igb->dip);
   5130 
   5131 		/*
   5132 		 * Register error callback if error callback capable
   5133 		 */
   5134 		if (DDI_FM_ERRCB_CAP(igb->fm_capabilities))
   5135 			ddi_fm_handler_register(igb->dip,
   5136 			    igb_fm_error_cb, (void*) igb);
   5137 	}
   5138 }
   5139 
   5140 static void
   5141 igb_fm_fini(igb_t *igb)
   5142 {
   5143 	/* Only unregister FMA capabilities if we registered some */
   5144 	if (igb->fm_capabilities) {
   5145 
   5146 		/*
   5147 		 * Release any resources allocated by pci_ereport_setup()
   5148 		 */
   5149 		if (DDI_FM_EREPORT_CAP(igb->fm_capabilities) ||
   5150 		    DDI_FM_ERRCB_CAP(igb->fm_capabilities))
   5151 			pci_ereport_teardown(igb->dip);
   5152 
   5153 		/*
   5154 		 * Un-register error callback if error callback capable
   5155 		 */
   5156 		if (DDI_FM_ERRCB_CAP(igb->fm_capabilities))
   5157 			ddi_fm_handler_unregister(igb->dip);
   5158 
   5159 		/* Unregister from IO Fault Services */
   5160 		ddi_fm_fini(igb->dip);
   5161 	}
   5162 }
   5163 
   5164 void
   5165 igb_fm_ereport(igb_t *igb, char *detail)
   5166 {
   5167 	uint64_t ena;
   5168 	char buf[FM_MAX_CLASS];
   5169 
   5170 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
   5171 	ena = fm_ena_generate(0, FM_ENA_FMT1);
   5172 	if (DDI_FM_EREPORT_CAP(igb->fm_capabilities)) {
   5173 		ddi_fm_ereport_post(igb->dip, buf, ena, DDI_NOSLEEP,
   5174 		    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0, NULL);
   5175 	}
   5176 }
   5177