Home | History | Annotate | Download | only in io
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/types.h>
     28 #include <sys/errno.h>
     29 #include <sys/debug.h>
     30 #include <sys/time.h>
     31 #include <sys/sysmacros.h>
     32 #include <sys/systm.h>
     33 #include <sys/user.h>
     34 #include <sys/stropts.h>
     35 #include <sys/stream.h>
     36 #include <sys/strlog.h>
     37 #include <sys/strsubr.h>
     38 #include <sys/cmn_err.h>
     39 #include <sys/cpu.h>
     40 #include <sys/kmem.h>
     41 #include <sys/conf.h>
     42 #include <sys/ddi.h>
     43 #include <sys/sunddi.h>
     44 #include <sys/ksynch.h>
     45 #include <sys/stat.h>
     46 #include <sys/kstat.h>
     47 #include <sys/vtrace.h>
     48 #include <sys/strsun.h>
     49 #include <sys/dlpi.h>
     50 #include <sys/ethernet.h>
     51 #include <net/if.h>
     52 #include <sys/varargs.h>
     53 #include <sys/machsystm.h>
     54 #include <sys/modctl.h>
     55 #include <sys/modhash.h>
     56 #include <sys/mac.h>
     57 #include <sys/mac_ether.h>
     58 #include <sys/taskq.h>
     59 #include <sys/note.h>
     60 #include <sys/mach_descrip.h>
     61 #include <sys/mdeg.h>
     62 #include <sys/ldc.h>
     63 #include <sys/vsw_fdb.h>
     64 #include <sys/vsw.h>
     65 #include <sys/vio_mailbox.h>
     66 #include <sys/vnet_mailbox.h>
     67 #include <sys/vnet_common.h>
     68 #include <sys/vio_util.h>
     69 #include <sys/sdt.h>
     70 #include <sys/atomic.h>
     71 #include <sys/vlan.h>
     72 
     73 /* Switching setup routines */
     74 void vsw_setup_switching_thread(void *arg);
     75 int vsw_setup_switching_start(vsw_t *vswp);
     76 void vsw_setup_switching_stop(vsw_t *vswp);
     77 int vsw_setup_switching(vsw_t *);
     78 void vsw_setup_switching_post_process(vsw_t *vswp);
     79 void vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller,
     80     vsw_port_t *port, mac_resource_handle_t mrh);
     81 static	int vsw_setup_layer2(vsw_t *);
     82 static	int vsw_setup_layer3(vsw_t *);
     83 
     84 /* Switching/data transmit routines */
     85 static	void vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller,
     86     vsw_port_t *port, mac_resource_handle_t);
     87 static	void vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
     88 	vsw_port_t *port, mac_resource_handle_t);
     89 static	void vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
     90 	vsw_port_t *port, mac_resource_handle_t);
     91 static	int vsw_forward_all(vsw_t *vswp, mblk_t *mp,
     92 	int caller, vsw_port_t *port);
     93 static	int vsw_forward_grp(vsw_t *vswp, mblk_t *mp,
     94     int caller, vsw_port_t *port);
     95 
     96 /* VLAN routines */
     97 void vsw_create_vlans(void *arg, int type);
     98 void vsw_destroy_vlans(void *arg, int type);
     99 void vsw_vlan_add_ids(void *arg, int type);
    100 void vsw_vlan_remove_ids(void *arg, int type);
    101 static	void vsw_vlan_create_hash(void *arg, int type);
    102 static	void vsw_vlan_destroy_hash(void *arg, int type);
    103 boolean_t vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp,
    104 	uint16_t *vidp);
    105 mblk_t *vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp);
    106 uint32_t vsw_vlan_frames_untag(void *arg, int type, mblk_t **np, mblk_t **npt);
    107 boolean_t vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid);
    108 
    109 /* Forwarding database (FDB) routines */
    110 void vsw_fdbe_add(vsw_t *vswp, void *port);
    111 void vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr);
    112 static	vsw_fdbe_t *vsw_fdbe_find(vsw_t *vswp, struct ether_addr *);
    113 static void vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val);
    114 
    115 int vsw_add_rem_mcst(vnet_mcast_msg_t *, vsw_port_t *);
    116 int vsw_add_mcst(vsw_t *, uint8_t, uint64_t, void *);
    117 int vsw_del_mcst(vsw_t *, uint8_t, uint64_t, void *);
    118 void vsw_del_mcst_vsw(vsw_t *);
    119 
    120 /* Support functions */
    121 static mblk_t *vsw_dupmsgchain(mblk_t *mp);
    122 static mblk_t *vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp);
    123 
    124 
    125 /*
    126  * Functions imported from other files.
    127  */
    128 extern mblk_t *vsw_tx_msg(vsw_t *, mblk_t *, int, vsw_port_t *);
    129 extern mcst_addr_t *vsw_del_addr(uint8_t, void *, uint64_t);
    130 extern int vsw_mac_open(vsw_t *vswp);
    131 extern void vsw_mac_close(vsw_t *vswp);
    132 extern void vsw_mac_rx(vsw_t *vswp, mac_resource_handle_t mrh,
    133     mblk_t *mp, vsw_macrx_flags_t flags);
    134 extern void vsw_set_addrs(vsw_t *vswp);
    135 extern int vsw_portsend(vsw_port_t *port, mblk_t *mp);
    136 extern void vsw_hio_init(vsw_t *vswp);
    137 extern void vsw_hio_start_ports(vsw_t *vswp);
    138 extern int vsw_mac_multicast_add(vsw_t *vswp, vsw_port_t *port,
    139     mcst_addr_t *mcst_p, int type);
    140 extern void vsw_mac_multicast_remove(vsw_t *vswp, vsw_port_t *port,
    141     mcst_addr_t *mcst_p, int type);
    142 extern void vsw_mac_link_update(vsw_t *vswp, link_state_t link_state);
    143 extern void vsw_physlink_update_ports(vsw_t *vswp);
    144 
    145 /*
    146  * Tunables used in this file.
    147  */
    148 extern	int vsw_setup_switching_delay;
    149 extern	uint32_t vsw_vlan_nchains;
    150 extern	uint32_t vsw_fdbe_refcnt_delay;
    151 
    152 #define	VSW_FDBE_REFHOLD(p)						\
    153 {									\
    154 	atomic_inc_32(&(p)->refcnt);					\
    155 	ASSERT((p)->refcnt != 0);					\
    156 }
    157 
    158 #define	VSW_FDBE_REFRELE(p)						\
    159 {									\
    160 	ASSERT((p)->refcnt != 0);					\
    161 	atomic_dec_32(&(p)->refcnt);					\
    162 }
    163 
    164 /*
    165  * Thread to setup switching mode. This thread is created during vsw_attach()
    166  * initially. It invokes vsw_setup_switching() and keeps retrying while the
    167  * returned value is EAGAIN. The thread exits when the switching mode setup is
    168  * done successfully or when the error returned is not EAGAIN. This thread may
    169  * also get created from vsw_update_md_prop() if the switching mode needs to be
    170  * updated.
    171  */
    172 void
    173 vsw_setup_switching_thread(void *arg)
    174 {
    175 	callb_cpr_t	cprinfo;
    176 	vsw_t		*vswp =  (vsw_t *)arg;
    177 	clock_t		wait_time;
    178 	clock_t		xwait;
    179 	clock_t		wait_rv;
    180 	int		rv;
    181 
    182 	/* wait time used on successive retries */
    183 	xwait = drv_usectohz(vsw_setup_switching_delay * MICROSEC);
    184 
    185 	CALLB_CPR_INIT(&cprinfo, &vswp->sw_thr_lock, callb_generic_cpr,
    186 	    "vsw_setup_sw_thread");
    187 
    188 	mutex_enter(&vswp->sw_thr_lock);
    189 
    190 	while ((vswp->sw_thr_flags & VSW_SWTHR_STOP) == 0) {
    191 
    192 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
    193 
    194 		/* Wait for sometime before (re)trying setup_switching() */
    195 		wait_time = ddi_get_lbolt() + xwait;
    196 		while ((vswp->sw_thr_flags & VSW_SWTHR_STOP) == 0) {
    197 			wait_rv = cv_timedwait(&vswp->sw_thr_cv,
    198 			    &vswp->sw_thr_lock, wait_time);
    199 			if (wait_rv == -1) {	/* timed out */
    200 				break;
    201 			}
    202 		}
    203 
    204 		CALLB_CPR_SAFE_END(&cprinfo, &vswp->sw_thr_lock)
    205 
    206 		if ((vswp->sw_thr_flags & VSW_SWTHR_STOP) != 0) {
    207 			/*
    208 			 * If there is a stop request, process that first and
    209 			 * exit the loop. Continue to hold the mutex which gets
    210 			 * released in CALLB_CPR_EXIT().
    211 			 */
    212 			break;
    213 		}
    214 
    215 		mutex_exit(&vswp->sw_thr_lock);
    216 		rv = vsw_setup_switching(vswp);
    217 		if (rv == 0) {
    218 			vsw_setup_switching_post_process(vswp);
    219 		}
    220 		mutex_enter(&vswp->sw_thr_lock);
    221 		if (rv != EAGAIN) {
    222 			break;
    223 		}
    224 
    225 	}
    226 
    227 	vswp->sw_thr_flags &= ~VSW_SWTHR_STOP;
    228 	vswp->sw_thread = NULL;
    229 	CALLB_CPR_EXIT(&cprinfo);
    230 	thread_exit();
    231 }
    232 
    233 /*
    234  * Create a thread to setup the switching mode.
    235  * Returns 0 on success; 1 on failure.
    236  */
    237 int
    238 vsw_setup_switching_start(vsw_t *vswp)
    239 {
    240 	mutex_enter(&vswp->sw_thr_lock);
    241 
    242 	vswp->sw_thread = thread_create(NULL, 2 * DEFAULTSTKSZ,
    243 	    vsw_setup_switching_thread, vswp, 0, &p0, TS_RUN, minclsyspri);
    244 
    245 	if (vswp->sw_thread == NULL) {
    246 		mutex_exit(&vswp->sw_thr_lock);
    247 		return (1);
    248 	}
    249 
    250 	mutex_exit(&vswp->sw_thr_lock);
    251 	return (0);
    252 }
    253 
    254 /*
    255  * Stop the thread to setup switching mode.
    256  */
    257 void
    258 vsw_setup_switching_stop(vsw_t *vswp)
    259 {
    260 	kt_did_t	tid = 0;
    261 
    262 	/*
    263 	 * Signal the setup_switching thread to stop and wait until it stops.
    264 	 */
    265 	mutex_enter(&vswp->sw_thr_lock);
    266 
    267 	if (vswp->sw_thread != NULL) {
    268 		tid = vswp->sw_thread->t_did;
    269 		vswp->sw_thr_flags |= VSW_SWTHR_STOP;
    270 		cv_signal(&vswp->sw_thr_cv);
    271 	}
    272 
    273 	mutex_exit(&vswp->sw_thr_lock);
    274 
    275 	if (tid != 0)
    276 		thread_join(tid);
    277 
    278 	(void) atomic_swap_32(&vswp->switching_setup_done, B_FALSE);
    279 
    280 	vswp->mac_open_retries = 0;
    281 }
    282 
    283 /*
    284  * Setup the required switching mode.
    285  * Returns:
    286  *  0 on success.
    287  *  EAGAIN if retry is needed.
    288  *  1 on all other failures.
    289  */
    290 int
    291 vsw_setup_switching(vsw_t *vswp)
    292 {
    293 	int	rv = 1;
    294 
    295 	D1(vswp, "%s: enter", __func__);
    296 
    297 	/*
    298 	 * Select best switching mode.
    299 	 * This is done as this routine can be called from the timeout
    300 	 * handler to retry setting up a specific mode. Currently only
    301 	 * the function which sets up layer2/promisc mode returns EAGAIN
    302 	 * if the underlying network device is not available yet, causing
    303 	 * retries.
    304 	 */
    305 	if (vswp->smode & VSW_LAYER2) {
    306 		rv = vsw_setup_layer2(vswp);
    307 	} else if (vswp->smode & VSW_LAYER3) {
    308 		rv = vsw_setup_layer3(vswp);
    309 	} else {
    310 		DERR(vswp, "unknown switch mode");
    311 		rv = 1;
    312 	}
    313 
    314 	if (rv && (rv != EAGAIN)) {
    315 		cmn_err(CE_WARN, "!vsw%d: Unable to setup specified "
    316 		    "switching mode", vswp->instance);
    317 	} else if (rv == 0) {
    318 		(void) atomic_swap_32(&vswp->switching_setup_done, B_TRUE);
    319 	}
    320 
    321 	D2(vswp, "%s: Operating in mode %d", __func__,
    322 	    vswp->smode);
    323 
    324 	D1(vswp, "%s: exit", __func__);
    325 
    326 	return (rv);
    327 }
    328 
    329 /*
    330  * Setup for layer 2 switching.
    331  *
    332  * Returns:
    333  *  0 on success.
    334  *  EAGAIN if retry is needed.
    335  *  EIO on all other failures.
    336  */
    337 static int
    338 vsw_setup_layer2(vsw_t *vswp)
    339 {
    340 	int	rv;
    341 
    342 	D1(vswp, "%s: enter", __func__);
    343 
    344 	/*
    345 	 * Until the network device is successfully opened,
    346 	 * set the switching to use vsw_switch_l2_frame.
    347 	 */
    348 	vswp->vsw_switch_frame = vsw_switch_l2_frame;
    349 	vswp->mac_cl_switching = B_FALSE;
    350 
    351 	rv = strlen(vswp->physname);
    352 	if (rv == 0) {
    353 		/*
    354 		 * Physical device name is NULL, which is
    355 		 * required for layer 2.
    356 		 */
    357 		cmn_err(CE_WARN, "!vsw%d: no network device name specified",
    358 		    vswp->instance);
    359 		return (EIO);
    360 	}
    361 
    362 	mutex_enter(&vswp->mac_lock);
    363 
    364 	rv = vsw_mac_open(vswp);
    365 	if (rv != 0) {
    366 		if (rv != EAGAIN) {
    367 			cmn_err(CE_WARN, "!vsw%d: Unable to open network "
    368 			    "device: %s\n", vswp->instance, vswp->physname);
    369 		}
    370 		mutex_exit(&vswp->mac_lock);
    371 		return (rv);
    372 	}
    373 
    374 	/*
    375 	 * Now we can use the mac client switching, so set the switching
    376 	 * function to use vsw_switch_l2_frame_mac_client(), which simply
    377 	 * sends the packets to MAC layer for switching.
    378 	 */
    379 	vswp->vsw_switch_frame = vsw_switch_l2_frame_mac_client;
    380 	vswp->mac_cl_switching = B_TRUE;
    381 
    382 	D1(vswp, "%s: exit", __func__);
    383 
    384 	/* Initialize HybridIO related stuff */
    385 	vsw_hio_init(vswp);
    386 
    387 	mutex_exit(&vswp->mac_lock);
    388 	return (0);
    389 
    390 exit_error:
    391 	vsw_mac_close(vswp);
    392 	mutex_exit(&vswp->mac_lock);
    393 	return (EIO);
    394 }
    395 
    396 static int
    397 vsw_setup_layer3(vsw_t *vswp)
    398 {
    399 	D1(vswp, "%s: enter", __func__);
    400 
    401 	D2(vswp, "%s: operating in layer 3 mode", __func__);
    402 	vswp->vsw_switch_frame = vsw_switch_l3_frame;
    403 
    404 	D1(vswp, "%s: exit", __func__);
    405 
    406 	return (0);
    407 }
    408 
    409 /* ARGSUSED */
    410 void
    411 vsw_switch_frame_nop(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *port,
    412 			mac_resource_handle_t mrh)
    413 {
    414 	freemsgchain(mp);
    415 }
    416 
    417 /*
    418  * Use mac client for layer 2 switching .
    419  */
    420 static void
    421 vsw_switch_l2_frame_mac_client(vsw_t *vswp, mblk_t *mp, int caller,
    422     vsw_port_t *port, mac_resource_handle_t mrh)
    423 {
    424 	_NOTE(ARGUNUSED(mrh))
    425 
    426 	mblk_t		*ret_m;
    427 
    428 	/*
    429 	 * This switching function is expected to be called by
    430 	 * the ports or the interface only. The packets from
    431 	 * physical interface already switched.
    432 	 */
    433 	ASSERT((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV));
    434 
    435 	if ((ret_m = vsw_tx_msg(vswp, mp, caller, port)) != NULL) {
    436 		DERR(vswp, "%s: drop mblks to "
    437 		    "phys dev", __func__);
    438 		freemsgchain(ret_m);
    439 	}
    440 }
    441 
    442 /*
    443  * Switch the given ethernet frame when operating in layer 2 mode.
    444  *
    445  * vswp: pointer to the vsw instance
    446  * mp: pointer to chain of ethernet frame(s) to be switched
    447  * caller: identifies the source of this frame as:
    448  * 		1. VSW_VNETPORT - a vsw port (connected to a vnet).
    449  *		2. VSW_PHYSDEV - the physical ethernet device
    450  *		3. VSW_LOCALDEV - vsw configured as a virtual interface
    451  * arg: argument provided by the caller.
    452  *		1. for VNETPORT - pointer to the corresponding vsw_port_t.
    453  *		2. for PHYSDEV - NULL
    454  *		3. for LOCALDEV - pointer to to this vsw_t(self)
    455  */
    456 void
    457 vsw_switch_l2_frame(vsw_t *vswp, mblk_t *mp, int caller,
    458 			vsw_port_t *arg, mac_resource_handle_t mrh)
    459 {
    460 	struct ether_header	*ehp;
    461 	mblk_t			*bp, *ret_m;
    462 	vsw_fdbe_t		*fp;
    463 
    464 	D1(vswp, "%s: enter (caller %d)", __func__, caller);
    465 
    466 	/*
    467 	 * PERF: rather than breaking up the chain here, scan it
    468 	 * to find all mblks heading to same destination and then
    469 	 * pass that sub-chain to the lower transmit functions.
    470 	 */
    471 
    472 	/* process the chain of packets */
    473 	bp = mp;
    474 	while (bp) {
    475 		ehp = (struct ether_header *)bp->b_rptr;
    476 		mp = vsw_get_same_dest_list(ehp, &bp);
    477 		ASSERT(mp != NULL);
    478 
    479 		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
    480 		    __func__, MBLKSIZE(mp), MBLKL(mp));
    481 
    482 		if (ether_cmp(&ehp->ether_dhost, &vswp->if_addr) == 0) {
    483 			/*
    484 			 * If destination is VSW_LOCALDEV (vsw as an eth
    485 			 * interface) and if the device is up & running,
    486 			 * send the packet up the stack on this host.
    487 			 * If the virtual interface is down, drop the packet.
    488 			 */
    489 			if (caller != VSW_LOCALDEV) {
    490 				vsw_mac_rx(vswp, mrh, mp, VSW_MACRX_FREEMSG);
    491 			} else {
    492 				freemsgchain(mp);
    493 			}
    494 			continue;
    495 		}
    496 
    497 		/*
    498 		 * Find fdb entry for the destination
    499 		 * and hold a reference to it.
    500 		 */
    501 		fp = vsw_fdbe_find(vswp, &ehp->ether_dhost);
    502 		if (fp != NULL) {
    503 
    504 			/*
    505 			 * If plumbed and in promisc mode then copy msg
    506 			 * and send up the stack.
    507 			 */
    508 			vsw_mac_rx(vswp, mrh, mp,
    509 			    VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG);
    510 
    511 			/*
    512 			 * If the destination is in FDB, the packet
    513 			 * should be forwarded to the correponding
    514 			 * vsw_port (connected to a vnet device -
    515 			 * VSW_VNETPORT)
    516 			 */
    517 			(void) vsw_portsend(fp->portp, mp);
    518 
    519 			/* Release the reference on the fdb entry */
    520 			VSW_FDBE_REFRELE(fp);
    521 		} else {
    522 			/*
    523 			 * Destination not in FDB.
    524 			 *
    525 			 * If the destination is broadcast or
    526 			 * multicast forward the packet to all
    527 			 * (VNETPORTs, PHYSDEV, LOCALDEV),
    528 			 * except the caller.
    529 			 */
    530 			if (IS_BROADCAST(ehp)) {
    531 				D2(vswp, "%s: BROADCAST pkt", __func__);
    532 				(void) vsw_forward_all(vswp, mp, caller, arg);
    533 			} else if (IS_MULTICAST(ehp)) {
    534 				D2(vswp, "%s: MULTICAST pkt", __func__);
    535 				(void) vsw_forward_grp(vswp, mp, caller, arg);
    536 			} else {
    537 				/*
    538 				 * If the destination is unicast, and came
    539 				 * from either a logical network device or
    540 				 * the switch itself when it is plumbed, then
    541 				 * send it out on the physical device and also
    542 				 * up the stack if the logical interface is
    543 				 * in promiscious mode.
    544 				 *
    545 				 * NOTE:  The assumption here is that if we
    546 				 * cannot find the destination in our fdb, its
    547 				 * a unicast address, and came from either a
    548 				 * vnet or down the stack (when plumbed) it
    549 				 * must be destinded for an ethernet device
    550 				 * outside our ldoms.
    551 				 */
    552 				if (caller == VSW_VNETPORT) {
    553 					/* promisc check copy etc */
    554 					vsw_mac_rx(vswp, mrh, mp,
    555 					    VSW_MACRX_PROMISC |
    556 					    VSW_MACRX_COPYMSG);
    557 
    558 					if ((ret_m = vsw_tx_msg(vswp, mp,
    559 					    caller, arg)) != NULL) {
    560 						DERR(vswp, "%s: drop mblks to "
    561 						    "phys dev", __func__);
    562 						freemsgchain(ret_m);
    563 					}
    564 
    565 				} else if (caller == VSW_PHYSDEV) {
    566 					/*
    567 					 * Pkt seen because card in promisc
    568 					 * mode. Send up stack if plumbed in
    569 					 * promisc mode, else drop it.
    570 					 */
    571 					vsw_mac_rx(vswp, mrh, mp,
    572 					    VSW_MACRX_PROMISC |
    573 					    VSW_MACRX_FREEMSG);
    574 
    575 				} else if (caller == VSW_LOCALDEV) {
    576 					/*
    577 					 * Pkt came down the stack, send out
    578 					 * over physical device.
    579 					 */
    580 					if ((ret_m = vsw_tx_msg(vswp, mp,
    581 					    caller, NULL)) != NULL) {
    582 						DERR(vswp, "%s: drop mblks to "
    583 						    "phys dev", __func__);
    584 						freemsgchain(ret_m);
    585 					}
    586 				}
    587 			}
    588 		}
    589 	}
    590 	D1(vswp, "%s: exit\n", __func__);
    591 }
    592 
    593 /*
    594  * Switch ethernet frame when in layer 3 mode (i.e. using IP
    595  * layer to do the routing).
    596  *
    597  * There is a large amount of overlap between this function and
    598  * vsw_switch_l2_frame. At some stage we need to revisit and refactor
    599  * both these functions.
    600  */
    601 void
    602 vsw_switch_l3_frame(vsw_t *vswp, mblk_t *mp, int caller,
    603 			vsw_port_t *arg, mac_resource_handle_t mrh)
    604 {
    605 	struct ether_header	*ehp;
    606 	mblk_t			*bp = NULL;
    607 	vsw_fdbe_t		*fp;
    608 
    609 	D1(vswp, "%s: enter (caller %d)", __func__, caller);
    610 
    611 	/*
    612 	 * In layer 3 mode should only ever be switching packets
    613 	 * between IP layer and vnet devices. So make sure thats
    614 	 * who is invoking us.
    615 	 */
    616 	if ((caller != VSW_LOCALDEV) && (caller != VSW_VNETPORT)) {
    617 		DERR(vswp, "%s: unexpected caller (%d)", __func__, caller);
    618 		freemsgchain(mp);
    619 		return;
    620 	}
    621 
    622 	/* process the chain of packets */
    623 	bp = mp;
    624 	while (bp) {
    625 		ehp = (struct ether_header *)bp->b_rptr;
    626 		mp = vsw_get_same_dest_list(ehp, &bp);
    627 		ASSERT(mp != NULL);
    628 
    629 		D2(vswp, "%s: mblk data buffer %lld : actual data size %lld",
    630 		    __func__, MBLKSIZE(mp), MBLKL(mp));
    631 
    632 		/*
    633 		 * Find fdb entry for the destination
    634 		 * and hold a reference to it.
    635 		 */
    636 		fp = vsw_fdbe_find(vswp, &ehp->ether_dhost);
    637 		if (fp != NULL) {
    638 
    639 			D2(vswp, "%s: sending to target port", __func__);
    640 			(void) vsw_portsend(fp->portp, mp);
    641 
    642 			/* Release the reference on the fdb entry */
    643 			VSW_FDBE_REFRELE(fp);
    644 		} else {
    645 			/*
    646 			 * Destination not in FDB
    647 			 *
    648 			 * If the destination is broadcast or
    649 			 * multicast forward the packet to all
    650 			 * (VNETPORTs, PHYSDEV, LOCALDEV),
    651 			 * except the caller.
    652 			 */
    653 			if (IS_BROADCAST(ehp)) {
    654 				D2(vswp, "%s: BROADCAST pkt", __func__);
    655 				(void) vsw_forward_all(vswp, mp, caller, arg);
    656 			} else if (IS_MULTICAST(ehp)) {
    657 				D2(vswp, "%s: MULTICAST pkt", __func__);
    658 				(void) vsw_forward_grp(vswp, mp, caller, arg);
    659 			} else {
    660 				/*
    661 				 * Unicast pkt from vnet that we don't have
    662 				 * an FDB entry for, so must be destinded for
    663 				 * the outside world. Attempt to send up to the
    664 				 * IP layer to allow it to deal with it.
    665 				 */
    666 				if (caller == VSW_VNETPORT) {
    667 					vsw_mac_rx(vswp, mrh,
    668 					    mp, VSW_MACRX_FREEMSG);
    669 				}
    670 			}
    671 		}
    672 	}
    673 
    674 	D1(vswp, "%s: exit", __func__);
    675 }
    676 
    677 /*
    678  * Additional initializations that are needed for the specific switching mode.
    679  */
    680 void
    681 vsw_setup_switching_post_process(vsw_t *vswp)
    682 {
    683 	link_state_t	link_state = LINK_STATE_UP;
    684 
    685 	if (vswp->smode & VSW_LAYER2) {
    686 		/*
    687 		 * Program unicst, mcst addrs of vsw
    688 		 * interface and ports in the physdev.
    689 		 */
    690 		vsw_set_addrs(vswp);
    691 
    692 		/* Start HIO for ports that have already connected */
    693 		vsw_hio_start_ports(vswp);
    694 
    695 		if (vswp->pls_update == B_TRUE) {
    696 			link_state = vswp->phys_link_state;
    697 		}
    698 
    699 		/* Update physical link info to any ports already connected */
    700 		vsw_physlink_update_ports(vswp);
    701 	}
    702 
    703 	vsw_mac_link_update(vswp, link_state);
    704 }
    705 
    706 /*
    707  * Forward the ethernet frame to all ports (VNETPORTs, PHYSDEV, LOCALDEV),
    708  * except the caller (port on which frame arrived).
    709  */
    710 static int
    711 vsw_forward_all(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
    712 {
    713 	vsw_port_list_t	*plist = &vswp->plist;
    714 	vsw_port_t	*portp;
    715 	mblk_t		*nmp = NULL;
    716 	mblk_t		*ret_m = NULL;
    717 	int		skip_port = 0;
    718 
    719 	D1(vswp, "vsw_forward_all: enter\n");
    720 
    721 	/*
    722 	 * Broadcast message from inside ldoms so send to outside
    723 	 * world if in either of layer 2 modes.
    724 	 */
    725 	if ((vswp->smode & VSW_LAYER2) &&
    726 	    ((caller == VSW_LOCALDEV) || (caller == VSW_VNETPORT))) {
    727 
    728 		nmp = vsw_dupmsgchain(mp);
    729 		if (nmp) {
    730 			if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg))
    731 			    != NULL) {
    732 				DERR(vswp, "%s: dropping pkt(s) "
    733 				    "consisting of %ld bytes of data for"
    734 				    " physical device", __func__, MBLKL(ret_m));
    735 				freemsgchain(ret_m);
    736 			}
    737 		}
    738 	}
    739 
    740 	if (caller == VSW_VNETPORT)
    741 		skip_port = 1;
    742 
    743 	/*
    744 	 * Broadcast message from other vnet (layer 2 or 3) or outside
    745 	 * world (layer 2 only), send up stack if plumbed.
    746 	 */
    747 	if ((caller == VSW_PHYSDEV) || (caller == VSW_VNETPORT)) {
    748 		vsw_mac_rx(vswp, NULL, mp, VSW_MACRX_COPYMSG);
    749 	}
    750 
    751 	/* send it to all VNETPORTs */
    752 	READ_ENTER(&plist->lockrw);
    753 	for (portp = plist->head; portp != NULL; portp = portp->p_next) {
    754 		D2(vswp, "vsw_forward_all: port %d", portp->p_instance);
    755 		/*
    756 		 * Caution ! - don't reorder these two checks as arg
    757 		 * will be NULL if the caller is PHYSDEV. skip_port is
    758 		 * only set if caller is VNETPORT.
    759 		 */
    760 		if ((skip_port) && (portp == arg)) {
    761 			continue;
    762 		} else {
    763 			nmp = vsw_dupmsgchain(mp);
    764 			if (nmp) {
    765 				/*
    766 				 * The plist->lockrw is protecting the
    767 				 * portp from getting destroyed here.
    768 				 * So, no ref_cnt is incremented here.
    769 				 */
    770 				(void) vsw_portsend(portp, nmp);
    771 			} else {
    772 				DERR(vswp, "vsw_forward_all: nmp NULL");
    773 			}
    774 		}
    775 	}
    776 	RW_EXIT(&plist->lockrw);
    777 
    778 	freemsgchain(mp);
    779 
    780 	D1(vswp, "vsw_forward_all: exit\n");
    781 	return (0);
    782 }
    783 
    784 /*
    785  * Forward pkts to any devices or interfaces which have registered
    786  * an interest in them (i.e. multicast groups).
    787  */
    788 static int
    789 vsw_forward_grp(vsw_t *vswp, mblk_t *mp, int caller, vsw_port_t *arg)
    790 {
    791 	struct ether_header	*ehp = (struct ether_header *)mp->b_rptr;
    792 	mfdb_ent_t		*entp = NULL;
    793 	mfdb_ent_t		*tpp = NULL;
    794 	vsw_port_t 		*port;
    795 	uint64_t		key = 0;
    796 	mblk_t			*nmp = NULL;
    797 	mblk_t			*ret_m = NULL;
    798 	boolean_t		check_if = B_TRUE;
    799 
    800 	/*
    801 	 * Convert address to hash table key
    802 	 */
    803 	KEY_HASH(key, &ehp->ether_dhost);
    804 
    805 	D1(vswp, "%s: key 0x%llx", __func__, key);
    806 
    807 	/*
    808 	 * If pkt came from either a vnet or down the stack (if we are
    809 	 * plumbed) and we are in layer 2 mode, then we send the pkt out
    810 	 * over the physical adapter, and then check to see if any other
    811 	 * vnets are interested in it.
    812 	 */
    813 	if ((vswp->smode & VSW_LAYER2) &&
    814 	    ((caller == VSW_VNETPORT) || (caller == VSW_LOCALDEV))) {
    815 		nmp = vsw_dupmsgchain(mp);
    816 		if (nmp) {
    817 			if ((ret_m = vsw_tx_msg(vswp, nmp, caller, arg))
    818 			    != NULL) {
    819 				DERR(vswp, "%s: dropping pkt(s) consisting of "
    820 				    "%ld bytes of data for physical device",
    821 				    __func__, MBLKL(ret_m));
    822 				freemsgchain(ret_m);
    823 			}
    824 		}
    825 	}
    826 
    827 	READ_ENTER(&vswp->mfdbrw);
    828 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)key,
    829 	    (mod_hash_val_t *)&entp) != 0) {
    830 		D3(vswp, "%s: no table entry found for addr 0x%llx",
    831 		    __func__, key);
    832 	} else {
    833 		/*
    834 		 * Send to list of devices associated with this address...
    835 		 */
    836 		for (tpp = entp; tpp != NULL; tpp = tpp->nextp) {
    837 
    838 			/* dont send to ourselves */
    839 			if ((caller == VSW_VNETPORT) &&
    840 			    (tpp->d_addr == (void *)arg)) {
    841 				port = (vsw_port_t *)tpp->d_addr;
    842 				D3(vswp, "%s: not sending to ourselves"
    843 				    " : port %d", __func__, port->p_instance);
    844 				continue;
    845 
    846 			} else if ((caller == VSW_LOCALDEV) &&
    847 			    (tpp->d_type == VSW_LOCALDEV)) {
    848 				D2(vswp, "%s: not sending back up stack",
    849 				    __func__);
    850 				continue;
    851 			}
    852 
    853 			if (tpp->d_type == VSW_VNETPORT) {
    854 				port = (vsw_port_t *)tpp->d_addr;
    855 				D3(vswp, "%s: sending to port %ld for addr "
    856 				    "0x%llx", __func__, port->p_instance, key);
    857 
    858 				nmp = vsw_dupmsgchain(mp);
    859 				if (nmp) {
    860 					/*
    861 					 * The vswp->mfdbrw is protecting the
    862 					 * portp from getting destroyed here.
    863 					 * So, no ref_cnt is incremented here.
    864 					 */
    865 					(void) vsw_portsend(port, nmp);
    866 				}
    867 			} else {
    868 				vsw_mac_rx(vswp, NULL,
    869 				    mp, VSW_MACRX_COPYMSG);
    870 				D2(vswp, "%s: sending up stack"
    871 				    " for addr 0x%llx", __func__, key);
    872 				check_if = B_FALSE;
    873 			}
    874 		}
    875 	}
    876 
    877 	RW_EXIT(&vswp->mfdbrw);
    878 
    879 	/*
    880 	 * If the pkt came from either a vnet or from physical device,
    881 	 * and if we havent already sent the pkt up the stack then we
    882 	 * check now if we can/should (i.e. the interface is plumbed
    883 	 * and in promisc mode).
    884 	 */
    885 	if ((check_if) &&
    886 	    ((caller == VSW_VNETPORT) || (caller == VSW_PHYSDEV))) {
    887 		vsw_mac_rx(vswp, NULL, mp,
    888 		    VSW_MACRX_PROMISC | VSW_MACRX_COPYMSG);
    889 	}
    890 
    891 	freemsgchain(mp);
    892 
    893 	D1(vswp, "%s: exit", __func__);
    894 
    895 	return (0);
    896 }
    897 
    898 /*
    899  * This function creates the vlan id hash table for the given vsw device or
    900  * port. It then adds each vlan that the device or port has been assigned,
    901  * into this hash table.
    902  * Arguments:
    903  *   arg:  vsw device or port.
    904  *   type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port).
    905  */
    906 void
    907 vsw_create_vlans(void *arg, int type)
    908 {
    909 	/* create vlan hash table */
    910 	vsw_vlan_create_hash(arg, type);
    911 
    912 	/* add vlan ids of the vsw device into its hash table */
    913 	vsw_vlan_add_ids(arg, type);
    914 }
    915 
    916 /*
    917  * This function removes the vlan ids of the vsw device or port from its hash
    918  * table. It then destroys the vlan hash table.
    919  * Arguments:
    920  *   arg:  vsw device or port.
    921  *   type: type of arg; VSW_LOCALDEV(vsw device) or VSW_VNETPORT(port).
    922  */
    923 void
    924 vsw_destroy_vlans(void *arg, int type)
    925 {
    926 	/* remove vlan ids from the hash table */
    927 	vsw_vlan_remove_ids(arg, type);
    928 
    929 	/* destroy vlan-hash-table */
    930 	vsw_vlan_destroy_hash(arg, type);
    931 }
    932 
    933 /*
    934  * Create a vlan-id hash table for the given vsw device or port.
    935  */
    936 static void
    937 vsw_vlan_create_hash(void *arg, int type)
    938 {
    939 	char		hashname[MAXNAMELEN];
    940 
    941 	if (type == VSW_LOCALDEV) {
    942 		vsw_t		*vswp = (vsw_t *)arg;
    943 
    944 		(void) snprintf(hashname, MAXNAMELEN, "vsw%d-vlan-hash",
    945 		    vswp->instance);
    946 
    947 		vswp->vlan_nchains = vsw_vlan_nchains;
    948 		vswp->vlan_hashp = mod_hash_create_idhash(hashname,
    949 		    vswp->vlan_nchains, mod_hash_null_valdtor);
    950 
    951 	} else if (type == VSW_VNETPORT) {
    952 		vsw_port_t	*portp = (vsw_port_t *)arg;
    953 
    954 		(void) snprintf(hashname, MAXNAMELEN, "port%d-vlan-hash",
    955 		    portp->p_instance);
    956 
    957 		portp->vlan_nchains = vsw_vlan_nchains;
    958 		portp->vlan_hashp = mod_hash_create_idhash(hashname,
    959 		    portp->vlan_nchains, mod_hash_null_valdtor);
    960 
    961 	} else {
    962 		return;
    963 	}
    964 }
    965 
    966 /*
    967  * Destroy the vlan-id hash table for the given vsw device or port.
    968  */
    969 static void
    970 vsw_vlan_destroy_hash(void *arg, int type)
    971 {
    972 	if (type == VSW_LOCALDEV) {
    973 		vsw_t		*vswp = (vsw_t *)arg;
    974 
    975 		mod_hash_destroy_hash(vswp->vlan_hashp);
    976 		vswp->vlan_nchains = 0;
    977 	} else if (type == VSW_VNETPORT) {
    978 		vsw_port_t	*portp = (vsw_port_t *)arg;
    979 
    980 		mod_hash_destroy_hash(portp->vlan_hashp);
    981 		portp->vlan_nchains = 0;
    982 	} else {
    983 		return;
    984 	}
    985 }
    986 
    987 /*
    988  * Add vlan ids of the given vsw device or port into its hash table.
    989  */
    990 void
    991 vsw_vlan_add_ids(void *arg, int type)
    992 {
    993 	int	rv;
    994 	int	i;
    995 
    996 	if (type == VSW_LOCALDEV) {
    997 		vsw_t		*vswp = (vsw_t *)arg;
    998 
    999 		rv = mod_hash_insert(vswp->vlan_hashp,
   1000 		    (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid),
   1001 		    (mod_hash_val_t)B_TRUE);
   1002 		if (rv != 0) {
   1003 			cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for "
   1004 			    "the interface", vswp->instance, vswp->pvid);
   1005 		}
   1006 
   1007 		for (i = 0; i < vswp->nvids; i++) {
   1008 			rv = mod_hash_insert(vswp->vlan_hashp,
   1009 			    (mod_hash_key_t)VLAN_ID_KEY(vswp->vids[i].vl_vid),
   1010 			    (mod_hash_val_t)B_TRUE);
   1011 			if (rv != 0) {
   1012 				cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)"
   1013 				    " for the interface", vswp->instance,
   1014 				    vswp->pvid);
   1015 			}
   1016 		}
   1017 
   1018 	} else if (type == VSW_VNETPORT) {
   1019 		vsw_port_t	*portp = (vsw_port_t *)arg;
   1020 		vsw_t		*vswp = portp->p_vswp;
   1021 
   1022 		rv = mod_hash_insert(portp->vlan_hashp,
   1023 		    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
   1024 		    (mod_hash_val_t)B_TRUE);
   1025 		if (rv != 0) {
   1026 			cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d) for "
   1027 			    "the port(%d)", vswp->instance, vswp->pvid,
   1028 			    portp->p_instance);
   1029 		}
   1030 
   1031 		for (i = 0; i < portp->nvids; i++) {
   1032 			rv = mod_hash_insert(portp->vlan_hashp,
   1033 			    (mod_hash_key_t)VLAN_ID_KEY(portp->vids[i].vl_vid),
   1034 			    (mod_hash_val_t)B_TRUE);
   1035 			if (rv != 0) {
   1036 				cmn_err(CE_WARN, "vsw%d: Duplicate vlan-id(%d)"
   1037 				    " for the port(%d)", vswp->instance,
   1038 				    vswp->pvid, portp->p_instance);
   1039 			}
   1040 		}
   1041 
   1042 	}
   1043 }
   1044 
   1045 /*
   1046  * Remove vlan ids of the given vsw device or port from its hash table.
   1047  */
   1048 void
   1049 vsw_vlan_remove_ids(void *arg, int type)
   1050 {
   1051 	mod_hash_val_t	vp;
   1052 	int		rv;
   1053 	int		i;
   1054 
   1055 	if (type == VSW_LOCALDEV) {
   1056 		vsw_t		*vswp = (vsw_t *)arg;
   1057 
   1058 		rv = vsw_vlan_lookup(vswp->vlan_hashp, vswp->pvid);
   1059 		if (rv == B_TRUE) {
   1060 			rv = mod_hash_remove(vswp->vlan_hashp,
   1061 			    (mod_hash_key_t)VLAN_ID_KEY(vswp->pvid),
   1062 			    (mod_hash_val_t *)&vp);
   1063 			ASSERT(rv == 0);
   1064 		}
   1065 
   1066 		for (i = 0; i < vswp->nvids; i++) {
   1067 			rv = vsw_vlan_lookup(vswp->vlan_hashp,
   1068 			    vswp->vids[i].vl_vid);
   1069 			if (rv == B_TRUE) {
   1070 				rv = mod_hash_remove(vswp->vlan_hashp,
   1071 				    (mod_hash_key_t)VLAN_ID_KEY(
   1072 				    vswp->vids[i].vl_vid),
   1073 				    (mod_hash_val_t *)&vp);
   1074 				ASSERT(rv == 0);
   1075 			}
   1076 		}
   1077 
   1078 	} else if (type == VSW_VNETPORT) {
   1079 		vsw_port_t	*portp = (vsw_port_t *)arg;
   1080 
   1081 		portp = (vsw_port_t *)arg;
   1082 		rv = vsw_vlan_lookup(portp->vlan_hashp, portp->pvid);
   1083 		if (rv == B_TRUE) {
   1084 			rv = mod_hash_remove(portp->vlan_hashp,
   1085 			    (mod_hash_key_t)VLAN_ID_KEY(portp->pvid),
   1086 			    (mod_hash_val_t *)&vp);
   1087 			ASSERT(rv == 0);
   1088 		}
   1089 
   1090 		for (i = 0; i < portp->nvids; i++) {
   1091 			rv = vsw_vlan_lookup(portp->vlan_hashp,
   1092 			    portp->vids[i].vl_vid);
   1093 			if (rv == B_TRUE) {
   1094 				rv = mod_hash_remove(portp->vlan_hashp,
   1095 				    (mod_hash_key_t)VLAN_ID_KEY(
   1096 				    portp->vids[i].vl_vid),
   1097 				    (mod_hash_val_t *)&vp);
   1098 				ASSERT(rv == 0);
   1099 			}
   1100 		}
   1101 
   1102 	} else {
   1103 		return;
   1104 	}
   1105 }
   1106 
   1107 /*
   1108  * Find the given vlan id in the hash table.
   1109  * Return: B_TRUE if the id is found; B_FALSE if not found.
   1110  */
   1111 boolean_t
   1112 vsw_vlan_lookup(mod_hash_t *vlan_hashp, uint16_t vid)
   1113 {
   1114 	int		rv;
   1115 	mod_hash_val_t	vp;
   1116 
   1117 	rv = mod_hash_find(vlan_hashp, VLAN_ID_KEY(vid), (mod_hash_val_t *)&vp);
   1118 
   1119 	if (rv != 0)
   1120 		return (B_FALSE);
   1121 
   1122 	return (B_TRUE);
   1123 }
   1124 
   1125 /*
   1126  * Add an entry into FDB for the given vsw.
   1127  */
   1128 void
   1129 vsw_fdbe_add(vsw_t *vswp, void *port)
   1130 {
   1131 	uint64_t	addr = 0;
   1132 	vsw_port_t	*portp;
   1133 	vsw_fdbe_t	*fp;
   1134 	int		rv;
   1135 
   1136 	portp = (vsw_port_t *)port;
   1137 	KEY_HASH(addr, &portp->p_macaddr);
   1138 
   1139 	fp = kmem_zalloc(sizeof (vsw_fdbe_t), KM_SLEEP);
   1140 	fp->portp = port;
   1141 
   1142 	/*
   1143 	 * Note: duplicate keys will be rejected by mod_hash.
   1144 	 */
   1145 	rv = mod_hash_insert(vswp->fdb_hashp, (mod_hash_key_t)addr,
   1146 	    (mod_hash_val_t)fp);
   1147 	if (rv != 0) {
   1148 		cmn_err(CE_WARN, "vsw%d: Duplicate mac-address(%s) for "
   1149 		    "the port(%d)", vswp->instance,
   1150 		    ether_sprintf(&portp->p_macaddr), portp->p_instance);
   1151 	}
   1152 }
   1153 
   1154 /*
   1155  * Remove an entry from FDB.
   1156  */
   1157 void
   1158 vsw_fdbe_del(vsw_t *vswp, struct ether_addr *eaddr)
   1159 {
   1160 	uint64_t	addr = 0;
   1161 	vsw_fdbe_t	*fp;
   1162 	int		rv;
   1163 
   1164 	KEY_HASH(addr, eaddr);
   1165 
   1166 	/*
   1167 	 * Remove the entry from fdb hash table.
   1168 	 * This prevents further references to this fdb entry.
   1169 	 */
   1170 	rv = mod_hash_remove(vswp->fdb_hashp, (mod_hash_key_t)addr,
   1171 	    (mod_hash_val_t *)&fp);
   1172 	if (rv != 0) {
   1173 		/* invalid key? */
   1174 		return;
   1175 	}
   1176 
   1177 	/*
   1178 	 * If there are threads already ref holding before the entry was
   1179 	 * removed from hash table, then wait for ref count to drop to zero.
   1180 	 */
   1181 	while (fp->refcnt != 0) {
   1182 		delay(drv_usectohz(vsw_fdbe_refcnt_delay));
   1183 	}
   1184 
   1185 	kmem_free(fp, sizeof (*fp));
   1186 }
   1187 
   1188 /*
   1189  * Search fdb for a given mac address. If an entry is found, hold
   1190  * a reference to it and return the entry, else returns NULL.
   1191  */
   1192 static vsw_fdbe_t *
   1193 vsw_fdbe_find(vsw_t *vswp, struct ether_addr *addrp)
   1194 {
   1195 	uint64_t	key = 0;
   1196 	vsw_fdbe_t	*fp;
   1197 	int		rv;
   1198 
   1199 	KEY_HASH(key, addrp);
   1200 
   1201 	rv = mod_hash_find_cb(vswp->fdb_hashp, (mod_hash_key_t)key,
   1202 	    (mod_hash_val_t *)&fp, vsw_fdbe_find_cb);
   1203 
   1204 	if (rv != 0)
   1205 		return (NULL);
   1206 
   1207 	return (fp);
   1208 }
   1209 
   1210 /*
   1211  * Callback function provided to mod_hash_find_cb(). After finding the fdb
   1212  * entry corresponding to the key (macaddr), this callback will be invoked by
   1213  * mod_hash_find_cb() to atomically increment the reference count on the fdb
   1214  * entry before returning the found entry.
   1215  */
   1216 static void
   1217 vsw_fdbe_find_cb(mod_hash_key_t key, mod_hash_val_t val)
   1218 {
   1219 	_NOTE(ARGUNUSED(key))
   1220 	VSW_FDBE_REFHOLD((vsw_fdbe_t *)val);
   1221 }
   1222 
   1223 /*
   1224  * A given frame must be always tagged with the appropriate vlan id (unless it
   1225  * is in the default-vlan) before the mac address switching function is called.
   1226  * Otherwise, after switching function determines the destination, we cannot
   1227  * figure out if the destination belongs to the the same vlan that the frame
   1228  * originated from and if it needs tag/untag. Frames which are inbound from
   1229  * the external(physical) network over a vlan trunk link are always tagged.
   1230  * However frames which are received from a vnet-port over ldc or frames which
   1231  * are coming down the stack on the service domain over vsw interface may be
   1232  * untagged. These frames must be tagged with the appropriate pvid of the
   1233  * sender (vnet-port or vsw device), before invoking the switching function.
   1234  *
   1235  * Arguments:
   1236  *   arg:    caller of the function.
   1237  *   type:   type of arg(caller): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port)
   1238  *   mp:     frame(s) to be tagged.
   1239  */
   1240 mblk_t *
   1241 vsw_vlan_frame_pretag(void *arg, int type, mblk_t *mp)
   1242 {
   1243 	vsw_t			*vswp;
   1244 	vsw_port_t		*portp;
   1245 	struct ether_header	*ehp;
   1246 	mblk_t			*bp;
   1247 	mblk_t			*bpt;
   1248 	mblk_t			*bph;
   1249 	mblk_t			*bpn;
   1250 	uint16_t		pvid;
   1251 
   1252 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
   1253 
   1254 	if (type == VSW_LOCALDEV) {
   1255 		vswp = (vsw_t *)arg;
   1256 		pvid = vswp->pvid;
   1257 		portp = NULL;
   1258 	} else {
   1259 		/* VSW_VNETPORT */
   1260 		portp = (vsw_port_t *)arg;
   1261 		pvid = portp->pvid;
   1262 		vswp = portp->p_vswp;
   1263 	}
   1264 
   1265 	bpn = bph = bpt = NULL;
   1266 
   1267 	for (bp = mp; bp != NULL; bp = bpn) {
   1268 
   1269 		bpn = bp->b_next;
   1270 		bp->b_next = bp->b_prev = NULL;
   1271 
   1272 		/* Determine if it is an untagged frame */
   1273 		ehp = (struct ether_header *)bp->b_rptr;
   1274 
   1275 		if (ehp->ether_type != ETHERTYPE_VLAN) {	/* untagged */
   1276 
   1277 			/* no need to tag if the frame is in default vlan */
   1278 			if (pvid != vswp->default_vlan_id) {
   1279 				bp = vnet_vlan_insert_tag(bp, pvid);
   1280 				if (bp == NULL) {
   1281 					continue;
   1282 				}
   1283 			}
   1284 		}
   1285 
   1286 		/* build a chain of processed packets */
   1287 		if (bph == NULL) {
   1288 			bph = bpt = bp;
   1289 		} else {
   1290 			bpt->b_next = bp;
   1291 			bpt = bp;
   1292 		}
   1293 
   1294 	}
   1295 
   1296 	return (bph);
   1297 }
   1298 
   1299 /*
   1300  * Frames destined to a vnet-port or to the local vsw interface, must be
   1301  * untagged if necessary before sending. This function first checks that the
   1302  * frame can be sent to the destination in the vlan identified by the frame
   1303  * tag. Note that when this function is invoked the frame must have been
   1304  * already tagged (unless it is in the default-vlan). Because, this function is
   1305  * called when the switching function determines the destination and invokes
   1306  * its send function (vnet-port or vsw interface) and all frames would have
   1307  * been tagged by this time (see comments in vsw_vlan_frame_pretag()).
   1308  *
   1309  * Arguments:
   1310  *   arg:    destination device.
   1311  *   type:   type of arg(destination): VSW_LOCALDEV(vsw) or VSW_VNETPORT(port)
   1312  *   np:     head of pkt chain to be validated and untagged.
   1313  *   npt:    tail of pkt chain to be validated and untagged.
   1314  *
   1315  * Returns:
   1316  *   np:     head of updated chain of packets
   1317  *   npt:    tail of updated chain of packets
   1318  *   rv:     count of the packets in the returned list
   1319  */
   1320 uint32_t
   1321 vsw_vlan_frame_untag(void *arg, int type, mblk_t **np, mblk_t **npt)
   1322 {
   1323 	mblk_t			*bp;
   1324 	mblk_t			*bpt;
   1325 	mblk_t			*bph;
   1326 	mblk_t			*bpn;
   1327 	vsw_port_t		*portp;
   1328 	vsw_t			*vswp;
   1329 	uint32_t		count;
   1330 	struct ether_header	*ehp;
   1331 	boolean_t		is_tagged;
   1332 	boolean_t		rv;
   1333 	uint16_t		vlan_id;
   1334 	uint16_t		pvid;
   1335 	mod_hash_t		*vlan_hashp;
   1336 
   1337 	ASSERT((type == VSW_LOCALDEV) || (type == VSW_VNETPORT));
   1338 
   1339 
   1340 	if (type == VSW_LOCALDEV) {
   1341 		vswp = (vsw_t *)arg;
   1342 		pvid = vswp->pvid;
   1343 		vlan_hashp = vswp->vlan_hashp;
   1344 		portp = NULL;
   1345 	} else {
   1346 		/* type == VSW_VNETPORT */
   1347 		portp = (vsw_port_t *)arg;
   1348 		vswp = portp->p_vswp;
   1349 		vlan_hashp = portp->vlan_hashp;
   1350 		pvid = portp->pvid;
   1351 	}
   1352 
   1353 	/*
   1354 	 * If the MAC layer switching in place, then
   1355 	 * untagging required only if the pvid is not
   1356 	 * the same as default_vlan_id. This is because,
   1357 	 * the MAC layer will send packets for the
   1358 	 * registered vlans only.
   1359 	 */
   1360 	if ((vswp->mac_cl_switching == B_TRUE) &&
   1361 	    (pvid == vswp->default_vlan_id)) {
   1362 		/* simply count and set the tail */
   1363 		count = 1;
   1364 		bp = *np;
   1365 		ASSERT(bp != NULL);
   1366 		while (bp->b_next != NULL) {
   1367 			bp = bp->b_next;
   1368 			count++;
   1369 		}
   1370 		*npt = bp;
   1371 		return (count);
   1372 	}
   1373 
   1374 	bpn = bph = bpt = NULL;
   1375 	count = 0;
   1376 
   1377 	for (bp = *np; bp != NULL; bp = bpn) {
   1378 
   1379 		bpn = bp->b_next;
   1380 		bp->b_next = bp->b_prev = NULL;
   1381 
   1382 		/*
   1383 		 * Determine the vlan id that the frame belongs to.
   1384 		 */
   1385 		ehp = (struct ether_header *)bp->b_rptr;
   1386 		is_tagged = vsw_frame_lookup_vid(arg, type, ehp, &vlan_id);
   1387 
   1388 		/*
   1389 		 * If MAC layer switching in place, then we
   1390 		 * need to untag only if the tagged packet has
   1391 		 * vlan-id same as the pvid.
   1392 		 */
   1393 		if (vswp->mac_cl_switching == B_TRUE) {
   1394 
   1395 			/* only tagged packets expected here */
   1396 			ASSERT(is_tagged == B_TRUE);
   1397 			if (vlan_id == pvid) {
   1398 				bp = vnet_vlan_remove_tag(bp);
   1399 				if (bp == NULL) {
   1400 					/* packet dropped */
   1401 					continue;
   1402 				}
   1403 			}
   1404 		} else { /* No MAC layer switching */
   1405 
   1406 			/*
   1407 			 * Check the frame header if tag/untag is  needed.
   1408 			 */
   1409 			if (is_tagged == B_FALSE) {
   1410 				/*
   1411 				 * Untagged frame. We shouldn't have an
   1412 				 * untagged packet at this point, unless
   1413 				 * the destination's  vlan id is
   1414 				 * default-vlan-id; if it is not the
   1415 				 * default-vlan-id, we drop the packet.
   1416 				 */
   1417 				if (vlan_id != vswp->default_vlan_id) {
   1418 					/* drop the packet */
   1419 					freemsg(bp);
   1420 					continue;
   1421 				}
   1422 			} else {	/* Tagged */
   1423 				/*
   1424 				 * Tagged frame, untag if it's the
   1425 				 * destination's pvid.
   1426 				 */
   1427 				if (vlan_id == pvid) {
   1428 
   1429 					bp = vnet_vlan_remove_tag(bp);
   1430 					if (bp == NULL) {
   1431 						/* packet dropped */
   1432 						continue;
   1433 					}
   1434 				} else {
   1435 
   1436 					/*
   1437 					 * Check if the destination is in the
   1438 					 * same vlan.
   1439 					 */
   1440 					rv = vsw_vlan_lookup(vlan_hashp,
   1441 					    vlan_id);
   1442 					if (rv == B_FALSE) {
   1443 						/* drop the packet */
   1444 						freemsg(bp);
   1445 						continue;
   1446 					}
   1447 				}
   1448 
   1449 			}
   1450 		}
   1451 
   1452 		/* build a chain of processed packets */
   1453 		if (bph == NULL) {
   1454 			bph = bpt = bp;
   1455 		} else {
   1456 			bpt->b_next = bp;
   1457 			bpt = bp;
   1458 		}
   1459 		count++;
   1460 	}
   1461 
   1462 	*np = bph;
   1463 	*npt = bpt;
   1464 	return (count);
   1465 }
   1466 
   1467 /*
   1468  * Lookup the vlan id of the given frame. If it is a vlan-tagged frame,
   1469  * then the vlan-id is available in the tag; otherwise, its vlan id is
   1470  * implicitly obtained based on the caller (destination of the frame:
   1471  * VSW_VNETPORT or VSW_LOCALDEV).
   1472  * The vlan id determined is returned in vidp.
   1473  * Returns: B_TRUE if it is a tagged frame; B_FALSE if it is untagged.
   1474  */
   1475 boolean_t
   1476 vsw_frame_lookup_vid(void *arg, int caller, struct ether_header *ehp,
   1477 	uint16_t *vidp)
   1478 {
   1479 	struct ether_vlan_header	*evhp;
   1480 	vsw_t				*vswp;
   1481 	vsw_port_t			*portp;
   1482 
   1483 	/* If it's a tagged frame, get the vid from vlan header */
   1484 	if (ehp->ether_type == ETHERTYPE_VLAN) {
   1485 
   1486 		evhp = (struct ether_vlan_header *)ehp;
   1487 		*vidp = VLAN_ID(ntohs(evhp->ether_tci));
   1488 		return (B_TRUE);
   1489 	}
   1490 
   1491 	/* Untagged frame; determine vlan id based on caller */
   1492 	switch (caller) {
   1493 
   1494 	case VSW_VNETPORT:
   1495 		/*
   1496 		 * packet destined to a vnet; vlan-id is pvid of vnet-port.
   1497 		 */
   1498 		portp = (vsw_port_t *)arg;
   1499 		*vidp = portp->pvid;
   1500 		break;
   1501 
   1502 	case VSW_LOCALDEV:
   1503 
   1504 		/*
   1505 		 * packet destined to vsw interface;
   1506 		 * vlan-id is port-vlan-id of vsw device.
   1507 		 */
   1508 		vswp = (vsw_t *)arg;
   1509 		*vidp = vswp->pvid;
   1510 		break;
   1511 	}
   1512 
   1513 	return (B_FALSE);
   1514 }
   1515 
   1516 /*
   1517  * Add or remove multicast address(es).
   1518  *
   1519  * Returns 0 on success, 1 on failure.
   1520  */
   1521 int
   1522 vsw_add_rem_mcst(vnet_mcast_msg_t *mcst_pkt, vsw_port_t *port)
   1523 {
   1524 	mcst_addr_t		*mcst_p = NULL;
   1525 	vsw_t			*vswp = port->p_vswp;
   1526 	uint64_t		addr = 0x0;
   1527 	int			i;
   1528 
   1529 	D1(vswp, "%s: enter", __func__);
   1530 
   1531 	D2(vswp, "%s: %d addresses", __func__, mcst_pkt->count);
   1532 
   1533 	for (i = 0; i < mcst_pkt->count; i++) {
   1534 		/*
   1535 		 * Convert address into form that can be used
   1536 		 * as hash table key.
   1537 		 */
   1538 		KEY_HASH(addr, &(mcst_pkt->mca[i]));
   1539 
   1540 		/*
   1541 		 * Add or delete the specified address/port combination.
   1542 		 */
   1543 		if (mcst_pkt->set == 0x1) {
   1544 			D3(vswp, "%s: adding multicast address 0x%llx for "
   1545 			    "port %ld", __func__, addr, port->p_instance);
   1546 			if (vsw_add_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
   1547 				/*
   1548 				 * Update the list of multicast
   1549 				 * addresses contained within the
   1550 				 * port structure to include this new
   1551 				 * one.
   1552 				 */
   1553 				mcst_p = kmem_zalloc(sizeof (mcst_addr_t),
   1554 				    KM_NOSLEEP);
   1555 				if (mcst_p == NULL) {
   1556 					DERR(vswp, "%s: unable to alloc mem",
   1557 					    __func__);
   1558 					(void) vsw_del_mcst(vswp,
   1559 					    VSW_VNETPORT, addr, port);
   1560 					return (1);
   1561 				}
   1562 
   1563 				mcst_p->nextp = NULL;
   1564 				mcst_p->addr = addr;
   1565 				ether_copy(&mcst_pkt->mca[i], &mcst_p->mca);
   1566 
   1567 				/*
   1568 				 * Program the address into HW. If the addr
   1569 				 * has already been programmed then the MAC
   1570 				 * just increments a ref counter (which is
   1571 				 * used when the address is being deleted)
   1572 				 */
   1573 				if (vsw_mac_multicast_add(vswp, port, mcst_p,
   1574 				    VSW_VNETPORT)) {
   1575 					(void) vsw_del_mcst(vswp,
   1576 					    VSW_VNETPORT, addr, port);
   1577 					kmem_free(mcst_p, sizeof (*mcst_p));
   1578 					return (1);
   1579 				}
   1580 
   1581 				mutex_enter(&port->mca_lock);
   1582 				mcst_p->nextp = port->mcap;
   1583 				port->mcap = mcst_p;
   1584 				mutex_exit(&port->mca_lock);
   1585 
   1586 			} else {
   1587 				DERR(vswp, "%s: error adding multicast "
   1588 				    "address 0x%llx for port %ld",
   1589 				    __func__, addr, port->p_instance);
   1590 				return (1);
   1591 			}
   1592 		} else {
   1593 			/*
   1594 			 * Delete an entry from the multicast hash
   1595 			 * table and update the address list
   1596 			 * appropriately.
   1597 			 */
   1598 			if (vsw_del_mcst(vswp, VSW_VNETPORT, addr, port) == 0) {
   1599 				D3(vswp, "%s: deleting multicast address "
   1600 				    "0x%llx for port %ld", __func__, addr,
   1601 				    port->p_instance);
   1602 
   1603 				mcst_p = vsw_del_addr(VSW_VNETPORT, port, addr);
   1604 				ASSERT(mcst_p != NULL);
   1605 
   1606 				/*
   1607 				 * Remove the address from HW. The address
   1608 				 * will actually only be removed once the ref
   1609 				 * count within the MAC layer has dropped to
   1610 				 * zero. I.e. we can safely call this fn even
   1611 				 * if other ports are interested in this
   1612 				 * address.
   1613 				 */
   1614 				vsw_mac_multicast_remove(vswp, port, mcst_p,
   1615 				    VSW_VNETPORT);
   1616 				kmem_free(mcst_p, sizeof (*mcst_p));
   1617 
   1618 			} else {
   1619 				DERR(vswp, "%s: error deleting multicast "
   1620 				    "addr 0x%llx for port %ld",
   1621 				    __func__, addr, port->p_instance);
   1622 				return (1);
   1623 			}
   1624 		}
   1625 	}
   1626 	D1(vswp, "%s: exit", __func__);
   1627 	return (0);
   1628 }
   1629 
   1630 /*
   1631  * Add a new multicast entry.
   1632  *
   1633  * Search hash table based on address. If match found then
   1634  * update associated val (which is chain of ports), otherwise
   1635  * create new key/val (addr/port) pair and insert into table.
   1636  */
   1637 int
   1638 vsw_add_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
   1639 {
   1640 	int		dup = 0;
   1641 	int		rv = 0;
   1642 	mfdb_ent_t	*ment = NULL;
   1643 	mfdb_ent_t	*tmp_ent = NULL;
   1644 	mfdb_ent_t	*new_ent = NULL;
   1645 	void		*tgt = NULL;
   1646 
   1647 	if (devtype == VSW_VNETPORT) {
   1648 		/*
   1649 		 * Being invoked from a vnet.
   1650 		 */
   1651 		ASSERT(arg != NULL);
   1652 		tgt = arg;
   1653 		D2(NULL, "%s: port %d : address 0x%llx", __func__,
   1654 		    ((vsw_port_t *)arg)->p_instance, addr);
   1655 	} else {
   1656 		/*
   1657 		 * We are being invoked via the m_multicst mac entry
   1658 		 * point.
   1659 		 */
   1660 		D2(NULL, "%s: address 0x%llx", __func__, addr);
   1661 		tgt = (void *)vswp;
   1662 	}
   1663 
   1664 	WRITE_ENTER(&vswp->mfdbrw);
   1665 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
   1666 	    (mod_hash_val_t *)&ment) != 0) {
   1667 
   1668 		/* address not currently in table */
   1669 		ment = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
   1670 		ment->d_addr = (void *)tgt;
   1671 		ment->d_type = devtype;
   1672 		ment->nextp = NULL;
   1673 
   1674 		if (mod_hash_insert(vswp->mfdb, (mod_hash_key_t)addr,
   1675 		    (mod_hash_val_t)ment) != 0) {
   1676 			DERR(vswp, "%s: hash table insertion failed", __func__);
   1677 			kmem_free(ment, sizeof (mfdb_ent_t));
   1678 			rv = 1;
   1679 		} else {
   1680 			D2(vswp, "%s: added initial entry for 0x%llx to "
   1681 			    "table", __func__, addr);
   1682 		}
   1683 	} else {
   1684 		/*
   1685 		 * Address in table. Check to see if specified port
   1686 		 * is already associated with the address. If not add
   1687 		 * it now.
   1688 		 */
   1689 		tmp_ent = ment;
   1690 		while (tmp_ent != NULL) {
   1691 			if (tmp_ent->d_addr == (void *)tgt) {
   1692 				if (devtype == VSW_VNETPORT) {
   1693 					DERR(vswp, "%s: duplicate port entry "
   1694 					    "found for portid %ld and key "
   1695 					    "0x%llx", __func__,
   1696 					    ((vsw_port_t *)arg)->p_instance,
   1697 					    addr);
   1698 				} else {
   1699 					DERR(vswp, "%s: duplicate entry found"
   1700 					    "for key 0x%llx", __func__, addr);
   1701 				}
   1702 				rv = 1;
   1703 				dup = 1;
   1704 				break;
   1705 			}
   1706 			tmp_ent = tmp_ent->nextp;
   1707 		}
   1708 
   1709 		/*
   1710 		 * Port not on list so add it to end now.
   1711 		 */
   1712 		if (0 == dup) {
   1713 			D2(vswp, "%s: added entry for 0x%llx to table",
   1714 			    __func__, addr);
   1715 			new_ent = kmem_alloc(sizeof (mfdb_ent_t), KM_SLEEP);
   1716 			new_ent->d_addr = (void *)tgt;
   1717 			new_ent->d_type = devtype;
   1718 			new_ent->nextp = NULL;
   1719 
   1720 			tmp_ent = ment;
   1721 			while (tmp_ent->nextp != NULL)
   1722 				tmp_ent = tmp_ent->nextp;
   1723 
   1724 			tmp_ent->nextp = new_ent;
   1725 		}
   1726 	}
   1727 
   1728 	RW_EXIT(&vswp->mfdbrw);
   1729 	return (rv);
   1730 }
   1731 
   1732 /*
   1733  * Remove a multicast entry from the hashtable.
   1734  *
   1735  * Search hash table based on address. If match found, scan
   1736  * list of ports associated with address. If specified port
   1737  * found remove it from list.
   1738  */
   1739 int
   1740 vsw_del_mcst(vsw_t *vswp, uint8_t devtype, uint64_t addr, void *arg)
   1741 {
   1742 	mfdb_ent_t	*ment = NULL;
   1743 	mfdb_ent_t	*curr_p, *prev_p;
   1744 	void		*tgt = NULL;
   1745 
   1746 	D1(vswp, "%s: enter", __func__);
   1747 
   1748 	if (devtype == VSW_VNETPORT) {
   1749 		tgt = (vsw_port_t *)arg;
   1750 		D2(vswp, "%s: removing port %d from mFDB for address"
   1751 		    " 0x%llx", __func__, ((vsw_port_t *)tgt)->p_instance, addr);
   1752 	} else {
   1753 		D2(vswp, "%s: removing entry", __func__);
   1754 		tgt = (void *)vswp;
   1755 	}
   1756 
   1757 	WRITE_ENTER(&vswp->mfdbrw);
   1758 	if (mod_hash_find(vswp->mfdb, (mod_hash_key_t)addr,
   1759 	    (mod_hash_val_t *)&ment) != 0) {
   1760 		D2(vswp, "%s: address 0x%llx not in table", __func__, addr);
   1761 		RW_EXIT(&vswp->mfdbrw);
   1762 		return (1);
   1763 	}
   1764 
   1765 	prev_p = curr_p = ment;
   1766 
   1767 	while (curr_p != NULL) {
   1768 		if (curr_p->d_addr == (void *)tgt) {
   1769 			if (devtype == VSW_VNETPORT) {
   1770 				D2(vswp, "%s: port %d found", __func__,
   1771 				    ((vsw_port_t *)tgt)->p_instance);
   1772 			} else {
   1773 				D2(vswp, "%s: instance found", __func__);
   1774 			}
   1775 
   1776 			if (prev_p == curr_p) {
   1777 				/*
   1778 				 * head of list, if no other element is in
   1779 				 * list then destroy this entry, otherwise
   1780 				 * just replace it with updated value.
   1781 				 */
   1782 				ment = curr_p->nextp;
   1783 				if (ment == NULL) {
   1784 					(void) mod_hash_destroy(vswp->mfdb,
   1785 					    (mod_hash_val_t)addr);
   1786 				} else {
   1787 					(void) mod_hash_replace(vswp->mfdb,
   1788 					    (mod_hash_key_t)addr,
   1789 					    (mod_hash_val_t)ment);
   1790 				}
   1791 			} else {
   1792 				/*
   1793 				 * Not head of list, no need to do
   1794 				 * replacement, just adjust list pointers.
   1795 				 */
   1796 				prev_p->nextp = curr_p->nextp;
   1797 			}
   1798 			break;
   1799 		}
   1800 
   1801 		prev_p = curr_p;
   1802 		curr_p = curr_p->nextp;
   1803 	}
   1804 
   1805 	RW_EXIT(&vswp->mfdbrw);
   1806 
   1807 	D1(vswp, "%s: exit", __func__);
   1808 
   1809 	if (curr_p == NULL)
   1810 		return (1);
   1811 	kmem_free(curr_p, sizeof (mfdb_ent_t));
   1812 	return (0);
   1813 }
   1814 
   1815 /*
   1816  * Port is being deleted, but has registered an interest in one
   1817  * or more multicast groups. Using the list of addresses maintained
   1818  * within the port structure find the appropriate entry in the hash
   1819  * table and remove this port from the list of interested ports.
   1820  */
   1821 void
   1822 vsw_del_mcst_port(vsw_port_t *port)
   1823 {
   1824 	mcst_addr_t	*mcap = NULL;
   1825 	vsw_t		*vswp = port->p_vswp;
   1826 
   1827 	D1(vswp, "%s: enter", __func__);
   1828 
   1829 	mutex_enter(&port->mca_lock);
   1830 
   1831 	while ((mcap = port->mcap) != NULL) {
   1832 
   1833 		port->mcap = mcap->nextp;
   1834 
   1835 		mutex_exit(&port->mca_lock);
   1836 
   1837 		(void) vsw_del_mcst(vswp, VSW_VNETPORT,
   1838 		    mcap->addr, port);
   1839 
   1840 		/*
   1841 		 * Remove the address from HW. The address
   1842 		 * will actually only be removed once the ref
   1843 		 * count within the MAC layer has dropped to
   1844 		 * zero. I.e. we can safely call this fn even
   1845 		 * if other ports are interested in this
   1846 		 * address.
   1847 		 */
   1848 		vsw_mac_multicast_remove(vswp, port, mcap, VSW_VNETPORT);
   1849 		kmem_free(mcap, sizeof (*mcap));
   1850 
   1851 		mutex_enter(&port->mca_lock);
   1852 
   1853 	}
   1854 
   1855 	mutex_exit(&port->mca_lock);
   1856 
   1857 	D1(vswp, "%s: exit", __func__);
   1858 }
   1859 
   1860 /*
   1861  * This vsw instance is detaching, but has registered an interest in one
   1862  * or more multicast groups. Using the list of addresses maintained
   1863  * within the vsw structure find the appropriate entry in the hash
   1864  * table and remove this instance from the list of interested ports.
   1865  */
   1866 void
   1867 vsw_del_mcst_vsw(vsw_t *vswp)
   1868 {
   1869 	mcst_addr_t	*next_p = NULL;
   1870 
   1871 	D1(vswp, "%s: enter", __func__);
   1872 
   1873 	mutex_enter(&vswp->mca_lock);
   1874 
   1875 	while (vswp->mcap != NULL) {
   1876 		DERR(vswp, "%s: deleting addr 0x%llx",
   1877 		    __func__, vswp->mcap->addr);
   1878 		(void) vsw_del_mcst(vswp, VSW_LOCALDEV, vswp->mcap->addr, NULL);
   1879 
   1880 		next_p = vswp->mcap->nextp;
   1881 		kmem_free(vswp->mcap, sizeof (mcst_addr_t));
   1882 		vswp->mcap = next_p;
   1883 	}
   1884 
   1885 	vswp->mcap = NULL;
   1886 	mutex_exit(&vswp->mca_lock);
   1887 
   1888 	D1(vswp, "%s: exit", __func__);
   1889 }
   1890 
   1891 mblk_t *
   1892 vsw_get_same_dest_list(struct ether_header *ehp, mblk_t **mpp)
   1893 {
   1894 	mblk_t			*bp;
   1895 	mblk_t			*nbp;
   1896 	mblk_t			*head = NULL;
   1897 	mblk_t			*tail = NULL;
   1898 	mblk_t			*prev = NULL;
   1899 	struct ether_header	*behp;
   1900 
   1901 	/* process the chain of packets */
   1902 	bp = *mpp;
   1903 	while (bp) {
   1904 		nbp = bp->b_next;
   1905 		behp = (struct ether_header *)bp->b_rptr;
   1906 		bp->b_prev = NULL;
   1907 		if (ether_cmp(&ehp->ether_dhost, &behp->ether_dhost) == 0) {
   1908 			if (prev == NULL) {
   1909 				*mpp = nbp;
   1910 			} else {
   1911 				prev->b_next = nbp;
   1912 			}
   1913 			bp->b_next =  NULL;
   1914 			if (head == NULL) {
   1915 				head = tail = bp;
   1916 			} else {
   1917 				tail->b_next = bp;
   1918 				tail = bp;
   1919 			}
   1920 		} else {
   1921 			prev = bp;
   1922 		}
   1923 		bp = nbp;
   1924 	}
   1925 	return (head);
   1926 }
   1927 
   1928 static mblk_t *
   1929 vsw_dupmsgchain(mblk_t *mp)
   1930 {
   1931 	mblk_t	*nmp = NULL;
   1932 	mblk_t	**nmpp = &nmp;
   1933 
   1934 	for (; mp != NULL; mp = mp->b_next) {
   1935 		if ((*nmpp = dupmsg(mp)) == NULL) {
   1936 			freemsgchain(nmp);
   1937 			return (NULL);
   1938 		}
   1939 
   1940 		nmpp = &((*nmpp)->b_next);
   1941 	}
   1942 
   1943 	return (nmp);
   1944 }
   1945