Home | History | Annotate | Download | only in mac
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #include <sys/strsun.h>
     28 #include <sys/sdt.h>
     29 #include <sys/mac.h>
     30 #include <sys/mac_impl.h>
     31 #include <sys/mac_client_impl.h>
     32 #include <sys/dls.h>
     33 #include <sys/dls_impl.h>
     34 #include <sys/mac_soft_ring.h>
     35 #include <sys/ethernet.h>
     36 #include <sys/vlan.h>
     37 #include <inet/ip.h>
     38 #include <inet/ip6.h>
     39 #include <netinet/tcp.h>
     40 #include <netinet/udp.h>
     41 #include <netinet/sctp.h>
     42 
/*
 * Global flow table, will be a per exclusive-zone table later.
 * Entries are keyed by flow name (see mac_flow_hash_add()).
 */
static mod_hash_t	*flow_hash;
/* Held around every insert, remove and find done on flow_hash. */
static krwlock_t	flow_tab_lock;

/* kmem caches backing flow_entry_t and flow_tab_t allocations. */
static kmem_cache_t	*flow_cache;
static kmem_cache_t	*flow_tab_cache;
/* Ops vector that mac_flow_l2tab_create() passes to mac_flow_tab_create(). */
static flow_ops_t	flow_l2_ops;
     50 
/*
 * Describes one exported flow statistic: the name of the kstat and the
 * byte offset of the matching counter inside flow_stats_t.
 */
typedef struct {
	const char	*fs_name;	/* kstat name */
	uint_t		fs_offset;	/* offset of counter in flow_stats_t */
} flow_stats_info_t;

/* Byte offset of field f within flow_stats_t. */
#define	FS_OFF(f)	(offsetof(flow_stats_t, f))
/*
 * Statistics exported for every flow. flow_stat_init() and
 * flow_stat_update() both walk this table in order, so entry i here
 * corresponds to slot i of the flow's named kstat array.
 */
static flow_stats_info_t flow_stats_list[] = {
	{"rbytes",	FS_OFF(fs_rbytes)},
	{"ipackets",	FS_OFF(fs_ipackets)},
	{"ierrors",	FS_OFF(fs_ierrors)},
	{"obytes",	FS_OFF(fs_obytes)},
	{"opackets",	FS_OFF(fs_opackets)},
	{"oerrors",	FS_OFF(fs_oerrors)}
};
/* Number of entries in flow_stats_list. */
#define	FS_SIZE		(sizeof (flow_stats_list) / sizeof (flow_stats_info_t))
     66 
     67 /*
/*
 * Checks whether a flow mask is legal.
 * Forward declaration; mac_flow_add_subflow() treats a NULL return as
 * an unsupported mask and fails with EOPNOTSUPP.
 */
static flow_tab_info_t	*mac_flow_tab_info_get(flow_mask_t);
     71 
     72 static void
     73 flow_stat_init(kstat_named_t *knp)
     74 {
     75 	int	i;
     76 
     77 	for (i = 0; i < FS_SIZE; i++, knp++) {
     78 		kstat_named_init(knp, flow_stats_list[i].fs_name,
     79 		    KSTAT_DATA_UINT64);
     80 	}
     81 }
     82 
     83 static int
     84 flow_stat_update(kstat_t *ksp, int rw)
     85 {
     86 	flow_entry_t	*fep = ksp->ks_private;
     87 	flow_stats_t 	*fsp = &fep->fe_flowstats;
     88 	kstat_named_t	*knp = ksp->ks_data;
     89 	uint64_t	*statp;
     90 	int		i;
     91 
     92 	if (rw != KSTAT_READ)
     93 		return (EACCES);
     94 
     95 	for (i = 0; i < FS_SIZE; i++, knp++) {
     96 		statp = (uint64_t *)
     97 		    ((uchar_t *)fsp + flow_stats_list[i].fs_offset);
     98 
     99 		knp->value.ui64 = *statp;
    100 	}
    101 	return (0);
    102 }
    103 
    104 static void
    105 flow_stat_create(flow_entry_t *fep)
    106 {
    107 	kstat_t		*ksp;
    108 	kstat_named_t	*knp;
    109 	uint_t		nstats = FS_SIZE;
    110 
    111 	/*
    112 	 * Fow now, flow entries are only manipulated and visible from the
    113 	 * global zone.
    114 	 */
    115 	ksp = kstat_create_zone("unix", 0, (char *)fep->fe_flow_name, "flow",
    116 	    KSTAT_TYPE_NAMED, nstats, 0, GLOBAL_ZONEID);
    117 	if (ksp == NULL)
    118 		return;
    119 
    120 	ksp->ks_update = flow_stat_update;
    121 	ksp->ks_private = fep;
    122 	fep->fe_ksp = ksp;
    123 
    124 	knp = (kstat_named_t *)ksp->ks_data;
    125 	flow_stat_init(knp);
    126 	kstat_install(ksp);
    127 }
    128 
    129 void
    130 flow_stat_destroy(flow_entry_t *fep)
    131 {
    132 	if (fep->fe_ksp != NULL) {
    133 		kstat_delete(fep->fe_ksp);
    134 		fep->fe_ksp = NULL;
    135 	}
    136 }
    137 
    138 /*
    139  * Initialize the flow table
    140  */
    141 void
    142 mac_flow_init()
    143 {
    144 	flow_cache = kmem_cache_create("flow_entry_cache",
    145 	    sizeof (flow_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
    146 	flow_tab_cache = kmem_cache_create("flow_tab_cache",
    147 	    sizeof (flow_tab_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
    148 	flow_hash = mod_hash_create_extended("flow_hash",
    149 	    100, mod_hash_null_keydtor, mod_hash_null_valdtor,
    150 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
    151 	rw_init(&flow_tab_lock, NULL, RW_DEFAULT, NULL);
    152 }
    153 
    154 /*
    155  * Cleanup and release the flow table
    156  */
    157 void
    158 mac_flow_fini()
    159 {
    160 	kmem_cache_destroy(flow_cache);
    161 	kmem_cache_destroy(flow_tab_cache);
    162 	mod_hash_destroy_hash(flow_hash);
    163 	rw_destroy(&flow_tab_lock);
    164 }
    165 
    166 /*
    167  * mac_create_flow(): create a flow_entry_t.
    168  */
    169 int
    170 mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name,
    171     void *client_cookie, uint_t type, flow_entry_t **flentp)
    172 {
    173 	flow_entry_t	*flent = *flentp;
    174 	int		err = 0;
    175 
    176 	if (mrp != NULL) {
    177 		err = mac_validate_props(mrp);
    178 		if (err != 0)
    179 			return (err);
    180 	}
    181 
    182 	if (flent == NULL) {
    183 		flent = kmem_cache_alloc(flow_cache, KM_SLEEP);
    184 		bzero(flent, sizeof (*flent));
    185 		mutex_init(&flent->fe_lock, NULL, MUTEX_DEFAULT, NULL);
    186 		cv_init(&flent->fe_cv, NULL, CV_DEFAULT, NULL);
    187 
    188 		/* Initialize the receiver function to a safe routine */
    189 		flent->fe_cb_fn = (flow_fn_t)mac_pkt_drop;
    190 		flent->fe_index = -1;
    191 	}
    192 	(void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
    193 
    194 	/* This is an initial flow, will be configured later */
    195 	if (fd == NULL) {
    196 		*flentp = flent;
    197 		return (0);
    198 	}
    199 
    200 	flent->fe_client_cookie = client_cookie;
    201 	flent->fe_type = type;
    202 
    203 	/* Save flow desc */
    204 	bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
    205 
    206 	if (mrp != NULL) {
    207 		/*
    208 		 * We have already set fe_resource_props for a Link.
    209 		 */
    210 		if (type & FLOW_USER) {
    211 			bcopy(mrp, &flent->fe_resource_props,
    212 			    sizeof (mac_resource_props_t));
    213 		}
    214 		/*
    215 		 * The effective resource list should reflect the priority
    216 		 * that we set implicitly.
    217 		 */
    218 		if (!(mrp->mrp_mask & MRP_PRIORITY))
    219 			mrp->mrp_mask |= MRP_PRIORITY;
    220 		if (type & FLOW_USER)
    221 			mrp->mrp_priority = MPL_SUBFLOW_DEFAULT;
    222 		else
    223 			mrp->mrp_priority = MPL_LINK_DEFAULT;
    224 		bcopy(mrp, &flent->fe_effective_props,
    225 		    sizeof (mac_resource_props_t));
    226 	}
    227 	flow_stat_create(flent);
    228 
    229 	*flentp = flent;
    230 	return (0);
    231 }
    232 
    233 /*
    234  * Validate flow entry and add it to a flow table.
    235  */
    236 int
    237 mac_flow_add(flow_tab_t *ft, flow_entry_t *flent)
    238 {
    239 	flow_entry_t	**headp, **p;
    240 	flow_ops_t	*ops = &ft->ft_ops;
    241 	flow_mask_t	mask;
    242 	uint32_t	index;
    243 	int		err;
    244 
    245 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
    246 
    247 	/*
    248 	 * Check for invalid bits in mask.
    249 	 */
    250 	mask = flent->fe_flow_desc.fd_mask;
    251 	if ((mask & ft->ft_mask) == 0 || (mask & ~ft->ft_mask) != 0)
    252 		return (EOPNOTSUPP);
    253 
    254 	/*
    255 	 * Validate flent.
    256 	 */
    257 	if ((err = ops->fo_accept_fe(ft, flent)) != 0) {
    258 		DTRACE_PROBE3(accept_failed, flow_tab_t *, ft,
    259 		    flow_entry_t *, flent, int, err);
    260 		return (err);
    261 	}
    262 
    263 	/*
    264 	 * Flent is valid. now calculate hash and insert it
    265 	 * into hash table.
    266 	 */
    267 	index = ops->fo_hash_fe(ft, flent);
    268 
    269 	/*
    270 	 * We do not need a lock up until now because we were
    271 	 * not accessing the flow table.
    272 	 */
    273 	rw_enter(&ft->ft_lock, RW_WRITER);
    274 	headp = &ft->ft_table[index];
    275 
    276 	/*
    277 	 * Check for duplicate flow.
    278 	 */
    279 	for (p = headp; *p != NULL; p = &(*p)->fe_next) {
    280 		if ((*p)->fe_flow_desc.fd_mask !=
    281 		    flent->fe_flow_desc.fd_mask)
    282 			continue;
    283 
    284 		if (ft->ft_ops.fo_match_fe(ft, *p, flent)) {
    285 			rw_exit(&ft->ft_lock);
    286 			DTRACE_PROBE3(dup_flow, flow_tab_t *, ft,
    287 			    flow_entry_t *, flent, int, err);
    288 			return (EALREADY);
    289 		}
    290 	}
    291 
    292 	/*
    293 	 * Insert flow to hash list.
    294 	 */
    295 	err = ops->fo_insert_fe(ft, headp, flent);
    296 	if (err != 0) {
    297 		rw_exit(&ft->ft_lock);
    298 		DTRACE_PROBE3(insert_failed, flow_tab_t *, ft,
    299 		    flow_entry_t *, flent, int, err);
    300 		return (err);
    301 	}
    302 
    303 	/*
    304 	 * Save the hash index so it can be used by mac_flow_remove().
    305 	 */
    306 	flent->fe_index = (int)index;
    307 
    308 	/*
    309 	 * Save the flow tab back reference.
    310 	 */
    311 	flent->fe_flow_tab = ft;
    312 	FLOW_MARK(flent, FE_FLOW_TAB);
    313 	ft->ft_flow_count++;
    314 	rw_exit(&ft->ft_lock);
    315 	return (0);
    316 }
    317 
    318 /*
    319  * Remove a flow from a mac client's subflow table
    320  */
    321 void
    322 mac_flow_rem_subflow(flow_entry_t *flent)
    323 {
    324 	flow_tab_t		*ft = flent->fe_flow_tab;
    325 	mac_client_impl_t	*mcip = ft->ft_mcip;
    326 	mac_handle_t		mh = (mac_handle_t)ft->ft_mip;
    327 
    328 	ASSERT(MAC_PERIM_HELD(mh));
    329 
    330 	mac_flow_remove(ft, flent, B_FALSE);
    331 	if (flent->fe_mcip == NULL) {
    332 		/*
    333 		 * The interface is not yet plumbed and mac_client_flow_add
    334 		 * was not done.
    335 		 */
    336 		if (FLOW_TAB_EMPTY(ft)) {
    337 			mac_flow_tab_destroy(ft);
    338 			mcip->mci_subflow_tab = NULL;
    339 		}
    340 	} else {
    341 		mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
    342 		mac_link_flow_clean((mac_client_handle_t)mcip, flent);
    343 	}
    344 	mac_fastpath_enable(mh);
    345 }
    346 
    347 /*
    348  * Add a flow to a mac client's subflow table and instantiate the flow
    349  * in the mac by creating the associated SRSs etc.
    350  */
    351 int
    352 mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent,
    353     boolean_t instantiate_flow)
    354 {
    355 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
    356 	mac_handle_t		mh = (mac_handle_t)mcip->mci_mip;
    357 	flow_tab_info_t		*ftinfo;
    358 	flow_mask_t		mask;
    359 	flow_tab_t		*ft;
    360 	int			err;
    361 	boolean_t		ft_created = B_FALSE;
    362 
    363 	ASSERT(MAC_PERIM_HELD(mh));
    364 
    365 	if ((err = mac_fastpath_disable(mh)) != 0)
    366 		return (err);
    367 
    368 	/*
    369 	 * If the subflow table exists already just add the new subflow
    370 	 * to the existing table, else we create a new subflow table below.
    371 	 */
    372 	ft = mcip->mci_subflow_tab;
    373 	if (ft == NULL) {
    374 		mask = flent->fe_flow_desc.fd_mask;
    375 		/*
    376 		 * Try to create a new table and then add the subflow to the
    377 		 * newly created subflow table
    378 		 */
    379 		if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL) {
    380 			mac_fastpath_enable(mh);
    381 			return (EOPNOTSUPP);
    382 		}
    383 
    384 		mac_flow_tab_create(ftinfo->fti_ops, mask, ftinfo->fti_size,
    385 		    mcip->mci_mip, &ft);
    386 		ft_created = B_TRUE;
    387 	}
    388 
    389 	err = mac_flow_add(ft, flent);
    390 	if (err != 0) {
    391 		if (ft_created)
    392 			mac_flow_tab_destroy(ft);
    393 		mac_fastpath_enable(mh);
    394 		return (err);
    395 	}
    396 
    397 	if (instantiate_flow) {
    398 		/* Now activate the flow by creating its SRSs */
    399 		ASSERT(MCIP_DATAPATH_SETUP(mcip));
    400 		err = mac_link_flow_init((mac_client_handle_t)mcip, flent);
    401 		if (err != 0) {
    402 			mac_flow_remove(ft, flent, B_FALSE);
    403 			if (ft_created)
    404 				mac_flow_tab_destroy(ft);
    405 			mac_fastpath_enable(mh);
    406 			return (err);
    407 		}
    408 	} else {
    409 		FLOW_MARK(flent, FE_UF_NO_DATAPATH);
    410 	}
    411 	if (ft_created) {
    412 		ASSERT(mcip->mci_subflow_tab == NULL);
    413 		ft->ft_mcip = mcip;
    414 		mcip->mci_subflow_tab = ft;
    415 		if (instantiate_flow)
    416 			mac_client_update_classifier(mcip, B_TRUE);
    417 	}
    418 	return (0);
    419 }
    420 
    421 /*
    422  * Remove flow entry from flow table.
    423  */
    424 void
    425 mac_flow_remove(flow_tab_t *ft, flow_entry_t *flent, boolean_t temp)
    426 {
    427 	flow_entry_t	**fp;
    428 
    429 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
    430 	if (!(flent->fe_flags & FE_FLOW_TAB))
    431 		return;
    432 
    433 	rw_enter(&ft->ft_lock, RW_WRITER);
    434 	/*
    435 	 * If this is a permanent removal from the flow table, mark it
    436 	 * CONDEMNED to prevent future references. If this is a temporary
    437 	 * removal from the table, say to update the flow descriptor then
    438 	 * we don't mark it CONDEMNED
    439 	 */
    440 	if (!temp)
    441 		FLOW_MARK(flent, FE_CONDEMNED);
    442 	/*
    443 	 * Locate the specified flent.
    444 	 */
    445 	fp = &ft->ft_table[flent->fe_index];
    446 	while (*fp != flent)
    447 		fp = &(*fp)->fe_next;
    448 
    449 	/*
    450 	 * The flent must exist. Otherwise it's a bug.
    451 	 */
    452 	ASSERT(fp != NULL);
    453 	*fp = flent->fe_next;
    454 	flent->fe_next = NULL;
    455 
    456 	/*
    457 	 * Reset fe_index to -1 so any attempt to call mac_flow_remove()
    458 	 * on a flent that is supposed to be in the table (FE_FLOW_TAB)
    459 	 * will panic.
    460 	 */
    461 	flent->fe_index = -1;
    462 	FLOW_UNMARK(flent, FE_FLOW_TAB);
    463 	ft->ft_flow_count--;
    464 	rw_exit(&ft->ft_lock);
    465 }
    466 
/*
 * This is the flow lookup routine used by the mac sw classifier engine.
 *
 * The table's accept routines are run over mp to accumulate the flow
 * state, then the hash chain selected by fo_hash() is searched for an
 * entry whose fe_match() routine accepts that state.
 *
 * Returns 0 with *flentp set to the first matching entry for which
 * FLOW_TRY_REFHOLD() succeeded; the error from an accept routine if one
 * rejects the packet; EINVAL if a pullup was needed but could not be
 * done; ENOENT if no entry matched.
 */
int
mac_flow_lookup(flow_tab_t *ft, mblk_t *mp, uint_t flags, flow_entry_t **flentp)
{
	flow_state_t	s;
	flow_entry_t	*flent;
	flow_ops_t	*ops = &ft->ft_ops;
	boolean_t	retried = B_FALSE;
	int		i, err;

	s.fs_flags = flags;
retry:
	/* Each pass over the accept routines starts from the first mblk. */
	s.fs_mp = mp;

	/*
	 * Walk the list of predeclared accept functions.
	 * Each of these would accumulate enough state to allow the next
	 * accept routine to make progress.
	 */
	for (i = 0; i < FLOW_MAX_ACCEPT && ops->fo_accept[i] != NULL; i++) {
		if ((err = (ops->fo_accept[i])(ft, &s)) != 0) {
			mblk_t	*last;

			/*
			 * ENOBUFS indicates that the mp could be too short
			 * and may need a pullup. Only one pullup attempt is
			 * made; any other error, or a second ENOBUFS, is
			 * returned to the caller.
			 */
			if (err != ENOBUFS || retried)
				return (err);

			/*
			 * The pullup is done on the last processed mblk, not
			 * the starting one. pullup is not done if the mblk
			 * has references or if b_cont is NULL.
			 */
			last = s.fs_mp;
			if (DB_REF(last) > 1 || last->b_cont == NULL ||
			    pullupmsg(last, -1) == 0)
				return (EINVAL);

			retried = B_TRUE;
			DTRACE_PROBE2(need_pullup, flow_tab_t *, ft,
			    flow_state_t *, &s);
			goto retry;
		}
	}

	/*
	 * The packet is considered sane. We may now attempt to
	 * find the corresponding flent.
	 */
	rw_enter(&ft->ft_lock, RW_READER);
	flent = ft->ft_table[ops->fo_hash(ft, &s)];
	for (; flent != NULL; flent = flent->fe_next) {
		if (flent->fe_match(ft, flent, &s)) {
			/*
			 * A matching entry on which the refhold fails is
			 * skipped and the search continues down the chain.
			 */
			FLOW_TRY_REFHOLD(flent, err);
			if (err != 0)
				continue;
			*flentp = flent;
			rw_exit(&ft->ft_lock);
			return (0);
		}
	}
	rw_exit(&ft->ft_lock);
	return (ENOENT);
}
    535 
    536 /*
    537  * Walk flow table.
    538  * The caller is assumed to have proper perimeter protection.
    539  */
    540 int
    541 mac_flow_walk_nolock(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *),
    542     void *arg)
    543 {
    544 	int		err, i, cnt = 0;
    545 	flow_entry_t	*flent;
    546 
    547 	if (ft == NULL)
    548 		return (0);
    549 
    550 	for (i = 0; i < ft->ft_size; i++) {
    551 		for (flent = ft->ft_table[i]; flent != NULL;
    552 		    flent = flent->fe_next) {
    553 			cnt++;
    554 			err = (*fn)(flent, arg);
    555 			if (err != 0)
    556 				return (err);
    557 		}
    558 	}
    559 	VERIFY(cnt == ft->ft_flow_count);
    560 	return (0);
    561 }
    562 
    563 /*
    564  * Same as the above except a mutex is used for protection here.
    565  */
    566 int
    567 mac_flow_walk(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *),
    568     void *arg)
    569 {
    570 	int		err;
    571 
    572 	if (ft == NULL)
    573 		return (0);
    574 
    575 	rw_enter(&ft->ft_lock, RW_WRITER);
    576 	err = mac_flow_walk_nolock(ft, fn, arg);
    577 	rw_exit(&ft->ft_lock);
    578 	return (err);
    579 }
    580 
    581 static boolean_t	mac_flow_clean(flow_entry_t *);
    582 
    583 /*
    584  * Destroy a flow entry. Called when the last reference on a flow is released.
    585  */
    586 void
    587 mac_flow_destroy(flow_entry_t *flent)
    588 {
    589 	ASSERT(flent->fe_refcnt == 0);
    590 
    591 	if ((flent->fe_type & FLOW_USER) != 0) {
    592 		ASSERT(mac_flow_clean(flent));
    593 	} else {
    594 		mac_flow_cleanup(flent);
    595 	}
    596 
    597 	mutex_destroy(&flent->fe_lock);
    598 	cv_destroy(&flent->fe_cv);
    599 	flow_stat_destroy(flent);
    600 	kmem_cache_free(flow_cache, flent);
    601 }
    602 
    603 /*
    604  * XXX eric
    605  * The MAC_FLOW_PRIORITY checks in mac_resource_ctl_set() and
    606  * mac_link_flow_modify() should really be moved/reworked into the
    607  * two functions below. This would consolidate all the mac property
    608  * checking in one place. I'm leaving this alone for now since it's
    609  * out of scope of the new flows work.
    610  */
    611 /* ARGSUSED */
    612 uint32_t
    613 mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp)
    614 {
    615 	uint32_t		changed_mask = 0;
    616 	mac_resource_props_t	*fmrp = &flent->fe_effective_props;
    617 	int			i;
    618 
    619 	if ((mrp->mrp_mask & MRP_MAXBW) != 0 &&
    620 	    (fmrp->mrp_maxbw != mrp->mrp_maxbw)) {
    621 		changed_mask |= MRP_MAXBW;
    622 		fmrp->mrp_maxbw = mrp->mrp_maxbw;
    623 		if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) {
    624 			fmrp->mrp_mask &= ~MRP_MAXBW;
    625 		} else {
    626 			fmrp->mrp_mask |= MRP_MAXBW;
    627 		}
    628 	}
    629 
    630 	if ((mrp->mrp_mask & MRP_PRIORITY) != 0) {
    631 		if (fmrp->mrp_priority != mrp->mrp_priority)
    632 			changed_mask |= MRP_PRIORITY;
    633 		if (mrp->mrp_priority == MPL_RESET) {
    634 			fmrp->mrp_priority = MPL_SUBFLOW_DEFAULT;
    635 			fmrp->mrp_mask &= ~MRP_PRIORITY;
    636 		} else {
    637 			fmrp->mrp_priority = mrp->mrp_priority;
    638 			fmrp->mrp_mask |= MRP_PRIORITY;
    639 		}
    640 	}
    641 
    642 	/* modify fanout */
    643 	if ((mrp->mrp_mask & MRP_CPUS) != 0) {
    644 		if ((fmrp->mrp_ncpus == mrp->mrp_ncpus) &&
    645 		    (fmrp->mrp_fanout_mode == mrp->mrp_fanout_mode)) {
    646 			for (i = 0; i < mrp->mrp_ncpus; i++) {
    647 				if (mrp->mrp_cpu[i] != fmrp->mrp_cpu[i])
    648 					break;
    649 			}
    650 			if (i == mrp->mrp_ncpus) {
    651 				/*
    652 				 * The new set of cpus passed is exactly
    653 				 * the same as the existing set.
    654 				 */
    655 				return (changed_mask);
    656 			}
    657 		}
    658 		changed_mask |= MRP_CPUS;
    659 		MAC_COPY_CPUS(mrp, fmrp);
    660 	}
    661 	return (changed_mask);
    662 }
    663 
    664 void
    665 mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp)
    666 {
    667 	uint32_t changed_mask;
    668 	mac_client_impl_t *mcip = flent->fe_mcip;
    669 	mac_resource_props_t *mcip_mrp = MCIP_RESOURCE_PROPS(mcip);
    670 
    671 	ASSERT(flent != NULL);
    672 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
    673 
    674 	rw_enter(&ft->ft_lock, RW_WRITER);
    675 
    676 	/* Update the cached values inside the subflow entry */
    677 	changed_mask = mac_flow_modify_props(flent, mrp);
    678 	rw_exit(&ft->ft_lock);
    679 	/*
    680 	 * Push the changed parameters to the scheduling code in the
    681 	 * SRS's, to take effect right away.
    682 	 */
    683 	if (changed_mask & MRP_MAXBW) {
    684 		mac_srs_update_bwlimit(flent, mrp);
    685 		/*
    686 		 * If bandwidth is changed, we may have to change
    687 		 * the number of soft ring to be used for fanout.
    688 		 * Call mac_flow_update_fanout() if MAC_BIND_CPU
    689 		 * is not set and there is no user supplied cpu
    690 		 * info. This applies only to link at this time.
    691 		 */
    692 		if (!(flent->fe_type & FLOW_USER) &&
    693 		    !(changed_mask & MRP_CPUS) &&
    694 		    !(mcip_mrp->mrp_mask & MRP_CPUS_USERSPEC)) {
    695 			mac_fanout_setup(mcip, flent, mcip_mrp,
    696 			    mac_rx_deliver, mcip, NULL);
    697 		}
    698 	}
    699 	if (mrp->mrp_mask & MRP_PRIORITY)
    700 		mac_flow_update_priority(mcip, flent);
    701 
    702 	if (changed_mask & MRP_CPUS)
    703 		mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL);
    704 }
    705 
    706 /*
    707  * This function waits for a certain condition to be met and is generally
    708  * used before a destructive or quiescing operation.
    709  */
    710 void
    711 mac_flow_wait(flow_entry_t *flent, mac_flow_state_t event)
    712 {
    713 	mutex_enter(&flent->fe_lock);
    714 	flent->fe_flags |= FE_WAITER;
    715 
    716 	switch (event) {
    717 	case FLOW_DRIVER_UPCALL:
    718 		/*
    719 		 * We want to make sure the driver upcalls have finished before
    720 		 * we signal the Rx SRS worker to quit.
    721 		 */
    722 		while (flent->fe_refcnt != 1)
    723 			cv_wait(&flent->fe_cv, &flent->fe_lock);
    724 		break;
    725 
    726 	case FLOW_USER_REF:
    727 		/*
    728 		 * Wait for the fe_user_refcnt to drop to 0. The flow has
    729 		 * been removed from the global flow hash.
    730 		 */
    731 		ASSERT(!(flent->fe_flags & FE_G_FLOW_HASH));
    732 		while (flent->fe_user_refcnt != 0)
    733 			cv_wait(&flent->fe_cv, &flent->fe_lock);
    734 		break;
    735 
    736 	default:
    737 		ASSERT(0);
    738 	}
    739 
    740 	flent->fe_flags &= ~FE_WAITER;
    741 	mutex_exit(&flent->fe_lock);
    742 }
    743 
/*
 * Sanity check that a flow entry no longer holds any resources: it is
 * not linked on a hash chain and has no Tx SRS, no Rx SRS and no
 * multicast/broadcast group. Always returns B_TRUE so that the call can
 * itself be wrapped in ASSERT(), as mac_flow_destroy() does.
 */
static boolean_t
mac_flow_clean(flow_entry_t *flent)
{
	ASSERT(flent->fe_next == NULL);
	ASSERT(flent->fe_tx_srs == NULL);
	ASSERT(flent->fe_rx_srs_cnt == 0 && flent->fe_rx_srs[0] == NULL);
	ASSERT(flent->fe_mbg == NULL);

	return (B_TRUE);
}
    754 
    755 void
    756 mac_flow_cleanup(flow_entry_t *flent)
    757 {
    758 	if ((flent->fe_type & FLOW_USER) == 0) {
    759 		ASSERT((flent->fe_mbg == NULL && flent->fe_mcip != NULL) ||
    760 		    (flent->fe_mbg != NULL && flent->fe_mcip == NULL));
    761 		ASSERT(flent->fe_refcnt == 0);
    762 	} else {
    763 		ASSERT(flent->fe_refcnt == 1);
    764 	}
    765 
    766 	if (flent->fe_mbg != NULL) {
    767 		ASSERT(flent->fe_tx_srs == NULL);
    768 		/* This is a multicast or broadcast flow entry */
    769 		mac_bcast_grp_free(flent->fe_mbg);
    770 		flent->fe_mbg = NULL;
    771 	}
    772 
    773 	if (flent->fe_tx_srs != NULL) {
    774 		ASSERT(flent->fe_mbg == NULL);
    775 		mac_srs_free(flent->fe_tx_srs);
    776 		flent->fe_tx_srs = NULL;
    777 	}
    778 
    779 	/*
    780 	 * In the normal case fe_rx_srs_cnt is 1. However in the error case
    781 	 * when mac_unicast_add fails we may not have set up any SRS
    782 	 * in which case fe_rx_srs_cnt will be zero.
    783 	 */
    784 	if (flent->fe_rx_srs_cnt != 0) {
    785 		ASSERT(flent->fe_rx_srs_cnt == 1);
    786 		mac_srs_free(flent->fe_rx_srs[0]);
    787 		flent->fe_rx_srs[0] = NULL;
    788 		flent->fe_rx_srs_cnt = 0;
    789 	}
    790 	ASSERT(flent->fe_rx_srs[0] == NULL);
    791 }
    792 
    793 void
    794 mac_flow_get_desc(flow_entry_t *flent, flow_desc_t *fd)
    795 {
    796 	/*
    797 	 * Grab the fe_lock to see a self-consistent fe_flow_desc.
    798 	 * Updates to the fe_flow_desc happen under the fe_lock
    799 	 * after removing the flent from the flow table
    800 	 */
    801 	mutex_enter(&flent->fe_lock);
    802 	bcopy(&flent->fe_flow_desc, fd, sizeof (*fd));
    803 	mutex_exit(&flent->fe_lock);
    804 }
    805 
    806 /*
    807  * Update a field of a flow entry. The mac perimeter ensures that
    808  * this is the only thread doing a modify operation on this mac end point.
    809  * So the flow table can't change or disappear. The ft_lock protects access
    810  * to the flow entry, and holding the lock ensures that there isn't any thread
    811  * accessing the flow entry or attempting a flow table lookup. However
    812  * data threads that are using the flow entry based on the old descriptor
    813  * will continue to use the flow entry. If strong coherence is required
    814  * then the flow will have to be quiesced before the descriptor can be
    815  * changed.
    816  */
    817 void
    818 mac_flow_set_desc(flow_entry_t *flent, flow_desc_t *fd)
    819 {
    820 	flow_tab_t	*ft = flent->fe_flow_tab;
    821 	flow_desc_t	old_desc;
    822 	int		err;
    823 
    824 	if (ft == NULL) {
    825 		/*
    826 		 * The flow hasn't yet been inserted into the table,
    827 		 * so only the caller knows about this flow, however for
    828 		 * uniformity we grab the fe_lock here.
    829 		 */
    830 		mutex_enter(&flent->fe_lock);
    831 		bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
    832 		mutex_exit(&flent->fe_lock);
    833 	}
    834 
    835 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
    836 
    837 	/*
    838 	 * Need to remove the flow entry from the table and reinsert it,
    839 	 * into a potentially diference hash line. The hash depends on
    840 	 * the new descriptor fields. However access to fe_desc itself
    841 	 * is always under the fe_lock. This helps log and stat functions
    842 	 * see a self-consistent fe_flow_desc.
    843 	 */
    844 	mac_flow_remove(ft, flent, B_TRUE);
    845 	old_desc = flent->fe_flow_desc;
    846 
    847 	mutex_enter(&flent->fe_lock);
    848 	bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
    849 	mutex_exit(&flent->fe_lock);
    850 
    851 	if (mac_flow_add(ft, flent) != 0) {
    852 		/*
    853 		 * The add failed say due to an invalid flow descriptor.
    854 		 * Undo the update
    855 		 */
    856 		flent->fe_flow_desc = old_desc;
    857 		err = mac_flow_add(ft, flent);
    858 		ASSERT(err == 0);
    859 	}
    860 }
    861 
    862 void
    863 mac_flow_set_name(flow_entry_t *flent, const char *name)
    864 {
    865 	flow_tab_t	*ft = flent->fe_flow_tab;
    866 
    867 	if (ft == NULL) {
    868 		/*
    869 		 *  The flow hasn't yet been inserted into the table,
    870 		 * so only the caller knows about this flow
    871 		 */
    872 		(void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
    873 	} else {
    874 		ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
    875 	}
    876 
    877 	mutex_enter(&flent->fe_lock);
    878 	(void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
    879 	mutex_exit(&flent->fe_lock);
    880 }
    881 
    882 /*
    883  * Return the client-private cookie that was associated with
    884  * the flow when it was created.
    885  */
    886 void *
    887 mac_flow_get_client_cookie(flow_entry_t *flent)
    888 {
    889 	return (flent->fe_client_cookie);
    890 }
    891 
/*
 * Forward declarations.
 * mac_flow_tab_create() compares a table's ops against the generic L2
 * routines and substitutes the ether variants on DL_ETHER media.
 */
static uint32_t	flow_l2_hash(flow_tab_t *, flow_state_t *);
static uint32_t	flow_l2_hash_fe(flow_tab_t *, flow_entry_t *);
static int	flow_l2_accept(flow_tab_t *, flow_state_t *);
static uint32_t	flow_ether_hash(flow_tab_t *, flow_state_t *);
static uint32_t	flow_ether_hash_fe(flow_tab_t *, flow_entry_t *);
static int	flow_ether_accept(flow_tab_t *, flow_state_t *);
    901 
    902 /*
    903  * Create flow table.
    904  */
    905 void
    906 mac_flow_tab_create(flow_ops_t *ops, flow_mask_t mask, uint_t size,
    907     mac_impl_t *mip, flow_tab_t **ftp)
    908 {
    909 	flow_tab_t	*ft;
    910 	flow_ops_t	*new_ops;
    911 
    912 	ft = kmem_cache_alloc(flow_tab_cache, KM_SLEEP);
    913 	bzero(ft, sizeof (*ft));
    914 
    915 	ft->ft_table = kmem_zalloc(size * sizeof (flow_entry_t *), KM_SLEEP);
    916 
    917 	/*
    918 	 * We make a copy of the ops vector instead of just pointing to it
    919 	 * because we might want to customize the ops vector on a per table
    920 	 * basis (e.g. for optimization).
    921 	 */
    922 	new_ops = &ft->ft_ops;
    923 	bcopy(ops, new_ops, sizeof (*ops));
    924 	ft->ft_mask = mask;
    925 	ft->ft_size = size;
    926 	ft->ft_mip = mip;
    927 
    928 	/*
    929 	 * Optimizations for DL_ETHER media.
    930 	 */
    931 	if (mip->mi_info.mi_nativemedia == DL_ETHER) {
    932 		if (new_ops->fo_hash == flow_l2_hash)
    933 			new_ops->fo_hash = flow_ether_hash;
    934 		if (new_ops->fo_hash_fe == flow_l2_hash_fe)
    935 			new_ops->fo_hash_fe = flow_ether_hash_fe;
    936 		if (new_ops->fo_accept[0] == flow_l2_accept)
    937 			new_ops->fo_accept[0] = flow_ether_accept;
    938 	}
    939 	*ftp = ft;
    940 }
    941 
    942 void
    943 mac_flow_l2tab_create(mac_impl_t *mip, flow_tab_t **ftp)
    944 {
    945 	mac_flow_tab_create(&flow_l2_ops, FLOW_LINK_DST | FLOW_LINK_VID,
    946 	    1024, mip, ftp);
    947 }
    948 
    949 /*
    950  * Destroy flow table.
    951  */
    952 void
    953 mac_flow_tab_destroy(flow_tab_t *ft)
    954 {
    955 	if (ft == NULL)
    956 		return;
    957 
    958 	ASSERT(ft->ft_flow_count == 0);
    959 	kmem_free(ft->ft_table, ft->ft_size * sizeof (flow_entry_t *));
    960 	bzero(ft, sizeof (*ft));
    961 	kmem_cache_free(flow_tab_cache, ft);
    962 }
    963 
    964 /*
    965  * Add a new flow entry to the global flow hash table
    966  */
    967 int
    968 mac_flow_hash_add(flow_entry_t *flent)
    969 {
    970 	int	err;
    971 
    972 	rw_enter(&flow_tab_lock, RW_WRITER);
    973 	err = mod_hash_insert(flow_hash,
    974 	    (mod_hash_key_t)flent->fe_flow_name, (mod_hash_val_t)flent);
    975 	if (err != 0) {
    976 		rw_exit(&flow_tab_lock);
    977 		return (EEXIST);
    978 	}
    979 	/* Mark as inserted into the global flow hash table */
    980 	FLOW_MARK(flent, FE_G_FLOW_HASH);
    981 	rw_exit(&flow_tab_lock);
    982 	return (err);
    983 }
    984 
    985 /*
    986  * Remove a flow entry from the global flow hash table
    987  */
    988 void
    989 mac_flow_hash_remove(flow_entry_t *flent)
    990 {
    991 	mod_hash_val_t	val;
    992 
    993 	rw_enter(&flow_tab_lock, RW_WRITER);
    994 	VERIFY(mod_hash_remove(flow_hash,
    995 	    (mod_hash_key_t)flent->fe_flow_name, &val) == 0);
    996 
    997 	/* Clear the mark that says inserted into the global flow hash table */
    998 	FLOW_UNMARK(flent, FE_G_FLOW_HASH);
    999 	rw_exit(&flow_tab_lock);
   1000 }
   1001 
   1002 /*
   1003  * Retrieve a flow entry from the global flow hash table.
   1004  */
   1005 int
   1006 mac_flow_lookup_byname(char *name, flow_entry_t **flentp)
   1007 {
   1008 	int		err;
   1009 	flow_entry_t	*flent;
   1010 
   1011 	rw_enter(&flow_tab_lock, RW_READER);
   1012 	err = mod_hash_find(flow_hash, (mod_hash_key_t)name,
   1013 	    (mod_hash_val_t *)&flent);
   1014 	if (err != 0) {
   1015 		rw_exit(&flow_tab_lock);
   1016 		return (ENOENT);
   1017 	}
   1018 	ASSERT(flent != NULL);
   1019 	FLOW_USER_REFHOLD(flent);
   1020 	rw_exit(&flow_tab_lock);
   1021 
   1022 	*flentp = flent;
   1023 	return (0);
   1024 }
   1025 
   1026 /*
   1027  * Initialize or release mac client flows by walking the subflow table.
   1028  * These are typically invoked during plumb/unplumb of links.
   1029  */
   1030 
   1031 static int
   1032 mac_link_init_flows_cb(flow_entry_t *flent, void *arg)
   1033 {
   1034 	mac_client_impl_t	*mcip = arg;
   1035 
   1036 	if (mac_link_flow_init(arg, flent) != 0) {
   1037 		cmn_err(CE_WARN, "Failed to initialize flow '%s' on link '%s'",
   1038 		    flent->fe_flow_name, mcip->mci_name);
   1039 	} else {
   1040 		FLOW_UNMARK(flent, FE_UF_NO_DATAPATH);
   1041 	}
   1042 	return (0);
   1043 }
   1044 
   1045 void
   1046 mac_link_init_flows(mac_client_handle_t mch)
   1047 {
   1048 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   1049 
   1050 	(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
   1051 	    mac_link_init_flows_cb, mcip);
   1052 	/*
   1053 	 * If mac client had subflow(s) configured before plumb, change
   1054 	 * function to mac_rx_srs_subflow_process and in case of hardware
   1055 	 * classification, disable polling.
   1056 	 */
   1057 	mac_client_update_classifier(mcip, B_TRUE);
   1058 
   1059 }
   1060 
   1061 boolean_t
   1062 mac_link_has_flows(mac_client_handle_t mch)
   1063 {
   1064 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   1065 
   1066 	if (!FLOW_TAB_EMPTY(mcip->mci_subflow_tab))
   1067 		return (B_TRUE);
   1068 
   1069 	return (B_FALSE);
   1070 }
   1071 
   1072 static int
   1073 mac_link_release_flows_cb(flow_entry_t *flent, void *arg)
   1074 {
   1075 	FLOW_MARK(flent, FE_UF_NO_DATAPATH);
   1076 	mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
   1077 	mac_link_flow_clean(arg, flent);
   1078 	return (0);
   1079 }
   1080 
   1081 void
   1082 mac_link_release_flows(mac_client_handle_t mch)
   1083 {
   1084 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
   1085 
   1086 	/*
   1087 	 * Change the mci_flent callback back to mac_rx_srs_process()
   1088 	 * because flows are about to be deactivated.
   1089 	 */
   1090 	mac_client_update_classifier(mcip, B_FALSE);
   1091 	(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
   1092 	    mac_link_release_flows_cb, mcip);
   1093 }
   1094 
   1095 void
   1096 mac_rename_flow(flow_entry_t *fep, const char *new_name)
   1097 {
   1098 	mac_flow_set_name(fep, new_name);
   1099 	if (fep->fe_ksp != NULL) {
   1100 		flow_stat_destroy(fep);
   1101 		flow_stat_create(fep);
   1102 	}
   1103 }
   1104 
   1105 /*
   1106  * mac_link_flow_init()
   1107  * Internal flow interface used for allocating SRSs and related
   1108  * data structures. Not meant to be used by mac clients.
   1109  */
   1110 int
   1111 mac_link_flow_init(mac_client_handle_t mch, flow_entry_t *sub_flow)
   1112 {
   1113 	mac_client_impl_t 	*mcip = (mac_client_impl_t *)mch;
   1114 	mac_impl_t		*mip = mcip->mci_mip;
   1115 	int			err;
   1116 
   1117 	ASSERT(mch != NULL);
   1118 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1119 
   1120 	if ((err = mac_datapath_setup(mcip, sub_flow, SRST_FLOW)) != 0)
   1121 		return (err);
   1122 
   1123 	sub_flow->fe_mcip = mcip;
   1124 
   1125 	return (0);
   1126 }
   1127 
/*
 * mac_link_flow_add()
 * Used by flowadm(1m) or kernel mac clients for creating flows.
 *
 * Creates a flow entry named flow_name from flow_desc and mrp, inserts it
 * into the global flow hash and adds it as a subflow of the link
 * identified by linkid.
 *
 * Returns 0 on success, EEXIST if a flow of that name is already in the
 * global flow hash, ENOTSUP if the link is an anchor VNIC, or the error
 * of whichever step failed.
 */
int
mac_link_flow_add(datalink_id_t linkid, char *flow_name,
    flow_desc_t *flow_desc, mac_resource_props_t *mrp)
{
	flow_entry_t		*flent = NULL;
	int			err;
	dls_dl_handle_t		dlh;
	dls_link_t		*dlp;
	/* Track which steps completed so that 'bail' undoes only those. */
	boolean_t		link_held = B_FALSE;
	boolean_t		hash_added = B_FALSE;
	mac_perim_handle_t	mph;

	/* Refuse early if the name is already taken. */
	err = mac_flow_lookup_byname(flow_name, &flent);
	if (err == 0) {
		FLOW_USER_REFRELE(flent);
		return (EEXIST);
	}

	/*
	 * First create a flow entry given the description provided
	 * by the caller.
	 */
	err = mac_flow_create(flow_desc, mrp, flow_name, NULL,
	    FLOW_USER | FLOW_OTHER, &flent);

	if (err != 0)
		return (err);

	/*
	 * We've got a local variable referencing this flow now, so we need
	 * to hold it. We'll release this flow before returning.
	 * All failures until we return will undo any action that may have
	 * internally held the flow, so the last REFRELE will ensure a clean
	 * freeing of resources.
	 */
	FLOW_REFHOLD(flent);

	flent->fe_link_id = linkid;
	FLOW_MARK(flent, FE_INCIPIENT);

	err = mac_perim_enter_by_linkid(linkid, &mph);
	if (err != 0) {
		/* The perimeter was never entered, so don't go through bail. */
		FLOW_FINAL_REFRELE(flent);
		return (err);
	}

	/*
	 * dls will eventually be merged with mac so it's ok
	 * to call dls' internal functions.
	 */
	err = dls_devnet_hold_link(linkid, &dlh, &dlp);
	if (err != 0)
		goto bail;

	link_held = B_TRUE;

	/*
	 * Add the flow to the global flow table, this table will be per
	 * exclusive zone so each zone can have its own flow namespace.
	 * RFE 6625651 will fix this.
	 *
	 */
	if ((err = mac_flow_hash_add(flent)) != 0)
		goto bail;

	hash_added = B_TRUE;

	/*
	 * do not allow flows to be configured on an anchor VNIC
	 */
	if (mac_capab_get(dlp->dl_mh, MAC_CAPAB_ANCHOR_VNIC, NULL)) {
		err = ENOTSUP;
		goto bail;
	}

	/*
	 * Add the subflow to the subflow table. Also instantiate the flow
	 * in the mac if there is an active user (we check if the MAC client's
	 * datapath has been setup).
	 */
	err = mac_flow_add_subflow(dlp->dl_mch, flent,
	    MCIP_DATAPATH_SETUP((mac_client_impl_t *)dlp->dl_mch));
	if (err != 0)
		goto bail;

	/* The flow is fully set up: it is no longer incipient. */
	FLOW_UNMARK(flent, FE_INCIPIENT);
	dls_devnet_rele_link(dlh, dlp);
	mac_perim_exit(mph);
	return (0);

bail:
	/* Unwind only the steps that actually completed. */
	if (hash_added)
		mac_flow_hash_remove(flent);

	if (link_held)
		dls_devnet_rele_link(dlh, dlp);

	/*
	 * Wait for any transient global flow hash refs to clear
	 * and then release the creation reference on the flow
	 */
	mac_flow_wait(flent, FLOW_USER_REF);
	FLOW_FINAL_REFRELE(flent);
	mac_perim_exit(mph);
	return (err);
}
   1238 
   1239 /*
   1240  * mac_link_flow_clean()
   1241  * Internal flow interface used for freeing SRSs and related
   1242  * data structures. Not meant to be used by mac clients.
   1243  */
   1244 void
   1245 mac_link_flow_clean(mac_client_handle_t mch, flow_entry_t *sub_flow)
   1246 {
   1247 	mac_client_impl_t 	*mcip = (mac_client_impl_t *)mch;
   1248 	mac_impl_t		*mip = mcip->mci_mip;
   1249 	boolean_t		last_subflow;
   1250 
   1251 	ASSERT(mch != NULL);
   1252 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
   1253 
   1254 	/*
   1255 	 * This sub flow entry may fail to be fully initialized by
   1256 	 * mac_link_flow_init(). If so, simply return.
   1257 	 */
   1258 	if (sub_flow->fe_mcip == NULL)
   1259 		return;
   1260 
   1261 	last_subflow = FLOW_TAB_EMPTY(mcip->mci_subflow_tab);
   1262 	/*
   1263 	 * Tear down the data path
   1264 	 */
   1265 	mac_datapath_teardown(mcip, sub_flow, SRST_FLOW);
   1266 	sub_flow->fe_mcip = NULL;
   1267 
   1268 	/*
   1269 	 * Delete the SRSs associated with this subflow. If this is being
   1270 	 * driven by flowadm(1M) then the subflow will be deleted by
   1271 	 * dls_rem_flow. However if this is a result of the interface being
   1272 	 * unplumbed then the subflow itself won't be deleted.
   1273 	 */
   1274 	mac_flow_cleanup(sub_flow);
   1275 
   1276 	/*
   1277 	 * If all the subflows are gone, renable some of the stuff
   1278 	 * we disabled when adding a subflow, polling etc.
   1279 	 */
   1280 	if (last_subflow) {
   1281 		/*
   1282 		 * The subflow table itself is not protected by any locks or
   1283 		 * refcnts. Hence quiesce the client upfront before clearing
   1284 		 * mci_subflow_tab.
   1285 		 */
   1286 		mac_client_quiesce(mcip);
   1287 		mac_client_update_classifier(mcip, B_FALSE);
   1288 		mac_flow_tab_destroy(mcip->mci_subflow_tab);
   1289 		mcip->mci_subflow_tab = NULL;
   1290 		mac_client_restart(mcip);
   1291 	}
   1292 }
   1293 
   1294 /*
   1295  * mac_link_flow_remove()
   1296  * Used by flowadm(1m) or kernel mac clients for removing flows.
   1297  */
   1298 int
   1299 mac_link_flow_remove(char *flow_name)
   1300 {
   1301 	flow_entry_t		*flent;
   1302 	mac_perim_handle_t	mph;
   1303 	int			err;
   1304 	datalink_id_t		linkid;
   1305 
   1306 	err = mac_flow_lookup_byname(flow_name, &flent);
   1307 	if (err != 0)
   1308 		return (err);
   1309 
   1310 	linkid = flent->fe_link_id;
   1311 	FLOW_USER_REFRELE(flent);
   1312 
   1313 	/*
   1314 	 * The perim must be acquired before acquiring any other references
   1315 	 * to maintain the lock and perimeter hierarchy. Please note the
   1316 	 * FLOW_REFRELE above.
   1317 	 */
   1318 	err = mac_perim_enter_by_linkid(linkid, &mph);
   1319 	if (err != 0)
   1320 		return (err);
   1321 
   1322 	/*
   1323 	 * Note the second lookup of the flow, because a concurrent thread
   1324 	 * may have removed it already while we were waiting to enter the
   1325 	 * link's perimeter.
   1326 	 */
   1327 	err = mac_flow_lookup_byname(flow_name, &flent);
   1328 	if (err != 0) {
   1329 		mac_perim_exit(mph);
   1330 		return (err);
   1331 	}
   1332 	FLOW_USER_REFRELE(flent);
   1333 
   1334 	/*
   1335 	 * Remove the flow from the subflow table and deactivate the flow
   1336 	 * by quiescing and removings its SRSs
   1337 	 */
   1338 	mac_flow_rem_subflow(flent);
   1339 
   1340 	/*
   1341 	 * Finally, remove the flow from the global table.
   1342 	 */
   1343 	mac_flow_hash_remove(flent);
   1344 
   1345 	/*
   1346 	 * Wait for any transient global flow hash refs to clear
   1347 	 * and then release the creation reference on the flow
   1348 	 */
   1349 	mac_flow_wait(flent, FLOW_USER_REF);
   1350 	FLOW_FINAL_REFRELE(flent);
   1351 
   1352 	mac_perim_exit(mph);
   1353 
   1354 	return (0);
   1355 }
   1356 
   1357 /*
   1358  * mac_link_flow_modify()
   1359  * Modifies the properties of a flow identified by its name.
   1360  */
   1361 int
   1362 mac_link_flow_modify(char *flow_name, mac_resource_props_t *mrp)
   1363 {
   1364 	flow_entry_t		*flent;
   1365 	mac_client_impl_t 	*mcip;
   1366 	int			err = 0;
   1367 	mac_perim_handle_t	mph;
   1368 	datalink_id_t		linkid;
   1369 	flow_tab_t		*flow_tab;
   1370 
   1371 	err = mac_validate_props(mrp);
   1372 	if (err != 0)
   1373 		return (err);
   1374 
   1375 	err = mac_flow_lookup_byname(flow_name, &flent);
   1376 	if (err != 0)
   1377 		return (err);
   1378 
   1379 	linkid = flent->fe_link_id;
   1380 	FLOW_USER_REFRELE(flent);
   1381 
   1382 	/*
   1383 	 * The perim must be acquired before acquiring any other references
   1384 	 * to maintain the lock and perimeter hierarchy. Please note the
   1385 	 * FLOW_REFRELE above.
   1386 	 */
   1387 	err = mac_perim_enter_by_linkid(linkid, &mph);
   1388 	if (err != 0)
   1389 		return (err);
   1390 
   1391 	/*
   1392 	 * Note the second lookup of the flow, because a concurrent thread
   1393 	 * may have removed it already while we were waiting to enter the
   1394 	 * link's perimeter.
   1395 	 */
   1396 	err = mac_flow_lookup_byname(flow_name, &flent);
   1397 	if (err != 0) {
   1398 		mac_perim_exit(mph);
   1399 		return (err);
   1400 	}
   1401 	FLOW_USER_REFRELE(flent);
   1402 
   1403 	/*
   1404 	 * If this flow is attached to a MAC client, then pass the request
   1405 	 * along to the client.
   1406 	 * Otherwise, just update the cached values.
   1407 	 */
   1408 	mcip = flent->fe_mcip;
   1409 	mac_update_resources(mrp, &flent->fe_resource_props, B_TRUE);
   1410 	if (mcip != NULL) {
   1411 		if ((flow_tab = mcip->mci_subflow_tab) == NULL) {
   1412 			err = ENOENT;
   1413 		} else {
   1414 			mac_flow_modify(flow_tab, flent, mrp);
   1415 		}
   1416 	} else {
   1417 		(void) mac_flow_modify_props(flent, mrp);
   1418 	}
   1419 
   1420 done:
   1421 	mac_perim_exit(mph);
   1422 	return (err);
   1423 }
   1424 
   1425 
/*
 * State structure and misc functions used by mac_link_flow_walk().
 */
typedef struct {
	/* callback supplied by the caller of mac_link_flow_walk() */
	int	(*ws_func)(mac_flowinfo_t *, void *);
	/* caller's argument, passed back to ws_func on every invocation */
	void	*ws_arg;
} flow_walk_state_t;
   1433 
   1434 static void
   1435 mac_link_flowinfo_copy(mac_flowinfo_t *finfop, flow_entry_t *flent)
   1436 {
   1437 	(void) strlcpy(finfop->fi_flow_name, flent->fe_flow_name,
   1438 	    MAXFLOWNAMELEN);
   1439 	finfop->fi_link_id = flent->fe_link_id;
   1440 	finfop->fi_flow_desc = flent->fe_flow_desc;
   1441 	finfop->fi_resource_props = flent->fe_resource_props;
   1442 }
   1443 
   1444 static int
   1445 mac_link_flow_walk_cb(flow_entry_t *flent, void *arg)
   1446 {
   1447 	flow_walk_state_t	*statep = arg;
   1448 	mac_flowinfo_t		finfo;
   1449 
   1450 	mac_link_flowinfo_copy(&finfo, flent);
   1451 	return (statep->ws_func(&finfo, statep->ws_arg));
   1452 }
   1453 
   1454 /*
   1455  * mac_link_flow_walk()
   1456  * Invokes callback 'func' for all flows belonging to the specified link.
   1457  */
   1458 int
   1459 mac_link_flow_walk(datalink_id_t linkid,
   1460     int (*func)(mac_flowinfo_t *, void *), void *arg)
   1461 {
   1462 	mac_client_impl_t	*mcip;
   1463 	mac_perim_handle_t	mph;
   1464 	flow_walk_state_t	state;
   1465 	dls_dl_handle_t		dlh;
   1466 	dls_link_t		*dlp;
   1467 	int			err;
   1468 
   1469 	err = mac_perim_enter_by_linkid(linkid, &mph);
   1470 	if (err != 0)
   1471 		return (err);
   1472 
   1473 	err = dls_devnet_hold_link(linkid, &dlh, &dlp);
   1474 	if (err != 0) {
   1475 		mac_perim_exit(mph);
   1476 		return (err);
   1477 	}
   1478 
   1479 	mcip = (mac_client_impl_t *)dlp->dl_mch;
   1480 	state.ws_func = func;
   1481 	state.ws_arg = arg;
   1482 
   1483 	err = mac_flow_walk_nolock(mcip->mci_subflow_tab,
   1484 	    mac_link_flow_walk_cb, &state);
   1485 
   1486 	dls_devnet_rele_link(dlh, dlp);
   1487 	mac_perim_exit(mph);
   1488 	return (err);
   1489 }
   1490 
   1491 /*
   1492  * mac_link_flow_info()
   1493  * Retrieves information about a specific flow.
   1494  */
   1495 int
   1496 mac_link_flow_info(char *flow_name, mac_flowinfo_t *finfo)
   1497 {
   1498 	flow_entry_t	*flent;
   1499 	int		err;
   1500 
   1501 	err = mac_flow_lookup_byname(flow_name, &flent);
   1502 	if (err != 0)
   1503 		return (err);
   1504 
   1505 	mac_link_flowinfo_copy(finfo, flent);
   1506 	FLOW_USER_REFRELE(flent);
   1507 	return (0);
   1508 }
   1509 
   1510 /*
   1511  * Hash function macro that takes an Ethernet address and VLAN id as input.
   1512  */
   1513 #define	HASH_ETHER_VID(a, v, s)	\
   1514 	((((uint32_t)(a)[3] + (a)[4] + (a)[5]) ^ (v)) % (s))
   1515 
   1516 /*
   1517  * Generic layer-2 address hashing function that takes an address and address
   1518  * length as input.  This is the DJB hash function.
   1519  */
   1520 static uint32_t
   1521 flow_l2_addrhash(uint8_t *addr, size_t addrlen, size_t htsize)
   1522 {
   1523 	uint32_t	hash = 5381;
   1524 	size_t		i;
   1525 
   1526 	for (i = 0; i < addrlen; i++)
   1527 		hash = ((hash << 5) + hash) + addr[i];
   1528 	return (hash % htsize);
   1529 }
   1530 
   1531 #define	PKT_TOO_SMALL(s, end) ((s)->fs_mp->b_wptr < (end))
   1532 
   1533 #define	CHECK_AND_ADJUST_START_PTR(s, start) {		\
   1534 	if ((s)->fs_mp->b_wptr == (start)) {		\
   1535 		mblk_t	*next = (s)->fs_mp->b_cont;	\
   1536 		if (next == NULL)			\
   1537 			return (EINVAL);		\
   1538 							\
   1539 		(s)->fs_mp = next;			\
   1540 		(start) = next->b_rptr;			\
   1541 	}						\
   1542 }
   1543 
   1544 /* ARGSUSED */
   1545 static boolean_t
   1546 flow_l2_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
   1547 {
   1548 	flow_l2info_t		*l2 = &s->fs_l2info;
   1549 	flow_desc_t		*fd = &flent->fe_flow_desc;
   1550 
   1551 	return (l2->l2_vid == fd->fd_vid &&
   1552 	    bcmp(l2->l2_daddr, fd->fd_dst_mac, fd->fd_mac_len) == 0);
   1553 }
   1554 
   1555 /*
   1556  * Layer 2 hash function.
   1557  * Must be paired with flow_l2_accept() within a set of flow_ops
   1558  * because it assumes the dest address is already extracted.
   1559  */
   1560 static uint32_t
   1561 flow_l2_hash(flow_tab_t *ft, flow_state_t *s)
   1562 {
   1563 	return (flow_l2_addrhash(s->fs_l2info.l2_daddr,
   1564 	    ft->ft_mip->mi_type->mt_addr_length, ft->ft_size));
   1565 }
   1566 
   1567 /*
   1568  * This is the generic layer 2 accept function.
   1569  * It makes use of mac_header_info() to extract the header length,
   1570  * sap, vlan ID and destination address.
   1571  */
   1572 static int
   1573 flow_l2_accept(flow_tab_t *ft, flow_state_t *s)
   1574 {
   1575 	boolean_t		is_ether;
   1576 	flow_l2info_t		*l2 = &s->fs_l2info;
   1577 	mac_header_info_t	mhi;
   1578 	int			err;
   1579 
   1580 	is_ether = (ft->ft_mip->mi_info.mi_nativemedia == DL_ETHER);
   1581 	if ((err = mac_header_info((mac_handle_t)ft->ft_mip,
   1582 	    s->fs_mp, &mhi)) != 0) {
   1583 		if (err == EINVAL)
   1584 			err = ENOBUFS;
   1585 
   1586 		return (err);
   1587 	}
   1588 
   1589 	l2->l2_start = s->fs_mp->b_rptr;
   1590 	l2->l2_daddr = (uint8_t *)mhi.mhi_daddr;
   1591 
   1592 	if (is_ether && mhi.mhi_bindsap == ETHERTYPE_VLAN &&
   1593 	    ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) {
   1594 		struct ether_vlan_header	*evhp =
   1595 		    (struct ether_vlan_header *)l2->l2_start;
   1596 
   1597 		if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp)))
   1598 			return (ENOBUFS);
   1599 
   1600 		l2->l2_sap = ntohs(evhp->ether_type);
   1601 		l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci));
   1602 		l2->l2_hdrsize = sizeof (*evhp);
   1603 	} else {
   1604 		l2->l2_sap = mhi.mhi_bindsap;
   1605 		l2->l2_vid = 0;
   1606 		l2->l2_hdrsize = (uint32_t)mhi.mhi_hdrsize;
   1607 	}
   1608 	return (0);
   1609 }
   1610 
   1611 /*
   1612  * flow_ether_hash()/accept() are optimized versions of flow_l2_hash()/
   1613  * accept(). The notable difference is that dest address is now extracted
   1614  * by hash() rather than by accept(). This saves a few memory references
   1615  * for flow tables that do not care about mac addresses.
   1616  */
   1617 static uint32_t
   1618 flow_ether_hash(flow_tab_t *ft, flow_state_t *s)
   1619 {
   1620 	flow_l2info_t			*l2 = &s->fs_l2info;
   1621 	struct ether_vlan_header	*evhp;
   1622 
   1623 	evhp = (struct ether_vlan_header *)l2->l2_start;
   1624 	l2->l2_daddr = evhp->ether_dhost.ether_addr_octet;
   1625 	return (HASH_ETHER_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size));
   1626 }
   1627 
   1628 static uint32_t
   1629 flow_ether_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
   1630 {
   1631 	flow_desc_t	*fd = &flent->fe_flow_desc;
   1632 
   1633 	ASSERT((fd->fd_mask & FLOW_LINK_VID) != 0 || fd->fd_vid == 0);
   1634 	return (HASH_ETHER_VID(fd->fd_dst_mac, fd->fd_vid, ft->ft_size));
   1635 }
   1636 
   1637 /* ARGSUSED */
   1638 static int
   1639 flow_ether_accept(flow_tab_t *ft, flow_state_t *s)
   1640 {
   1641 	flow_l2info_t			*l2 = &s->fs_l2info;
   1642 	struct ether_vlan_header	*evhp;
   1643 	uint16_t			sap;
   1644 
   1645 	evhp = (struct ether_vlan_header *)s->fs_mp->b_rptr;
   1646 	l2->l2_start = (uchar_t *)evhp;
   1647 
   1648 	if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (struct ether_header)))
   1649 		return (ENOBUFS);
   1650 
   1651 	if ((sap = ntohs(evhp->ether_tpid)) == ETHERTYPE_VLAN &&
   1652 	    ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) {
   1653 		if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp)))
   1654 			return (ENOBUFS);
   1655 
   1656 		l2->l2_sap = ntohs(evhp->ether_type);
   1657 		l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci));
   1658 		l2->l2_hdrsize = sizeof (struct ether_vlan_header);
   1659 	} else {
   1660 		l2->l2_sap = sap;
   1661 		l2->l2_vid = 0;
   1662 		l2->l2_hdrsize = sizeof (struct ether_header);
   1663 	}
   1664 	return (0);
   1665 }
   1666 
   1667 /*
   1668  * Validates a layer 2 flow entry.
   1669  */
   1670 static int
   1671 flow_l2_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
   1672 {
   1673 	flow_desc_t	*fd = &flent->fe_flow_desc;
   1674 
   1675 	/*
   1676 	 * Dest address is mandatory, and 0 length addresses are not yet
   1677 	 * supported.
   1678 	 */
   1679 	if ((fd->fd_mask & FLOW_LINK_DST) == 0 || fd->fd_mac_len == 0)
   1680 		return (EINVAL);
   1681 
   1682 	if ((fd->fd_mask & FLOW_LINK_VID) != 0) {
   1683 		/*
   1684 		 * VLAN flows are only supported over ethernet macs.
   1685 		 */
   1686 		if (ft->ft_mip->mi_info.mi_nativemedia != DL_ETHER)
   1687 			return (EINVAL);
   1688 
   1689 		if (fd->fd_vid == 0)
   1690 			return (EINVAL);
   1691 
   1692 	}
   1693 	flent->fe_match = flow_l2_match;
   1694 	return (0);
   1695 }
   1696 
   1697 /*
   1698  * Calculates hash index of flow entry.
   1699  */
   1700 static uint32_t
   1701 flow_l2_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
   1702 {
   1703 	flow_desc_t	*fd = &flent->fe_flow_desc;
   1704 
   1705 	ASSERT((fd->fd_mask & FLOW_LINK_VID) == 0 && fd->fd_vid == 0);
   1706 	return (flow_l2_addrhash(fd->fd_dst_mac,
   1707 	    ft->ft_mip->mi_type->mt_addr_length, ft->ft_size));
   1708 }
   1709 
   1710 /*
   1711  * This is used for duplicate flow checking.
   1712  */
   1713 /* ARGSUSED */
   1714 static boolean_t
   1715 flow_l2_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
   1716 {
   1717 	flow_desc_t	*fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
   1718 
   1719 	ASSERT(fd1->fd_mac_len == fd2->fd_mac_len && fd1->fd_mac_len != 0);
   1720 	return (bcmp(&fd1->fd_dst_mac, &fd2->fd_dst_mac,
   1721 	    fd1->fd_mac_len) == 0 && fd1->fd_vid == fd2->fd_vid);
   1722 }
   1723 
   1724 /*
   1725  * Generic flow entry insertion function.
   1726  * Used by flow tables that do not have ordering requirements.
   1727  */
   1728 /* ARGSUSED */
   1729 static int
   1730 flow_generic_insert_fe(flow_tab_t *ft, flow_entry_t **headp,
   1731     flow_entry_t *flent)
   1732 {
   1733 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
   1734 
   1735 	if (*headp != NULL) {
   1736 		ASSERT(flent->fe_next == NULL);
   1737 		flent->fe_next = *headp;
   1738 	}
   1739 	*headp = flent;
   1740 	return (0);
   1741 }
   1742 
   1743 /*
   1744  * IP version independent DSField matching function.
   1745  */
   1746 /* ARGSUSED */
   1747 static boolean_t
   1748 flow_ip_dsfield_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
   1749 {
   1750 	flow_l3info_t	*l3info = &s->fs_l3info;
   1751 	flow_desc_t	*fd = &flent->fe_flow_desc;
   1752 
   1753 	switch (l3info->l3_version) {
   1754 	case IPV4_VERSION: {
   1755 		ipha_t		*ipha = (ipha_t *)l3info->l3_start;
   1756 
   1757 		return ((ipha->ipha_type_of_service &
   1758 		    fd->fd_dsfield_mask) == fd->fd_dsfield);
   1759 	}
   1760 	case IPV6_VERSION: {
   1761 		ip6_t		*ip6h = (ip6_t *)l3info->l3_start;
   1762 
   1763 		return ((IPV6_FLOW_TCLASS(ip6h->ip6_vcf) &
   1764 		    fd->fd_dsfield_mask) == fd->fd_dsfield);
   1765 	}
   1766 	default:
   1767 		return (B_FALSE);
   1768 	}
   1769 }
   1770 
   1771 /*
   1772  * IP v4 and v6 address matching.
   1773  * The netmask only needs to be applied on the packet but not on the
   1774  * flow_desc since fd_local_addr/fd_remote_addr are premasked subnets.
   1775  */
   1776 
   1777 /* ARGSUSED */
   1778 static boolean_t
   1779 flow_ip_v4_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
   1780 {
   1781 	flow_l3info_t	*l3info = &s->fs_l3info;
   1782 	flow_desc_t	*fd = &flent->fe_flow_desc;
   1783 	ipha_t		*ipha = (ipha_t *)l3info->l3_start;
   1784 	in_addr_t	addr;
   1785 
   1786 	addr = (l3info->l3_dst_or_src ? ipha->ipha_dst : ipha->ipha_src);
   1787 	if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) {
   1788 		return ((addr & V4_PART_OF_V6(fd->fd_local_netmask)) ==
   1789 		    V4_PART_OF_V6(fd->fd_local_addr));
   1790 	}
   1791 	return ((addr & V4_PART_OF_V6(fd->fd_remote_netmask)) ==
   1792 	    V4_PART_OF_V6(fd->fd_remote_addr));
   1793 }
   1794 
   1795 /* ARGSUSED */
   1796 static boolean_t
   1797 flow_ip_v6_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
   1798 {
   1799 	flow_l3info_t	*l3info = &s->fs_l3info;
   1800 	flow_desc_t	*fd = &flent->fe_flow_desc;
   1801 	ip6_t		*ip6h = (ip6_t *)l3info->l3_start;
   1802 	in6_addr_t	*addrp;
   1803 
   1804 	addrp = (l3info->l3_dst_or_src ? &ip6h->ip6_dst : &ip6h->ip6_src);
   1805 	if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) {
   1806 		return (V6_MASK_EQ(*addrp, fd->fd_local_netmask,
   1807 		    fd->fd_local_addr));
   1808 	}
   1809 	return (V6_MASK_EQ(*addrp, fd->fd_remote_netmask, fd->fd_remote_addr));
   1810 }
   1811 
   1812 /* ARGSUSED */
   1813 static boolean_t
   1814 flow_ip_proto_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
   1815 {
   1816 	flow_l3info_t	*l3info = &s->fs_l3info;
   1817 	flow_desc_t	*fd = &flent->fe_flow_desc;
   1818 
   1819 	return (l3info->l3_protocol == fd->fd_protocol);
   1820 }
   1821 
   1822 static uint32_t
   1823 flow_ip_hash(flow_tab_t *ft, flow_state_t *s)
   1824 {
   1825 	flow_l3info_t	*l3info = &s->fs_l3info;
   1826 	flow_mask_t	mask = ft->ft_mask;
   1827 
   1828 	if ((mask & FLOW_IP_LOCAL) != 0) {
   1829 		l3info->l3_dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0);
   1830 	} else if ((mask & FLOW_IP_REMOTE) != 0) {
   1831 		l3info->l3_dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0);
   1832 	} else if ((mask & FLOW_IP_DSFIELD) != 0) {
   1833 		/*
   1834 		 * DSField flents are arranged as a single list.
   1835 		 */
   1836 		return (0);
   1837 	}
   1838 	/*
   1839 	 * IP addr flents are hashed into two lists, v4 or v6.
   1840 	 */
   1841 	ASSERT(ft->ft_size >= 2);
   1842 	return ((l3info->l3_version == IPV4_VERSION) ? 0 : 1);
   1843 }
   1844 
   1845 static uint32_t
   1846 flow_ip_proto_hash(flow_tab_t *ft, flow_state_t *s)
   1847 {
   1848 	flow_l3info_t	*l3info = &s->fs_l3info;
   1849 
   1850 	return (l3info->l3_protocol % ft->ft_size);
   1851 }
   1852 
   1853 /* ARGSUSED */
   1854 static int
   1855 flow_ip_accept(flow_tab_t *ft, flow_state_t *s)
   1856 {
   1857 	flow_l2info_t	*l2info = &s->fs_l2info;
   1858 	flow_l3info_t	*l3info = &s->fs_l3info;
   1859 	uint16_t	sap = l2info->l2_sap;
   1860 	uchar_t		*l3_start;
   1861 
   1862 	l3_start = l2info->l2_start + l2info->l2_hdrsize;
   1863 
   1864 	/*
   1865 	 * Adjust start pointer if we're at the end of an mblk.
   1866 	 */
   1867 	CHECK_AND_ADJUST_START_PTR(s, l3_start);
   1868 
   1869 	l3info->l3_start = l3_start;
   1870 	if (!OK_32PTR(l3_start))
   1871 		return (EINVAL);
   1872 
   1873 	switch (sap) {
   1874 	case ETHERTYPE_IP: {
   1875 		ipha_t	*ipha = (ipha_t *)l3_start;
   1876 
   1877 		if (PKT_TOO_SMALL(s, l3_start + IP_SIMPLE_HDR_LENGTH))
   1878 			return (ENOBUFS);
   1879 
   1880 		l3info->l3_hdrsize = IPH_HDR_LENGTH(ipha);
   1881 		l3info->l3_protocol = ipha->ipha_protocol;
   1882 		l3info->l3_version = IPV4_VERSION;
   1883 		l3info->l3_fragmented =
   1884 		    IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags);
   1885 		break;
   1886 	}
   1887 	case ETHERTYPE_IPV6: {
   1888 		ip6_t   *ip6h = (ip6_t *)l3_start;
   1889 		uint16_t ip6_hdrlen;
   1890 		uint8_t	 nexthdr;
   1891 
   1892 		if (!mac_ip_hdr_length_v6(s->fs_mp, ip6h, &ip6_hdrlen,
   1893 		    &nexthdr, NULL, NULL)) {
   1894 			return (ENOBUFS);
   1895 		}
   1896 		l3info->l3_hdrsize = ip6_hdrlen;
   1897 		l3info->l3_protocol = nexthdr;
   1898 		l3info->l3_version = IPV6_VERSION;
   1899 		l3info->l3_fragmented = B_FALSE;
   1900 		break;
   1901 	}
   1902 	default:
   1903 		return (EINVAL);
   1904 	}
   1905 	return (0);
   1906 }
   1907 
   1908 /* ARGSUSED */
   1909 static int
   1910 flow_ip_proto_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
   1911 {
   1912 	flow_desc_t	*fd = &flent->fe_flow_desc;
   1913 
   1914 	switch (fd->fd_protocol) {
   1915 	case IPPROTO_TCP:
   1916 	case IPPROTO_UDP:
   1917 	case IPPROTO_SCTP:
   1918 	case IPPROTO_ICMP:
   1919 	case IPPROTO_ICMPV6:
   1920 		flent->fe_match = flow_ip_proto_match;
   1921 		return (0);
   1922 	default:
   1923 		return (EINVAL);
   1924 	}
   1925 }
   1926 
   1927 /* ARGSUSED */
   1928 static int
   1929 flow_ip_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
   1930 {
   1931 	flow_desc_t	*fd = &flent->fe_flow_desc;
   1932 	flow_mask_t	mask;
   1933 	uint8_t		version;
   1934 	in6_addr_t	*addr, *netmask;
   1935 
   1936 	/*
   1937 	 * DSField does not require a IP version.
   1938 	 */
   1939 	if (fd->fd_mask == FLOW_IP_DSFIELD) {
   1940 		if (fd->fd_dsfield_mask == 0)
   1941 			return (EINVAL);
   1942 
   1943 		flent->fe_match = flow_ip_dsfield_match;
   1944 		return (0);
   1945 	}
   1946 
   1947 	/*
   1948 	 * IP addresses must come with a version to avoid ambiguity.
   1949 	 */
   1950 	if ((fd->fd_mask & FLOW_IP_VERSION) == 0)
   1951 		return (EINVAL);
   1952 
   1953 	version = fd->fd_ipversion;
   1954 	if (version != IPV4_VERSION && version != IPV6_VERSION)
   1955 		return (EINVAL);
   1956 
   1957 	mask = fd->fd_mask & ~FLOW_IP_VERSION;
   1958 	switch (mask) {
   1959 	case FLOW_IP_LOCAL:
   1960 		addr = &fd->fd_local_addr;
   1961 		netmask = &fd->fd_local_netmask;
   1962 		break;
   1963 	case FLOW_IP_REMOTE:
   1964 		addr = &fd->fd_remote_addr;
   1965 		netmask = &fd->fd_remote_netmask;
   1966 		break;
   1967 	default:
   1968 		return (EINVAL);
   1969 	}
   1970 
   1971 	/*
   1972 	 * Apply netmask onto specified address.
   1973 	 */
   1974 	V6_MASK_COPY(*addr, *netmask, *addr);
   1975 	if (version == IPV4_VERSION) {
   1976 		ipaddr_t	v4addr = V4_PART_OF_V6((*addr));
   1977 		ipaddr_t	v4mask = V4_PART_OF_V6((*netmask));
   1978 
   1979 		if (v4addr == 0 || v4mask == 0)
   1980 			return (EINVAL);
   1981 		flent->fe_match = flow_ip_v4_match;
   1982 	} else {
   1983 		if (IN6_IS_ADDR_UNSPECIFIED(addr) ||
   1984 		    IN6_IS_ADDR_UNSPECIFIED(netmask))
   1985 			return (EINVAL);
   1986 		flent->fe_match = flow_ip_v6_match;
   1987 	}
   1988 	return (0);
   1989 }
   1990 
   1991 static uint32_t
   1992 flow_ip_proto_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
   1993 {
   1994 	flow_desc_t	*fd = &flent->fe_flow_desc;
   1995 
   1996 	return (fd->fd_protocol % ft->ft_size);
   1997 }
   1998 
   1999 static uint32_t
   2000 flow_ip_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
   2001 {
   2002 	flow_desc_t	*fd = &flent->fe_flow_desc;
   2003 
   2004 	/*
   2005 	 * DSField flents are arranged as a single list.
   2006 	 */
   2007 	if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0)
   2008 		return (0);
   2009 
   2010 	/*
   2011 	 * IP addr flents are hashed into two lists, v4 or v6.
   2012 	 */
   2013 	ASSERT(ft->ft_size >= 2);
   2014 	return ((fd->fd_ipversion == IPV4_VERSION) ? 0 : 1);
   2015 }
   2016 
   2017 /* ARGSUSED */
   2018 static boolean_t
   2019 flow_ip_proto_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
   2020 {
   2021 	flow_desc_t	*fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
   2022 
   2023 	return (fd1->fd_protocol == fd2->fd_protocol);
   2024 }
   2025 
   2026 /* ARGSUSED */
   2027 static boolean_t
   2028 flow_ip_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
   2029 {
   2030 	flow_desc_t	*fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
   2031 	in6_addr_t	*a1, *m1, *a2, *m2;
   2032 
   2033 	ASSERT(fd1->fd_mask == fd2->fd_mask);
   2034 	if (fd1->fd_mask == FLOW_IP_DSFIELD) {
   2035 		return (fd1->fd_dsfield == fd2->fd_dsfield &&
   2036 		    fd1->fd_dsfield_mask == fd2->fd_dsfield_mask);
   2037 	}
   2038 
   2039 	/*
   2040 	 * flow_ip_accept_fe() already validated the version.
   2041 	 */
   2042 	ASSERT((fd1->fd_mask & FLOW_IP_VERSION) != 0);
   2043 	if (fd1->fd_ipversion != fd2->fd_ipversion)
   2044 		return (B_FALSE);
   2045 
   2046 	switch (fd1->fd_mask & ~FLOW_IP_VERSION) {
   2047 	case FLOW_IP_LOCAL:
   2048 		a1 = &fd1->fd_local_addr;
   2049 		m1 = &fd1->fd_local_netmask;
   2050 		a2 = &fd2->fd_local_addr;
   2051 		m2 = &fd2->fd_local_netmask;
   2052 		break;
   2053 	case FLOW_IP_REMOTE:
   2054 		a1 = &fd1->fd_remote_addr;
   2055 		m1 = &fd1->fd_remote_netmask;
   2056 		a2 = &fd2->fd_remote_addr;
   2057 		m2 = &fd2->fd_remote_netmask;
   2058 		break;
   2059 	default:
   2060 		/*
   2061 		 * This is unreachable given the checks in
   2062 		 * flow_ip_accept_fe().
   2063 		 */
   2064 		return (B_FALSE);
   2065 	}
   2066 
   2067 	if (fd1->fd_ipversion == IPV4_VERSION) {
   2068 		return (V4_PART_OF_V6((*a1)) == V4_PART_OF_V6((*a2)) &&
   2069 		    V4_PART_OF_V6((*m1)) == V4_PART_OF_V6((*m2)));
   2070 
   2071 	} else {
   2072 		return (IN6_ARE_ADDR_EQUAL(a1, a2) &&
   2073 		    IN6_ARE_ADDR_EQUAL(m1, m2));
   2074 	}
   2075 }
   2076 
   2077 static int
   2078 flow_ip_mask2plen(in6_addr_t *v6mask)
   2079 {
   2080 	int		bits;
   2081 	int		plen = IPV6_ABITS;
   2082 	int		i;
   2083 
   2084 	for (i = 3; i >= 0; i--) {
   2085 		if (v6mask->s6_addr32[i] == 0) {
   2086 			plen -= 32;
   2087 			continue;
   2088 		}
   2089 		bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1;
   2090 		if (bits == 0)
   2091 			break;
   2092 		plen -= bits;
   2093 	}
   2094 	return (plen);
   2095 }
   2096 
   2097 /* ARGSUSED */
   2098 static int
   2099 flow_ip_insert_fe(flow_tab_t *ft, flow_entry_t **headp,
   2100     flow_entry_t *flent)
   2101 {
   2102 	flow_entry_t	**p = headp;
   2103 	flow_desc_t	*fd0, *fd;
   2104 	in6_addr_t	*m0, *m;
   2105 	int		plen0, plen;
   2106 
   2107 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
   2108 
   2109 	/*
   2110 	 * No special ordering needed for dsfield.
   2111 	 */
   2112 	fd0 = &flent->fe_flow_desc;
   2113 	if ((fd0->fd_mask & FLOW_IP_DSFIELD) != 0) {
   2114 		if (*p != NULL) {
   2115 			ASSERT(flent->fe_next == NULL);
   2116 			flent->fe_next = *p;
   2117 		}
   2118 		*p = flent;
   2119 		return (0);
   2120 	}
   2121 
   2122 	/*
   2123 	 * IP address flows are arranged in descending prefix length order.
   2124 	 */
   2125 	m0 = ((fd0->fd_mask & FLOW_IP_LOCAL) != 0) ?
   2126 	    &fd0->fd_local_netmask : &fd0->fd_remote_netmask;
   2127 	plen0 = flow_ip_mask2plen(m0);
   2128 	ASSERT(plen0 != 0);
   2129 
   2130 	for (; *p != NULL; p = &(*p)->fe_next) {
   2131 		fd = &(*p)->fe_flow_desc;
   2132 
   2133 		/*
   2134 		 * Normally a dsfield flent shouldn't end up on the same
   2135 		 * list as an IP address because flow tables are (for now)
   2136 		 * disjoint. If we decide to support both IP and dsfield
   2137 		 * in the same table in the future, this check will allow
   2138 		 * for that.
   2139 		 */
   2140 		if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0)
   2141 			continue;
   2142 
   2143 		/*
   2144 		 * We also allow for the mixing of local and remote address
   2145 		 * flents within one list.
   2146 		 */
   2147 		m = ((fd->fd_mask & FLOW_IP_LOCAL) != 0) ?
   2148 		    &fd->fd_local_netmask : &fd->fd_remote_netmask;
   2149 		plen = flow_ip_mask2plen(m);
   2150 
   2151 		if (plen <= plen0)
   2152 			break;
   2153 	}
   2154 	if (*p != NULL) {
   2155 		ASSERT(flent->fe_next == NULL);
   2156 		flent->fe_next = *p;
   2157 	}
   2158 	*p = flent;
   2159 	return (0);
   2160 }
   2161 
   2162 /*
   2163  * Transport layer protocol and port matching functions.
   2164  */
   2165 
   2166 /* ARGSUSED */
   2167 static boolean_t
   2168 flow_transport_lport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
   2169 {
   2170 	flow_l3info_t	*l3info = &s->fs_l3info;
   2171 	flow_l4info_t	*l4info = &s->fs_l4info;
   2172 	flow_desc_t	*fd = &flent->fe_flow_desc;
   2173 
   2174 	return (fd->fd_protocol == l3info->l3_protocol &&
   2175 	    fd->fd_local_port == l4info->l4_hash_port);
   2176 }
   2177 
   2178 /* ARGSUSED */
   2179 static boolean_t
   2180 flow_transport_rport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
   2181 {
   2182 	flow_l3info_t	*l3info = &s->fs_l3info;
   2183 	flow_l4info_t	*l4info = &s->fs_l4info;
   2184 	flow_desc_t	*fd = &flent->fe_flow_desc;
   2185 
   2186 	return (fd->fd_protocol == l3info->l3_protocol &&
   2187 	    fd->fd_remote_port == l4info->l4_hash_port);
   2188 }
   2189 
   2190 /*
   2191  * Transport hash function.
   2192  * Since we only support either local or remote port flows,
   2193  * we only need to extract one of the ports to be used for
   2194  * matching.
   2195  */
   2196 static uint32_t
   2197 flow_transport_hash(flow_tab_t *ft, flow_state_t *s)
   2198 {
   2199 	flow_l3info_t	*l3info = &s->fs_l3info;
   2200 	flow_l4info_t	*l4info = &s->fs_l4info;
   2201 	uint8_t		proto = l3info->l3_protocol;
   2202 	boolean_t	dst_or_src;
   2203 
   2204 	if ((ft->ft_mask & FLOW_ULP_PORT_LOCAL) != 0) {
   2205 		dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0);
   2206 	} else {
   2207 		dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0);
   2208 	}
   2209 
   2210 	l4info->l4_hash_port = dst_or_src ? l4info->l4_dst_port :
   2211 	    l4info->l4_src_port;
   2212 
   2213 	return ((l4info->l4_hash_port ^ (proto << 4)) % ft->ft_size);
   2214 }
   2215 
   2216 /*
   2217  * Unlike other accept() functions above, we do not need to get the header
   2218  * size because this is our highest layer so far. If we want to do support
   2219  * other higher layer protocols, we would need to save the l4_hdrsize
   2220  * in the code below.
   2221  */
   2222 
   2223 /* ARGSUSED */
   2224 static int
   2225 flow_transport_accept(flow_tab_t *ft, flow_state_t *s)
   2226 {
   2227 	flow_l3info_t	*l3info = &s->fs_l3info;
   2228 	flow_l4info_t	*l4info = &s->fs_l4info;
   2229 	uint8_t		proto = l3info->l3_protocol;
   2230 	uchar_t		*l4_start;
   2231 
   2232 	l4_start = l3info->l3_start + l3info->l3_hdrsize;
   2233 
   2234 	/*
   2235 	 * Adjust start pointer if we're at the end of an mblk.
   2236 	 */
   2237 	CHECK_AND_ADJUST_START_PTR(s, l4_start);
   2238 
   2239 	l4info->l4_start = l4_start;
   2240 	if (!OK_32PTR(l4_start))
   2241 		return (EINVAL);
   2242 
   2243 	if (l3info->l3_fragmented == B_TRUE)
   2244 		return (EINVAL);
   2245 
   2246 	switch (proto) {
   2247 	case IPPROTO_TCP: {
   2248 		struct tcphdr	*tcph = (struct tcphdr *)l4_start;
   2249 
   2250 		if (PKT_TOO_SMALL(s, l4_start + sizeof (*tcph)))
   2251 			return (ENOBUFS);
   2252 
   2253 		l4info->l4_src_port = tcph->th_sport;
   2254 		l4info->l4_dst_port = tcph->th_dport;
   2255 		break;
   2256 	}
   2257 	case IPPROTO_UDP: {
   2258 		struct udphdr	*udph = (struct udphdr *)l4_start;
   2259 
   2260 		if (PKT_TOO_SMALL(s, l4_start + sizeof (*udph)))
   2261 			return (ENOBUFS);
   2262 
   2263 		l4info->l4_src_port = udph->uh_sport;
   2264 		l4info->l4_dst_port = udph->uh_dport;
   2265 		break;
   2266 	}
   2267 	case IPPROTO_SCTP: {
   2268 		sctp_hdr_t	*sctph = (sctp_hdr_t *)l4_start;
   2269 
   2270 		if (PKT_TOO_SMALL(s, l4_start + sizeof (*sctph)))
   2271 			return (ENOBUFS);
   2272 
   2273 		l4info->l4_src_port = sctph->sh_sport;
   2274 		l4info->l4_dst_port = sctph->sh_dport;
   2275 		break;
   2276 	}
   2277 	default:
   2278 		return (EINVAL);
   2279 	}
   2280 
   2281 	return (0);
   2282 }
   2283 
   2284 /*
   2285  * Validates transport flow entry.
   2286  * The protocol field must be present.
   2287  */
   2288 
   2289 /* ARGSUSED */
   2290 static int
   2291 flow_transport_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
   2292 {
   2293 	flow_desc_t	*fd = &flent->fe_flow_desc;
   2294 	flow_mask_t	mask = fd->fd_mask;
   2295 
   2296 	if ((mask & FLOW_IP_PROTOCOL) == 0)
   2297 		return (EINVAL);
   2298 
   2299 	switch (fd->fd_protocol) {
   2300 	case IPPROTO_TCP:
   2301 	case IPPROTO_UDP:
   2302 	case IPPROTO_SCTP:
   2303 		break;
   2304 	default:
   2305 		return (EINVAL);
   2306 	}
   2307 
   2308 	switch (mask & ~FLOW_IP_PROTOCOL) {
   2309 	case FLOW_ULP_PORT_LOCAL:
   2310 		if (fd->fd_local_port == 0)
   2311 			return (EINVAL);
   2312 
   2313 		flent->fe_match = flow_transport_lport_match;
   2314 		break;
   2315 	case FLOW_ULP_PORT_REMOTE:
   2316 		if (fd->fd_remote_port == 0)
   2317 			return (EINVAL);
   2318 
   2319 		flent->fe_match = flow_transport_rport_match;
   2320 		break;
   2321 	case 0:
   2322 		/*
   2323 		 * transport-only flows conflicts with our table type.
   2324 		 */
   2325 		return (EOPNOTSUPP);
   2326 	default:
   2327 		return (EINVAL);
   2328 	}
   2329 
   2330 	return (0);
   2331 }
   2332 
   2333 static uint32_t
   2334 flow_transport_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
   2335 {
   2336 	flow_desc_t	*fd = &flent->fe_flow_desc;
   2337 	uint16_t	port = 0;
   2338 
   2339 	port = ((fd->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) ?
   2340 	    fd->fd_local_port : fd->fd_remote_port;
   2341 
   2342 	return ((port ^ (fd->fd_protocol << 4)) % ft->ft_size);
   2343 }
   2344 
   2345 /* ARGSUSED */
   2346 static boolean_t
   2347 flow_transport_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
   2348 {
   2349 	flow_desc_t	*fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
   2350 
   2351 	if (fd1->fd_protocol != fd2->fd_protocol)
   2352 		return (B_FALSE);
   2353 
   2354 	if ((fd1->fd_mask & FLOW_ULP_PORT_LOCAL) != 0)
   2355 		return (fd1->fd_local_port == fd2->fd_local_port);
   2356 
   2357 	if ((fd1->fd_mask & FLOW_ULP_PORT_REMOTE) != 0)
   2358 		return (fd1->fd_remote_port == fd2->fd_remote_port);
   2359 
   2360 	return (B_TRUE);
   2361 }
   2362 
   2363 static flow_ops_t flow_l2_ops = {
   2364 	flow_l2_accept_fe,
   2365 	flow_l2_hash_fe,
   2366 	flow_l2_match_fe,
   2367 	flow_generic_insert_fe,
   2368 	flow_l2_hash,
   2369 	{flow_l2_accept}
   2370 };
   2371 
   2372 static flow_ops_t flow_ip_ops = {
   2373 	flow_ip_accept_fe,
   2374 	flow_ip_hash_fe,
   2375 	flow_ip_match_fe,
   2376 	flow_ip_insert_fe,
   2377 	flow_ip_hash,
   2378 	{flow_l2_accept, flow_ip_accept}
   2379 };
   2380 
   2381 static flow_ops_t flow_ip_proto_ops = {
   2382 	flow_ip_proto_accept_fe,
   2383 	flow_ip_proto_hash_fe,
   2384 	flow_ip_proto_match_fe,
   2385 	flow_generic_insert_fe,
   2386 	flow_ip_proto_hash,
   2387 	{flow_l2_accept, flow_ip_accept}
   2388 };
   2389 
   2390 static flow_ops_t flow_transport_ops = {
   2391 	flow_transport_accept_fe,
   2392 	flow_transport_hash_fe,
   2393 	flow_transport_match_fe,
   2394 	flow_generic_insert_fe,
   2395 	flow_transport_hash,
   2396 	{flow_l2_accept, flow_ip_accept, flow_transport_accept}
   2397 };
   2398 
   2399 static flow_tab_info_t flow_tab_info_list[] = {
   2400 	{&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_LOCAL, 2},
   2401 	{&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_REMOTE, 2},
   2402 	{&flow_ip_ops, FLOW_IP_DSFIELD, 1},
   2403 	{&flow_ip_proto_ops, FLOW_IP_PROTOCOL, 256},
   2404 	{&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_LOCAL, 1024},
   2405 	{&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_REMOTE, 1024}
   2406 };
   2407 
   2408 #define	FLOW_MAX_TAB_INFO \
   2409 	((sizeof (flow_tab_info_list)) / sizeof (flow_tab_info_t))
   2410 
   2411 static flow_tab_info_t *
   2412 mac_flow_tab_info_get(flow_mask_t mask)
   2413 {
   2414 	int	i;
   2415 
   2416 	for (i = 0; i < FLOW_MAX_TAB_INFO; i++) {
   2417 		if (mask == flow_tab_info_list[i].fti_mask)
   2418 			return (&flow_tab_info_list[i]);
   2419 	}
   2420 	return (NULL);
   2421 }
   2422