Home | History | Annotate | Download | only in inet
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 /* Copyright (c) 1990 Mentat Inc. */
     26 
     27 #ifndef	_INET_IP_IRE_H
     28 #define	_INET_IP_IRE_H
     29 
     30 #ifdef	__cplusplus
     31 extern "C" {
     32 #endif
     33 
     34 #define	IPV6_LL_PREFIXLEN	10	/* Number of bits in link-local pref */
     35 
     36 #define	IP_CACHE_TABLE_SIZE	256
     37 #define	IP_MASK_TABLE_SIZE	(IP_ABITS + 1)		/* 33 ptrs */
     38 
     39 #define	IP6_FTABLE_HASH_SIZE	32	/* size of each hash table in ptrs */
     40 #define	IP6_CACHE_TABLE_SIZE	256
     41 #define	IP6_MASK_TABLE_SIZE	(IPV6_ABITS + 1)	/* 129 ptrs */
     42 
     43 /*
     44  * We use the common modulo hash function.  In ip_ire_init(), we make
     45  * sure that the cache table size is always a power of 2.  That's why
     46  * we can use & instead of %.  Also note that we try hard to make sure
     47  * the lower bits of an address capture most info from the whole address.
     48  * The reason being that since our hash table is probably a lot smaller
     49  * than 2^32 buckets so the lower bits are the most important.
     50  */
     51 #define	IRE_ADDR_HASH(addr, table_size) \
     52 	(((addr) ^ ((addr) >> 8) ^ ((addr) >> 16) ^ ((addr) >> 24)) &	\
     53 	((table_size) - 1))
     54 
     55 /*
     56  * To make a byte-order neutral hash for IPv6, just take all the
     57  * bytes in the bottom 32 bits into account.
     58  */
     59 #define	IRE_ADDR_HASH_V6(addr, table_size) 				\
     60 	IRE_ADDR_HASH((addr).s6_addr32[3], table_size)
     61 
     62 /* This assumes that the ftable size is a power of 2. */
     63 #define	IRE_ADDR_MASK_HASH_V6(addr, mask, table_size) 			\
     64 	((((addr).s6_addr8[8] & (mask).s6_addr8[8]) ^ 			\
     65 	((addr).s6_addr8[9] & (mask).s6_addr8[9]) ^			\
     66 	((addr).s6_addr8[10] & (mask).s6_addr8[10]) ^ 			\
     67 	((addr).s6_addr8[13] & (mask).s6_addr8[13]) ^ 			\
     68 	((addr).s6_addr8[14] & (mask).s6_addr8[14]) ^ 			\
     69 	((addr).s6_addr8[15] & (mask).s6_addr8[15])) & ((table_size) - 1))
     70 
     71 /*
     72  * match parameter definitions for IRE lookup routines.
     73  */
     74 #define	MATCH_IRE_DSTONLY	0x0000	/* Match just the address */
     75 #define	MATCH_IRE_TYPE		0x0001	/* Match IRE type */
     76 #define	MATCH_IRE_SRC		0x0002	/* Match IRE source address */
     77 #define	MATCH_IRE_MASK		0x0004	/* Match IRE mask */
     78 #define	MATCH_IRE_WQ		0x0008	/* Match IRE ire_stq to write queue */
     79 #define	MATCH_IRE_GW		0x0010	/* Match IRE gateway */
     80 #define	MATCH_IRE_IPIF		0x0020	/* Match IRE ipif */
     81 #define	MATCH_IRE_RECURSIVE	0x0040	/* Do recursive lookup if necessary */
     82 #define	MATCH_IRE_DEFAULT	0x0080	/* Return default route if no route */
     83 					/* found. */
     84 #define	MATCH_IRE_RJ_BHOLE	0x0100	/* During lookup if we hit an ire */
     85 					/* with RTF_REJECT or RTF_BLACKHOLE, */
     86 					/* return the ire. No recursive */
     87 					/* lookup should be done. */
     88 #define	MATCH_IRE_IHANDLE	0x0200	/* Match IRE on ihandle */
     89 #define	MATCH_IRE_MARK_TESTHIDDEN 0x0400 /* Match IRE_MARK_TESTHIDDEN IREs */
     90 
     91 /*
     92  * MATCH_IRE_PARENT is used whenever we unconditionally want to get the
     93  * parent IRE (sire) while recursively searching IREs for an offsubnet
     94  * destination. With this flag, even if no IRE_CACHETABLE or IRE_INTERFACE
     95  * is found to help resolving IRE_OFFSUBNET in lookup routines, the
     96  * IRE_OFFSUBNET sire, if any, is returned to the caller.
     97  */
     98 /* UNUSED			0x0800  */
     99 #define	MATCH_IRE_ILL		0x1000	/* Match IRE on the ill */
    100 
    101 #define	MATCH_IRE_PARENT	0x2000	/* Match parent ire, if any, */
    102 					/* even if ire is not matched. */
    103 #define	MATCH_IRE_ZONEONLY	0x4000	/* Match IREs in specified zone, ie */
    104 					/* don't match IRE_LOCALs from other */
    105 					/* zones or shared IREs */
    106 #define	MATCH_IRE_MARK_PRIVATE_ADDR	0x8000	/* Match IRE ire_marks with */
    107 						/* IRE_MARK_PRIVATE_ADDR. */
    108 #define	MATCH_IRE_SECATTR	0x10000	/* Match gateway security attributes */
    109 #define	MATCH_IRE_COMPLETE	0x20000	/* ire_ftable_lookup() can return */
    110 					/* IRE_CACHE entry only if it is  */
    111 					/* ND_REACHABLE			  */
    112 
    113 /*
    114  * Any ire to nce association is long term, and
    115  * the refhold and refrele may be done by different
    116  * threads. So all cases of making or breaking ire to
    117  * nce association should all effectively use the NOTR variants.
    118  * To understand the *effectively* part read on.
    119  *
    120  * ndp_lookup() and ndp_add_v4()/ndp_add_v6() implicitly do
    121  * NCE_REFHOLD. So wherever we make ire to nce association after
    122  * calling these functions, we effectively want to end up with
    123  * NCE_REFHOLD_NOTR. We call this macro to achieve this effect. This
    124  * macro changes a NCE_REFHOLD to a NCE_REFHOLD_NOTR. The macro's
    125  * NCE_REFRELE cancels off ndp_lookup[ndp_add]'s implicit NCE_REFHOLD,
    126  * and what you are left with is a NCE_REFHOLD_NOTR
    127  */
    128 #define	NCE_REFHOLD_TO_REFHOLD_NOTR(nce) {	\
    129 	NCE_REFHOLD_NOTR(nce);			\
    130 	NCE_REFRELE(nce);			\
    131 }
    132 
    133 /*
    134  * find the next ire_t entry in the ire_next chain starting at ire
    135  * that is not CONDEMNED.  ire is set to NULL if we reach the end of the list.
    136  * Caller must hold the ire_bucket lock.
    137  */
    138 
    139 #define	IRE_FIND_NEXT_ORIGIN(ire) {					\
    140 	while ((ire) != NULL && ((ire)->ire_marks & IRE_MARK_CONDEMNED))\
    141 		(ire) = (ire)->ire_next;				\
    142 }
    143 
    144 
    145 /* Structure for ire_cache_count() */
    146 typedef struct {
    147 	int	icc_total;	/* Total number of IRE_CACHE */
    148 	int	icc_unused;	/* # off/no PMTU unused since last reclaim */
    149 	int	icc_offlink;	/* # offlink without PMTU information */
    150 	int	icc_pmtu;	/* # offlink with PMTU information */
    151 	int	icc_onlink;	/* # onlink */
    152 } ire_cache_count_t;
    153 
    154 /*
    155  * Structure for ire_cache_reclaim(). Each field is a fraction i.e. 1 meaning
    156  * reclaim all, N meaning reclaim 1/Nth of all entries, 0 meaning reclaim none.
    157  *
    158  * The comment below (and for other netstack_t references) refers
    159  * to the fact that we only do netstack_hold in particular cases,
    160  * such as the references from open streams (ill_t and conn_t's
    161  * pointers). Internally within IP we rely on IP's ability to cleanup e.g.
    162  * ire_t's when an ill goes away.
    163  */
    164 typedef struct {
    165 	int	icr_unused;	/* Fraction for unused since last reclaim */
    166 	int	icr_offlink;	/* Fraction for offlink without PMTU info */
    167 	int	icr_pmtu;	/* Fraction for offlink with PMTU info */
    168 	int	icr_onlink;	/* Fraction for onlink */
    169 	ip_stack_t *icr_ipst;	/* Does not have a netstack_hold */
    170 } ire_cache_reclaim_t;
    171 
    172 /*
    173  * We use atomics so that we get an accurate accounting on the ires.
    174  * Otherwise we can't determine leaks correctly.
    175  */
    176 #define	BUMP_IRE_STATS(ire_stats, x) atomic_add_64(&(ire_stats).x, 1)
    177 
    178 #ifdef _KERNEL
/*
 * Structure for passing args for the IRE cache lookup functions.
 *
 * NOTE(review): the first eight fields line up, in type and order, with
 * the parameters of ire_ctable_lookup()/ire_ctable_lookup_v6() declared
 * later in this header, except that the two addresses are carried as
 * void * (presumably so one struct can serve both IPv4 and IPv6 --
 * confirm against the users of this struct).  ict_wq has no counterpart
 * in those prototypes.
 */
typedef struct ire_ctable_args_s {
	void			*ict_addr;	/* address; pointee type not visible here */
	void			*ict_gateway;	/* gateway; pointee type not visible here */
	int			ict_type;	/* presumably an IRE type -- confirm */
	const ipif_t		*ict_ipif;
	zoneid_t		ict_zoneid;
	const ts_label_t	*ict_tsl;
	int			ict_flags;	/* presumably MATCH_IRE_* -- confirm */
	ip_stack_t		*ict_ipst;
	queue_t			*ict_wq;	/* presumably for MATCH_IRE_WQ -- confirm */
} ire_ctable_args_t;
    193 
/*
 * Forward declarations: the prototypes below only refer to these
 * structures through pointers, so their full definitions are not needed
 * here.
 */
struct ts_label_s;
struct nce_s;

/* ip_plen_to_mask*() / ip_mask_to_plen*() are declared in this section. */
extern	ipaddr_t	ip_plen_to_mask(uint_t);
extern	in6_addr_t	*ip_plen_to_mask_v6(uint_t, in6_addr_t *);

extern	int	ip_ire_advise(queue_t *, mblk_t *, cred_t *);
extern	int	ip_ire_delete(queue_t *, mblk_t *, cred_t *);
/*
 * NOTE(review): the IPv4 variant returns boolean_t and takes an ipif_t *;
 * the IPv6 variant returns void and takes no ipif.
 */
extern	boolean_t ip_ire_clookup_and_delete(ipaddr_t, ipif_t *, ip_stack_t *);
extern	void	ip_ire_clookup_and_delete_v6(const in6_addr_t *,
    ip_stack_t *);

extern	void	ip_ire_req(queue_t *, mblk_t *);

extern	int	ip_mask_to_plen(ipaddr_t);
extern	int	ip_mask_to_plen_v6(const in6_addr_t *);

extern	ire_t	*ipif_to_ire(const ipif_t *);
extern	ire_t	*ipif_to_ire_v6(const ipif_t *);

/*
 * ire_add*() and ire_atomic_*().
 * NOTE(review): ire_add() takes a trailing boolean_t that ire_add_v6()
 * does not have.
 */
extern	int	ire_add(ire_t **, queue_t *, mblk_t *, ipsq_func_t, boolean_t);
extern	void	ire_add_then_send(queue_t *, ire_t *, mblk_t *);
extern	int	ire_add_v6(ire_t **, queue_t *, mblk_t *, ipsq_func_t);
extern	int	ire_atomic_start(irb_t *irb_ptr, ire_t *ire, queue_t *q,
    mblk_t *mp, ipsq_func_t func);
extern	void	ire_atomic_end(irb_t *irb_ptr, ire_t *ire);

/*
 * ire_cache_*().  ire_cache_count() and ire_cache_reclaim() share the
 * (ire_t *, char *) signature; presumably the char * carries an
 * ire_cache_count_t / ire_cache_reclaim_t respectively -- confirm.
 */
extern	void	ire_cache_count(ire_t *, char *);
extern	ire_t	*ire_cache_lookup(ipaddr_t, zoneid_t,
    const struct ts_label_s *, ip_stack_t *);
extern	ire_t	*ire_cache_lookup_simple(ipaddr_t, ip_stack_t *);
extern	ire_t	*ire_cache_lookup_v6(const in6_addr_t *, zoneid_t,
    const struct ts_label_s *, ip_stack_t *);
extern	void	ire_cache_reclaim(ire_t *, char *);

/*
 * ire_create*() / ire_init*() family.
 * NOTE(review): the fifth parameter of ire_create_mp() is a uint_t, while
 * ire_create() and ire_init() take a uint_t * in that position; and
 * ire_create_mp_v6() has no counterpart to the uint_t * that
 * ire_create_v6() takes.  Confirm against the definitions.
 */
extern	ire_t	*ire_create_mp(uchar_t *, uchar_t *, uchar_t *, uchar_t *,
    uint_t, struct nce_s *, queue_t *, queue_t *, ushort_t, ipif_t *, ipaddr_t,
    uint32_t, uint32_t, uint32_t, const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *,
    ip_stack_t *);
extern	ire_t	*ire_create(uchar_t *, uchar_t *, uchar_t *, uchar_t *,
    uint_t *, struct nce_s *, queue_t *, queue_t *, ushort_t, ipif_t *,
    ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *, tsol_gc_t *,
    tsol_gcgrp_t *, ip_stack_t *);

extern	ire_t	**ire_check_and_create_bcast(ipif_t *, ipaddr_t,
    ire_t **, int);
extern	ire_t	**ire_create_bcast(ipif_t *, ipaddr_t, ire_t **);
extern	ire_t	*ire_init(ire_t *, uchar_t *, uchar_t *, uchar_t *, uchar_t *,
    uint_t *, struct nce_s *, queue_t *, queue_t *, ushort_t, ipif_t *,
    ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *, tsol_gc_t *,
    tsol_gcgrp_t *, ip_stack_t *);

extern	boolean_t ire_init_common(ire_t *, uint_t *, struct nce_s *, queue_t *,
    queue_t *, ushort_t, ipif_t *, uint32_t, uint32_t, uint32_t, uchar_t,
    const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);

extern	ire_t	*ire_create_v6(const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, const in6_addr_t *, uint_t *, struct nce_s *, queue_t *,
    queue_t *, ushort_t, ipif_t *,
    const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *,
    tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);

extern	ire_t	*ire_create_mp_v6(const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, const in6_addr_t *, struct nce_s *, queue_t *,
    queue_t *, ushort_t, ipif_t *,
    const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *,
    tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);


/* ire_clookup_*() and ire_ctable_lookup*(). */
extern	void	ire_clookup_delete_cache_gw(ipaddr_t, zoneid_t,
    ip_stack_t *);
extern	void	ire_clookup_delete_cache_gw_v6(const in6_addr_t *, zoneid_t,
    ip_stack_t *);

extern	ire_t	*ire_ctable_lookup(ipaddr_t, ipaddr_t, int, const ipif_t *,
    zoneid_t, const struct ts_label_s *, int, ip_stack_t *);

extern	ire_t	*ire_ctable_lookup_v6(const in6_addr_t *, const in6_addr_t *,
    int, const ipif_t *, zoneid_t, const struct ts_label_s *, int,
    ip_stack_t *);

/* ire_delete*(), ire_expire() and ire_flush_cache_*(). */
extern	void	ire_delete(ire_t *);
extern	void	ire_delete_cache_gw(ire_t *, char *);
extern	void	ire_delete_cache_gw_v6(ire_t *, char *);
extern	void	ire_delete_cache_v6(ire_t *, char *);
extern	void	ire_delete_v6(ire_t *);

extern	void	ire_expire(ire_t *, char *);

extern	void	ire_flush_cache_v4(ire_t *, int);
extern	void	ire_flush_cache_v6(ire_t *, int);

/*
 * NOTE(review): only the IPv6 ire_ftable_lookup variant is declared in
 * this header; ire_ftable_lookup() itself is mentioned in the
 * MATCH_IRE_COMPLETE comment but its prototype is not visible here.
 */
extern	ire_t	*ire_ftable_lookup_v6(const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, int, const ipif_t *, ire_t **, zoneid_t,
    uint32_t, const struct ts_label_s *, int, ip_stack_t *);

extern	ire_t	*ire_ihandle_lookup_onlink(ire_t *);
extern	ire_t	*ire_ihandle_lookup_offlink(ire_t *, ire_t *);
extern	ire_t	*ire_ihandle_lookup_offlink_v6(ire_t *, ire_t *);

extern  boolean_t	ire_local_same_lan(ire_t *, ire_t *);
extern	boolean_t	ire_local_ok_across_zones(ire_t *, zoneid_t, void *,
    const struct ts_label_s *, ip_stack_t *);

extern	ire_t 	*ire_lookup_local(zoneid_t, ip_stack_t *);
extern	ire_t 	*ire_lookup_local_v6(zoneid_t, ip_stack_t *);

extern  ire_t	*ire_lookup_multi(ipaddr_t, zoneid_t, ip_stack_t *);
extern  ire_t	*ire_lookup_multi_v6(const in6_addr_t *, zoneid_t,
    ip_stack_t *);

/* ire_refrele*() and ire_route_lookup*(). */
extern	void	ire_refrele(ire_t *);
extern	void	ire_refrele_notr(ire_t *);
extern	ire_t	*ire_route_lookup(ipaddr_t, ipaddr_t, ipaddr_t, int,
    const ipif_t *, ire_t **, zoneid_t, const struct ts_label_s *, int,
    ip_stack_t *);

extern	ire_t	*ire_route_lookup_v6(const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, int, const ipif_t *, ire_t **, zoneid_t,
    const struct ts_label_s *, int, ip_stack_t *);

extern ill_t	*ire_to_ill(const ire_t *);

/*
 * ire_walk*(): each takes a pfv_t function pointer and a void * argument.
 */
extern	void	ire_walk(pfv_t, void *, ip_stack_t *);
extern	void	ire_walk_ill(uint_t, uint_t, pfv_t, void *, ill_t *);
extern	void	ire_walk_ill_v4(uint_t, uint_t, pfv_t, void *, ill_t *);
extern	void	ire_walk_ill_v6(uint_t, uint_t, pfv_t, void *, ill_t *);
extern	void	ire_walk_v4(pfv_t, void *, zoneid_t, ip_stack_t *);
extern  void	ire_walk_ill_tables(uint_t match_flags, uint_t ire_type,
    pfv_t func, void *arg, size_t ftbl_sz, size_t htbl_sz,
    irb_t **ipftbl, size_t ctbl_sz, irb_t *ipctbl, ill_t *ill,
    zoneid_t zoneid, ip_stack_t *);
extern	void	ire_walk_v6(pfv_t, void *, zoneid_t, ip_stack_t *);

/*
 * ire_multirt_*().
 * NOTE(review): ire_multirt_lookup() takes an int * that
 * ire_multirt_lookup_v6() does not have.
 */
extern boolean_t	ire_multirt_lookup(ire_t **, ire_t **, uint32_t, int *,
    const struct ts_label_s *, ip_stack_t *);
extern boolean_t	ire_multirt_need_resolve(ipaddr_t,
    const struct ts_label_s *, ip_stack_t *);
extern boolean_t	ire_multirt_lookup_v6(ire_t **, ire_t **, uint32_t,
    const struct ts_label_s *, ip_stack_t *);
extern boolean_t	ire_multirt_need_resolve_v6(const in6_addr_t *,
    const struct ts_label_s *, ip_stack_t *);

extern ire_t	*ipif_lookup_multi_ire(ipif_t *, ipaddr_t);
extern ire_t	*ipif_lookup_multi_ire_v6(ipif_t *, const in6_addr_t *);

extern ire_t	*ire_get_next_bcast_ire(ire_t *, ire_t *);
extern ire_t	*ire_get_next_default_ire(ire_t *, ire_t *);

/* Remaining IPv4-typed helpers (they take ipaddr_t where an address is passed). */
extern  void	ire_arpresolve(ire_t *);
extern  void	ire_freemblk(ire_t *);
extern boolean_t	ire_match_args(ire_t *, ipaddr_t, ipaddr_t, ipaddr_t,
    int, const ipif_t *, zoneid_t, uint32_t, const struct ts_label_s *, int,
    queue_t *);
extern  int	ire_nce_init(ire_t *, struct nce_s *);
extern  boolean_t	ire_walk_ill_match(uint_t, uint_t, ire_t *, ill_t *,
    zoneid_t, ip_stack_t *);
extern	ire_t	*ire_arpresolve_lookup(ipaddr_t, ipaddr_t, ipif_t *, zoneid_t,
    ip_stack_t *, queue_t *);
    353 
    354 #endif /* _KERNEL */
    355 
    356 #ifdef	__cplusplus
    357 }
    358 #endif
    359 
    360 #endif	/* _INET_IP_IRE_H */
    361