Home | History | Annotate | Download | only in inet
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #ifndef	_INET_IP_NDP_H
     27 #define	_INET_IP_NDP_H
     28 
     29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     30 
#include <sys/types.h>
#include <sys/mutex.h>
#include <sys/stream.h>
#include <netinet/in.h>
#include <netinet/icmp6.h>
#include <inet/ip.h>
     36 
     37 /*
     38  * Internal definitions for the kernel implementation of the IPv6
     39  * Neighbor Discovery Protocol (NDP).
     40  */
     41 
     42 #ifdef	__cplusplus
     43 extern "C" {
     44 #endif
     45 
     46 #ifdef _KERNEL
/* Number of buckets in the nce_hash_tbl[] array of ndp_g_t. */
#define	NCE_TABLE_SIZE	256
/*
 * NDP Cache Entry.
 *
 * nce_ipversion tells whether an entry belongs to IPv4 (ARP) or IPv6 (NDP).
 * Per the locking notes that accompany ndp_g_t, nce_lock protects nce_pcnt,
 * nce_rcnt, nce_qd_mp, nce_state, nce_res_mp, nce_refcnt and nce_last.
 */
typedef struct nce_s {
	struct	nce_s	*nce_next;	/* Hash chain next pointer */
	struct	nce_s	**nce_ptpn;	/* Pointer to previous next */
	struct 	ill_s	*nce_ill;	/* Associated ill */
	uint16_t	nce_flags;	/* NCE_F_* bits */
	uint16_t	nce_state;	/* See reachability states in if.h */
	int16_t		nce_pcnt;	/* Probe counter */
	uint16_t	nce_rcnt;	/* Retransmit counter */
	in6_addr_t	nce_addr;	/* Address of the neighbor */
	/*
	 * If not all ones, the mask allows an entry to respond to requests
	 * for a group of addresses, for instance multicast addresses.
	 */
	in6_addr_t	nce_mask;
	in6_addr_t	nce_extract_mask; /* For mappings */
	uint32_t	nce_ll_extract_start;	/* For mappings */
	/*
	 * nce_first_mp_to_free and nce_last_mp_to_free alias the first and
	 * last of the three consecutive mblk pointers nce_fp_mp, nce_res_mp
	 * and nce_qd_mp.  NOTE(review): presumably consumers walk this range
	 * when releasing the entry's mblks -- confirm against the users, and
	 * keep these three fields adjacent and in this order.
	 */
#define	nce_first_mp_to_free	nce_fp_mp
	mblk_t		*nce_fp_mp;	/* link layer fast path mp */
	mblk_t		*nce_res_mp;	/* DL_UNITDATA_REQ */
	mblk_t		*nce_qd_mp;	/* Head outgoing queued packets */
#define	nce_last_mp_to_free	nce_qd_mp
	mblk_t		*nce_timer_mp;	/* NDP timer mblk */
	mblk_t		*nce_mp;	/* mblk we are in, last to be freed */
	uint64_t	nce_last;	/* Time last reachable in msec */
	uint32_t	nce_refcnt;	/* nce active usage count */
	kmutex_t	nce_lock;	/* See the locking notes that */
					/* accompany ndp_g_t for what */
					/* this field protects */
	int		nce_unsolicit_count; /* Unsolicited Adv count */
	struct nce_s	*nce_fastpath;	/* for fastpath list */
	timeout_id_t	nce_timeout_id;	/* id stored by NDP_RESTART_TIMER */
	uchar_t		nce_ipversion;	/* IPv4(ARP)/IPv6(NDP) version */
	uint_t		nce_defense_count;	/* number of NDP conflicts */
	uint_t		nce_defense_time;	/* last time defended (secs) */
	uint64_t	nce_init_time;  /* time when it was set to ND_INITIAL */
	boolean_t	nce_trace_disable;	/* True when alloc fails */
} nce_t;
     83 
     84 /*
     85  * The ndp_g_t structure contains protocol specific information needed
     86  * to synchronize and manage neighbor cache entries for IPv4 and IPv6.
     87  * There are 2 such structures, ips_ndp4 and ips_ndp6.
     88  * ips_ndp6 contains the data structures needed for IPv6 Neighbor Discovery.
     89  * ips_ndp4 has IPv4 link layer info in its nce_t structures
     90  * Note that the nce_t is not currently used as the arp cache itself;
     91  * it is used for the following purposes:
     92  *   - queue packets in nce_qd_mp while waiting for arp resolution to complete
     93  *   - nce_{res, fp}_mp are used to track DL_UNITDATA request/responses.
     94  *   - track state of ARP resolution in the nce_state;
     95  *
     96  * Locking notes:
     97  * ndp_g_lock protects neighbor cache tables access and
     98  * insertion/removal of cache entries into/from these tables.
 * nce_lock protects nce_pcnt, nce_rcnt, nce_qd_mp, nce_state,
 * nce_res_mp, nce_refcnt and nce_last.
    101  * nce_refcnt is incremented for every ire pointing to this nce and
    102  * every time ndp_lookup() finds an nce.
    103  * Should there be a need to obtain nce_lock and ndp_g_lock, ndp_g_lock is
    104  * acquired first.
    105  * To avoid becoming exclusive when deleting NCEs, ndp_walk() routine holds
    106  * the ndp_g_lock (i.e global lock) and marks NCEs to be deleted with
    107  * NCE_F_CONDEMNED.  When all active users of such NCEs are gone the walk
    108  * routine passes a list for deletion to nce_ire_delete_list().
    109  *
    110  * When the link-layer address of some onlink host changes, ARP will send
    111  * an AR_CN_ANNOUNCE message to ip so that stale neighbor-cache
    112  * information will not get used. This message is processed in ip_arp_news()
    113  * by walking the nce list, and updating as appropriate. The ndp_g_hw_change
    114  * flag is set by ip_arp_news() to notify nce_t users that ip_arp_news() is
    115  * in progress.
    116  */
typedef	struct ndp_g_s {
	kmutex_t	ndp_g_lock;	/* Lock protecting the cache hash table */
	nce_t		*nce_mask_entries;	/* mask not all ones */
	nce_t		*nce_hash_tbl[NCE_TABLE_SIZE];	/* hash buckets */
	int		ndp_g_walker; /* # of threads actively walking hash list */
	boolean_t	ndp_g_walker_cleanup; /* true implies defer deletion. */
	/* Adjusted only through NDP_HW_CHANGE_INCR/NDP_HW_CHANGE_DECR. */
	int		ndp_g_hw_change; /* non-zero if nce flush in progress */
} ndp_g_t;
    125 
    126 #define	NDP_HW_CHANGE_INCR(ndp) {		\
    127 	mutex_enter(&(ndp)->ndp_g_lock);	\
    128 	(ndp)->ndp_g_hw_change++;		\
    129 	mutex_exit(&(ndp)->ndp_g_lock);		\
    130 }
    131 
    132 #define	NDP_HW_CHANGE_DECR(ndp) {		\
    133 	mutex_enter(&(ndp)->ndp_g_lock);	\
    134 	(ndp)->ndp_g_hw_change--;		\
    135 	mutex_exit(&(ndp)->ndp_g_lock);		\
    136 }
    137 
    138 /* nce_flags  */
    139 #define	NCE_F_PERMANENT		0x1
    140 #define	NCE_F_MAPPING		0x2
    141 #define	NCE_F_ISROUTER		0x4
    142 /*	unused			0x8 */
    143 #define	NCE_F_NONUD		0x10
    144 #define	NCE_F_ANYCAST		0x20
    145 #define	NCE_F_CONDEMNED		0x40
    146 #define	NCE_F_UNSOL_ADV		0x80
    147 #define	NCE_F_BCAST		0x100
    148 
    149 #define	NCE_EXTERNAL_FLAGS_MASK \
    150 	(NCE_F_PERMANENT | NCE_F_MAPPING | NCE_F_ISROUTER | NCE_F_NONUD | \
    151 	NCE_F_ANYCAST | NCE_F_UNSOL_ADV)
    152 
    153 /* State REACHABLE, STALE, DELAY or PROBE */
    154 #define	NCE_ISREACHABLE(nce)			\
    155 	(((((nce)->nce_state) >= ND_REACHABLE) &&	\
    156 	((nce)->nce_state) <= ND_PROBE))
    157 
    158 /* NDP flags set in SOL/ADV requests */
    159 #define	NDP_UNICAST		0x1
    160 #define	NDP_ISROUTER		0x2
    161 #define	NDP_SOLICITED		0x4
    162 #define	NDP_ORIDE		0x8
    163 #define	NDP_PROBE		0x10
    164 
    165 /* Number of packets queued in NDP for a neighbor */
    166 #define	ND_MAX_Q		4
    167 
    168 
    169 #ifdef DEBUG
    170 #define	NCE_TRACE_REF(nce)		nce_trace_ref(nce)
    171 #define	NCE_UNTRACE_REF(nce)		nce_untrace_ref(nce)
    172 #else
    173 #define	NCE_TRACE_REF(nce)
    174 #define	NCE_UNTRACE_REF(nce)
    175 #endif
    176 
    177 #define	NCE_REFHOLD(nce) {		\
    178 	mutex_enter(&(nce)->nce_lock);	\
    179 	(nce)->nce_refcnt++;		\
    180 	ASSERT((nce)->nce_refcnt != 0);	\
    181 	NCE_TRACE_REF(nce);		\
    182 	mutex_exit(&(nce)->nce_lock);	\
    183 }
    184 
    185 #define	NCE_REFHOLD_NOTR(nce) {		\
    186 	mutex_enter(&(nce)->nce_lock);	\
    187 	(nce)->nce_refcnt++;		\
    188 	ASSERT((nce)->nce_refcnt != 0);	\
    189 	mutex_exit(&(nce)->nce_lock);	\
    190 }
    191 
    192 #define	NCE_REFHOLD_LOCKED(nce) {		\
    193 	ASSERT(MUTEX_HELD(&(nce)->nce_lock));	\
    194 	(nce)->nce_refcnt++;			\
    195 	NCE_TRACE_REF(nce);			\
    196 }
    197 
    198 /* nce_inactive destroys the mutex thus no mutex_exit is needed */
    199 #define	NCE_REFRELE(nce) {		\
    200 	mutex_enter(&(nce)->nce_lock);	\
    201 	NCE_UNTRACE_REF(nce);		\
    202 	ASSERT((nce)->nce_refcnt != 0);	\
    203 	if (--(nce)->nce_refcnt == 0)	\
    204 		ndp_inactive(nce);	\
    205 	else {				\
    206 		mutex_exit(&(nce)->nce_lock);\
    207 	}				\
    208 }
    209 
    210 #define	NCE_REFRELE_NOTR(nce) {		\
    211 	mutex_enter(&(nce)->nce_lock);	\
    212 	ASSERT((nce)->nce_refcnt != 0);	\
    213 	if (--(nce)->nce_refcnt == 0)	\
    214 		ndp_inactive(nce);	\
    215 	else {				\
    216 		mutex_exit(&(nce)->nce_lock);\
    217 	}				\
    218 }
    219 
    220 #define	NDP_RESTART_TIMER(nce, ms) {	\
    221 	ASSERT(!MUTEX_HELD(&(nce)->nce_lock));				\
    222 	if ((nce)->nce_timeout_id != 0) {				\
    223 		/* Ok to untimeout bad id. we don't hold a lock. */	\
    224 		(void) untimeout((nce)->nce_timeout_id);		\
    225 	}								\
    226 	mutex_enter(&(nce)->nce_lock);					\
    227 	/* Don't start the timer if the nce has been deleted */		\
    228 	if (!((nce)->nce_flags & NCE_F_CONDEMNED)) 			\
    229 		nce->nce_timeout_id = timeout(ndp_timer, nce, 		\
    230 		    MSEC_TO_TICK(ms) == 0 ? 1 : MSEC_TO_TICK(ms));	\
    231 	mutex_exit(&(nce)->nce_lock);					\
    232 }
    233 
    234 /* Structure for ndp_cache_count() */
    235 typedef struct {
    236 	int	ncc_total;	/* Total number of NCEs */
    237 	int	ncc_host;	/* NCE entries without R bit set */
    238 } ncc_cache_count_t;
    239 
    240 /*
    241  * Structure of ndp_cache_reclaim().  Each field is a fraction i.e. 1 means
    242  * reclaim all, N means reclaim 1/Nth of all entries, 0 means reclaim none.
    243  */
    244 typedef struct {
    245 	int	ncr_host;	/* Fraction for host entries */
    246 } nce_cache_reclaim_t;
    247 
/*
 * Structure for nce_delete_hw_changed; specifies an IPv4 address to link-layer
 * address mapping.  Any route that has a cached copy of a mapping for that
 * IPv4 address that doesn't match the given mapping must be purged.
 *
 * NOTE(review): hwm_hwaddr is a bare pointer; nothing in this header says
 * who owns the buffer it points to or how long it must stay valid.
 */
typedef struct {
	ipaddr_t hwm_addr;	/* IPv4 address */
	uint_t hwm_hwlen;	/* Length of hardware address (may be 0) */
	uchar_t *hwm_hwaddr;	/* Pointer to new hardware address, if any */
} nce_hw_map_t;
    258 
    259 /* When SAP is greater than zero address appears before SAP */
    260 #define	NCE_LL_ADDR_OFFSET(ill)	(((ill)->ill_sap_length) < 0 ? \
    261 	(sizeof (dl_unitdata_req_t)) : \
    262 	((sizeof (dl_unitdata_req_t)) + (ABS((ill)->ill_sap_length))))
    263 
    264 #define	NCE_LL_SAP_OFFSET(ill) (((ill)->ill_sap_length) < 0 ? \
    265 	((sizeof (dl_unitdata_req_t)) + ((ill)->ill_phys_addr_length)) : \
    266 	(sizeof (dl_unitdata_req_t)))
    267 
    268 #ifdef _BIG_ENDIAN
    269 #define	NCE_LL_SAP_COPY(ill, mp) \
    270 	{ \
    271 	size_t abs_sap_len = ABS((ill)->ill_sap_length); \
    272 	if (abs_sap_len > 0) { \
    273 		ASSERT(abs_sap_len <= sizeof (uint32_t)); \
    274 		ASSERT((mp)->b_rptr + NCE_LL_SAP_OFFSET(ill) + \
    275 		    abs_sap_len <= ((mp)->b_wptr)); \
    276 		bcopy((uint8_t *)&(ill)->ill_sap + sizeof (ill->ill_sap) - \
    277 		    abs_sap_len, \
    278 		    ((mp)->b_rptr + NCE_LL_SAP_OFFSET(ill)), \
    279 		    abs_sap_len); \
    280 	} \
    281 	}
    282 #else
    283 #define	NCE_LL_SAP_COPY(ill, mp) \
    284 	{ \
    285 	size_t abs_sap_len = ABS((ill)->ill_sap_length); \
    286 	if (abs_sap_len > 0) { \
    287 		uint32_t abs_sap_len = ABS((ill)->ill_sap_length); \
    288 		ASSERT(abs_sap_len <= sizeof (uint32_t)); \
    289 		ASSERT((mp)->b_rptr + NCE_LL_SAP_OFFSET(ill) + \
    290 		    abs_sap_len <= ((mp)->b_wptr)); \
    291 		bcopy(&((ill)->ill_sap), \
    292 		((mp)->b_rptr + NCE_LL_SAP_OFFSET(ill)), \
    293 		abs_sap_len); \
    294 	} \
    295 	}
    296 #endif
    297 
    298 /*
    299  * Exclusive-or the 6 bytes that are likely to contain the MAC
    300  * address. Assumes table_size does not exceed 256.
    301  * Assumes EUI-64 format for good hashing.
    302  */
    303 #define	NCE_ADDR_HASH_V6(addr, table_size)				\
    304 	(((addr).s6_addr8[8] ^ (addr).s6_addr8[9] ^			\
    305 	(addr).s6_addr8[10] ^ (addr).s6_addr8[13] ^			\
    306 	(addr).s6_addr8[14] ^ (addr).s6_addr8[15]) % (table_size))
    307 
/*
 * NDP Cache Entry Hash Table
 *
 * NCE_TABLE_SIZE is already defined earlier in this header, ahead of the
 * nce_t definition.  The repeated definition is accepted only because the
 * two replacement lists are identical; change both together.
 */
#define	NCE_TABLE_SIZE	256
    310 
    311 extern	void	ndp_cache_count(nce_t *, char *);
    312 extern	void	ndp_cache_reclaim(nce_t *, char *);
    313 extern	void	ndp_delete(nce_t *);
    314 extern	void	ndp_delete_per_ill(nce_t *, uchar_t *);
    315 extern	void	ndp_fastpath_flush(nce_t *, char  *);
    316 extern	boolean_t ndp_fastpath_update(nce_t *, void  *);
    317 extern	nd_opt_hdr_t *ndp_get_option(nd_opt_hdr_t *, int, int);
    318 extern	void	ndp_inactive(nce_t *);
    319 extern	void	ndp_input(ill_t *, mblk_t *, mblk_t *);
    320 extern	boolean_t ndp_lookup_ipaddr(in_addr_t, netstack_t *);
    321 extern	nce_t	*ndp_lookup_v6(ill_t *, const in6_addr_t *, boolean_t);
    322 extern	nce_t	*ndp_lookup_v4(ill_t *, const in_addr_t *, boolean_t);
    323 extern	int	ndp_mcastreq(ill_t *, const in6_addr_t *, uint32_t, uint32_t,
    324     mblk_t *);
    325 extern	int	ndp_noresolver(ill_t *, const in6_addr_t *);
    326 extern	void	ndp_process(nce_t *, uchar_t *, uint32_t, boolean_t);
    327 extern	int	ndp_query(ill_t *, lif_nd_req_t *);
    328 extern	int	ndp_resolver(ill_t *, const in6_addr_t *, mblk_t *, zoneid_t);
    329 extern	int	ndp_sioc_update(ill_t *, lif_nd_req_t *);
    330 extern	boolean_t	ndp_verify_optlen(nd_opt_hdr_t *, int);
    331 extern	void	ndp_timer(void *);
    332 extern	void	ndp_walk(ill_t *, pfi_t, void *, ip_stack_t *);
    333 extern	void	ndp_walk_common(ndp_g_t *, ill_t *, pfi_t,
    334     void *, boolean_t);
    335 extern	boolean_t	ndp_restart_dad(nce_t *);
    336 extern	void	ndp_do_recovery(ipif_t *);
    337 extern	void	nce_resolv_failed(nce_t *);
    338 extern	void	arp_resolv_failed(nce_t *);
    339 extern	void	nce_fastpath_list_add(nce_t *);
    340 extern	void	nce_fastpath_list_delete(nce_t *);
    341 extern	void	nce_fastpath_list_dispatch(ill_t *,
    342     boolean_t (*)(nce_t *, void  *), void *);
    343 extern	void	nce_queue_mp_common(nce_t *, mblk_t *, boolean_t);
    344 extern	void	nce_delete_hw_changed(nce_t *, void *);
    345 extern	void	nce_fastpath(nce_t *);
    346 extern	int	ndp_add_v6(ill_t *, uchar_t *, const in6_addr_t *,
    347     const in6_addr_t *, const in6_addr_t *, uint32_t, uint16_t, uint16_t,
    348     nce_t **);
    349 extern	int	ndp_lookup_then_add_v6(ill_t *, uchar_t *,
    350     const in6_addr_t *, const in6_addr_t *, const in6_addr_t *, uint32_t,
    351     uint16_t, uint16_t, nce_t **);
    352 extern	int	ndp_lookup_then_add_v4(ill_t *,
    353     const in_addr_t *, uint16_t, nce_t **, nce_t *);
    354 
    355 #ifdef DEBUG
    356 extern	void	nce_trace_ref(nce_t *);
    357 extern	void	nce_untrace_ref(nce_t *);
    358 #endif
    359 
    360 #endif	/* _KERNEL */
    361 
    362 #ifdef	__cplusplus
    363 }
    364 #endif
    365 
    366 #endif	/* _INET_IP_NDP_H */
    367