Home | History | Annotate | Download | only in inet
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 #ifndef _INET_IPNET_H
     28 #define	_INET_IPNET_H
     29 
     30 #ifdef __cplusplus
     31 extern "C" {
     32 #endif
     33 
     34 #include <sys/types.h>
     35 #include <sys/netstack.h>
     36 #include <sys/list.h>
     37 #include <netinet/in.h>
     38 #include <net/if.h>
     39 #include <net/bpf.h>
     40 #include <net/bpfdesc.h>
     41 #include <sys/avl.h>
     42 #include <sys/neti.h>
     43 #include <sys/hook_event.h>
     44 #include <sys/zone.h>
     45 #include <sys/kstat.h>
     46 
     47 typedef struct ipnet_kstats_s	{
        	/*
        	 * Named kstat counters for ipnet.  An instance is embedded in
        	 * ipnet_stack_t as ips_stats, and IPSK_BUMP() increments the
        	 * value.ui64 member of the counter selected by field name.
        	 * What each counter measures is decided where it is bumped,
        	 * which is not visible in this header.
        	 */
     48 	kstat_named_t	ik_duplicationFail;
     49 	kstat_named_t	ik_dispatchOk;
     50 	kstat_named_t	ik_dispatchFail;
     51 	kstat_named_t	ik_dispatchHeaderDrop;
     52 	kstat_named_t	ik_dispatchDupDrop;
     53 	kstat_named_t	ik_dispatchPutDrop;
     54 	kstat_named_t	ik_dispatchDeliver;
     55 	kstat_named_t	ik_acceptOk;
     56 	kstat_named_t	ik_acceptFail;
     57 } ipnet_kstats_t;
     58 
     /*
      * Increment the 64-bit named-kstat counter _y held in the ips_stats
      * member of the structure that _x points to (ipnet_stack_t carries
      * ips_stats).  _y is the bare member name, e.g. ik_dispatchOk.
      *
      * The whole expansion is parenthesized so that it stays a single
      * expression wherever it is used.  The increment is a plain ++ and
      * the macro yields the counter's value from before the increment.
      */
     #define	IPSK_BUMP(_x, _y)	((_x)->ips_stats._y.value.ui64++)
     60 
     61 /*
     62  * Structure used to hold information for both IPv4 and IPv6 addresses.
     63  *
     64  * When ifa_shared is non-NULL, it points to a "fake" ipnetif_t structure
     65  * that represents the network interface for each zone that shares its
     66  * network stack. This is used by BPF to build a list of interface names
     67  * present in each zone. Multiple ipnetif_addr_t's may point to a single
     68  * ipnetif_t using ifa_shared. The typical case is the global zone has
     69  * a bge0 that other zones use as bge0:1, bge0:2, etc. In ipnet, the
     70  * ipnetif_addr_t's that store the IP address for bge0:1, etc, would
     71  * point to an ipnetif_t stored in the if_avl_by_shared tree that has
     72  * the name "bge0".
     73  */
     74 typedef struct ipnetif_addr {
     75 	union {
     76 		ipaddr_t	ifau_ip4addr;	/* address in IPv4 form */
     77 		in6_addr_t	ifau_ip6addr;	/* address in IPv6 form */
     78 	} ifa_addr;	/* no family tag is stored alongside this union */
     79 	ipaddr_t	ifa_brdaddr;	/* NOTE(review): ipaddr_t, so presumably IPv4-only broadcast address; confirm */
     80 	zoneid_t	ifa_zone;	/* zone id associated with this address */
     81 	uint64_t	ifa_id;		/* identifier; how it is assigned is not visible here */
     82 	list_node_t	ifa_link;	/* list linkage; presumably if_ip4addr_list or if_ip6addr_list, confirm */
     83 	struct ipnetif	*ifa_shared;	/* when non-NULL, the "fake" per-zone ipnetif_t described above */
     84 } ipnetif_addr_t;
        /* Shorthand so the union members can be named without going through ifa_addr. */
     85 #define	ifa_ip4addr	ifa_addr.ifau_ip4addr
     86 #define	ifa_ip6addr	ifa_addr.ifau_ip6addr
     87 
     88 /*
     89  * Structure describes the ipnet module representation of an ip interface.
     90  * The structure holds both IPv4 and IPv6 addresses, the address lists are
     91  * protected by a mutex. The ipnetif structures are held per stack instance
     92  * within avl trees indexed on name and ip index.
     93  *
     94  * if_avl_by_shared is used by zones that share their instance of IP with
     95  * other zones. It is used to store ipnetif_t structures. An example of this
     96  * is the global zone sharing its instance of IP with other local zones.
     97  * In this case, if_avl_by_shared is a tree of names that are in active use
     98  * by zones using a shared instance of IP.
     99  * The value in if_sharecnt represents the number of ipnetif_addr_t's that
    100  * point to it.
    101  */
    102 typedef struct ipnetif {
    103 	char		if_name[LIFNAMSIZ];	/* interface name */
    104 	uint_t		if_flags;	/* IPNETIF_* bits, see "if_flags" below */
    105 	uint_t		if_index;	/* ip index of the interface */
    106 	kmutex_t	if_addr_lock;	/* protects both addr lists */
    107 	list_t		if_ip4addr_list;	/* protected by if_addr_lock */
    108 	list_t		if_ip6addr_list;	/* protected by if_addr_lock */
    109 	avl_node_t	if_avl_by_index;	/* avl linkage, tree indexed on ip index */
    110 	avl_node_t	if_avl_by_name;	/* avl linkage, tree indexed on name */
    111 	dev_t		if_dev;
    112 	uint_t		if_multicnt;	/* protected by ips_event_lock */
    113 	kmutex_t	if_reflock;	/* protects if_refcnt and if_sharecnt */
    114 	int		if_refcnt;	/* protected by if_reflock */
    115 	zoneid_t	if_zoneid;
    116 	avl_node_t	if_avl_by_shared;	/* protected by ips_avl_lock */
    117 	struct ipnet_stack *if_stackp;	/* presumably the owning per-netstack data; confirm */
    118 	int		if_sharecnt;	/* protected by if_reflock */
    119 } ipnetif_t;
    120 
    121 /* if_flags: each value is a distinct single bit, so they can be OR-ed together */
    122 #define	IPNETIF_IPV4PLUMBED	0x01
    123 #define	IPNETIF_IPV6PLUMBED	0x02
    124 #define	IPNETIF_IPV4ALLMULTI	0x04
    125 #define	IPNETIF_IPV6ALLMULTI	0x08
    126 #define	IPNETIF_LOOPBACK	0x10
    127 
    128 /*
    129  * Structure used by the accept callback function.  This is simply an address
    130  * pointer into a packet (either IPv4 or IPv6), along with an address family
    131  * that denotes which pointer is valid.
    132  */
    133 typedef struct ipnet_addrp {
    134 	sa_family_t	iap_family;	/* says which union member below is valid */
    135 	union {
    136 		ipaddr_t	*iapu_addr4;	/* points at an IPv4 address in the packet */
    137 		in6_addr_t	*iapu_addr6;	/* points at an IPv6 address in the packet */
    138 	} iap_addrp;
    139 } ipnet_addrp_t;
        /* Shorthand so the union members can be named without going through iap_addrp. */
    140 #define	iap_addr4	iap_addrp.iapu_addr4
    141 #define	iap_addr6	iap_addrp.iapu_addr6
    142 
    143 struct ipnet;
    144 struct ipobs_hook_data;	/* NOTE(review): not referenced anywhere else in this header */
        /*
         * Accept callback type, stored in ipnet_t as ipnet_acceptfn.  It is passed
         * the ipnet instance, a struct hook_pkt_observe_s and two ipnet_addrp_t
         * address pointers, and returns a boolean_t.  Presumably the two
         * addresses are the packet's source and destination, in that order --
         * TODO confirm against the implementations.
         */
    145 typedef boolean_t ipnet_acceptfn_t(struct ipnet *, struct hook_pkt_observe_s *,
    146     ipnet_addrp_t *, ipnet_addrp_t *);
    147 
    148 /*
    149  * Per instance data for all open streams. Instance data is held on a
    150  * per-netstack list; see struct ipnet_stack below.
    151  */
    152 typedef struct ipnet {
    153 	queue_t		*ipnet_rq;	/* read queue pointer */
    154 	minor_t		ipnet_minor;	/* minor number for this instance */
    155 	ipnetif_t	*ipnet_if;	/* ipnetif for this open instance */
    156 	zoneid_t	ipnet_zoneid;	/* zoneid the device was opened in */
    157 	uint_t		ipnet_flags;	/* IPNET_* bits, see "ipnet_flags" below */
    158 	t_scalar_t	ipnet_family;	/* protocol family of this instance */
    159 	t_uscalar_t	ipnet_dlstate;	/* dlpi state */
    160 	list_node_t	ipnet_next;	/* linkage on the per-netstack list of instances */
    161 	netstack_t	*ipnet_ns;	/* netstack of zone we were opened in */
    162 	ipnet_acceptfn_t *ipnet_acceptfn; /* accept callback function pointer */
    163 	hook_t		*ipnet_hook;	/* hook token to unregister */
    164 	void		*ipnet_data;	/* value to pass back to bpf_itap */
    165 } ipnet_t;
    166 
    167 /* ipnet_flags: each value is a distinct single bit, so they can be OR-ed together */
    168 #define	IPNET_PROMISC_PHYS	0x01
    169 #define	IPNET_PROMISC_MULTI	0x02
    170 #define	IPNET_PROMISC_SAP	0x04
    171 #define	IPNET_INFO		0x08
    172 #define	IPNET_LOMODE		0x10
    173 
    174 /*
    175  * Per-netstack data holding:
    176  * - net_handle_t references for IPv4 and IPv6 for this netstack.
    177  * - avl trees by name and index for ip interfaces associated with this
    178  *   netstack. The trees are protected by ips_avl_lock.
    179  * - ips_str_list is a list of open client streams.  ips_walkers_lock in
    180  *   conjunction with ips_walkers_cv and ips_walkers_cnt synchronize access to
    181  *   the list.  The count is incremented in ipnet_dispatch() at the start of a
    182  *   walk and decremented when the walk is finished. If the walkers count is 0
    183  *   then we cv_broadcast() to wake any threads waiting on the walkers count.
    184  * - ips_event_lock synchronizes ipnet_if_init() and incoming NIC info events.
    185  *   We cannot be processing any NIC info events while initializing interfaces
    186  *   in ipnet_if_init().
    187  *
    188  * Note on lock ordering: If a thread needs to both hold the ips_event_lock
    189  * and any other lock such as ips_walkers_lock, ips_avl_lock, or if_addr_lock,
    190  * the ips_event_lock must be held first.  This lock ordering is mandated by
    191  * ipnet_nicevent_cb() which must always grab ips_event_lock before continuing
    192  * with processing NIC events.
    193  */
    194 typedef struct ipnet_stack {
    195 	net_handle_t	ips_ndv4;	/* net_handle_t reference for IPv4 */
    196 	net_handle_t	ips_ndv6;	/* net_handle_t reference for IPv6 */
    197 	netstack_t	*ips_netstack;
    198 	hook_t		*ips_nicevents;
    199 	kmutex_t	ips_event_lock;	/* taken before any other lock here */
    200 	kmutex_t	ips_avl_lock;	/* protects the avl trees */
    201 	avl_tree_t	ips_avl_by_index;	/* protected by ips_avl_lock */
    202 	avl_tree_t	ips_avl_by_name;	/* protected by ips_avl_lock */
    203 	kmutex_t	ips_walkers_lock;	/* with cv and cnt, guards ips_str_list */
    204 	kcondvar_t	ips_walkers_cv;	/* broadcast when ips_walkers_cnt is 0 */
    205 	uint_t		ips_walkers_cnt;	/* walks of ips_str_list in progress */
    206 	list_t		ips_str_list;	/* open client streams */
    207 	kstat_t		*ips_kstatp;
    208 	ipnet_kstats_t	ips_stats;	/* counters incremented via IPSK_BUMP() */
    209 	avl_tree_t	ips_avl_by_shared;	/* protected by ips_avl_lock */
    210 	hook_t		*ips_hook;
    211 } ipnet_stack_t;
    212 
    213 /*
    214  * Template for dl_info_ack_t initialization.  We don't have an address, so we
    215  * set the address length to just the SAP length (16 bits).  We don't really
    216  * have a maximum SDU, but setting it to UINT_MAX proved problematic with
    217  * applications that performed arithmetic on dl_max_sdu and wrapped around, so
    218  * we sleaze out and use INT_MAX.
         *
         * The initializer is positional (no designators): each entry is labelled
         * with the member it is meant to initialize, so the entries must stay in
         * the same order as the members of dl_info_ack_t.
    219  */
    220 #define	IPNET_INFO_ACK_INIT {						\
    221 	DL_INFO_ACK,			/* dl_primitive */		\
    222 	INT_MAX,			/* dl_max_sdu */		\
    223 	0,				/* dl_min_sdu */		\
    224 	sizeof (uint16_t),		/* dl_addr_length */ 		\
    225 	DL_IPNET,			/* dl_mac_type */		\
    226 	0,				/* dl_reserved */		\
    227 	0,				/* dl_current_state */		\
    228 	sizeof (uint16_t),		/* dl_sap_length */ 		\
    229 	DL_CLDLS,			/* dl_service_mode */		\
    230 	0,				/* dl_qos_length */		\
    231 	0,				/* dl_qos_offset */		\
    232 	0,				/* dl_range_length */		\
    233 	0,				/* dl_range_offset */		\
    234 	DL_STYLE1,			/* dl_provider_style */		\
    235 	0,				/* dl_addr_offset */		\
    236 	DL_VERSION_2,			/* dl_version */		\
    237 	0,				/* dl_brdcst_addr_length */	\
    238 	0				/* dl_brdcst_addr_offset */	\
    239 }
    240 
    241 typedef void ipnet_walkfunc_t(const char *, void *, dev_t);
        /*
         * ipnet_walkfunc_t is the callback type taken by ipnet_walk_if().  Its
         * parameters are unnamed; presumably (interface name, caller-supplied
         * argument, interface dev_t) -- TODO confirm against ipnet_walk_if().
         */
    242 
        /*
         * Functions and data declared here and defined outside this header.
         * NOTE(review): apart from ipnet_find_by_zoneid(), the prototypes do
         * not name their parameters, so argument meanings have to be checked
         * against the definitions.
         */
    243 extern int	ipnet_client_open(ipnetif_t *, ipnetif_t **);
    244 extern void	ipnet_client_close(ipnetif_t *);
    245 extern void	ipnet_close_byhandle(ipnetif_t *);
    246 extern ipnet_stack_t *ipnet_find_by_zoneid(zoneid_t zoneid);
    247 extern int	ipnet_get_linkid_byname(const char *, datalink_id_t *,
    248     zoneid_t);
    249 extern dev_t	ipnet_if_getdev(char *, zoneid_t);
    250 extern const char *ipnet_name(ipnetif_t *);
    251 extern int	ipnet_open_byname(const char *, ipnetif_t **, zoneid_t);
    252 extern int	ipnet_promisc_add(void *, uint_t, void *, uintptr_t *, int);
    253 extern void	ipnet_promisc_remove(void *);
    254 extern void	ipnet_rele(ipnet_stack_t *);
    255 extern void	ipnet_set_itap(bpf_itap_fn_t);
    256 extern void	ipnet_walk_if(ipnet_walkfunc_t *, void *, zoneid_t);
    257 
    258 extern bpf_provider_t	bpf_ipnet;	/* declaration only; defined elsewhere */
    259 
    260 #ifdef __cplusplus
    261 }
    262 #endif
    263 
    264 #endif /* _INET_IPNET_H */
    265