Home | History | Annotate | Download | only in inet
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #ifndef	_INET_IPCLASSIFIER_H
     27 #define	_INET_IPCLASSIFIER_H
     28 
     29 #ifdef	__cplusplus
     30 extern "C" {
     31 #endif
     32 
     33 #include <inet/common.h>
     34 #include <inet/ip.h>
     35 #include <inet/mi.h>
     36 #include <inet/tcp.h>
     37 #include <inet/ip6.h>
     38 #include <netinet/in.h>		/* for IPPROTO_* constants */
     39 #include <sys/sdt.h>
     40 #include <sys/socket_proto.h>
     41 #include <sys/sunddi.h>
     42 #include <sys/sunldi.h>
     43 
/*
 * Receive callback signature. conn_t's conn_recv and conn_recvicmp members
 * are declared with this type.
 */
typedef void (*edesc_rpf)(void *, mblk_t *, void *, ip_recv_attr_t *);
/* Forward declarations: only pointers to these are used in this header. */
struct icmph_s;
struct icmp6_hdr;
/*
 * ICMP-error verification callback signature (conn_t's conn_verifyicmp).
 * It is handed both an IPv4 (icmph_s) and an IPv6 (icmp6_hdr) header
 * pointer and returns a boolean_t.
 */
typedef boolean_t (*edesc_vpf)(conn_t *, void *, struct icmph_s *,
    struct icmp6_hdr *, ip_recv_attr_t *);
     49 
     50 /*
     51  * ==============================
     52  * =	The CONNECTION		=
     53  * ==============================
     54  */
     55 
     56 /*
     57  * The connection structure contains the common information/flags/ref needed.
     58  * Implementation will keep the connection struct, the layers (with their
     59  * respective data for event i.e. tcp_t if event was tcp_input_data) all in one
     60  * contiguous memory location.
     61  */
     62 
/*
 * Conn Flags
 *
 * All IPCL_* values below are bits of conn_t's conn_flags word; the
 * IPCL_IS_*() predicates test them there. The state bits occupy the upper
 * half of the word and the conn-type bits the lower half.
 */
/* Unused			0x00020000 */
/* Unused			0x00040000 */
#define	IPCL_FULLY_BOUND	0x00080000	/* Bound to correct squeue */
/* Unused			0x00100000 */
/* Unused 			0x00200000 */
/* Unused			0x00400000 */
#define	IPCL_CL_LISTENER	0x00800000	/* Cluster listener */
/* Unused			0x01000000 */
/* Unused			0x02000000 */
/* Unused			0x04000000 */
/* Unused			0x08000000 */
/* Unused			0x10000000 */
/* Unused			0x20000000 */
#define	IPCL_CONNECTED		0x40000000	/* Conn in connected table */
#define	IPCL_BOUND		0x80000000	/* Conn in bind table */

/* Flags identifying the type of conn */
#define	IPCL_TCPCONN		0x00000001	/* From tcp_conn_cache */
#define	IPCL_SCTPCONN		0x00000002	/* From sctp_conn_cache */
#define	IPCL_IPCCONN		0x00000004	/* From ip_conn_cache */
#define	IPCL_UDPCONN		0x00000008	/* From udp_conn_cache */
#define	IPCL_RAWIPCONN		0x00000010	/* From rawip_conn_cache */
#define	IPCL_RTSCONN		0x00000020	/* From rts_conn_cache */
/* Unused			0x00000040 */
#define	IPCL_IPTUN		0x00000080	/* iptun module above us */

#define	IPCL_NONSTR		0x00001000	/* A non-STREAMS socket */
/*
 * NOTE(review): 0x10000000 is already listed as unused in the Conn Flags
 * list above; confirm whether a different bit was meant here.
 */
/* Unused			0x10000000 */

#define	IPCL_REMOVED		0x00000100
#define	IPCL_REUSED		0x00000200
     95 
     96 #define	IPCL_IS_CONNECTED(connp)					\
     97 	((connp)->conn_flags & IPCL_CONNECTED)
     98 
     99 #define	IPCL_IS_BOUND(connp)						\
    100 	((connp)->conn_flags & IPCL_BOUND)
    101 
    102 /*
    103  * Can't use conn_proto since we need to tell difference
    104  * between a real TCP socket and a SOCK_RAW, IPPROTO_TCP.
    105  */
    106 #define	IPCL_IS_TCP(connp)						\
    107 	((connp)->conn_flags & IPCL_TCPCONN)
    108 
    109 #define	IPCL_IS_SCTP(connp)						\
    110 	((connp)->conn_flags & IPCL_SCTPCONN)
    111 
    112 #define	IPCL_IS_UDP(connp)						\
    113 	((connp)->conn_flags & IPCL_UDPCONN)
    114 
    115 #define	IPCL_IS_RAWIP(connp)						\
    116 	((connp)->conn_flags & IPCL_RAWIPCONN)
    117 
    118 #define	IPCL_IS_RTS(connp)						\
    119 	((connp)->conn_flags & IPCL_RTSCONN)
    120 
    121 #define	IPCL_IS_IPTUN(connp)						\
    122 	((connp)->conn_flags & IPCL_IPTUN)
    123 
    124 #define	IPCL_IS_NONSTR(connp)	((connp)->conn_flags & IPCL_NONSTR)
    125 
/* Fanout (hash bucket) head; struct connf_s is defined after conn_s. */
typedef struct connf_s connf_t;

/*
 * One captured stack trace: ctb_depth plus up to CONN_STACK_DEPTH program
 * counters. conn_t embeds an array of these when CONN_DEBUG is defined.
 */
typedef struct
{
	int	ctb_depth;
#define	CONN_STACK_DEPTH	15
	pc_t	ctb_stack[CONN_STACK_DEPTH];
} conn_trace_t;
    134 
/*
 * ip helper minor-number info: a device number together with the vmem
 * arena it is associated with.
 */
typedef struct ip_helper_minor_info_s {
	dev_t	ip_minfo_dev;		/* Device */
	vmem_t	*ip_minfo_arena;	/* Arena */
} ip_helper_minfo_t;

/*
 * ip helper stream info
 *
 * conn_t's conn_helper_info member points at one of these.
 */
typedef struct ip_helper_stream_info_s {
	ldi_handle_t		iphs_handle;	/* LDI handle */
	queue_t 		*iphs_rq;	/* a queue_t; presumably read side */
	queue_t 		*iphs_wq;	/* a queue_t; presumably write side */
	ip_helper_minfo_t	*iphs_minfo;	/* minor/arena info (see above) */
} ip_helper_stream_info_t;
    149 
    150 /*
    151  * Mandatory Access Control mode, in conn_t's conn_mac_mode field.
    152  * 	CONN_MAC_DEFAULT: strict enforcement of MAC.
    153  * 	CONN_MAC_AWARE:   allows communications between unlabeled systems
    154  *			  and privileged daemons
    155  *	CONN_MAC_IMPLICIT: allows communications without explicit labels
    156  *		           on the wire with privileged daemons.
    157  *
    158  * CONN_MAC_IMPLICIT is intended specifically for labeled IPsec key management
    159  * in networks which don't pass CIPSO-labeled packets.
    160  */
/* Values for conn_t's 2-bit conn_mac_mode field. */
#define	CONN_MAC_DEFAULT 0	/* strict enforcement of MAC */
#define	CONN_MAC_AWARE 1	/* unlabeled systems <-> privileged daemons */
#define	CONN_MAC_IMPLICIT 2	/* no explicit labels on the wire */
    164 
    165 /*
    166  * conn receive ancillary definition.
    167  *
    168  * These are the set of socket options that make the receive side
    169  * potentially pass up ancillary data items.
    170  * We have a union with an integer so that we can quickly check whether
    171  * any ancillary data items need to be added.
    172  */
/*
 * Receive-ancillary option bits. crbu_all overlays the one-bit flags in
 * crbb so that "is any option set?" is a single integer test (crb_all).
 * Fifteen flag bits are declared; the rest of the 32-bit word is unused.
 */
typedef struct crb_s {
	union {
		uint32_t	crbu_all;	/* all flags as one word */
		struct {
			uint32_t
	crbb_recvdstaddr : 1,		/* IP_RECVDSTADDR option */
	crbb_recvopts : 1,		/* IP_RECVOPTS option */
	crbb_recvif : 1,		/* IP_RECVIF option */
	crbb_recvslla : 1,		/* IP_RECVSLLA option */

	crbb_recvttl : 1,		/* IP_RECVTTL option */
	crbb_ip_recvpktinfo : 1,	/* IP*_RECVPKTINFO option  */
	crbb_ipv6_recvhoplimit : 1,	/* IPV6_RECVHOPLIMIT option */
	crbb_ipv6_recvhopopts : 1,	/* IPV6_RECVHOPOPTS option */

	crbb_ipv6_recvdstopts : 1,	/* IPV6_RECVDSTOPTS option */
	crbb_ipv6_recvrthdr : 1,	/* IPV6_RECVRTHDR option */
	crbb_old_ipv6_recvdstopts : 1,	/* old form of IPV6_DSTOPTS */
	crbb_ipv6_recvrthdrdstopts : 1,	/* IPV6_RECVRTHDRDSTOPTS */

	crbb_ipv6_recvtclass : 1,	/* IPV6_RECVTCLASS */
	crbb_recvucred : 1,		/* IP_RECVUCRED option */
	crbb_timestamp : 1;		/* SO_TIMESTAMP "socket" option */

		} crbb;
	} crbu;
} crb_t;

/*
 * Shorthand accessors: applied as a member name on a crb_t lvalue they
 * reach through the crbu/crbb nesting, e.g. x.crb_recvif.
 */
#define	crb_all				crbu.crbu_all
#define	crb_recvdstaddr			crbu.crbb.crbb_recvdstaddr
#define	crb_recvopts			crbu.crbb.crbb_recvopts
#define	crb_recvif			crbu.crbb.crbb_recvif
#define	crb_recvslla			crbu.crbb.crbb_recvslla
#define	crb_recvttl			crbu.crbb.crbb_recvttl
#define	crb_ip_recvpktinfo		crbu.crbb.crbb_ip_recvpktinfo
#define	crb_ipv6_recvhoplimit		crbu.crbb.crbb_ipv6_recvhoplimit
#define	crb_ipv6_recvhopopts		crbu.crbb.crbb_ipv6_recvhopopts
#define	crb_ipv6_recvdstopts		crbu.crbb.crbb_ipv6_recvdstopts
#define	crb_ipv6_recvrthdr		crbu.crbb.crbb_ipv6_recvrthdr
#define	crb_old_ipv6_recvdstopts	crbu.crbb.crbb_old_ipv6_recvdstopts
#define	crb_ipv6_recvrthdrdstopts	crbu.crbb.crbb_ipv6_recvrthdrdstopts
#define	crb_ipv6_recvtclass		crbu.crbb.crbb_ipv6_recvtclass
#define	crb_recvucred			crbu.crbb.crbb_recvucred
#define	crb_timestamp			crbu.crbb.crbb_timestamp
    217 
    218 /*
    219  * The initial fields in the conn_t are setup by the kmem_cache constructor,
    220  * and are preserved when it is freed. Fields after that are bzero'ed when
    221  * the conn_t is freed.
    222  *
    223  * Much of the conn_t is protected by conn_lock.
    224  *
    225  * conn_lock is also used by some ULPs (like UDP and RAWIP) to protect
    226  * their state.
    227  */
/*
 * Per-endpoint connection state. Member order matters: everything from
 * conn_start_clr (conn_recv_ancillary) onward is bzero'ed on free, while
 * the members before it are preserved.
 */
struct conn_s {
	kmutex_t	conn_lock;		/* Guards conn_ref and more */
	uint32_t	conn_ref;		/* Reference counter */
	uint32_t	conn_flags;		/* Conn Flags */

	/*
	 * Protocol-private state. All members share storage; which one is
	 * meaningful presumably follows the IPCL_*CONN type bit in
	 * conn_flags - confirm against the allocation code.
	 */
	union {
		tcp_t		*cp_tcp;	/* Pointer to the tcp struct */
		struct udp_s	*cp_udp;	/* Pointer to the udp struct */
		struct icmp_s	*cp_icmp;	/* Pointer to rawip struct */
		struct rts_s	*cp_rts;	/* Pointer to rts struct */
		struct iptun_s	*cp_iptun;	/* Pointer to iptun_t */
		struct sctp_s	*cp_sctp;	/* For IPCL_SCTPCONN */
		void		*cp_priv;
	} conn_proto_priv;
#define	conn_tcp	conn_proto_priv.cp_tcp
#define	conn_udp	conn_proto_priv.cp_udp
#define	conn_icmp	conn_proto_priv.cp_icmp
#define	conn_rts	conn_proto_priv.cp_rts
#define	conn_iptun	conn_proto_priv.cp_iptun
#define	conn_sctp	conn_proto_priv.cp_sctp
#define	conn_priv	conn_proto_priv.cp_priv

	/* CONN_DEC_REF broadcasts on this when refs remain after a drop */
	kcondvar_t	conn_cv;
	uint8_t		conn_proto;		/* protocol type */

	edesc_rpf	conn_recv;		/* Pointer to recv routine */
	edesc_rpf	conn_recvicmp;		/* For ICMP error */
	edesc_vpf	conn_verifyicmp;	/* Verify ICMP error */

	ip_xmit_attr_t	*conn_ixa;		/* Options if no ancil data */

	/* Fields after this are bzero'ed when the conn_t is freed. */
#define	conn_start_clr	conn_recv_ancillary

	/* Options for receive-side ancillary data */
	crb_t		conn_recv_ancillary;

	squeue_t	*conn_sqp;		/* Squeue for processing */
	uint_t		conn_state_flags;	/* IP state flags */

	int		conn_lingertime;	/* linger time (in seconds) */

	/* 21 one-bit flags plus 11 bits of padding: 32 bits in total */
	unsigned int
		conn_on_sqp : 1,		/* Conn is being processed */
		conn_linger : 1,		/* SO_LINGER state */
		conn_useloopback : 1,		/* SO_USELOOPBACK state */
		conn_broadcast : 1,		/* SO_BROADCAST state */

		conn_reuseaddr : 1,		/* SO_REUSEADDR state */
		conn_keepalive : 1,		/* SO_KEEPALIVE state */
		conn_multi_router : 1,		/* Wants all multicast pkts */
		conn_did_putbq : 1,		/* ip_wput did a putbq */

		conn_unspec_src : 1,		/* IP_UNSPEC_SRC */
		conn_policy_cached : 1,		/* Is policy cached/latched ? */
		conn_in_enforce_policy : 1,	/* Enforce Policy on inbound */
		conn_out_enforce_policy : 1,	/* Enforce Policy on outbound */

		conn_debug : 1,			/* SO_DEBUG */
		conn_ipv6_v6only : 1,		/* IPV6_V6ONLY */
		conn_oobinline : 1, 		/* SO_OOBINLINE state */
		conn_dgram_errind : 1,		/* SO_DGRAM_ERRIND state */

		conn_exclbind : 1,		/* SO_EXCLBIND state */
		conn_mdt_ok : 1,		/* MDT is permitted */
		conn_allzones : 1,		/* SO_ALLZONES */
		conn_ipv6_recvpathmtu : 1,	/* IPV6_RECVPATHMTU */

		conn_mcbc_bind : 1,		/* Bound to multi/broadcast */

		conn_pad_to_bit_31 : 11;

	boolean_t conn_direct_blocked;		/* conn is flow-controlled */

	squeue_t	*conn_initial_sqp;	/* Squeue at open time */
	squeue_t	*conn_final_sqp;	/* Squeue after connect */
	ill_t		*conn_dhcpinit_ill;	/* IP_DHCPINIT_IF */
	ipsec_latch_t	*conn_latch;		/* latched IDS */
	struct ipsec_policy_s	*conn_latch_in_policy; /* latched policy (in) */
	struct ipsec_action_s	*conn_latch_in_action; /* latched action (in) */
	uint_t		conn_bound_if;		/* IP*_BOUND_IF */
	queue_t		*conn_rq;		/* Read queue */
	queue_t		*conn_wq;		/* Write queue */
	dev_t		conn_dev;		/* Minor number */
	vmem_t		*conn_minor_arena;	/* Minor arena */
	ip_helper_stream_info_t *conn_helper_info; /* ip helper stream info */

	cred_t		*conn_cred;		/* Credentials */
	pid_t		conn_cpid;		/* pid from open/connect */
	uint64_t	conn_open_time;		/* time when this was opened */

	connf_t		*conn_g_fanout;		/* Global Hash bucket head */
	struct conn_s	*conn_g_next;		/* Global Hash chain next */
	struct conn_s	*conn_g_prev;		/* Global Hash chain prev */
	struct ipsec_policy_head_s *conn_policy; /* Configured policy */
	in6_addr_t	conn_bound_addr_v6;	/* Address in bind() */
#define	conn_bound_addr_v4	V4_PART_OF_V6(conn_bound_addr_v6)
	connf_t		*conn_fanout;		/* Hash bucket we're part of */
	struct conn_s	*conn_next;		/* Hash chain next */
	struct conn_s	*conn_prev;		/* Hash chain prev */

	/* Addresses compared by the IPCL_*_MATCH macros */
	struct {
		in6_addr_t connua_laddr;	/* Local address - match */
		in6_addr_t connua_faddr;	/* Remote address */
	} connua_v6addr;
#define	conn_laddr_v4	V4_PART_OF_V6(connua_v6addr.connua_laddr)
#define	conn_faddr_v4	V4_PART_OF_V6(connua_v6addr.connua_faddr)
#define	conn_laddr_v6	connua_v6addr.connua_laddr
#define	conn_faddr_v6	connua_v6addr.connua_faddr
	in6_addr_t	conn_saddr_v6;		/* Local address - source */
#define	conn_saddr_v4	V4_PART_OF_V6(conn_saddr_v6)

	/*
	 * connu_ports2 overlays the remote/local port pair so that both can
	 * be compared with one 32-bit test (conn_ports in IPCL_CONN_MATCH).
	 */
	union {
		/* Used for classifier match performance */
		uint32_t		connu_ports2;
		struct {
			in_port_t	connu_fport;	/* Remote port */
			in_port_t	connu_lport;	/* Local port */
		} connu_ports;
	} u_port;
#define	conn_fport	u_port.connu_ports.connu_fport
#define	conn_lport	u_port.connu_ports.connu_lport
#define	conn_ports	u_port.connu_ports2

	uint_t		conn_incoming_ifindex;	/* IP{,V6}_BOUND_IF, scopeid */
	ill_t		*conn_oper_pending_ill; /* pending shared ioctl */

	krwlock_t	conn_ilg_lock;		/* Protects conn_ilg_* */
	ilg_t		*conn_ilg;		/* Group memberships */

	kcondvar_t	conn_refcv;		/* For conn_oper_pending_ill */

	struct conn_s 	*conn_drain_next;	/* Next conn in drain list */
	struct conn_s	*conn_drain_prev;	/* Prev conn in drain list */
	idl_t		*conn_idl;		/* Ptr to the drain list head */
	mblk_t		*conn_ipsec_opt_mp;	/* ipsec option mblk */
	zoneid_t	conn_zoneid;		/* zone connection is in */
	int		conn_rtaware; 		/* RT_AWARE sockopt value */
	kcondvar_t	conn_sq_cv;		/* For non-STREAMS socket IO */
	sock_upcalls_t	*conn_upcalls;		/* Upcalls to sockfs */
	sock_upper_handle_t conn_upper_handle;	/* Upper handle: sonode * */

	/* 9 bits of flags plus 23 spare bits: 32 bits in total */
	unsigned int
		conn_mlp_type : 2,		/* mlp_type_t; tsol/tndb.h */
		conn_anon_mlp : 1,		/* user wants anon MLP */
		conn_anon_port : 1,		/* user bound anonymously */

		conn_mac_mode : 2,		/* normal/loose/implicit MAC */
		conn_anon_priv_bind : 1,	/* *_ANON_PRIV_BIND state */
		conn_zone_is_global : 1,	/* GLOBAL_ZONEID */
		conn_isvrrp : 1,		/* VRRP control socket */
		conn_spare : 23;

	boolean_t	conn_flow_cntrld;
	netstack_t	*conn_netstack;	/* Corresponds to a netstack_hold */

	/*
	 * IP format that packets received for this struct should use.
	 * Value can be IP4_VERSION or IPV6_VERSION.
	 * The sending version is encoded using IXAF_IS_IPV4.
	 */
	ushort_t	conn_ipversion;

	/* Written to only once at the time of opening the endpoint */
	sa_family_t	conn_family;		/* Family from socket() call */
	uint_t		conn_so_type;		/* Type from socket() call */

	uint_t		conn_sndbuf;		/* SO_SNDBUF state */
	uint_t		conn_rcvbuf;		/* SO_RCVBUF state */
	uint_t		conn_wroff;		/* Current write offset */

	uint_t		conn_sndlowat;		/* Send buffer low water mark */
	uint_t		conn_rcvlowat;		/* Recv buffer low water mark */

	uint8_t		conn_default_ttl;	/* Default TTL/hoplimit */

	uint32_t	conn_flowinfo;	/* Connected flow id and tclass */

	/*
	 * The most recent address for sendto. Initially set to zero,
	 * which is always different from the destination address
	 * since the send interprets zero as the loopback address.
	 */
	in6_addr_t	conn_v6lastdst;
#define	conn_v4lastdst	V4_PART_OF_V6(conn_v6lastdst)
	ushort_t	conn_lastipversion;
	in_port_t	conn_lastdstport;
	uint32_t	conn_lastflowinfo;	/* IPv6-only */
	uint_t		conn_lastscopeid;	/* IPv6-only */
	uint_t		conn_lastsrcid;		/* Only for AF_INET6 */
	/*
	 * When we are not connected conn_saddr might be unspecified.
	 * We track the source that was used with conn_v6lastdst here.
	 */
	in6_addr_t	conn_v6lastsrc;
#define	conn_v4lastsrc	V4_PART_OF_V6(conn_v6lastsrc)

	/* Templates for transmitting packets */
	ip_pkt_t	conn_xmit_ipp;		/* Options if no ancil data */

	/*
	 * Header template - conn_ht_ulp is a pointer into conn_ht_iphc.
	 * Note that ixa_ip_hdr_length indicates the offset of ht_ulp in
	 * ht_iphc
	 *
	 * The header template is maintained for connected endpoints (and
	 * updated when sticky options are changed) and also for the lastdst.
	 * There is no conflict between those usages since SOCK_DGRAM and
	 * SOCK_RAW can not be used to specify a destination address (with
	 * sendto/sendmsg) if the socket has been connected.
	 */
	uint8_t		*conn_ht_iphc;		/* Start of IP header */
	uint_t		conn_ht_iphc_allocated;	/* Allocated buffer size */
	uint_t		conn_ht_iphc_len;	/* IP+ULP size */
	uint8_t		*conn_ht_ulp;		/* Upper-layer header */
	uint_t		conn_ht_ulp_len;	/* ULP header len */

	/* Checksum to compensate for source routed packets. Host byte order */
	uint32_t	conn_sum;

	/*
	 * Debug-only ring of stack traces; presumably filled in by
	 * conn_trace_ref()/conn_untrace_ref(), which the CONN_*_REF macros
	 * call - confirm against their definitions.
	 */
#ifdef CONN_DEBUG
#define	CONN_TRACE_MAX	10
	int		conn_trace_last;	/* ndx of last used tracebuf */
	conn_trace_t	conn_trace_buf[CONN_TRACE_MAX];
#endif
};
    454 
    455 /*
    456  * connf_t - connection fanout data.
    457  *
 * The hash tables and their linkage (conn_t.{conn_next, conn_prev}) are
 * protected by the per-bucket lock. Each conn_t inserted in the list
 * points back at the connf_t that heads the bucket.
    461  */
/* One hash bucket: the head of a conn_t chain plus the lock guarding it. */
struct connf_s {
	struct conn_s	*connf_head;	/* first conn_t in this bucket */
	kmutex_t	connf_lock;	/* per-bucket lock */
};
    466 
/*
 * Take one reference on connp: acquire conn_lock, increment conn_ref and
 * release the lock. The ASSERT after the increment catches conn_ref
 * wrapping around to zero.
 *
 * The macro expands to a bare { } compound statement, so it must not be
 * used as the unbraced body of an if that is followed by an else.
 */
#define	CONN_INC_REF(connp)	{				\
	mutex_enter(&(connp)->conn_lock);			\
	DTRACE_PROBE1(conn__inc__ref, conn_t *, connp);		\
	ASSERT(conn_trace_ref(connp));				\
	(connp)->conn_ref++;					\
	ASSERT((connp)->conn_ref != 0);				\
	mutex_exit(&(connp)->conn_lock);			\
}
    475 
/*
 * Variant of CONN_INC_REF for callers that already hold conn_lock: it
 * asserts the lock is held instead of acquiring it, then increments
 * conn_ref. The lock is still held when the macro completes.
 *
 * Like CONN_INC_REF, this expands to a bare { } compound statement.
 */
#define	CONN_INC_REF_LOCKED(connp)	{			\
	DTRACE_PROBE1(conn__inc__ref, conn_t *, connp);		\
	ASSERT(MUTEX_HELD(&(connp)->conn_lock));	 	\
	ASSERT(conn_trace_ref(connp));				\
	(connp)->conn_ref++;					\
	ASSERT((connp)->conn_ref != 0);				\
}
    483 
    484 #define	CONN_DEC_REF(connp)	{					\
    485 	mutex_enter(&(connp)->conn_lock);				\
    486 	DTRACE_PROBE1(conn__dec__ref, conn_t *, connp);			\
    487 	/*								\
    488 	 * The squeue framework always does a CONN_DEC_REF after return	\
    489 	 * from TCP. Hence the refcnt must be at least 2 if conn_on_sqp	\
    490 	 * is B_TRUE and conn_ref is being decremented. This is to	\
    491 	 * account for the mblk being currently processed.		\
    492 	 */								\
    493 	if ((connp)->conn_ref == 0 ||					\
    494 	    ((connp)->conn_ref == 1 && (connp)->conn_on_sqp))		\
    495 		cmn_err(CE_PANIC, "CONN_DEC_REF: connp(%p) has ref "	\
    496 			"= %d\n", (void *)(connp), (connp)->conn_ref);	\
    497 	ASSERT(conn_untrace_ref(connp));				\
    498 	(connp)->conn_ref--;						\
    499 	if ((connp)->conn_ref == 0) {					\
    500 		/* Refcnt can't increase again, safe to drop lock */	\
    501 		mutex_exit(&(connp)->conn_lock);			\
    502 		ipcl_conn_destroy(connp);				\
    503 	} else {							\
    504 		cv_broadcast(&(connp)->conn_cv);			\
    505 		mutex_exit(&(connp)->conn_lock);			\
    506 	}								\
    507 }
    508 
    509 /*
    510  * For use with subsystems within ip which use ALL_ZONES as a wildcard
    511  */
    512 #define	IPCL_ZONEID(connp)						\
    513 	((connp)->conn_allzones ? ALL_ZONES : (connp)->conn_zoneid)
    514 
    515 /*
    516  * For matching between a conn_t and a zoneid.
    517  */
    518 #define	IPCL_ZONE_MATCH(connp, zoneid) 					\
    519 	(((connp)->conn_allzones) ||					\
    520 	    ((zoneid) == ALL_ZONES) ||					\
    521 	    (connp)->conn_zoneid == (zoneid))
    522 
    523 /*
    524  * On a labeled system, we must treat bindings to ports
    525  * on shared IP addresses by sockets with MAC exemption
    526  * privilege as being in all zones, as there's
    527  * otherwise no way to identify the right receiver.
    528  */
    529 
    530 #define	IPCL_CONNS_MAC(conn1, conn2)					\
    531 	(((conn1)->conn_mac_mode != CONN_MAC_DEFAULT) ||		\
    532 	((conn2)->conn_mac_mode != CONN_MAC_DEFAULT))
    533 
    534 #define	IPCL_BIND_ZONE_MATCH(conn1, conn2)				\
    535 	(IPCL_CONNS_MAC(conn1, conn2) ||				\
    536 	IPCL_ZONE_MATCH(conn1, conn2->conn_zoneid) ||			\
    537 	IPCL_ZONE_MATCH(conn2, conn1->conn_zoneid))
    538 
    539 
/*
 * True when the in6_addr_t lvalue v6addr is an IPv4-mapped address whose
 * IPv4 part equals v4addr. v6addr must be an lvalue because its address
 * is taken.
 */
#define	_IPCL_V4_MATCH(v6addr, v4addr)	\
	(V4_PART_OF_V6((v6addr)) == (v4addr) && IN6_IS_ADDR_V4MAPPED(&(v6addr)))

/*
 * True when the in6_addr_t lvalue addr is a wildcard for IPv4 purposes:
 * either the IPv4-mapped "any" address or the IPv6 unspecified address.
 */
#define	_IPCL_V4_MATCH_ANY(addr)	\
	(IN6_IS_ADDR_V4MAPPED_ANY(&(addr)) || IN6_IS_ADDR_UNSPECIFIED(&(addr)))
    545 
    546 
    547 /*
    548  * IPCL_PROTO_MATCH() and IPCL_PROTO_MATCH_V6() only matches conns with
    549  * the specified ira_zoneid or conn_allzones by calling conn_wantpacket.
    550  */
/*
 * IPv4: the conn matches when its local address is INADDR_ANY, or when it
 * equals the packet's destination and the conn's remote address is either
 * INADDR_ANY or the packet's source. conn_wantpacket() must also accept
 * the packet.
 */
#define	IPCL_PROTO_MATCH(connp, ira, ipha)				\
	((((connp)->conn_laddr_v4 == INADDR_ANY) ||			\
	(((connp)->conn_laddr_v4 == ((ipha)->ipha_dst)) &&		\
	    (((connp)->conn_faddr_v4 == INADDR_ANY) ||			\
	((connp)->conn_faddr_v4 == ((ipha)->ipha_src))))) &&		\
	conn_wantpacket((connp), (ira), (ipha)))

/*
 * IPv6 counterpart of IPCL_PROTO_MATCH: the same local/remote address
 * rules using the unspecified address as the wildcard, followed by
 * conn_wantpacket_v6().
 */
#define	IPCL_PROTO_MATCH_V6(connp, ira, ip6h)				\
	((IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_laddr_v6) ||		\
	(IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &((ip6h)->ip6_dst)) &&   \
	(IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_faddr_v6) ||		      \
	IN6_ARE_ADDR_EQUAL(&(connp)->conn_faddr_v6, &((ip6h)->ip6_src))))) && \
	(conn_wantpacket_v6((connp), (ira), (ip6h))))
    564 
    565 #define	IPCL_CONN_HASH(src, ports, ipst)				\
    566 	((unsigned)(ntohl((src)) ^ ((ports) >> 24) ^ ((ports) >> 16) ^	\
    567 	((ports) >> 8) ^ (ports)) % (ipst)->ips_ipcl_conn_fanout_size)
    568 
    569 #define	IPCL_CONN_HASH_V6(src, ports, ipst)				\
    570 	IPCL_CONN_HASH(V4_PART_OF_V6((src)), (ports), (ipst))
    571 
/*
 * Exact IPv4 match on a connected conn: protocol, the combined 32-bit port
 * pair (conn_ports), remote address == src and local address == dst, both
 * as IPv4-mapped addresses. Conns with conn_ipv6_v6only set never match.
 * Note that conn_faddr is compared with src and conn_laddr with dst.
 */
#define	IPCL_CONN_MATCH(connp, proto, src, dst, ports)			\
	((connp)->conn_proto == (proto) &&				\
		(connp)->conn_ports == (ports) &&      			\
		_IPCL_V4_MATCH((connp)->conn_faddr_v6, (src)) &&	\
		_IPCL_V4_MATCH((connp)->conn_laddr_v6, (dst)) &&	\
		!(connp)->conn_ipv6_v6only)

/*
 * IPv6 counterpart: src and dst are in6_addr_t lvalues (their addresses
 * are taken) and the full 128-bit addresses are compared.
 */
#define	IPCL_CONN_MATCH_V6(connp, proto, src, dst, ports)		\
	((connp)->conn_proto == (proto) &&				\
		(connp)->conn_ports == (ports) &&      			\
		IN6_ARE_ADDR_EQUAL(&(connp)->conn_faddr_v6, &(src)) &&	\
		IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &(dst)))
    584 
    585 #define	IPCL_PORT_HASH(port, size) \
    586 	((((port) >> 8) ^ (port)) & ((size) - 1))
    587 
    588 #define	IPCL_BIND_HASH(lport, ipst)					\
    589 	((unsigned)(((lport) >> 8) ^ (lport)) % \
    590 	    (ipst)->ips_ipcl_bind_fanout_size)
    591 
/*
 * IPv4 bind-table match: protocol and local port must be equal, and the
 * conn's local address must be either a wildcard (_IPCL_V4_MATCH_ANY) or
 * the IPv4-mapped form of laddr. Conns with conn_ipv6_v6only set never
 * match.
 */
#define	IPCL_BIND_MATCH(connp, proto, laddr, lport)			\
	((connp)->conn_proto == (proto) &&				\
		(connp)->conn_lport == (lport) &&			\
		(_IPCL_V4_MATCH_ANY((connp)->conn_laddr_v6) ||		\
		_IPCL_V4_MATCH((connp)->conn_laddr_v6, (laddr))) &&	\
		!(connp)->conn_ipv6_v6only)

/*
 * IPv6 bind-table match: protocol and local port must be equal, and the
 * conn's local address must equal laddr (an in6_addr_t lvalue) or be
 * unspecified.
 */
#define	IPCL_BIND_MATCH_V6(connp, proto, laddr, lport)			\
	((connp)->conn_proto == (proto) &&				\
		(connp)->conn_lport == (lport) &&			\
		(IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &(laddr)) || \
		IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_laddr_v6)))
    604 
/*
 * We compare conn_laddr since it captures both connected and a bind to
 * a multicast or broadcast address.
 * The caller needs to match the zoneid and also call conn_wantpacket
 * for multicast, broadcast, or when conn_incoming_ifindex is set.
 *
 * The expression requires the local port to match and conn_ipv6_v6only to
 * be clear. In addition, either the local address is a wildcard, or it
 * matches laddr and the remote side is either a wildcard address or
 * matches both faddr and fport.
 */
#define	IPCL_UDP_MATCH(connp, lport, laddr, fport, faddr)		\
	(((connp)->conn_lport == (lport)) &&				\
	((_IPCL_V4_MATCH_ANY((connp)->conn_laddr_v6) ||			\
	(_IPCL_V4_MATCH((connp)->conn_laddr_v6, (laddr)) &&		\
	(_IPCL_V4_MATCH_ANY((connp)->conn_faddr_v6) ||			\
	(_IPCL_V4_MATCH((connp)->conn_faddr_v6, (faddr)) &&		\
	(connp)->conn_fport == (fport)))))) &&				\
	!(connp)->conn_ipv6_v6only)
    619 
/*
 * We compare conn_laddr since it captures both connected and a bind to
 * a multicast or broadcast address.
 * The caller needs to match the zoneid and also call conn_wantpacket_v6
 * for multicast or when conn_incoming_ifindex is set.
 *
 * The expression requires the local port to match. In addition, either
 * the local address is unspecified, or it equals laddr and the remote side
 * is either unspecified or equals both faddr and fport. laddr and faddr
 * are in6_addr_t lvalues (their addresses are taken).
 */
#define	IPCL_UDP_MATCH_V6(connp, lport, laddr, fport, faddr)	\
	(((connp)->conn_lport == (lport)) &&			\
	(IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_laddr_v6) ||	\
	(IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &(laddr)) &&	\
	(IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_faddr_v6) ||	\
	(IN6_ARE_ADDR_EQUAL(&(connp)->conn_faddr_v6, &(faddr)) &&	\
	(connp)->conn_fport == (fport))))))
    633 
/*
 * Tunnel fanout index: the host-order laddr XORed with the host-order
 * faddr rotated right by 8 bits, modulo ipcl_iptun_fanout_size. Unlike
 * the other hash macros, the size is a bare identifier rather than a
 * member reached through an ipst argument.
 */
#define	IPCL_IPTUN_HASH(laddr, faddr)					\
	((ntohl(laddr) ^ ((ntohl(faddr) << 24) | (ntohl(faddr) >> 8))) % \
	ipcl_iptun_fanout_size)

/*
 * IPv6 tunnel fanout index. laddr and faddr are pointers to in6_addr_t.
 * Words 0-1 of laddr are XORed with words 2-3 of faddr to form the first
 * IPCL_IPTUN_HASH argument; words 0-1 of faddr and words 2-3 of laddr
 * form the second.
 */
#define	IPCL_IPTUN_HASH_V6(laddr, faddr)				\
	IPCL_IPTUN_HASH((laddr)->s6_addr32[0] ^ (laddr)->s6_addr32[1] ^	\
	    (faddr)->s6_addr32[2] ^ (faddr)->s6_addr32[3],		\
	    (faddr)->s6_addr32[0] ^ (faddr)->s6_addr32[1] ^		\
	    (laddr)->s6_addr32[2] ^ (laddr)->s6_addr32[3])

/* IPv4 tunnel match: both endpoints must match as IPv4-mapped addresses. */
#define	IPCL_IPTUN_MATCH(connp, laddr, faddr)			\
	(_IPCL_V4_MATCH((connp)->conn_laddr_v6, (laddr)) &&	\
	_IPCL_V4_MATCH((connp)->conn_faddr_v6, (faddr)))

/*
 * IPv6 tunnel match. Here laddr and faddr are already pointers; the other
 * *_MATCH_V6 macros in this file take lvalues and apply & themselves.
 */
#define	IPCL_IPTUN_MATCH_V6(connp, laddr, faddr)		\
	(IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, (laddr)) &&	\
	IN6_ARE_ADDR_EQUAL(&(connp)->conn_faddr_v6, (faddr)))
    651 
/* UDP fanout index: IPCL_PORT_HASH over ips_ipcl_udp_fanout_size. */
#define	IPCL_UDP_HASH(lport, ipst)	\
	IPCL_PORT_HASH(lport, (ipst)->ips_ipcl_udp_fanout_size)

/* Presumably the bucket count of the global conn hash - TODO confirm. */
#define	CONN_G_HASH_SIZE	1024

/* Raw socket hash function. */
#define	IPCL_RAW_HASH(lport, ipst)	\
	IPCL_PORT_HASH(lport, (ipst)->ips_ipcl_raw_fanout_size)
    660 
/*
 * This is similar to IPCL_BIND_MATCH except that the local port check
 * is changed to a wildcard port check.
 * We compare conn_laddr since it captures both connected and a bind to
 * a multicast or broadcast address.
 *
 * The wildcard port check is conn_lport == 0. Unlike IPCL_BIND_MATCH,
 * this macro does not test conn_ipv6_v6only.
 */
#define	IPCL_RAW_MATCH(connp, proto, laddr)			\
	((connp)->conn_proto == (proto) &&			\
	(connp)->conn_lport == 0 &&				\
	(_IPCL_V4_MATCH_ANY((connp)->conn_laddr_v6) ||		\
	_IPCL_V4_MATCH((connp)->conn_laddr_v6, (laddr))))

/*
 * IPv6 counterpart: laddr is an in6_addr_t lvalue, and the conn's local
 * address must be unspecified or equal to it.
 */
#define	IPCL_RAW_MATCH_V6(connp, proto, laddr)			\
	((connp)->conn_proto == (proto) &&			\
	(connp)->conn_lport == 0 &&				\
	(IN6_IS_ADDR_UNSPECIFIED(&(connp)->conn_laddr_v6) ||	\
	IN6_ARE_ADDR_EQUAL(&(connp)->conn_laddr_v6, &(laddr))))
    678 
    679 /* Function prototypes */
    680 extern void ipcl_g_init(void);
    681 extern void ipcl_init(ip_stack_t *);
    682 extern void ipcl_g_destroy(void);
    683 extern void ipcl_destroy(ip_stack_t *);
    684 extern conn_t *ipcl_conn_create(uint32_t, int, netstack_t *);
    685 extern void ipcl_conn_destroy(conn_t *);
    686 
    687 void ipcl_hash_insert_wildcard(connf_t *, conn_t *);
    688 void ipcl_hash_remove(conn_t *);
    689 void ipcl_hash_remove_locked(conn_t *connp, connf_t *connfp);
    690 
    691 extern int	ipcl_bind_insert(conn_t *);
    692 extern int	ipcl_bind_insert_v4(conn_t *);
    693 extern int	ipcl_bind_insert_v6(conn_t *);
    694 extern int	ipcl_conn_insert(conn_t *);
    695 extern int	ipcl_conn_insert_v4(conn_t *);
    696 extern int	ipcl_conn_insert_v6(conn_t *);
    697 extern conn_t	*ipcl_get_next_conn(connf_t *, conn_t *, uint32_t);
    698 
    699 conn_t *ipcl_classify_v4(mblk_t *, uint8_t, uint_t, ip_recv_attr_t *,
    700 	    ip_stack_t *);
    701 conn_t *ipcl_classify_v6(mblk_t *, uint8_t, uint_t, ip_recv_attr_t *,
    702 	    ip_stack_t *);
    703 conn_t *ipcl_classify(mblk_t *, ip_recv_attr_t *, ip_stack_t *);
    704 conn_t *ipcl_classify_raw(mblk_t *, uint8_t, uint32_t, ipha_t *,
    705     ip6_t *, ip_recv_attr_t *, ip_stack_t *);
    706 conn_t *ipcl_iptun_classify_v4(ipaddr_t *, ipaddr_t *, ip_stack_t *);
    707 conn_t *ipcl_iptun_classify_v6(in6_addr_t *, in6_addr_t *, ip_stack_t *);
    708 void	ipcl_globalhash_insert(conn_t *);
    709 void	ipcl_globalhash_remove(conn_t *);
    710 void	ipcl_walk(pfv_t, void *, ip_stack_t *);
    711 conn_t	*ipcl_tcp_lookup_reversed_ipv4(ipha_t *, tcpha_t *, int, ip_stack_t *);
    712 conn_t	*ipcl_tcp_lookup_reversed_ipv6(ip6_t *, tcpha_t *, int, uint_t,
    713 	    ip_stack_t *);
    714 conn_t	*ipcl_lookup_listener_v4(uint16_t, ipaddr_t, zoneid_t, ip_stack_t *);
    715 conn_t	*ipcl_lookup_listener_v6(uint16_t, in6_addr_t *, uint_t, zoneid_t,
    716 	    ip_stack_t *);
    717 int	conn_trace_ref(conn_t *);
    718 int	conn_untrace_ref(conn_t *);
    719 void	ipcl_conn_cleanup(conn_t *);
    720 extern uint_t	conn_recvancillary_size(conn_t *, crb_t, ip_recv_attr_t *,
    721     mblk_t *, ip_pkt_t *);
    722 extern void	conn_recvancillary_add(conn_t *, crb_t, ip_recv_attr_t *,
    723     ip_pkt_t *, uchar_t *, uint_t);
    724 conn_t *ipcl_conn_tcp_lookup_reversed_ipv4(conn_t *, ipha_t *, tcpha_t *,
    725 	    ip_stack_t *);
    726 conn_t *ipcl_conn_tcp_lookup_reversed_ipv6(conn_t *, ip6_t *, tcpha_t *,
    727 	    ip_stack_t *);
    728 
    729 extern int ip_create_helper_stream(conn_t *, ldi_ident_t);
    730 extern void ip_free_helper_stream(conn_t *);
    731 extern int	ip_helper_stream_setup(queue_t *, dev_t *, int, int,
    732     cred_t *, boolean_t);
    733 
    734 #ifdef	__cplusplus
    735 }
    736 #endif
    737 
    738 #endif	/* _INET_IPCLASSIFIER_H */
    739