Home | History | Annotate | Download | only in udp
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 /* Copyright (c) 1990 Mentat Inc. */
     26 
     27 #include <sys/types.h>
     28 #include <sys/stream.h>
     29 #include <sys/stropts.h>
     30 #include <sys/strlog.h>
     31 #include <sys/strsun.h>
     32 #define	_SUN_TPI_VERSION 2
     33 #include <sys/tihdr.h>
     34 #include <sys/timod.h>
     35 #include <sys/ddi.h>
     36 #include <sys/sunddi.h>
     37 #include <sys/strsubr.h>
     38 #include <sys/suntpi.h>
     39 #include <sys/xti_inet.h>
     40 #include <sys/kmem.h>
     41 #include <sys/cred_impl.h>
     42 #include <sys/policy.h>
     43 #include <sys/priv.h>
     44 #include <sys/ucred.h>
     45 #include <sys/zone.h>
     46 
     47 #include <sys/socket.h>
     48 #include <sys/socketvar.h>
     49 #include <sys/sockio.h>
     50 #include <sys/vtrace.h>
     51 #include <sys/sdt.h>
     52 #include <sys/debug.h>
     53 #include <sys/isa_defs.h>
     54 #include <sys/random.h>
     55 #include <netinet/in.h>
     56 #include <netinet/ip6.h>
     57 #include <netinet/icmp6.h>
     58 #include <netinet/udp.h>
     59 
     60 #include <inet/common.h>
     61 #include <inet/ip.h>
     62 #include <inet/ip_impl.h>
     63 #include <inet/ipsec_impl.h>
     64 #include <inet/ip6.h>
     65 #include <inet/ip_ire.h>
     66 #include <inet/ip_if.h>
     67 #include <inet/ip_multi.h>
     68 #include <inet/ip_ndp.h>
     69 #include <inet/proto_set.h>
     70 #include <inet/mib2.h>
     71 #include <inet/nd.h>
     72 #include <inet/optcom.h>
     73 #include <inet/snmpcom.h>
     74 #include <inet/kstatcom.h>
     75 #include <inet/ipclassifier.h>
     76 #include <sys/squeue_impl.h>
     77 #include <inet/ipnet.h>
     78 #include <sys/ethernet.h>
     79 
     80 #include <sys/tsol/label.h>
     81 #include <sys/tsol/tnet.h>
     82 #include <rpc/pmap_prot.h>
     83 
     84 #include <inet/udp_impl.h>
     85 
     86 /*
     87  * Synchronization notes:
     88  *
     89  * UDP is MT and uses the usual kernel synchronization primitives. There are 2
     90  * locks, the fanout lock (uf_lock) and conn_lock. conn_lock
     91  * protects the contents of the udp_t. uf_lock protects the address and the
     92  * fanout information.
     93  * The lock order is conn_lock -> uf_lock.
     94  *
     95  * The fanout lock uf_lock:
     96  * When a UDP endpoint is bound to a local port, it is inserted into
     97  * a bind hash list.  The list consists of an array of udp_fanout_t buckets.
     98  * The size of the array is controlled by the udp_bind_fanout_size variable.
     99  * This variable can be changed in /etc/system if the default value is
    100  * not large enough.  Each bind hash bucket is protected by a per bucket
    101  * lock.  It protects the udp_bind_hash and udp_ptpbhn fields in the udp_t
    102  * structure and a few other fields in the udp_t. A UDP endpoint is removed
    103  * from the bind hash list only when it is being unbound or being closed.
    104  * The per bucket lock also protects a UDP endpoint's state changes.
    105  *
    106  * Plumbing notes:
    107  * UDP is always a device driver. For compatibility with mibopen() code
    108  * it is possible to I_PUSH "udp", but that results in pushing a passthrough
    109  * dummy module.
    110  *
    111  * The above implies that we don't support any intermediate module to
    112  * reside in between /dev/ip and udp -- in fact, we never supported such
    113  * scenario in the past as the inter-layer communication semantics have
    114  * always been private.
    115  */
    116 
/*
 * For /etc/system control.
 * Size of the bind hash bucket array described in the synchronization notes
 * above; initialized to the compile-time default UDP_BIND_FANOUT_SIZE.
 */
uint_t udp_bind_fanout_size = UDP_BIND_FANOUT_SIZE;
    119 
    120 static void	udp_addr_req(queue_t *q, mblk_t *mp);
    121 static void	udp_tpi_bind(queue_t *q, mblk_t *mp);
    122 static void	udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp);
    123 static void	udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock);
    124 static int	udp_build_hdr_template(conn_t *, const in6_addr_t *,
    125     const in6_addr_t *, in_port_t, uint32_t);
    126 static void	udp_capability_req(queue_t *q, mblk_t *mp);
    127 static int	udp_tpi_close(queue_t *q, int flags);
    128 static void	udp_close_free(conn_t *);
    129 static void	udp_tpi_connect(queue_t *q, mblk_t *mp);
    130 static void	udp_tpi_disconnect(queue_t *q, mblk_t *mp);
    131 static void	udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error,
    132     int sys_error);
    133 static void	udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
    134     t_scalar_t tlierr, int sys_error);
    135 static int	udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp,
    136 		    cred_t *cr);
    137 static int	udp_extra_priv_ports_add(queue_t *q, mblk_t *mp,
    138 		    char *value, caddr_t cp, cred_t *cr);
    139 static int	udp_extra_priv_ports_del(queue_t *q, mblk_t *mp,
    140 		    char *value, caddr_t cp, cred_t *cr);
    141 static void	udp_icmp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
    142 static void	udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp,
    143     ip_recv_attr_t *ira);
    144 static void	udp_info_req(queue_t *q, mblk_t *mp);
    145 static void	udp_input(void *, mblk_t *, void *, ip_recv_attr_t *);
    146 static void	udp_lrput(queue_t *, mblk_t *);
    147 static void	udp_lwput(queue_t *, mblk_t *);
    148 static int	udp_open(queue_t *q, dev_t *devp, int flag, int sflag,
    149 		    cred_t *credp, boolean_t isv6);
    150 static int	udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag,
    151 		    cred_t *credp);
    152 static int	udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag,
    153 		    cred_t *credp);
    154 static boolean_t udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name);
    155 int		udp_opt_set(conn_t *connp, uint_t optset_context,
    156 		    int level, int name, uint_t inlen,
    157 		    uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
    158 		    void *thisdg_attrs, cred_t *cr);
    159 int		udp_opt_get(conn_t *connp, int level, int name,
    160 		    uchar_t *ptr);
    161 static int	udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr,
    162 		    pid_t pid);
    163 static int	udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr,
    164     pid_t pid, ip_xmit_attr_t *ixa);
    165 static int	udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin,
    166 		    sin6_t *sin6, ushort_t ipversion, cred_t *cr, pid_t,
    167 		    ip_xmit_attr_t *ixa);
    168 static int	udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr);
    169 static boolean_t udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt);
    170 static int	udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    171 		    cred_t *cr);
    172 static mblk_t	*udp_prepend_hdr(conn_t *, ip_xmit_attr_t *, const ip_pkt_t *,
    173     const in6_addr_t *, const in6_addr_t *, in_port_t, uint32_t, mblk_t *,
    174     int *);
    175 static mblk_t	*udp_prepend_header_template(conn_t *, ip_xmit_attr_t *,
    176     mblk_t *, const in6_addr_t *, in_port_t, uint32_t, int *);
    177 static void	udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err);
    178 static void	udp_ud_err_connected(conn_t *, t_scalar_t);
    179 static void	udp_tpi_unbind(queue_t *q, mblk_t *mp);
    180 static in_port_t udp_update_next_port(udp_t *udp, in_port_t port,
    181     boolean_t random);
    182 static void	udp_wput_other(queue_t *q, mblk_t *mp);
    183 static void	udp_wput_iocdata(queue_t *q, mblk_t *mp);
    184 static void	udp_wput_fallback(queue_t *q, mblk_t *mp);
    185 static size_t	udp_set_rcv_hiwat(udp_t *udp, size_t size);
    186 
    187 static void	*udp_stack_init(netstackid_t stackid, netstack_t *ns);
    188 static void	udp_stack_fini(netstackid_t stackid, void *arg);
    189 
    190 static void	*udp_kstat_init(netstackid_t stackid);
    191 static void	udp_kstat_fini(netstackid_t stackid, kstat_t *ksp);
    192 static void	*udp_kstat2_init(netstackid_t, udp_stat_t *);
    193 static void	udp_kstat2_fini(netstackid_t, kstat_t *);
    194 static int	udp_kstat_update(kstat_t *kp, int rw);
    195 
    196 
    197 /* Common routines for TPI and socket module */
    198 static void	udp_ulp_recv(conn_t *, mblk_t *, uint_t, ip_recv_attr_t *);
    199 
    200 /* Common routine for TPI and socket module */
    201 static conn_t	*udp_do_open(cred_t *, boolean_t, int, int *);
    202 static void	udp_do_close(conn_t *);
    203 static int	udp_do_bind(conn_t *, struct sockaddr *, socklen_t, cred_t *,
    204     boolean_t);
    205 static int	udp_do_unbind(conn_t *);
    206 
    207 int		udp_getsockname(sock_lower_handle_t,
    208     struct sockaddr *, socklen_t *, cred_t *);
    209 int		udp_getpeername(sock_lower_handle_t,
    210     struct sockaddr *, socklen_t *, cred_t *);
    211 static int	udp_do_connect(conn_t *, const struct sockaddr *, socklen_t,
    212     cred_t *, pid_t);
    213 
/*
 * Default high/low water marks.  The receive pair is used to initialize
 * udp_mod_info, and all four values appear as defaults or lower bounds in
 * udp_param_arr.
 */
#define	UDP_RECV_HIWATER	(56 * 1024)
#define	UDP_RECV_LOWATER	128
#define	UDP_XMIT_HIWATER	(56 * 1024)
#define	UDP_XMIT_LOWATER	1024
    218 
    219 #pragma inline(udp_output_connected, udp_output_newdst, udp_output_lastdst)
    220 
    221 /*
    222  * Checks if the given destination addr/port is allowed out.
    223  * If allowed, registers the (dest_addr/port, node_ID) mapping at Cluster.
    224  * Called for each connect() and for sendto()/sendmsg() to a different
    225  * destination.
    226  * For connect(), called in udp_connect().
    227  * For sendto()/sendmsg(), called in udp_output_newdst().
    228  *
    229  * This macro assumes that the cl_inet_connect2 hook is not NULL.
    230  * Please check this before calling this macro.
    231  *
 * void
 * CL_INET_UDP_CONNECT(conn_t *cp, boolean_t is_outgoing,
 *     in6_addr_t *faddrp, in_port_t (or uint16_t) fport, int err);
    235  */
    236 #define	CL_INET_UDP_CONNECT(cp, is_outgoing, faddrp, fport, err) {	\
    237 	(err) = 0;							\
    238 	/*								\
    239 	 * Running in cluster mode - check and register active		\
    240 	 * "connection" information					\
    241 	 */								\
    242 	if ((cp)->conn_ipversion == IPV4_VERSION)			\
    243 		(err) = (*cl_inet_connect2)(				\
    244 		    (cp)->conn_netstack->netstack_stackid,		\
    245 		    IPPROTO_UDP, is_outgoing, AF_INET,			\
    246 		    (uint8_t *)&((cp)->conn_laddr_v4),			\
    247 		    (cp)->conn_lport,					\
    248 		    (uint8_t *)&(V4_PART_OF_V6(*faddrp)),		\
    249 		    (in_port_t)(fport), NULL);				\
    250 	else								\
    251 		(err) = (*cl_inet_connect2)(				\
    252 		    (cp)->conn_netstack->netstack_stackid,		\
    253 		    IPPROTO_UDP, is_outgoing, AF_INET6,			\
    254 		    (uint8_t *)&((cp)->conn_laddr_v6),			\
    255 		    (cp)->conn_lport,					\
    256 		    (uint8_t *)(faddrp), (in_port_t)(fport), NULL);	\
    257 }
    258 
/*
 * Module information shared by every qinit structure below.
 * NOTE(review): the initializer is positional; per module_info(9S) the order
 * should be id, name, min packet size, max packet size, high water mark,
 * low water mark -- confirm against <sys/stream.h>.
 */
static struct module_info udp_mod_info =  {
	UDP_MOD_ID, UDP_MOD_NAME, 1, INFPSZ, UDP_RECV_HIWATER, UDP_RECV_LOWATER
};

/*
 * Entry points for UDP as a device.
 * We have separate open functions for the /dev/udp and /dev/udp6 devices.
 *
 * NOTE(review): qinit initializers are positional; per qinit(9S) the leading
 * members should be put, service, open, close, admin, module_info and
 * module_stat -- confirm against <sys/stream.h>.
 */
static struct qinit udp_rinitv4 = {
	NULL, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info, NULL
};

/* Same as udp_rinitv4 except that it opens through udp_openv6. */
static struct qinit udp_rinitv6 = {
	NULL, NULL, udp_openv6, udp_tpi_close, NULL, &udp_mod_info, NULL
};

/* Write side, shared by the v4 and v6 streamtabs below. */
static struct qinit udp_winit = {
	(pfi_t)udp_wput, (pfi_t)ip_wsrv, NULL, NULL, NULL, &udp_mod_info
};

/* UDP entry point during fallback */
struct qinit udp_fallback_sock_winit = {
	(pfi_t)udp_wput_fallback, NULL, NULL, NULL, NULL, &udp_mod_info
};

/*
 * UDP needs to handle I_LINK and I_PLINK since ifconfig
 * likes to use it as a place to hang the various streams.
 */
static struct qinit udp_lrinit = {
	(pfi_t)udp_lrput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

static struct qinit udp_lwinit = {
	(pfi_t)udp_lwput, NULL, udp_openv4, udp_tpi_close, NULL, &udp_mod_info
};

/* For AF_INET aka /dev/udp */
struct streamtab udpinfov4 = {
	&udp_rinitv4, &udp_winit, &udp_lrinit, &udp_lwinit
};

/* For AF_INET6 aka /dev/udp6 */
struct streamtab udpinfov6 = {
	&udp_rinitv6, &udp_winit, &udp_lrinit, &udp_lwinit
};
    305 
/*
 * All-zero address templates.  udp_tpi_bind() copies them into a bind
 * request that arrived without an address before filling in the family.
 */
static	sin_t	sin_null;	/* Zero address for quick clears */
static	sin6_t	sin6_null;	/* Zero address for quick clears */

/* Largest UDP payload that fits in an IPv4 packet with no IP options. */
#define	UDP_MAXPACKET_IPV4 (IP_MAXPACKET - UDPH_SIZE - IP_SIMPLE_HDR_LENGTH)

/* Default structure copied into T_INFO_ACK messages */
static struct T_info_ack udp_g_t_info_ack_ipv4 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV4,	/* TSDU_size. Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin_t),	/* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV4,	/* TIDU_size.  Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};

/* Largest UDP payload that fits behind a bare IPv6 header. */
#define	UDP_MAXPACKET_IPV6 (IP_MAXPACKET - UDPH_SIZE - IPV6_HDR_LEN)

/* IPv6 counterpart of udp_g_t_info_ack_ipv4. */
static	struct T_info_ack udp_g_t_info_ack_ipv6 = {
	T_INFO_ACK,
	UDP_MAXPACKET_IPV6,	/* TSDU_size.  Excl. headers */
	T_INVALID,	/* ETSDU_size.  udp does not support expedited data. */
	T_INVALID,	/* CDATA_size. udp does not support connect data. */
	T_INVALID,	/* DDATA_size. udp does not support disconnect data. */
	sizeof (sin6_t), /* ADDR_size. */
	0,		/* OPT_size - not initialized here */
	UDP_MAXPACKET_IPV6,	/* TIDU_size. Excl. headers */
	T_CLTS,		/* SERV_type.  udp supports connection-less. */
	TS_UNBND,	/* CURRENT_state.  This is set from udp_state. */
	(XPG4_1|SENDZERO) /* PROVIDER_flag */
};
    341 
/* largest UDP port number */
#define	UDP_MAX_PORT	65535

/*
 * Table of ND variables supported by udp.  These are loaded into us_nd
 * in udp_open.
 * All of these are alterable, within the min/max values given, at run time.
 *
 * Each row is { minimum, maximum, default value, ND variable name }.
 */
/* BEGIN CSTYLED */
udpparam_t udp_param_arr[] = {
 /*min		max		value		name */
 { 0L,		256,		32,		"udp_wroff_extra" },
 { 1L,		255,		255,		"udp_ipv4_ttl" },
 { 0,		IPV6_MAX_HOPS,	IPV6_DEFAULT_HOPS, "udp_ipv6_hoplimit"},
 { 1024,	(32 * 1024),	1024,		"udp_smallest_nonpriv_port" },
 { 0,		1,		1,		"udp_do_checksum" },
 { 1024,	UDP_MAX_PORT,	(32 * 1024),	"udp_smallest_anon_port" },
 { 1024,	UDP_MAX_PORT,	UDP_MAX_PORT,	"udp_largest_anon_port" },
 { UDP_XMIT_LOWATER, (1<<30), UDP_XMIT_HIWATER,	"udp_xmit_hiwat"},
 { 0,		     (1<<30), UDP_XMIT_LOWATER, "udp_xmit_lowat"},
 { UDP_RECV_LOWATER, (1<<30), UDP_RECV_HIWATER,	"udp_recv_hiwat"},
 { 65536,	(1<<30),	2*1024*1024,	"udp_max_buf"},
 { 0,		1,		0,		"udp_pmtu_discovery" },
 { 0,		1,		0,		"udp_sendto_ignerr" },
};
/* END CSTYLED */
    368 
/* Settable in /etc/system */
/* If set to 0, pick ephemeral port sequentially; otherwise randomly. */
uint32_t udp_random_anon_port = 1;
    372 
/*
 * Hook functions to enable cluster networking.
 * On non-clustered systems these vectors must always be NULL
 *
 * Both vectors are defined here and start out NULL.  They share the same
 * argument list: netstack id, protocol, address family, pointer to the
 * local address bytes, local port and an opaque argument.
 */

void (*cl_inet_bind)(netstackid_t stack_id, uchar_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
void (*cl_inet_unbind)(netstackid_t stack_id, uint8_t protocol,
    sa_family_t addr_family, uint8_t *laddrp, in_port_t lport,
    void *args) = NULL;
    384 
/* Shorthand for a pointer to the union of all TPI primitive layouts. */
typedef union T_primitives *t_primp_t;
    386 
    387 /*
    388  * Return the next anonymous port in the privileged port range for
    389  * bind checking.
    390  *
    391  * Trusted Extension (TX) notes: TX allows administrator to mark or
    392  * reserve ports as Multilevel ports (MLP). MLP has special function
    393  * on TX systems. Once a port is made MLP, it's not available as
 * ordinary port. This creates "holes" in the port name space. It
 * may be necessary to skip the "holes" to find a suitable anon port.
    396  */
    397 static in_port_t
    398 udp_get_next_priv_port(udp_t *udp)
    399 {
    400 	static in_port_t next_priv_port = IPPORT_RESERVED - 1;
    401 	in_port_t nextport;
    402 	boolean_t restart = B_FALSE;
    403 	udp_stack_t *us = udp->udp_us;
    404 
    405 retry:
    406 	if (next_priv_port < us->us_min_anonpriv_port ||
    407 	    next_priv_port >= IPPORT_RESERVED) {
    408 		next_priv_port = IPPORT_RESERVED - 1;
    409 		if (restart)
    410 			return (0);
    411 		restart = B_TRUE;
    412 	}
    413 
    414 	if (is_system_labeled() &&
    415 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
    416 	    next_priv_port, IPPROTO_UDP, B_FALSE)) != 0) {
    417 		next_priv_port = nextport;
    418 		goto retry;
    419 	}
    420 
    421 	return (next_priv_port--);
    422 }
    423 
    424 /*
    425  * Hash list removal routine for udp_t structures.
    426  */
    427 static void
    428 udp_bind_hash_remove(udp_t *udp, boolean_t caller_holds_lock)
    429 {
    430 	udp_t		*udpnext;
    431 	kmutex_t	*lockp;
    432 	udp_stack_t	*us = udp->udp_us;
    433 	conn_t		*connp = udp->udp_connp;
    434 
    435 	if (udp->udp_ptpbhn == NULL)
    436 		return;
    437 
    438 	/*
    439 	 * Extract the lock pointer in case there are concurrent
    440 	 * hash_remove's for this instance.
    441 	 */
    442 	ASSERT(connp->conn_lport != 0);
    443 	if (!caller_holds_lock) {
    444 		lockp = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
    445 		    us->us_bind_fanout_size)].uf_lock;
    446 		ASSERT(lockp != NULL);
    447 		mutex_enter(lockp);
    448 	}
    449 	if (udp->udp_ptpbhn != NULL) {
    450 		udpnext = udp->udp_bind_hash;
    451 		if (udpnext != NULL) {
    452 			udpnext->udp_ptpbhn = udp->udp_ptpbhn;
    453 			udp->udp_bind_hash = NULL;
    454 		}
    455 		*udp->udp_ptpbhn = udpnext;
    456 		udp->udp_ptpbhn = NULL;
    457 	}
    458 	if (!caller_holds_lock) {
    459 		mutex_exit(lockp);
    460 	}
    461 }
    462 
    463 static void
    464 udp_bind_hash_insert(udp_fanout_t *uf, udp_t *udp)
    465 {
    466 	conn_t	*connp = udp->udp_connp;
    467 	udp_t	**udpp;
    468 	udp_t	*udpnext;
    469 	conn_t	*connext;
    470 
    471 	ASSERT(MUTEX_HELD(&uf->uf_lock));
    472 	ASSERT(udp->udp_ptpbhn == NULL);
    473 	udpp = &uf->uf_udp;
    474 	udpnext = udpp[0];
    475 	if (udpnext != NULL) {
    476 		/*
    477 		 * If the new udp bound to the INADDR_ANY address
    478 		 * and the first one in the list is not bound to
    479 		 * INADDR_ANY we skip all entries until we find the
    480 		 * first one bound to INADDR_ANY.
    481 		 * This makes sure that applications binding to a
    482 		 * specific address get preference over those binding to
    483 		 * INADDR_ANY.
    484 		 */
    485 		connext = udpnext->udp_connp;
    486 		if (V6_OR_V4_INADDR_ANY(connp->conn_bound_addr_v6) &&
    487 		    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
    488 			while ((udpnext = udpp[0]) != NULL &&
    489 			    !V6_OR_V4_INADDR_ANY(connext->conn_bound_addr_v6)) {
    490 				udpp = &(udpnext->udp_bind_hash);
    491 			}
    492 			if (udpnext != NULL)
    493 				udpnext->udp_ptpbhn = &udp->udp_bind_hash;
    494 		} else {
    495 			udpnext->udp_ptpbhn = &udp->udp_bind_hash;
    496 		}
    497 	}
    498 	udp->udp_bind_hash = udpnext;
    499 	udp->udp_ptpbhn = udpp;
    500 	udpp[0] = udp;
    501 }
    502 
    503 /*
    504  * This routine is called to handle each O_T_BIND_REQ/T_BIND_REQ message
    505  * passed to udp_wput.
    506  * It associates a port number and local address with the stream.
    507  * It calls IP to verify the local IP address, and calls IP to insert
    508  * the conn_t in the fanout table.
    509  * If everything is ok it then sends the T_BIND_ACK back up.
    510  *
    511  * Note that UDP over IPv4 and IPv6 sockets can use the same port number
    512  * without setting SO_REUSEADDR. This is needed so that they
    513  * can be viewed as two independent transport protocols.
 * However, anonymous ports are allocated from the same range to avoid
 * duplicating the us->us_next_port_to_try.
    516  */
    517 static void
    518 udp_tpi_bind(queue_t *q, mblk_t *mp)
    519 {
    520 	sin_t		*sin;
    521 	sin6_t		*sin6;
    522 	mblk_t		*mp1;
    523 	struct T_bind_req *tbr;
    524 	conn_t		*connp;
    525 	udp_t		*udp;
    526 	int		error;
    527 	struct sockaddr	*sa;
    528 	cred_t		*cr;
    529 
    530 	/*
    531 	 * All Solaris components should pass a db_credp
    532 	 * for this TPI message, hence we ASSERT.
    533 	 * But in case there is some other M_PROTO that looks
    534 	 * like a TPI message sent by some other kernel
    535 	 * component, we check and return an error.
    536 	 */
    537 	cr = msg_getcred(mp, NULL);
    538 	ASSERT(cr != NULL);
    539 	if (cr == NULL) {
    540 		udp_err_ack(q, mp, TSYSERR, EINVAL);
    541 		return;
    542 	}
    543 
    544 	connp = Q_TO_CONN(q);
    545 	udp = connp->conn_udp;
    546 	if ((mp->b_wptr - mp->b_rptr) < sizeof (*tbr)) {
    547 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
    548 		    "udp_bind: bad req, len %u",
    549 		    (uint_t)(mp->b_wptr - mp->b_rptr));
    550 		udp_err_ack(q, mp, TPROTO, 0);
    551 		return;
    552 	}
    553 	if (udp->udp_state != TS_UNBND) {
    554 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
    555 		    "udp_bind: bad state, %u", udp->udp_state);
    556 		udp_err_ack(q, mp, TOUTSTATE, 0);
    557 		return;
    558 	}
    559 	/*
    560 	 * Reallocate the message to make sure we have enough room for an
    561 	 * address.
    562 	 */
    563 	mp1 = reallocb(mp, sizeof (struct T_bind_ack) + sizeof (sin6_t), 1);
    564 	if (mp1 == NULL) {
    565 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
    566 		return;
    567 	}
    568 
    569 	mp = mp1;
    570 
    571 	/* Reset the message type in preparation for shipping it back. */
    572 	DB_TYPE(mp) = M_PCPROTO;
    573 
    574 	tbr = (struct T_bind_req *)mp->b_rptr;
    575 	switch (tbr->ADDR_length) {
    576 	case 0:			/* Request for a generic port */
    577 		tbr->ADDR_offset = sizeof (struct T_bind_req);
    578 		if (connp->conn_family == AF_INET) {
    579 			tbr->ADDR_length = sizeof (sin_t);
    580 			sin = (sin_t *)&tbr[1];
    581 			*sin = sin_null;
    582 			sin->sin_family = AF_INET;
    583 			mp->b_wptr = (uchar_t *)&sin[1];
    584 			sa = (struct sockaddr *)sin;
    585 		} else {
    586 			ASSERT(connp->conn_family == AF_INET6);
    587 			tbr->ADDR_length = sizeof (sin6_t);
    588 			sin6 = (sin6_t *)&tbr[1];
    589 			*sin6 = sin6_null;
    590 			sin6->sin6_family = AF_INET6;
    591 			mp->b_wptr = (uchar_t *)&sin6[1];
    592 			sa = (struct sockaddr *)sin6;
    593 		}
    594 		break;
    595 
    596 	case sizeof (sin_t):	/* Complete IPv4 address */
    597 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
    598 		    sizeof (sin_t));
    599 		if (sa == NULL || !OK_32PTR((char *)sa)) {
    600 			udp_err_ack(q, mp, TSYSERR, EINVAL);
    601 			return;
    602 		}
    603 		if (connp->conn_family != AF_INET ||
    604 		    sa->sa_family != AF_INET) {
    605 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
    606 			return;
    607 		}
    608 		break;
    609 
    610 	case sizeof (sin6_t):	/* complete IPv6 address */
    611 		sa = (struct sockaddr *)mi_offset_param(mp, tbr->ADDR_offset,
    612 		    sizeof (sin6_t));
    613 		if (sa == NULL || !OK_32PTR((char *)sa)) {
    614 			udp_err_ack(q, mp, TSYSERR, EINVAL);
    615 			return;
    616 		}
    617 		if (connp->conn_family != AF_INET6 ||
    618 		    sa->sa_family != AF_INET6) {
    619 			udp_err_ack(q, mp, TSYSERR, EAFNOSUPPORT);
    620 			return;
    621 		}
    622 		break;
    623 
    624 	default:		/* Invalid request */
    625 		(void) mi_strlog(q, 1, SL_ERROR|SL_TRACE,
    626 		    "udp_bind: bad ADDR_length length %u", tbr->ADDR_length);
    627 		udp_err_ack(q, mp, TBADADDR, 0);
    628 		return;
    629 	}
    630 
    631 	error = udp_do_bind(connp, sa, tbr->ADDR_length, cr,
    632 	    tbr->PRIM_type != O_T_BIND_REQ);
    633 
    634 	if (error != 0) {
    635 		if (error > 0) {
    636 			udp_err_ack(q, mp, TSYSERR, error);
    637 		} else {
    638 			udp_err_ack(q, mp, -error, 0);
    639 		}
    640 	} else {
    641 		tbr->PRIM_type = T_BIND_ACK;
    642 		qreply(q, mp);
    643 	}
    644 }
    645 
    646 /*
    647  * This routine handles each T_CONN_REQ message passed to udp.  It
    648  * associates a default destination address with the stream.
    649  *
    650  * After various error checks are completed, udp_connect() lays
    651  * the target address and port into the composite header template.
    652  * Then we ask IP for information, including a source address if we didn't
    653  * already have one. Finally we send up the T_OK_ACK reply message.
    654  */
    655 static void
    656 udp_tpi_connect(queue_t *q, mblk_t *mp)
    657 {
    658 	conn_t	*connp = Q_TO_CONN(q);
    659 	int	error;
    660 	socklen_t	len;
    661 	struct sockaddr		*sa;
    662 	struct T_conn_req	*tcr;
    663 	cred_t		*cr;
    664 	pid_t		pid;
    665 	/*
    666 	 * All Solaris components should pass a db_credp
    667 	 * for this TPI message, hence we ASSERT.
    668 	 * But in case there is some other M_PROTO that looks
    669 	 * like a TPI message sent by some other kernel
    670 	 * component, we check and return an error.
    671 	 */
    672 	cr = msg_getcred(mp, &pid);
    673 	ASSERT(cr != NULL);
    674 	if (cr == NULL) {
    675 		udp_err_ack(q, mp, TSYSERR, EINVAL);
    676 		return;
    677 	}
    678 
    679 	tcr = (struct T_conn_req *)mp->b_rptr;
    680 
    681 	/* A bit of sanity checking */
    682 	if ((mp->b_wptr - mp->b_rptr) < sizeof (struct T_conn_req)) {
    683 		udp_err_ack(q, mp, TPROTO, 0);
    684 		return;
    685 	}
    686 
    687 	if (tcr->OPT_length != 0) {
    688 		udp_err_ack(q, mp, TBADOPT, 0);
    689 		return;
    690 	}
    691 
    692 	/*
    693 	 * Determine packet type based on type of address passed in
    694 	 * the request should contain an IPv4 or IPv6 address.
    695 	 * Make sure that address family matches the type of
    696 	 * family of the address passed down.
    697 	 */
    698 	len = tcr->DEST_length;
    699 	switch (tcr->DEST_length) {
    700 	default:
    701 		udp_err_ack(q, mp, TBADADDR, 0);
    702 		return;
    703 
    704 	case sizeof (sin_t):
    705 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
    706 		    sizeof (sin_t));
    707 		break;
    708 
    709 	case sizeof (sin6_t):
    710 		sa = (struct sockaddr *)mi_offset_param(mp, tcr->DEST_offset,
    711 		    sizeof (sin6_t));
    712 		break;
    713 	}
    714 
    715 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
    716 	if (error != 0) {
    717 		udp_err_ack(q, mp, TSYSERR, error);
    718 		return;
    719 	}
    720 
    721 	error = udp_do_connect(connp, sa, len, cr, pid);
    722 	if (error != 0) {
    723 		if (error < 0)
    724 			udp_err_ack(q, mp, -error, 0);
    725 		else
    726 			udp_err_ack(q, mp, TSYSERR, error);
    727 	} else {
    728 		mblk_t	*mp1;
    729 		/*
    730 		 * We have to send a connection confirmation to
    731 		 * keep TLI happy.
    732 		 */
    733 		if (connp->conn_family == AF_INET) {
    734 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
    735 			    sizeof (sin_t), NULL, 0);
    736 		} else {
    737 			mp1 = mi_tpi_conn_con(NULL, (char *)sa,
    738 			    sizeof (sin6_t), NULL, 0);
    739 		}
    740 		if (mp1 == NULL) {
    741 			udp_err_ack(q, mp, TSYSERR, ENOMEM);
    742 			return;
    743 		}
    744 
    745 		/*
    746 		 * Send ok_ack for T_CONN_REQ
    747 		 */
    748 		mp = mi_tpi_ok_ack_alloc(mp);
    749 		if (mp == NULL) {
    750 			/* Unable to reuse the T_CONN_REQ for the ack. */
    751 			udp_err_ack_prim(q, mp1, T_CONN_REQ, TSYSERR, ENOMEM);
    752 			return;
    753 		}
    754 
    755 		putnext(connp->conn_rq, mp);
    756 		putnext(connp->conn_rq, mp1);
    757 	}
    758 }
    759 
    760 static int
    761 udp_tpi_close(queue_t *q, int flags)
    762 {
    763 	conn_t	*connp;
    764 
    765 	if (flags & SO_FALLBACK) {
    766 		/*
    767 		 * stream is being closed while in fallback
    768 		 * simply free the resources that were allocated
    769 		 */
    770 		inet_minor_free(WR(q)->q_ptr, (dev_t)(RD(q)->q_ptr));
    771 		qprocsoff(q);
    772 		goto done;
    773 	}
    774 
    775 	connp = Q_TO_CONN(q);
    776 	udp_do_close(connp);
    777 done:
    778 	q->q_ptr = WR(q)->q_ptr = NULL;
    779 	return (0);
    780 }
    781 
    782 static void
    783 udp_close_free(conn_t *connp)
    784 {
    785 	udp_t *udp = connp->conn_udp;
    786 
    787 	/* If there are any options associated with the stream, free them. */
    788 	if (udp->udp_recv_ipp.ipp_fields != 0)
    789 		ip_pkt_free(&udp->udp_recv_ipp);
    790 
    791 	/*
    792 	 * Clear any fields which the kmem_cache constructor clears.
    793 	 * Only udp_connp needs to be preserved.
    794 	 * TBD: We should make this more efficient to avoid clearing
    795 	 * everything.
    796 	 */
    797 	ASSERT(udp->udp_connp == connp);
    798 	bzero(udp, sizeof (udp_t));
    799 	udp->udp_connp = connp;
    800 }
    801 
    802 static int
    803 udp_do_disconnect(conn_t *connp)
    804 {
    805 	udp_t	*udp;
    806 	udp_fanout_t *udpf;
    807 	udp_stack_t *us;
    808 	int	error;
    809 
    810 	udp = connp->conn_udp;
    811 	us = udp->udp_us;
    812 	mutex_enter(&connp->conn_lock);
    813 	if (udp->udp_state != TS_DATA_XFER) {
    814 		mutex_exit(&connp->conn_lock);
    815 		return (-TOUTSTATE);
    816 	}
    817 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
    818 	    us->us_bind_fanout_size)];
    819 	mutex_enter(&udpf->uf_lock);
    820 	if (connp->conn_mcbc_bind)
    821 		connp->conn_saddr_v6 = ipv6_all_zeros;
    822 	else
    823 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
    824 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
    825 	connp->conn_faddr_v6 = ipv6_all_zeros;
    826 	connp->conn_fport = 0;
    827 	udp->udp_state = TS_IDLE;
    828 	mutex_exit(&udpf->uf_lock);
    829 
    830 	/* Remove any remnants of mapped address binding */
    831 	if (connp->conn_family == AF_INET6)
    832 		connp->conn_ipversion = IPV6_VERSION;
    833 
    834 	connp->conn_v6lastdst = ipv6_all_zeros;
    835 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
    836 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
    837 	mutex_exit(&connp->conn_lock);
    838 	if (error != 0)
    839 		return (error);
    840 
    841 	/*
    842 	 * Tell IP to remove the full binding and revert
    843 	 * to the local address binding.
    844 	 */
    845 	return (ip_laddr_fanout_insert(connp));
    846 }
    847 
    848 static void
    849 udp_tpi_disconnect(queue_t *q, mblk_t *mp)
    850 {
    851 	conn_t	*connp = Q_TO_CONN(q);
    852 	int	error;
    853 
    854 	/*
    855 	 * Allocate the largest primitive we need to send back
    856 	 * T_error_ack is > than T_ok_ack
    857 	 */
    858 	mp = reallocb(mp, sizeof (struct T_error_ack), 1);
    859 	if (mp == NULL) {
    860 		/* Unable to reuse the T_DISCON_REQ for the ack. */
    861 		udp_err_ack_prim(q, mp, T_DISCON_REQ, TSYSERR, ENOMEM);
    862 		return;
    863 	}
    864 
    865 	error = udp_do_disconnect(connp);
    866 
    867 	if (error != 0) {
    868 		if (error < 0) {
    869 			udp_err_ack(q, mp, -error, 0);
    870 		} else {
    871 			udp_err_ack(q, mp, TSYSERR, error);
    872 		}
    873 	} else {
    874 		mp = mi_tpi_ok_ack_alloc(mp);
    875 		ASSERT(mp != NULL);
    876 		qreply(q, mp);
    877 	}
    878 }
    879 
    880 int
    881 udp_disconnect(conn_t *connp)
    882 {
    883 	int error;
    884 
    885 	connp->conn_dgram_errind = B_FALSE;
    886 	error = udp_do_disconnect(connp);
    887 	if (error < 0)
    888 		error = proto_tlitosyserr(-error);
    889 
    890 	return (error);
    891 }
    892 
    893 /* This routine creates a T_ERROR_ACK message and passes it upstream. */
    894 static void
    895 udp_err_ack(queue_t *q, mblk_t *mp, t_scalar_t t_error, int sys_error)
    896 {
    897 	if ((mp = mi_tpi_err_ack_alloc(mp, t_error, sys_error)) != NULL)
    898 		qreply(q, mp);
    899 }
    900 
    901 /* Shorthand to generate and send TPI error acks to our client */
    902 static void
    903 udp_err_ack_prim(queue_t *q, mblk_t *mp, t_scalar_t primitive,
    904     t_scalar_t t_error, int sys_error)
    905 {
    906 	struct T_error_ack	*teackp;
    907 
    908 	if ((mp = tpi_ack_alloc(mp, sizeof (struct T_error_ack),
    909 	    M_PCPROTO, T_ERROR_ACK)) != NULL) {
    910 		teackp = (struct T_error_ack *)mp->b_rptr;
    911 		teackp->ERROR_prim = primitive;
    912 		teackp->TLI_error = t_error;
    913 		teackp->UNIX_error = sys_error;
    914 		qreply(q, mp);
    915 	}
    916 }
    917 
    918 /*ARGSUSED2*/
    919 static int
    920 udp_extra_priv_ports_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
    921 {
    922 	int i;
    923 	udp_t		*udp = Q_TO_UDP(q);
    924 	udp_stack_t *us = udp->udp_us;
    925 
    926 	for (i = 0; i < us->us_num_epriv_ports; i++) {
    927 		if (us->us_epriv_ports[i] != 0)
    928 			(void) mi_mpprintf(mp, "%d ", us->us_epriv_ports[i]);
    929 	}
    930 	return (0);
    931 }
    932 
    933 /* ARGSUSED1 */
    934 static int
    935 udp_extra_priv_ports_add(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    936     cred_t *cr)
    937 {
    938 	long	new_value;
    939 	int	i;
    940 	udp_t		*udp = Q_TO_UDP(q);
    941 	udp_stack_t *us = udp->udp_us;
    942 
    943 	/*
    944 	 * Fail the request if the new value does not lie within the
    945 	 * port number limits.
    946 	 */
    947 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
    948 	    new_value <= 0 || new_value >= 65536) {
    949 		return (EINVAL);
    950 	}
    951 
    952 	/* Check if the value is already in the list */
    953 	for (i = 0; i < us->us_num_epriv_ports; i++) {
    954 		if (new_value == us->us_epriv_ports[i]) {
    955 			return (EEXIST);
    956 		}
    957 	}
    958 	/* Find an empty slot */
    959 	for (i = 0; i < us->us_num_epriv_ports; i++) {
    960 		if (us->us_epriv_ports[i] == 0)
    961 			break;
    962 	}
    963 	if (i == us->us_num_epriv_ports) {
    964 		return (EOVERFLOW);
    965 	}
    966 
    967 	/* Set the new value */
    968 	us->us_epriv_ports[i] = (in_port_t)new_value;
    969 	return (0);
    970 }
    971 
    972 /* ARGSUSED1 */
    973 static int
    974 udp_extra_priv_ports_del(queue_t *q, mblk_t *mp, char *value, caddr_t cp,
    975     cred_t *cr)
    976 {
    977 	long	new_value;
    978 	int	i;
    979 	udp_t		*udp = Q_TO_UDP(q);
    980 	udp_stack_t *us = udp->udp_us;
    981 
    982 	/*
    983 	 * Fail the request if the new value does not lie within the
    984 	 * port number limits.
    985 	 */
    986 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
    987 	    new_value <= 0 || new_value >= 65536) {
    988 		return (EINVAL);
    989 	}
    990 
    991 	/* Check that the value is already in the list */
    992 	for (i = 0; i < us->us_num_epriv_ports; i++) {
    993 		if (us->us_epriv_ports[i] == new_value)
    994 			break;
    995 	}
    996 	if (i == us->us_num_epriv_ports) {
    997 		return (ESRCH);
    998 	}
    999 
   1000 	/* Clear the value */
   1001 	us->us_epriv_ports[i] = 0;
   1002 	return (0);
   1003 }
   1004 
/*
 * At minimum we need 4 bytes of UDP header.
 * NOTE(review): presumably the source and destination port fields, which
 * are the only UDP header fields the ICMP error handlers below read;
 * confirm where this constant is actually enforced.
 */
#define	ICMP_MIN_UDP_HDR	4
   1007 
/*
 * udp_icmp_input is called as conn_recvicmp to process ICMP messages.
 * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
 * Assumes that IP has pulled up everything up to and including the ICMP header.
 *
 * arg1 is the conn_t the error was matched to, mp is the ICMP packet
 * starting at the outer IP header, and ira supplies the outer IP header
 * length.  arg2 is unused.  IPv6 packets are handed to
 * udp_icmp_error_ipv6(); on every other path mp is freed here.
 */
/* ARGSUSED2 */
static void
udp_icmp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
{
	conn_t		*connp = (conn_t *)arg1;
	icmph_t		*icmph;
	ipha_t		*ipha;
	int		iph_hdr_length;
	udpha_t		*udpha;
	sin_t		sin;
	sin6_t		sin6;
	mblk_t		*mp1;
	int		error = 0;
	udp_t		*udp = connp->conn_udp;

	ipha = (ipha_t *)mp->b_rptr;

	ASSERT(OK_32PTR(mp->b_rptr));

	/* Anything that is not IPv4 is handled by the IPv6 routine. */
	if (IPH_HDR_VERSION(ipha) != IPV4_VERSION) {
		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
		udp_icmp_error_ipv6(connp, mp, ira);
		return;
	}
	ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);

	/* Skip past the outer IP and ICMP headers */
	ASSERT(IPH_HDR_LENGTH(ipha) == ira->ira_ip_hdr_length);
	iph_hdr_length = ira->ira_ip_hdr_length;
	icmph = (icmph_t *)&mp->b_rptr[iph_hdr_length];
	ipha = (ipha_t *)&icmph[1];	/* Inner IP header */

	/* Skip past the inner IP and find the ULP header */
	iph_hdr_length = IPH_HDR_LENGTH(ipha);
	udpha = (udpha_t *)((char *)ipha + iph_hdr_length);

	/*
	 * Classify the message.  Only port/protocol unreachable set error;
	 * everything else leaves it at 0 and the packet is dropped below.
	 */
	switch (icmph->icmph_type) {
	case ICMP_DEST_UNREACHABLE:
		switch (icmph->icmph_code) {
		case ICMP_FRAGMENTATION_NEEDED: {
			/*
			 * This ipha shadows the outer variable and refers to
			 * the connection's header template, not the packet.
			 */
			ipha_t		*ipha;
			ip_xmit_attr_t	*ixa;
			/*
			 * IP has already adjusted the path MTU.
			 * But we need to adjust DF for IPv4.
			 */
			if (connp->conn_ipversion != IPV4_VERSION)
				break;

			ixa = conn_get_ixa(connp, B_FALSE);
			if (ixa == NULL || ixa->ixa_ire == NULL) {
				/*
				 * Some other thread holds conn_ixa. We will
				 * redo this on the next ICMP too big.
				 */
				if (ixa != NULL)
					ixa_refrele(ixa);
				break;
			}
			(void) ip_get_pmtu(ixa);

			/* Mirror IXAF_PMTU_IPV4_DF into the template's DF bit. */
			mutex_enter(&connp->conn_lock);
			ipha = (ipha_t *)connp->conn_ht_iphc;
			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
				ipha->ipha_fragment_offset_and_flags |=
				    IPH_DF_HTONS;
			} else {
				ipha->ipha_fragment_offset_and_flags &=
				    ~IPH_DF_HTONS;
			}
			mutex_exit(&connp->conn_lock);
			ixa_refrele(ixa);
			break;
		}
		case ICMP_PORT_UNREACHABLE:
		case ICMP_PROTOCOL_UNREACHABLE:
			error = ECONNREFUSED;
			break;
		default:
			/* Transient errors */
			break;
		}
		break;
	default:
		/* Transient errors */
		break;
	}
	if (error == 0) {
		freemsg(mp);
		return;
	}

	/*
	 * Deliver T_UDERROR_IND when the application has asked for it.
	 * The socket layer enables this automatically when connected.
	 */
	if (!connp->conn_dgram_errind) {
		freemsg(mp);
		return;
	}

	/*
	 * Report the error against the destination address and port found
	 * in the inner IP and UDP headers, formatted for the endpoint's
	 * address family.
	 */
	switch (connp->conn_family) {
	case AF_INET:
		sin = sin_null;
		sin.sin_family = AF_INET;
		sin.sin_addr.s_addr = ipha->ipha_dst;
		sin.sin_port = udpha->uha_dst_port;
		if (IPCL_IS_NONSTR(connp)) {
			/*
			 * Non-STREAMS socket.  A connected endpoint gets the
			 * error through the su_set_error upcall, but only if
			 * the address matches its peer; the upcall is made
			 * after conn_lock is dropped.  An unconnected
			 * endpoint just records the error and address.
			 */
			mutex_enter(&connp->conn_lock);
			if (udp->udp_state == TS_DATA_XFER) {
				if (sin.sin_port == connp->conn_fport &&
				    sin.sin_addr.s_addr ==
				    connp->conn_faddr_v4) {
					mutex_exit(&connp->conn_lock);
					(*connp->conn_upcalls->su_set_error)
					    (connp->conn_upper_handle, error);
					goto done;
				}
			} else {
				udp->udp_delayed_error = error;
				*((sin_t *)&udp->udp_delayed_addr) = sin;
			}
			mutex_exit(&connp->conn_lock);
		} else {
			/* STREAMS endpoint: send a T_UDERROR_IND upstream. */
			mp1 = mi_tpi_uderror_ind((char *)&sin, sizeof (sin_t),
			    NULL, 0, error);
			if (mp1 != NULL)
				putnext(connp->conn_rq, mp1);
		}
		break;
	case AF_INET6:
		/* IPv4 error on an AF_INET6 endpoint: use a mapped address. */
		sin6 = sin6_null;
		sin6.sin6_family = AF_INET6;
		IN6_IPADDR_TO_V4MAPPED(ipha->ipha_dst, &sin6.sin6_addr);
		sin6.sin6_port = udpha->uha_dst_port;
		if (IPCL_IS_NONSTR(connp)) {
			/* Same handling as the AF_INET case above. */
			mutex_enter(&connp->conn_lock);
			if (udp->udp_state == TS_DATA_XFER) {
				if (sin6.sin6_port == connp->conn_fport &&
				    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
				    &connp->conn_faddr_v6)) {
					mutex_exit(&connp->conn_lock);
					(*connp->conn_upcalls->su_set_error)
					    (connp->conn_upper_handle, error);
					goto done;
				}
			} else {
				udp->udp_delayed_error = error;
				*((sin6_t *)&udp->udp_delayed_addr) = sin6;
			}
			mutex_exit(&connp->conn_lock);
		} else {
			mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
			    NULL, 0, error);
			if (mp1 != NULL)
				putnext(connp->conn_rq, mp1);
		}
		break;
	}
done:
	/* The ICMP packet itself is never passed up. */
	freemsg(mp);
}
   1175 
   1176 /*
   1177  * udp_icmp_error_ipv6 is called by udp_icmp_error to process ICMP for IPv6.
   1178  * Generates the appropriate T_UDERROR_IND for permanent (non-transient) errors.
   1179  * Assumes that IP has pulled up all the extension headers as well as the
   1180  * ICMPv6 header.
   1181  */
   1182 static void
   1183 udp_icmp_error_ipv6(conn_t *connp, mblk_t *mp, ip_recv_attr_t *ira)
   1184 {
   1185 	icmp6_t		*icmp6;
   1186 	ip6_t		*ip6h, *outer_ip6h;
   1187 	uint16_t	iph_hdr_length;
   1188 	uint8_t		*nexthdrp;
   1189 	udpha_t		*udpha;
   1190 	sin6_t		sin6;
   1191 	mblk_t		*mp1;
   1192 	int		error = 0;
   1193 	udp_t		*udp = connp->conn_udp;
   1194 	udp_stack_t	*us = udp->udp_us;
   1195 
   1196 	outer_ip6h = (ip6_t *)mp->b_rptr;
   1197 #ifdef DEBUG
   1198 	if (outer_ip6h->ip6_nxt != IPPROTO_ICMPV6)
   1199 		iph_hdr_length = ip_hdr_length_v6(mp, outer_ip6h);
   1200 	else
   1201 		iph_hdr_length = IPV6_HDR_LEN;
   1202 	ASSERT(iph_hdr_length == ira->ira_ip_hdr_length);
   1203 #endif
   1204 	/* Skip past the outer IP and ICMP headers */
   1205 	iph_hdr_length = ira->ira_ip_hdr_length;
   1206 	icmp6 = (icmp6_t *)&mp->b_rptr[iph_hdr_length];
   1207 
   1208 	/* Skip past the inner IP and find the ULP header */
   1209 	ip6h = (ip6_t *)&icmp6[1];	/* Inner IP header */
   1210 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &iph_hdr_length, &nexthdrp)) {
   1211 		freemsg(mp);
   1212 		return;
   1213 	}
   1214 	udpha = (udpha_t *)((char *)ip6h + iph_hdr_length);
   1215 
   1216 	switch (icmp6->icmp6_type) {
   1217 	case ICMP6_DST_UNREACH:
   1218 		switch (icmp6->icmp6_code) {
   1219 		case ICMP6_DST_UNREACH_NOPORT:
   1220 			error = ECONNREFUSED;
   1221 			break;
   1222 		case ICMP6_DST_UNREACH_ADMIN:
   1223 		case ICMP6_DST_UNREACH_NOROUTE:
   1224 		case ICMP6_DST_UNREACH_BEYONDSCOPE:
   1225 		case ICMP6_DST_UNREACH_ADDR:
   1226 			/* Transient errors */
   1227 			break;
   1228 		default:
   1229 			break;
   1230 		}
   1231 		break;
   1232 	case ICMP6_PACKET_TOO_BIG: {
   1233 		struct T_unitdata_ind	*tudi;
   1234 		struct T_opthdr		*toh;
   1235 		size_t			udi_size;
   1236 		mblk_t			*newmp;
   1237 		t_scalar_t		opt_length = sizeof (struct T_opthdr) +
   1238 		    sizeof (struct ip6_mtuinfo);
   1239 		sin6_t			*sin6;
   1240 		struct ip6_mtuinfo	*mtuinfo;
   1241 
   1242 		/*
   1243 		 * If the application has requested to receive path mtu
   1244 		 * information, send up an empty message containing an
   1245 		 * IPV6_PATHMTU ancillary data item.
   1246 		 */
   1247 		if (!connp->conn_ipv6_recvpathmtu)
   1248 			break;
   1249 
   1250 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t) +
   1251 		    opt_length;
   1252 		if ((newmp = allocb(udi_size, BPRI_MED)) == NULL) {
   1253 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
   1254 			break;
   1255 		}
   1256 
   1257 		/*
   1258 		 * newmp->b_cont is left to NULL on purpose.  This is an
   1259 		 * empty message containing only ancillary data.
   1260 		 */
   1261 		newmp->b_datap->db_type = M_PROTO;
   1262 		tudi = (struct T_unitdata_ind *)newmp->b_rptr;
   1263 		newmp->b_wptr = (uchar_t *)tudi + udi_size;
   1264 		tudi->PRIM_type = T_UNITDATA_IND;
   1265 		tudi->SRC_length = sizeof (sin6_t);
   1266 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
   1267 		tudi->OPT_offset = tudi->SRC_offset + sizeof (sin6_t);
   1268 		tudi->OPT_length = opt_length;
   1269 
   1270 		sin6 = (sin6_t *)&tudi[1];
   1271 		bzero(sin6, sizeof (sin6_t));
   1272 		sin6->sin6_family = AF_INET6;
   1273 		sin6->sin6_addr = connp->conn_faddr_v6;
   1274 
   1275 		toh = (struct T_opthdr *)&sin6[1];
   1276 		toh->level = IPPROTO_IPV6;
   1277 		toh->name = IPV6_PATHMTU;
   1278 		toh->len = opt_length;
   1279 		toh->status = 0;
   1280 
   1281 		mtuinfo = (struct ip6_mtuinfo *)&toh[1];
   1282 		bzero(mtuinfo, sizeof (struct ip6_mtuinfo));
   1283 		mtuinfo->ip6m_addr.sin6_family = AF_INET6;
   1284 		mtuinfo->ip6m_addr.sin6_addr = ip6h->ip6_dst;
   1285 		mtuinfo->ip6m_mtu = icmp6->icmp6_mtu;
   1286 		/*
   1287 		 * We've consumed everything we need from the original
   1288 		 * message.  Free it, then send our empty message.
   1289 		 */
   1290 		freemsg(mp);
   1291 		udp_ulp_recv(connp, newmp, msgdsize(newmp), ira);
   1292 		return;
   1293 	}
   1294 	case ICMP6_TIME_EXCEEDED:
   1295 		/* Transient errors */
   1296 		break;
   1297 	case ICMP6_PARAM_PROB:
   1298 		/* If this corresponds to an ICMP_PROTOCOL_UNREACHABLE */
   1299 		if (icmp6->icmp6_code == ICMP6_PARAMPROB_NEXTHEADER &&
   1300 		    (uchar_t *)ip6h + icmp6->icmp6_pptr ==
   1301 		    (uchar_t *)nexthdrp) {
   1302 			error = ECONNREFUSED;
   1303 			break;
   1304 		}
   1305 		break;
   1306 	}
   1307 	if (error == 0) {
   1308 		freemsg(mp);
   1309 		return;
   1310 	}
   1311 
   1312 	/*
   1313 	 * Deliver T_UDERROR_IND when the application has asked for it.
   1314 	 * The socket layer enables this automatically when connected.
   1315 	 */
   1316 	if (!connp->conn_dgram_errind) {
   1317 		freemsg(mp);
   1318 		return;
   1319 	}
   1320 
   1321 	sin6 = sin6_null;
   1322 	sin6.sin6_family = AF_INET6;
   1323 	sin6.sin6_addr = ip6h->ip6_dst;
   1324 	sin6.sin6_port = udpha->uha_dst_port;
   1325 	sin6.sin6_flowinfo = ip6h->ip6_vcf & ~IPV6_VERS_AND_FLOW_MASK;
   1326 
   1327 	if (IPCL_IS_NONSTR(connp)) {
   1328 		mutex_enter(&connp->conn_lock);
   1329 		if (udp->udp_state == TS_DATA_XFER) {
   1330 			if (sin6.sin6_port == connp->conn_fport &&
   1331 			    IN6_ARE_ADDR_EQUAL(&sin6.sin6_addr,
   1332 			    &connp->conn_faddr_v6)) {
   1333 				mutex_exit(&connp->conn_lock);
   1334 				(*connp->conn_upcalls->su_set_error)
   1335 				    (connp->conn_upper_handle, error);
   1336 				goto done;
   1337 			}
   1338 		} else {
   1339 			udp->udp_delayed_error = error;
   1340 			*((sin6_t *)&udp->udp_delayed_addr) = sin6;
   1341 		}
   1342 		mutex_exit(&connp->conn_lock);
   1343 	} else {
   1344 		mp1 = mi_tpi_uderror_ind((char *)&sin6, sizeof (sin6_t),
   1345 		    NULL, 0, error);
   1346 		if (mp1 != NULL)
   1347 			putnext(connp->conn_rq, mp1);
   1348 	}
   1349 done:
   1350 	freemsg(mp);
   1351 }
   1352 
   1353 /*
   1354  * This routine responds to T_ADDR_REQ messages.  It is called by udp_wput.
   1355  * The local address is filled in if endpoint is bound. The remote address
   1356  * is filled in if remote address has been precified ("connected endpoint")
   1357  * (The concept of connected CLTS sockets is alien to published TPI
   1358  *  but we support it anyway).
   1359  */
   1360 static void
   1361 udp_addr_req(queue_t *q, mblk_t *mp)
   1362 {
   1363 	struct sockaddr *sa;
   1364 	mblk_t	*ackmp;
   1365 	struct T_addr_ack *taa;
   1366 	udp_t	*udp = Q_TO_UDP(q);
   1367 	conn_t	*connp = udp->udp_connp;
   1368 	uint_t	addrlen;
   1369 
   1370 	/* Make it large enough for worst case */
   1371 	ackmp = reallocb(mp, sizeof (struct T_addr_ack) +
   1372 	    2 * sizeof (sin6_t), 1);
   1373 	if (ackmp == NULL) {
   1374 		udp_err_ack(q, mp, TSYSERR, ENOMEM);
   1375 		return;
   1376 	}
   1377 	taa = (struct T_addr_ack *)ackmp->b_rptr;
   1378 
   1379 	bzero(taa, sizeof (struct T_addr_ack));
   1380 	ackmp->b_wptr = (uchar_t *)&taa[1];
   1381 
   1382 	taa->PRIM_type = T_ADDR_ACK;
   1383 	ackmp->b_datap->db_type = M_PCPROTO;
   1384 
   1385 	if (connp->conn_family == AF_INET)
   1386 		addrlen = sizeof (sin_t);
   1387 	else
   1388 		addrlen = sizeof (sin6_t);
   1389 
   1390 	mutex_enter(&connp->conn_lock);
   1391 	/*
   1392 	 * Note: Following code assumes 32 bit alignment of basic
   1393 	 * data structures like sin_t and struct T_addr_ack.
   1394 	 */
   1395 	if (udp->udp_state != TS_UNBND) {
   1396 		/*
   1397 		 * Fill in local address first
   1398 		 */
   1399 		taa->LOCADDR_offset = sizeof (*taa);
   1400 		taa->LOCADDR_length = addrlen;
   1401 		sa = (struct sockaddr *)&taa[1];
   1402 		(void) conn_getsockname(connp, sa, &addrlen);
   1403 		ackmp->b_wptr += addrlen;
   1404 	}
   1405 	if (udp->udp_state == TS_DATA_XFER) {
   1406 		/*
   1407 		 * connected, fill remote address too
   1408 		 */
   1409 		taa->REMADDR_length = addrlen;
   1410 		/* assumed 32-bit alignment */
   1411 		taa->REMADDR_offset = taa->LOCADDR_offset + taa->LOCADDR_length;
   1412 		sa = (struct sockaddr *)(ackmp->b_rptr + taa->REMADDR_offset);
   1413 		(void) conn_getpeername(connp, sa, &addrlen);
   1414 		ackmp->b_wptr += addrlen;
   1415 	}
   1416 	mutex_exit(&connp->conn_lock);
   1417 	ASSERT(ackmp->b_wptr <= ackmp->b_datap->db_lim);
   1418 	qreply(q, ackmp);
   1419 }
   1420 
   1421 static void
   1422 udp_copy_info(struct T_info_ack *tap, udp_t *udp)
   1423 {
   1424 	conn_t		*connp = udp->udp_connp;
   1425 
   1426 	if (connp->conn_family == AF_INET) {
   1427 		*tap = udp_g_t_info_ack_ipv4;
   1428 	} else {
   1429 		*tap = udp_g_t_info_ack_ipv6;
   1430 	}
   1431 	tap->CURRENT_state = udp->udp_state;
   1432 	tap->OPT_size = udp_max_optsize;
   1433 }
   1434 
   1435 static void
   1436 udp_do_capability_ack(udp_t *udp, struct T_capability_ack *tcap,
   1437     t_uscalar_t cap_bits1)
   1438 {
   1439 	tcap->CAP_bits1 = 0;
   1440 
   1441 	if (cap_bits1 & TC1_INFO) {
   1442 		udp_copy_info(&tcap->INFO_ack, udp);
   1443 		tcap->CAP_bits1 |= TC1_INFO;
   1444 	}
   1445 }
   1446 
   1447 /*
   1448  * This routine responds to T_CAPABILITY_REQ messages.  It is called by
   1449  * udp_wput.  Much of the T_CAPABILITY_ACK information is copied from
   1450  * udp_g_t_info_ack.  The current state of the stream is copied from
   1451  * udp_state.
   1452  */
   1453 static void
   1454 udp_capability_req(queue_t *q, mblk_t *mp)
   1455 {
   1456 	t_uscalar_t		cap_bits1;
   1457 	struct T_capability_ack	*tcap;
   1458 	udp_t	*udp = Q_TO_UDP(q);
   1459 
   1460 	cap_bits1 = ((struct T_capability_req *)mp->b_rptr)->CAP_bits1;
   1461 
   1462 	mp = tpi_ack_alloc(mp, sizeof (struct T_capability_ack),
   1463 	    mp->b_datap->db_type, T_CAPABILITY_ACK);
   1464 	if (!mp)
   1465 		return;
   1466 
   1467 	tcap = (struct T_capability_ack *)mp->b_rptr;
   1468 	udp_do_capability_ack(udp, tcap, cap_bits1);
   1469 
   1470 	qreply(q, mp);
   1471 }
   1472 
   1473 /*
   1474  * This routine responds to T_INFO_REQ messages.  It is called by udp_wput.
   1475  * Most of the T_INFO_ACK information is copied from udp_g_t_info_ack.
   1476  * The current state of the stream is copied from udp_state.
   1477  */
   1478 static void
   1479 udp_info_req(queue_t *q, mblk_t *mp)
   1480 {
   1481 	udp_t *udp = Q_TO_UDP(q);
   1482 
   1483 	/* Create a T_INFO_ACK message. */
   1484 	mp = tpi_ack_alloc(mp, sizeof (struct T_info_ack), M_PCPROTO,
   1485 	    T_INFO_ACK);
   1486 	if (!mp)
   1487 		return;
   1488 	udp_copy_info((struct T_info_ack *)mp->b_rptr, udp);
   1489 	qreply(q, mp);
   1490 }
   1491 
   1492 /* For /dev/udp aka AF_INET open */
   1493 static int
   1494 udp_openv4(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
   1495 {
   1496 	return (udp_open(q, devp, flag, sflag, credp, B_FALSE));
   1497 }
   1498 
   1499 /* For /dev/udp6 aka AF_INET6 open */
   1500 static int
   1501 udp_openv6(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
   1502 {
   1503 	return (udp_open(q, devp, flag, sflag, credp, B_TRUE));
   1504 }
   1505 
   1506 /*
   1507  * This is the open routine for udp.  It allocates a udp_t structure for
   1508  * the stream and, on the first open of the module, creates an ND table.
   1509  */
   1510 static int
   1511 udp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp,
   1512     boolean_t isv6)
   1513 {
   1514 	udp_t		*udp;
   1515 	conn_t		*connp;
   1516 	dev_t		conn_dev;
   1517 	vmem_t		*minor_arena;
   1518 	int		err;
   1519 
   1520 	/* If the stream is already open, return immediately. */
   1521 	if (q->q_ptr != NULL)
   1522 		return (0);
   1523 
   1524 	if (sflag == MODOPEN)
   1525 		return (EINVAL);
   1526 
   1527 	if ((ip_minor_arena_la != NULL) && (flag & SO_SOCKSTR) &&
   1528 	    ((conn_dev = inet_minor_alloc(ip_minor_arena_la)) != 0)) {
   1529 		minor_arena = ip_minor_arena_la;
   1530 	} else {
   1531 		/*
   1532 		 * Either minor numbers in the large arena were exhausted
   1533 		 * or a non socket application is doing the open.
   1534 		 * Try to allocate from the small arena.
   1535 		 */
   1536 		if ((conn_dev = inet_minor_alloc(ip_minor_arena_sa)) == 0)
   1537 			return (EBUSY);
   1538 
   1539 		minor_arena = ip_minor_arena_sa;
   1540 	}
   1541 
   1542 	if (flag & SO_FALLBACK) {
   1543 		/*
   1544 		 * Non streams socket needs a stream to fallback to
   1545 		 */
   1546 		RD(q)->q_ptr = (void *)conn_dev;
   1547 		WR(q)->q_qinfo = &udp_fallback_sock_winit;
   1548 		WR(q)->q_ptr = (void *)minor_arena;
   1549 		qprocson(q);
   1550 		return (0);
   1551 	}
   1552 
   1553 	connp = udp_do_open(credp, isv6, KM_SLEEP, &err);
   1554 	if (connp == NULL) {
   1555 		inet_minor_free(minor_arena, conn_dev);
   1556 		return (err);
   1557 	}
   1558 	udp = connp->conn_udp;
   1559 
   1560 	*devp = makedevice(getemajor(*devp), (minor_t)conn_dev);
   1561 	connp->conn_dev = conn_dev;
   1562 	connp->conn_minor_arena = minor_arena;
   1563 
   1564 	/*
   1565 	 * Initialize the udp_t structure for this stream.
   1566 	 */
   1567 	q->q_ptr = connp;
   1568 	WR(q)->q_ptr = connp;
   1569 	connp->conn_rq = q;
   1570 	connp->conn_wq = WR(q);
   1571 
   1572 	/*
   1573 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
   1574 	 * need to lock anything.
   1575 	 */
   1576 	ASSERT(connp->conn_proto == IPPROTO_UDP);
   1577 	ASSERT(connp->conn_udp == udp);
   1578 	ASSERT(udp->udp_connp == connp);
   1579 
   1580 	if (flag & SO_SOCKSTR) {
   1581 		udp->udp_issocket = B_TRUE;
   1582 	}
   1583 
   1584 	WR(q)->q_hiwat = connp->conn_sndbuf;
   1585 	WR(q)->q_lowat = connp->conn_sndlowat;
   1586 
   1587 	qprocson(q);
   1588 
   1589 	/* Set the Stream head write offset and high watermark. */
   1590 	(void) proto_set_tx_wroff(q, connp, connp->conn_wroff);
   1591 	(void) proto_set_rx_hiwat(q, connp,
   1592 	    udp_set_rcv_hiwat(udp, connp->conn_rcvbuf));
   1593 
   1594 	mutex_enter(&connp->conn_lock);
   1595 	connp->conn_state_flags &= ~CONN_INCIPIENT;
   1596 	mutex_exit(&connp->conn_lock);
   1597 	return (0);
   1598 }
   1599 
/*
 * Which UDP options OK to set through T_UNITDATA_REQ...
 *
 * Neither the level nor the name is examined: every option is reported
 * as allowed (B_TRUE).
 */
/* ARGSUSED */
static boolean_t
udp_opt_allow_udr_set(t_scalar_t level, t_scalar_t name)
{
	return (B_TRUE);
}
   1609 
   1610 /*
   1611  * This routine gets default values of certain options whose default
   1612  * values are maintained by protcol specific code
   1613  */
   1614 int
   1615 udp_opt_default(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
   1616 {
   1617 	udp_t		*udp = Q_TO_UDP(q);
   1618 	udp_stack_t *us = udp->udp_us;
   1619 	int *i1 = (int *)ptr;
   1620 
   1621 	switch (level) {
   1622 	case IPPROTO_IP:
   1623 		switch (name) {
   1624 		case IP_MULTICAST_TTL:
   1625 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_TTL;
   1626 			return (sizeof (uchar_t));
   1627 		case IP_MULTICAST_LOOP:
   1628 			*ptr = (uchar_t)IP_DEFAULT_MULTICAST_LOOP;
   1629 			return (sizeof (uchar_t));
   1630 		}
   1631 		break;
   1632 	case IPPROTO_IPV6:
   1633 		switch (name) {
   1634 		case IPV6_MULTICAST_HOPS:
   1635 			*i1 = IP_DEFAULT_MULTICAST_TTL;
   1636 			return (sizeof (int));
   1637 		case IPV6_MULTICAST_LOOP:
   1638 			*i1 = IP_DEFAULT_MULTICAST_LOOP;
   1639 			return (sizeof (int));
   1640 		case IPV6_UNICAST_HOPS:
   1641 			*i1 = us->us_ipv6_hoplimit;
   1642 			return (sizeof (int));
   1643 		}
   1644 		break;
   1645 	}
   1646 	return (-1);
   1647 }
   1648 
   1649 /*
   1650  * This routine retrieves the current status of socket options.
   1651  * It returns the size of the option retrieved, or -1.
   1652  */
   1653 int
   1654 udp_opt_get(conn_t *connp, t_scalar_t level, t_scalar_t name,
   1655     uchar_t *ptr)
   1656 {
   1657 	int		*i1 = (int *)ptr;
   1658 	udp_t		*udp = connp->conn_udp;
   1659 	int		len;
   1660 	conn_opt_arg_t	coas;
   1661 	int		retval;
   1662 
   1663 	coas.coa_connp = connp;
   1664 	coas.coa_ixa = connp->conn_ixa;
   1665 	coas.coa_ipp = &connp->conn_xmit_ipp;
   1666 	coas.coa_ancillary = B_FALSE;
   1667 	coas.coa_changed = 0;
   1668 
   1669 	/*
   1670 	 * We assume that the optcom framework has checked for the set
   1671 	 * of levels and names that are supported, hence we don't worry
   1672 	 * about rejecting based on that.
   1673 	 * First check for UDP specific handling, then pass to common routine.
   1674 	 */
   1675 	switch (level) {
   1676 	case IPPROTO_IP:
   1677 		/*
   1678 		 * Only allow IPv4 option processing on IPv4 sockets.
   1679 		 */
   1680 		if (connp->conn_family != AF_INET)
   1681 			return (-1);
   1682 
   1683 		switch (name) {
   1684 		case IP_OPTIONS:
   1685 		case T_IP_OPTIONS:
   1686 			mutex_enter(&connp->conn_lock);
   1687 			if (!(udp->udp_recv_ipp.ipp_fields &
   1688 			    IPPF_IPV4_OPTIONS)) {
   1689 				mutex_exit(&connp->conn_lock);
   1690 				return (0);
   1691 			}
   1692 
   1693 			len = udp->udp_recv_ipp.ipp_ipv4_options_len;
   1694 			ASSERT(len != 0);
   1695 			bcopy(udp->udp_recv_ipp.ipp_ipv4_options, ptr, len);
   1696 			mutex_exit(&connp->conn_lock);
   1697 			return (len);
   1698 		}
   1699 		break;
   1700 	case IPPROTO_UDP:
   1701 		switch (name) {
   1702 		case UDP_NAT_T_ENDPOINT:
   1703 			mutex_enter(&connp->conn_lock);
   1704 			*i1 = udp->udp_nat_t_endpoint;
   1705 			mutex_exit(&connp->conn_lock);
   1706 			return (sizeof (int));
   1707 		case UDP_RCVHDR:
   1708 			mutex_enter(&connp->conn_lock);
   1709 			*i1 = udp->udp_rcvhdr ? 1 : 0;
   1710 			mutex_exit(&connp->conn_lock);
   1711 			return (sizeof (int));
   1712 		}
   1713 	}
   1714 	mutex_enter(&connp->conn_lock);
   1715 	retval = conn_opt_get(&coas, level, name, ptr);
   1716 	mutex_exit(&connp->conn_lock);
   1717 	return (retval);
   1718 }
   1719 
   1720 /*
   1721  * This routine retrieves the current status of socket options.
   1722  * It returns the size of the option retrieved, or -1.
   1723  */
   1724 int
   1725 udp_tpi_opt_get(queue_t *q, t_scalar_t level, t_scalar_t name, uchar_t *ptr)
   1726 {
   1727 	conn_t		*connp = Q_TO_CONN(q);
   1728 	int		err;
   1729 
   1730 	err = udp_opt_get(connp, level, name, ptr);
   1731 	return (err);
   1732 }
   1733 
   1734 /*
   1735  * This routine sets socket options.
   1736  */
   1737 int
   1738 udp_do_opt_set(conn_opt_arg_t *coa, int level, int name,
   1739     uint_t inlen, uchar_t *invalp, cred_t *cr, boolean_t checkonly)
   1740 {
   1741 	conn_t		*connp = coa->coa_connp;
   1742 	ip_xmit_attr_t	*ixa = coa->coa_ixa;
   1743 	udp_t		*udp = connp->conn_udp;
   1744 	udp_stack_t	*us = udp->udp_us;
   1745 	int		*i1 = (int *)invalp;
   1746 	boolean_t 	onoff = (*i1 == 0) ? 0 : 1;
   1747 	int		error;
   1748 
   1749 	ASSERT(MUTEX_NOT_HELD(&coa->coa_connp->conn_lock));
   1750 	/*
   1751 	 * First do UDP specific sanity checks and handle UDP specific
   1752 	 * options. Note that some IPPROTO_UDP options are handled
   1753 	 * by conn_opt_set.
   1754 	 */
   1755 	switch (level) {
   1756 	case SOL_SOCKET:
   1757 		switch (name) {
   1758 		case SO_SNDBUF:
   1759 			if (*i1 > us->us_max_buf) {
   1760 				return (ENOBUFS);
   1761 			}
   1762 			break;
   1763 		case SO_RCVBUF:
   1764 			if (*i1 > us->us_max_buf) {
   1765 				return (ENOBUFS);
   1766 			}
   1767 			break;
   1768 
   1769 		case SCM_UCRED: {
   1770 			struct ucred_s *ucr;
   1771 			cred_t *newcr;
   1772 			ts_label_t *tsl;
   1773 
   1774 			/*
   1775 			 * Only sockets that have proper privileges and are
   1776 			 * bound to MLPs will have any other value here, so
   1777 			 * this implicitly tests for privilege to set label.
   1778 			 */
   1779 			if (connp->conn_mlp_type == mlptSingle)
   1780 				break;
   1781 
   1782 			ucr = (struct ucred_s *)invalp;
   1783 			if (inlen < sizeof (*ucr) + sizeof (bslabel_t) ||
   1784 			    ucr->uc_labeloff < sizeof (*ucr) ||
   1785 			    ucr->uc_labeloff + sizeof (bslabel_t) > inlen)
   1786 				return (EINVAL);
   1787 			if (!checkonly) {
   1788 				/*
   1789 				 * Set ixa_tsl to the new label.
   1790 				 * We assume that crgetzoneid doesn't change
   1791 				 * as part of the SCM_UCRED.
   1792 				 */
   1793 				ASSERT(cr != NULL);
   1794 				if ((tsl = crgetlabel(cr)) == NULL)
   1795 					return (EINVAL);
   1796 				newcr = copycred_from_bslabel(cr, UCLABEL(ucr),
   1797 				    tsl->tsl_doi, KM_NOSLEEP);
   1798 				if (newcr == NULL)
   1799 					return (ENOSR);
   1800 				ASSERT(newcr->cr_label != NULL);
   1801 				/*
   1802 				 * Move the hold on the cr_label to ixa_tsl by
   1803 				 * setting cr_label to NULL. Then release newcr.
   1804 				 */
   1805 				ip_xmit_attr_replace_tsl(ixa, newcr->cr_label);
   1806 				ixa->ixa_flags |= IXAF_UCRED_TSL;
   1807 				newcr->cr_label = NULL;
   1808 				crfree(newcr);
   1809 				coa->coa_changed |= COA_HEADER_CHANGED;
   1810 				coa->coa_changed |= COA_WROFF_CHANGED;
   1811 			}
   1812 			/* Fully handled this option. */
   1813 			return (0);
   1814 		}
   1815 		}
   1816 		break;
   1817 	case IPPROTO_UDP:
   1818 		switch (name) {
   1819 		case UDP_NAT_T_ENDPOINT:
   1820 			if ((error = secpolicy_ip_config(cr, B_FALSE)) != 0) {
   1821 				return (error);
   1822 			}
   1823 
   1824 			/*
   1825 			 * Use conn_family instead so we can avoid ambiguitites
   1826 			 * with AF_INET6 sockets that may switch from IPv4
   1827 			 * to IPv6.
   1828 			 */
   1829 			if (connp->conn_family != AF_INET) {
   1830 				return (EAFNOSUPPORT);
   1831 			}
   1832 
   1833 			if (!checkonly) {
   1834 				mutex_enter(&connp->conn_lock);
   1835 				udp->udp_nat_t_endpoint = onoff;
   1836 				mutex_exit(&connp->conn_lock);
   1837 				coa->coa_changed |= COA_HEADER_CHANGED;
   1838 				coa->coa_changed |= COA_WROFF_CHANGED;
   1839 			}
   1840 			/* Fully handled this option. */
   1841 			return (0);
   1842 		case UDP_RCVHDR:
   1843 			mutex_enter(&connp->conn_lock);
   1844 			udp->udp_rcvhdr = onoff;
   1845 			mutex_exit(&connp->conn_lock);
   1846 			return (0);
   1847 		}
   1848 		break;
   1849 	}
   1850 	error = conn_opt_set(coa, level, name, inlen, invalp,
   1851 	    checkonly, cr);
   1852 	return (error);
   1853 }
   1854 
   1855 /*
   1856  * This routine sets socket options.
         *
         * optset_context selects the mode: SETFN_OPTCOM_CHECKONLY only validates
         * the value (and returns success at once when inlen is 0),
         * SETFN_OPTCOM_NEGOTIATE sets it, and SETFN_UD_NEGOTIATE /
         * SETFN_CONN_NEGOTIATE set it after filtering the option through
         * udp_opt_allow_udr_set().  Any other context fails with EINVAL.
         *
         * If thisdg_attrs is non-NULL it is the caller's conn_opt_arg_t (asserted
         * to be marked ancillary); otherwise a local one is built around a
         * reference obtained with conn_get_ixa(), which is released before
         * returning.
         *
         * On success the input value is copied to outvalp (when the two buffers
         * differ) and *outlenp is set to inlen.  On the failure paths up to and
         * including the DHCPINIT handling, *outlenp is set to 0 and an errno
         * value is returned.
         * NOTE(review): a udp_build_hdr_template() failure near the end returns
         * its error while *outlenp is still inlen -- confirm callers cope.
   1857  */
   1858 int
   1859 udp_opt_set(conn_t *connp, uint_t optset_context, int level,
   1860     int name, uint_t inlen, uchar_t *invalp, uint_t *outlenp,
   1861     uchar_t *outvalp, void *thisdg_attrs, cred_t *cr)
   1862 {
   1863 	udp_t		*udp = connp->conn_udp;
   1864 	int		err;
   1865 	conn_opt_arg_t	coas, *coa;
   1866 	boolean_t	checkonly;
   1867 	udp_stack_t	*us = udp->udp_us;
   1868 
   1869 	switch (optset_context) {
   1870 	case SETFN_OPTCOM_CHECKONLY:
   1871 		checkonly = B_TRUE;
   1872 		/*
   1873 		 * Note: Implies T_CHECK semantics for T_OPTCOM_REQ
   1874 		 * inlen != 0 implies value supplied and
   1875 		 * 	we have to "pretend" to set it.
   1876 		 * inlen == 0 implies that there is no
   1877 		 * 	value part in T_CHECK request and just validation
   1878 		 * done elsewhere should be enough, we just return here.
   1879 		 */
   1880 		if (inlen == 0) {
   1881 			*outlenp = 0;
   1882 			return (0);
   1883 		}
   1884 		break;
   1885 	case SETFN_OPTCOM_NEGOTIATE:
   1886 		checkonly = B_FALSE;
   1887 		break;
   1888 	case SETFN_UD_NEGOTIATE:
   1889 	case SETFN_CONN_NEGOTIATE:
   1890 		checkonly = B_FALSE;
   1891 		/*
   1892 		 * Negotiating local and "association-related" options
   1893 		 * through T_UNITDATA_REQ.
   1894 		 *
   1895 		 * Following routine can filter out ones we do not
   1896 		 * want to be "set" this way.
   1897 		 */
   1898 		if (!udp_opt_allow_udr_set(level, name)) {
   1899 			*outlenp = 0;
   1900 			return (EINVAL);
   1901 		}
   1902 		break;
   1903 	default:
   1904 		/*
   1905 		 * We should never get here
   1906 		 */
   1907 		*outlenp = 0;
   1908 		return (EINVAL);
   1909 	}
   1910 
   1911 	ASSERT((optset_context != SETFN_OPTCOM_CHECKONLY) ||
   1912 	    (optset_context == SETFN_OPTCOM_CHECKONLY && inlen != 0));
   1913 
   1914 	if (thisdg_attrs != NULL) {
   1915 		/* Options from T_UNITDATA_REQ */
   1916 		coa = (conn_opt_arg_t *)thisdg_attrs;
   1917 		ASSERT(coa->coa_connp == connp);
   1918 		ASSERT(coa->coa_ixa != NULL);
   1919 		ASSERT(coa->coa_ipp != NULL);
   1920 		ASSERT(coa->coa_ancillary);
   1921 	} else {
        		/* Build a local argument block around the conn's own state. */
   1922 		coa = &coas;
   1923 		coas.coa_connp = connp;
   1924 		/* Get a reference on conn_ixa to prevent concurrent mods */
   1925 		coas.coa_ixa = conn_get_ixa(connp, B_TRUE);
   1926 		if (coas.coa_ixa == NULL) {
   1927 			*outlenp = 0;
   1928 			return (ENOMEM);
   1929 		}
   1930 		coas.coa_ipp = &connp->conn_xmit_ipp;
   1931 		coas.coa_ancillary = B_FALSE;
   1932 		coas.coa_changed = 0;
   1933 	}
   1934 
   1935 	err = udp_do_opt_set(coa, level, name, inlen, invalp,
   1936 	    cr, checkonly);
   1937 	if (err != 0) {
        		/*
        		 * Common failure exit, also the target of the gotos in the
        		 * DHCPINIT handling below.  Only the reference taken locally
        		 * (non-ancillary case) is dropped here.
        		 */
   1938 errout:
   1939 		if (!coa->coa_ancillary)
   1940 			ixa_refrele(coa->coa_ixa);
   1941 		*outlenp = 0;
   1942 		return (err);
   1943 	}
   1944 	/* Handle DHCPINIT here outside of lock */
        	/*
        	 * NOTE(review): this block does not test checkonly, so a check-only
        	 * request for IP_DHCPINIT_IF also updates conn_dhcpinit_ill --
        	 * confirm that is intended.
        	 */
   1945 	if (level == IPPROTO_IP && name == IP_DHCPINIT_IF) {
   1946 		uint_t	ifindex;
   1947 		ill_t	*ill;
   1948 
        		/* An ifindex of 0 clears the setting; otherwise look up the ill. */
   1949 		ifindex = *(uint_t *)invalp;
   1950 		if (ifindex == 0) {
   1951 			ill = NULL;
   1952 		} else {
   1953 			ill = ill_lookup_on_ifindex(ifindex, B_FALSE,
   1954 			    coa->coa_ixa->ixa_ipst);
   1955 			if (ill == NULL) {
   1956 				err = ENXIO;
   1957 				goto errout;
   1958 			}
   1959 
        			/*
        			 * ill_lock is taken here and, on success, stays held
        			 * until conn_dhcpinit_ill has been updated below.
        			 */
   1960 			mutex_enter(&ill->ill_lock);
   1961 			if (ill->ill_state_flags & ILL_CONDEMNED) {
   1962 				mutex_exit(&ill->ill_lock);
   1963 				ill_refrele(ill);
   1964 				err = ENXIO;
   1965 				goto errout;
   1966 			}
   1967 			if (IS_VNI(ill)) {
   1968 				mutex_exit(&ill->ill_lock);
   1969 				ill_refrele(ill);
   1970 				err = EINVAL;
   1971 				goto errout;
   1972 			}
   1973 		}
   1974 		mutex_enter(&connp->conn_lock);
   1975 
        		/* Undo any previous DHCPINIT association before the new one. */
   1976 		if (connp->conn_dhcpinit_ill != NULL) {
   1977 			/*
   1978 			 * We've locked the conn so conn_cleanup_ill()
   1979 			 * cannot clear conn_dhcpinit_ill -- so it's
   1980 			 * safe to access the ill.
   1981 			 */
   1982 			ill_t *oill = connp->conn_dhcpinit_ill;
   1983 
   1984 			ASSERT(oill->ill_dhcpinit != 0);
   1985 			atomic_dec_32(&oill->ill_dhcpinit);
   1986 			ill_set_inputfn(connp->conn_dhcpinit_ill);
   1987 			connp->conn_dhcpinit_ill = NULL;
   1988 		}
   1989 
   1990 		if (ill != NULL) {
   1991 			connp->conn_dhcpinit_ill = ill;
   1992 			atomic_inc_32(&ill->ill_dhcpinit);
   1993 			ill_set_inputfn(ill);
        			/* Drop both locks and the lookup reference. */
   1994 			mutex_exit(&connp->conn_lock);
   1995 			mutex_exit(&ill->ill_lock);
   1996 			ill_refrele(ill);
   1997 		} else {
   1998 			mutex_exit(&connp->conn_lock);
   1999 		}
   2000 	}
   2001 
   2002 	/*
   2003 	 * Common case of OK return with outval same as inval.
   2004 	 */
   2005 	if (invalp != outvalp) {
   2006 		/* don't trust bcopy for identical src/dst */
   2007 		(void) bcopy(invalp, outvalp, inlen);
   2008 	}
   2009 	*outlenp = inlen;
   2010 
   2011 	/*
   2012 	 * If this was not ancillary data, then we rebuild the headers,
   2013 	 * update the IRE/NCE, and IPsec as needed.
   2014 	 * Since the label depends on the destination we go through
   2015 	 * ip_set_destination first.
   2016 	 */
   2017 	if (coa->coa_ancillary) {
   2018 		return (0);
   2019 	}
   2020 
   2021 	if (coa->coa_changed & COA_ROUTE_CHANGED) {
   2022 		in6_addr_t saddr, faddr, nexthop;
   2023 		in_port_t fport;
   2024 
   2025 		/*
   2026 		 * We clear lastdst to make sure we pick up the change
   2027 		 * next time sending.
   2028 		 * If we are connected we re-cache the information.
   2029 		 * We ignore errors to preserve BSD behavior.
   2030 		 * Note that we don't redo IPsec policy lookup here
   2031 		 * since the final destination (or source) didn't change.
   2032 		 */
   2033 		mutex_enter(&connp->conn_lock);
   2034 		connp->conn_v6lastdst = ipv6_all_zeros;
   2035 
        		/* Snapshot the addresses under conn_lock; use them unlocked. */
   2036 		ip_attr_nexthop(coa->coa_ipp, coa->coa_ixa,
   2037 		    &connp->conn_faddr_v6, &nexthop);
   2038 		saddr = connp->conn_saddr_v6;
   2039 		faddr = connp->conn_faddr_v6;
   2040 		fport = connp->conn_fport;
   2041 		mutex_exit(&connp->conn_lock);
   2042 
   2043 		if (!IN6_IS_ADDR_UNSPECIFIED(&faddr) &&
   2044 		    !IN6_IS_ADDR_V4MAPPED_ANY(&faddr)) {
   2045 			(void) ip_attr_connect(connp, coa->coa_ixa,
   2046 			    &saddr, &faddr, &nexthop, fport, NULL, NULL,
   2047 			    IPDF_ALLOW_MCBC | IPDF_VERIFY_DST);
   2048 		}
   2049 	}
   2050 
        	/* Only the non-ancillary case gets here: drop the conn_get_ixa() ref. */
   2051 	ixa_refrele(coa->coa_ixa);
   2052 
   2053 	if (coa->coa_changed & COA_HEADER_CHANGED) {
   2054 		/*
   2055 		 * Rebuild the header template if we are connected.
   2056 		 * Otherwise clear conn_v6lastdst so we rebuild the header
   2057 		 * in the data path.
   2058 		 */
   2059 		mutex_enter(&connp->conn_lock);
   2060 		if (!IN6_IS_ADDR_UNSPECIFIED(&connp->conn_faddr_v6) &&
   2061 		    !IN6_IS_ADDR_V4MAPPED_ANY(&connp->conn_faddr_v6)) {
   2062 			err = udp_build_hdr_template(connp,
   2063 			    &connp->conn_saddr_v6, &connp->conn_faddr_v6,
   2064 			    connp->conn_fport, connp->conn_flowinfo);
   2065 			if (err != 0) {
   2066 				mutex_exit(&connp->conn_lock);
   2067 				return (err);
   2068 			}
   2069 		} else {
   2070 			connp->conn_v6lastdst = ipv6_all_zeros;
   2071 		}
   2072 		mutex_exit(&connp->conn_lock);
   2073 	}
   2074 	if (coa->coa_changed & COA_RCVBUF_CHANGED) {
   2075 		(void) proto_set_rx_hiwat(connp->conn_rq, connp,
   2076 		    connp->conn_rcvbuf);
   2077 	}
        	/* conn_wq's q_hiwat is only updated when the conn is not NONSTR. */
   2078 	if ((coa->coa_changed & COA_SNDBUF_CHANGED) && !IPCL_IS_NONSTR(connp)) {
   2079 		connp->conn_wq->q_hiwat = connp->conn_sndbuf;
   2080 	}
   2081 	if (coa->coa_changed & COA_WROFF_CHANGED) {
   2082 		/* Increase wroff if needed */
   2083 		uint_t wroff;
   2084 
   2085 		mutex_enter(&connp->conn_lock);
   2086 		wroff = connp->conn_ht_iphc_allocated + us->us_wroff_extra;
        		/* Leave room for the 32-bit word added for NAT-T endpoints. */
   2087 		if (udp->udp_nat_t_endpoint)
   2088 			wroff += sizeof (uint32_t);
        		/* conn_wroff is only ever raised here, never lowered. */
   2089 		if (wroff > connp->conn_wroff) {
   2090 			connp->conn_wroff = wroff;
   2091 			mutex_exit(&connp->conn_lock);
   2092 			(void) proto_set_tx_wroff(connp->conn_rq, connp, wroff);
   2093 		} else {
   2094 			mutex_exit(&connp->conn_lock);
   2095 		}
   2096 	}
   2097 	return (err);
   2098 }
   2099 
   2100 /* This routine sets socket options. */
   2101 int
   2102 udp_tpi_opt_set(queue_t *q, uint_t optset_context, int level, int name,
   2103     uint_t inlen, uchar_t *invalp, uint_t *outlenp, uchar_t *outvalp,
   2104     void *thisdg_attrs, cred_t *cr)
   2105 {
   2106 	conn_t	*connp = Q_TO_CONN(q);
   2107 	int error;
   2108 
   2109 	error = udp_opt_set(connp, optset_context, level, name, inlen, invalp,
   2110 	    outlenp, outvalp, thisdg_attrs, cr);
   2111 	return (error);
   2112 }
   2113 
   2114 /*
   2115  * Setup IP and UDP headers.
   2116  * Returns NULL on allocation failure, in which case data_mp is freed.
   2117  */
   2118 mblk_t *
   2119 udp_prepend_hdr(conn_t *connp, ip_xmit_attr_t *ixa, const ip_pkt_t *ipp,
   2120     const in6_addr_t *v6src, const in6_addr_t *v6dst, in_port_t dstport,
   2121     uint32_t flowinfo, mblk_t *data_mp, int *errorp)
   2122 {
   2123 	mblk_t		*mp;
   2124 	udpha_t		*udpha;
   2125 	udp_stack_t	*us = connp->conn_netstack->netstack_udp;
   2126 	uint_t		data_len;
   2127 	uint32_t	cksum;
   2128 	udp_t		*udp = connp->conn_udp;
   2129 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
   2130 	uint_t		ulp_hdr_len;
   2131 
   2132 	data_len = msgdsize(data_mp);
   2133 	ulp_hdr_len = UDPH_SIZE;
   2134 	if (insert_spi)
   2135 		ulp_hdr_len += sizeof (uint32_t);
   2136 
   2137 	mp = conn_prepend_hdr(ixa, ipp, v6src, v6dst, IPPROTO_UDP, flowinfo,
   2138 	    ulp_hdr_len, data_mp, data_len, us->us_wroff_extra, &cksum, errorp);
   2139 	if (mp == NULL) {
   2140 		ASSERT(*errorp != 0);
   2141 		return (NULL);
   2142 	}
   2143 
   2144 	data_len += ulp_hdr_len;
   2145 	ixa->ixa_pktlen = data_len + ixa->ixa_ip_hdr_length;
   2146 
   2147 	udpha = (udpha_t *)(mp->b_rptr + ixa->ixa_ip_hdr_length);
   2148 	udpha->uha_src_port = connp->conn_lport;
   2149 	udpha->uha_dst_port = dstport;
   2150 	udpha->uha_checksum = 0;
   2151 	udpha->uha_length = htons(data_len);
   2152 
   2153 	/*
   2154 	 * If there was a routing option/header then conn_prepend_hdr
   2155 	 * has massaged it and placed the pseudo-header checksum difference
   2156 	 * in the cksum argument.
   2157 	 *
   2158 	 * Setup header length and prepare for ULP checksum done in IP.
   2159 	 *
   2160 	 * We make it easy for IP to include our pseudo header
   2161 	 * by putting our length in uha_checksum.
   2162 	 * The IP source, destination, and length have already been set by
   2163 	 * conn_prepend_hdr.
   2164 	 */
   2165 	cksum += data_len;
   2166 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
   2167 	ASSERT(cksum < 0x10000);
   2168 
   2169 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
   2170 		ipha_t	*ipha = (ipha_t *)mp->b_rptr;
   2171 
   2172 		ASSERT(ntohs(ipha->ipha_length) == ixa->ixa_pktlen);
   2173 
   2174 		/* IP does the checksum if uha_checksum is non-zero */
   2175 		if (us->us_do_checksum) {
   2176 			if (cksum == 0)
   2177 				udpha->uha_checksum = 0xffff;
   2178 			else
   2179 				udpha->uha_checksum = htons(cksum);
   2180 		} else {
   2181 			udpha->uha_checksum = 0;
   2182 		}
   2183 	} else {
   2184 		ip6_t *ip6h = (ip6_t *)mp->b_rptr;
   2185 
   2186 		ASSERT(ntohs(ip6h->ip6_plen) + IPV6_HDR_LEN == ixa->ixa_pktlen);
   2187 		if (cksum == 0)
   2188 			udpha->uha_checksum = 0xffff;
   2189 		else
   2190 			udpha->uha_checksum = htons(cksum);
   2191 	}
   2192 
   2193 	/* Insert all-0s SPI now. */
   2194 	if (insert_spi)
   2195 		*((uint32_t *)(udpha + 1)) = 0;
   2196 
   2197 	return (mp);
   2198 }
   2199 
   2200 static int
   2201 udp_build_hdr_template(conn_t *connp, const in6_addr_t *v6src,
   2202     const in6_addr_t *v6dst, in_port_t dstport, uint32_t flowinfo)
   2203 {
   2204 	udpha_t		*udpha;
   2205 	int		error;
   2206 
   2207 	ASSERT(MUTEX_HELD(&connp->conn_lock));
   2208 	/*
   2209 	 * We clear lastdst to make sure we don't use the lastdst path
   2210 	 * next time sending since we might not have set v6dst yet.
   2211 	 */
   2212 	connp->conn_v6lastdst = ipv6_all_zeros;
   2213 
   2214 	error = conn_build_hdr_template(connp, UDPH_SIZE, 0, v6src, v6dst,
   2215 	    flowinfo);
   2216 	if (error != 0)
   2217 		return (error);
   2218 
   2219 	/*
   2220 	 * Any routing header/option has been massaged. The checksum difference
   2221 	 * is stored in conn_sum.
   2222 	 */
   2223 	udpha = (udpha_t *)connp->conn_ht_ulp;
   2224 	udpha->uha_src_port = connp->conn_lport;
   2225 	udpha->uha_dst_port = dstport;
   2226 	udpha->uha_checksum = 0;
   2227 	udpha->uha_length = htons(UDPH_SIZE);	/* Filled in later */
   2228 	return (0);
   2229 }
   2230 
   2231 /*
   2232  * This routine retrieves the value of an ND variable in a udpparam_t
   2233  * structure.  It is called through nd_getset when a user reads the
   2234  * variable.
   2235  */
   2236 /* ARGSUSED */
   2237 static int
   2238 udp_param_get(queue_t *q, mblk_t *mp, caddr_t cp, cred_t *cr)
   2239 {
   2240 	udpparam_t *udppa = (udpparam_t *)cp;
   2241 
   2242 	(void) mi_mpprintf(mp, "%d", udppa->udp_param_value);
   2243 	return (0);
   2244 }
   2245 
   2246 /*
   2247  * Walk through the param array specified registering each element with the
   2248  * named dispatch (ND) handler.
   2249  */
   2250 static boolean_t
   2251 udp_param_register(IDP *ndp, udpparam_t *udppa, int cnt)
   2252 {
   2253 	for (; cnt-- > 0; udppa++) {
   2254 		if (udppa->udp_param_name && udppa->udp_param_name[0]) {
   2255 			if (!nd_load(ndp, udppa->udp_param_name,
   2256 			    udp_param_get, udp_param_set,
   2257 			    (caddr_t)udppa)) {
   2258 				nd_free(ndp);
   2259 				return (B_FALSE);
   2260 			}
   2261 		}
   2262 	}
   2263 	if (!nd_load(ndp, "udp_extra_priv_ports",
   2264 	    udp_extra_priv_ports_get, NULL, NULL)) {
   2265 		nd_free(ndp);
   2266 		return (B_FALSE);
   2267 	}
   2268 	if (!nd_load(ndp, "udp_extra_priv_ports_add",
   2269 	    NULL, udp_extra_priv_ports_add, NULL)) {
   2270 		nd_free(ndp);
   2271 		return (B_FALSE);
   2272 	}
   2273 	if (!nd_load(ndp, "udp_extra_priv_ports_del",
   2274 	    NULL, udp_extra_priv_ports_del, NULL)) {
   2275 		nd_free(ndp);
   2276 		return (B_FALSE);
   2277 	}
   2278 	return (B_TRUE);
   2279 }
   2280 
   2281 /* This routine sets an ND variable in a udpparam_t structure. */
   2282 /* ARGSUSED */
   2283 static int
   2284 udp_param_set(queue_t *q, mblk_t *mp, char *value, caddr_t cp, cred_t *cr)
   2285 {
   2286 	long		new_value;
   2287 	udpparam_t	*udppa = (udpparam_t *)cp;
   2288 
   2289 	/*
   2290 	 * Fail the request if the new value does not lie within the
   2291 	 * required bounds.
   2292 	 */
   2293 	if (ddi_strtol(value, NULL, 10, &new_value) != 0 ||
   2294 	    new_value < udppa->udp_param_min ||
   2295 	    new_value > udppa->udp_param_max) {
   2296 		return (EINVAL);
   2297 	}
   2298 
   2299 	/* Set the new value */
   2300 	udppa->udp_param_value = new_value;
   2301 	return (0);
   2302 }
   2303 
   2304 static mblk_t *
   2305 udp_queue_fallback(udp_t *udp, mblk_t *mp)
   2306 {
   2307 	ASSERT(MUTEX_HELD(&udp->udp_recv_lock));
   2308 	if (IPCL_IS_NONSTR(udp->udp_connp)) {
   2309 		/*
   2310 		 * fallback has started but messages have not been moved yet
   2311 		 */
   2312 		if (udp->udp_fallback_queue_head == NULL) {
   2313 			ASSERT(udp->udp_fallback_queue_tail == NULL);
   2314 			udp->udp_fallback_queue_head = mp;
   2315 			udp->udp_fallback_queue_tail = mp;
   2316 		} else {
   2317 			ASSERT(udp->udp_fallback_queue_tail != NULL);
   2318 			udp->udp_fallback_queue_tail->b_next = mp;
   2319 			udp->udp_fallback_queue_tail = mp;
   2320 		}
   2321 		return (NULL);
   2322 	} else {
   2323 		/*
   2324 		 * Fallback completed, let the caller putnext() the mblk.
   2325 		 */
   2326 		return (mp);
   2327 	}
   2328 }
   2329 
   2330 /*
   2331  * Deliver data to ULP. In case we have a socket, and it's falling back to
   2332  * TPI, then we'll queue the mp for later processing.
   2333  */
   2334 static void
   2335 udp_ulp_recv(conn_t *connp, mblk_t *mp, uint_t len, ip_recv_attr_t *ira)
   2336 {
   2337 	if (IPCL_IS_NONSTR(connp)) {
   2338 		udp_t *udp = connp->conn_udp;
   2339 		int error;
   2340 
   2341 		ASSERT(len == msgdsize(mp));
   2342 		if ((*connp->conn_upcalls->su_recv)
   2343 		    (connp->conn_upper_handle, mp, len, 0, &error, NULL) < 0) {
   2344 			mutex_enter(&udp->udp_recv_lock);
   2345 			if (error == ENOSPC) {
   2346 				/*
   2347 				 * let's confirm while holding the lock
   2348 				 */
   2349 				if ((*connp->conn_upcalls->su_recv)
   2350 				    (connp->conn_upper_handle, NULL, 0, 0,
   2351 				    &error, NULL) < 0) {
   2352 					ASSERT(error == ENOSPC);
   2353 					if (error == ENOSPC) {
   2354 						connp->conn_flow_cntrld =
   2355 						    B_TRUE;
   2356 					}
   2357 				}
   2358 				mutex_exit(&udp->udp_recv_lock);
   2359 			} else {
   2360 				ASSERT(error == EOPNOTSUPP);
   2361 				mp = udp_queue_fallback(udp, mp);
   2362 				mutex_exit(&udp->udp_recv_lock);
   2363 				if (mp != NULL)
   2364 					putnext(connp->conn_rq, mp);
   2365 			}
   2366 		}
   2367 		ASSERT(MUTEX_NOT_HELD(&udp->udp_recv_lock));
   2368 	} else {
   2369 		if (is_system_labeled()) {
   2370 			ASSERT(ira->ira_cred != NULL);
   2371 			/*
   2372 			 * Provide for protocols above UDP such as RPC
   2373 			 * NOPID leaves db_cpid unchanged.
   2374 			 */
   2375 			mblk_setcred(mp, ira->ira_cred, NOPID);
   2376 		}
   2377 
   2378 		putnext(connp->conn_rq, mp);
   2379 	}
   2380 }
   2381 
   2382 /*
   2383  * This is the inbound data path.
   2384  * IP has already pulled up the IP plus UDP headers and verified alignment
   2385  * etc.
         *
         * arg1 is the conn_t the datagram is delivered to; arg2 is unused.
         * The packet length from IP is checked against the UDP length field, the
         * datagram is wrapped in a T_UNITDATA_IND M_PROTO block carrying the
         * source address (sin_t for AF_INET conns, sin6_t otherwise) plus any
         * requested ancillary data, and the result is handed to udp_ulp_recv().
         * On a length mismatch or allocation failure the message is freed and
         * udpInErrors is bumped.
   2386  */
   2387 /* ARGSUSED2 */
   2388 static void
   2389 udp_input(void *arg1, mblk_t *mp, void *arg2, ip_recv_attr_t *ira)
   2390 {
   2391 	conn_t			*connp = (conn_t *)arg1;
   2392 	struct T_unitdata_ind	*tudi;
   2393 	uchar_t			*rptr;		/* Pointer to IP header */
   2394 	int			hdr_length;	/* Length of IP+UDP headers */
   2395 	int			udi_size;	/* Size of T_unitdata_ind */
   2396 	int			pkt_len;
   2397 	udp_t			*udp;
   2398 	udpha_t			*udpha;
   2399 	ip_pkt_t		ipps;
   2400 	ip6_t			*ip6h;
   2401 	mblk_t			*mp1;
   2402 	uint32_t		udp_ipv4_options_len;
   2403 	crb_t			recv_ancillary;
   2404 	udp_stack_t		*us;
   2405 
   2406 	ASSERT(connp->conn_flags & IPCL_UDPCONN);
   2407 
   2408 	udp = connp->conn_udp;
   2409 	us = udp->udp_us;
   2410 	rptr = mp->b_rptr;
   2411 
   2412 	ASSERT(DB_TYPE(mp) == M_DATA);
   2413 	ASSERT(OK_32PTR(rptr));
   2414 	ASSERT(ira->ira_pktlen == msgdsize(mp));
   2415 	pkt_len = ira->ira_pktlen;
   2416 
   2417 	/*
   2418 	 * Get a snapshot of these and allow other threads to change
   2419 	 * them after that. We need the same recv_ancillary when determining
   2420 	 * the size as when adding the ancillary data items.
   2421 	 */
   2422 	mutex_enter(&connp->conn_lock);
   2423 	udp_ipv4_options_len = udp->udp_recv_ipp.ipp_ipv4_options_len;
   2424 	recv_ancillary = connp->conn_recv_ancillary;
   2425 	mutex_exit(&connp->conn_lock);
   2426 
   2427 	hdr_length = ira->ira_ip_hdr_length;
   2428 
   2429 	/*
   2430 	 * IP inspected the UDP header thus all of it must be in the mblk.
   2431 	 * UDP length check is performed for IPv6 packets and IPv4 packets
   2432 	 * to check if the size of the packet as specified
   2433 	 * by the UDP header is the same as the length derived from the IP
   2434 	 * header.
   2435 	 */
   2436 	udpha = (udpha_t *)(rptr + hdr_length);
   2437 	if (pkt_len != ntohs(udpha->uha_length) + hdr_length)
   2438 		goto tossit;
   2439 
        	/* From here on hdr_length covers the IP and the UDP header. */
   2440 	hdr_length += UDPH_SIZE;
   2441 	ASSERT(MBLKL(mp) >= hdr_length);	/* IP did a pullup */
   2442 
   2443 	/* Initialize regardless of IP version */
   2444 	ipps.ipp_fields = 0;
   2445 
        	/*
        	 * Refresh the saved IPv4 options when this packet carries options
        	 * or when options were recorded from an earlier packet.
        	 */
   2446 	if (((ira->ira_flags & IRAF_IPV4_OPTIONS) ||
   2447 	    udp_ipv4_options_len > 0) &&
   2448 	    connp->conn_family == AF_INET) {
   2449 		int	err;
   2450 
   2451 		/*
   2452 		 * Record/update udp_recv_ipp with the lock
   2453 		 * held. Not needed for AF_INET6 sockets
   2454 		 * since they don't support a getsockopt of IP_OPTIONS.
   2455 		 */
   2456 		mutex_enter(&connp->conn_lock);
   2457 		err = ip_find_hdr_v4((ipha_t *)rptr, &udp->udp_recv_ipp,
   2458 		    B_TRUE);
   2459 		if (err != 0) {
   2460 			/* Allocation failed. Drop packet */
   2461 			mutex_exit(&connp->conn_lock);
   2462 			freemsg(mp);
   2463 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
   2464 			return;
   2465 		}
   2466 		mutex_exit(&connp->conn_lock);
   2467 	}
   2468 
        	/* ipps is only filled in when some ancillary data was requested. */
   2469 	if (recv_ancillary.crb_all != 0) {
   2470 		/*
   2471 		 * Record packet information in the ip_pkt_t
   2472 		 */
   2473 		if (ira->ira_flags & IRAF_IS_IPV4) {
   2474 			ASSERT(IPH_HDR_VERSION(rptr) == IPV4_VERSION);
   2475 			ASSERT(MBLKL(mp) >= sizeof (ipha_t));
   2476 			ASSERT(((ipha_t *)rptr)->ipha_protocol == IPPROTO_UDP);
   2477 			ASSERT(ira->ira_ip_hdr_length == IPH_HDR_LENGTH(rptr));
   2478 
   2479 			(void) ip_find_hdr_v4((ipha_t *)rptr, &ipps, B_FALSE);
   2480 		} else {
   2481 			uint8_t nexthdrp;
   2482 
   2483 			ASSERT(IPH_HDR_VERSION(rptr) == IPV6_VERSION);
   2484 			/*
   2485 			 * IPv6 packets can only be received by applications
   2486 			 * that are prepared to receive IPv6 addresses.
   2487 			 * The IP fanout must ensure this.
   2488 			 */
   2489 			ASSERT(connp->conn_family == AF_INET6);
   2490 
   2491 			ip6h = (ip6_t *)rptr;
   2492 
   2493 			/* We don't care about the length, but need the ipp */
   2494 			hdr_length = ip_find_hdr_v6(mp, ip6h, B_TRUE, &ipps,
   2495 			    &nexthdrp);
   2496 			ASSERT(hdr_length == ira->ira_ip_hdr_length);
   2497 			/* Restore */
   2498 			hdr_length = ira->ira_ip_hdr_length + UDPH_SIZE;
   2499 			ASSERT(nexthdrp == IPPROTO_UDP);
   2500 		}
   2501 	}
   2502 
   2503 	/*
   2504 	 * This is the inbound data path.  Packets are passed upstream as
   2505 	 * T_UNITDATA_IND messages.
   2506 	 */
   2507 	if (connp->conn_family == AF_INET) {
   2508 		sin_t *sin;
   2509 
   2510 		ASSERT(IPH_HDR_VERSION((ipha_t *)rptr) == IPV4_VERSION);
   2511 
   2512 		/*
   2513 		 * Normally only send up the source address.
   2514 		 * If any ancillary data items are wanted we add those.
   2515 		 */
   2516 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin_t);
   2517 		if (recv_ancillary.crb_all != 0) {
   2518 			udi_size += conn_recvancillary_size(connp,
   2519 			    recv_ancillary, ira, mp, &ipps);
   2520 		}
   2521 
   2522 		/* Allocate a message block for the T_UNITDATA_IND structure. */
   2523 		mp1 = allocb(udi_size, BPRI_MED);
   2524 		if (mp1 == NULL) {
   2525 			freemsg(mp);
   2526 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
   2527 			return;
   2528 		}
        		/* mp1 is the M_PROTO header; the datagram hangs off b_cont. */
   2529 		mp1->b_cont = mp;
   2530 		mp1->b_datap->db_type = M_PROTO;
   2531 		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
   2532 		mp1->b_wptr = (uchar_t *)tudi + udi_size;
   2533 		tudi->PRIM_type = T_UNITDATA_IND;
   2534 		tudi->SRC_length = sizeof (sin_t);
   2535 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
   2536 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
   2537 		    sizeof (sin_t);
        		/* From here on udi_size is the size of the options area only. */
   2538 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin_t));
   2539 		tudi->OPT_length = udi_size;
   2540 		sin = (sin_t *)&tudi[1];
   2541 		sin->sin_addr.s_addr = ((ipha_t *)rptr)->ipha_src;
   2542 		sin->sin_port =	udpha->uha_src_port;
   2543 		sin->sin_family = connp->conn_family;
        		/* Zero the eight sin_zero bytes as two 32-bit stores. */
   2544 		*(uint32_t *)&sin->sin_zero[0] = 0;
   2545 		*(uint32_t *)&sin->sin_zero[4] = 0;
   2546 
   2547 		/*
   2548 		 * Add options if IP_RECVDSTADDR, IP_RECVIF, IP_RECVSLLA or
   2549 		 * IP_RECVTTL has been set.
   2550 		 */
   2551 		if (udi_size != 0) {
   2552 			conn_recvancillary_add(connp, recv_ancillary, ira,
   2553 			    &ipps, (uchar_t *)&sin[1], udi_size);
   2554 		}
   2555 	} else {
   2556 		sin6_t *sin6;
   2557 
   2558 		/*
   2559 		 * Handle both IPv4 and IPv6 packets for IPv6 sockets.
   2560 		 *
   2561 		 * Normally we only send up the address. If receiving of any
   2562 		 * optional receive side information is enabled, we also send
   2563 		 * that up as options.
   2564 		 */
   2565 		udi_size = sizeof (struct T_unitdata_ind) + sizeof (sin6_t);
   2566 
   2567 		if (recv_ancillary.crb_all != 0) {
   2568 			udi_size += conn_recvancillary_size(connp,
   2569 			    recv_ancillary, ira, mp, &ipps);
   2570 		}
   2571 
   2572 		mp1 = allocb(udi_size, BPRI_MED);
   2573 		if (mp1 == NULL) {
   2574 			freemsg(mp);
   2575 			BUMP_MIB(&us->us_udp_mib, udpInErrors);
   2576 			return;
   2577 		}
        		/* mp1 is the M_PROTO header; the datagram hangs off b_cont. */
   2578 		mp1->b_cont = mp;
   2579 		mp1->b_datap->db_type = M_PROTO;
   2580 		tudi = (struct T_unitdata_ind *)mp1->b_rptr;
   2581 		mp1->b_wptr = (uchar_t *)tudi + udi_size;
   2582 		tudi->PRIM_type = T_UNITDATA_IND;
   2583 		tudi->SRC_length = sizeof (sin6_t);
   2584 		tudi->SRC_offset = sizeof (struct T_unitdata_ind);
   2585 		tudi->OPT_offset = sizeof (struct T_unitdata_ind) +
   2586 		    sizeof (sin6_t);
        		/* From here on udi_size is the size of the options area only. */
   2587 		udi_size -= (sizeof (struct T_unitdata_ind) + sizeof (sin6_t));
   2588 		tudi->OPT_length = udi_size;
   2589 		sin6 = (sin6_t *)&tudi[1];
   2590 		if (ira->ira_flags & IRAF_IS_IPV4) {
   2591 			in6_addr_t v6dst;
   2592 
        			/* IPv4 packet on an IPv6 socket: use v4-mapped addresses. */
   2593 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_src,
   2594 			    &sin6->sin6_addr);
   2595 			IN6_IPADDR_TO_V4MAPPED(((ipha_t *)rptr)->ipha_dst,
   2596 			    &v6dst);
   2597 			sin6->sin6_flowinfo = 0;
   2598 			sin6->sin6_scope_id = 0;
   2599 			sin6->__sin6_src_id = ip_srcid_find_addr(&v6dst,
   2600 			    IPCL_ZONEID(connp), us->us_netstack);
   2601 		} else {
   2602 			ip6h = (ip6_t *)rptr;
   2603 
   2604 			sin6->sin6_addr = ip6h->ip6_src;
   2605 			/* No sin6_flowinfo per API */
   2606 			sin6->sin6_flowinfo = 0;
   2607 			/* For link-scope pass up scope id */
   2608 			if (IN6_IS_ADDR_LINKSCOPE(&ip6h->ip6_src))
   2609 				sin6->sin6_scope_id = ira->ira_ruifindex;
   2610 			else
   2611 				sin6->sin6_scope_id = 0;
   2612 			sin6->__sin6_src_id = ip_srcid_find_addr(
   2613 			    &ip6h->ip6_dst, IPCL_ZONEID(connp),
   2614 			    us->us_netstack);
   2615 		}
   2616 		sin6->sin6_port = udpha->uha_src_port;
   2617 		sin6->sin6_family = connp->conn_family;
   2618 
   2619 		if (udi_size != 0) {
   2620 			conn_recvancillary_add(connp, recv_ancillary, ira,
   2621 			    &ipps, (uchar_t *)&sin6[1], udi_size);
   2622 		}
   2623 	}
   2624 
   2625 	/* Walk past the headers unless UDP_RCVHDR (udp_rcvhdr) was set. */
   2626 	if (!udp->udp_rcvhdr) {
   2627 		mp->b_rptr = rptr + hdr_length;
   2628 		pkt_len -= hdr_length;
   2629 	}
   2630 
   2631 	BUMP_MIB(&us->us_udp_mib, udpHCInDatagrams);
        	/* pkt_len is now the data length that udp_ulp_recv() expects. */
   2632 	udp_ulp_recv(connp, mp1, pkt_len, ira);
   2633 	return;
   2634 
        	/* IP and UDP lengths disagree: drop and count as an input error. */
   2635 tossit:
   2636 	freemsg(mp);
   2637 	BUMP_MIB(&us->us_udp_mib, udpInErrors);
   2638 }
   2639 
   2640 /*
   2641  * return SNMP stuff in buffer in mpdata. We don't hold any lock and report
   2642  * information that can be changing beneath us.
   2643  */
   2644 mblk_t *
   2645 udp_snmp_get(queue_t *q, mblk_t *mpctl)
   2646 {
   2647 	mblk_t			*mpdata;
   2648 	mblk_t			*mp_conn_ctl;
   2649 	mblk_t			*mp_attr_ctl;
   2650 	mblk_t			*mp6_conn_ctl;
   2651 	mblk_t			*mp6_attr_ctl;
   2652 	mblk_t			*mp_conn_tail;
   2653 	mblk_t			*mp_attr_tail;
   2654 	mblk_t			*mp6_conn_tail;
   2655 	mblk_t			*mp6_attr_tail;
   2656 	struct opthdr		*optp;
   2657 	mib2_udpEntry_t		ude;
   2658 	mib2_udp6Entry_t	ude6;
   2659 	mib2_transportMLPEntry_t mlp;
   2660 	int			state;
   2661 	zoneid_t		zoneid;
   2662 	int			i;
   2663 	connf_t			*connfp;
   2664 	conn_t			*connp = Q_TO_CONN(q);
   2665 	int			v4_conn_idx;
   2666 	int			v6_conn_idx;
   2667 	boolean_t		needattr;
   2668 	udp_t			*udp;
   2669 	ip_stack_t		*ipst = connp->conn_netstack->netstack_ip;
   2670 	udp_stack_t		*us = connp->conn_netstack->netstack_udp;
   2671 	mblk_t			*mp2ctl;
   2672 
   2673 	/*
   2674 	 * make a copy of the original message
   2675 	 */
   2676 	mp2ctl = copymsg(mpctl);
   2677 
   2678 	mp_conn_ctl = mp_attr_ctl = mp6_conn_ctl = NULL;
   2679 	if (mpctl == NULL ||
   2680 	    (mpdata = mpctl->b_cont) == NULL ||
   2681 	    (mp_conn_ctl = copymsg(mpctl)) == NULL ||
   2682 	    (mp_attr_ctl = copymsg(mpctl)) == NULL ||
   2683 	    (mp6_conn_ctl = copymsg(mpctl)) == NULL ||
   2684 	    (mp6_attr_ctl = copymsg(mpctl)) == NULL) {
   2685 		freemsg(mp_conn_ctl);
   2686 		freemsg(mp_attr_ctl);
   2687 		freemsg(mp6_conn_ctl);
   2688 		freemsg(mpctl);
   2689 		freemsg(mp2ctl);
   2690 		return (0);
   2691 	}
   2692 
   2693 	zoneid = connp->conn_zoneid;
   2694 
   2695 	/* fixed length structure for IPv4 and IPv6 counters */
   2696 	SET_MIB(us->us_udp_mib.udpEntrySize, sizeof (mib2_udpEntry_t));
   2697 	SET_MIB(us->us_udp_mib.udp6EntrySize, sizeof (mib2_udp6Entry_t));
   2698 	/* synchronize 64- and 32-bit counters */
   2699 	SYNC32_MIB(&us->us_udp_mib, udpInDatagrams, udpHCInDatagrams);
   2700 	SYNC32_MIB(&us->us_udp_mib, udpOutDatagrams, udpHCOutDatagrams);
   2701 
   2702 	optp = (struct opthdr *)&mpctl->b_rptr[sizeof (struct T_optmgmt_ack)];
   2703 	optp->level = MIB2_UDP;
   2704 	optp->name = 0;
   2705 	(void) snmp_append_data(mpdata, (char *)&us->us_udp_mib,
   2706 	    sizeof (us->us_udp_mib));
   2707 	optp->len = msgdsize(mpdata);
   2708 	qreply(q, mpctl);
   2709 
   2710 	mp_conn_tail = mp_attr_tail = mp6_conn_tail = mp6_attr_tail = NULL;
   2711 	v4_conn_idx = v6_conn_idx = 0;
   2712 
   2713 	for (i = 0; i < CONN_G_HASH_SIZE; i++) {
   2714 		connfp = &ipst->ips_ipcl_globalhash_fanout[i];
   2715 		connp = NULL;
   2716 
   2717 		while ((connp = ipcl_get_next_conn(connfp, connp,
   2718 		    IPCL_UDPCONN))) {
   2719 			udp = connp->conn_udp;
   2720 			if (zoneid != connp->conn_zoneid)
   2721 				continue;
   2722 
   2723 			/*
   2724 			 * Note that the port numbers are sent in
   2725 			 * host byte order
   2726 			 */
   2727 
   2728 			if (udp->udp_state == TS_UNBND)
   2729 				state = MIB2_UDP_unbound;
   2730 			else if (udp->udp_state == TS_IDLE)
   2731 				state = MIB2_UDP_idle;
   2732 			else if (udp->udp_state == TS_DATA_XFER)
   2733 				state = MIB2_UDP_connected;
   2734 			else
   2735 				state = MIB2_UDP_unknown;
   2736 
   2737 			needattr = B_FALSE;
   2738 			bzero(&mlp, sizeof (mlp));
   2739 			if (connp->conn_mlp_type != mlptSingle) {
   2740 				if (connp->conn_mlp_type == mlptShared ||
   2741 				    connp->conn_mlp_type == mlptBoth)
   2742 					mlp.tme_flags |= MIB2_TMEF_SHARED;
   2743 				if (connp->conn_mlp_type == mlptPrivate ||
   2744 				    connp->conn_mlp_type == mlptBoth)
   2745 					mlp.tme_flags |= MIB2_TMEF_PRIVATE;
   2746 				needattr = B_TRUE;
   2747 			}
   2748 			if (connp->conn_anon_mlp) {
   2749 				mlp.tme_flags |= MIB2_TMEF_ANONMLP;
   2750 				needattr = B_TRUE;
   2751 			}
   2752 			switch (connp->conn_mac_mode) {
   2753 			case CONN_MAC_DEFAULT:
   2754 				break;
   2755 			case CONN_MAC_AWARE:
   2756 				mlp.tme_flags |= MIB2_TMEF_MACEXEMPT;
   2757 				needattr = B_TRUE;
   2758 				break;
   2759 			case CONN_MAC_IMPLICIT:
   2760 				mlp.tme_flags |= MIB2_TMEF_MACIMPLICIT;
   2761 				needattr = B_TRUE;
   2762 				break;
   2763 			}
   2764 			mutex_enter(&connp->conn_lock);
   2765 			if (udp->udp_state == TS_DATA_XFER &&
   2766 			    connp->conn_ixa->ixa_tsl != NULL) {
   2767 				ts_label_t *tsl;
   2768 
   2769 				tsl = connp->conn_ixa->ixa_tsl;
   2770 				mlp.tme_flags |= MIB2_TMEF_IS_LABELED;
   2771 				mlp.tme_doi = label2doi(tsl);
   2772 				mlp.tme_label = *label2bslabel(tsl);
   2773 				needattr = B_TRUE;
   2774 			}
   2775 			mutex_exit(&connp->conn_lock);
   2776 
   2777 			/*
   2778 			 * Create an IPv4 table entry for IPv4 entries and also
   2779 			 * any IPv6 entries which are bound to in6addr_any
   2780 			 * (i.e. anything a IPv4 peer could connect/send to).
   2781 			 */
   2782 			if (connp->conn_ipversion == IPV4_VERSION ||
   2783 			    (udp->udp_state <= TS_IDLE &&
   2784 			    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_laddr_v6))) {
   2785 				ude.udpEntryInfo.ue_state = state;
   2786 				/*
   2787 				 * If in6addr_any this will set it to
   2788 				 * INADDR_ANY
   2789 				 */
   2790 				ude.udpLocalAddress = connp->conn_laddr_v4;
   2791 				ude.udpLocalPort = ntohs(connp->conn_lport);
   2792 				if (udp->udp_state == TS_DATA_XFER) {
   2793 					/*
   2794 					 * Can potentially get here for
   2795 					 * v6 socket if another process
   2796 					 * (say, ping) has just done a
   2797 					 * sendto(), changing the state
   2798 					 * from the TS_IDLE above to
   2799 					 * TS_DATA_XFER by the time we hit
   2800 					 * this part of the code.
   2801 					 */
   2802 					ude.udpEntryInfo.ue_RemoteAddress =
   2803 					    connp->conn_faddr_v4;
   2804 					ude.udpEntryInfo.ue_RemotePort =
   2805 					    ntohs(connp->conn_fport);
   2806 				} else {
   2807 					ude.udpEntryInfo.ue_RemoteAddress = 0;
   2808 					ude.udpEntryInfo.ue_RemotePort = 0;
   2809 				}
   2810 
   2811 				/*
   2812 				 * We make the assumption that all udp_t
   2813 				 * structs will be created within an address
   2814 				 * region no larger than 32-bits.
   2815 				 */
   2816 				ude.udpInstance = (uint32_t)(uintptr_t)udp;
   2817 				ude.udpCreationProcess =
   2818 				    (connp->conn_cpid < 0) ?
   2819 				    MIB2_UNKNOWN_PROCESS :
   2820 				    connp->conn_cpid;
   2821 				ude.udpCreationTime = connp->conn_open_time;
   2822 
   2823 				(void) snmp_append_data2(mp_conn_ctl->b_cont,
   2824 				    &mp_conn_tail, (char *)&ude, sizeof (ude));
   2825 				mlp.tme_connidx = v4_conn_idx++;
   2826 				if (needattr)
   2827 					(void) snmp_append_data2(
   2828 					    mp_attr_ctl->b_cont, &mp_attr_tail,
   2829 					    (char *)&mlp, sizeof (mlp));
   2830 			}
   2831 			if (connp->conn_ipversion == IPV6_VERSION) {
   2832 				ude6.udp6EntryInfo.ue_state  = state;
   2833 				ude6.udp6LocalAddress = connp->conn_laddr_v6;
   2834 				ude6.udp6LocalPort = ntohs(connp->conn_lport);
   2835 				mutex_enter(&connp->conn_lock);
   2836 				if (connp->conn_ixa->ixa_flags &
   2837 				    IXAF_SCOPEID_SET) {
   2838 					ude6.udp6IfIndex =
   2839 					    connp->conn_ixa->ixa_scopeid;
   2840 				} else {
   2841 					ude6.udp6IfIndex = connp->conn_bound_if;
   2842 				}
   2843 				mutex_exit(&connp->conn_lock);
   2844 				if (udp->udp_state == TS_DATA_XFER) {
   2845 					ude6.udp6EntryInfo.ue_RemoteAddress =
   2846 					    connp->conn_faddr_v6;
   2847 					ude6.udp6EntryInfo.ue_RemotePort =
   2848 					    ntohs(connp->conn_fport);
   2849 				} else {
   2850 					ude6.udp6EntryInfo.ue_RemoteAddress =
   2851 					    sin6_null.sin6_addr;
   2852 					ude6.udp6EntryInfo.ue_RemotePort = 0;
   2853 				}
   2854 				/*
   2855 				 * We make the assumption that all udp_t
   2856 				 * structs will be created within an address
   2857 				 * region no larger than 32-bits.
   2858 				 */
   2859 				ude6.udp6Instance = (uint32_t)(uintptr_t)udp;
   2860 				ude6.udp6CreationProcess =
   2861 				    (connp->conn_cpid < 0) ?
   2862 				    MIB2_UNKNOWN_PROCESS :
   2863 				    connp->conn_cpid;
   2864 				ude6.udp6CreationTime = connp->conn_open_time;
   2865 
   2866 				(void) snmp_append_data2(mp6_conn_ctl->b_cont,
   2867 				    &mp6_conn_tail, (char *)&ude6,
   2868 				    sizeof (ude6));
   2869 				mlp.tme_connidx = v6_conn_idx++;
   2870 				if (needattr)
   2871 					(void) snmp_append_data2(
   2872 					    mp6_attr_ctl->b_cont,
   2873 					    &mp6_attr_tail, (char *)&mlp,
   2874 					    sizeof (mlp));
   2875 			}
   2876 		}
   2877 	}
   2878 
   2879 	/* IPv4 UDP endpoints */
   2880 	optp = (struct opthdr *)&mp_conn_ctl->b_rptr[
   2881 	    sizeof (struct T_optmgmt_ack)];
   2882 	optp->level = MIB2_UDP;
   2883 	optp->name = MIB2_UDP_ENTRY;
   2884 	optp->len = msgdsize(mp_conn_ctl->b_cont);
   2885 	qreply(q, mp_conn_ctl);
   2886 
   2887 	/* table of MLP attributes... */
   2888 	optp = (struct opthdr *)&mp_attr_ctl->b_rptr[
   2889 	    sizeof (struct T_optmgmt_ack)];
   2890 	optp->level = MIB2_UDP;
   2891 	optp->name = EXPER_XPORT_MLP;
   2892 	optp->len = msgdsize(mp_attr_ctl->b_cont);
   2893 	if (optp->len == 0)
   2894 		freemsg(mp_attr_ctl);
   2895 	else
   2896 		qreply(q, mp_attr_ctl);
   2897 
   2898 	/* IPv6 UDP endpoints */
   2899 	optp = (struct opthdr *)&mp6_conn_ctl->b_rptr[
   2900 	    sizeof (struct T_optmgmt_ack)];
   2901 	optp->level = MIB2_UDP6;
   2902 	optp->name = MIB2_UDP6_ENTRY;
   2903 	optp->len = msgdsize(mp6_conn_ctl->b_cont);
   2904 	qreply(q, mp6_conn_ctl);
   2905 
   2906 	/* table of MLP attributes... */
   2907 	optp = (struct opthdr *)&mp6_attr_ctl->b_rptr[
   2908 	    sizeof (struct T_optmgmt_ack)];
   2909 	optp->level = MIB2_UDP6;
   2910 	optp->name = EXPER_XPORT_MLP;
   2911 	optp->len = msgdsize(mp6_attr_ctl->b_cont);
   2912 	if (optp->len == 0)
   2913 		freemsg(mp6_attr_ctl);
   2914 	else
   2915 		qreply(q, mp6_attr_ctl);
   2916 
   2917 	return (mp2ctl);
   2918 }
   2919 
   2920 /*
   2921  * Return 0 if invalid set request, 1 otherwise, including non-udp requests.
   2922  * NOTE: Per MIB-II, UDP has no writable data.
   2923  * TODO:  If this ever actually tries to set anything, it needs to be
   2924  * to do the appropriate locking.
   2925  */
   2926 /* ARGSUSED */
   2927 int
   2928 udp_snmp_set(queue_t *q, t_scalar_t level, t_scalar_t name,
   2929     uchar_t *ptr, int len)
   2930 {
   2931 	switch (level) {
   2932 	case MIB2_UDP:
   2933 		return (0);
   2934 	default:
   2935 		return (1);
   2936 	}
   2937 }
   2938 
   2939 /*
   2940  * This routine creates a T_UDERROR_IND message and passes it upstream.
   2941  * The address and options are copied from the T_UNITDATA_REQ message
   2942  * passed in mp.  This message is freed.
   2943  */
   2944 static void
   2945 udp_ud_err(queue_t *q, mblk_t *mp, t_scalar_t err)
   2946 {
   2947 	struct T_unitdata_req *tudr;
   2948 	mblk_t	*mp1;
   2949 	uchar_t *destaddr;
   2950 	t_scalar_t destlen;
   2951 	uchar_t	*optaddr;
   2952 	t_scalar_t optlen;
   2953 
   2954 	if ((mp->b_wptr < mp->b_rptr) ||
   2955 	    (MBLKL(mp)) < sizeof (struct T_unitdata_req)) {
   2956 		goto done;
   2957 	}
   2958 	tudr = (struct T_unitdata_req *)mp->b_rptr;
   2959 	destaddr = mp->b_rptr + tudr->DEST_offset;
   2960 	if (destaddr < mp->b_rptr || destaddr >= mp->b_wptr ||
   2961 	    destaddr + tudr->DEST_length < mp->b_rptr ||
   2962 	    destaddr + tudr->DEST_length > mp->b_wptr) {
   2963 		goto done;
   2964 	}
   2965 	optaddr = mp->b_rptr + tudr->OPT_offset;
   2966 	if (optaddr < mp->b_rptr || optaddr >= mp->b_wptr ||
   2967 	    optaddr + tudr->OPT_length < mp->b_rptr ||
   2968 	    optaddr + tudr->OPT_length > mp->b_wptr) {
   2969 		goto done;
   2970 	}
   2971 	destlen = tudr->DEST_length;
   2972 	optlen = tudr->OPT_length;
   2973 
   2974 	mp1 = mi_tpi_uderror_ind((char *)destaddr, destlen,
   2975 	    (char *)optaddr, optlen, err);
   2976 	if (mp1 != NULL)
   2977 		qreply(q, mp1);
   2978 
   2979 done:
   2980 	freemsg(mp);
   2981 }
   2982 
   2983 /*
   2984  * This routine removes a port number association from a stream.  It
   2985  * is called by udp_wput to handle T_UNBIND_REQ messages.
   2986  */
   2987 static void
   2988 udp_tpi_unbind(queue_t *q, mblk_t *mp)
   2989 {
   2990 	conn_t	*connp = Q_TO_CONN(q);
   2991 	int	error;
   2992 
   2993 	error = udp_do_unbind(connp);
   2994 	if (error) {
   2995 		if (error < 0)
   2996 			udp_err_ack(q, mp, -error, 0);
   2997 		else
   2998 			udp_err_ack(q, mp, TSYSERR, error);
   2999 		return;
   3000 	}
   3001 
   3002 	mp = mi_tpi_ok_ack_alloc(mp);
   3003 	ASSERT(mp != NULL);
   3004 	ASSERT(((struct T_ok_ack *)mp->b_rptr)->PRIM_type == T_OK_ACK);
   3005 	qreply(q, mp);
   3006 }
   3007 
   3008 /*
   3009  * Don't let port fall into the privileged range.
   3010  * Since the extra privileged ports can be arbitrary we also
   3011  * ensure that we exclude those from consideration.
   3012  * us->us_epriv_ports is not sorted thus we loop over it until
   3013  * there are no changes.
   3014  */
   3015 static in_port_t
   3016 udp_update_next_port(udp_t *udp, in_port_t port, boolean_t random)
   3017 {
   3018 	int i;
   3019 	in_port_t nextport;
   3020 	boolean_t restart = B_FALSE;
   3021 	udp_stack_t *us = udp->udp_us;
   3022 
   3023 	if (random && udp_random_anon_port != 0) {
   3024 		(void) random_get_pseudo_bytes((uint8_t *)&port,
   3025 		    sizeof (in_port_t));
   3026 		/*
   3027 		 * Unless changed by a sys admin, the smallest anon port
   3028 		 * is 32768 and the largest anon port is 65535.  It is
   3029 		 * very likely (50%) for the random port to be smaller
   3030 		 * than the smallest anon port.  When that happens,
   3031 		 * add port % (anon port range) to the smallest anon
   3032 		 * port to get the random port.  It should fall into the
   3033 		 * valid anon port range.
   3034 		 */
   3035 		if (port < us->us_smallest_anon_port) {
   3036 			port = us->us_smallest_anon_port +
   3037 			    port % (us->us_largest_anon_port -
   3038 			    us->us_smallest_anon_port);
   3039 		}
   3040 	}
   3041 
   3042 retry:
   3043 	if (port < us->us_smallest_anon_port)
   3044 		port = us->us_smallest_anon_port;
   3045 
   3046 	if (port > us->us_largest_anon_port) {
   3047 		port = us->us_smallest_anon_port;
   3048 		if (restart)
   3049 			return (0);
   3050 		restart = B_TRUE;
   3051 	}
   3052 
   3053 	if (port < us->us_smallest_nonpriv_port)
   3054 		port = us->us_smallest_nonpriv_port;
   3055 
   3056 	for (i = 0; i < us->us_num_epriv_ports; i++) {
   3057 		if (port == us->us_epriv_ports[i]) {
   3058 			port++;
   3059 			/*
   3060 			 * Make sure that the port is in the
   3061 			 * valid range.
   3062 			 */
   3063 			goto retry;
   3064 		}
   3065 	}
   3066 
   3067 	if (is_system_labeled() &&
   3068 	    (nextport = tsol_next_port(crgetzone(udp->udp_connp->conn_cred),
   3069 	    port, IPPROTO_UDP, B_TRUE)) != 0) {
   3070 		port = nextport;
   3071 		goto retry;
   3072 	}
   3073 
   3074 	return (port);
   3075 }
   3076 
   3077 /*
   3078  * Handle T_UNITDATA_REQ with options. Both IPv4 and IPv6
   3079  * Either tudr_mp or msg is set. If tudr_mp we take ancillary data from
   3080  * the TPI options, otherwise we take them from msg_control.
   3081  * If both sin and sin6 is set it is a connected socket and we use conn_faddr.
   3082  * Always consumes mp; never consumes tudr_mp.
   3083  */
   3084 static int
   3085 udp_output_ancillary(conn_t *connp, sin_t *sin, sin6_t *sin6, mblk_t *mp,
   3086     mblk_t *tudr_mp, struct nmsghdr *msg, cred_t *cr, pid_t pid)
   3087 {
   3088 	udp_t		*udp = connp->conn_udp;
   3089 	udp_stack_t	*us = udp->udp_us;
   3090 	int		error;
   3091 	ip_xmit_attr_t	*ixa;
   3092 	ip_pkt_t	*ipp;
   3093 	in6_addr_t	v6src;
   3094 	in6_addr_t	v6dst;
   3095 	in6_addr_t	v6nexthop;
   3096 	in_port_t	dstport;
   3097 	uint32_t	flowinfo;
   3098 	uint_t		srcid;
   3099 	int		is_absreq_failure = 0;
   3100 	conn_opt_arg_t	coas, *coa;
   3101 
   3102 	ASSERT(tudr_mp != NULL || msg != NULL);
   3103 
   3104 	/*
   3105 	 * Get ixa before checking state to handle a disconnect race.
   3106 	 *
   3107 	 * We need an exclusive copy of conn_ixa since the ancillary data
   3108 	 * options might modify it. That copy has no pointers hence we
   3109 	 * need to set them up once we've parsed the ancillary data.
   3110 	 */
   3111 	ixa = conn_get_ixa_exclusive(connp);
   3112 	if (ixa == NULL) {
   3113 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3114 		freemsg(mp);
   3115 		return (ENOMEM);
   3116 	}
   3117 	ASSERT(cr != NULL);
   3118 	ixa->ixa_cred = cr;
   3119 	ixa->ixa_cpid = pid;
   3120 	if (is_system_labeled()) {
   3121 		/* We need to restart with a label based on the cred */
   3122 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
   3123 	}
   3124 
   3125 	/* In case previous destination was multicast or multirt */
   3126 	ip_attr_newdst(ixa);
   3127 
   3128 	/* Get a copy of conn_xmit_ipp since the options might change it */
   3129 	ipp = kmem_zalloc(sizeof (*ipp), KM_NOSLEEP);
   3130 	if (ipp == NULL) {
   3131 		ixa_refrele(ixa);
   3132 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3133 		freemsg(mp);
   3134 		return (ENOMEM);
   3135 	}
   3136 	mutex_enter(&connp->conn_lock);
   3137 	error = ip_pkt_copy(&connp->conn_xmit_ipp, ipp, KM_NOSLEEP);
   3138 	mutex_exit(&connp->conn_lock);
   3139 	if (error != 0) {
   3140 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3141 		freemsg(mp);
   3142 		goto done;
   3143 	}
   3144 
   3145 	/*
   3146 	 * Parse the options and update ixa and ipp as a result.
   3147 	 * Note that ixa_tsl can be updated if SCM_UCRED.
   3148 	 * ixa_refrele/ixa_inactivate will release any reference on ixa_tsl.
   3149 	 */
   3150 
   3151 	coa = &coas;
   3152 	coa->coa_connp = connp;
   3153 	coa->coa_ixa = ixa;
   3154 	coa->coa_ipp = ipp;
   3155 	coa->coa_ancillary = B_TRUE;
   3156 	coa->coa_changed = 0;
   3157 
   3158 	if (msg != NULL) {
   3159 		error = process_auxiliary_options(connp, msg->msg_control,
   3160 		    msg->msg_controllen, coa, &udp_opt_obj, udp_opt_set, cr);
   3161 	} else {
   3162 		struct T_unitdata_req *tudr;
   3163 
   3164 		tudr = (struct T_unitdata_req *)tudr_mp->b_rptr;
   3165 		ASSERT(tudr->PRIM_type == T_UNITDATA_REQ);
   3166 		error = tpi_optcom_buf(connp->conn_wq, tudr_mp,
   3167 		    &tudr->OPT_length, tudr->OPT_offset, cr, &udp_opt_obj,
   3168 		    coa, &is_absreq_failure);
   3169 	}
   3170 	if (error != 0) {
   3171 		/*
   3172 		 * Note: No special action needed in this
   3173 		 * module for "is_absreq_failure"
   3174 		 */
   3175 		freemsg(mp);
   3176 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3177 		goto done;
   3178 	}
   3179 	ASSERT(is_absreq_failure == 0);
   3180 
   3181 	mutex_enter(&connp->conn_lock);
   3182 	/*
   3183 	 * If laddr is unspecified then we look at sin6_src_id.
   3184 	 * We will give precedence to a source address set with IPV6_PKTINFO
   3185 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
   3186 	 * want ip_attr_connect to select a source (since it can fail) when
   3187 	 * IPV6_PKTINFO is specified.
   3188 	 * If this doesn't result in a source address then we get a source
   3189 	 * from ip_attr_connect() below.
   3190 	 */
   3191 	v6src = connp->conn_saddr_v6;
   3192 	if (sin != NULL) {
   3193 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
   3194 		dstport = sin->sin_port;
   3195 		flowinfo = 0;
   3196 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
   3197 		ixa->ixa_flags |= IXAF_IS_IPV4;
   3198 	} else if (sin6 != NULL) {
   3199 		v6dst = sin6->sin6_addr;
   3200 		dstport = sin6->sin6_port;
   3201 		flowinfo = sin6->sin6_flowinfo;
   3202 		srcid = sin6->__sin6_src_id;
   3203 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
   3204 			ixa->ixa_scopeid = sin6->sin6_scope_id;
   3205 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
   3206 		} else {
   3207 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
   3208 		}
   3209 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
   3210 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
   3211 			    connp->conn_netstack);
   3212 		}
   3213 		if (IN6_IS_ADDR_V4MAPPED(&v6dst))
   3214 			ixa->ixa_flags |= IXAF_IS_IPV4;
   3215 		else
   3216 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
   3217 	} else {
   3218 		/* Connected case */
   3219 		v6dst = connp->conn_faddr_v6;
   3220 		dstport = connp->conn_fport;
   3221 		flowinfo = connp->conn_flowinfo;
   3222 	}
   3223 	mutex_exit(&connp->conn_lock);
   3224 
   3225 	/* Handle IPV6_PKTINFO setting source address. */
   3226 	if (IN6_IS_ADDR_UNSPECIFIED(&v6src) &&
   3227 	    (ipp->ipp_fields & IPPF_ADDR)) {
   3228 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
   3229 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
   3230 				v6src = ipp->ipp_addr;
   3231 		} else {
   3232 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
   3233 				v6src = ipp->ipp_addr;
   3234 		}
   3235 	}
   3236 
   3237 	ip_attr_nexthop(ipp, ixa, &v6dst, &v6nexthop);
   3238 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
   3239 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
   3240 
   3241 	switch (error) {
   3242 	case 0:
   3243 		break;
   3244 	case EADDRNOTAVAIL:
   3245 		/*
   3246 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
   3247 		 * Don't have the application see that errno
   3248 		 */
   3249 		error = ENETUNREACH;
   3250 		goto failed;
   3251 	case ENETDOWN:
   3252 		/*
   3253 		 * Have !ipif_addr_ready address; drop packet silently
   3254 		 * until we can get applications to not send until we
   3255 		 * are ready.
   3256 		 */
   3257 		error = 0;
   3258 		goto failed;
   3259 	case EHOSTUNREACH:
   3260 	case ENETUNREACH:
   3261 		if (ixa->ixa_ire != NULL) {
   3262 			/*
   3263 			 * Let conn_ip_output/ire_send_noroute return
   3264 			 * the error and send any local ICMP error.
   3265 			 */
   3266 			error = 0;
   3267 			break;
   3268 		}
   3269 		/* FALLTHRU */
   3270 	default:
   3271 	failed:
   3272 		freemsg(mp);
   3273 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3274 		goto done;
   3275 	}
   3276 
   3277 	/*
   3278 	 * We might be going to a different destination than last time,
   3279 	 * thus check that TX allows the communication and compute any
   3280 	 * needed label.
   3281 	 *
   3282 	 * TSOL Note: We have an exclusive ipp and ixa for this thread so we
   3283 	 * don't have to worry about concurrent threads.
   3284 	 */
   3285 	if (is_system_labeled()) {
   3286 		/* Using UDP MLP requires SCM_UCRED from user */
   3287 		if (connp->conn_mlp_type != mlptSingle &&
   3288 		    !((ixa->ixa_flags & IXAF_UCRED_TSL))) {
   3289 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3290 			error = ECONNREFUSED;
   3291 			freemsg(mp);
   3292 			goto done;
   3293 		}
   3294 		/*
   3295 		 * Check whether Trusted Solaris policy allows communication
   3296 		 * with this host, and pretend that the destination is
   3297 		 * unreachable if not.
   3298 		 * Compute any needed label and place it in ipp_label_v4/v6.
   3299 		 *
   3300 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
   3301 		 * ipp_label_v4/v6 to form the packet.
   3302 		 *
   3303 		 * Tsol note: We have ipp structure local to this thread so
   3304 		 * no locking is needed.
   3305 		 */
   3306 		error = conn_update_label(connp, ixa, &v6dst, ipp);
   3307 		if (error != 0) {
   3308 			freemsg(mp);
   3309 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3310 			goto done;
   3311 		}
   3312 	}
   3313 	mp = udp_prepend_hdr(connp, ixa, ipp, &v6src, &v6dst, dstport,
   3314 	    flowinfo, mp, &error);
   3315 	if (mp == NULL) {
   3316 		ASSERT(error != 0);
   3317 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3318 		goto done;
   3319 	}
   3320 	if (ixa->ixa_pktlen > IP_MAXPACKET) {
   3321 		error = EMSGSIZE;
   3322 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3323 		freemsg(mp);
   3324 		goto done;
   3325 	}
   3326 	/* We're done.  Pass the packet to ip. */
   3327 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
   3328 
   3329 	error = conn_ip_output(mp, ixa);
   3330 	/* No udpOutErrors if an error since IP increases its error counter */
   3331 	switch (error) {
   3332 	case 0:
   3333 		break;
   3334 	case EWOULDBLOCK:
   3335 		(void) ixa_check_drain_insert(connp, ixa);
   3336 		error = 0;
   3337 		break;
   3338 	case EADDRNOTAVAIL:
   3339 		/*
   3340 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
   3341 		 * Don't have the application see that errno
   3342 		 */
   3343 		error = ENETUNREACH;
   3344 		/* FALLTHRU */
   3345 	default:
   3346 		mutex_enter(&connp->conn_lock);
   3347 		/*
   3348 		 * Clear the source and v6lastdst so we call ip_attr_connect
   3349 		 * for the next packet and try to pick a better source.
   3350 		 */
   3351 		if (connp->conn_mcbc_bind)
   3352 			connp->conn_saddr_v6 = ipv6_all_zeros;
   3353 		else
   3354 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
   3355 		connp->conn_v6lastdst = ipv6_all_zeros;
   3356 		mutex_exit(&connp->conn_lock);
   3357 		break;
   3358 	}
   3359 done:
   3360 	ixa_refrele(ixa);
   3361 	ip_pkt_free(ipp);
   3362 	kmem_free(ipp, sizeof (*ipp));
   3363 	return (error);
   3364 }
   3365 
   3366 /*
   3367  * Handle sending an M_DATA for a connected socket.
   3368  * Handles both IPv4 and IPv6.
   3369  */
   3370 static int
   3371 udp_output_connected(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid)
   3372 {
   3373 	udp_t		*udp = connp->conn_udp;
   3374 	udp_stack_t	*us = udp->udp_us;
   3375 	int		error;
   3376 	ip_xmit_attr_t	*ixa;
   3377 
   3378 	/*
   3379 	 * If no other thread is using conn_ixa this just gets a reference to
   3380 	 * conn_ixa. Otherwise we get a safe copy of conn_ixa.
   3381 	 */
   3382 	ixa = conn_get_ixa(connp, B_FALSE);
   3383 	if (ixa == NULL) {
   3384 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3385 		freemsg(mp);
   3386 		return (ENOMEM);
   3387 	}
   3388 
   3389 	ASSERT(cr != NULL);
   3390 	ixa->ixa_cred = cr;
   3391 	ixa->ixa_cpid = pid;
   3392 
   3393 	mutex_enter(&connp->conn_lock);
   3394 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_saddr_v6,
   3395 	    connp->conn_fport, connp->conn_flowinfo, &error);
   3396 
   3397 	if (mp == NULL) {
   3398 		ASSERT(error != 0);
   3399 		mutex_exit(&connp->conn_lock);
   3400 		ixa_refrele(ixa);
   3401 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3402 		freemsg(mp);
   3403 		return (error);
   3404 	}
   3405 
   3406 	/*
   3407 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
   3408 	 * safe copy, then we need to fill in any pointers in it.
   3409 	 */
   3410 	if (ixa->ixa_ire == NULL) {
   3411 		in6_addr_t	faddr, saddr;
   3412 		in6_addr_t	nexthop;
   3413 		in_port_t	fport;
   3414 
   3415 		saddr = connp->conn_saddr_v6;
   3416 		faddr = connp->conn_faddr_v6;
   3417 		fport = connp->conn_fport;
   3418 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &faddr, &nexthop);
   3419 		mutex_exit(&connp->conn_lock);
   3420 
   3421 		error = ip_attr_connect(connp, ixa, &saddr, &faddr, &nexthop,
   3422 		    fport, NULL, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST |
   3423 		    IPDF_IPSEC);
   3424 		switch (error) {
   3425 		case 0:
   3426 			break;
   3427 		case EADDRNOTAVAIL:
   3428 			/*
   3429 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
   3430 			 * Don't have the application see that errno
   3431 			 */
   3432 			error = ENETUNREACH;
   3433 			goto failed;
   3434 		case ENETDOWN:
   3435 			/*
   3436 			 * Have !ipif_addr_ready address; drop packet silently
   3437 			 * until we can get applications to not send until we
   3438 			 * are ready.
   3439 			 */
   3440 			error = 0;
   3441 			goto failed;
   3442 		case EHOSTUNREACH:
   3443 		case ENETUNREACH:
   3444 			if (ixa->ixa_ire != NULL) {
   3445 				/*
   3446 				 * Let conn_ip_output/ire_send_noroute return
   3447 				 * the error and send any local ICMP error.
   3448 				 */
   3449 				error = 0;
   3450 				break;
   3451 			}
   3452 			/* FALLTHRU */
   3453 		default:
   3454 		failed:
   3455 			ixa_refrele(ixa);
   3456 			freemsg(mp);
   3457 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3458 			return (error);
   3459 		}
   3460 	} else {
   3461 		/* Done with conn_t */
   3462 		mutex_exit(&connp->conn_lock);
   3463 	}
   3464 	ASSERT(ixa->ixa_ire != NULL);
   3465 
   3466 	/* We're done.  Pass the packet to ip. */
   3467 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
   3468 
   3469 	error = conn_ip_output(mp, ixa);
   3470 	/* No udpOutErrors if an error since IP increases its error counter */
   3471 	switch (error) {
   3472 	case 0:
   3473 		break;
   3474 	case EWOULDBLOCK:
   3475 		(void) ixa_check_drain_insert(connp, ixa);
   3476 		error = 0;
   3477 		break;
   3478 	case EADDRNOTAVAIL:
   3479 		/*
   3480 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
   3481 		 * Don't have the application see that errno
   3482 		 */
   3483 		error = ENETUNREACH;
   3484 		break;
   3485 	}
   3486 	ixa_refrele(ixa);
   3487 	return (error);
   3488 }
   3489 
   3490 /*
   3491  * Handle sending an M_DATA to the last destination.
   3492  * Handles both IPv4 and IPv6.
   3493  *
   3494  * NOTE: The caller must hold conn_lock and we drop it here.
   3495  */
   3496 static int
   3497 udp_output_lastdst(conn_t *connp, mblk_t *mp, cred_t *cr, pid_t pid,
   3498     ip_xmit_attr_t *ixa)
   3499 {
   3500 	udp_t		*udp = connp->conn_udp;
   3501 	udp_stack_t	*us = udp->udp_us;
   3502 	int		error;
   3503 
   3504 	ASSERT(MUTEX_HELD(&connp->conn_lock));
   3505 	ASSERT(ixa != NULL);
   3506 
   3507 	ASSERT(cr != NULL);
   3508 	ixa->ixa_cred = cr;
   3509 	ixa->ixa_cpid = pid;
   3510 
   3511 	mp = udp_prepend_header_template(connp, ixa, mp, &connp->conn_v6lastsrc,
   3512 	    connp->conn_lastdstport, connp->conn_lastflowinfo, &error);
   3513 
   3514 	if (mp == NULL) {
   3515 		ASSERT(error != 0);
   3516 		mutex_exit(&connp->conn_lock);
   3517 		ixa_refrele(ixa);
   3518 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3519 		freemsg(mp);
   3520 		return (error);
   3521 	}
   3522 
   3523 	/*
   3524 	 * In case we got a safe copy of conn_ixa, or if opt_set made us a new
   3525 	 * safe copy, then we need to fill in any pointers in it.
   3526 	 */
   3527 	if (ixa->ixa_ire == NULL) {
   3528 		in6_addr_t	lastdst, lastsrc;
   3529 		in6_addr_t	nexthop;
   3530 		in_port_t	lastport;
   3531 
   3532 		lastsrc = connp->conn_v6lastsrc;
   3533 		lastdst = connp->conn_v6lastdst;
   3534 		lastport = connp->conn_lastdstport;
   3535 		ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &lastdst, &nexthop);
   3536 		mutex_exit(&connp->conn_lock);
   3537 
   3538 		error = ip_attr_connect(connp, ixa, &lastsrc, &lastdst,
   3539 		    &nexthop, lastport, NULL, NULL, IPDF_ALLOW_MCBC |
   3540 		    IPDF_VERIFY_DST | IPDF_IPSEC);
   3541 		switch (error) {
   3542 		case 0:
   3543 			break;
   3544 		case EADDRNOTAVAIL:
   3545 			/*
   3546 			 * IXAF_VERIFY_SOURCE tells us to pick a better source.
   3547 			 * Don't have the application see that errno
   3548 			 */
   3549 			error = ENETUNREACH;
   3550 			goto failed;
   3551 		case ENETDOWN:
   3552 			/*
   3553 			 * Have !ipif_addr_ready address; drop packet silently
   3554 			 * until we can get applications to not send until we
   3555 			 * are ready.
   3556 			 */
   3557 			error = 0;
   3558 			goto failed;
   3559 		case EHOSTUNREACH:
   3560 		case ENETUNREACH:
   3561 			if (ixa->ixa_ire != NULL) {
   3562 				/*
   3563 				 * Let conn_ip_output/ire_send_noroute return
   3564 				 * the error and send any local ICMP error.
   3565 				 */
   3566 				error = 0;
   3567 				break;
   3568 			}
   3569 			/* FALLTHRU */
   3570 		default:
   3571 		failed:
   3572 			ixa_refrele(ixa);
   3573 			freemsg(mp);
   3574 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3575 			return (error);
   3576 		}
   3577 	} else {
   3578 		/* Done with conn_t */
   3579 		mutex_exit(&connp->conn_lock);
   3580 	}
   3581 
   3582 	/* We're done.  Pass the packet to ip. */
   3583 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
   3584 
   3585 	error = conn_ip_output(mp, ixa);
   3586 	/* No udpOutErrors if an error since IP increases its error counter */
   3587 	switch (error) {
   3588 	case 0:
   3589 		break;
   3590 	case EWOULDBLOCK:
   3591 		(void) ixa_check_drain_insert(connp, ixa);
   3592 		error = 0;
   3593 		break;
   3594 	case EADDRNOTAVAIL:
   3595 		/*
   3596 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
   3597 		 * Don't have the application see that errno
   3598 		 */
   3599 		error = ENETUNREACH;
   3600 		/* FALLTHRU */
   3601 	default:
   3602 		mutex_enter(&connp->conn_lock);
   3603 		/*
   3604 		 * Clear the source and v6lastdst so we call ip_attr_connect
   3605 		 * for the next packet and try to pick a better source.
   3606 		 */
   3607 		if (connp->conn_mcbc_bind)
   3608 			connp->conn_saddr_v6 = ipv6_all_zeros;
   3609 		else
   3610 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
   3611 		connp->conn_v6lastdst = ipv6_all_zeros;
   3612 		mutex_exit(&connp->conn_lock);
   3613 		break;
   3614 	}
   3615 	ixa_refrele(ixa);
   3616 	return (error);
   3617 }
   3618 
   3619 
   3620 /*
   3621  * Prepend the header template and then fill in the source and
   3622  * flowinfo. The caller needs to handle the destination address since
   3623  * its setting is different if rthdr or source route.
   3624  *
   3625  * Returns NULL if allocation failed or if the packet would exceed IP_MAXPACKET.
   3626  * When it returns NULL it sets errorp.
   3627  */
   3628 static mblk_t *
   3629 udp_prepend_header_template(conn_t *connp, ip_xmit_attr_t *ixa, mblk_t *mp,
   3630     const in6_addr_t *v6src, in_port_t dstport, uint32_t flowinfo, int *errorp)
   3631 {
   3632 	udp_t		*udp = connp->conn_udp;
   3633 	udp_stack_t	*us = udp->udp_us;
   3634 	boolean_t	insert_spi = udp->udp_nat_t_endpoint;
   3635 	uint_t		pktlen;
   3636 	uint_t		alloclen;
   3637 	uint_t		copylen;
   3638 	uint8_t		*iph;
   3639 	uint_t		ip_hdr_length;
   3640 	udpha_t		*udpha;
   3641 	uint32_t	cksum;
   3642 	ip_pkt_t	*ipp;
   3643 
   3644 	ASSERT(MUTEX_HELD(&connp->conn_lock));
   3645 
   3646 	/*
   3647 	 * Copy the header template and leave space for an SPI
   3648 	 */
   3649 	copylen = connp->conn_ht_iphc_len;
   3650 	alloclen = copylen + (insert_spi ? sizeof (uint32_t) : 0);
   3651 	pktlen = alloclen + msgdsize(mp);
   3652 	if (pktlen > IP_MAXPACKET) {
   3653 		freemsg(mp);
   3654 		*errorp = EMSGSIZE;
   3655 		return (NULL);
   3656 	}
   3657 	ixa->ixa_pktlen = pktlen;
   3658 
   3659 	/* check/fix buffer config, setup pointers into it */
   3660 	iph = mp->b_rptr - alloclen;
   3661 	if (DB_REF(mp) != 1 || iph < DB_BASE(mp) || !OK_32PTR(iph)) {
   3662 		mblk_t *mp1;
   3663 
   3664 		mp1 = allocb(alloclen + us->us_wroff_extra, BPRI_MED);
   3665 		if (mp1 == NULL) {
   3666 			freemsg(mp);
   3667 			*errorp = ENOMEM;
   3668 			return (NULL);
   3669 		}
   3670 		mp1->b_wptr = DB_LIM(mp1);
   3671 		mp1->b_cont = mp;
   3672 		mp = mp1;
   3673 		iph = (mp->b_wptr - alloclen);
   3674 	}
   3675 	mp->b_rptr = iph;
   3676 	bcopy(connp->conn_ht_iphc, iph, copylen);
   3677 	ip_hdr_length = (uint_t)(connp->conn_ht_ulp - connp->conn_ht_iphc);
   3678 
   3679 	ixa->ixa_ip_hdr_length = ip_hdr_length;
   3680 	udpha = (udpha_t *)(iph + ip_hdr_length);
   3681 
   3682 	/*
   3683 	 * Setup header length and prepare for ULP checksum done in IP.
   3684 	 * udp_build_hdr_template has already massaged any routing header
   3685 	 * and placed the result in conn_sum.
   3686 	 *
   3687 	 * We make it easy for IP to include our pseudo header
   3688 	 * by putting our length in uha_checksum.
   3689 	 */
   3690 	cksum = pktlen - ip_hdr_length;
   3691 	udpha->uha_length = htons(cksum);
   3692 
   3693 	cksum += connp->conn_sum;
   3694 	cksum = (cksum >> 16) + (cksum & 0xFFFF);
   3695 	ASSERT(cksum < 0x10000);
   3696 
   3697 	ipp = &connp->conn_xmit_ipp;
   3698 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
   3699 		ipha_t	*ipha = (ipha_t *)iph;
   3700 
   3701 		ipha->ipha_length = htons((uint16_t)pktlen);
   3702 
   3703 		/* IP does the checksum if uha_checksum is non-zero */
   3704 		if (us->us_do_checksum)
   3705 			udpha->uha_checksum = htons(cksum);
   3706 
   3707 		/* if IP_PKTINFO specified an addres it wins over bind() */
   3708 		if ((ipp->ipp_fields & IPPF_ADDR) &&
   3709 		    IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
   3710 			ASSERT(ipp->ipp_addr_v4 != INADDR_ANY);
   3711 			ipha->ipha_src = ipp->ipp_addr_v4;
   3712 		} else {
   3713 			IN6_V4MAPPED_TO_IPADDR(v6src, ipha->ipha_src);
   3714 		}
   3715 	} else {
   3716 		ip6_t *ip6h = (ip6_t *)iph;
   3717 
   3718 		ip6h->ip6_plen =  htons((uint16_t)(pktlen - IPV6_HDR_LEN));
   3719 		udpha->uha_checksum = htons(cksum);
   3720 
   3721 		/* if IP_PKTINFO specified an addres it wins over bind() */
   3722 		if ((ipp->ipp_fields & IPPF_ADDR) &&
   3723 		    !IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr)) {
   3724 			ASSERT(!IN6_IS_ADDR_UNSPECIFIED(&ipp->ipp_addr));
   3725 			ip6h->ip6_src = ipp->ipp_addr;
   3726 		} else {
   3727 			ip6h->ip6_src = *v6src;
   3728 		}
   3729 		ip6h->ip6_vcf =
   3730 		    (IPV6_DEFAULT_VERS_AND_FLOW & IPV6_VERS_AND_FLOW_MASK) |
   3731 		    (flowinfo & ~IPV6_VERS_AND_FLOW_MASK);
   3732 		if (ipp->ipp_fields & IPPF_TCLASS) {
   3733 			/* Overrides the class part of flowinfo */
   3734 			ip6h->ip6_vcf = IPV6_TCLASS_FLOW(ip6h->ip6_vcf,
   3735 			    ipp->ipp_tclass);
   3736 		}
   3737 	}
   3738 
   3739 	/* Insert all-0s SPI now. */
   3740 	if (insert_spi)
   3741 		*((uint32_t *)(udpha + 1)) = 0;
   3742 
   3743 	udpha->uha_dst_port = dstport;
   3744 	return (mp);
   3745 }
   3746 
   3747 /*
   3748  * Send a T_UDERR_IND in response to an M_DATA
   3749  */
   3750 static void
   3751 udp_ud_err_connected(conn_t *connp, t_scalar_t error)
   3752 {
   3753 	struct sockaddr_storage ss;
   3754 	sin_t		*sin;
   3755 	sin6_t		*sin6;
   3756 	struct sockaddr	*addr;
   3757 	socklen_t	addrlen;
   3758 	mblk_t		*mp1;
   3759 
   3760 	mutex_enter(&connp->conn_lock);
   3761 	/* Initialize addr and addrlen as if they're passed in */
   3762 	if (connp->conn_family == AF_INET) {
   3763 		sin = (sin_t *)&ss;
   3764 		*sin = sin_null;
   3765 		sin->sin_family = AF_INET;
   3766 		sin->sin_port = connp->conn_fport;
   3767 		sin->sin_addr.s_addr = connp->conn_faddr_v4;
   3768 		addr = (struct sockaddr *)sin;
   3769 		addrlen = sizeof (*sin);
   3770 	} else {
   3771 		sin6 = (sin6_t *)&ss;
   3772 		*sin6 = sin6_null;
   3773 		sin6->sin6_family = AF_INET6;
   3774 		sin6->sin6_port = connp->conn_fport;
   3775 		sin6->sin6_flowinfo = connp->conn_flowinfo;
   3776 		sin6->sin6_addr = connp->conn_faddr_v6;
   3777 		if (IN6_IS_ADDR_LINKSCOPE(&connp->conn_faddr_v6) &&
   3778 		    (connp->conn_ixa->ixa_flags & IXAF_SCOPEID_SET)) {
   3779 			sin6->sin6_scope_id = connp->conn_ixa->ixa_scopeid;
   3780 		} else {
   3781 			sin6->sin6_scope_id = 0;
   3782 		}
   3783 		sin6->__sin6_src_id = 0;
   3784 		addr = (struct sockaddr *)sin6;
   3785 		addrlen = sizeof (*sin6);
   3786 	}
   3787 	mutex_exit(&connp->conn_lock);
   3788 
   3789 	mp1 = mi_tpi_uderror_ind((char *)addr, addrlen, NULL, 0, error);
   3790 	if (mp1 != NULL)
   3791 		putnext(connp->conn_rq, mp1);
   3792 }
   3793 
   3794 /*
   3795  * This routine handles all messages passed downstream.  It either
   3796  * consumes the message or passes it downstream; it never queues a
   3797  * a message.
   3798  *
   3799  * Also entry point for sockfs when udp is in "direct sockfs" mode.  This mode
   3800  * is valid when we are directly beneath the stream head, and thus sockfs
   3801  * is able to bypass STREAMS and directly call us, passing along the sockaddr
   3802  * structure without the cumbersome T_UNITDATA_REQ interface for the case of
   3803  * connected endpoints.
   3804  */
   3805 void
   3806 udp_wput(queue_t *q, mblk_t *mp)
   3807 {
   3808 	sin6_t		*sin6;
   3809 	sin_t		*sin = NULL;
   3810 	uint_t		srcid;
   3811 	conn_t		*connp = Q_TO_CONN(q);
   3812 	udp_t		*udp = connp->conn_udp;
   3813 	int		error = 0;
   3814 	struct sockaddr	*addr = NULL;
   3815 	socklen_t	addrlen;
   3816 	udp_stack_t	*us = udp->udp_us;
   3817 	struct T_unitdata_req *tudr;
   3818 	mblk_t		*data_mp;
   3819 	ushort_t	ipversion;
   3820 	cred_t		*cr;
   3821 	pid_t		pid;
   3822 
   3823 	/*
   3824 	 * We directly handle several cases here: T_UNITDATA_REQ message
   3825 	 * coming down as M_PROTO/M_PCPROTO and M_DATA messages for connected
   3826 	 * socket.
   3827 	 */
   3828 	switch (DB_TYPE(mp)) {
   3829 	case M_DATA:
   3830 		if (!udp->udp_issocket || udp->udp_state != TS_DATA_XFER) {
   3831 			/* Not connected; address is required */
   3832 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3833 			UDP_DBGSTAT(us, udp_data_notconn);
   3834 			UDP_STAT(us, udp_out_err_notconn);
   3835 			freemsg(mp);
   3836 			return;
   3837 		}
   3838 		/*
   3839 		 * All Solaris components should pass a db_credp
   3840 		 * for this message, hence we ASSERT.
   3841 		 * On production kernels we return an error to be robust against
   3842 		 * random streams modules sitting on top of us.
   3843 		 */
   3844 		cr = msg_getcred(mp, &pid);
   3845 		ASSERT(cr != NULL);
   3846 		if (cr == NULL) {
   3847 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   3848 			freemsg(mp);
   3849 			return;
   3850 		}
   3851 		ASSERT(udp->udp_issocket);
   3852 		UDP_DBGSTAT(us, udp_data_conn);
   3853 		error = udp_output_connected(connp, mp, cr, pid);
   3854 		if (error != 0) {
   3855 			UDP_STAT(us, udp_out_err_output);
   3856 			if (connp->conn_rq != NULL)
   3857 				udp_ud_err_connected(connp, (t_scalar_t)error);
   3858 #ifdef DEBUG
   3859 			printf("udp_output_connected returned %d\n", error);
   3860 #endif
   3861 		}
   3862 		return;
   3863 
   3864 	case M_PROTO:
   3865 	case M_PCPROTO:
   3866 		tudr = (struct T_unitdata_req *)mp->b_rptr;
   3867 		if (MBLKL(mp) < sizeof (*tudr) ||
   3868 		    ((t_primp_t)mp->b_rptr)->type != T_UNITDATA_REQ) {
   3869 			udp_wput_other(q, mp);
   3870 			return;
   3871 		}
   3872 		break;
   3873 
   3874 	default:
   3875 		udp_wput_other(q, mp);
   3876 		return;
   3877 	}
   3878 
   3879 	/* Handle valid T_UNITDATA_REQ here */
   3880 	data_mp = mp->b_cont;
   3881 	if (data_mp == NULL) {
   3882 		error = EPROTO;
   3883 		goto ud_error2;
   3884 	}
   3885 	mp->b_cont = NULL;
   3886 
   3887 	if (!MBLKIN(mp, 0, tudr->DEST_offset + tudr->DEST_length)) {
   3888 		error = EADDRNOTAVAIL;
   3889 		goto ud_error2;
   3890 	}
   3891 
   3892 	/*
   3893 	 * All Solaris components should pass a db_credp
   3894 	 * for this TPI message, hence we should ASSERT.
   3895 	 * However, RPC (svc_clts_ksend) does this odd thing where it
   3896 	 * passes the options from a T_UNITDATA_IND unchanged in a
   3897 	 * T_UNITDATA_REQ. While that is the right thing to do for
   3898 	 * some options, SCM_UCRED being the key one, this also makes it
   3899 	 * pass down IP_RECVDSTADDR. Hence we can't ASSERT here.
   3900 	 */
   3901 	cr = msg_getcred(mp, &pid);
   3902 	if (cr == NULL) {
   3903 		cr = connp->conn_cred;
   3904 		pid = connp->conn_cpid;
   3905 	}
   3906 
   3907 	/*
   3908 	 * If a port has not been bound to the stream, fail.
   3909 	 * This is not a problem when sockfs is directly
   3910 	 * above us, because it will ensure that the socket
   3911 	 * is first bound before allowing data to be sent.
   3912 	 */
   3913 	if (udp->udp_state == TS_UNBND) {
   3914 		error = EPROTO;
   3915 		goto ud_error2;
   3916 	}
   3917 	addr = (struct sockaddr *)&mp->b_rptr[tudr->DEST_offset];
   3918 	addrlen = tudr->DEST_length;
   3919 
   3920 	switch (connp->conn_family) {
   3921 	case AF_INET6:
   3922 		sin6 = (sin6_t *)addr;
   3923 		if (!OK_32PTR((char *)sin6) || (addrlen != sizeof (sin6_t)) ||
   3924 		    (sin6->sin6_family != AF_INET6)) {
   3925 			error = EADDRNOTAVAIL;
   3926 			goto ud_error2;
   3927 		}
   3928 
   3929 		srcid = sin6->__sin6_src_id;
   3930 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
   3931 			/*
   3932 			 * Destination is a non-IPv4-compatible IPv6 address.
   3933 			 * Send out an IPv6 format packet.
   3934 			 */
   3935 
   3936 			/*
   3937 			 * If the local address is a mapped address return
   3938 			 * an error.
   3939 			 * It would be possible to send an IPv6 packet but the
   3940 			 * response would never make it back to the application
   3941 			 * since it is bound to a mapped address.
   3942 			 */
   3943 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
   3944 				error = EADDRNOTAVAIL;
   3945 				goto ud_error2;
   3946 			}
   3947 
   3948 			UDP_DBGSTAT(us, udp_out_ipv6);
   3949 
   3950 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
   3951 				sin6->sin6_addr = ipv6_loopback;
   3952 			ipversion = IPV6_VERSION;
   3953 		} else {
   3954 			if (connp->conn_ipv6_v6only) {
   3955 				error = EADDRNOTAVAIL;
   3956 				goto ud_error2;
   3957 			}
   3958 
   3959 			/*
   3960 			 * If the local address is not zero or a mapped address
   3961 			 * return an error.  It would be possible to send an
   3962 			 * IPv4 packet but the response would never make it
   3963 			 * back to the application since it is bound to a
   3964 			 * non-mapped address.
   3965 			 */
   3966 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
   3967 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
   3968 				error = EADDRNOTAVAIL;
   3969 				goto ud_error2;
   3970 			}
   3971 			UDP_DBGSTAT(us, udp_out_mapped);
   3972 
   3973 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
   3974 				V4_PART_OF_V6(sin6->sin6_addr) =
   3975 				    htonl(INADDR_LOOPBACK);
   3976 			}
   3977 			ipversion = IPV4_VERSION;
   3978 		}
   3979 
   3980 		if (tudr->OPT_length != 0) {
   3981 			/*
   3982 			 * If we are connected then the destination needs to be
   3983 			 * the same as the connected one.
   3984 			 */
   3985 			if (udp->udp_state == TS_DATA_XFER &&
   3986 			    !conn_same_as_last_v6(connp, sin6)) {
   3987 				error = EISCONN;
   3988 				goto ud_error2;
   3989 			}
   3990 			UDP_STAT(us, udp_out_opt);
   3991 			error = udp_output_ancillary(connp, NULL, sin6,
   3992 			    data_mp, mp, NULL, cr, pid);
   3993 		} else {
   3994 			ip_xmit_attr_t *ixa;
   3995 
   3996 			/*
   3997 			 * We have to allocate an ip_xmit_attr_t before we grab
   3998 			 * conn_lock and we need to hold conn_lock once we've
   3999 			 * checked conn_same_as_last_v6 to handle concurrent
   4000 			 * send* calls on a socket.
   4001 			 */
   4002 			ixa = conn_get_ixa(connp, B_FALSE);
   4003 			if (ixa == NULL) {
   4004 				error = ENOMEM;
   4005 				goto ud_error2;
   4006 			}
   4007 			mutex_enter(&connp->conn_lock);
   4008 
   4009 			if (conn_same_as_last_v6(connp, sin6) &&
   4010 			    connp->conn_lastsrcid == srcid &&
   4011 			    ipsec_outbound_policy_current(ixa)) {
   4012 				UDP_DBGSTAT(us, udp_out_lastdst);
   4013 				/* udp_output_lastdst drops conn_lock */
   4014 				error = udp_output_lastdst(connp, data_mp, cr,
   4015 				    pid, ixa);
   4016 			} else {
   4017 				UDP_DBGSTAT(us, udp_out_diffdst);
   4018 				/* udp_output_newdst drops conn_lock */
   4019 				error = udp_output_newdst(connp, data_mp, NULL,
   4020 				    sin6, ipversion, cr, pid, ixa);
   4021 			}
   4022 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
   4023 		}
   4024 		if (error == 0) {
   4025 			freeb(mp);
   4026 			return;
   4027 		}
   4028 		break;
   4029 
   4030 	case AF_INET:
   4031 		sin = (sin_t *)addr;
   4032 		if ((!OK_32PTR((char *)sin) || addrlen != sizeof (sin_t)) ||
   4033 		    (sin->sin_family != AF_INET)) {
   4034 			error = EADDRNOTAVAIL;
   4035 			goto ud_error2;
   4036 		}
   4037 		UDP_DBGSTAT(us, udp_out_ipv4);
   4038 		if (sin->sin_addr.s_addr == INADDR_ANY)
   4039 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   4040 		ipversion = IPV4_VERSION;
   4041 
   4042 		srcid = 0;
   4043 		if (tudr->OPT_length != 0) {
   4044 			/*
   4045 			 * If we are connected then the destination needs to be
   4046 			 * the same as the connected one.
   4047 			 */
   4048 			if (udp->udp_state == TS_DATA_XFER &&
   4049 			    !conn_same_as_last_v4(connp, sin)) {
   4050 				error = EISCONN;
   4051 				goto ud_error2;
   4052 			}
   4053 			UDP_STAT(us, udp_out_opt);
   4054 			error = udp_output_ancillary(connp, sin, NULL,
   4055 			    data_mp, mp, NULL, cr, pid);
   4056 		} else {
   4057 			ip_xmit_attr_t *ixa;
   4058 
   4059 			/*
   4060 			 * We have to allocate an ip_xmit_attr_t before we grab
   4061 			 * conn_lock and we need to hold conn_lock once we've
   4062 			 * checked conn_same_as_last_v4 to handle concurrent
   4063 			 * send* calls on a socket.
   4064 			 */
   4065 			ixa = conn_get_ixa(connp, B_FALSE);
   4066 			if (ixa == NULL) {
   4067 				error = ENOMEM;
   4068 				goto ud_error2;
   4069 			}
   4070 			mutex_enter(&connp->conn_lock);
   4071 
   4072 			if (conn_same_as_last_v4(connp, sin) &&
   4073 			    ipsec_outbound_policy_current(ixa)) {
   4074 				UDP_DBGSTAT(us, udp_out_lastdst);
   4075 				/* udp_output_lastdst drops conn_lock */
   4076 				error = udp_output_lastdst(connp, data_mp, cr,
   4077 				    pid, ixa);
   4078 			} else {
   4079 				UDP_DBGSTAT(us, udp_out_diffdst);
   4080 				/* udp_output_newdst drops conn_lock */
   4081 				error = udp_output_newdst(connp, data_mp, sin,
   4082 				    NULL, ipversion, cr, pid, ixa);
   4083 			}
   4084 			ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
   4085 		}
   4086 		if (error == 0) {
   4087 			freeb(mp);
   4088 			return;
   4089 		}
   4090 		break;
   4091 	}
   4092 	UDP_STAT(us, udp_out_err_output);
   4093 	ASSERT(mp != NULL);
   4094 	/* mp is freed by the following routine */
   4095 	udp_ud_err(q, mp, (t_scalar_t)error);
   4096 	return;
   4097 
   4098 ud_error2:
   4099 	BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   4100 	freemsg(data_mp);
   4101 	UDP_STAT(us, udp_out_err_output);
   4102 	ASSERT(mp != NULL);
   4103 	/* mp is freed by the following routine */
   4104 	udp_ud_err(q, mp, (t_scalar_t)error);
   4105 }
   4106 
   4107 /*
   4108  * Handle the case of the IP address, port, flow label being different
   4109  * for both IPv4 and IPv6.
   4110  *
   4111  * NOTE: The caller must hold conn_lock and we drop it here.
   4112  */
   4113 static int
   4114 udp_output_newdst(conn_t *connp, mblk_t *data_mp, sin_t *sin, sin6_t *sin6,
   4115     ushort_t ipversion, cred_t *cr, pid_t pid, ip_xmit_attr_t *ixa)
   4116 {
   4117 	uint_t		srcid;
   4118 	uint32_t	flowinfo;
   4119 	udp_t		*udp = connp->conn_udp;
   4120 	int		error = 0;
   4121 	ip_xmit_attr_t	*oldixa;
   4122 	udp_stack_t	*us = udp->udp_us;
   4123 	in6_addr_t	v6src;
   4124 	in6_addr_t	v6dst;
   4125 	in6_addr_t	v6nexthop;
   4126 	in_port_t	dstport;
   4127 
   4128 	ASSERT(MUTEX_HELD(&connp->conn_lock));
   4129 	ASSERT(ixa != NULL);
   4130 	/*
   4131 	 * We hold conn_lock across all the use and modifications of
   4132 	 * the conn_lastdst, conn_ixa, and conn_xmit_ipp to ensure that they
   4133 	 * stay consistent.
   4134 	 */
   4135 
   4136 	ASSERT(cr != NULL);
   4137 	ixa->ixa_cred = cr;
   4138 	ixa->ixa_cpid = pid;
   4139 	if (is_system_labeled()) {
   4140 		/* We need to restart with a label based on the cred */
   4141 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
   4142 	}
   4143 
   4144 	/*
   4145 	 * If we are connected then the destination needs to be the
   4146 	 * same as the connected one, which is not the case here since we
   4147 	 * checked for that above.
   4148 	 */
   4149 	if (udp->udp_state == TS_DATA_XFER) {
   4150 		mutex_exit(&connp->conn_lock);
   4151 		error = EISCONN;
   4152 		goto ud_error;
   4153 	}
   4154 
   4155 	/* In case previous destination was multicast or multirt */
   4156 	ip_attr_newdst(ixa);
   4157 
   4158 	/*
   4159 	 * If laddr is unspecified then we look at sin6_src_id.
   4160 	 * We will give precedence to a source address set with IPV6_PKTINFO
   4161 	 * (aka IPPF_ADDR) but that is handled in build_hdrs. However, we don't
   4162 	 * want ip_attr_connect to select a source (since it can fail) when
   4163 	 * IPV6_PKTINFO is specified.
   4164 	 * If this doesn't result in a source address then we get a source
   4165 	 * from ip_attr_connect() below.
   4166 	 */
   4167 	v6src = connp->conn_saddr_v6;
   4168 	if (sin != NULL) {
   4169 		IN6_IPADDR_TO_V4MAPPED(sin->sin_addr.s_addr, &v6dst);
   4170 		dstport = sin->sin_port;
   4171 		flowinfo = 0;
   4172 		srcid = 0;
   4173 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
   4174 		if (srcid != 0 && V4_PART_OF_V6(&v6src) == INADDR_ANY) {
   4175 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
   4176 			    connp->conn_netstack);
   4177 		}
   4178 		ixa->ixa_flags |= IXAF_IS_IPV4;
   4179 	} else {
   4180 		v6dst = sin6->sin6_addr;
   4181 		dstport = sin6->sin6_port;
   4182 		flowinfo = sin6->sin6_flowinfo;
   4183 		srcid = sin6->__sin6_src_id;
   4184 		if (IN6_IS_ADDR_LINKSCOPE(&v6dst) && sin6->sin6_scope_id != 0) {
   4185 			ixa->ixa_scopeid = sin6->sin6_scope_id;
   4186 			ixa->ixa_flags |= IXAF_SCOPEID_SET;
   4187 		} else {
   4188 			ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
   4189 		}
   4190 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
   4191 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
   4192 			    connp->conn_netstack);
   4193 		}
   4194 		if (IN6_IS_ADDR_V4MAPPED(&v6dst))
   4195 			ixa->ixa_flags |= IXAF_IS_IPV4;
   4196 		else
   4197 			ixa->ixa_flags &= ~IXAF_IS_IPV4;
   4198 	}
   4199 	/* Handle IPV6_PKTINFO setting source address. */
   4200 	if (IN6_IS_ADDR_UNSPECIFIED(&v6src) &&
   4201 	    (connp->conn_xmit_ipp.ipp_fields & IPPF_ADDR)) {
   4202 		ip_pkt_t *ipp = &connp->conn_xmit_ipp;
   4203 
   4204 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
   4205 			if (IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
   4206 				v6src = ipp->ipp_addr;
   4207 		} else {
   4208 			if (!IN6_IS_ADDR_V4MAPPED(&ipp->ipp_addr))
   4209 				v6src = ipp->ipp_addr;
   4210 		}
   4211 	}
   4212 
   4213 	ip_attr_nexthop(&connp->conn_xmit_ipp, ixa, &v6dst, &v6nexthop);
   4214 	mutex_exit(&connp->conn_lock);
   4215 
   4216 	error = ip_attr_connect(connp, ixa, &v6src, &v6dst, &v6nexthop, dstport,
   4217 	    &v6src, NULL, IPDF_ALLOW_MCBC | IPDF_VERIFY_DST | IPDF_IPSEC);
   4218 	switch (error) {
   4219 	case 0:
   4220 		break;
   4221 	case EADDRNOTAVAIL:
   4222 		/*
   4223 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
   4224 		 * Don't have the application see that errno
   4225 		 */
   4226 		error = ENETUNREACH;
   4227 		goto failed;
   4228 	case ENETDOWN:
   4229 		/*
   4230 		 * Have !ipif_addr_ready address; drop packet silently
   4231 		 * until we can get applications to not send until we
   4232 		 * are ready.
   4233 		 */
   4234 		error = 0;
   4235 		goto failed;
   4236 	case EHOSTUNREACH:
   4237 	case ENETUNREACH:
   4238 		if (ixa->ixa_ire != NULL) {
   4239 			/*
   4240 			 * Let conn_ip_output/ire_send_noroute return
   4241 			 * the error and send any local ICMP error.
   4242 			 */
   4243 			error = 0;
   4244 			break;
   4245 		}
   4246 		/* FALLTHRU */
   4247 	failed:
   4248 	default:
   4249 		goto ud_error;
   4250 	}
   4251 
   4252 
   4253 	/*
   4254 	 * Cluster note: we let the cluster hook know that we are sending to a
   4255 	 * new address and/or port.
   4256 	 */
   4257 	if (cl_inet_connect2 != NULL) {
   4258 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
   4259 		if (error != 0) {
   4260 			error = EHOSTUNREACH;
   4261 			goto ud_error;
   4262 		}
   4263 	}
   4264 
   4265 	mutex_enter(&connp->conn_lock);
   4266 	/*
   4267 	 * While we dropped the lock some other thread might have connected
   4268 	 * this socket. If so we bail out with EISCONN to ensure that the
   4269 	 * connecting thread is the one that updates conn_ixa, conn_ht_*
   4270 	 * and conn_*last*.
   4271 	 */
   4272 	if (udp->udp_state == TS_DATA_XFER) {
   4273 		mutex_exit(&connp->conn_lock);
   4274 		error = EISCONN;
   4275 		goto ud_error;
   4276 	}
   4277 
   4278 	/*
   4279 	 * We need to rebuild the headers if
   4280 	 *  - we are labeling packets (could be different for different
   4281 	 *    destinations)
   4282 	 *  - we have a source route (or routing header) since we need to
   4283 	 *    massage that to get the pseudo-header checksum
   4284 	 *  - the IP version is different than the last time
   4285 	 *  - a socket option with COA_HEADER_CHANGED has been set which
   4286 	 *    set conn_v6lastdst to zero.
   4287 	 *
   4288 	 * Otherwise the prepend function will just update the src, dst,
   4289 	 * dstport, and flow label.
   4290 	 */
   4291 	if (is_system_labeled()) {
   4292 		/* TX MLP requires SCM_UCRED and don't have that here */
   4293 		if (connp->conn_mlp_type != mlptSingle) {
   4294 			mutex_exit(&connp->conn_lock);
   4295 			error = ECONNREFUSED;
   4296 			goto ud_error;
   4297 		}
   4298 		/*
   4299 		 * Check whether Trusted Solaris policy allows communication
   4300 		 * with this host, and pretend that the destination is
   4301 		 * unreachable if not.
   4302 		 * Compute any needed label and place it in ipp_label_v4/v6.
   4303 		 *
   4304 		 * Later conn_build_hdr_template/conn_prepend_hdr takes
   4305 		 * ipp_label_v4/v6 to form the packet.
   4306 		 *
   4307 		 * Tsol note: Since we hold conn_lock we know no other
   4308 		 * thread manipulates conn_xmit_ipp.
   4309 		 */
   4310 		error = conn_update_label(connp, ixa, &v6dst,
   4311 		    &connp->conn_xmit_ipp);
   4312 		if (error != 0) {
   4313 			mutex_exit(&connp->conn_lock);
   4314 			goto ud_error;
   4315 		}
   4316 		/* Rebuild the header template */
   4317 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
   4318 		    flowinfo);
   4319 		if (error != 0) {
   4320 			mutex_exit(&connp->conn_lock);
   4321 			goto ud_error;
   4322 		}
   4323 	} else if ((connp->conn_xmit_ipp.ipp_fields &
   4324 	    (IPPF_IPV4_OPTIONS|IPPF_RTHDR)) ||
   4325 	    ipversion != connp->conn_lastipversion ||
   4326 	    IN6_IS_ADDR_UNSPECIFIED(&connp->conn_v6lastdst)) {
   4327 		/* Rebuild the header template */
   4328 		error = udp_build_hdr_template(connp, &v6src, &v6dst, dstport,
   4329 		    flowinfo);
   4330 		if (error != 0) {
   4331 			mutex_exit(&connp->conn_lock);
   4332 			goto ud_error;
   4333 		}
   4334 	} else {
   4335 		/* Simply update the destination address if no source route */
   4336 		if (ixa->ixa_flags & IXAF_IS_IPV4) {
   4337 			ipha_t	*ipha = (ipha_t *)connp->conn_ht_iphc;
   4338 
   4339 			IN6_V4MAPPED_TO_IPADDR(&v6dst, ipha->ipha_dst);
   4340 			if (ixa->ixa_flags & IXAF_PMTU_IPV4_DF) {
   4341 				ipha->ipha_fragment_offset_and_flags |=
   4342 				    IPH_DF_HTONS;
   4343 			} else {
   4344 				ipha->ipha_fragment_offset_and_flags &=
   4345 				    ~IPH_DF_HTONS;
   4346 			}
   4347 		} else {
   4348 			ip6_t *ip6h = (ip6_t *)connp->conn_ht_iphc;
   4349 			ip6h->ip6_dst = v6dst;
   4350 		}
   4351 	}
   4352 
   4353 	/*
   4354 	 * Remember the dst/dstport etc which corresponds to the built header
   4355 	 * template and conn_ixa.
   4356 	 */
   4357 	oldixa = conn_replace_ixa(connp, ixa);
   4358 	connp->conn_v6lastdst = v6dst;
   4359 	connp->conn_lastipversion = ipversion;
   4360 	connp->conn_lastdstport = dstport;
   4361 	connp->conn_lastflowinfo = flowinfo;
   4362 	connp->conn_lastscopeid = ixa->ixa_scopeid;
   4363 	connp->conn_lastsrcid = srcid;
   4364 	/* Also remember a source to use together with lastdst */
   4365 	connp->conn_v6lastsrc = v6src;
   4366 
   4367 	data_mp = udp_prepend_header_template(connp, ixa, data_mp, &v6src,
   4368 	    dstport, flowinfo, &error);
   4369 
   4370 	/* Done with conn_t */
   4371 	mutex_exit(&connp->conn_lock);
   4372 	ixa_refrele(oldixa);
   4373 
   4374 	if (data_mp == NULL) {
   4375 		ASSERT(error != 0);
   4376 		goto ud_error;
   4377 	}
   4378 
   4379 	/* We're done.  Pass the packet to ip. */
   4380 	BUMP_MIB(&us->us_udp_mib, udpHCOutDatagrams);
   4381 
   4382 	error = conn_ip_output(data_mp, ixa);
   4383 	/* No udpOutErrors if an error since IP increases its error counter */
   4384 	switch (error) {
   4385 	case 0:
   4386 		break;
   4387 	case EWOULDBLOCK:
   4388 		(void) ixa_check_drain_insert(connp, ixa);
   4389 		error = 0;
   4390 		break;
   4391 	case EADDRNOTAVAIL:
   4392 		/*
   4393 		 * IXAF_VERIFY_SOURCE tells us to pick a better source.
   4394 		 * Don't have the application see that errno
   4395 		 */
   4396 		error = ENETUNREACH;
   4397 		/* FALLTHRU */
   4398 	default:
   4399 		mutex_enter(&connp->conn_lock);
   4400 		/*
   4401 		 * Clear the source and v6lastdst so we call ip_attr_connect
   4402 		 * for the next packet and try to pick a better source.
   4403 		 */
   4404 		if (connp->conn_mcbc_bind)
   4405 			connp->conn_saddr_v6 = ipv6_all_zeros;
   4406 		else
   4407 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
   4408 		connp->conn_v6lastdst = ipv6_all_zeros;
   4409 		mutex_exit(&connp->conn_lock);
   4410 		break;
   4411 	}
   4412 	ixa_refrele(ixa);
   4413 	return (error);
   4414 
   4415 ud_error:
   4416 	if (ixa != NULL)
   4417 		ixa_refrele(ixa);
   4418 
   4419 	freemsg(data_mp);
   4420 	BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   4421 	UDP_STAT(us, udp_out_err_output);
   4422 	return (error);
   4423 }
   4424 
   4425 /* ARGSUSED */
   4426 static void
   4427 udp_wput_fallback(queue_t *wq, mblk_t *mp)
   4428 {
   4429 #ifdef DEBUG
   4430 	cmn_err(CE_CONT, "udp_wput_fallback: Message in fallback \n");
   4431 #endif
   4432 	freemsg(mp);
   4433 }
   4434 
   4435 
   4436 /*
   4437  * Handle special out-of-band ioctl requests (see PSARC/2008/265).
   4438  */
   4439 static void
   4440 udp_wput_cmdblk(queue_t *q, mblk_t *mp)
   4441 {
   4442 	void	*data;
   4443 	mblk_t	*datamp = mp->b_cont;
   4444 	conn_t	*connp = Q_TO_CONN(q);
   4445 	udp_t	*udp = connp->conn_udp;
   4446 	cmdblk_t *cmdp = (cmdblk_t *)mp->b_rptr;
   4447 
   4448 	if (datamp == NULL || MBLKL(datamp) < cmdp->cb_len) {
   4449 		cmdp->cb_error = EPROTO;
   4450 		qreply(q, mp);
   4451 		return;
   4452 	}
   4453 	data = datamp->b_rptr;
   4454 
   4455 	mutex_enter(&connp->conn_lock);
   4456 	switch (cmdp->cb_cmd) {
   4457 	case TI_GETPEERNAME:
   4458 		if (udp->udp_state != TS_DATA_XFER)
   4459 			cmdp->cb_error = ENOTCONN;
   4460 		else
   4461 			cmdp->cb_error = conn_getpeername(connp, data,
   4462 			    &cmdp->cb_len);
   4463 		break;
   4464 	case TI_GETMYNAME:
   4465 		cmdp->cb_error = conn_getsockname(connp, data, &cmdp->cb_len);
   4466 		break;
   4467 	default:
   4468 		cmdp->cb_error = EINVAL;
   4469 		break;
   4470 	}
   4471 	mutex_exit(&connp->conn_lock);
   4472 
   4473 	qreply(q, mp);
   4474 }
   4475 
   4476 static void
   4477 udp_use_pure_tpi(udp_t *udp)
   4478 {
   4479 	conn_t	*connp = udp->udp_connp;
   4480 
   4481 	mutex_enter(&connp->conn_lock);
   4482 	udp->udp_issocket = B_FALSE;
   4483 	mutex_exit(&connp->conn_lock);
   4484 	UDP_STAT(udp->udp_us, udp_sock_fallback);
   4485 }
   4486 
   4487 static void
   4488 udp_wput_other(queue_t *q, mblk_t *mp)
   4489 {
   4490 	uchar_t	*rptr = mp->b_rptr;
   4491 	struct iocblk *iocp;
   4492 	conn_t	*connp = Q_TO_CONN(q);
   4493 	udp_t	*udp = connp->conn_udp;
   4494 	udp_stack_t *us = udp->udp_us;
   4495 	cred_t	*cr;
   4496 
   4497 	switch (mp->b_datap->db_type) {
   4498 	case M_CMD:
   4499 		udp_wput_cmdblk(q, mp);
   4500 		return;
   4501 
   4502 	case M_PROTO:
   4503 	case M_PCPROTO:
   4504 		if (mp->b_wptr - rptr < sizeof (t_scalar_t)) {
   4505 			/*
   4506 			 * If the message does not contain a PRIM_type,
   4507 			 * throw it away.
   4508 			 */
   4509 			freemsg(mp);
   4510 			return;
   4511 		}
   4512 		switch (((t_primp_t)rptr)->type) {
   4513 		case T_ADDR_REQ:
   4514 			udp_addr_req(q, mp);
   4515 			return;
   4516 		case O_T_BIND_REQ:
   4517 		case T_BIND_REQ:
   4518 			udp_tpi_bind(q, mp);
   4519 			return;
   4520 		case T_CONN_REQ:
   4521 			udp_tpi_connect(q, mp);
   4522 			return;
   4523 		case T_CAPABILITY_REQ:
   4524 			udp_capability_req(q, mp);
   4525 			return;
   4526 		case T_INFO_REQ:
   4527 			udp_info_req(q, mp);
   4528 			return;
   4529 		case T_UNITDATA_REQ:
   4530 			/*
   4531 			 * If a T_UNITDATA_REQ gets here, the address must
   4532 			 * be bad.  Valid T_UNITDATA_REQs are handled
   4533 			 * in udp_wput.
   4534 			 */
   4535 			udp_ud_err(q, mp, EADDRNOTAVAIL);
   4536 			return;
   4537 		case T_UNBIND_REQ:
   4538 			udp_tpi_unbind(q, mp);
   4539 			return;
   4540 		case T_SVR4_OPTMGMT_REQ:
   4541 			/*
   4542 			 * All Solaris components should pass a db_credp
   4543 			 * for this TPI message, hence we ASSERT.
   4544 			 * But in case there is some other M_PROTO that looks
   4545 			 * like a TPI message sent by some other kernel
   4546 			 * component, we check and return an error.
   4547 			 */
   4548 			cr = msg_getcred(mp, NULL);
   4549 			ASSERT(cr != NULL);
   4550 			if (cr == NULL) {
   4551 				udp_err_ack(q, mp, TSYSERR, EINVAL);
   4552 				return;
   4553 			}
   4554 			if (!snmpcom_req(q, mp, udp_snmp_set, ip_snmp_get,
   4555 			    cr)) {
   4556 				svr4_optcom_req(q, mp, cr, &udp_opt_obj);
   4557 			}
   4558 			return;
   4559 
   4560 		case T_OPTMGMT_REQ:
   4561 			/*
   4562 			 * All Solaris components should pass a db_credp
   4563 			 * for this TPI message, hence we ASSERT.
   4564 			 * But in case there is some other M_PROTO that looks
   4565 			 * like a TPI message sent by some other kernel
   4566 			 * component, we check and return an error.
   4567 			 */
   4568 			cr = msg_getcred(mp, NULL);
   4569 			ASSERT(cr != NULL);
   4570 			if (cr == NULL) {
   4571 				udp_err_ack(q, mp, TSYSERR, EINVAL);
   4572 				return;
   4573 			}
   4574 			tpi_optcom_req(q, mp, cr, &udp_opt_obj);
   4575 			return;
   4576 
   4577 		case T_DISCON_REQ:
   4578 			udp_tpi_disconnect(q, mp);
   4579 			return;
   4580 
   4581 		/* The following TPI message is not supported by udp. */
   4582 		case O_T_CONN_RES:
   4583 		case T_CONN_RES:
   4584 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
   4585 			return;
   4586 
   4587 		/* The following 3 TPI requests are illegal for udp. */
   4588 		case T_DATA_REQ:
   4589 		case T_EXDATA_REQ:
   4590 		case T_ORDREL_REQ:
   4591 			udp_err_ack(q, mp, TNOTSUPPORT, 0);
   4592 			return;
   4593 		default:
   4594 			break;
   4595 		}
   4596 		break;
   4597 	case M_FLUSH:
   4598 		if (*rptr & FLUSHW)
   4599 			flushq(q, FLUSHDATA);
   4600 		break;
   4601 	case M_IOCTL:
   4602 		iocp = (struct iocblk *)mp->b_rptr;
   4603 		switch (iocp->ioc_cmd) {
   4604 		case TI_GETPEERNAME:
   4605 			if (udp->udp_state != TS_DATA_XFER) {
   4606 				/*
   4607 				 * If a default destination address has not
   4608 				 * been associated with the stream, then we
   4609 				 * don't know the peer's name.
   4610 				 */
   4611 				iocp->ioc_error = ENOTCONN;
   4612 				iocp->ioc_count = 0;
   4613 				mp->b_datap->db_type = M_IOCACK;
   4614 				qreply(q, mp);
   4615 				return;
   4616 			}
   4617 			/* FALLTHRU */
   4618 		case TI_GETMYNAME:
   4619 			/*
   4620 			 * For TI_GETPEERNAME and TI_GETMYNAME, we first
   4621 			 * need to copyin the user's strbuf structure.
   4622 			 * Processing will continue in the M_IOCDATA case
   4623 			 * below.
   4624 			 */
   4625 			mi_copyin(q, mp, NULL,
   4626 			    SIZEOF_STRUCT(strbuf, iocp->ioc_flag));
   4627 			return;
   4628 		case ND_SET:
   4629 			/* nd_getset performs the necessary checking */
   4630 		case ND_GET:
   4631 			if (nd_getset(q, us->us_nd, mp)) {
   4632 				qreply(q, mp);
   4633 				return;
   4634 			}
   4635 			break;
   4636 		case _SIOCSOCKFALLBACK:
   4637 			/*
   4638 			 * Either sockmod is about to be popped and the
   4639 			 * socket would now be treated as a plain stream,
   4640 			 * or a module is about to be pushed so we have
   4641 			 * to follow pure TPI semantics.
   4642 			 */
   4643 			if (!udp->udp_issocket) {
   4644 				DB_TYPE(mp) = M_IOCNAK;
   4645 				iocp->ioc_error = EINVAL;
   4646 			} else {
   4647 				udp_use_pure_tpi(udp);
   4648 
   4649 				DB_TYPE(mp) = M_IOCACK;
   4650 				iocp->ioc_error = 0;
   4651 			}
   4652 			iocp->ioc_count = 0;
   4653 			iocp->ioc_rval = 0;
   4654 			qreply(q, mp);
   4655 			return;
   4656 		default:
   4657 			break;
   4658 		}
   4659 		break;
   4660 	case M_IOCDATA:
   4661 		udp_wput_iocdata(q, mp);
   4662 		return;
   4663 	default:
   4664 		/* Unrecognized messages are passed through without change. */
   4665 		break;
   4666 	}
   4667 	ip_wput_nondata(q, mp);
   4668 }
   4669 
   4670 /*
   4671  * udp_wput_iocdata is called by udp_wput_other to handle all M_IOCDATA
   4672  * messages.
   4673  */
   4674 static void
   4675 udp_wput_iocdata(queue_t *q, mblk_t *mp)
   4676 {
   4677 	mblk_t		*mp1;
   4678 	struct	iocblk *iocp = (struct iocblk *)mp->b_rptr;
   4679 	STRUCT_HANDLE(strbuf, sb);
   4680 	uint_t		addrlen;
   4681 	conn_t		*connp = Q_TO_CONN(q);
   4682 	udp_t		*udp = connp->conn_udp;
   4683 
   4684 	/* Make sure it is one of ours. */
   4685 	switch (iocp->ioc_cmd) {
   4686 	case TI_GETMYNAME:
   4687 	case TI_GETPEERNAME:
   4688 		break;
   4689 	default:
   4690 		ip_wput_nondata(q, mp);
   4691 		return;
   4692 	}
   4693 
   4694 	switch (mi_copy_state(q, mp, &mp1)) {
   4695 	case -1:
   4696 		return;
   4697 	case MI_COPY_CASE(MI_COPY_IN, 1):
   4698 		break;
   4699 	case MI_COPY_CASE(MI_COPY_OUT, 1):
   4700 		/*
   4701 		 * The address has been copied out, so now
   4702 		 * copyout the strbuf.
   4703 		 */
   4704 		mi_copyout(q, mp);
   4705 		return;
   4706 	case MI_COPY_CASE(MI_COPY_OUT, 2):
   4707 		/*
   4708 		 * The address and strbuf have been copied out.
   4709 		 * We're done, so just acknowledge the original
   4710 		 * M_IOCTL.
   4711 		 */
   4712 		mi_copy_done(q, mp, 0);
   4713 		return;
   4714 	default:
   4715 		/*
   4716 		 * Something strange has happened, so acknowledge
   4717 		 * the original M_IOCTL with an EPROTO error.
   4718 		 */
   4719 		mi_copy_done(q, mp, EPROTO);
   4720 		return;
   4721 	}
   4722 
   4723 	/*
   4724 	 * Now we have the strbuf structure for TI_GETMYNAME
   4725 	 * and TI_GETPEERNAME.  Next we copyout the requested
   4726 	 * address and then we'll copyout the strbuf.
   4727 	 */
   4728 	STRUCT_SET_HANDLE(sb, iocp->ioc_flag, (void *)mp1->b_rptr);
   4729 
   4730 	if (connp->conn_family == AF_INET)
   4731 		addrlen = sizeof (sin_t);
   4732 	else
   4733 		addrlen = sizeof (sin6_t);
   4734 
   4735 	if (STRUCT_FGET(sb, maxlen) < addrlen) {
   4736 		mi_copy_done(q, mp, EINVAL);
   4737 		return;
   4738 	}
   4739 
   4740 	switch (iocp->ioc_cmd) {
   4741 	case TI_GETMYNAME:
   4742 		break;
   4743 	case TI_GETPEERNAME:
   4744 		if (udp->udp_state != TS_DATA_XFER) {
   4745 			mi_copy_done(q, mp, ENOTCONN);
   4746 			return;
   4747 		}
   4748 		break;
   4749 	}
   4750 	mp1 = mi_copyout_alloc(q, mp, STRUCT_FGETP(sb, buf), addrlen, B_TRUE);
   4751 	if (!mp1)
   4752 		return;
   4753 
   4754 	STRUCT_FSET(sb, len, addrlen);
   4755 	switch (((struct iocblk *)mp->b_rptr)->ioc_cmd) {
   4756 	case TI_GETMYNAME:
   4757 		(void) conn_getsockname(connp, (struct sockaddr *)mp1->b_wptr,
   4758 		    &addrlen);
   4759 		break;
   4760 	case TI_GETPEERNAME:
   4761 		(void) conn_getpeername(connp, (struct sockaddr *)mp1->b_wptr,
   4762 		    &addrlen);
   4763 		break;
   4764 	}
   4765 	mp1->b_wptr += addrlen;
   4766 	/* Copy out the address */
   4767 	mi_copyout(q, mp);
   4768 }
   4769 
   4770 void
   4771 udp_ddi_g_init(void)
   4772 {
   4773 	udp_max_optsize = optcom_max_optsize(udp_opt_obj.odb_opt_des_arr,
   4774 	    udp_opt_obj.odb_opt_arr_cnt);
   4775 
   4776 	/*
   4777 	 * We want to be informed each time a stack is created or
   4778 	 * destroyed in the kernel, so we can maintain the
   4779 	 * set of udp_stack_t's.
   4780 	 */
   4781 	netstack_register(NS_UDP, udp_stack_init, NULL, udp_stack_fini);
   4782 }
   4783 
   4784 void
   4785 udp_ddi_g_destroy(void)
   4786 {
   4787 	netstack_unregister(NS_UDP);
   4788 }
   4789 
   4790 #define	INET_NAME	"ip"
   4791 
   4792 /*
   4793  * Initialize the UDP stack instance.
   4794  */
   4795 static void *
   4796 udp_stack_init(netstackid_t stackid, netstack_t *ns)
   4797 {
   4798 	udp_stack_t	*us;
   4799 	udpparam_t	*pa;
   4800 	int		i;
   4801 	int		error = 0;
   4802 	major_t		major;
   4803 
   4804 	us = (udp_stack_t *)kmem_zalloc(sizeof (*us), KM_SLEEP);
   4805 	us->us_netstack = ns;
   4806 
   4807 	us->us_num_epriv_ports = UDP_NUM_EPRIV_PORTS;
   4808 	us->us_epriv_ports[0] = 2049;
   4809 	us->us_epriv_ports[1] = 4045;
   4810 
   4811 	/*
   4812 	 * The smallest anonymous port in the priviledged port range which UDP
   4813 	 * looks for free port.  Use in the option UDP_ANONPRIVBIND.
   4814 	 */
   4815 	us->us_min_anonpriv_port = 512;
   4816 
   4817 	us->us_bind_fanout_size = udp_bind_fanout_size;
   4818 
   4819 	/* Roundup variable that might have been modified in /etc/system */
   4820 	if (us->us_bind_fanout_size & (us->us_bind_fanout_size - 1)) {
   4821 		/* Not a power of two. Round up to nearest power of two */
   4822 		for (i = 0; i < 31; i++) {
   4823 			if (us->us_bind_fanout_size < (1 << i))
   4824 				break;
   4825 		}
   4826 		us->us_bind_fanout_size = 1 << i;
   4827 	}
   4828 	us->us_bind_fanout = kmem_zalloc(us->us_bind_fanout_size *
   4829 	    sizeof (udp_fanout_t), KM_SLEEP);
   4830 	for (i = 0; i < us->us_bind_fanout_size; i++) {
   4831 		mutex_init(&us->us_bind_fanout[i].uf_lock, NULL, MUTEX_DEFAULT,
   4832 		    NULL);
   4833 	}
   4834 
   4835 	pa = (udpparam_t *)kmem_alloc(sizeof (udp_param_arr), KM_SLEEP);
   4836 
   4837 	us->us_param_arr = pa;
   4838 	bcopy(udp_param_arr, us->us_param_arr, sizeof (udp_param_arr));
   4839 
   4840 	(void) udp_param_register(&us->us_nd,
   4841 	    us->us_param_arr, A_CNT(udp_param_arr));
   4842 
   4843 	us->us_kstat = udp_kstat2_init(stackid, &us->us_statistics);
   4844 	us->us_mibkp = udp_kstat_init(stackid);
   4845 
   4846 	major = mod_name_to_major(INET_NAME);
   4847 	error = ldi_ident_from_major(major, &us->us_ldi_ident);
   4848 	ASSERT(error == 0);
   4849 	return (us);
   4850 }
   4851 
   4852 /*
   4853  * Free the UDP stack instance.
   4854  */
   4855 static void
   4856 udp_stack_fini(netstackid_t stackid, void *arg)
   4857 {
   4858 	udp_stack_t *us = (udp_stack_t *)arg;
   4859 	int i;
   4860 
   4861 	for (i = 0; i < us->us_bind_fanout_size; i++) {
   4862 		mutex_destroy(&us->us_bind_fanout[i].uf_lock);
   4863 	}
   4864 
   4865 	kmem_free(us->us_bind_fanout, us->us_bind_fanout_size *
   4866 	    sizeof (udp_fanout_t));
   4867 
   4868 	us->us_bind_fanout = NULL;
   4869 
   4870 	nd_free(&us->us_nd);
   4871 	kmem_free(us->us_param_arr, sizeof (udp_param_arr));
   4872 	us->us_param_arr = NULL;
   4873 
   4874 	udp_kstat_fini(stackid, us->us_mibkp);
   4875 	us->us_mibkp = NULL;
   4876 
   4877 	udp_kstat2_fini(stackid, us->us_kstat);
   4878 	us->us_kstat = NULL;
   4879 	bzero(&us->us_statistics, sizeof (us->us_statistics));
   4880 
   4881 	ldi_ident_release(us->us_ldi_ident);
   4882 	kmem_free(us, sizeof (*us));
   4883 }
   4884 
   4885 static void *
   4886 udp_kstat2_init(netstackid_t stackid, udp_stat_t *us_statisticsp)
   4887 {
   4888 	kstat_t *ksp;
   4889 
   4890 	udp_stat_t template = {
   4891 		{ "udp_sock_fallback",		KSTAT_DATA_UINT64 },
   4892 		{ "udp_out_opt",		KSTAT_DATA_UINT64 },
   4893 		{ "udp_out_err_notconn",	KSTAT_DATA_UINT64 },
   4894 		{ "udp_out_err_output",		KSTAT_DATA_UINT64 },
   4895 		{ "udp_out_err_tudr",		KSTAT_DATA_UINT64 },
   4896 #ifdef DEBUG
   4897 		{ "udp_data_conn",		KSTAT_DATA_UINT64 },
   4898 		{ "udp_data_notconn",		KSTAT_DATA_UINT64 },
   4899 		{ "udp_out_lastdst",		KSTAT_DATA_UINT64 },
   4900 		{ "udp_out_diffdst",		KSTAT_DATA_UINT64 },
   4901 		{ "udp_out_ipv6",		KSTAT_DATA_UINT64 },
   4902 		{ "udp_out_mapped",		KSTAT_DATA_UINT64 },
   4903 		{ "udp_out_ipv4",		KSTAT_DATA_UINT64 },
   4904 #endif
   4905 	};
   4906 
   4907 	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, "udpstat", "net",
   4908 	    KSTAT_TYPE_NAMED, sizeof (template) / sizeof (kstat_named_t),
   4909 	    KSTAT_FLAG_VIRTUAL, stackid);
   4910 
   4911 	if (ksp == NULL)
   4912 		return (NULL);
   4913 
   4914 	bcopy(&template, us_statisticsp, sizeof (template));
   4915 	ksp->ks_data = (void *)us_statisticsp;
   4916 	ksp->ks_private = (void *)(uintptr_t)stackid;
   4917 
   4918 	kstat_install(ksp);
   4919 	return (ksp);
   4920 }
   4921 
   4922 static void
   4923 udp_kstat2_fini(netstackid_t stackid, kstat_t *ksp)
   4924 {
   4925 	if (ksp != NULL) {
   4926 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
   4927 		kstat_delete_netstack(ksp, stackid);
   4928 	}
   4929 }
   4930 
   4931 static void *
   4932 udp_kstat_init(netstackid_t stackid)
   4933 {
   4934 	kstat_t	*ksp;
   4935 
   4936 	udp_named_kstat_t template = {
   4937 		{ "inDatagrams",	KSTAT_DATA_UINT64, 0 },
   4938 		{ "inErrors",		KSTAT_DATA_UINT32, 0 },
   4939 		{ "outDatagrams",	KSTAT_DATA_UINT64, 0 },
   4940 		{ "entrySize",		KSTAT_DATA_INT32, 0 },
   4941 		{ "entry6Size",		KSTAT_DATA_INT32, 0 },
   4942 		{ "outErrors",		KSTAT_DATA_UINT32, 0 },
   4943 	};
   4944 
   4945 	ksp = kstat_create_netstack(UDP_MOD_NAME, 0, UDP_MOD_NAME, "mib2",
   4946 	    KSTAT_TYPE_NAMED,
   4947 	    NUM_OF_FIELDS(udp_named_kstat_t), 0, stackid);
   4948 
   4949 	if (ksp == NULL || ksp->ks_data == NULL)
   4950 		return (NULL);
   4951 
   4952 	template.entrySize.value.ui32 = sizeof (mib2_udpEntry_t);
   4953 	template.entry6Size.value.ui32 = sizeof (mib2_udp6Entry_t);
   4954 
   4955 	bcopy(&template, ksp->ks_data, sizeof (template));
   4956 	ksp->ks_update = udp_kstat_update;
   4957 	ksp->ks_private = (void *)(uintptr_t)stackid;
   4958 
   4959 	kstat_install(ksp);
   4960 	return (ksp);
   4961 }
   4962 
   4963 static void
   4964 udp_kstat_fini(netstackid_t stackid, kstat_t *ksp)
   4965 {
   4966 	if (ksp != NULL) {
   4967 		ASSERT(stackid == (netstackid_t)(uintptr_t)ksp->ks_private);
   4968 		kstat_delete_netstack(ksp, stackid);
   4969 	}
   4970 }
   4971 
   4972 static int
   4973 udp_kstat_update(kstat_t *kp, int rw)
   4974 {
   4975 	udp_named_kstat_t *udpkp;
   4976 	netstackid_t	stackid = (netstackid_t)(uintptr_t)kp->ks_private;
   4977 	netstack_t	*ns;
   4978 	udp_stack_t	*us;
   4979 
   4980 	if ((kp == NULL) || (kp->ks_data == NULL))
   4981 		return (EIO);
   4982 
   4983 	if (rw == KSTAT_WRITE)
   4984 		return (EACCES);
   4985 
   4986 	ns = netstack_find_by_stackid(stackid);
   4987 	if (ns == NULL)
   4988 		return (-1);
   4989 	us = ns->netstack_udp;
   4990 	if (us == NULL) {
   4991 		netstack_rele(ns);
   4992 		return (-1);
   4993 	}
   4994 	udpkp = (udp_named_kstat_t *)kp->ks_data;
   4995 
   4996 	udpkp->inDatagrams.value.ui64 =	us->us_udp_mib.udpHCInDatagrams;
   4997 	udpkp->inErrors.value.ui32 =	us->us_udp_mib.udpInErrors;
   4998 	udpkp->outDatagrams.value.ui64 = us->us_udp_mib.udpHCOutDatagrams;
   4999 	udpkp->outErrors.value.ui32 =	us->us_udp_mib.udpOutErrors;
   5000 	netstack_rele(ns);
   5001 	return (0);
   5002 }
   5003 
   5004 static size_t
   5005 udp_set_rcv_hiwat(udp_t *udp, size_t size)
   5006 {
   5007 	udp_stack_t *us = udp->udp_us;
   5008 
   5009 	/* We add a bit of extra buffering */
   5010 	size += size >> 1;
   5011 	if (size > us->us_max_buf)
   5012 		size = us->us_max_buf;
   5013 
   5014 	udp->udp_rcv_hiwat = size;
   5015 	return (size);
   5016 }
   5017 
   5018 /*
   5019  * For the lower queue so that UDP can be a dummy mux.
   5020  * Nobody should be sending
   5021  * packets up this stream
   5022  */
   5023 static void
   5024 udp_lrput(queue_t *q, mblk_t *mp)
   5025 {
   5026 	switch (mp->b_datap->db_type) {
   5027 	case M_FLUSH:
   5028 		/* Turn around */
   5029 		if (*mp->b_rptr & FLUSHW) {
   5030 			*mp->b_rptr &= ~FLUSHR;
   5031 			qreply(q, mp);
   5032 			return;
   5033 		}
   5034 		break;
   5035 	}
   5036 	freemsg(mp);
   5037 }
   5038 
   5039 /*
   5040  * For the lower queue so that UDP can be a dummy mux.
   5041  * Nobody should be sending packets down this stream.
   5042  */
   5043 /* ARGSUSED */
   5044 void
   5045 udp_lwput(queue_t *q, mblk_t *mp)
   5046 {
   5047 	freemsg(mp);
   5048 }
   5049 
   5050 /*
   5051  * Below routines for UDP socket module.
   5052  */
   5053 
   5054 static conn_t *
   5055 udp_do_open(cred_t *credp, boolean_t isv6, int flags, int *errorp)
   5056 {
   5057 	udp_t		*udp;
   5058 	conn_t		*connp;
   5059 	zoneid_t 	zoneid;
   5060 	netstack_t 	*ns;
   5061 	udp_stack_t 	*us;
   5062 	int		len;
   5063 
   5064 	ASSERT(errorp != NULL);
   5065 
   5066 	if ((*errorp = secpolicy_basic_net_access(credp)) != 0)
   5067 		return (NULL);
   5068 
   5069 	ns = netstack_find_by_cred(credp);
   5070 	ASSERT(ns != NULL);
   5071 	us = ns->netstack_udp;
   5072 	ASSERT(us != NULL);
   5073 
   5074 	/*
   5075 	 * For exclusive stacks we set the zoneid to zero
   5076 	 * to make UDP operate as if in the global zone.
   5077 	 */
   5078 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
   5079 		zoneid = GLOBAL_ZONEID;
   5080 	else
   5081 		zoneid = crgetzoneid(credp);
   5082 
   5083 	ASSERT(flags == KM_SLEEP || flags == KM_NOSLEEP);
   5084 
   5085 	connp = ipcl_conn_create(IPCL_UDPCONN, flags, ns);
   5086 	if (connp == NULL) {
   5087 		netstack_rele(ns);
   5088 		*errorp = ENOMEM;
   5089 		return (NULL);
   5090 	}
   5091 	udp = connp->conn_udp;
   5092 
   5093 	/*
   5094 	 * ipcl_conn_create did a netstack_hold. Undo the hold that was
   5095 	 * done by netstack_find_by_cred()
   5096 	 */
   5097 	netstack_rele(ns);
   5098 
   5099 	/*
   5100 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
   5101 	 * need to lock anything.
   5102 	 */
   5103 	ASSERT(connp->conn_proto == IPPROTO_UDP);
   5104 	ASSERT(connp->conn_udp == udp);
   5105 	ASSERT(udp->udp_connp == connp);
   5106 
   5107 	/* Set the initial state of the stream and the privilege status. */
   5108 	udp->udp_state = TS_UNBND;
   5109 	connp->conn_ixa->ixa_flags |= IXAF_VERIFY_SOURCE;
   5110 	if (isv6) {
   5111 		connp->conn_family = AF_INET6;
   5112 		connp->conn_ipversion = IPV6_VERSION;
   5113 		connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
   5114 		connp->conn_default_ttl = us->us_ipv6_hoplimit;
   5115 		len = sizeof (ip6_t) + UDPH_SIZE;
   5116 	} else {
   5117 		connp->conn_family = AF_INET;
   5118 		connp->conn_ipversion = IPV4_VERSION;
   5119 		connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
   5120 		connp->conn_default_ttl = us->us_ipv4_ttl;
   5121 		len = sizeof (ipha_t) + UDPH_SIZE;
   5122 	}
   5123 
   5124 	ASSERT(connp->conn_ixa->ixa_protocol == connp->conn_proto);
   5125 	connp->conn_xmit_ipp.ipp_unicast_hops = connp->conn_default_ttl;
   5126 
   5127 	connp->conn_ixa->ixa_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
   5128 	connp->conn_ixa->ixa_flags |= IXAF_MULTICAST_LOOP | IXAF_SET_ULP_CKSUM;
   5129 	/* conn_allzones can not be set this early, hence no IPCL_ZONEID */
   5130 	connp->conn_ixa->ixa_zoneid = zoneid;
   5131 
   5132 	connp->conn_zoneid = zoneid;
   5133 
   5134 	/*
   5135 	 * If the caller has the process-wide flag set, then default to MAC
   5136 	 * exempt mode.  This allows read-down to unlabeled hosts.
   5137 	 */
   5138 	if (getpflags(NET_MAC_AWARE, credp) != 0)
   5139 		connp->conn_mac_mode = CONN_MAC_AWARE;
   5140 
   5141 	connp->conn_zone_is_global = (crgetzoneid(credp) == GLOBAL_ZONEID);
   5142 
   5143 	udp->udp_us = us;
   5144 
   5145 	connp->conn_rcvbuf = us->us_recv_hiwat;
   5146 	connp->conn_sndbuf = us->us_xmit_hiwat;
   5147 	connp->conn_sndlowat = us->us_xmit_lowat;
   5148 	connp->conn_rcvlowat = udp_mod_info.mi_lowat;
   5149 
   5150 	connp->conn_wroff = len + us->us_wroff_extra;
   5151 	connp->conn_so_type = SOCK_DGRAM;
   5152 
   5153 	connp->conn_recv = udp_input;
   5154 	connp->conn_recvicmp = udp_icmp_input;
   5155 	crhold(credp);
   5156 	connp->conn_cred = credp;
   5157 	connp->conn_cpid = curproc->p_pid;
   5158 	connp->conn_open_time = ddi_get_lbolt64();
   5159 	/* Cache things in ixa without an extra refhold */
   5160 	connp->conn_ixa->ixa_cred = connp->conn_cred;
   5161 	connp->conn_ixa->ixa_cpid = connp->conn_cpid;
   5162 	if (is_system_labeled())
   5163 		connp->conn_ixa->ixa_tsl = crgetlabel(connp->conn_cred);
   5164 
   5165 	*((sin6_t *)&udp->udp_delayed_addr) = sin6_null;
   5166 
   5167 	if (us->us_pmtu_discovery)
   5168 		connp->conn_ixa->ixa_flags |= IXAF_PMTU_DISCOVERY;
   5169 
   5170 	return (connp);
   5171 }
   5172 
   5173 sock_lower_handle_t
   5174 udp_create(int family, int type, int proto, sock_downcalls_t **sock_downcalls,
   5175     uint_t *smodep, int *errorp, int flags, cred_t *credp)
   5176 {
   5177 	udp_t		*udp = NULL;
   5178 	udp_stack_t	*us;
   5179 	conn_t		*connp;
   5180 	boolean_t	isv6;
   5181 
   5182 	if (type != SOCK_DGRAM || (family != AF_INET && family != AF_INET6) ||
   5183 	    (proto != 0 && proto != IPPROTO_UDP)) {
   5184 		*errorp = EPROTONOSUPPORT;
   5185 		return (NULL);
   5186 	}
   5187 
   5188 	if (family == AF_INET6)
   5189 		isv6 = B_TRUE;
   5190 	else
   5191 		isv6 = B_FALSE;
   5192 
   5193 	connp = udp_do_open(credp, isv6, flags, errorp);
   5194 	if (connp == NULL)
   5195 		return (NULL);
   5196 
   5197 	udp = connp->conn_udp;
   5198 	ASSERT(udp != NULL);
   5199 	us = udp->udp_us;
   5200 	ASSERT(us != NULL);
   5201 
   5202 	udp->udp_issocket = B_TRUE;
   5203 	connp->conn_flags |= IPCL_NONSTR;
   5204 
   5205 	/*
   5206 	 * Set flow control
   5207 	 * Since this conn_t/udp_t is not yet visible to anybody else we don't
   5208 	 * need to lock anything.
   5209 	 */
   5210 	(void) udp_set_rcv_hiwat(udp, connp->conn_rcvbuf);
   5211 	udp->udp_rcv_disply_hiwat = connp->conn_rcvbuf;
   5212 
   5213 	connp->conn_flow_cntrld = B_FALSE;
   5214 
   5215 	mutex_enter(&connp->conn_lock);
   5216 	connp->conn_state_flags &= ~CONN_INCIPIENT;
   5217 	mutex_exit(&connp->conn_lock);
   5218 
   5219 	*errorp = 0;
   5220 	*smodep = SM_ATOMIC;
   5221 	*sock_downcalls = &sock_udp_downcalls;
   5222 	return ((sock_lower_handle_t)connp);
   5223 }
   5224 
   5225 /* ARGSUSED3 */
   5226 void
   5227 udp_activate(sock_lower_handle_t proto_handle, sock_upper_handle_t sock_handle,
   5228     sock_upcalls_t *sock_upcalls, int flags, cred_t *cr)
   5229 {
   5230 	conn_t 		*connp = (conn_t *)proto_handle;
   5231 	struct sock_proto_props sopp;
   5232 
   5233 	/* All Solaris components should pass a cred for this operation. */
   5234 	ASSERT(cr != NULL);
   5235 
   5236 	connp->conn_upcalls = sock_upcalls;
   5237 	connp->conn_upper_handle = sock_handle;
   5238 
   5239 	sopp.sopp_flags = SOCKOPT_WROFF | SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT |
   5240 	    SOCKOPT_MAXBLK | SOCKOPT_MAXPSZ | SOCKOPT_MINPSZ;
   5241 	sopp.sopp_wroff = connp->conn_wroff;
   5242 	sopp.sopp_maxblk = INFPSZ;
   5243 	sopp.sopp_rxhiwat = connp->conn_rcvbuf;
   5244 	sopp.sopp_rxlowat = connp->conn_rcvlowat;
   5245 	sopp.sopp_maxaddrlen = sizeof (sin6_t);
   5246 	sopp.sopp_maxpsz =
   5247 	    (connp->conn_family == AF_INET) ? UDP_MAXPACKET_IPV4 :
   5248 	    UDP_MAXPACKET_IPV6;
   5249 	sopp.sopp_minpsz = (udp_mod_info.mi_minpsz == 1) ? 0 :
   5250 	    udp_mod_info.mi_minpsz;
   5251 
   5252 	(*connp->conn_upcalls->su_set_proto_props)(connp->conn_upper_handle,
   5253 	    &sopp);
   5254 }
   5255 
   5256 static void
   5257 udp_do_close(conn_t *connp)
   5258 {
   5259 	udp_t	*udp;
   5260 
   5261 	ASSERT(connp != NULL && IPCL_IS_UDP(connp));
   5262 	udp = connp->conn_udp;
   5263 
   5264 	if (cl_inet_unbind != NULL && udp->udp_state == TS_IDLE) {
   5265 		/*
   5266 		 * Running in cluster mode - register unbind information
   5267 		 */
   5268 		if (connp->conn_ipversion == IPV4_VERSION) {
   5269 			(*cl_inet_unbind)(
   5270 			    connp->conn_netstack->netstack_stackid,
   5271 			    IPPROTO_UDP, AF_INET,
   5272 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
   5273 			    (in_port_t)connp->conn_lport, NULL);
   5274 		} else {
   5275 			(*cl_inet_unbind)(
   5276 			    connp->conn_netstack->netstack_stackid,
   5277 			    IPPROTO_UDP, AF_INET6,
   5278 			    (uint8_t *)&(connp->conn_laddr_v6),
   5279 			    (in_port_t)connp->conn_lport, NULL);
   5280 		}
   5281 	}
   5282 
   5283 	udp_bind_hash_remove(udp, B_FALSE);
   5284 
   5285 	ip_quiesce_conn(connp);
   5286 
   5287 	if (!IPCL_IS_NONSTR(connp)) {
   5288 		ASSERT(connp->conn_wq != NULL);
   5289 		ASSERT(connp->conn_rq != NULL);
   5290 		qprocsoff(connp->conn_rq);
   5291 	}
   5292 
   5293 	udp_close_free(connp);
   5294 
   5295 	/*
   5296 	 * Now we are truly single threaded on this stream, and can
   5297 	 * delete the things hanging off the connp, and finally the connp.
   5298 	 * We removed this connp from the fanout list, it cannot be
   5299 	 * accessed thru the fanouts, and we already waited for the
   5300 	 * conn_ref to drop to 0. We are already in close, so
   5301 	 * there cannot be any other thread from the top. qprocsoff
   5302 	 * has completed, and service has completed or won't run in
   5303 	 * future.
   5304 	 */
   5305 	ASSERT(connp->conn_ref == 1);
   5306 
   5307 	if (!IPCL_IS_NONSTR(connp)) {
   5308 		inet_minor_free(connp->conn_minor_arena, connp->conn_dev);
   5309 	} else {
   5310 		ip_free_helper_stream(connp);
   5311 	}
   5312 
   5313 	connp->conn_ref--;
   5314 	ipcl_conn_destroy(connp);
   5315 }
   5316 
   5317 /* ARGSUSED1 */
   5318 int
   5319 udp_close(sock_lower_handle_t proto_handle, int flags, cred_t *cr)
   5320 {
   5321 	conn_t	*connp = (conn_t *)proto_handle;
   5322 
   5323 	/* All Solaris components should pass a cred for this operation. */
   5324 	ASSERT(cr != NULL);
   5325 
   5326 	udp_do_close(connp);
   5327 	return (0);
   5328 }
   5329 
   5330 static int
   5331 udp_do_bind(conn_t *connp, struct sockaddr *sa, socklen_t len, cred_t *cr,
   5332     boolean_t bind_to_req_port_only)
   5333 {
   5334 	sin_t		*sin;
   5335 	sin6_t		*sin6;
   5336 	udp_t		*udp = connp->conn_udp;
   5337 	int		error = 0;
   5338 	ip_laddr_t	laddr_type = IPVL_UNICAST_UP;	/* INADDR_ANY */
   5339 	in_port_t	port;		/* Host byte order */
   5340 	in_port_t	requested_port;	/* Host byte order */
   5341 	int		count;
   5342 	ipaddr_t	v4src;		/* Set if AF_INET */
   5343 	in6_addr_t	v6src;
   5344 	int		loopmax;
   5345 	udp_fanout_t	*udpf;
   5346 	in_port_t	lport;		/* Network byte order */
   5347 	uint_t		scopeid = 0;
   5348 	zoneid_t	zoneid = IPCL_ZONEID(connp);
   5349 	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
   5350 	boolean_t	is_inaddr_any;
   5351 	mlp_type_t	addrtype, mlptype;
   5352 	udp_stack_t	*us = udp->udp_us;
   5353 
   5354 	switch (len) {
   5355 	case sizeof (sin_t):	/* Complete IPv4 address */
   5356 		sin = (sin_t *)sa;
   5357 
   5358 		if (sin == NULL || !OK_32PTR((char *)sin))
   5359 			return (EINVAL);
   5360 
   5361 		if (connp->conn_family != AF_INET ||
   5362 		    sin->sin_family != AF_INET) {
   5363 			return (EAFNOSUPPORT);
   5364 		}
   5365 		v4src = sin->sin_addr.s_addr;
   5366 		IN6_IPADDR_TO_V4MAPPED(v4src, &v6src);
   5367 		if (v4src != INADDR_ANY) {
   5368 			laddr_type = ip_laddr_verify_v4(v4src, zoneid, ipst,
   5369 			    B_TRUE);
   5370 		}
   5371 		port = ntohs(sin->sin_port);
   5372 		break;
   5373 
   5374 	case sizeof (sin6_t):	/* complete IPv6 address */
   5375 		sin6 = (sin6_t *)sa;
   5376 
   5377 		if (sin6 == NULL || !OK_32PTR((char *)sin6))
   5378 			return (EINVAL);
   5379 
   5380 		if (connp->conn_family != AF_INET6 ||
   5381 		    sin6->sin6_family != AF_INET6) {
   5382 			return (EAFNOSUPPORT);
   5383 		}
   5384 		v6src = sin6->sin6_addr;
   5385 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
   5386 			if (connp->conn_ipv6_v6only)
   5387 				return (EADDRNOTAVAIL);
   5388 
   5389 			IN6_V4MAPPED_TO_IPADDR(&v6src, v4src);
   5390 			if (v4src != INADDR_ANY) {
   5391 				laddr_type = ip_laddr_verify_v4(v4src,
   5392 				    zoneid, ipst, B_FALSE);
   5393 			}
   5394 		} else {
   5395 			if (!IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
   5396 				if (IN6_IS_ADDR_LINKSCOPE(&v6src))
   5397 					scopeid = sin6->sin6_scope_id;
   5398 				laddr_type = ip_laddr_verify_v6(&v6src,
   5399 				    zoneid, ipst, B_TRUE, scopeid);
   5400 			}
   5401 		}
   5402 		port = ntohs(sin6->sin6_port);
   5403 		break;
   5404 
   5405 	default:		/* Invalid request */
   5406 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
   5407 		    "udp_bind: bad ADDR_length length %u", len);
   5408 		return (-TBADADDR);
   5409 	}
   5410 
   5411 	/* Is the local address a valid unicast, multicast, or broadcast? */
   5412 	if (laddr_type == IPVL_BAD)
   5413 		return (EADDRNOTAVAIL);
   5414 
   5415 	requested_port = port;
   5416 
   5417 	if (requested_port == 0 || !bind_to_req_port_only)
   5418 		bind_to_req_port_only = B_FALSE;
   5419 	else		/* T_BIND_REQ and requested_port != 0 */
   5420 		bind_to_req_port_only = B_TRUE;
   5421 
   5422 	if (requested_port == 0) {
   5423 		/*
   5424 		 * If the application passed in zero for the port number, it
   5425 		 * doesn't care which port number we bind to. Get one in the
   5426 		 * valid range.
   5427 		 */
   5428 		if (connp->conn_anon_priv_bind) {
   5429 			port = udp_get_next_priv_port(udp);
   5430 		} else {
   5431 			port = udp_update_next_port(udp,
   5432 			    us->us_next_port_to_try, B_TRUE);
   5433 		}
   5434 	} else {
   5435 		/*
   5436 		 * If the port is in the well-known privileged range,
   5437 		 * make sure the caller was privileged.
   5438 		 */
   5439 		int i;
   5440 		boolean_t priv = B_FALSE;
   5441 
   5442 		if (port < us->us_smallest_nonpriv_port) {
   5443 			priv = B_TRUE;
   5444 		} else {
   5445 			for (i = 0; i < us->us_num_epriv_ports; i++) {
   5446 				if (port == us->us_epriv_ports[i]) {
   5447 					priv = B_TRUE;
   5448 					break;
   5449 				}
   5450 			}
   5451 		}
   5452 
   5453 		if (priv) {
   5454 			if (secpolicy_net_privaddr(cr, port, IPPROTO_UDP) != 0)
   5455 				return (-TACCES);
   5456 		}
   5457 	}
   5458 
   5459 	if (port == 0)
   5460 		return (-TNOADDR);
   5461 
   5462 	/*
   5463 	 * The state must be TS_UNBND. TPI mandates that users must send
   5464 	 * TPI primitives only 1 at a time and wait for the response before
   5465 	 * sending the next primitive.
   5466 	 */
   5467 	mutex_enter(&connp->conn_lock);
   5468 	if (udp->udp_state != TS_UNBND) {
   5469 		mutex_exit(&connp->conn_lock);
   5470 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
   5471 		    "udp_bind: bad state, %u", udp->udp_state);
   5472 		return (-TOUTSTATE);
   5473 	}
   5474 	/*
   5475 	 * Copy the source address into our udp structure. This address
   5476 	 * may still be zero; if so, IP will fill in the correct address
   5477 	 * each time an outbound packet is passed to it. Since the udp is
   5478 	 * not yet in the bind hash list, we don't grab the uf_lock to
   5479 	 * change conn_ipversion
   5480 	 */
   5481 	if (connp->conn_family == AF_INET) {
   5482 		ASSERT(sin != NULL);
   5483 		ASSERT(connp->conn_ixa->ixa_flags & IXAF_IS_IPV4);
   5484 	} else {
   5485 		if (IN6_IS_ADDR_V4MAPPED(&v6src)) {
   5486 			/*
   5487 			 * no need to hold the uf_lock to set the conn_ipversion
   5488 			 * since we are not yet in the fanout list
   5489 			 */
   5490 			connp->conn_ipversion = IPV4_VERSION;
   5491 			connp->conn_ixa->ixa_flags |= IXAF_IS_IPV4;
   5492 		} else {
   5493 			connp->conn_ipversion = IPV6_VERSION;
   5494 			connp->conn_ixa->ixa_flags &= ~IXAF_IS_IPV4;
   5495 		}
   5496 	}
   5497 
   5498 	/*
   5499 	 * If conn_reuseaddr is not set, then we have to make sure that
   5500 	 * the IP address and port number the application requested
   5501 	 * (or we selected for the application) is not being used by
   5502 	 * another stream.  If another stream is already using the
   5503 	 * requested IP address and port, the behavior depends on
   5504 	 * "bind_to_req_port_only". If set the bind fails; otherwise we
   5505 	 * search for any an unused port to bind to the stream.
   5506 	 *
   5507 	 * As per the BSD semantics, as modified by the Deering multicast
   5508 	 * changes, if udp_reuseaddr is set, then we allow multiple binds
   5509 	 * to the same port independent of the local IP address.
   5510 	 *
   5511 	 * This is slightly different than in SunOS 4.X which did not
   5512 	 * support IP multicast. Note that the change implemented by the
   5513 	 * Deering multicast code effects all binds - not only binding
   5514 	 * to IP multicast addresses.
   5515 	 *
   5516 	 * Note that when binding to port zero we ignore SO_REUSEADDR in
   5517 	 * order to guarantee a unique port.
   5518 	 */
   5519 
   5520 	count = 0;
   5521 	if (connp->conn_anon_priv_bind) {
   5522 		/*
   5523 		 * loopmax = (IPPORT_RESERVED-1) -
   5524 		 *    us->us_min_anonpriv_port + 1
   5525 		 */
   5526 		loopmax = IPPORT_RESERVED - us->us_min_anonpriv_port;
   5527 	} else {
   5528 		loopmax = us->us_largest_anon_port -
   5529 		    us->us_smallest_anon_port + 1;
   5530 	}
   5531 
   5532 	is_inaddr_any = V6_OR_V4_INADDR_ANY(v6src);
   5533 
   5534 	for (;;) {
   5535 		udp_t		*udp1;
   5536 		boolean_t	found_exclbind = B_FALSE;
   5537 		conn_t		*connp1;
   5538 
   5539 		/*
   5540 		 * Walk through the list of udp streams bound to
   5541 		 * requested port with the same IP address.
   5542 		 */
   5543 		lport = htons(port);
   5544 		udpf = &us->us_bind_fanout[UDP_BIND_HASH(lport,
   5545 		    us->us_bind_fanout_size)];
   5546 		mutex_enter(&udpf->uf_lock);
   5547 		for (udp1 = udpf->uf_udp; udp1 != NULL;
   5548 		    udp1 = udp1->udp_bind_hash) {
   5549 			connp1 = udp1->udp_connp;
   5550 
   5551 			if (lport != connp1->conn_lport)
   5552 				continue;
   5553 
   5554 			/*
   5555 			 * On a labeled system, we must treat bindings to ports
   5556 			 * on shared IP addresses by sockets with MAC exemption
   5557 			 * privilege as being in all zones, as there's
   5558 			 * otherwise no way to identify the right receiver.
   5559 			 */
   5560 			if (!IPCL_BIND_ZONE_MATCH(connp1, connp))
   5561 				continue;
   5562 
   5563 			/*
   5564 			 * If UDP_EXCLBIND is set for either the bound or
   5565 			 * binding endpoint, the semantics of bind
   5566 			 * is changed according to the following chart.
   5567 			 *
   5568 			 * spec = specified address (v4 or v6)
   5569 			 * unspec = unspecified address (v4 or v6)
   5570 			 * A = specified addresses are different for endpoints
   5571 			 *
   5572 			 * bound	bind to		allowed?
   5573 			 * -------------------------------------
   5574 			 * unspec	unspec		no
   5575 			 * unspec	spec		no
   5576 			 * spec		unspec		no
   5577 			 * spec		spec		yes if A
   5578 			 *
   5579 			 * For labeled systems, SO_MAC_EXEMPT behaves the same
   5580 			 * as UDP_EXCLBIND, except that zoneid is ignored.
   5581 			 */
   5582 			if (connp1->conn_exclbind || connp->conn_exclbind ||
   5583 			    IPCL_CONNS_MAC(udp1->udp_connp, connp)) {
   5584 				if (V6_OR_V4_INADDR_ANY(
   5585 				    connp1->conn_bound_addr_v6) ||
   5586 				    is_inaddr_any ||
   5587 				    IN6_ARE_ADDR_EQUAL(
   5588 				    &connp1->conn_bound_addr_v6,
   5589 				    &v6src)) {
   5590 					found_exclbind = B_TRUE;
   5591 					break;
   5592 				}
   5593 				continue;
   5594 			}
   5595 
   5596 			/*
   5597 			 * Check ipversion to allow IPv4 and IPv6 sockets to
   5598 			 * have disjoint port number spaces.
   5599 			 */
   5600 			if (connp->conn_ipversion != connp1->conn_ipversion) {
   5601 
   5602 				/*
   5603 				 * On the first time through the loop, if the
   5604 				 * the user intentionally specified a
   5605 				 * particular port number, then ignore any
   5606 				 * bindings of the other protocol that may
   5607 				 * conflict. This allows the user to bind IPv6
   5608 				 * alone and get both v4 and v6, or bind both
   5609 				 * both and get each seperately. On subsequent
   5610 				 * times through the loop, we're checking a
   5611 				 * port that we chose (not the user) and thus
   5612 				 * we do not allow casual duplicate bindings.
   5613 				 */
   5614 				if (count == 0 && requested_port != 0)
   5615 					continue;
   5616 			}
   5617 
   5618 			/*
   5619 			 * No difference depending on SO_REUSEADDR.
   5620 			 *
   5621 			 * If existing port is bound to a
   5622 			 * non-wildcard IP address and
   5623 			 * the requesting stream is bound to
   5624 			 * a distinct different IP addresses
   5625 			 * (non-wildcard, also), keep going.
   5626 			 */
   5627 			if (!is_inaddr_any &&
   5628 			    !V6_OR_V4_INADDR_ANY(connp1->conn_bound_addr_v6) &&
   5629 			    !IN6_ARE_ADDR_EQUAL(&connp1->conn_laddr_v6,
   5630 			    &v6src)) {
   5631 				continue;
   5632 			}
   5633 			break;
   5634 		}
   5635 
   5636 		if (!found_exclbind &&
   5637 		    (connp->conn_reuseaddr && requested_port != 0)) {
   5638 			break;
   5639 		}
   5640 
   5641 		if (udp1 == NULL) {
   5642 			/*
   5643 			 * No other stream has this IP address
   5644 			 * and port number. We can use it.
   5645 			 */
   5646 			break;
   5647 		}
   5648 		mutex_exit(&udpf->uf_lock);
   5649 		if (bind_to_req_port_only) {
   5650 			/*
   5651 			 * We get here only when requested port
   5652 			 * is bound (and only first  of the for()
   5653 			 * loop iteration).
   5654 			 *
   5655 			 * The semantics of this bind request
   5656 			 * require it to fail so we return from
   5657 			 * the routine (and exit the loop).
   5658 			 *
   5659 			 */
   5660 			mutex_exit(&connp->conn_lock);
   5661 			return (-TADDRBUSY);
   5662 		}
   5663 
   5664 		if (connp->conn_anon_priv_bind) {
   5665 			port = udp_get_next_priv_port(udp);
   5666 		} else {
   5667 			if ((count == 0) && (requested_port != 0)) {
   5668 				/*
   5669 				 * If the application wants us to find
   5670 				 * a port, get one to start with. Set
   5671 				 * requested_port to 0, so that we will
   5672 				 * update us->us_next_port_to_try below.
   5673 				 */
   5674 				port = udp_update_next_port(udp,
   5675 				    us->us_next_port_to_try, B_TRUE);
   5676 				requested_port = 0;
   5677 			} else {
   5678 				port = udp_update_next_port(udp, port + 1,
   5679 				    B_FALSE);
   5680 			}
   5681 		}
   5682 
   5683 		if (port == 0 || ++count >= loopmax) {
   5684 			/*
   5685 			 * We've tried every possible port number and
   5686 			 * there are none available, so send an error
   5687 			 * to the user.
   5688 			 */
   5689 			mutex_exit(&connp->conn_lock);
   5690 			return (-TNOADDR);
   5691 		}
   5692 	}
   5693 
   5694 	/*
   5695 	 * Copy the source address into our udp structure.  This address
   5696 	 * may still be zero; if so, ip_attr_connect will fill in the correct
   5697 	 * address when a packet is about to be sent.
   5698 	 * If we are binding to a broadcast or multicast address then
   5699 	 * we just set the conn_bound_addr since we don't want to use
   5700 	 * that as the source address when sending.
   5701 	 */
   5702 	connp->conn_bound_addr_v6 = v6src;
   5703 	connp->conn_laddr_v6 = v6src;
   5704 	if (scopeid != 0) {
   5705 		connp->conn_ixa->ixa_flags |= IXAF_SCOPEID_SET;
   5706 		connp->conn_ixa->ixa_scopeid = scopeid;
   5707 		connp->conn_incoming_ifindex = scopeid;
   5708 	} else {
   5709 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
   5710 		connp->conn_incoming_ifindex = connp->conn_bound_if;
   5711 	}
   5712 
   5713 	switch (laddr_type) {
   5714 	case IPVL_UNICAST_UP:
   5715 	case IPVL_UNICAST_DOWN:
   5716 		connp->conn_saddr_v6 = v6src;
   5717 		connp->conn_mcbc_bind = B_FALSE;
   5718 		break;
   5719 	case IPVL_MCAST:
   5720 	case IPVL_BCAST:
   5721 		/* ip_set_destination will pick a source address later */
   5722 		connp->conn_saddr_v6 = ipv6_all_zeros;
   5723 		connp->conn_mcbc_bind = B_TRUE;
   5724 		break;
   5725 	}
   5726 
   5727 	/* Any errors after this point should use late_error */
   5728 	connp->conn_lport = lport;
   5729 
   5730 	/*
   5731 	 * Now reset the next anonymous port if the application requested
   5732 	 * an anonymous port, or we handed out the next anonymous port.
   5733 	 */
   5734 	if ((requested_port == 0) && (!connp->conn_anon_priv_bind)) {
   5735 		us->us_next_port_to_try = port + 1;
   5736 	}
   5737 
   5738 	/* Initialize the T_BIND_ACK. */
   5739 	if (connp->conn_family == AF_INET) {
   5740 		sin->sin_port = connp->conn_lport;
   5741 	} else {
   5742 		sin6->sin6_port = connp->conn_lport;
   5743 	}
   5744 	udp->udp_state = TS_IDLE;
   5745 	udp_bind_hash_insert(udpf, udp);
   5746 	mutex_exit(&udpf->uf_lock);
   5747 	mutex_exit(&connp->conn_lock);
   5748 
   5749 	if (cl_inet_bind) {
   5750 		/*
   5751 		 * Running in cluster mode - register bind information
   5752 		 */
   5753 		if (connp->conn_ipversion == IPV4_VERSION) {
   5754 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
   5755 			    IPPROTO_UDP, AF_INET, (uint8_t *)&v4src,
   5756 			    (in_port_t)connp->conn_lport, NULL);
   5757 		} else {
   5758 			(*cl_inet_bind)(connp->conn_netstack->netstack_stackid,
   5759 			    IPPROTO_UDP, AF_INET6, (uint8_t *)&v6src,
   5760 			    (in_port_t)connp->conn_lport, NULL);
   5761 		}
   5762 	}
   5763 
   5764 	mutex_enter(&connp->conn_lock);
   5765 	connp->conn_anon_port = (is_system_labeled() && requested_port == 0);
   5766 	if (is_system_labeled() && (!connp->conn_anon_port ||
   5767 	    connp->conn_anon_mlp)) {
   5768 		uint16_t mlpport;
   5769 		zone_t *zone;
   5770 
   5771 		zone = crgetzone(cr);
   5772 		connp->conn_mlp_type =
   5773 		    connp->conn_recv_ancillary.crb_recvucred ? mlptBoth :
   5774 		    mlptSingle;
   5775 		addrtype = tsol_mlp_addr_type(
   5776 		    connp->conn_allzones ? ALL_ZONES : zone->zone_id,
   5777 		    IPV6_VERSION, &v6src, us->us_netstack->netstack_ip);
   5778 		if (addrtype == mlptSingle) {
   5779 			error = -TNOADDR;
   5780 			mutex_exit(&connp->conn_lock);
   5781 			goto late_error;
   5782 		}
   5783 		mlpport = connp->conn_anon_port ? PMAPPORT : port;
   5784 		mlptype = tsol_mlp_port_type(zone, IPPROTO_UDP, mlpport,
   5785 		    addrtype);
   5786 
   5787 		/*
   5788 		 * It is a coding error to attempt to bind an MLP port
   5789 		 * without first setting SOL_SOCKET/SCM_UCRED.
   5790 		 */
   5791 		if (mlptype != mlptSingle &&
   5792 		    connp->conn_mlp_type == mlptSingle) {
   5793 			error = EINVAL;
   5794 			mutex_exit(&connp->conn_lock);
   5795 			goto late_error;
   5796 		}
   5797 
   5798 		/*
   5799 		 * It is an access violation to attempt to bind an MLP port
   5800 		 * without NET_BINDMLP privilege.
   5801 		 */
   5802 		if (mlptype != mlptSingle &&
   5803 		    secpolicy_net_bindmlp(cr) != 0) {
   5804 			if (connp->conn_debug) {
   5805 				(void) strlog(UDP_MOD_ID, 0, 1,
   5806 				    SL_ERROR|SL_TRACE,
   5807 				    "udp_bind: no priv for multilevel port %d",
   5808 				    mlpport);
   5809 			}
   5810 			error = -TACCES;
   5811 			mutex_exit(&connp->conn_lock);
   5812 			goto late_error;
   5813 		}
   5814 
   5815 		/*
   5816 		 * If we're specifically binding a shared IP address and the
   5817 		 * port is MLP on shared addresses, then check to see if this
   5818 		 * zone actually owns the MLP.  Reject if not.
   5819 		 */
   5820 		if (mlptype == mlptShared && addrtype == mlptShared) {
   5821 			/*
   5822 			 * No need to handle exclusive-stack zones since
   5823 			 * ALL_ZONES only applies to the shared stack.
   5824 			 */
   5825 			zoneid_t mlpzone;
   5826 
   5827 			mlpzone = tsol_mlp_findzone(IPPROTO_UDP,
   5828 			    htons(mlpport));
   5829 			if (connp->conn_zoneid != mlpzone) {
   5830 				if (connp->conn_debug) {
   5831 					(void) strlog(UDP_MOD_ID, 0, 1,
   5832 					    SL_ERROR|SL_TRACE,
   5833 					    "udp_bind: attempt to bind port "
   5834 					    "%d on shared addr in zone %d "
   5835 					    "(should be %d)",
   5836 					    mlpport, connp->conn_zoneid,
   5837 					    mlpzone);
   5838 				}
   5839 				error = -TACCES;
   5840 				mutex_exit(&connp->conn_lock);
   5841 				goto late_error;
   5842 			}
   5843 		}
   5844 		if (connp->conn_anon_port) {
   5845 			error = tsol_mlp_anon(zone, mlptype, connp->conn_proto,
   5846 			    port, B_TRUE);
   5847 			if (error != 0) {
   5848 				if (connp->conn_debug) {
   5849 					(void) strlog(UDP_MOD_ID, 0, 1,
   5850 					    SL_ERROR|SL_TRACE,
   5851 					    "udp_bind: cannot establish anon "
   5852 					    "MLP for port %d", port);
   5853 				}
   5854 				error = -TACCES;
   5855 				mutex_exit(&connp->conn_lock);
   5856 				goto late_error;
   5857 			}
   5858 		}
   5859 		connp->conn_mlp_type = mlptype;
   5860 	}
   5861 
   5862 	/*
   5863 	 * We create an initial header template here to make a subsequent
   5864 	 * sendto have a starting point. Since conn_last_dst is zero the
   5865 	 * first sendto will always follow the 'dst changed' code path.
   5866 	 * Note that we defer massaging options and the related checksum
   5867 	 * adjustment until we have a destination address.
   5868 	 */
   5869 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
   5870 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
   5871 	if (error != 0) {
   5872 		mutex_exit(&connp->conn_lock);
   5873 		goto late_error;
   5874 	}
   5875 	/* Just in case */
   5876 	connp->conn_faddr_v6 = ipv6_all_zeros;
   5877 	connp->conn_fport = 0;
   5878 	connp->conn_v6lastdst = ipv6_all_zeros;
   5879 	mutex_exit(&connp->conn_lock);
   5880 
   5881 	error = ip_laddr_fanout_insert(connp);
   5882 	if (error != 0)
   5883 		goto late_error;
   5884 
   5885 	/* Bind succeeded */
   5886 	return (0);
   5887 
   5888 late_error:
   5889 	/* We had already picked the port number, and then the bind failed */
   5890 	mutex_enter(&connp->conn_lock);
   5891 	udpf = &us->us_bind_fanout[
   5892 	    UDP_BIND_HASH(connp->conn_lport,
   5893 	    us->us_bind_fanout_size)];
   5894 	mutex_enter(&udpf->uf_lock);
   5895 	connp->conn_saddr_v6 = ipv6_all_zeros;
   5896 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
   5897 	connp->conn_laddr_v6 = ipv6_all_zeros;
   5898 	if (scopeid != 0) {
   5899 		connp->conn_ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
   5900 		connp->conn_incoming_ifindex = connp->conn_bound_if;
   5901 	}
   5902 	udp->udp_state = TS_UNBND;
   5903 	udp_bind_hash_remove(udp, B_TRUE);
   5904 	connp->conn_lport = 0;
   5905 	mutex_exit(&udpf->uf_lock);
   5906 	connp->conn_anon_port = B_FALSE;
   5907 	connp->conn_mlp_type = mlptSingle;
   5908 
   5909 	connp->conn_v6lastdst = ipv6_all_zeros;
   5910 
   5911 	/* Restore the header that was built above - different source address */
   5912 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
   5913 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
   5914 	mutex_exit(&connp->conn_lock);
   5915 	return (error);
   5916 }
   5917 
   5918 int
   5919 udp_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
   5920     socklen_t len, cred_t *cr)
   5921 {
   5922 	int		error;
   5923 	conn_t		*connp;
   5924 
   5925 	/* All Solaris components should pass a cred for this operation. */
   5926 	ASSERT(cr != NULL);
   5927 
   5928 	connp = (conn_t *)proto_handle;
   5929 
   5930 	if (sa == NULL)
   5931 		error = udp_do_unbind(connp);
   5932 	else
   5933 		error = udp_do_bind(connp, sa, len, cr, B_TRUE);
   5934 
   5935 	if (error < 0) {
   5936 		if (error == -TOUTSTATE)
   5937 			error = EINVAL;
   5938 		else
   5939 			error = proto_tlitosyserr(-error);
   5940 	}
   5941 
   5942 	return (error);
   5943 }
   5944 
   5945 static int
   5946 udp_implicit_bind(conn_t *connp, cred_t *cr)
   5947 {
   5948 	sin6_t sin6addr;
   5949 	sin_t *sin;
   5950 	sin6_t *sin6;
   5951 	socklen_t len;
   5952 	int error;
   5953 
   5954 	/* All Solaris components should pass a cred for this operation. */
   5955 	ASSERT(cr != NULL);
   5956 
   5957 	if (connp->conn_family == AF_INET) {
   5958 		len = sizeof (struct sockaddr_in);
   5959 		sin = (sin_t *)&sin6addr;
   5960 		*sin = sin_null;
   5961 		sin->sin_family = AF_INET;
   5962 		sin->sin_addr.s_addr = INADDR_ANY;
   5963 	} else {
   5964 		ASSERT(connp->conn_family == AF_INET6);
   5965 		len = sizeof (sin6_t);
   5966 		sin6 = (sin6_t *)&sin6addr;
   5967 		*sin6 = sin6_null;
   5968 		sin6->sin6_family = AF_INET6;
   5969 		V6_SET_ZERO(sin6->sin6_addr);
   5970 	}
   5971 
   5972 	error = udp_do_bind(connp, (struct sockaddr *)&sin6addr, len,
   5973 	    cr, B_FALSE);
   5974 	return ((error < 0) ? proto_tlitosyserr(-error) : error);
   5975 }
   5976 
   5977 /*
   5978  * This routine removes a port number association from a stream. It
   5979  * is called by udp_unbind and udp_tpi_unbind.
   5980  */
   5981 static int
   5982 udp_do_unbind(conn_t *connp)
   5983 {
   5984 	udp_t 		*udp = connp->conn_udp;
   5985 	udp_fanout_t	*udpf;
   5986 	udp_stack_t	*us = udp->udp_us;
   5987 
   5988 	if (cl_inet_unbind != NULL) {
   5989 		/*
   5990 		 * Running in cluster mode - register unbind information
   5991 		 */
   5992 		if (connp->conn_ipversion == IPV4_VERSION) {
   5993 			(*cl_inet_unbind)(
   5994 			    connp->conn_netstack->netstack_stackid,
   5995 			    IPPROTO_UDP, AF_INET,
   5996 			    (uint8_t *)(&V4_PART_OF_V6(connp->conn_laddr_v6)),
   5997 			    (in_port_t)connp->conn_lport, NULL);
   5998 		} else {
   5999 			(*cl_inet_unbind)(
   6000 			    connp->conn_netstack->netstack_stackid,
   6001 			    IPPROTO_UDP, AF_INET6,
   6002 			    (uint8_t *)&(connp->conn_laddr_v6),
   6003 			    (in_port_t)connp->conn_lport, NULL);
   6004 		}
   6005 	}
   6006 
   6007 	mutex_enter(&connp->conn_lock);
   6008 	/* If a bind has not been done, we can't unbind. */
   6009 	if (udp->udp_state == TS_UNBND) {
   6010 		mutex_exit(&connp->conn_lock);
   6011 		return (-TOUTSTATE);
   6012 	}
   6013 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
   6014 	    us->us_bind_fanout_size)];
   6015 	mutex_enter(&udpf->uf_lock);
   6016 	udp_bind_hash_remove(udp, B_TRUE);
   6017 	connp->conn_saddr_v6 = ipv6_all_zeros;
   6018 	connp->conn_bound_addr_v6 = ipv6_all_zeros;
   6019 	connp->conn_laddr_v6 = ipv6_all_zeros;
   6020 	connp->conn_mcbc_bind = B_FALSE;
   6021 	connp->conn_lport = 0;
   6022 	/* In case we were also connected */
   6023 	connp->conn_faddr_v6 = ipv6_all_zeros;
   6024 	connp->conn_fport = 0;
   6025 	mutex_exit(&udpf->uf_lock);
   6026 
   6027 	connp->conn_v6lastdst = ipv6_all_zeros;
   6028 	udp->udp_state = TS_UNBND;
   6029 
   6030 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
   6031 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
   6032 	mutex_exit(&connp->conn_lock);
   6033 
   6034 	ip_unbind(connp);
   6035 
   6036 	return (0);
   6037 }
   6038 
   6039 /*
   6040  * It associates a default destination address with the stream.
   6041  */
   6042 static int
   6043 udp_do_connect(conn_t *connp, const struct sockaddr *sa, socklen_t len,
   6044     cred_t *cr, pid_t pid)
   6045 {
   6046 	sin6_t		*sin6;
   6047 	sin_t		*sin;
   6048 	in6_addr_t 	v6dst;
   6049 	ipaddr_t 	v4dst;
   6050 	uint16_t 	dstport;
   6051 	uint32_t 	flowinfo;
   6052 	udp_fanout_t	*udpf;
   6053 	udp_t		*udp, *udp1;
   6054 	ushort_t	ipversion;
   6055 	udp_stack_t	*us;
   6056 	int		error;
   6057 	conn_t		*connp1;
   6058 	ip_xmit_attr_t	*ixa;
   6059 	uint_t		scopeid = 0;
   6060 	uint_t		srcid = 0;
   6061 	in6_addr_t	v6src = connp->conn_saddr_v6;
   6062 
   6063 	udp = connp->conn_udp;
   6064 	us = udp->udp_us;
   6065 
   6066 	/*
   6067 	 * Address has been verified by the caller
   6068 	 */
   6069 	switch (len) {
   6070 	default:
   6071 		/*
   6072 		 * Should never happen
   6073 		 */
   6074 		return (EINVAL);
   6075 
   6076 	case sizeof (sin_t):
   6077 		sin = (sin_t *)sa;
   6078 		v4dst = sin->sin_addr.s_addr;
   6079 		dstport = sin->sin_port;
   6080 		IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
   6081 		ASSERT(connp->conn_ipversion == IPV4_VERSION);
   6082 		ipversion = IPV4_VERSION;
   6083 		break;
   6084 
   6085 	case sizeof (sin6_t):
   6086 		sin6 = (sin6_t *)sa;
   6087 		v6dst = sin6->sin6_addr;
   6088 		dstport = sin6->sin6_port;
   6089 		srcid = sin6->__sin6_src_id;
   6090 		if (srcid != 0 && IN6_IS_ADDR_UNSPECIFIED(&v6src)) {
   6091 			ip_srcid_find_id(srcid, &v6src, IPCL_ZONEID(connp),
   6092 			    connp->conn_netstack);
   6093 		}
   6094 		if (IN6_IS_ADDR_V4MAPPED(&v6dst)) {
   6095 			if (connp->conn_ipv6_v6only)
   6096 				return (EADDRNOTAVAIL);
   6097 
   6098 			/*
   6099 			 * Destination adress is mapped IPv6 address.
   6100 			 * Source bound address should be unspecified or
   6101 			 * IPv6 mapped address as well.
   6102 			 */
   6103 			if (!IN6_IS_ADDR_UNSPECIFIED(
   6104 			    &connp->conn_bound_addr_v6) &&
   6105 			    !IN6_IS_ADDR_V4MAPPED(&connp->conn_bound_addr_v6)) {
   6106 				return (EADDRNOTAVAIL);
   6107 			}
   6108 			IN6_V4MAPPED_TO_IPADDR(&v6dst, v4dst);
   6109 			ipversion = IPV4_VERSION;
   6110 			flowinfo = 0;
   6111 		} else {
   6112 			ipversion = IPV6_VERSION;
   6113 			flowinfo = sin6->sin6_flowinfo;
   6114 			if (IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr))
   6115 				scopeid = sin6->sin6_scope_id;
   6116 		}
   6117 		break;
   6118 	}
   6119 
   6120 	if (dstport == 0)
   6121 		return (-TBADADDR);
   6122 
   6123 	/*
   6124 	 * If there is a different thread using conn_ixa then we get a new
   6125 	 * copy and cut the old one loose from conn_ixa. Otherwise we use
   6126 	 * conn_ixa and prevent any other thread from using/changing it.
   6127 	 * Once connect() is done other threads can use conn_ixa since the
   6128 	 * refcnt will be back at one.
   6129 	 */
   6130 	ixa = conn_get_ixa(connp, B_TRUE);
   6131 	if (ixa == NULL)
   6132 		return (ENOMEM);
   6133 
   6134 	ASSERT(ixa->ixa_refcnt >= 2);
   6135 	ASSERT(ixa == connp->conn_ixa);
   6136 
   6137 	mutex_enter(&connp->conn_lock);
   6138 	/*
   6139 	 * This udp_t must have bound to a port already before doing a connect.
   6140 	 * Reject if a connect is in progress (we drop conn_lock during
   6141 	 * udp_do_connect).
   6142 	 */
   6143 	if (udp->udp_state == TS_UNBND || udp->udp_state == TS_WCON_CREQ) {
   6144 		mutex_exit(&connp->conn_lock);
   6145 		(void) strlog(UDP_MOD_ID, 0, 1, SL_ERROR|SL_TRACE,
   6146 		    "udp_connect: bad state, %u", udp->udp_state);
   6147 		ixa_refrele(ixa);
   6148 		return (-TOUTSTATE);
   6149 	}
   6150 	ASSERT(connp->conn_lport != 0 && udp->udp_ptpbhn != NULL);
   6151 
   6152 	udpf = &us->us_bind_fanout[UDP_BIND_HASH(connp->conn_lport,
   6153 	    us->us_bind_fanout_size)];
   6154 
   6155 	mutex_enter(&udpf->uf_lock);
   6156 	if (udp->udp_state == TS_DATA_XFER) {
   6157 		/* Already connected - clear out state */
   6158 		if (connp->conn_mcbc_bind)
   6159 			connp->conn_saddr_v6 = ipv6_all_zeros;
   6160 		else
   6161 			connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
   6162 		connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
   6163 		connp->conn_faddr_v6 = ipv6_all_zeros;
   6164 		connp->conn_fport = 0;
   6165 		udp->udp_state = TS_IDLE;
   6166 	}
   6167 
   6168 	connp->conn_fport = dstport;
   6169 	connp->conn_ipversion = ipversion;
   6170 	if (ipversion == IPV4_VERSION) {
   6171 		/*
   6172 		 * Interpret a zero destination to mean loopback.
   6173 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
   6174 		 * generate the T_CONN_CON.
   6175 		 */
   6176 		if (v4dst == INADDR_ANY) {
   6177 			v4dst = htonl(INADDR_LOOPBACK);
   6178 			IN6_IPADDR_TO_V4MAPPED(v4dst, &v6dst);
   6179 			if (connp->conn_family == AF_INET) {
   6180 				sin->sin_addr.s_addr = v4dst;
   6181 			} else {
   6182 				sin6->sin6_addr = v6dst;
   6183 			}
   6184 		}
   6185 		connp->conn_faddr_v6 = v6dst;
   6186 		connp->conn_flowinfo = 0;
   6187 	} else {
   6188 		ASSERT(connp->conn_ipversion == IPV6_VERSION);
   6189 		/*
   6190 		 * Interpret a zero destination to mean loopback.
   6191 		 * Update the T_CONN_REQ (sin/sin6) since it is used to
   6192 		 * generate the T_CONN_CON.
   6193 		 */
   6194 		if (IN6_IS_ADDR_UNSPECIFIED(&v6dst)) {
   6195 			v6dst = ipv6_loopback;
   6196 			sin6->sin6_addr = v6dst;
   6197 		}
   6198 		connp->conn_faddr_v6 = v6dst;
   6199 		connp->conn_flowinfo = flowinfo;
   6200 	}
   6201 	mutex_exit(&udpf->uf_lock);
   6202 
   6203 	ixa->ixa_cred = cr;
   6204 	ixa->ixa_cpid = pid;
   6205 	if (is_system_labeled()) {
   6206 		/* We need to restart with a label based on the cred */
   6207 		ip_xmit_attr_restore_tsl(ixa, ixa->ixa_cred);
   6208 	}
   6209 
   6210 	if (scopeid != 0) {
   6211 		ixa->ixa_flags |= IXAF_SCOPEID_SET;
   6212 		ixa->ixa_scopeid = scopeid;
   6213 		connp->conn_incoming_ifindex = scopeid;
   6214 	} else {
   6215 		ixa->ixa_flags &= ~IXAF_SCOPEID_SET;
   6216 		connp->conn_incoming_ifindex = connp->conn_bound_if;
   6217 	}
   6218 	/*
   6219 	 * conn_connect will drop conn_lock and reacquire it.
   6220 	 * To prevent a send* from messing with this udp_t while the lock
   6221 	 * is dropped we set udp_state and clear conn_v6lastdst.
   6222 	 * That will make all send* fail with EISCONN.
   6223 	 */
   6224 	connp->conn_v6lastdst = ipv6_all_zeros;
   6225 	udp->udp_state = TS_WCON_CREQ;
   6226 
   6227 	error = conn_connect(connp, NULL, IPDF_ALLOW_MCBC);
   6228 	mutex_exit(&connp->conn_lock);
   6229 	if (error != 0)
   6230 		goto connect_failed;
   6231 
   6232 	/*
   6233 	 * The addresses have been verified. Time to insert in
   6234 	 * the correct fanout list.
   6235 	 */
   6236 	error = ipcl_conn_insert(connp);
   6237 	if (error != 0)
   6238 		goto connect_failed;
   6239 
   6240 	mutex_enter(&connp->conn_lock);
   6241 	error = udp_build_hdr_template(connp, &connp->conn_saddr_v6,
   6242 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
   6243 	if (error != 0) {
   6244 		mutex_exit(&connp->conn_lock);
   6245 		goto connect_failed;
   6246 	}
   6247 
   6248 	udp->udp_state = TS_DATA_XFER;
   6249 	/* Record this as the "last" send even though we haven't sent any */
   6250 	connp->conn_v6lastdst = connp->conn_faddr_v6;
   6251 	connp->conn_lastipversion = connp->conn_ipversion;
   6252 	connp->conn_lastdstport = connp->conn_fport;
   6253 	connp->conn_lastflowinfo = connp->conn_flowinfo;
   6254 	connp->conn_lastscopeid = scopeid;
   6255 	connp->conn_lastsrcid = srcid;
   6256 	/* Also remember a source to use together with lastdst */
   6257 	connp->conn_v6lastsrc = v6src;
   6258 	mutex_exit(&connp->conn_lock);
   6259 
   6260 	/*
   6261 	 * We've picked a source address above. Now we can
   6262 	 * verify that the src/port/dst/port is unique for all
   6263 	 * connections in TS_DATA_XFER, skipping ourselves.
   6264 	 */
   6265 	mutex_enter(&udpf->uf_lock);
   6266 	for (udp1 = udpf->uf_udp; udp1 != NULL; udp1 = udp1->udp_bind_hash) {
   6267 		if (udp1->udp_state != TS_DATA_XFER)
   6268 			continue;
   6269 
   6270 		if (udp1 == udp)
   6271 			continue;
   6272 
   6273 		connp1 = udp1->udp_connp;
   6274 		if (connp->conn_lport != connp1->conn_lport ||
   6275 		    connp->conn_ipversion != connp1->conn_ipversion ||
   6276 		    dstport != connp1->conn_fport ||
   6277 		    !IN6_ARE_ADDR_EQUAL(&connp->conn_laddr_v6,
   6278 		    &connp1->conn_laddr_v6) ||
   6279 		    !IN6_ARE_ADDR_EQUAL(&v6dst, &connp1->conn_faddr_v6) ||
   6280 		    !(IPCL_ZONE_MATCH(connp, connp1->conn_zoneid) ||
   6281 		    IPCL_ZONE_MATCH(connp1, connp->conn_zoneid)))
   6282 			continue;
   6283 		mutex_exit(&udpf->uf_lock);
   6284 		error = -TBADADDR;
   6285 		goto connect_failed;
   6286 	}
   6287 	if (cl_inet_connect2 != NULL) {
   6288 		CL_INET_UDP_CONNECT(connp, B_TRUE, &v6dst, dstport, error);
   6289 		if (error != 0) {
   6290 			mutex_exit(&udpf->uf_lock);
   6291 			error = -TBADADDR;
   6292 			goto connect_failed;
   6293 		}
   6294 	}
   6295 	mutex_exit(&udpf->uf_lock);
   6296 
   6297 	ixa_refrele(ixa);
   6298 	return (0);
   6299 
   6300 connect_failed:
   6301 	if (ixa != NULL)
   6302 		ixa_refrele(ixa);
   6303 	mutex_enter(&connp->conn_lock);
   6304 	mutex_enter(&udpf->uf_lock);
   6305 	udp->udp_state = TS_IDLE;
   6306 	connp->conn_faddr_v6 = ipv6_all_zeros;
   6307 	connp->conn_fport = 0;
   6308 	/* In case the source address was set above */
   6309 	if (connp->conn_mcbc_bind)
   6310 		connp->conn_saddr_v6 = ipv6_all_zeros;
   6311 	else
   6312 		connp->conn_saddr_v6 = connp->conn_bound_addr_v6;
   6313 	connp->conn_laddr_v6 = connp->conn_bound_addr_v6;
   6314 	mutex_exit(&udpf->uf_lock);
   6315 
   6316 	connp->conn_v6lastdst = ipv6_all_zeros;
   6317 	connp->conn_flowinfo = 0;
   6318 
   6319 	(void) udp_build_hdr_template(connp, &connp->conn_saddr_v6,
   6320 	    &connp->conn_faddr_v6, connp->conn_fport, connp->conn_flowinfo);
   6321 	mutex_exit(&connp->conn_lock);
   6322 	return (error);
   6323 }
   6324 
   6325 static int
   6326 udp_connect(sock_lower_handle_t proto_handle, const struct sockaddr *sa,
   6327     socklen_t len, sock_connid_t *id, cred_t *cr)
   6328 {
   6329 	conn_t	*connp = (conn_t *)proto_handle;
   6330 	udp_t	*udp = connp->conn_udp;
   6331 	int	error;
   6332 	boolean_t did_bind = B_FALSE;
   6333 	pid_t	pid = curproc->p_pid;
   6334 
   6335 	/* All Solaris components should pass a cred for this operation. */
   6336 	ASSERT(cr != NULL);
   6337 
   6338 	if (sa == NULL) {
   6339 		/*
   6340 		 * Disconnect
   6341 		 * Make sure we are connected
   6342 		 */
   6343 		if (udp->udp_state != TS_DATA_XFER)
   6344 			return (EINVAL);
   6345 
   6346 		error = udp_disconnect(connp);
   6347 		return (error);
   6348 	}
   6349 
   6350 	error = proto_verify_ip_addr(connp->conn_family, sa, len);
   6351 	if (error != 0)
   6352 		goto done;
   6353 
   6354 	/* do an implicit bind if necessary */
   6355 	if (udp->udp_state == TS_UNBND) {
   6356 		error = udp_implicit_bind(connp, cr);
   6357 		/*
   6358 		 * We could be racing with an actual bind, in which case
   6359 		 * we would see EPROTO. We cross our fingers and try
   6360 		 * to connect.
   6361 		 */
   6362 		if (!(error == 0 || error == EPROTO))
   6363 			goto done;
   6364 		did_bind = B_TRUE;
   6365 	}
   6366 	/*
   6367 	 * set SO_DGRAM_ERRIND
   6368 	 */
   6369 	connp->conn_dgram_errind = B_TRUE;
   6370 
   6371 	error = udp_do_connect(connp, sa, len, cr, pid);
   6372 
   6373 	if (error != 0 && did_bind) {
   6374 		int unbind_err;
   6375 
   6376 		unbind_err = udp_do_unbind(connp);
   6377 		ASSERT(unbind_err == 0);
   6378 	}
   6379 
   6380 	if (error == 0) {
   6381 		*id = 0;
   6382 		(*connp->conn_upcalls->su_connected)
   6383 		    (connp->conn_upper_handle, 0, NULL, -1);
   6384 	} else if (error < 0) {
   6385 		error = proto_tlitosyserr(-error);
   6386 	}
   6387 
   6388 done:
   6389 	if (error != 0 && udp->udp_state == TS_DATA_XFER) {
   6390 		/*
   6391 		 * No need to hold locks to set state
   6392 		 * after connect failure socket state is undefined
   6393 		 * We set the state only to imitate old sockfs behavior
   6394 		 */
   6395 		udp->udp_state = TS_IDLE;
   6396 	}
   6397 	return (error);
   6398 }
   6399 
   6400 int
   6401 udp_send(sock_lower_handle_t proto_handle, mblk_t *mp, struct nmsghdr *msg,
   6402     cred_t *cr)
   6403 {
   6404 	sin6_t		*sin6;
   6405 	sin_t		*sin = NULL;
   6406 	uint_t		srcid;
   6407 	conn_t		*connp = (conn_t *)proto_handle;
   6408 	udp_t		*udp = connp->conn_udp;
   6409 	int		error = 0;
   6410 	udp_stack_t	*us = udp->udp_us;
   6411 	ushort_t	ipversion;
   6412 	pid_t		pid = curproc->p_pid;
   6413 	ip_xmit_attr_t	*ixa;
   6414 
   6415 	ASSERT(DB_TYPE(mp) == M_DATA);
   6416 
   6417 	/* All Solaris components should pass a cred for this operation. */
   6418 	ASSERT(cr != NULL);
   6419 
   6420 	/* do an implicit bind if necessary */
   6421 	if (udp->udp_state == TS_UNBND) {
   6422 		error = udp_implicit_bind(connp, cr);
   6423 		/*
   6424 		 * We could be racing with an actual bind, in which case
   6425 		 * we would see EPROTO. We cross our fingers and try
   6426 		 * to connect.
   6427 		 */
   6428 		if (!(error == 0 || error == EPROTO)) {
   6429 			freemsg(mp);
   6430 			return (error);
   6431 		}
   6432 	}
   6433 
   6434 	/* Connected? */
   6435 	if (msg->msg_name == NULL) {
   6436 		if (udp->udp_state != TS_DATA_XFER) {
   6437 			BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   6438 			return (EDESTADDRREQ);
   6439 		}
   6440 		if (msg->msg_controllen != 0) {
   6441 			error = udp_output_ancillary(connp, NULL, NULL, mp,
   6442 			    NULL, msg, cr, pid);
   6443 		} else {
   6444 			error = udp_output_connected(connp, mp, cr, pid);
   6445 		}
   6446 		if (us->us_sendto_ignerr)
   6447 			return (0);
   6448 		else
   6449 			return (error);
   6450 	}
   6451 	if (udp->udp_state == TS_DATA_XFER) {
   6452 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   6453 		return (EISCONN);
   6454 	}
   6455 	error = proto_verify_ip_addr(connp->conn_family,
   6456 	    (struct sockaddr *)msg->msg_name, msg->msg_namelen);
   6457 	if (error != 0) {
   6458 		BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   6459 		return (error);
   6460 	}
   6461 	switch (connp->conn_family) {
   6462 	case AF_INET6:
   6463 		sin6 = (sin6_t *)msg->msg_name;
   6464 
   6465 		srcid = sin6->__sin6_src_id;
   6466 
   6467 		if (!IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
   6468 			/*
   6469 			 * Destination is a non-IPv4-compatible IPv6 address.
   6470 			 * Send out an IPv6 format packet.
   6471 			 */
   6472 
   6473 			/*
   6474 			 * If the local address is a mapped address return
   6475 			 * an error.
   6476 			 * It would be possible to send an IPv6 packet but the
   6477 			 * response would never make it back to the application
   6478 			 * since it is bound to a mapped address.
   6479 			 */
   6480 			if (IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6)) {
   6481 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   6482 				return (EADDRNOTAVAIL);
   6483 			}
   6484 			if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
   6485 				sin6->sin6_addr = ipv6_loopback;
   6486 			ipversion = IPV6_VERSION;
   6487 		} else {
   6488 			if (connp->conn_ipv6_v6only) {
   6489 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   6490 				return (EADDRNOTAVAIL);
   6491 			}
   6492 
   6493 			/*
   6494 			 * If the local address is not zero or a mapped address
   6495 			 * return an error.  It would be possible to send an
   6496 			 * IPv4 packet but the response would never make it
   6497 			 * back to the application since it is bound to a
   6498 			 * non-mapped address.
   6499 			 */
   6500 			if (!IN6_IS_ADDR_V4MAPPED(&connp->conn_saddr_v6) &&
   6501 			    !IN6_IS_ADDR_UNSPECIFIED(&connp->conn_saddr_v6)) {
   6502 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   6503 				return (EADDRNOTAVAIL);
   6504 			}
   6505 
   6506 			if (V4_PART_OF_V6(sin6->sin6_addr) == INADDR_ANY) {
   6507 				V4_PART_OF_V6(sin6->sin6_addr) =
   6508 				    htonl(INADDR_LOOPBACK);
   6509 			}
   6510 			ipversion = IPV4_VERSION;
   6511 		}
   6512 
   6513 		/*
   6514 		 * We have to allocate an ip_xmit_attr_t before we grab
   6515 		 * conn_lock and we need to hold conn_lock once we've check
   6516 		 * conn_same_as_last_v6 to handle concurrent send* calls on a
   6517 		 * socket.
   6518 		 */
   6519 		if (msg->msg_controllen == 0) {
   6520 			ixa = conn_get_ixa(connp, B_FALSE);
   6521 			if (ixa == NULL) {
   6522 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   6523 				return (ENOMEM);
   6524 			}
   6525 		} else {
   6526 			ixa = NULL;
   6527 		}
   6528 		mutex_enter(&connp->conn_lock);
   6529 		if (udp->udp_delayed_error != 0) {
   6530 			sin6_t  *sin2 = (sin6_t *)&udp->udp_delayed_addr;
   6531 
   6532 			error = udp->udp_delayed_error;
   6533 			udp->udp_delayed_error = 0;
   6534 
   6535 			/* Compare IP address, port, and family */
   6536 
   6537 			if (sin6->sin6_port == sin2->sin6_port &&
   6538 			    IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
   6539 			    &sin2->sin6_addr) &&
   6540 			    sin6->sin6_family == sin2->sin6_family) {
   6541 				mutex_exit(&connp->conn_lock);
   6542 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   6543 				if (ixa != NULL)
   6544 					ixa_refrele(ixa);
   6545 				return (error);
   6546 			}
   6547 		}
   6548 
   6549 		if (msg->msg_controllen != 0) {
   6550 			mutex_exit(&connp->conn_lock);
   6551 			ASSERT(ixa == NULL);
   6552 			error = udp_output_ancillary(connp, NULL, sin6, mp,
   6553 			    NULL, msg, cr, pid);
   6554 		} else if (conn_same_as_last_v6(connp, sin6) &&
   6555 		    connp->conn_lastsrcid == srcid &&
   6556 		    ipsec_outbound_policy_current(ixa)) {
   6557 			/* udp_output_lastdst drops conn_lock */
   6558 			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
   6559 		} else {
   6560 			/* udp_output_newdst drops conn_lock */
   6561 			error = udp_output_newdst(connp, mp, NULL, sin6,
   6562 			    ipversion, cr, pid, ixa);
   6563 		}
   6564 		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
   6565 		if (us->us_sendto_ignerr)
   6566 			return (0);
   6567 		else
   6568 			return (error);
   6569 	case AF_INET:
   6570 		sin = (sin_t *)msg->msg_name;
   6571 
   6572 		ipversion = IPV4_VERSION;
   6573 
   6574 		if (sin->sin_addr.s_addr == INADDR_ANY)
   6575 			sin->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
   6576 
   6577 		/*
   6578 		 * We have to allocate an ip_xmit_attr_t before we grab
   6579 		 * conn_lock and we need to hold conn_lock once we've check
   6580 		 * conn_same_as_last_v6 to handle concurrent send* on a socket.
   6581 		 */
   6582 		if (msg->msg_controllen == 0) {
   6583 			ixa = conn_get_ixa(connp, B_FALSE);
   6584 			if (ixa == NULL) {
   6585 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   6586 				return (ENOMEM);
   6587 			}
   6588 		} else {
   6589 			ixa = NULL;
   6590 		}
   6591 		mutex_enter(&connp->conn_lock);
   6592 		if (udp->udp_delayed_error != 0) {
   6593 			sin_t  *sin2 = (sin_t *)&udp->udp_delayed_addr;
   6594 
   6595 			error = udp->udp_delayed_error;
   6596 			udp->udp_delayed_error = 0;
   6597 
   6598 			/* Compare IP address and port */
   6599 
   6600 			if (sin->sin_port == sin2->sin_port &&
   6601 			    sin->sin_addr.s_addr == sin2->sin_addr.s_addr) {
   6602 				mutex_exit(&connp->conn_lock);
   6603 				BUMP_MIB(&us->us_udp_mib, udpOutErrors);
   6604 				if (ixa != NULL)
   6605 					ixa_refrele(ixa);
   6606 				return (error);
   6607 			}
   6608 		}
   6609 		if (msg->msg_controllen != 0) {
   6610 			mutex_exit(&connp->conn_lock);
   6611 			ASSERT(ixa == NULL);
   6612 			error = udp_output_ancillary(connp, sin, NULL, mp,
   6613 			    NULL, msg, cr, pid);
   6614 		} else if (conn_same_as_last_v4(connp, sin) &&
   6615 		    ipsec_outbound_policy_current(ixa)) {
   6616 			/* udp_output_lastdst drops conn_lock */
   6617 			error = udp_output_lastdst(connp, mp, cr, pid, ixa);
   6618 		} else {
   6619 			/* udp_output_newdst drops conn_lock */
   6620 			error = udp_output_newdst(connp, mp, sin, NULL,
   6621 			    ipversion, cr, pid, ixa);
   6622 		}
   6623 		ASSERT(MUTEX_NOT_HELD(&connp->conn_lock));
   6624 		if (us->us_sendto_ignerr)
   6625 			return (0);
   6626 		else
   6627 			return (error);
   6628 	default:
   6629 		return (EINVAL);
   6630 	}
   6631 }
   6632 
   6633 int
   6634 udp_fallback(sock_lower_handle_t proto_handle, queue_t *q,
   6635     boolean_t issocket, so_proto_quiesced_cb_t quiesced_cb)
   6636 {
   6637 	conn_t 	*connp = (conn_t *)proto_handle;
   6638 	udp_t	*udp;
   6639 	struct T_capability_ack tca;
   6640 	struct sockaddr_in6 laddr, faddr;
   6641 	socklen_t laddrlen, faddrlen;
   6642 	short opts;
   6643 	struct stroptions *stropt;
   6644 	mblk_t *stropt_mp;
   6645 	int error;
   6646 
   6647 	udp = connp->conn_udp;
   6648 
   6649 	stropt_mp = allocb_wait(sizeof (*stropt), BPRI_HI, STR_NOSIG, NULL);
   6650 
   6651 	/*
   6652 	 * setup the fallback stream that was allocated
   6653 	 */
   6654 	connp->conn_dev = (dev_t)RD(q)->q_ptr;
   6655 	connp->conn_minor_arena = WR(q)->q_ptr;
   6656 
   6657 	RD(q)->q_ptr = WR(q)->q_ptr = connp;
   6658 
   6659 	WR(q)->q_qinfo = &udp_winit;
   6660 
   6661 	connp->conn_rq = RD(q);
   6662 	connp->conn_wq = WR(q);
   6663 
   6664 	/* Notify stream head about options before sending up data */
   6665 	stropt_mp->b_datap->db_type = M_SETOPTS;
   6666 	stropt_mp->b_wptr += sizeof (*stropt);
   6667 	stropt = (struct stroptions *)stropt_mp->b_rptr;
   6668 	stropt->so_flags = SO_WROFF | SO_HIWAT;
   6669 	stropt->so_wroff = connp->conn_wroff;
   6670 	stropt->so_hiwat = udp->udp_rcv_disply_hiwat;
   6671 	putnext(RD(q), stropt_mp);
   6672 
   6673 	/*
   6674 	 * Free the helper stream
   6675 	 */
   6676 	ip_free_helper_stream(connp);
   6677 
   6678 	if (!issocket)
   6679 		udp_use_pure_tpi(udp);
   6680 
   6681 	/*
   6682 	 * Collect the information needed to sync with the sonode
   6683 	 */
   6684 	udp_do_capability_ack(udp, &tca, TC1_INFO);
   6685 
   6686 	laddrlen = faddrlen = sizeof (sin6_t);
   6687 	(void) udp_getsockname((sock_lower_handle_t)connp,
   6688 	    (struct sockaddr *)&laddr, &laddrlen, CRED());
   6689 	error = udp_getpeername((sock_lower_handle_t)connp,
   6690 	    (struct sockaddr *)&faddr, &faddrlen, CRED());
   6691 	if (error != 0)
   6692 		faddrlen = 0;
   6693 
   6694 	opts = 0;
   6695 	if (connp->conn_dgram_errind)
   6696 		opts |= SO_DGRAM_ERRIND;
   6697 	if (connp->conn_ixa->ixa_flags & IXAF_DONTROUTE)
   6698 		opts |= SO_DONTROUTE;
   6699 
   6700 	(*quiesced_cb)(connp->conn_upper_handle, q, &tca,
   6701 	    (struct sockaddr *)&laddr, laddrlen,
   6702 	    (struct sockaddr *)&faddr, faddrlen, opts);
   6703 
   6704 	mutex_enter(&udp->udp_recv_lock);
   6705 	/*
   6706 	 * Attempts to send data up during fallback will result in it being
   6707 	 * queued in udp_t. Now we push up any queued packets.
   6708 	 */
   6709 	while (udp->udp_fallback_queue_head != NULL) {
   6710 		mblk_t *mp;
   6711 		mp = udp->udp_fallback_queue_head;
   6712 		udp->udp_fallback_queue_head = mp->b_next;
   6713 		mutex_exit(&udp->udp_recv_lock);
   6714 		mp->b_next = NULL;
   6715 		putnext(RD(q), mp);
   6716 		mutex_enter(&udp->udp_recv_lock);
   6717 	}
   6718 	udp->udp_fallback_queue_tail = udp->udp_fallback_queue_head;
   6719 	/*
   6720 	 * No longer a streams less socket
   6721 	 */
   6722 	mutex_enter(&connp->conn_lock);
   6723 	connp->conn_flags &= ~IPCL_NONSTR;
   6724 	mutex_exit(&connp->conn_lock);
   6725 
   6726 	mutex_exit(&udp->udp_recv_lock);
   6727 
   6728 	ASSERT(connp->conn_ref >= 1);
   6729 
   6730 	return (0);
   6731 }
   6732 
   6733 /* ARGSUSED3 */
   6734 int
   6735 udp_getpeername(sock_lower_handle_t  proto_handle, struct sockaddr *sa,
   6736     socklen_t *salenp, cred_t *cr)
   6737 {
   6738 	conn_t	*connp = (conn_t *)proto_handle;
   6739 	udp_t	*udp = connp->conn_udp;
   6740 	int error;
   6741 
   6742 	/* All Solaris components should pass a cred for this operation. */
   6743 	ASSERT(cr != NULL);
   6744 
   6745 	mutex_enter(&connp->conn_lock);
   6746 	if (udp->udp_state != TS_DATA_XFER)
   6747 		error = ENOTCONN;
   6748 	else
   6749 		error = conn_getpeername(connp, sa, salenp);
   6750 	mutex_exit(&connp->conn_lock);
   6751 	return (error);
   6752 }
   6753 
   6754 /* ARGSUSED3 */
   6755 int
   6756 udp_getsockname(sock_lower_handle_t proto_handle, struct sockaddr *sa,
   6757     socklen_t *salenp, cred_t *cr)
   6758 {
   6759 	conn_t	*connp = (conn_t *)proto_handle;
   6760 	int error;
   6761 
   6762 	/* All Solaris components should pass a cred for this operation. */
   6763 	ASSERT(cr != NULL);
   6764 
   6765 	mutex_enter(&connp->conn_lock);
   6766 	error = conn_getsockname(connp, sa, salenp);
   6767 	mutex_exit(&connp->conn_lock);
   6768 	return (error);
   6769 }
   6770 
   6771 int
   6772 udp_getsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
   6773     void *optvalp, socklen_t *optlen, cred_t *cr)
   6774 {
   6775 	conn_t		*connp = (conn_t *)proto_handle;
   6776 	int		error;
   6777 	t_uscalar_t	max_optbuf_len;
   6778 	void		*optvalp_buf;
   6779 	int		len;
   6780 
   6781 	/* All Solaris components should pass a cred for this operation. */
   6782 	ASSERT(cr != NULL);
   6783 
   6784 	error = proto_opt_check(level, option_name, *optlen, &max_optbuf_len,
   6785 	    udp_opt_obj.odb_opt_des_arr,
   6786 	    udp_opt_obj.odb_opt_arr_cnt,
   6787 	    B_FALSE, B_TRUE, cr);
   6788 	if (error != 0) {
   6789 		if (error < 0)
   6790 			error = proto_tlitosyserr(-error);
   6791 		return (error);
   6792 	}
   6793 
   6794 	optvalp_buf = kmem_alloc(max_optbuf_len, KM_SLEEP);
   6795 	len = udp_opt_get(connp, level, option_name, optvalp_buf);
   6796 	if (len == -1) {
   6797 		kmem_free(optvalp_buf, max_optbuf_len);
   6798 		return (EINVAL);
   6799 	}
   6800 
   6801 	/*
   6802 	 * update optlen and copy option value
   6803 	 */
   6804 	t_uscalar_t size = MIN(len, *optlen);
   6805 
   6806 	bcopy(optvalp_buf, optvalp, size);
   6807 	bcopy(&size, optlen, sizeof (size));
   6808 
   6809 	kmem_free(optvalp_buf, max_optbuf_len);
   6810 	return (0);
   6811 }
   6812 
   6813 int
   6814 udp_setsockopt(sock_lower_handle_t proto_handle, int level, int option_name,
   6815     const void *optvalp, socklen_t optlen, cred_t *cr)
   6816 {
   6817 	conn_t		*connp = (conn_t *)proto_handle;
   6818 	int		error;
   6819 
   6820 	/* All Solaris components should pass a cred for this operation. */
   6821 	ASSERT(cr != NULL);
   6822 
   6823 	error = proto_opt_check(level, option_name, optlen, NULL,
   6824 	    udp_opt_obj.odb_opt_des_arr,
   6825 	    udp_opt_obj.odb_opt_arr_cnt,
   6826 	    B_TRUE, B_FALSE, cr);
   6827 
   6828 	if (error != 0) {
   6829 		if (error < 0)
   6830 			error = proto_tlitosyserr(-error);
   6831 		return (error);
   6832 	}
   6833 
   6834 	error = udp_opt_set(connp, SETFN_OPTCOM_NEGOTIATE, level, option_name,
   6835 	    optlen, (uchar_t *)optvalp, (uint_t *)&optlen, (uchar_t *)optvalp,
   6836 	    NULL, cr);
   6837 
   6838 	ASSERT(error >= 0);
   6839 
   6840 	return (error);
   6841 }
   6842 
   6843 void
   6844 udp_clr_flowctrl(sock_lower_handle_t proto_handle)
   6845 {
   6846 	conn_t	*connp = (conn_t *)proto_handle;
   6847 	udp_t	*udp = connp->conn_udp;
   6848 
   6849 	mutex_enter(&udp->udp_recv_lock);
   6850 	connp->conn_flow_cntrld = B_FALSE;
   6851 	mutex_exit(&udp->udp_recv_lock);
   6852 }
   6853 
   6854 /* ARGSUSED2 */
   6855 int
   6856 udp_shutdown(sock_lower_handle_t proto_handle, int how, cred_t *cr)
   6857 {
   6858 	conn_t	*connp = (conn_t *)proto_handle;
   6859 
   6860 	/* All Solaris components should pass a cred for this operation. */
   6861 	ASSERT(cr != NULL);
   6862 
   6863 	/* shut down the send side */
   6864 	if (how != SHUT_RD)
   6865 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
   6866 		    SOCK_OPCTL_SHUT_SEND, 0);
   6867 	/* shut down the recv side */
   6868 	if (how != SHUT_WR)
   6869 		(*connp->conn_upcalls->su_opctl)(connp->conn_upper_handle,
   6870 		    SOCK_OPCTL_SHUT_RECV, 0);
   6871 	return (0);
   6872 }
   6873 
   6874 int
   6875 udp_ioctl(sock_lower_handle_t proto_handle, int cmd, intptr_t arg,
   6876     int mode, int32_t *rvalp, cred_t *cr)
   6877 {
   6878 	conn_t  	*connp = (conn_t *)proto_handle;
   6879 	int		error;
   6880 
   6881 	/* All Solaris components should pass a cred for this operation. */
   6882 	ASSERT(cr != NULL);
   6883 
   6884 	/*
   6885 	 * If we don't have a helper stream then create one.
   6886 	 * ip_create_helper_stream takes care of locking the conn_t,
   6887 	 * so this check for NULL is just a performance optimization.
   6888 	 */
   6889 	if (connp->conn_helper_info == NULL) {
   6890 		udp_stack_t *us = connp->conn_udp->udp_us;
   6891 
   6892 		ASSERT(us->us_ldi_ident != NULL);
   6893 
   6894 		/*
   6895 		 * Create a helper stream for non-STREAMS socket.
   6896 		 */
   6897 		error = ip_create_helper_stream(connp, us->us_ldi_ident);
   6898 		if (error != 0) {
   6899 			ip0dbg(("tcp_ioctl: create of IP helper stream "
   6900 			    "failed %d\n", error));
   6901 			return (error);
   6902 		}
   6903 	}
   6904 
   6905 	switch (cmd) {
   6906 		case ND_SET:
   6907 		case ND_GET:
   6908 		case _SIOCSOCKFALLBACK:
   6909 		case TI_GETPEERNAME:
   6910 		case TI_GETMYNAME:
   6911 			ip1dbg(("udp_ioctl: cmd 0x%x on non streams socket",
   6912 			    cmd));
   6913 			error = EINVAL;
   6914 			break;
   6915 		default:
   6916 			/*
   6917 			 * Pass on to IP using helper stream
   6918 			 */
   6919 			error = ldi_ioctl(connp->conn_helper_info->iphs_handle,
   6920 			    cmd, arg, mode, cr, rvalp);
   6921 			break;
   6922 	}
   6923 	return (error);
   6924 }
   6925 
   6926 /* ARGSUSED */
   6927 int
   6928 udp_accept(sock_lower_handle_t lproto_handle,
   6929     sock_lower_handle_t eproto_handle, sock_upper_handle_t sock_handle,
   6930     cred_t *cr)
   6931 {
   6932 	return (EOPNOTSUPP);
   6933 }
   6934 
   6935 /* ARGSUSED */
   6936 int
   6937 udp_listen(sock_lower_handle_t proto_handle, int backlog, cred_t *cr)
   6938 {
   6939 	return (EOPNOTSUPP);
   6940 }
   6941 
   6942 sock_downcalls_t sock_udp_downcalls = {
   6943 	udp_activate,		/* sd_activate */
   6944 	udp_accept,		/* sd_accept */
   6945 	udp_bind,		/* sd_bind */
   6946 	udp_listen,		/* sd_listen */
   6947 	udp_connect,		/* sd_connect */
   6948 	udp_getpeername,	/* sd_getpeername */
   6949 	udp_getsockname,	/* sd_getsockname */
   6950 	udp_getsockopt,		/* sd_getsockopt */
   6951 	udp_setsockopt,		/* sd_setsockopt */
   6952 	udp_send,		/* sd_send */
   6953 	NULL,			/* sd_send_uio */
   6954 	NULL,			/* sd_recv_uio */
   6955 	NULL,			/* sd_poll */
   6956 	udp_shutdown,		/* sd_shutdown */
   6957 	udp_clr_flowctrl,	/* sd_setflowctrl */
   6958 	udp_ioctl,		/* sd_ioctl */
   6959 	udp_close		/* sd_close */
   6960 };
   6961