Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/stream.h>
     28 #include <sys/stropts.h>
     29 #include <sys/errno.h>
     30 #include <sys/strlog.h>
     31 #include <sys/tihdr.h>
     32 #include <sys/socket.h>
     33 #include <sys/ddi.h>
     34 #include <sys/sunddi.h>
     35 #include <sys/kmem.h>
     36 #include <sys/zone.h>
     37 #include <sys/sysmacros.h>
     38 #include <sys/cmn_err.h>
     39 #include <sys/vtrace.h>
     40 #include <sys/debug.h>
     41 #include <sys/atomic.h>
     42 #include <sys/strsun.h>
     43 #include <sys/random.h>
     44 #include <netinet/in.h>
     45 #include <net/if.h>
     46 #include <netinet/ip6.h>
     47 #include <net/pfkeyv2.h>
     48 #include <net/pfpolicy.h>
     49 
     50 #include <inet/common.h>
     51 #include <inet/mi.h>
     52 #include <inet/nd.h>
     53 #include <inet/ip.h>
     54 #include <inet/ip_impl.h>
     55 #include <inet/ip6.h>
     56 #include <inet/ip_if.h>
     57 #include <inet/ip_ndp.h>
     58 #include <inet/sadb.h>
     59 #include <inet/ipsec_info.h>
     60 #include <inet/ipsec_impl.h>
     61 #include <inet/ipsecesp.h>
     62 #include <inet/ipdrop.h>
     63 #include <inet/tcp.h>
     64 #include <sys/kstat.h>
     65 #include <sys/policy.h>
     66 #include <sys/strsun.h>
     67 #include <sys/strsubr.h>
     68 #include <inet/udp_impl.h>
     69 #include <sys/taskq.h>
     70 #include <sys/note.h>
     71 
     72 #include <sys/tsol/tnet.h>
     73 
/*
 * Table of ND variables supported by ipsecesp.  ipsecesp_stack_init() copies
 * this table into each stack instance and registers every entry of the copy
 * with that stack's ipsecesp_g_nd through ipsecesp_param_register().
 * All of these are alterable, within the min/max values given, at run time.
 */
static	ipsecespparam_t	lcl_param_arr[] = {
	/* min	max			value	name */
	{ 0,	3,			0,	"ipsecesp_debug"},
	{ 125,	32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
	{ 1,	10,			1,	"ipsecesp_reap_delay"},
	{ 1,	SADB_MAX_REPLAY,	64,	"ipsecesp_replay_size"},
	{ 1,	300,			15,	"ipsecesp_acquire_timeout"},
	{ 1,	1800,			90,	"ipsecesp_larval_timeout"},
	/* Default lifetime values for ACQUIRE messages. */
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_bytes"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_bytes"},
	{ 0,	0xffffffffU,	24000,	"ipsecesp_default_soft_addtime"},
	{ 0,	0xffffffffU,	28800,	"ipsecesp_default_hard_addtime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_usetime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_usetime"},
	{ 0,	1,		0,	"ipsecesp_log_unknown_spi"},
	{ 0,	2,		1,	"ipsecesp_padding_check"},
	{ 0,	600,		20,	"ipsecesp_nat_keepalive_interval"},
};
/*
 * Per-stack accessors for the tunables above.  Each name expands to a member
 * of an ipsecesp_params[] element and is meant to be used as
 * "espstack->ipsecesp_<name>".  The array index hard-coded in each macro must
 * stay in step with the position of the matching entry in lcl_param_arr.
 */
#define	ipsecesp_debug	ipsecesp_params[0].ipsecesp_param_value
#define	ipsecesp_age_interval ipsecesp_params[1].ipsecesp_param_value
#define	ipsecesp_age_int_max	ipsecesp_params[1].ipsecesp_param_max
#define	ipsecesp_reap_delay	ipsecesp_params[2].ipsecesp_param_value
#define	ipsecesp_replay_size	ipsecesp_params[3].ipsecesp_param_value
#define	ipsecesp_acquire_timeout	\
	ipsecesp_params[4].ipsecesp_param_value
#define	ipsecesp_larval_timeout	\
	ipsecesp_params[5].ipsecesp_param_value
#define	ipsecesp_default_soft_bytes	\
	ipsecesp_params[6].ipsecesp_param_value
#define	ipsecesp_default_hard_bytes	\
	ipsecesp_params[7].ipsecesp_param_value
#define	ipsecesp_default_soft_addtime	\
	ipsecesp_params[8].ipsecesp_param_value
#define	ipsecesp_default_hard_addtime	\
	ipsecesp_params[9].ipsecesp_param_value
#define	ipsecesp_default_soft_usetime	\
	ipsecesp_params[10].ipsecesp_param_value
#define	ipsecesp_default_hard_usetime	\
	ipsecesp_params[11].ipsecesp_param_value
#define	ipsecesp_log_unknown_spi	\
	ipsecesp_params[12].ipsecesp_param_value
#define	ipsecesp_padding_check	\
	ipsecesp_params[13].ipsecesp_param_value
/* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */
    124 
/*
 * Debug output helpers.  esp0dbg() prints unconditionally; esp1dbg(),
 * esp2dbg() and esp3dbg() print only when the stack's ipsecesp_debug tunable
 * is at least 1, 2 or 3 respectively.  The printf argument list is passed as
 * a single parenthesized argument, e.g. esp1dbg(espstack, ("x = %d\n", x)).
 *
 * NOTE(review): the leveled macros expand to a bare "if" statement, so an
 * "else" following an unbraced use would attach to the macro's "if".
 */
#define	esp0dbg(a)	printf a
/* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
#define	esp1dbg(espstack, a)	if (espstack->ipsecesp_debug != 0) printf a
#define	esp2dbg(espstack, a)	if (espstack->ipsecesp_debug > 1) printf a
#define	esp3dbg(espstack, a)	if (espstack->ipsecesp_debug > 2) printf a
    130 
/*
 * Forward declarations: STREAMS entry points (referenced from the qinit
 * structures below), per-netstack create/destroy hooks, and the ACQUIRE
 * callback installed in the SADB by ipsecesp_stack_init().
 */
static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
static int ipsecesp_close(queue_t *);
static void ipsecesp_wput(queue_t *, mblk_t *);
static void	*ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns);
static void	ipsecesp_stack_fini(netstackid_t stackid, void *arg);
static void esp_send_acquire(ipsacq_t *, mblk_t *, netstack_t *);

/* Forward declarations: outbound/inbound packet-path helpers. */
static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *);
static void esp_outbound_finish(mblk_t *, ip_xmit_attr_t *);
static void esp_inbound_restart(mblk_t *, ip_recv_attr_t *);

static boolean_t esp_register_out(uint32_t, uint32_t, uint_t,
    ipsecesp_stack_t *, cred_t *);
static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
    kstat_named_t **, ipsecesp_stack_t *);
static mblk_t *esp_submit_req_inbound(mblk_t *, ip_recv_attr_t *,
    ipsa_t *, uint_t);
static mblk_t *esp_submit_req_outbound(mblk_t *, ip_xmit_attr_t *,
    ipsa_t *, uchar_t *, uint_t);
    150 
/*
 * Settable in /etc/system.  Passed to sadbp_init() as the hash size when a
 * stack instance's SADB is created.
 */
uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE;

/*
 * STREAMS module information shared by both queues.  info.mi_idnum is also
 * the id this file passes to sadb_retimeout() and ipsec_rl_strlog().
 */
static struct module_info info = {
	5137, "ipsecesp", 0, INFPSZ, 65536, 1024
};

/* Read side: messages are passed straight along with putnext. */
static struct qinit rinit = {
	(pfi_t)putnext, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
	NULL
};

/* Write side: messages are handled by ipsecesp_wput(). */
static struct qinit winit = {
	(pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
	NULL
};

/* Streamtab tying the read and write qinit structures together. */
struct streamtab ipsecespinfo = {
	&rinit, &winit, NULL, NULL
};

/*
 * Task queue created in ipsecesp_ddi_init() and destroyed in
 * ipsecesp_ddi_destroy().
 */
static taskq_t *esp_taskq;
    173 
    174 /*
    175  * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now.
    176  *
    177  * Question:	Do I need this, given that all instance's esps->esps_wq point
    178  *		to IP?
    179  *
    180  * Answer:	Yes, because I need to know which queue is BOUND to
    181  *		IPPROTO_ESP
    182  */
    183 
    184 /*
    185  * Stats.  This may eventually become a full-blown SNMP MIB once that spec
    186  * stabilizes.
    187  */
    188 
/*
 * Named kstat counters kept per ESP stack instance, one kstat_named_t per
 * statistic.  esp_kstat_init() sizes the kstat from this structure and names
 * each field; ESP_BUMP_STAT()/ESP_DEBUMP_STAT() adjust the counters, and
 * esp_kstat_update() refreshes num_aalgs and num_ealgs on read.
 */
typedef struct esp_kstats_s {
	kstat_named_t esp_stat_num_aalgs;
	kstat_named_t esp_stat_good_auth;
	kstat_named_t esp_stat_bad_auth;
	kstat_named_t esp_stat_bad_padding;
	kstat_named_t esp_stat_replay_failures;
	kstat_named_t esp_stat_replay_early_failures;
	kstat_named_t esp_stat_keysock_in;
	kstat_named_t esp_stat_out_requests;
	kstat_named_t esp_stat_acquire_requests;
	kstat_named_t esp_stat_bytes_expired;
	kstat_named_t esp_stat_out_discards;
	kstat_named_t esp_stat_crypto_sync;
	kstat_named_t esp_stat_crypto_async;
	kstat_named_t esp_stat_crypto_failures;
	kstat_named_t esp_stat_num_ealgs;
	kstat_named_t esp_stat_bad_decrypt;
	kstat_named_t esp_stat_sa_port_renumbers;
} esp_kstats_t;
    208 
    209 /*
    210  * espstack->esp_kstats is equal to espstack->esp_ksp->ks_data if
    211  * kstat_create_netstack for espstack->esp_ksp succeeds, but when it
    212  * fails, it will be NULL. Note this is done for all stack instances,
    213  * so it *could* fail. hence a non-NULL checking is done for
    214  * ESP_BUMP_STAT and ESP_DEBUMP_STAT
    215  */
/*
 * Increment the esp_stat_<x> counter of the given stack; a no-op when the
 * stack has no kstats.  "x" is the field name without its esp_stat_ prefix.
 */
#define	ESP_BUMP_STAT(espstack, x)					\
do {									\
	if (espstack->esp_kstats != NULL)				\
		(espstack->esp_kstats->esp_stat_ ## x).value.ui64++;	\
_NOTE(CONSTCOND)							\
} while (0)

/*
 * Decrement the esp_stat_<x> counter of the given stack; a no-op when the
 * stack has no kstats.
 */
#define	ESP_DEBUMP_STAT(espstack, x)					\
do {									\
	if (espstack->esp_kstats != NULL)				\
		(espstack->esp_kstats->esp_stat_ ## x).value.ui64--;	\
_NOTE(CONSTCOND)							\
} while (0)
    229 
/* kstat update callback; installed as ks_update by esp_kstat_init(). */
static int	esp_kstat_update(kstat_t *, int);
    231 
    232 static boolean_t
    233 esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid)
    234 {
    235 	espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat",
    236 	    "net", KSTAT_TYPE_NAMED,
    237 	    sizeof (esp_kstats_t) / sizeof (kstat_named_t),
    238 	    KSTAT_FLAG_PERSISTENT, stackid);
    239 
    240 	if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL)
    241 		return (B_FALSE);
    242 
    243 	espstack->esp_kstats = espstack->esp_ksp->ks_data;
    244 
    245 	espstack->esp_ksp->ks_update = esp_kstat_update;
    246 	espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid;
    247 
    248 #define	K64 KSTAT_DATA_UINT64
    249 #define	KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64)
    250 
    251 	KI(num_aalgs);
    252 	KI(num_ealgs);
    253 	KI(good_auth);
    254 	KI(bad_auth);
    255 	KI(bad_padding);
    256 	KI(replay_failures);
    257 	KI(replay_early_failures);
    258 	KI(keysock_in);
    259 	KI(out_requests);
    260 	KI(acquire_requests);
    261 	KI(bytes_expired);
    262 	KI(out_discards);
    263 	KI(crypto_sync);
    264 	KI(crypto_async);
    265 	KI(crypto_failures);
    266 	KI(bad_decrypt);
    267 	KI(sa_port_renumbers);
    268 
    269 #undef KI
    270 #undef K64
    271 
    272 	kstat_install(espstack->esp_ksp);
    273 
    274 	return (B_TRUE);
    275 }
    276 
    277 static int
    278 esp_kstat_update(kstat_t *kp, int rw)
    279 {
    280 	esp_kstats_t *ekp;
    281 	netstackid_t	stackid = (zoneid_t)(uintptr_t)kp->ks_private;
    282 	netstack_t	*ns;
    283 	ipsec_stack_t	*ipss;
    284 
    285 	if ((kp == NULL) || (kp->ks_data == NULL))
    286 		return (EIO);
    287 
    288 	if (rw == KSTAT_WRITE)
    289 		return (EACCES);
    290 
    291 	ns = netstack_find_by_stackid(stackid);
    292 	if (ns == NULL)
    293 		return (-1);
    294 	ipss = ns->netstack_ipsec;
    295 	if (ipss == NULL) {
    296 		netstack_rele(ns);
    297 		return (-1);
    298 	}
    299 	ekp = (esp_kstats_t *)kp->ks_data;
    300 
    301 	mutex_enter(&ipss->ipsec_alg_lock);
    302 	ekp->esp_stat_num_aalgs.value.ui64 =
    303 	    ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
    304 	ekp->esp_stat_num_ealgs.value.ui64 =
    305 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
    306 	mutex_exit(&ipss->ipsec_alg_lock);
    307 
    308 	netstack_rele(ns);
    309 	return (0);
    310 }
    311 
    312 #ifdef DEBUG
    313 /*
    314  * Debug routine, useful to see pre-encryption data.
    315  */
    316 static char *
    317 dump_msg(mblk_t *mp)
    318 {
    319 	char tmp_str[3], tmp_line[256];
    320 
    321 	while (mp != NULL) {
    322 		unsigned char *ptr;
    323 
    324 		printf("mblk address 0x%p, length %ld, db_ref %d "
    325 		    "type %d, base 0x%p, lim 0x%p\n",
    326 		    (void *) mp, (long)(mp->b_wptr - mp->b_rptr),
    327 		    mp->b_datap->db_ref, mp->b_datap->db_type,
    328 		    (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim);
    329 		ptr = mp->b_rptr;
    330 
    331 		tmp_line[0] = '\0';
    332 		while (ptr < mp->b_wptr) {
    333 			uint_t diff;
    334 
    335 			diff = (ptr - mp->b_rptr);
    336 			if (!(diff & 0x1f)) {
    337 				if (strlen(tmp_line) > 0) {
    338 					printf("bytes: %s\n", tmp_line);
    339 					tmp_line[0] = '\0';
    340 				}
    341 			}
    342 			if (!(diff & 0x3))
    343 				(void) strcat(tmp_line, " ");
    344 			(void) sprintf(tmp_str, "%02x", *ptr);
    345 			(void) strcat(tmp_line, tmp_str);
    346 			ptr++;
    347 		}
    348 		if (strlen(tmp_line) > 0)
    349 			printf("bytes: %s\n", tmp_line);
    350 
    351 		mp = mp->b_cont;
    352 	}
    353 
    354 	return ("\n");
    355 }
    356 
    357 #else /* DEBUG */
    358 static char *
    359 dump_msg(mblk_t *mp)
    360 {
    361 	printf("Find value of mp %p.\n", mp);
    362 	return ("\n");
    363 }
    364 #endif /* DEBUG */
    365 
    366 /*
    367  * Don't have to lock age_interval, as only one thread will access it at
    368  * a time, because I control the one function that does with timeout().
    369  */
    370 static void
    371 esp_ager(void *arg)
    372 {
    373 	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
    374 	netstack_t	*ns = espstack->ipsecesp_netstack;
    375 	hrtime_t begin = gethrtime();
    376 
    377 	sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q,
    378 	    espstack->ipsecesp_reap_delay, ns);
    379 	sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q,
    380 	    espstack->ipsecesp_reap_delay, ns);
    381 
    382 	espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q,
    383 	    esp_ager, espstack,
    384 	    &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max,
    385 	    info.mi_idnum);
    386 }
    387 
    388 /*
    389  * Get an ESP NDD parameter.
    390  */
    391 /* ARGSUSED */
    392 static int
    393 ipsecesp_param_get(q, mp, cp, cr)
    394 	queue_t	*q;
    395 	mblk_t	*mp;
    396 	caddr_t	cp;
    397 	cred_t *cr;
    398 {
    399 	ipsecespparam_t	*ipsecesppa = (ipsecespparam_t *)cp;
    400 	uint_t value;
    401 	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;
    402 
    403 	mutex_enter(&espstack->ipsecesp_param_lock);
    404 	value = ipsecesppa->ipsecesp_param_value;
    405 	mutex_exit(&espstack->ipsecesp_param_lock);
    406 
    407 	(void) mi_mpprintf(mp, "%u", value);
    408 	return (0);
    409 }
    410 
    411 /*
    412  * This routine sets an NDD variable in a ipsecespparam_t structure.
    413  */
    414 /* ARGSUSED */
    415 static int
    416 ipsecesp_param_set(q, mp, value, cp, cr)
    417 	queue_t	*q;
    418 	mblk_t	*mp;
    419 	char	*value;
    420 	caddr_t	cp;
    421 	cred_t *cr;
    422 {
    423 	ulong_t	new_value;
    424 	ipsecespparam_t	*ipsecesppa = (ipsecespparam_t *)cp;
    425 	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;
    426 
    427 	/*
    428 	 * Fail the request if the new value does not lie within the
    429 	 * required bounds.
    430 	 */
    431 	if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
    432 	    new_value < ipsecesppa->ipsecesp_param_min ||
    433 	    new_value > ipsecesppa->ipsecesp_param_max) {
    434 		return (EINVAL);
    435 	}
    436 
    437 	/* Set the new value */
    438 	mutex_enter(&espstack->ipsecesp_param_lock);
    439 	ipsecesppa->ipsecesp_param_value = new_value;
    440 	mutex_exit(&espstack->ipsecesp_param_lock);
    441 	return (0);
    442 }
    443 
    444 /*
    445  * Using lifetime NDD variables, fill in an extended combination's
    446  * lifetime information.
    447  */
    448 void
    449 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
    450 {
    451 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
    452 
    453 	ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes;
    454 	ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes;
    455 	ecomb->sadb_x_ecomb_soft_addtime =
    456 	    espstack->ipsecesp_default_soft_addtime;
    457 	ecomb->sadb_x_ecomb_hard_addtime =
    458 	    espstack->ipsecesp_default_hard_addtime;
    459 	ecomb->sadb_x_ecomb_soft_usetime =
    460 	    espstack->ipsecesp_default_soft_usetime;
    461 	ecomb->sadb_x_ecomb_hard_usetime =
    462 	    espstack->ipsecesp_default_hard_usetime;
    463 }
    464 
/*
 * Initialize things for ESP at module load time.
 *
 * Creates the ESP task queue and registers the per-netstack create and
 * destroy callbacks.  Always returns B_TRUE.
 */
boolean_t
ipsecesp_ddi_init(void)
{
	esp_taskq = taskq_create("esp_taskq", 1, minclsyspri,
	    IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);

	/*
	 * We want to be informed each time a stack is created or
	 * destroyed in the kernel, so we can maintain the
	 * set of ipsecesp_stack_t's.
	 */
	netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL,
	    ipsecesp_stack_fini);

	return (B_TRUE);
}
    484 
    485 /*
    486  * Walk through the param array specified registering each element with the
    487  * named dispatch handler.
    488  */
    489 static boolean_t
    490 ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt)
    491 {
    492 	for (; cnt-- > 0; espp++) {
    493 		if (espp->ipsecesp_param_name != NULL &&
    494 		    espp->ipsecesp_param_name[0]) {
    495 			if (!nd_load(ndp,
    496 			    espp->ipsecesp_param_name,
    497 			    ipsecesp_param_get, ipsecesp_param_set,
    498 			    (caddr_t)espp)) {
    499 				nd_free(ndp);
    500 				return (B_FALSE);
    501 			}
    502 		}
    503 	}
    504 	return (B_TRUE);
    505 }
/*
 * Initialize things for ESP for each stack instance
 *
 * Registered with netstack_register() as the create callback.  Allocates a
 * zeroed ipsecesp_stack_t for the netstack, gives it a private copy of the
 * tunable table, and sets up its kstats, SADB, parameter lock and drop
 * accounting.  Returns the new ipsecesp_stack_t.
 */
static void *
ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns)
{
	ipsecesp_stack_t	*espstack;
	ipsecespparam_t		*espp;

	espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack),
	    KM_SLEEP);
	espstack->ipsecesp_netstack = ns;

	/* Each stack tunes its own copy of lcl_param_arr. */
	espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
	espstack->ipsecesp_params = espp;
	bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr));

	/* Registration failure is deliberately ignored here. */
	(void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp,
	    A_CNT(lcl_param_arr));

	/*
	 * kstat setup may fail; esp_kstats then stays NULL and the stat
	 * macros become no-ops.
	 */
	(void) esp_kstat_init(espstack, stackid);

	espstack->esp_sadb.s_acquire_timeout =
	    &espstack->ipsecesp_acquire_timeout;
	espstack->esp_sadb.s_acqfn = esp_send_acquire;
	sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size,
	    espstack->ipsecesp_netstack);

	mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0);

	ip_drop_register(&espstack->esp_dropper, "IPsec ESP");
	return (espstack);
}
    539 
/*
 * Destroy things for ESP at module unload time.
 *
 * Undoes ipsecesp_ddi_init(): unregisters the netstack callbacks, then
 * destroys the ESP task queue.
 */
void
ipsecesp_ddi_destroy(void)
{
	netstack_unregister(NS_IPSECESP);
	taskq_destroy(esp_taskq);
}
    549 
    550 /*
    551  * Destroy things for ESP for one stack instance
    552  */
    553 static void
    554 ipsecesp_stack_fini(netstackid_t stackid, void *arg)
    555 {
    556 	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
    557 
    558 	if (espstack->esp_pfkey_q != NULL) {
    559 		(void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event);
    560 	}
    561 	espstack->esp_sadb.s_acqfn = NULL;
    562 	espstack->esp_sadb.s_acquire_timeout = NULL;
    563 	sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack);
    564 	ip_drop_unregister(&espstack->esp_dropper);
    565 	mutex_destroy(&espstack->ipsecesp_param_lock);
    566 	nd_free(&espstack->ipsecesp_g_nd);
    567 
    568 	kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr));
    569 	espstack->ipsecesp_params = NULL;
    570 	kstat_delete_netstack(espstack->esp_ksp, stackid);
    571 	espstack->esp_ksp = NULL;
    572 	espstack->esp_kstats = NULL;
    573 	kmem_free(espstack, sizeof (*espstack));
    574 }
    575 
/*
 * ESP module open routine, which is here for keysock plumbing.
 * Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old
 * Days of export control, and fears that ESP would not be allowed
 * to be shipped at all by default.  Eventually, keysock should
 * either access AH and ESP via modstubs or krtld dependencies, or
 * perhaps be folded in with AH and ESP into a single IPsec/netsec
 * module ("netsec" if PF_KEY provides more than AH/ESP keying tables).
 *
 * Returns EPERM if secpolicy_ip_config() rejects credp, 0 immediately if
 * the queue already has a q_ptr (re-open), and EINVAL for anything other
 * than a module open (MODOPEN).  On a first open the caller's netstack is
 * looked up from credp, its ipsecesp_stack_t is stored in q_ptr of both the
 * read and write queue, the queue procedures are turned on, and 0 is
 * returned.
 */
/* ARGSUSED */
static int
ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
{
	netstack_t		*ns;
	ipsecesp_stack_t	*espstack;

	if (secpolicy_ip_config(credp, B_FALSE) != 0)
		return (EPERM);

	if (q->q_ptr != NULL)
		return (0);  /* Re-open of an already open instance. */

	if (sflag != MODOPEN)
		return (EINVAL);

	/*
	 * NOTE(review): the netstack reference obtained here appears to be
	 * the one dropped by netstack_rele() in ipsecesp_close() - confirm.
	 */
	ns = netstack_find_by_cred(credp);
	ASSERT(ns != NULL);
	espstack = ns->netstack_ipsecesp;
	ASSERT(espstack != NULL);

	q->q_ptr = espstack;
	WR(q)->q_ptr = q->q_ptr;

	qprocson(q);
	return (0);
}
    612 
/*
 * ESP module close routine.
 *
 * Turns off the queue's procedures.  If q is the queue recorded as the
 * stack's PF_KEY queue (esp_pfkey_q), that pointer is cleared and the
 * pending esp_event timeout is cancelled.  A reference on the stack's
 * netstack is then released.  Always returns 0.
 */
static int
ipsecesp_close(queue_t *q)
{
	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;

	/*
	 * Stop put/service processing on this queue pair before touching
	 * per-stack state.
	 */
	qprocsoff(q);

	/* Keysock queue check is safe, because of OCEXCL perimeter. */

	if (q == espstack->esp_pfkey_q) {
		esp1dbg(espstack,
		    ("ipsecesp_close:  Ummm... keysock is closing ESP.\n"));
		espstack->esp_pfkey_q = NULL;
		/* Detach qtimeouts. */
		(void) quntimeout(q, espstack->esp_event);
	}

	netstack_rele(espstack->ipsecesp_netstack);
	return (0);
}
    639 
/*
 * Add a number of bytes to what the SA has protected so far.  Return
 * B_TRUE if the SA can still protect that many bytes.
 *
 * When the SA has a peer (ipsa_haspeer), the peer is looked up in the
 * opposite-direction table and both SAs are aged by the same byte count; the
 * result is then B_TRUE only if both can still protect that many bytes.  If
 * the peer cannot be found, only the passed-in SA is aged.  "inbound" tells
 * which direction the passed-in SA belongs to.
 *
 * Caller must REFRELE the passed-in assoc.  This function must REFRELE
 * any obtained peer SA.
 */
static boolean_t
esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
{
	ipsa_t *inassoc, *outassoc;
	isaf_t *bucket;
	boolean_t inrc, outrc, isv6;
	sadb_t *sp;
	int outhash;
	netstack_t		*ns = assoc->ipsa_netstack;
	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;

	/* No peer?  No problem! */
	if (!assoc->ipsa_haspeer) {
		return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes,
		    B_TRUE));
	}

	/*
	 * Otherwise, we want to grab both the original assoc and its peer.
	 * There might be a race for this, but if it's a real race, two
	 * expire messages may occur.  We limit this by only sending the
	 * expire message on one of the peers, we'll pick the inbound
	 * arbitrarily.
	 *
	 * If we need tight synchronization on the peer SA, then we need to
	 * reconsider.
	 */

	/* Use the address family to select the IPv6 or IPv4 table. */
	isv6 = (assoc->ipsa_addrfam == AF_INET6);
	sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;

	if (inbound) {
		inassoc = assoc;
		/* The outbound peer is hashed by destination address. */
		if (isv6) {
			outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
			    &inassoc->ipsa_dstaddr));
		} else {
			outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
			    &inassoc->ipsa_dstaddr));
		}
		bucket = &sp->sdb_of[outhash];
		mutex_enter(&bucket->isaf_lock);
		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
		    inassoc->ipsa_addrfam);
		mutex_exit(&bucket->isaf_lock);
		if (outassoc == NULL) {
			/* Q: Do we wish to set haspeer == B_FALSE? */
			esp0dbg(("esp_age_bytes: "
			    "can't find peer for inbound.\n"));
			return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc,
			    bytes, B_TRUE));
		}
	} else {
		outassoc = assoc;
		/* The inbound peer is bucketed by SPI. */
		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
		mutex_enter(&bucket->isaf_lock);
		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
		    outassoc->ipsa_addrfam);
		mutex_exit(&bucket->isaf_lock);
		if (inassoc == NULL) {
			/* Q: Do we wish to set haspeer == B_FALSE? */
			esp0dbg(("esp_age_bytes: "
			    "can't find peer for outbound.\n"));
			return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc,
			    bytes, B_TRUE));
		}
	}

	/*
	 * NOTE(review): the last argument presumably selects whether an
	 * expire message is sent; per the comment above only the inbound SA
	 * gets B_TRUE - confirm against sadb_age_bytes().
	 */
	inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE);
	outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE);

	/*
	 * REFRELE any peer SA.
	 *
	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
	 * them in { }.
	 */
	if (inbound) {
		IPSA_REFRELE(outassoc);
	} else {
		IPSA_REFRELE(inassoc);
	}

	return (inrc && outrc);
}
    735 
    736 /*
    737  * Do incoming NAT-T manipulations for packet.
    738  * Returns NULL if the mblk chain is consumed.
    739  */
    740 static mblk_t *
    741 esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc)
    742 {
    743 	ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
    744 	tcpha_t *tcpha;
    745 	udpha_t *udpha;
    746 	/* Initialize to our inbound cksum adjustment... */
    747 	uint32_t sum = assoc->ipsa_inbound_cksum;
    748 
    749 	switch (ipha->ipha_protocol) {
    750 	case IPPROTO_TCP:
    751 		tcpha = (tcpha_t *)(data_mp->b_rptr +
    752 		    IPH_HDR_LENGTH(ipha));
    753 
    754 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
    755 		sum += ~ntohs(tcpha->tha_sum) & 0xFFFF;
    756 		DOWN_SUM(sum);
    757 		DOWN_SUM(sum);
    758 		tcpha->tha_sum = ~htons(sum);
    759 		break;
    760 	case IPPROTO_UDP:
    761 		udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha));
    762 
    763 		if (udpha->uha_checksum != 0) {
    764 			/* Adujst if the inbound one was not zero. */
    765 			sum += ~ntohs(udpha->uha_checksum) & 0xFFFF;
    766 			DOWN_SUM(sum);
    767 			DOWN_SUM(sum);
    768 			udpha->uha_checksum = ~htons(sum);
    769 			if (udpha->uha_checksum == 0)
    770 				udpha->uha_checksum = 0xFFFF;
    771 		}
    772 #undef DOWN_SUM
    773 		break;
    774 	case IPPROTO_IP:
    775 		/*
    776 		 * This case is only an issue for self-encapsulated
    777 		 * packets.  So for now, fall through.
    778 		 */
    779 		break;
    780 	}
    781 	return (data_mp);
    782 }
    783 
    784 
    785 /*
    786  * Strip ESP header, check padding, and fix IP header.
 * Returns B_TRUE on success, B_FALSE if an error occurred.
    788  */
    789 static boolean_t
    790 esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen,
    791     kstat_named_t **counter, ipsecesp_stack_t *espstack)
    792 {
    793 	ipha_t *ipha;
    794 	ip6_t *ip6h;
    795 	uint_t divpoint;
    796 	mblk_t *scratch;
    797 	uint8_t nexthdr, padlen;
    798 	uint8_t lastpad;
    799 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
    800 	uint8_t *lastbyte;
    801 
    802 	/*
    803 	 * Strip ESP data and fix IP header.
    804 	 *
    805 	 * XXX In case the beginning of esp_inbound() changes to not do a
    806 	 * pullup, this part of the code can remain unchanged.
    807 	 */
    808 	if (isv4) {
    809 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
    810 		ipha = (ipha_t *)data_mp->b_rptr;
    811 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) +
    812 		    IPH_HDR_LENGTH(ipha));
    813 		divpoint = IPH_HDR_LENGTH(ipha);
    814 	} else {
    815 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
    816 		ip6h = (ip6_t *)data_mp->b_rptr;
    817 		divpoint = ip_hdr_length_v6(data_mp, ip6h);
    818 	}
    819 
    820 	scratch = data_mp;
    821 	while (scratch->b_cont != NULL)
    822 		scratch = scratch->b_cont;
    823 
    824 	ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3);
    825 
    826 	/*
    827 	 * "Next header" and padding length are the last two bytes in the
    828 	 * ESP-protected datagram, thus the explicit - 1 and - 2.
    829 	 * lastpad is the last byte of the padding, which can be used for
    830 	 * a quick check to see if the padding is correct.
    831 	 */
    832 	lastbyte = scratch->b_wptr - 1;
    833 	nexthdr = *lastbyte--;
    834 	padlen = *lastbyte--;
    835 
    836 	if (isv4) {
    837 		/* Fix part of the IP header. */
    838 		ipha->ipha_protocol = nexthdr;
    839 		/*
    840 		 * Reality check the padlen.  The explicit - 2 is for the
    841 		 * padding length and the next-header bytes.
    842 		 */
    843 		if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 -
    844 		    sizeof (esph_t) - ivlen) {
    845 			ESP_BUMP_STAT(espstack, bad_decrypt);
    846 			ipsec_rl_strlog(espstack->ipsecesp_netstack,
    847 			    info.mi_idnum, 0, 0,
    848 			    SL_ERROR | SL_WARN,
    849 			    "Corrupt ESP packet (padlen too big).\n");
    850 			esp1dbg(espstack, ("padlen (%d) is greater than:\n",
    851 			    padlen));
    852 			esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp "
    853 			    "hdr - ivlen(%d) = %d.\n",
    854 			    ntohs(ipha->ipha_length), ivlen,
    855 			    (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) -
    856 			    2 - sizeof (esph_t) - ivlen)));
    857 			*counter = DROPPER(ipss, ipds_esp_bad_padlen);
    858 			return (B_FALSE);
    859 		}
    860 
    861 		/*
    862 		 * Fix the rest of the header.  The explicit - 2 is for the
    863 		 * padding length and the next-header bytes.
    864 		 */
    865 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen -
    866 		    2 - sizeof (esph_t) - ivlen);
    867 		ipha->ipha_hdr_checksum = 0;
    868 		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
    869 	} else {
    870 		if (ip6h->ip6_nxt == IPPROTO_ESP) {
    871 			ip6h->ip6_nxt = nexthdr;
    872 		} else {
    873 			ip_pkt_t ipp;
    874 
    875 			bzero(&ipp, sizeof (ipp));
    876 			(void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp,
    877 			    NULL);
    878 			if (ipp.ipp_dstopts != NULL) {
    879 				ipp.ipp_dstopts->ip6d_nxt = nexthdr;
    880 			} else if (ipp.ipp_rthdr != NULL) {
    881 				ipp.ipp_rthdr->ip6r_nxt = nexthdr;
    882 			} else if (ipp.ipp_hopopts != NULL) {
    883 				ipp.ipp_hopopts->ip6h_nxt = nexthdr;
    884 			} else {
    885 				/* Panic a DEBUG kernel. */
    886 				ASSERT(ipp.ipp_hopopts != NULL);
    887 				/* Otherwise, pretend it's IP + ESP. */
    888 				cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n");
    889 				ip6h->ip6_nxt = nexthdr;
    890 			}
    891 		}
    892 
    893 		if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) -
    894 		    ivlen) {
    895 			ESP_BUMP_STAT(espstack, bad_decrypt);
    896 			ipsec_rl_strlog(espstack->ipsecesp_netstack,
    897 			    info.mi_idnum, 0, 0,
    898 			    SL_ERROR | SL_WARN,
    899 			    "Corrupt ESP packet (v6 padlen too big).\n");
    900 			esp1dbg(espstack, ("padlen (%d) is greater than:\n",
    901 			    padlen));
    902 			esp1dbg(espstack,
    903 			    ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = "
    904 			    "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen)
    905 			    + sizeof (ip6_t)), ivlen,
    906 			    (unsigned)(ntohs(ip6h->ip6_plen) - 2 -
    907 			    sizeof (esph_t) - ivlen)));
    908 			*counter = DROPPER(ipss, ipds_esp_bad_padlen);
    909 			return (B_FALSE);
    910 		}
    911 
    912 
    913 		/*
    914 		 * Fix the rest of the header.  The explicit - 2 is for the
    915 		 * padding length and the next-header bytes.  IPv6 is nice,
    916 		 * because there's no hdr checksum!
    917 		 */
    918 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen -
    919 		    2 - sizeof (esph_t) - ivlen);
    920 	}
    921 
    922 	if (espstack->ipsecesp_padding_check > 0 && padlen > 0) {
    923 		/*
    924 		 * Weak padding check: compare last-byte to length, they
    925 		 * should be equal.
    926 		 */
    927 		lastpad = *lastbyte--;
    928 
    929 		if (padlen != lastpad) {
    930 			ipsec_rl_strlog(espstack->ipsecesp_netstack,
    931 			    info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
    932 			    "Corrupt ESP packet (lastpad != padlen).\n");
    933 			esp1dbg(espstack,
    934 			    ("lastpad (%d) not equal to padlen (%d):\n",
    935 			    lastpad, padlen));
    936 			ESP_BUMP_STAT(espstack, bad_padding);
    937 			*counter = DROPPER(ipss, ipds_esp_bad_padding);
    938 			return (B_FALSE);
    939 		}
    940 
    941 		/*
    942 		 * Strong padding check: Check all pad bytes to see that
    943 		 * they're ascending.  Go backwards using a descending counter
    944 		 * to verify.  padlen == 1 is checked by previous block, so
    945 		 * only bother if we've more than 1 byte of padding.
    946 		 * Consequently, start the check one byte before the location
    947 		 * of "lastpad".
    948 		 */
    949 		if (espstack->ipsecesp_padding_check > 1) {
    950 			/*
    951 			 * This assert may have to become an if and a pullup
    952 			 * if we start accepting multi-dblk mblks. For now,
    953 			 * though, any packet here will have been pulled up in
    954 			 * esp_inbound.
    955 			 */
    956 			ASSERT(MBLKL(scratch) >= lastpad + 3);
    957 
    958 			/*
    959 			 * Use "--lastpad" because we already checked the very
    960 			 * last pad byte previously.
    961 			 */
    962 			while (--lastpad != 0) {
    963 				if (lastpad != *lastbyte) {
    964 					ipsec_rl_strlog(
    965 					    espstack->ipsecesp_netstack,
    966 					    info.mi_idnum, 0, 0,
    967 					    SL_ERROR | SL_WARN, "Corrupt ESP "
    968 					    "packet (bad padding).\n");
    969 					esp1dbg(espstack,
    970 					    ("padding not in correct"
    971 					    " format:\n"));
    972 					ESP_BUMP_STAT(espstack, bad_padding);
    973 					*counter = DROPPER(ipss,
    974 					    ipds_esp_bad_padding);
    975 					return (B_FALSE);
    976 				}
    977 				lastbyte--;
    978 			}
    979 		}
    980 	}
    981 
    982 	/* Trim off the padding. */
    983 	ASSERT(data_mp->b_cont == NULL);
    984 	data_mp->b_wptr -= (padlen + 2);
    985 
    986 	/*
    987 	 * Remove the ESP header.
    988 	 *
    989 	 * The above assertions about data_mp's size will make this work.
    990 	 *
    991 	 * XXX  Question:  If I send up and get back a contiguous mblk,
    992 	 * would it be quicker to bcopy over, or keep doing the dupb stuff?
    993 	 * I go with copying for now.
    994 	 */
    995 
    996 	if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) &&
    997 	    IS_P2ALIGNED(ivlen, sizeof (uint32_t))) {
    998 		uint8_t *start = data_mp->b_rptr;
    999 		uint32_t *src, *dst;
   1000 
   1001 		src = (uint32_t *)(start + divpoint);
   1002 		dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen);
   1003 
   1004 		ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) &&
   1005 		    IS_P2ALIGNED(src, sizeof (uint32_t)));
   1006 
   1007 		do {
   1008 			src--;
   1009 			dst--;
   1010 			*dst = *src;
   1011 		} while (src != (uint32_t *)start);
   1012 
   1013 		data_mp->b_rptr = (uchar_t *)dst;
   1014 	} else {
   1015 		uint8_t *start = data_mp->b_rptr;
   1016 		uint8_t *src, *dst;
   1017 
   1018 		src = start + divpoint;
   1019 		dst = src + sizeof (esph_t) + ivlen;
   1020 
   1021 		do {
   1022 			src--;
   1023 			dst--;
   1024 			*dst = *src;
   1025 		} while (src != start);
   1026 
   1027 		data_mp->b_rptr = dst;
   1028 	}
   1029 
   1030 	esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n"));
   1031 	esp2dbg(espstack, (dump_msg(data_mp)));
   1032 
   1033 	return (B_TRUE);
   1034 }
   1035 
   1036 /*
   1037  * Updating use times can be tricky business if the ipsa_haspeer flag is
   1038  * set.  This function is called once in an SA's lifetime.
   1039  *
   1040  * Caller has to REFRELE "assoc" which is passed in.  This function has
   1041  * to REFRELE any peer SA that is obtained.
   1042  */
   1043 static void
   1044 esp_set_usetime(ipsa_t *assoc, boolean_t inbound)
   1045 {
   1046 	ipsa_t *inassoc, *outassoc;
   1047 	isaf_t *bucket;
   1048 	sadb_t *sp;
   1049 	int outhash;
   1050 	boolean_t isv6;
   1051 	netstack_t		*ns = assoc->ipsa_netstack;
   1052 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
   1053 
   1054 	/* No peer?  No problem! */
   1055 	if (!assoc->ipsa_haspeer) {
   1056 		sadb_set_usetime(assoc);
   1057 		return;
   1058 	}
   1059 
   1060 	/*
   1061 	 * Otherwise, we want to grab both the original assoc and its peer.
   1062 	 * There might be a race for this, but if it's a real race, the times
   1063 	 * will be out-of-synch by at most a second, and since our time
   1064 	 * granularity is a second, this won't be a problem.
   1065 	 *
   1066 	 * If we need tight synchronization on the peer SA, then we need to
   1067 	 * reconsider.
   1068 	 */
   1069 
   1070 	/* Use address length to select IPv6/IPv4 */
   1071 	isv6 = (assoc->ipsa_addrfam == AF_INET6);
   1072 	sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;
   1073 
   1074 	if (inbound) {
   1075 		inassoc = assoc;
   1076 		if (isv6) {
   1077 			outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
   1078 			    &inassoc->ipsa_dstaddr));
   1079 		} else {
   1080 			outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
   1081 			    &inassoc->ipsa_dstaddr));
   1082 		}
   1083 		bucket = &sp->sdb_of[outhash];
   1084 		mutex_enter(&bucket->isaf_lock);
   1085 		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
   1086 		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
   1087 		    inassoc->ipsa_addrfam);
   1088 		mutex_exit(&bucket->isaf_lock);
   1089 		if (outassoc == NULL) {
   1090 			/* Q: Do we wish to set haspeer == B_FALSE? */
   1091 			esp0dbg(("esp_set_usetime: "
   1092 			    "can't find peer for inbound.\n"));
   1093 			sadb_set_usetime(inassoc);
   1094 			return;
   1095 		}
   1096 	} else {
   1097 		outassoc = assoc;
   1098 		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
   1099 		mutex_enter(&bucket->isaf_lock);
   1100 		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
   1101 		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
   1102 		    outassoc->ipsa_addrfam);
   1103 		mutex_exit(&bucket->isaf_lock);
   1104 		if (inassoc == NULL) {
   1105 			/* Q: Do we wish to set haspeer == B_FALSE? */
   1106 			esp0dbg(("esp_set_usetime: "
   1107 			    "can't find peer for outbound.\n"));
   1108 			sadb_set_usetime(outassoc);
   1109 			return;
   1110 		}
   1111 	}
   1112 
   1113 	/* Update usetime on both. */
   1114 	sadb_set_usetime(inassoc);
   1115 	sadb_set_usetime(outassoc);
   1116 
   1117 	/*
   1118 	 * REFRELE any peer SA.
   1119 	 *
   1120 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
   1121 	 * them in { }.
   1122 	 */
   1123 	if (inbound) {
   1124 		IPSA_REFRELE(outassoc);
   1125 	} else {
   1126 		IPSA_REFRELE(inassoc);
   1127 	}
   1128 }
   1129 
   1130 /*
   1131  * Handle ESP inbound data for IPv4 and IPv6.
   1132  * On success returns B_TRUE, on failure returns B_FALSE and frees the
   1133  * mblk chain data_mp.
   1134  */
   1135 mblk_t *
   1136 esp_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira)
   1137 {
   1138 	esph_t *esph = (esph_t *)arg;
   1139 	ipsa_t *ipsa = ira->ira_ipsec_esp_sa;
   1140 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
   1141 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   1142 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   1143 
   1144 	/*
   1145 	 * We may wish to check replay in-range-only here as an optimization.
   1146 	 * Include the reality check of ipsa->ipsa_replay >
   1147 	 * ipsa->ipsa_replay_wsize for times when it's the first N packets,
   1148 	 * where N == ipsa->ipsa_replay_wsize.
   1149 	 *
   1150 	 * Another check that may come here later is the "collision" check.
   1151 	 * If legitimate packets flow quickly enough, this won't be a problem,
   1152 	 * but collisions may cause authentication algorithm crunching to
   1153 	 * take place when it doesn't need to.
   1154 	 */
   1155 	if (!sadb_replay_peek(ipsa, esph->esph_replay)) {
   1156 		ESP_BUMP_STAT(espstack, replay_early_failures);
   1157 		IP_ESP_BUMP_STAT(ipss, in_discards);
   1158 		ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
   1159 		    DROPPER(ipss, ipds_esp_early_replay),
   1160 		    &espstack->esp_dropper);
   1161 		BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
   1162 		return (NULL);
   1163 	}
   1164 
   1165 	/*
   1166 	 * Adjust the IP header's payload length to reflect the removal
   1167 	 * of the ICV.
   1168 	 */
   1169 	if (!(ira->ira_flags & IRAF_IS_IPV4)) {
   1170 		ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
   1171 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) -
   1172 		    ipsa->ipsa_mac_len);
   1173 	} else {
   1174 		ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
   1175 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) -
   1176 		    ipsa->ipsa_mac_len);
   1177 	}
   1178 
   1179 	/* submit the request to the crypto framework */
   1180 	return (esp_submit_req_inbound(data_mp, ira, ipsa,
   1181 	    (uint8_t *)esph - data_mp->b_rptr));
   1182 }
   1183 
   1184 /*
   1185  * Perform the really difficult work of inserting the proposed situation.
   1186  * Called while holding the algorithm lock.
   1187  */
   1188 static void
   1189 esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs,
   1190     netstack_t *ns)
   1191 {
   1192 	sadb_comb_t *comb = (sadb_comb_t *)(prop + 1);
   1193 	ipsec_action_t *ap;
   1194 	ipsec_prot_t *prot;
   1195 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   1196 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   1197 
   1198 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
   1199 
   1200 	prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
   1201 	prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t));
   1202 	*(uint32_t *)(&prop->sadb_prop_replay) = 0;	/* Quick zero-out! */
   1203 
   1204 	prop->sadb_prop_replay = espstack->ipsecesp_replay_size;
   1205 
   1206 	/*
   1207 	 * Based upon algorithm properties, and what-not, prioritize a
   1208 	 * proposal, based on the ordering of the ESP algorithms in the
   1209 	 * alternatives in the policy rule or socket that was placed
   1210 	 * in the acquire record.
   1211 	 *
   1212 	 * For each action in policy list
   1213 	 *   Add combination.  If I've hit limit, return.
   1214 	 */
   1215 
   1216 	for (ap = acqrec->ipsacq_act; ap != NULL;
   1217 	    ap = ap->ipa_next) {
   1218 		ipsec_alginfo_t *ealg = NULL;
   1219 		ipsec_alginfo_t *aalg = NULL;
   1220 
   1221 		if (ap->ipa_act.ipa_type != IPSEC_POLICY_APPLY)
   1222 			continue;
   1223 
   1224 		prot = &ap->ipa_act.ipa_apply;
   1225 
   1226 		if (!(prot->ipp_use_esp))
   1227 			continue;
   1228 
   1229 		if (prot->ipp_esp_auth_alg != 0) {
   1230 			aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
   1231 			    [prot->ipp_esp_auth_alg];
   1232 			if (aalg == NULL || !ALG_VALID(aalg))
   1233 				continue;
   1234 		}
   1235 
   1236 		ASSERT(prot->ipp_encr_alg > 0);
   1237 		ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
   1238 		    [prot->ipp_encr_alg];
   1239 		if (ealg == NULL || !ALG_VALID(ealg))
   1240 			continue;
   1241 
   1242 		comb->sadb_comb_flags = 0;
   1243 		comb->sadb_comb_reserved = 0;
   1244 		comb->sadb_comb_encrypt = ealg->alg_id;
   1245 		comb->sadb_comb_encrypt_minbits =
   1246 		    MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits);
   1247 		comb->sadb_comb_encrypt_maxbits =
   1248 		    MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits);
   1249 
   1250 		if (aalg == NULL) {
   1251 			comb->sadb_comb_auth = 0;
   1252 			comb->sadb_comb_auth_minbits = 0;
   1253 			comb->sadb_comb_auth_maxbits = 0;
   1254 		} else {
   1255 			comb->sadb_comb_auth = aalg->alg_id;
   1256 			comb->sadb_comb_auth_minbits =
   1257 			    MAX(prot->ipp_espa_minbits, aalg->alg_ef_minbits);
   1258 			comb->sadb_comb_auth_maxbits =
   1259 			    MIN(prot->ipp_espa_maxbits, aalg->alg_ef_maxbits);
   1260 		}
   1261 
   1262 		/*
   1263 		 * The following may be based on algorithm
   1264 		 * properties, but in the meantime, we just pick
   1265 		 * some good, sensible numbers.  Key mgmt. can
   1266 		 * (and perhaps should) be the place to finalize
   1267 		 * such decisions.
   1268 		 */
   1269 
   1270 		/*
   1271 		 * No limits on allocations, since we really don't
   1272 		 * support that concept currently.
   1273 		 */
   1274 		comb->sadb_comb_soft_allocations = 0;
   1275 		comb->sadb_comb_hard_allocations = 0;
   1276 
   1277 		/*
   1278 		 * These may want to come from policy rule..
   1279 		 */
   1280 		comb->sadb_comb_soft_bytes =
   1281 		    espstack->ipsecesp_default_soft_bytes;
   1282 		comb->sadb_comb_hard_bytes =
   1283 		    espstack->ipsecesp_default_hard_bytes;
   1284 		comb->sadb_comb_soft_addtime =
   1285 		    espstack->ipsecesp_default_soft_addtime;
   1286 		comb->sadb_comb_hard_addtime =
   1287 		    espstack->ipsecesp_default_hard_addtime;
   1288 		comb->sadb_comb_soft_usetime =
   1289 		    espstack->ipsecesp_default_soft_usetime;
   1290 		comb->sadb_comb_hard_usetime =
   1291 		    espstack->ipsecesp_default_hard_usetime;
   1292 
   1293 		prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
   1294 		if (--combs == 0)
   1295 			break;	/* out of space.. */
   1296 		comb++;
   1297 	}
   1298 }
   1299 
   1300 /*
   1301  * Prepare and actually send the SADB_ACQUIRE message to PF_KEY.
   1302  */
   1303 static void
   1304 esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended, netstack_t *ns)
   1305 {
   1306 	uint_t combs;
   1307 	sadb_msg_t *samsg;
   1308 	sadb_prop_t *prop;
   1309 	mblk_t *pfkeymp, *msgmp;
   1310 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   1311 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   1312 
   1313 	ESP_BUMP_STAT(espstack, acquire_requests);
   1314 
   1315 	if (espstack->esp_pfkey_q == NULL) {
   1316 		mutex_exit(&acqrec->ipsacq_lock);
   1317 		return;
   1318 	}
   1319 
   1320 	/* Set up ACQUIRE. */
   1321 	pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_ESP,
   1322 	    ns->netstack_ipsec);
   1323 	if (pfkeymp == NULL) {
   1324 		esp0dbg(("sadb_setup_acquire failed.\n"));
   1325 		mutex_exit(&acqrec->ipsacq_lock);
   1326 		return;
   1327 	}
   1328 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
   1329 	combs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
   1330 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
   1331 	msgmp = pfkeymp->b_cont;
   1332 	samsg = (sadb_msg_t *)(msgmp->b_rptr);
   1333 
   1334 	/* Insert proposal here. */
   1335 
   1336 	prop = (sadb_prop_t *)(((uint64_t *)samsg) + samsg->sadb_msg_len);
   1337 	esp_insert_prop(prop, acqrec, combs, ns);
   1338 	samsg->sadb_msg_len += prop->sadb_prop_len;
   1339 	msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len);
   1340 
   1341 	mutex_exit(&ipss->ipsec_alg_lock);
   1342 
   1343 	/*
   1344 	 * Must mutex_exit() before sending PF_KEY message up, in
   1345 	 * order to avoid recursive mutex_enter() if there are no registered
   1346 	 * listeners.
   1347 	 *
   1348 	 * Once I've sent the message, I'm cool anyway.
   1349 	 */
   1350 	mutex_exit(&acqrec->ipsacq_lock);
   1351 	if (extended != NULL) {
   1352 		putnext(espstack->esp_pfkey_q, extended);
   1353 	}
   1354 	putnext(espstack->esp_pfkey_q, pfkeymp);
   1355 }
   1356 
   1357 /* XXX refactor me */
   1358 /*
   1359  * Handle the SADB_GETSPI message.  Create a larval SA.
   1360  */
   1361 static void
   1362 esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
   1363 {
   1364 	ipsa_t *newbie, *target;
   1365 	isaf_t *outbound, *inbound;
   1366 	int rc, diagnostic;
   1367 	sadb_sa_t *assoc;
   1368 	keysock_out_t *kso;
   1369 	uint32_t newspi;
   1370 
   1371 	/*
   1372 	 * Randomly generate a proposed SPI value
   1373 	 */
   1374 	if (cl_inet_getspi != NULL) {
   1375 		cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid,
   1376 		    IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL);
   1377 	} else {
   1378 		(void) random_get_pseudo_bytes((uint8_t *)&newspi,
   1379 		    sizeof (uint32_t));
   1380 	}
   1381 	newbie = sadb_getspi(ksi, newspi, &diagnostic,
   1382 	    espstack->ipsecesp_netstack, IPPROTO_ESP);
   1383 
   1384 	if (newbie == NULL) {
   1385 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic,
   1386 		    ksi->ks_in_serial);
   1387 		return;
   1388 	} else if (newbie == (ipsa_t *)-1) {
   1389 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
   1390 		    ksi->ks_in_serial);
   1391 		return;
   1392 	}
   1393 
   1394 	/*
   1395 	 * XXX - We may randomly collide.  We really should recover from this.
   1396 	 *	 Unfortunately, that could require spending way-too-much-time
   1397 	 *	 in here.  For now, let the user retry.
   1398 	 */
   1399 
   1400 	if (newbie->ipsa_addrfam == AF_INET6) {
   1401 		outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6,
   1402 		    *(uint32_t *)(newbie->ipsa_dstaddr));
   1403 		inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6,
   1404 		    newbie->ipsa_spi);
   1405 	} else {
   1406 		ASSERT(newbie->ipsa_addrfam == AF_INET);
   1407 		outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4,
   1408 		    *(uint32_t *)(newbie->ipsa_dstaddr));
   1409 		inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4,
   1410 		    newbie->ipsa_spi);
   1411 	}
   1412 
   1413 	mutex_enter(&outbound->isaf_lock);
   1414 	mutex_enter(&inbound->isaf_lock);
   1415 
   1416 	/*
   1417 	 * Check for collisions (i.e. did sadb_getspi() return with something
   1418 	 * that already exists?).
   1419 	 *
   1420 	 * Try outbound first.  Even though SADB_GETSPI is traditionally
   1421 	 * for inbound SAs, you never know what a user might do.
   1422 	 */
   1423 	target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
   1424 	    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
   1425 	if (target == NULL) {
   1426 		target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
   1427 		    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
   1428 		    newbie->ipsa_addrfam);
   1429 	}
   1430 
   1431 	/*
   1432 	 * I don't have collisions elsewhere!
   1433 	 * (Nor will I because I'm still holding inbound/outbound locks.)
   1434 	 */
   1435 
   1436 	if (target != NULL) {
   1437 		rc = EEXIST;
   1438 		IPSA_REFRELE(target);
   1439 	} else {
   1440 		/*
   1441 		 * sadb_insertassoc() also checks for collisions, so
   1442 		 * if there's a colliding entry, rc will be set
   1443 		 * to EEXIST.
   1444 		 */
   1445 		rc = sadb_insertassoc(newbie, inbound);
   1446 		newbie->ipsa_hardexpiretime = gethrestime_sec();
   1447 		newbie->ipsa_hardexpiretime +=
   1448 		    espstack->ipsecesp_larval_timeout;
   1449 	}
   1450 
   1451 	/*
   1452 	 * Can exit outbound mutex.  Hold inbound until we're done
   1453 	 * with newbie.
   1454 	 */
   1455 	mutex_exit(&outbound->isaf_lock);
   1456 
   1457 	if (rc != 0) {
   1458 		mutex_exit(&inbound->isaf_lock);
   1459 		IPSA_REFRELE(newbie);
   1460 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc,
   1461 		    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
   1462 		return;
   1463 	}
   1464 
   1465 
   1466 	/* Can write here because I'm still holding the bucket lock. */
   1467 	newbie->ipsa_type = SADB_SATYPE_ESP;
   1468 
   1469 	/*
   1470 	 * Construct successful return message. We have one thing going
   1471 	 * for us in PF_KEY v2.  That's the fact that
   1472 	 *	sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
   1473 	 */
   1474 	assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
   1475 	assoc->sadb_sa_exttype = SADB_EXT_SA;
   1476 	assoc->sadb_sa_spi = newbie->ipsa_spi;
   1477 	*((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
   1478 	mutex_exit(&inbound->isaf_lock);
   1479 
   1480 	/* Convert KEYSOCK_IN to KEYSOCK_OUT. */
   1481 	kso = (keysock_out_t *)ksi;
   1482 	kso->ks_out_len = sizeof (*kso);
   1483 	kso->ks_out_serial = ksi->ks_in_serial;
   1484 	kso->ks_out_type = KEYSOCK_OUT;
   1485 
   1486 	/*
   1487 	 * Can safely putnext() to esp_pfkey_q, because this is a turnaround
   1488 	 * from the esp_pfkey_q.
   1489 	 */
   1490 	putnext(espstack->esp_pfkey_q, mp);
   1491 }
   1492 
   1493 /*
   1494  * Insert the ESP header into a packet.  Duplicate an mblk, and insert a newly
   1495  * allocated mblk with the ESP header in between the two.
   1496  */
   1497 static boolean_t
   1498 esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint,
   1499     ipsecesp_stack_t *espstack)
   1500 {
   1501 	mblk_t *split_mp = mp;
   1502 	uint_t wheretodiv = divpoint;
   1503 
   1504 	while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) {
   1505 		wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr);
   1506 		split_mp = split_mp->b_cont;
   1507 		ASSERT(split_mp != NULL);
   1508 	}
   1509 
   1510 	if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) {
   1511 		mblk_t *scratch;
   1512 
   1513 		/* "scratch" is the 2nd half, split_mp is the first. */
   1514 		scratch = dupb(split_mp);
   1515 		if (scratch == NULL) {
   1516 			esp1dbg(espstack,
   1517 			    ("esp_insert_esp: can't allocate scratch.\n"));
   1518 			return (B_FALSE);
   1519 		}
   1520 		/* NOTE:  dupb() doesn't set b_cont appropriately. */
   1521 		scratch->b_cont = split_mp->b_cont;
   1522 		scratch->b_rptr += wheretodiv;
   1523 		split_mp->b_wptr = split_mp->b_rptr + wheretodiv;
   1524 		split_mp->b_cont = scratch;
   1525 	}
   1526 	/*
   1527 	 * At this point, split_mp is exactly "wheretodiv" bytes long, and
   1528 	 * holds the end of the pre-ESP part of the datagram.
   1529 	 */
   1530 	esp_mp->b_cont = split_mp->b_cont;
   1531 	split_mp->b_cont = esp_mp;
   1532 
   1533 	return (B_TRUE);
   1534 }
   1535 
   1536 /*
   1537  * Section 7 of RFC 3947 says:
   1538  *
   1539  * 7.  Recovering from the Expiring NAT Mappings
   1540  *
   1541  *    There are cases where NAT box decides to remove mappings that are still
   1542  *    alive (for example, when the keepalive interval is too long, or when the
   1543  *    NAT box is rebooted).  To recover from this, ends that are NOT behind
   1544  *    NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from
   1545  *    the other end to determine which IP and port addresses should be used.
   1546  *    The host behind dynamic NAT MUST NOT do this, as otherwise it opens a
   1547  *    DoS attack possibility because the IP address or port of the other host
   1548  *    will not change (it is not behind NAT).
   1549  *
   1550  *    Keepalives cannot be used for these purposes, as they are not
   1551  *    authenticated, but any IKE authenticated IKE packet or ESP packet can be
   1552  *    used to detect whether the IP address or the port has changed.
   1553  *
   1554  * The following function will check an SA and its explicitly-set pair to see
   1555  * if the NAT-T remote port matches the received packet (which must have
   1556  * passed ESP authentication, see esp_in_done() for the caller context).  If
   1557  * there is a mismatch, the SAs are updated.  It is not important if we race
   1558  * with a transmitting thread, as if there is a transmitting thread, it will
   1559  * merely emit a packet that will most-likely be dropped.
   1560  *
   1561  * "ports" are ordered src,dst, and assoc is an inbound SA, where src should
   1562  * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port.
   1563  */
   1564 #ifdef _LITTLE_ENDIAN
   1565 #define	FIRST_16(x) ((x) & 0xFFFF)
   1566 #define	NEXT_16(x) (((x) >> 16) & 0xFFFF)
   1567 #else
   1568 #define	FIRST_16(x) (((x) >> 16) & 0xFFFF)
   1569 #define	NEXT_16(x) ((x) & 0xFFFF)
   1570 #endif
   1571 static void
   1572 esp_port_freshness(uint32_t ports, ipsa_t *assoc)
   1573 {
   1574 	uint16_t remote = FIRST_16(ports);
   1575 	uint16_t local = NEXT_16(ports);
   1576 	ipsa_t *outbound_peer;
   1577 	isaf_t *bucket;
   1578 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
   1579 
   1580 	/* We found a conn_t, therefore local != 0. */
   1581 	ASSERT(local != 0);
   1582 	/* Assume an IPv4 SA. */
   1583 	ASSERT(assoc->ipsa_addrfam == AF_INET);
   1584 
   1585 	/*
   1586 	 * On-the-wire rport == 0 means something's very wrong.
   1587 	 * An unpaired SA is also useless to us.
   1588 	 * If we are behind the NAT, don't bother.
   1589 	 * A zero local NAT port defaults to 4500, so check that too.
   1590 	 * And, of course, if the ports already match, we don't need to
   1591 	 * bother.
   1592 	 */
   1593 	if (remote == 0 || assoc->ipsa_otherspi == 0 ||
   1594 	    (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) ||
   1595 	    (assoc->ipsa_remote_nat_port == 0 &&
   1596 	    remote == htons(IPPORT_IKE_NATT)) ||
   1597 	    remote == assoc->ipsa_remote_nat_port)
   1598 		return;
   1599 
   1600 	/* Try and snag the peer.   NOTE:  Assume IPv4 for now. */
   1601 	bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4),
   1602 	    assoc->ipsa_srcaddr[0]);
   1603 	mutex_enter(&bucket->isaf_lock);
   1604 	outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi,
   1605 	    assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET);
   1606 	mutex_exit(&bucket->isaf_lock);
   1607 
   1608 	/* We probably lost a race to a deleting or expiring thread. */
   1609 	if (outbound_peer == NULL)
   1610 		return;
   1611 
   1612 	/*
   1613 	 * Hold the mutexes for both SAs so we don't race another inbound
   1614 	 * thread.  A lock-entry order shouldn't matter, since all other
   1615 	 * per-ipsa locks are individually held-then-released.
   1616 	 *
   1617 	 * Luckily, this has nothing to do with the remote-NAT address,
   1618 	 * so we don't have to re-scribble the cached-checksum differential.
   1619 	 */
   1620 	mutex_enter(&outbound_peer->ipsa_lock);
   1621 	mutex_enter(&assoc->ipsa_lock);
   1622 	outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port =
   1623 	    remote;
   1624 	mutex_exit(&assoc->ipsa_lock);
   1625 	mutex_exit(&outbound_peer->ipsa_lock);
   1626 	IPSA_REFRELE(outbound_peer);
   1627 	ESP_BUMP_STAT(espstack, sa_port_renumbers);
   1628 }
   1629 /*
   1630  * Finish processing of an inbound ESP packet after processing by the
   1631  * crypto framework.
   1632  * - Remove the ESP header.
   1633  * - Send packet back to IP.
   1634  * If authentication was performed on the packet, this function is called
   1635  * only if the authentication succeeded.
   1636  * On success returns B_TRUE, on failure returns B_FALSE and frees the
   1637  * mblk chain data_mp.
   1638  */
   1639 static mblk_t *
   1640 esp_in_done(mblk_t *data_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic)
   1641 {
   1642 	ipsa_t *assoc;
   1643 	uint_t espstart;
   1644 	uint32_t ivlen = 0;
   1645 	uint_t processed_len;
   1646 	esph_t *esph;
   1647 	kstat_named_t *counter;
   1648 	boolean_t is_natt;
   1649 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
   1650 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   1651 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   1652 
   1653 	assoc = ira->ira_ipsec_esp_sa;
   1654 	ASSERT(assoc != NULL);
   1655 
   1656 	is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
   1657 
   1658 	/* get the pointer to the ESP header */
   1659 	if (assoc->ipsa_encr_alg == SADB_EALG_NULL) {
   1660 		/* authentication-only ESP */
   1661 		espstart = ic->ic_crypto_data.cd_offset;
   1662 		processed_len = ic->ic_crypto_data.cd_length;
   1663 	} else {
   1664 		/* encryption present */
   1665 		ivlen = assoc->ipsa_iv_len;
   1666 		if (assoc->ipsa_auth_alg == SADB_AALG_NONE) {
   1667 			/* encryption-only ESP */
   1668 			espstart = ic->ic_crypto_data.cd_offset -
   1669 			    sizeof (esph_t) - assoc->ipsa_iv_len;
   1670 			processed_len = ic->ic_crypto_data.cd_length +
   1671 			    ivlen;
   1672 		} else {
   1673 			/* encryption with authentication */
   1674 			espstart = ic->ic_crypto_dual_data.dd_offset1;
   1675 			processed_len = ic->ic_crypto_dual_data.dd_len2 +
   1676 			    ivlen;
   1677 		}
   1678 	}
   1679 
   1680 	esph = (esph_t *)(data_mp->b_rptr + espstart);
   1681 
   1682 	if (assoc->ipsa_auth_alg != IPSA_AALG_NONE ||
   1683 	    (assoc->ipsa_flags & IPSA_F_COMBINED)) {
   1684 		/*
   1685 		 * Authentication passed if we reach this point.
   1686 		 * Packets with authentication will have the ICV
   1687 		 * after the crypto data. Adjust b_wptr before
   1688 		 * making padlen checks.
   1689 		 */
   1690 		ESP_BUMP_STAT(espstack, good_auth);
   1691 		data_mp->b_wptr -= assoc->ipsa_mac_len;
   1692 
   1693 		/*
   1694 		 * Check replay window here!
   1695 		 * For right now, assume keysock will set the replay window
   1696 		 * size to zero for SAs that have an unspecified sender.
   1697 		 * This may change...
   1698 		 */
   1699 
   1700 		if (!sadb_replay_check(assoc, esph->esph_replay)) {
   1701 			/*
   1702 			 * Log the event. As of now we print out an event.
   1703 			 * Do not print the replay failure number, or else
   1704 			 * syslog cannot collate the error messages.  Printing
   1705 			 * the replay number that failed opens a denial-of-
   1706 			 * service attack.
   1707 			 */
   1708 			ipsec_assocfailure(info.mi_idnum, 0, 0,
   1709 			    SL_ERROR | SL_WARN,
   1710 			    "Replay failed for ESP spi 0x%x, dst %s.\n",
   1711 			    assoc->ipsa_spi, assoc->ipsa_dstaddr,
   1712 			    assoc->ipsa_addrfam, espstack->ipsecesp_netstack);
   1713 			ESP_BUMP_STAT(espstack, replay_failures);
   1714 			counter = DROPPER(ipss, ipds_esp_replay);
   1715 			goto drop_and_bail;
   1716 		}
   1717 
   1718 		if (is_natt) {
   1719 			ASSERT(ira->ira_flags & IRAF_ESP_UDP_PORTS);
   1720 			ASSERT(ira->ira_esp_udp_ports != 0);
   1721 			esp_port_freshness(ira->ira_esp_udp_ports, assoc);
   1722 		}
   1723 	}
   1724 
   1725 	esp_set_usetime(assoc, B_TRUE);
   1726 
   1727 	if (!esp_age_bytes(assoc, processed_len, B_TRUE)) {
   1728 		/* The ipsa has hit hard expiration, LOG and AUDIT. */
   1729 		ipsec_assocfailure(info.mi_idnum, 0, 0,
   1730 		    SL_ERROR | SL_WARN,
   1731 		    "ESP association 0x%x, dst %s had bytes expire.\n",
   1732 		    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
   1733 		    espstack->ipsecesp_netstack);
   1734 		ESP_BUMP_STAT(espstack, bytes_expired);
   1735 		counter = DROPPER(ipss, ipds_esp_bytes_expire);
   1736 		goto drop_and_bail;
   1737 	}
   1738 
   1739 	/*
   1740 	 * Remove ESP header and padding from packet.  I hope the compiler
   1741 	 * spews "branch, predict taken" code for this.
   1742 	 */
   1743 
   1744 	if (esp_strip_header(data_mp, (ira->ira_flags & IRAF_IS_IPV4),
   1745 	    ivlen, &counter, espstack)) {
   1746 
   1747 		if (is_system_labeled() && assoc->ipsa_tsl != NULL) {
   1748 			if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) {
   1749 				ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
   1750 				    DROPPER(ipss, ipds_ah_nomem),
   1751 				    &espstack->esp_dropper);
   1752 				BUMP_MIB(ira->ira_ill->ill_ip_mib,
   1753 				    ipIfStatsInDiscards);
   1754 				return (NULL);
   1755 			}
   1756 		}
   1757 		if (is_natt)
   1758 			return (esp_fix_natt_checksums(data_mp, assoc));
   1759 
   1760 		if (assoc->ipsa_state == IPSA_STATE_IDLE) {
   1761 			/*
   1762 			 * Cluster buffering case.  Tell caller that we're
   1763 			 * handling the packet.
   1764 			 */
   1765 			sadb_buf_pkt(assoc, data_mp, ira);
   1766 			return (NULL);
   1767 		}
   1768 
   1769 		return (data_mp);
   1770 	}
   1771 
   1772 	esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n"));
   1773 drop_and_bail:
   1774 	IP_ESP_BUMP_STAT(ipss, in_discards);
   1775 	ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, counter,
   1776 	    &espstack->esp_dropper);
   1777 	BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
   1778 	return (NULL);
   1779 }
   1780 
   1781 /*
   1782  * Called upon failing the inbound ICV check. The message passed as
   1783  * argument is freed.
   1784  */
   1785 static void
   1786 esp_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira)
   1787 {
   1788 	ipsa_t		*assoc = ira->ira_ipsec_esp_sa;
   1789 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
   1790 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   1791 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   1792 
   1793 	/*
   1794 	 * Log the event. Don't print to the console, block
   1795 	 * potential denial-of-service attack.
   1796 	 */
   1797 	ESP_BUMP_STAT(espstack, bad_auth);
   1798 
   1799 	ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
   1800 	    "ESP Authentication failed for spi 0x%x, dst %s.\n",
   1801 	    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
   1802 	    espstack->ipsecesp_netstack);
   1803 
   1804 	IP_ESP_BUMP_STAT(ipss, in_discards);
   1805 	ip_drop_packet(mp, B_TRUE, ira->ira_ill,
   1806 	    DROPPER(ipss, ipds_esp_bad_auth),
   1807 	    &espstack->esp_dropper);
   1808 }
   1809 
   1810 
   1811 /*
   1812  * Invoked for outbound packets after ESP processing. If the packet
   1813  * also requires AH, performs the AH SA selection and AH processing.
   1814  * Returns B_TRUE if the AH processing was not needed or if it was
   1815  * performed successfully. Returns B_FALSE and consumes the passed mblk
   1816  * if AH processing was required but could not be performed.
   1817  *
   1818  * Returns data_mp unless data_mp was consumed/queued.
   1819  */
   1820 static mblk_t *
   1821 esp_do_outbound_ah(mblk_t *data_mp, ip_xmit_attr_t *ixa)
   1822 {
   1823 	ipsec_action_t *ap;
   1824 
   1825 	ap = ixa->ixa_ipsec_action;
   1826 	if (ap == NULL) {
   1827 		ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
   1828 		ap = pp->ipsp_act;
   1829 	}
   1830 
   1831 	if (!ap->ipa_want_ah)
   1832 		return (data_mp);
   1833 
   1834 	/*
   1835 	 * Normally the AH SA would have already been put in place
   1836 	 * but it could have been flushed so we need to look for it.
   1837 	 */
   1838 	if (ixa->ixa_ipsec_ah_sa == NULL) {
   1839 		if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) {
   1840 			sadb_acquire(data_mp, ixa, B_TRUE, B_FALSE);
   1841 			return (NULL);
   1842 		}
   1843 	}
   1844 	ASSERT(ixa->ixa_ipsec_ah_sa != NULL);
   1845 
   1846 	data_mp = ixa->ixa_ipsec_ah_sa->ipsa_output_func(data_mp, ixa);
   1847 	return (data_mp);
   1848 }
   1849 
   1850 
   1851 /*
   1852  * Kernel crypto framework callback invoked after completion of async
   1853  * crypto requests for outbound packets.
   1854  */
   1855 static void
   1856 esp_kcf_callback_outbound(void *arg, int status)
   1857 {
   1858 	mblk_t		*mp = (mblk_t *)arg;
   1859 	mblk_t		*async_mp;
   1860 	netstack_t	*ns;
   1861 	ipsec_stack_t	*ipss;
   1862 	ipsecesp_stack_t *espstack;
   1863 	mblk_t		*data_mp;
   1864 	ip_xmit_attr_t	ixas;
   1865 	ipsec_crypto_t	*ic;
   1866 	ill_t		*ill;
   1867 
   1868 	/*
   1869 	 * First remove the ipsec_crypto_t mblk
   1870 	 * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
   1871 	 */
   1872 	async_mp = ipsec_remove_crypto_data(mp, &ic);
   1873 	ASSERT(async_mp != NULL);
   1874 
   1875 	/*
   1876 	 * Extract the ip_xmit_attr_t from the first mblk.
   1877 	 * Verifies that the netstack and ill is still around; could
   1878 	 * have vanished while kEf was doing its work.
   1879 	 * On succesful return we have a nce_t and the ill/ipst can't
   1880 	 * disappear until we do the nce_refrele in ixa_cleanup.
   1881 	 */
   1882 	data_mp = async_mp->b_cont;
   1883 	async_mp->b_cont = NULL;
   1884 	if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) {
   1885 		/* Disappeared on us - no ill/ipst for MIB */
   1886 		/* We have nowhere to do stats since ixa_ipst could be NULL */
   1887 		if (ixas.ixa_nce != NULL) {
   1888 			ill = ixas.ixa_nce->nce_ill;
   1889 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   1890 			ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
   1891 		}
   1892 		freemsg(data_mp);
   1893 		goto done;
   1894 	}
   1895 	ns = ixas.ixa_ipst->ips_netstack;
   1896 	espstack = ns->netstack_ipsecesp;
   1897 	ipss = ns->netstack_ipsec;
   1898 	ill = ixas.ixa_nce->nce_ill;
   1899 
   1900 	if (status == CRYPTO_SUCCESS) {
   1901 		/*
   1902 		 * If a ICV was computed, it was stored by the
   1903 		 * crypto framework at the end of the packet.
   1904 		 */
   1905 		ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
   1906 
   1907 		esp_set_usetime(ixas.ixa_ipsec_esp_sa, B_FALSE);
   1908 		/* NAT-T packet. */
   1909 		if (IPH_HDR_VERSION(ipha) == IP_VERSION &&
   1910 		    ipha->ipha_protocol == IPPROTO_UDP)
   1911 			esp_prepare_udp(ns, data_mp, ipha);
   1912 
   1913 		/* do AH processing if needed */
   1914 		data_mp = esp_do_outbound_ah(data_mp, &ixas);
   1915 		if (data_mp == NULL)
   1916 			goto done;
   1917 
   1918 		(void) ip_output_post_ipsec(data_mp, &ixas);
   1919 	} else {
   1920 		/* Outbound shouldn't see invalid MAC */
   1921 		ASSERT(status != CRYPTO_INVALID_MAC);
   1922 
   1923 		esp1dbg(espstack,
   1924 		    ("esp_kcf_callback_outbound: crypto failed with 0x%x\n",
   1925 		    status));
   1926 		ESP_BUMP_STAT(espstack, crypto_failures);
   1927 		ESP_BUMP_STAT(espstack, out_discards);
   1928 		ip_drop_packet(data_mp, B_FALSE, ill,
   1929 		    DROPPER(ipss, ipds_esp_crypto_failed),
   1930 		    &espstack->esp_dropper);
   1931 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   1932 	}
   1933 done:
   1934 	ixa_cleanup(&ixas);
   1935 	(void) ipsec_free_crypto_data(mp);
   1936 }
   1937 
   1938 /*
   1939  * Kernel crypto framework callback invoked after completion of async
   1940  * crypto requests for inbound packets.
   1941  */
   1942 static void
   1943 esp_kcf_callback_inbound(void *arg, int status)
   1944 {
   1945 	mblk_t		*mp = (mblk_t *)arg;
   1946 	mblk_t		*async_mp;
   1947 	netstack_t	*ns;
   1948 	ipsecesp_stack_t *espstack;
   1949 	ipsec_stack_t	*ipss;
   1950 	mblk_t		*data_mp;
   1951 	ip_recv_attr_t	iras;
   1952 	ipsec_crypto_t	*ic;
   1953 
   1954 	/*
   1955 	 * First remove the ipsec_crypto_t mblk
   1956 	 * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
   1957 	 */
   1958 	async_mp = ipsec_remove_crypto_data(mp, &ic);
   1959 	ASSERT(async_mp != NULL);
   1960 
   1961 	/*
   1962 	 * Extract the ip_recv_attr_t from the first mblk.
   1963 	 * Verifies that the netstack and ill is still around; could
   1964 	 * have vanished while kEf was doing its work.
   1965 	 */
   1966 	data_mp = async_mp->b_cont;
   1967 	async_mp->b_cont = NULL;
   1968 	if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
   1969 		/* The ill or ip_stack_t disappeared on us */
   1970 		ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
   1971 		freemsg(data_mp);
   1972 		goto done;
   1973 	}
   1974 
   1975 	ns = iras.ira_ill->ill_ipst->ips_netstack;
   1976 	espstack = ns->netstack_ipsecesp;
   1977 	ipss = ns->netstack_ipsec;
   1978 
   1979 	if (status == CRYPTO_SUCCESS) {
   1980 		data_mp = esp_in_done(data_mp, &iras, ic);
   1981 		if (data_mp == NULL)
   1982 			goto done;
   1983 
   1984 		/* finish IPsec processing */
   1985 		ip_input_post_ipsec(data_mp, &iras);
   1986 	} else if (status == CRYPTO_INVALID_MAC) {
   1987 		esp_log_bad_auth(data_mp, &iras);
   1988 	} else {
   1989 		esp1dbg(espstack,
   1990 		    ("esp_kcf_callback: crypto failed with 0x%x\n",
   1991 		    status));
   1992 		ESP_BUMP_STAT(espstack, crypto_failures);
   1993 		IP_ESP_BUMP_STAT(ipss, in_discards);
   1994 		ip_drop_packet(data_mp, B_TRUE, iras.ira_ill,
   1995 		    DROPPER(ipss, ipds_esp_crypto_failed),
   1996 		    &espstack->esp_dropper);
   1997 		BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards);
   1998 	}
   1999 done:
   2000 	ira_cleanup(&iras, B_TRUE);
   2001 	(void) ipsec_free_crypto_data(mp);
   2002 }
   2003 
   2004 /*
   2005  * Invoked on crypto framework failure during inbound and outbound processing.
   2006  */
   2007 static void
   2008 esp_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc,
   2009     ill_t *ill, ipsecesp_stack_t *espstack)
   2010 {
   2011 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
   2012 
   2013 	esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n",
   2014 	    is_inbound ? "inbound" : "outbound", kef_rc));
   2015 	ip_drop_packet(data_mp, is_inbound, ill,
   2016 	    DROPPER(ipss, ipds_esp_crypto_failed),
   2017 	    &espstack->esp_dropper);
   2018 	ESP_BUMP_STAT(espstack, crypto_failures);
   2019 	if (is_inbound)
   2020 		IP_ESP_BUMP_STAT(ipss, in_discards);
   2021 	else
   2022 		ESP_BUMP_STAT(espstack, out_discards);
   2023 }
   2024 
   2025 /*
   2026  * A statement-equivalent macro, _cr MUST point to a modifiable
   2027  * crypto_call_req_t.
   2028  */
   2029 #define	ESP_INIT_CALLREQ(_cr, _mp, _callback)				\
   2030 	(_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_ALWAYS_QUEUE;	\
   2031 	(_cr)->cr_callback_arg = (_mp);				\
   2032 	(_cr)->cr_callback_func = (_callback)
   2033 
   2034 #define	ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) {			\
   2035 	(mac)->cd_format = CRYPTO_DATA_RAW;				\
   2036 	(mac)->cd_offset = 0;						\
   2037 	(mac)->cd_length = icvlen;					\
   2038 	(mac)->cd_raw.iov_base = (char *)icvbuf;			\
   2039 	(mac)->cd_raw.iov_len = icvlen;					\
   2040 }
   2041 
   2042 #define	ESP_INIT_CRYPTO_DATA(data, mp, off, len) {			\
   2043 	if (MBLKL(mp) >= (len) + (off)) {				\
   2044 		(data)->cd_format = CRYPTO_DATA_RAW;			\
   2045 		(data)->cd_raw.iov_base = (char *)(mp)->b_rptr;		\
   2046 		(data)->cd_raw.iov_len = MBLKL(mp);			\
   2047 		(data)->cd_offset = off;				\
   2048 	} else {							\
   2049 		(data)->cd_format = CRYPTO_DATA_MBLK;			\
   2050 		(data)->cd_mp = mp;			       		\
   2051 		(data)->cd_offset = off;				\
   2052 	}								\
   2053 	(data)->cd_length = len;					\
   2054 }
   2055 
   2056 #define	ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) {	\
   2057 	(data)->dd_format = CRYPTO_DATA_MBLK;				\
   2058 	(data)->dd_mp = mp;						\
   2059 	(data)->dd_len1 = len1;						\
   2060 	(data)->dd_offset1 = off1;					\
   2061 	(data)->dd_len2 = len2;						\
   2062 	(data)->dd_offset2 = off2;					\
   2063 }
   2064 
   2065 /*
   2066  * Returns data_mp if successfully completed the request. Returns
   2067  * NULL if it failed (and increments InDiscards) or if it is pending.
   2068  */
   2069 static mblk_t *
   2070 esp_submit_req_inbound(mblk_t *esp_mp, ip_recv_attr_t *ira,
   2071     ipsa_t *assoc, uint_t esph_offset)
   2072 {
   2073 	uint_t auth_offset, msg_len, auth_len;
   2074 	crypto_call_req_t call_req, *callrp;
   2075 	mblk_t *mp;
   2076 	esph_t *esph_ptr;
   2077 	int kef_rc;
   2078 	uint_t icv_len = assoc->ipsa_mac_len;
   2079 	crypto_ctx_template_t auth_ctx_tmpl;
   2080 	boolean_t do_auth, do_encr, force;
   2081 	uint_t encr_offset, encr_len;
   2082 	uint_t iv_len = assoc->ipsa_iv_len;
   2083 	crypto_ctx_template_t encr_ctx_tmpl;
   2084 	ipsec_crypto_t	*ic, icstack;
   2085 	uchar_t *iv_ptr;
   2086 	netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
   2087 	ipsec_stack_t *ipss = ns->netstack_ipsec;
   2088 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   2089 
   2090 	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
   2091 	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
   2092 	force = (assoc->ipsa_flags & IPSA_F_ASYNC);
   2093 
   2094 #ifdef IPSEC_LATENCY_TEST
   2095 	kef_rc = CRYPTO_SUCCESS;
   2096 #else
   2097 	kef_rc = CRYPTO_FAILED;
   2098 #endif
   2099 
   2100 	/*
   2101 	 * An inbound packet is of the form:
   2102 	 * [IP,options,ESP,IV,data,ICV,pad]
   2103 	 */
   2104 	esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
   2105 	iv_ptr = (uchar_t *)(esph_ptr + 1);
   2106 	/* Packet length starting at IP header ending after ESP ICV. */
   2107 	msg_len = MBLKL(esp_mp);
   2108 
   2109 	encr_offset = esph_offset + sizeof (esph_t) + iv_len;
   2110 	encr_len = msg_len - encr_offset;
   2111 
   2112 	/*
   2113 	 * Counter mode algs need a nonce. This is setup in sadb_common_add().
   2114 	 * If for some reason we are using a SA which does not have a nonce
   2115 	 * then we must fail here.
   2116 	 */
   2117 	if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
   2118 	    (assoc->ipsa_nonce == NULL)) {
   2119 		ip_drop_packet(esp_mp, B_TRUE, ira->ira_ill,
   2120 		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
   2121 		return (NULL);
   2122 	}
   2123 
   2124 	if (force) {
   2125 		/* We are doing asynch; allocate mblks to hold state */
   2126 		if ((mp = ip_recv_attr_to_mblk(ira)) == NULL ||
   2127 		    (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
   2128 			BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
   2129 			ip_drop_input("ipIfStatsInDiscards", esp_mp,
   2130 			    ira->ira_ill);
   2131 			return (NULL);
   2132 		}
   2133 		linkb(mp, esp_mp);
   2134 		callrp = &call_req;
   2135 		ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_inbound);
   2136 	} else {
   2137 		/*
   2138 		 * If we know we are going to do sync then ipsec_crypto_t
   2139 		 * should be on the stack.
   2140 		 */
   2141 		ic = &icstack;
   2142 		bzero(ic, sizeof (*ic));
   2143 		callrp = NULL;
   2144 	}
   2145 
   2146 	if (do_auth) {
   2147 		/* authentication context template */
   2148 		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
   2149 		    auth_ctx_tmpl);
   2150 
   2151 		/* ICV to be verified */
   2152 		ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
   2153 		    icv_len, esp_mp->b_wptr - icv_len);
   2154 
   2155 		/* authentication starts at the ESP header */
   2156 		auth_offset = esph_offset;
   2157 		auth_len = msg_len - auth_offset - icv_len;
   2158 		if (!do_encr) {
   2159 			/* authentication only */
   2160 			/* initialize input data argument */
   2161 			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
   2162 			    esp_mp, auth_offset, auth_len);
   2163 
   2164 			/* call the crypto framework */
   2165 			kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
   2166 			    &ic->ic_crypto_data,
   2167 			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
   2168 			    &ic->ic_crypto_mac, callrp);
   2169 		}
   2170 	}
   2171 
   2172 	if (do_encr) {
   2173 		/* encryption template */
   2174 		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
   2175 		    encr_ctx_tmpl);
   2176 
   2177 		/* Call the nonce update function. Also passes in IV */
   2178 		(assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, encr_len,
   2179 		    iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);
   2180 
   2181 		if (!do_auth) {
   2182 			/* decryption only */
   2183 			/* initialize input data argument */
   2184 			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
   2185 			    esp_mp, encr_offset, encr_len);
   2186 
   2187 			/* call the crypto framework */
   2188 			kef_rc = crypto_decrypt((crypto_mechanism_t *)
   2189 			    &ic->ic_cmm, &ic->ic_crypto_data,
   2190 			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
   2191 			    NULL, callrp);
   2192 		}
   2193 	}
   2194 
   2195 	if (do_auth && do_encr) {
   2196 		/* dual operation */
   2197 		/* initialize input data argument */
   2198 		ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
   2199 		    esp_mp, auth_offset, auth_len,
   2200 		    encr_offset, encr_len - icv_len);
   2201 
   2202 		/* specify IV */
   2203 		ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;
   2204 
   2205 		/* call the framework */
   2206 		kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
   2207 		    &assoc->ipsa_emech, &ic->ic_crypto_dual_data,
   2208 		    &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
   2209 		    auth_ctx_tmpl, encr_ctx_tmpl, &ic->ic_crypto_mac,
   2210 		    NULL, callrp);
   2211 	}
   2212 
   2213 	switch (kef_rc) {
   2214 	case CRYPTO_SUCCESS:
   2215 		ESP_BUMP_STAT(espstack, crypto_sync);
   2216 		esp_mp = esp_in_done(esp_mp, ira, ic);
   2217 		if (force) {
   2218 			/* Free mp after we are done with ic */
   2219 			mp = ipsec_free_crypto_data(mp);
   2220 			(void) ip_recv_attr_free_mblk(mp);
   2221 		}
   2222 		return (esp_mp);
   2223 	case CRYPTO_QUEUED:
   2224 		/* esp_kcf_callback_inbound() will be invoked on completion */
   2225 		ESP_BUMP_STAT(espstack, crypto_async);
   2226 		return (NULL);
   2227 	case CRYPTO_INVALID_MAC:
   2228 		if (force) {
   2229 			mp = ipsec_free_crypto_data(mp);
   2230 			esp_mp = ip_recv_attr_free_mblk(mp);
   2231 		}
   2232 		ESP_BUMP_STAT(espstack, crypto_sync);
   2233 		BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
   2234 		esp_log_bad_auth(esp_mp, ira);
   2235 		/* esp_mp was passed to ip_drop_packet */
   2236 		return (NULL);
   2237 	}
   2238 
   2239 	if (force) {
   2240 		mp = ipsec_free_crypto_data(mp);
   2241 		esp_mp = ip_recv_attr_free_mblk(mp);
   2242 	}
   2243 	BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
   2244 	esp_crypto_failed(esp_mp, B_TRUE, kef_rc, ira->ira_ill, espstack);
   2245 	/* esp_mp was passed to ip_drop_packet */
   2246 	return (NULL);
   2247 }
   2248 
   2249 /*
   2250  * Compute the IP and UDP checksums -- common code for both keepalives and
   2251  * actual ESP-in-UDP packets.  Be flexible with multiple mblks because ESP
   2252  * uses mblk-insertion to insert the UDP header.
   2253  * TODO - If there is an easy way to prep a packet for HW checksums, make
   2254  * it happen here.
   2255  * Note that this is used before both before calling ip_output_simple and
   2256  * in the esp datapath. The former could use IXAF_SET_ULP_CKSUM but not the
   2257  * latter.
   2258  */
   2259 static void
   2260 esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha)
   2261 {
   2262 	int offset;
   2263 	uint32_t cksum;
   2264 	uint16_t *arr;
   2265 	mblk_t *udpmp = mp;
   2266 	uint_t hlen = IPH_HDR_LENGTH(ipha);
   2267 
   2268 	ASSERT(MBLKL(mp) >= sizeof (ipha_t));
   2269 
   2270 	ipha->ipha_hdr_checksum = 0;
   2271 	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
   2272 
   2273 	if (ns->netstack_udp->us_do_checksum) {
   2274 		ASSERT(MBLKL(udpmp) >= sizeof (udpha_t));
   2275 		/* arr points to the IP header. */
   2276 		arr = (uint16_t *)ipha;
   2277 		IP_STAT(ns->netstack_ip, ip_out_sw_cksum);
   2278 		IP_STAT_UPDATE(ns->netstack_ip, ip_out_sw_cksum_bytes,
   2279 		    ntohs(htons(ipha->ipha_length) - hlen));
   2280 		/* arr[6-9] are the IP addresses. */
   2281 		cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] +
   2282 		    ntohs(htons(ipha->ipha_length) - hlen);
   2283 		cksum = IP_CSUM(mp, hlen, cksum);
   2284 		offset = hlen + UDP_CHECKSUM_OFFSET;
   2285 		while (offset >= MBLKL(udpmp)) {
   2286 			offset -= MBLKL(udpmp);
   2287 			udpmp = udpmp->b_cont;
   2288 		}
   2289 		/* arr points to the UDP header's checksum field. */
   2290 		arr = (uint16_t *)(udpmp->b_rptr + offset);
   2291 		*arr = cksum;
   2292 	}
   2293 }
   2294 
   2295 /*
   2296  * taskq handler so we can send the NAT-T keepalive on a separate thread.
   2297  */
   2298 static void
   2299 actually_send_keepalive(void *arg)
   2300 {
   2301 	mblk_t *mp = (mblk_t *)arg;
   2302 	ip_xmit_attr_t ixas;
   2303 	netstack_t	*ns;
   2304 	netstackid_t	stackid;
   2305 
   2306 	stackid = (netstackid_t)(uintptr_t)mp->b_prev;
   2307 	mp->b_prev = NULL;
   2308 	ns = netstack_find_by_stackid(stackid);
   2309 	if (ns == NULL) {
   2310 		/* Disappeared */
   2311 		ip_drop_output("ipIfStatsOutDiscards", mp, NULL);
   2312 		freemsg(mp);
   2313 		return;
   2314 	}
   2315 
   2316 	bzero(&ixas, sizeof (ixas));
   2317 	ixas.ixa_zoneid = ALL_ZONES;
   2318 	ixas.ixa_cred = kcred;
   2319 	ixas.ixa_cpid = NOPID;
   2320 	ixas.ixa_tsl = NULL;
   2321 	ixas.ixa_ipst = ns->netstack_ip;
   2322 	/* No ULP checksum; done by esp_prepare_udp */
   2323 	ixas.ixa_flags = IXAF_IS_IPV4 | IXAF_NO_IPSEC;
   2324 
   2325 	(void) ip_output_simple(mp, &ixas);
   2326 	ixa_cleanup(&ixas);
   2327 	netstack_rele(ns);
   2328 }
   2329 
   2330 /*
   2331  * Send a one-byte UDP NAT-T keepalive.
   2332  */
   2333 void
   2334 ipsecesp_send_keepalive(ipsa_t *assoc)
   2335 {
   2336 	mblk_t		*mp;
   2337 	ipha_t		*ipha;
   2338 	udpha_t		*udpha;
   2339 	netstack_t	*ns = assoc->ipsa_netstack;
   2340 
   2341 	ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock));
   2342 
   2343 	mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI);
   2344 	if (mp == NULL)
   2345 		return;
   2346 	ipha = (ipha_t *)mp->b_rptr;
   2347 	ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
   2348 	ipha->ipha_type_of_service = 0;
   2349 	ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1);
   2350 	/* Use the low-16 of the SPI so we have some clue where it came from. */
   2351 	ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1);
   2352 	ipha->ipha_fragment_offset_and_flags = 0;  /* Too small to fragment! */
   2353 	ipha->ipha_ttl = 0xFF;
   2354 	ipha->ipha_protocol = IPPROTO_UDP;
   2355 	ipha->ipha_hdr_checksum = 0;
   2356 	ipha->ipha_src = assoc->ipsa_srcaddr[0];
   2357 	ipha->ipha_dst = assoc->ipsa_dstaddr[0];
   2358 	udpha = (udpha_t *)(ipha + 1);
   2359 	udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
   2360 	    assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
   2361 	udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
   2362 	    assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
   2363 	udpha->uha_length = htons(sizeof (udpha_t) + 1);
   2364 	udpha->uha_checksum = 0;
   2365 	mp->b_wptr = (uint8_t *)(udpha + 1);
   2366 	*(mp->b_wptr++) = 0xFF;
   2367 
   2368 	esp_prepare_udp(ns, mp, ipha);
   2369 
   2370 	/*
   2371 	 * We're holding an isaf_t bucket lock, so pawn off the actual
   2372 	 * packet transmission to another thread.  Just in case syncq
   2373 	 * processing causes a same-bucket packet to be processed.
   2374 	 */
   2375 	mp->b_prev = (mblk_t *)(uintptr_t)ns->netstack_stackid;
   2376 
   2377 	if (taskq_dispatch(esp_taskq, actually_send_keepalive, mp,
   2378 	    TQ_NOSLEEP) == 0) {
   2379 		/* Assume no memory if taskq_dispatch() fails. */
   2380 		mp->b_prev = NULL;
   2381 		ip_drop_packet(mp, B_FALSE, NULL,
   2382 		    DROPPER(ns->netstack_ipsec, ipds_esp_nomem),
   2383 		    &ns->netstack_ipsecesp->esp_dropper);
   2384 	}
   2385 }
   2386 
   2387 /*
   2388  * Returns mp if successfully completed the request. Returns
   2389  * NULL if it failed (and increments InDiscards) or if it is pending.
   2390  */
   2391 static mblk_t *
   2392 esp_submit_req_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa, ipsa_t *assoc,
   2393     uchar_t *icv_buf, uint_t payload_len)
   2394 {
   2395 	uint_t auth_len;
   2396 	crypto_call_req_t call_req, *callrp;
   2397 	mblk_t *esp_mp;
   2398 	esph_t *esph_ptr;
   2399 	mblk_t *mp;
   2400 	int kef_rc = CRYPTO_FAILED;
   2401 	uint_t icv_len = assoc->ipsa_mac_len;
   2402 	crypto_ctx_template_t auth_ctx_tmpl;
   2403 	boolean_t do_auth, do_encr, force;
   2404 	uint_t iv_len = assoc->ipsa_iv_len;
   2405 	crypto_ctx_template_t encr_ctx_tmpl;
   2406 	boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
   2407 	size_t esph_offset = (is_natt ? UDPH_SIZE : 0);
   2408 	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
   2409 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   2410 	ipsec_crypto_t	*ic, icstack;
   2411 	uchar_t		*iv_ptr;
   2412 	crypto_data_t	*cd_ptr = NULL;
   2413 	ill_t		*ill = ixa->ixa_nce->nce_ill;
   2414 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   2415 
   2416 	esp3dbg(espstack, ("esp_submit_req_outbound:%s",
   2417 	    is_natt ? "natt" : "not natt"));
   2418 
   2419 	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
   2420 	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
   2421 	force = (assoc->ipsa_flags & IPSA_F_ASYNC);
   2422 
   2423 #ifdef IPSEC_LATENCY_TEST
   2424 	kef_rc = CRYPTO_SUCCESS;
   2425 #else
   2426 	kef_rc = CRYPTO_FAILED;
   2427 #endif
   2428 
   2429 	/*
   2430 	 * Outbound IPsec packets are of the form:
   2431 	 * [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV]
   2432 	 * unless it's NATT, then it's
   2433 	 * [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV]
   2434 	 * Get a pointer to the mblk containing the ESP header.
   2435 	 */
   2436 	ASSERT(data_mp->b_cont != NULL);
   2437 	esp_mp = data_mp->b_cont;
   2438 	esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
   2439 	iv_ptr = (uchar_t *)(esph_ptr + 1);
   2440 
   2441 	/*
   2442 	 * Combined mode algs need a nonce. This is setup in sadb_common_add().
   2443 	 * If for some reason we are using a SA which does not have a nonce
   2444 	 * then we must fail here.
   2445 	 */
   2446 	if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
   2447 	    (assoc->ipsa_nonce == NULL)) {
   2448 		ip_drop_packet(data_mp, B_FALSE, NULL,
   2449 		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
   2450 		return (NULL);
   2451 	}
   2452 
   2453 	if (force) {
   2454 		/* We are doing asynch; allocate mblks to hold state */
   2455 		if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL ||
   2456 		    (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
   2457 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2458 			ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
   2459 			freemsg(data_mp);
   2460 			return (NULL);
   2461 		}
   2462 
   2463 		linkb(mp, data_mp);
   2464 		callrp = &call_req;
   2465 		ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_outbound);
   2466 	} else {
   2467 		/*
   2468 		 * If we know we are going to do sync then ipsec_crypto_t
   2469 		 * should be on the stack.
   2470 		 */
   2471 		ic = &icstack;
   2472 		bzero(ic, sizeof (*ic));
   2473 		callrp = NULL;
   2474 	}
   2475 
   2476 
   2477 	if (do_auth) {
   2478 		/* authentication context template */
   2479 		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
   2480 		    auth_ctx_tmpl);
   2481 
   2482 		/* where to store the computed mac */
   2483 		ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
   2484 		    icv_len, icv_buf);
   2485 
   2486 		/* authentication starts at the ESP header */
   2487 		auth_len = payload_len + iv_len + sizeof (esph_t);
   2488 		if (!do_encr) {
   2489 			/* authentication only */
   2490 			/* initialize input data argument */
   2491 			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
   2492 			    esp_mp, esph_offset, auth_len);
   2493 
   2494 			/* call the crypto framework */
   2495 			kef_rc = crypto_mac(&assoc->ipsa_amech,
   2496 			    &ic->ic_crypto_data,
   2497 			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
   2498 			    &ic->ic_crypto_mac, callrp);
   2499 		}
   2500 	}
   2501 
   2502 	if (do_encr) {
   2503 		/* encryption context template */
   2504 		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
   2505 		    encr_ctx_tmpl);
   2506 		/* Call the nonce update function. */
   2507 		(assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, payload_len,
   2508 		    iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);
   2509 
   2510 		if (!do_auth) {
   2511 			/* encryption only, skip mblk that contains ESP hdr */
   2512 			/* initialize input data argument */
   2513 			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
   2514 			    esp_mp->b_cont, 0, payload_len);
   2515 
   2516 			/*
   2517 			 * For combined mode ciphers, the ciphertext is the same
   2518 			 * size as the clear text, the ICV should follow the
   2519 			 * ciphertext. To convince the kcf to allow in-line
   2520 			 * encryption, with an ICV, use ipsec_out_crypto_mac
   2521 			 * to point to the same buffer as the data. The calling
   2522 			 * function need to ensure the buffer is large enough to
   2523 			 * include the ICV.
   2524 			 *
   2525 			 * The IV is already written to the packet buffer, the
   2526 			 * nonce setup function copied it to the params struct
   2527 			 * for the cipher to use.
   2528 			 */
   2529 			if (assoc->ipsa_flags & IPSA_F_COMBINED) {
   2530 				bcopy(&ic->ic_crypto_data,
   2531 				    &ic->ic_crypto_mac,
   2532 				    sizeof (crypto_data_t));
   2533 				ic->ic_crypto_mac.cd_length =
   2534 				    payload_len + icv_len;
   2535 				cd_ptr = &ic->ic_crypto_mac;
   2536 			}
   2537 
   2538 			/* call the crypto framework */
   2539 			kef_rc = crypto_encrypt((crypto_mechanism_t *)
   2540 			    &ic->ic_cmm, &ic->ic_crypto_data,
   2541 			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
   2542 			    cd_ptr, callrp);
   2543 
   2544 		}
   2545 	}
   2546 
   2547 	if (do_auth && do_encr) {
   2548 		/*
   2549 		 * Encryption and authentication:
   2550 		 * Pass the pointer to the mblk chain starting at the ESP
   2551 		 * header to the framework. Skip the ESP header mblk
   2552 		 * for encryption, which is reflected by an encryption
   2553 		 * offset equal to the length of that mblk. Start
   2554 		 * the authentication at the ESP header, i.e. use an
   2555 		 * authentication offset of zero.
   2556 		 */
   2557 		ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
   2558 		    esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len);
   2559 
   2560 		/* specify IV */
   2561 		ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;
   2562 
   2563 		/* call the framework */
   2564 		kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech,
   2565 		    &assoc->ipsa_amech, NULL,
   2566 		    &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey,
   2567 		    encr_ctx_tmpl, auth_ctx_tmpl,
   2568 		    &ic->ic_crypto_dual_data,
   2569 		    &ic->ic_crypto_mac, callrp);
   2570 	}
   2571 
   2572 	switch (kef_rc) {
   2573 	case CRYPTO_SUCCESS:
   2574 		ESP_BUMP_STAT(espstack, crypto_sync);
   2575 		esp_set_usetime(assoc, B_FALSE);
   2576 		if (force) {
   2577 			mp = ipsec_free_crypto_data(mp);
   2578 			data_mp = ip_xmit_attr_free_mblk(mp);
   2579 		}
   2580 		if (is_natt)
   2581 			esp_prepare_udp(ns, data_mp, (ipha_t *)data_mp->b_rptr);
   2582 		return (data_mp);
   2583 	case CRYPTO_QUEUED:
   2584 		/* esp_kcf_callback_outbound() will be invoked on completion */
   2585 		ESP_BUMP_STAT(espstack, crypto_async);
   2586 		return (NULL);
   2587 	}
   2588 
   2589 	if (force) {
   2590 		mp = ipsec_free_crypto_data(mp);
   2591 		data_mp = ip_xmit_attr_free_mblk(mp);
   2592 	}
   2593 	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2594 	esp_crypto_failed(data_mp, B_FALSE, kef_rc, NULL, espstack);
   2595 	/* data_mp was passed to ip_drop_packet */
   2596 	return (NULL);
   2597 }
   2598 
   2599 /*
   2600  * Handle outbound IPsec processing for IPv4 and IPv6
   2601  *
   2602  * Returns data_mp if successfully completed the request. Returns
   2603  * NULL if it failed (and increments InDiscards) or if it is pending.
   2604  */
   2605 static mblk_t *
   2606 esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa)
   2607 {
   2608 	mblk_t *espmp, *tailmp;
   2609 	ipha_t *ipha;
   2610 	ip6_t *ip6h;
   2611 	esph_t *esph_ptr, *iv_ptr;
   2612 	uint_t af;
   2613 	uint8_t *nhp;
   2614 	uintptr_t divpoint, datalen, adj, padlen, i, alloclen;
   2615 	uintptr_t esplen = sizeof (esph_t);
   2616 	uint8_t protocol;
   2617 	ipsa_t *assoc;
   2618 	uint_t iv_len, block_size, mac_len = 0;
   2619 	uchar_t *icv_buf;
   2620 	udpha_t *udpha;
   2621 	boolean_t is_natt = B_FALSE;
   2622 	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
   2623 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   2624 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   2625 	ill_t		*ill = ixa->ixa_nce->nce_ill;
   2626 	boolean_t	need_refrele = B_FALSE;
   2627 
   2628 	ESP_BUMP_STAT(espstack, out_requests);
   2629 
   2630 	/*
   2631 	 * <sigh> We have to copy the message here, because TCP (for example)
   2632 	 * keeps a dupb() of the message lying around for retransmission.
   2633 	 * Since ESP changes the whole of the datagram, we have to create our
   2634 	 * own copy lest we clobber TCP's data.  Since we have to copy anyway,
   2635 	 * we might as well make use of msgpullup() and get the mblk into one
   2636 	 * contiguous piece!
   2637 	 */
   2638 	tailmp = msgpullup(data_mp, -1);
   2639 	if (tailmp == NULL) {
   2640 		esp0dbg(("esp_outbound: msgpullup() failed, "
   2641 		    "dropping packet.\n"));
   2642 		ip_drop_packet(data_mp, B_FALSE, ill,
   2643 		    DROPPER(ipss, ipds_esp_nomem),
   2644 		    &espstack->esp_dropper);
   2645 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2646 		return (NULL);
   2647 	}
   2648 	freemsg(data_mp);
   2649 	data_mp = tailmp;
   2650 
   2651 	assoc = ixa->ixa_ipsec_esp_sa;
   2652 	ASSERT(assoc != NULL);
   2653 
   2654 	/*
   2655 	 * Get the outer IP header in shape to escape this system..
   2656 	 */
   2657 	if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) {
   2658 		/*
   2659 		 * Need to update packet with any CIPSO option and update
   2660 		 * ixa_tsl to capture the new label.
   2661 		 * We allocate a separate ixa for that purpose.
   2662 		 */
   2663 		ixa = ip_xmit_attr_duplicate(ixa);
   2664 		if (ixa == NULL) {
   2665 			ip_drop_packet(data_mp, B_FALSE, ill,
   2666 			    DROPPER(ipss, ipds_esp_nomem),
   2667 			    &espstack->esp_dropper);
   2668 			return (NULL);
   2669 		}
   2670 		need_refrele = B_TRUE;
   2671 
   2672 		label_hold(assoc->ipsa_otsl);
   2673 		ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl);
   2674 
   2675 		data_mp = sadb_whack_label(data_mp, assoc, ixa,
   2676 		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
   2677 		if (data_mp == NULL) {
   2678 			/* Packet dropped by sadb_whack_label */
   2679 			ixa_refrele(ixa);
   2680 			return (NULL);
   2681 		}
   2682 	}
   2683 
   2684 	/*
   2685 	 * Reality check....
   2686 	 */
   2687 	ipha = (ipha_t *)data_mp->b_rptr;  /* So we can call esp_acquire(). */
   2688 
   2689 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
   2690 		ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
   2691 
   2692 		af = AF_INET;
   2693 		divpoint = IPH_HDR_LENGTH(ipha);
   2694 		datalen = ntohs(ipha->ipha_length) - divpoint;
   2695 		nhp = (uint8_t *)&ipha->ipha_protocol;
   2696 	} else {
   2697 		ip_pkt_t ipp;
   2698 
   2699 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
   2700 
   2701 		af = AF_INET6;
   2702 		ip6h = (ip6_t *)ipha;
   2703 		bzero(&ipp, sizeof (ipp));
   2704 		divpoint = ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, NULL);
   2705 		if (ipp.ipp_dstopts != NULL &&
   2706 		    ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) {
   2707 			/*
   2708 			 * Destination options are tricky.  If we get in here,
   2709 			 * then we have a terminal header following the
   2710 			 * destination options.  We need to adjust backwards
   2711 			 * so we insert ESP BEFORE the destination options
   2712 			 * bag.  (So that the dstopts get encrypted!)
   2713 			 *
   2714 			 * Since this is for outbound packets only, we know
   2715 			 * that non-terminal destination options only precede
   2716 			 * routing headers.
   2717 			 */
   2718 			divpoint -= ipp.ipp_dstoptslen;
   2719 		}
   2720 		datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint;
   2721 
   2722 		if (ipp.ipp_rthdr != NULL) {
   2723 			nhp = &ipp.ipp_rthdr->ip6r_nxt;
   2724 		} else if (ipp.ipp_hopopts != NULL) {
   2725 			nhp = &ipp.ipp_hopopts->ip6h_nxt;
   2726 		} else {
   2727 			ASSERT(divpoint == sizeof (ip6_t));
   2728 			/* It's probably IP + ESP. */
   2729 			nhp = &ip6h->ip6_nxt;
   2730 		}
   2731 	}
   2732 
   2733 	mac_len = assoc->ipsa_mac_len;
   2734 
   2735 	if (assoc->ipsa_flags & IPSA_F_NATT) {
   2736 		/* wedge in UDP header */
   2737 		is_natt = B_TRUE;
   2738 		esplen += UDPH_SIZE;
   2739 	}
   2740 
   2741 	/*
   2742 	 * Set up ESP header and encryption padding for ENCR PI request.
   2743 	 */
   2744 
   2745 	/* Determine the padding length.  Pad to 4-bytes for no-encryption. */
   2746 	if (assoc->ipsa_encr_alg != SADB_EALG_NULL) {
   2747 		iv_len = assoc->ipsa_iv_len;
   2748 		block_size = assoc->ipsa_datalen;
   2749 
   2750 		/*
   2751 		 * Pad the data to the length of the cipher block size.
   2752 		 * Include the two additional bytes (hence the - 2) for the
   2753 		 * padding length and the next header.  Take this into account
   2754 		 * when calculating the actual length of the padding.
   2755 		 */
   2756 		ASSERT(ISP2(iv_len));
   2757 		padlen = ((unsigned)(block_size - datalen - 2)) &
   2758 		    (block_size - 1);
   2759 	} else {
   2760 		iv_len = 0;
   2761 		padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) &
   2762 		    (sizeof (uint32_t) - 1);
   2763 	}
   2764 
   2765 	/* Allocate ESP header and IV. */
   2766 	esplen += iv_len;
   2767 
   2768 	/*
   2769 	 * Update association byte-count lifetimes.  Don't forget to take
   2770 	 * into account the padding length and next-header (hence the + 2).
   2771 	 *
   2772 	 * Use the amount of data fed into the "encryption algorithm".  This
   2773 	 * is the IV, the data length, the padding length, and the final two
   2774 	 * bytes (padlen, and next-header).
   2775 	 *
   2776 	 */
   2777 
   2778 	if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) {
   2779 		ip_drop_packet(data_mp, B_FALSE, ill,
   2780 		    DROPPER(ipss, ipds_esp_bytes_expire),
   2781 		    &espstack->esp_dropper);
   2782 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2783 		if (need_refrele)
   2784 			ixa_refrele(ixa);
   2785 		return (NULL);
   2786 	}
   2787 
   2788 	espmp = allocb(esplen, BPRI_HI);
   2789 	if (espmp == NULL) {
   2790 		ESP_BUMP_STAT(espstack, out_discards);
   2791 		esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n"));
   2792 		ip_drop_packet(data_mp, B_FALSE, ill,
   2793 		    DROPPER(ipss, ipds_esp_nomem),
   2794 		    &espstack->esp_dropper);
   2795 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2796 		if (need_refrele)
   2797 			ixa_refrele(ixa);
   2798 		return (NULL);
   2799 	}
   2800 	espmp->b_wptr += esplen;
   2801 	esph_ptr = (esph_t *)espmp->b_rptr;
   2802 
   2803 	if (is_natt) {
   2804 		esp3dbg(espstack, ("esp_outbound: NATT"));
   2805 
   2806 		udpha = (udpha_t *)espmp->b_rptr;
   2807 		udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
   2808 		    assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
   2809 		udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
   2810 		    assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
   2811 		/*
   2812 		 * Set the checksum to 0, so that the esp_prepare_udp() call
   2813 		 * can do the right thing.
   2814 		 */
   2815 		udpha->uha_checksum = 0;
   2816 		esph_ptr = (esph_t *)(udpha + 1);
   2817 	}
   2818 
   2819 	esph_ptr->esph_spi = assoc->ipsa_spi;
   2820 
   2821 	esph_ptr->esph_replay = htonl(atomic_add_32_nv(&assoc->ipsa_replay, 1));
   2822 	if (esph_ptr->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) {
   2823 		/*
   2824 		 * XXX We have replay counter wrapping.
   2825 		 * We probably want to nuke this SA (and its peer).
   2826 		 */
   2827 		ipsec_assocfailure(info.mi_idnum, 0, 0,
   2828 		    SL_ERROR | SL_CONSOLE | SL_WARN,
   2829 		    "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n",
   2830 		    esph_ptr->esph_spi, assoc->ipsa_dstaddr, af,
   2831 		    espstack->ipsecesp_netstack);
   2832 
   2833 		ESP_BUMP_STAT(espstack, out_discards);
   2834 		sadb_replay_delete(assoc);
   2835 		ip_drop_packet(data_mp, B_FALSE, ill,
   2836 		    DROPPER(ipss, ipds_esp_replay),
   2837 		    &espstack->esp_dropper);
   2838 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2839 		if (need_refrele)
   2840 			ixa_refrele(ixa);
   2841 		return (NULL);
   2842 	}
   2843 
   2844 	iv_ptr = (esph_ptr + 1);
   2845 	/*
   2846 	 * iv_ptr points to the mblk which will contain the IV once we have
   2847 	 * written it there. This mblk will be part of a mblk chain that
   2848 	 * will make up the packet.
   2849 	 *
   2850 	 * For counter mode algorithms, the IV is a 64 bit quantity, it
   2851 	 * must NEVER repeat in the lifetime of the SA, otherwise an
   2852 	 * attacker who had recorded enough packets might be able to
   2853 	 * determine some clear text.
   2854 	 *
   2855 	 * To ensure this does not happen, the IV is stored in the SA and
   2856 	 * incremented for each packet, the IV is then copied into the
   2857 	 * "packet" for transmission to the receiving system. The IV will
   2858 	 * also be copied into the nonce, when the packet is encrypted.
   2859 	 *
   2860 	 * CBC mode algorithms use a random IV for each packet. We do not
   2861 	 * require the highest quality random bits, but for best security
   2862 	 * with CBC mode ciphers, the value must be unlikely to repeat and
   2863 	 * must not be known in advance to an adversary capable of influencing
   2864 	 * the clear text.
   2865 	 */
   2866 	if (!update_iv((uint8_t *)iv_ptr, espstack->esp_pfkey_q, assoc,
   2867 	    espstack)) {
   2868 		ip_drop_packet(data_mp, B_FALSE, ill,
   2869 		    DROPPER(ipss, ipds_esp_iv_wrap), &espstack->esp_dropper);
   2870 		if (need_refrele)
   2871 			ixa_refrele(ixa);
   2872 		return (NULL);
   2873 	}
   2874 
   2875 	/* Fix the IP header. */
   2876 	alloclen = padlen + 2 + mac_len;
   2877 	adj = alloclen + (espmp->b_wptr - espmp->b_rptr);
   2878 
   2879 	protocol = *nhp;
   2880 
   2881 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
   2882 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj);
   2883 		if (is_natt) {
   2884 			*nhp = IPPROTO_UDP;
   2885 			udpha->uha_length = htons(ntohs(ipha->ipha_length) -
   2886 			    IPH_HDR_LENGTH(ipha));
   2887 		} else {
   2888 			*nhp = IPPROTO_ESP;
   2889 		}
   2890 		ipha->ipha_hdr_checksum = 0;
   2891 		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
   2892 	} else {
   2893 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj);
   2894 		*nhp = IPPROTO_ESP;
   2895 	}
   2896 
   2897 	/* I've got the two ESP mblks, now insert them. */
   2898 
   2899 	esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n"));
   2900 	esp2dbg(espstack, (dump_msg(data_mp)));
   2901 
   2902 	if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) {
   2903 		ESP_BUMP_STAT(espstack, out_discards);
   2904 		/* NOTE:  esp_insert_esp() only fails if there's no memory. */
   2905 		ip_drop_packet(data_mp, B_FALSE, ill,
   2906 		    DROPPER(ipss, ipds_esp_nomem),
   2907 		    &espstack->esp_dropper);
   2908 		freeb(espmp);
   2909 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2910 		if (need_refrele)
   2911 			ixa_refrele(ixa);
   2912 		return (NULL);
   2913 	}
   2914 
   2915 	/* Append padding (and leave room for ICV). */
   2916 	for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont)
   2917 		;
   2918 	if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) {
   2919 		tailmp->b_cont = allocb(alloclen, BPRI_HI);
   2920 		if (tailmp->b_cont == NULL) {
   2921 			ESP_BUMP_STAT(espstack, out_discards);
   2922 			esp0dbg(("esp_outbound:  Can't allocate tailmp.\n"));
   2923 			ip_drop_packet(data_mp, B_FALSE, ill,
   2924 			    DROPPER(ipss, ipds_esp_nomem),
   2925 			    &espstack->esp_dropper);
   2926 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2927 			if (need_refrele)
   2928 				ixa_refrele(ixa);
   2929 			return (NULL);
   2930 		}
   2931 		tailmp = tailmp->b_cont;
   2932 	}
   2933 
   2934 	/*
   2935 	 * If there's padding, N bytes of padding must be of the form 0x1,
   2936 	 * 0x2, 0x3... 0xN.
   2937 	 */
   2938 	for (i = 0; i < padlen; ) {
   2939 		i++;
   2940 		*tailmp->b_wptr++ = i;
   2941 	}
   2942 	*tailmp->b_wptr++ = i;
   2943 	*tailmp->b_wptr++ = protocol;
   2944 
   2945 	esp2dbg(espstack, ("data_Mp before encryption:\n"));
   2946 	esp2dbg(espstack, (dump_msg(data_mp)));
   2947 
   2948 	/*
   2949 	 * Okay.  I've set up the pre-encryption ESP.  Let's do it!
   2950 	 */
   2951 
   2952 	if (mac_len > 0) {
   2953 		ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim);
   2954 		icv_buf = tailmp->b_wptr;
   2955 		tailmp->b_wptr += mac_len;
   2956 	} else {
   2957 		icv_buf = NULL;
   2958 	}
   2959 
   2960 	data_mp = esp_submit_req_outbound(data_mp, ixa, assoc, icv_buf,
   2961 	    datalen + padlen + 2);
   2962 	if (need_refrele)
   2963 		ixa_refrele(ixa);
   2964 	return (data_mp);
   2965 }
   2966 
   2967 /*
   2968  * IP calls this to validate the ICMP errors that
   2969  * we got from the network.
   2970  */
   2971 mblk_t *
   2972 ipsecesp_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira)
   2973 {
   2974 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
   2975 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   2976 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   2977 
   2978 	/*
   2979 	 * Unless we get an entire packet back, this function is useless.
   2980 	 * Why?
   2981 	 *
   2982 	 * 1.)	Partial packets are useless, because the "next header"
   2983 	 *	is at the end of the decrypted ESP packet.  Without the
   2984 	 *	whole packet, this is useless.
   2985 	 *
   2986 	 * 2.)	If we every use a stateful cipher, such as a stream or a
   2987 	 *	one-time pad, we can't do anything.
   2988 	 *
   2989 	 * Since the chances of us getting an entire packet back are very
   2990 	 * very small, we discard here.
   2991 	 */
   2992 	IP_ESP_BUMP_STAT(ipss, in_discards);
   2993 	ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
   2994 	    DROPPER(ipss, ipds_esp_icmp),
   2995 	    &espstack->esp_dropper);
   2996 	return (NULL);
   2997 }
   2998 
   2999 /*
   3000  * Construct an SADB_REGISTER message with the current algorithms.
   3001  * This function gets called when 'ipsecalgs -s' is run or when
   3002  * in.iked (or other KMD) starts.
   3003  */
   3004 static boolean_t
   3005 esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
   3006     ipsecesp_stack_t *espstack, cred_t *cr)
   3007 {
   3008 	mblk_t *pfkey_msg_mp, *keysock_out_mp;
   3009 	sadb_msg_t *samsg;
   3010 	sadb_supported_t *sasupp_auth = NULL;
   3011 	sadb_supported_t *sasupp_encr = NULL;
   3012 	sadb_alg_t *saalg;
   3013 	uint_t allocsize = sizeof (*samsg);
   3014 	uint_t i, numalgs_snap;
   3015 	int current_aalgs;
   3016 	ipsec_alginfo_t **authalgs;
   3017 	uint_t num_aalgs;
   3018 	int current_ealgs;
   3019 	ipsec_alginfo_t **encralgs;
   3020 	uint_t num_ealgs;
   3021 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
   3022 	sadb_sens_t *sens;
   3023 	size_t sens_len = 0;
   3024 	sadb_ext_t *nextext;
   3025 	ts_label_t *sens_tsl = NULL;
   3026 
   3027 	/* Allocate the KEYSOCK_OUT. */
   3028 	keysock_out_mp = sadb_keysock_out(serial);
   3029 	if (keysock_out_mp == NULL) {
   3030 		esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
   3031 		return (B_FALSE);
   3032 	}
   3033 
   3034 	if (is_system_labeled() && (cr != NULL)) {
   3035 		sens_tsl = crgetlabel(cr);
   3036 		if (sens_tsl != NULL) {
   3037 			sens_len = sadb_sens_len_from_label(sens_tsl);
   3038 			allocsize += sens_len;
   3039 		}
   3040 	}
   3041 
   3042 	/*
   3043 	 * Allocate the PF_KEY message that follows KEYSOCK_OUT.
   3044 	 */
   3045 
   3046 	mutex_enter(&ipss->ipsec_alg_lock);
   3047 	/*
   3048 	 * Fill SADB_REGISTER message's algorithm descriptors.  Hold
   3049 	 * down the lock while filling it.
   3050 	 *
   3051 	 * Return only valid algorithms, so the number of algorithms
   3052 	 * to send up may be less than the number of algorithm entries
   3053 	 * in the table.
   3054 	 */
   3055 	authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
   3056 	for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
   3057 		if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
   3058 			num_aalgs++;
   3059 
   3060 	if (num_aalgs != 0) {
   3061 		allocsize += (num_aalgs * sizeof (*saalg));
   3062 		allocsize += sizeof (*sasupp_auth);
   3063 	}
   3064 	encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR];
   3065 	for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
   3066 		if (encralgs[i] != NULL && ALG_VALID(encralgs[i]))
   3067 			num_ealgs++;
   3068 
   3069 	if (num_ealgs != 0) {
   3070 		allocsize += (num_ealgs * sizeof (*saalg));
   3071 		allocsize += sizeof (*sasupp_encr);
   3072 	}
   3073 	keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI);
   3074 	if (keysock_out_mp->b_cont == NULL) {
   3075 		mutex_exit(&ipss->ipsec_alg_lock);
   3076 		freemsg(keysock_out_mp);
   3077 		return (B_FALSE);
   3078 	}
   3079 	pfkey_msg_mp = keysock_out_mp->b_cont;
   3080 	pfkey_msg_mp->b_wptr += allocsize;
   3081 
   3082 	nextext = (sadb_ext_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg));
   3083 
   3084 	if (num_aalgs != 0) {
   3085 		sasupp_auth = (sadb_supported_t *)nextext;
   3086 		saalg = (sadb_alg_t *)(sasupp_auth + 1);
   3087 
   3088 		ASSERT(((ulong_t)saalg & 0x7) == 0);
   3089 
   3090 		numalgs_snap = 0;
   3091 		for (i = 0;
   3092 		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
   3093 		    i++) {
   3094 			if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
   3095 				continue;
   3096 
   3097 			saalg->sadb_alg_id = authalgs[i]->alg_id;
   3098 			saalg->sadb_alg_ivlen = 0;
   3099 			saalg->sadb_alg_minbits	= authalgs[i]->alg_ef_minbits;
   3100 			saalg->sadb_alg_maxbits	= authalgs[i]->alg_ef_maxbits;
   3101 			saalg->sadb_x_alg_increment =
   3102 			    authalgs[i]->alg_increment;
   3103 			saalg->sadb_x_alg_saltbits = SADB_8TO1(
   3104 			    authalgs[i]->alg_saltlen);
   3105 			numalgs_snap++;
   3106 			saalg++;
   3107 		}
   3108 		ASSERT(numalgs_snap == num_aalgs);
   3109 #ifdef DEBUG
   3110 		/*
   3111 		 * Reality check to make sure I snagged all of the
   3112 		 * algorithms.
   3113 		 */
   3114 		for (; i < IPSEC_MAX_ALGS; i++) {
   3115 			if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) {
   3116 				cmn_err(CE_PANIC, "esp_register_out()! "
   3117 				    "Missed aalg #%d.\n", i);
   3118 			}
   3119 		}
   3120 #endif /* DEBUG */
   3121 		nextext = (sadb_ext_t *)saalg;
   3122 	}
   3123 
   3124 	if (num_ealgs != 0) {
   3125 		sasupp_encr = (sadb_supported_t *)nextext;
   3126 		saalg = (sadb_alg_t *)(sasupp_encr + 1);
   3127 
   3128 		numalgs_snap = 0;
   3129 		for (i = 0;
   3130 		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) {
   3131 			if (encralgs[i] == NULL || !ALG_VALID(encralgs[i]))
   3132 				continue;
   3133 			saalg->sadb_alg_id = encralgs[i]->alg_id;
   3134 			saalg->sadb_alg_ivlen = encralgs[i]->alg_ivlen;
   3135 			saalg->sadb_alg_minbits	= encralgs[i]->alg_ef_minbits;
   3136 			saalg->sadb_alg_maxbits	= encralgs[i]->alg_ef_maxbits;
   3137 			/*
   3138 			 * We could advertise the ICV length, except there
   3139 			 * is not a value in sadb_x_algb to do this.
   3140 			 * saalg->sadb_alg_maclen = encralgs[i]->alg_maclen;
   3141 			 */
   3142 			saalg->sadb_x_alg_increment =
   3143 			    encralgs[i]->alg_increment;
   3144 			saalg->sadb_x_alg_saltbits =
   3145 			    SADB_8TO1(encralgs[i]->alg_saltlen);
   3146 
   3147 			numalgs_snap++;
   3148 			saalg++;
   3149 		}
   3150 		ASSERT(numalgs_snap == num_ealgs);
   3151 #ifdef DEBUG
   3152 		/*
   3153 		 * Reality check to make sure I snagged all of the
   3154 		 * algorithms.
   3155 		 */
   3156 		for (; i < IPSEC_MAX_ALGS; i++) {
   3157 			if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) {
   3158 				cmn_err(CE_PANIC, "esp_register_out()! "
   3159 				    "Missed ealg #%d.\n", i);
   3160 			}
   3161 		}
   3162 #endif /* DEBUG */
   3163 		nextext = (sadb_ext_t *)saalg;
   3164 	}
   3165 
   3166 	current_aalgs = num_aalgs;
   3167 	current_ealgs = num_ealgs;
   3168 
   3169 	mutex_exit(&ipss->ipsec_alg_lock);
   3170 
   3171 	if (sens_tsl != NULL) {
   3172 		sens = (sadb_sens_t *)nextext;
   3173 		sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
   3174 		    sens_tsl, sens_len);
   3175 
   3176 		nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len);
   3177 	}
   3178 
   3179 	/* Now fill the rest of the SADB_REGISTER message. */
   3180 
   3181 	samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr;
   3182 	samsg->sadb_msg_version = PF_KEY_V2;
   3183 	samsg->sadb_msg_type = SADB_REGISTER;
   3184 	samsg->sadb_msg_errno = 0;
   3185 	samsg->sadb_msg_satype = SADB_SATYPE_ESP;
   3186 	samsg->sadb_msg_len = SADB_8TO64(allocsize);
   3187 	samsg->sadb_msg_reserved = 0;
   3188 	/*
   3189 	 * Assume caller has sufficient sequence/pid number info.  If it's one
   3190 	 * from me over a new alg., I could give two hoots about sequence.
   3191 	 */
   3192 	samsg->sadb_msg_seq = sequence;
   3193 	samsg->sadb_msg_pid = pid;
   3194 
   3195 	if (sasupp_auth != NULL) {
   3196 		sasupp_auth->sadb_supported_len = SADB_8TO64(
   3197 		    sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs);
   3198 		sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
   3199 		sasupp_auth->sadb_supported_reserved = 0;
   3200 	}
   3201 
   3202 	if (sasupp_encr != NULL) {
   3203 		sasupp_encr->sadb_supported_len = SADB_8TO64(
   3204 		    sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs);
   3205 		sasupp_encr->sadb_supported_exttype =
   3206 		    SADB_EXT_SUPPORTED_ENCRYPT;
   3207 		sasupp_encr->sadb_supported_reserved = 0;
   3208 	}
   3209 
   3210 	if (espstack->esp_pfkey_q != NULL)
   3211 		putnext(espstack->esp_pfkey_q, keysock_out_mp);
   3212 	else {
   3213 		freemsg(keysock_out_mp);
   3214 		return (B_FALSE);
   3215 	}
   3216 
   3217 	return (B_TRUE);
   3218 }
   3219 
   3220 /*
   3221  * Invoked when the algorithm table changes. Causes SADB_REGISTER
   3222  * messages continaining the current list of algorithms to be
   3223  * sent up to the ESP listeners.
   3224  */
   3225 void
   3226 ipsecesp_algs_changed(netstack_t *ns)
   3227 {
   3228 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
   3229 
   3230 	/*
   3231 	 * Time to send a PF_KEY SADB_REGISTER message to ESP listeners
   3232 	 * everywhere.  (The function itself checks for NULL esp_pfkey_q.)
   3233 	 */
   3234 	(void) esp_register_out(0, 0, 0, espstack, NULL);
   3235 }
   3236 
   3237 /*
   3238  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
   3239  * and send() it into ESP and IP again.
   3240  */
   3241 static void
   3242 inbound_task(void *arg)
   3243 {
   3244 	mblk_t		*mp = (mblk_t *)arg;
   3245 	mblk_t		*async_mp;
   3246 	ip_recv_attr_t	iras;
   3247 
   3248 	async_mp = mp;
   3249 	mp = async_mp->b_cont;
   3250 	async_mp->b_cont = NULL;
   3251 	if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
   3252 		/* The ill or ip_stack_t disappeared on us */
   3253 		ip_drop_input("ip_recv_attr_from_mblk", mp, NULL);
   3254 		freemsg(mp);
   3255 		goto done;
   3256 	}
   3257 
   3258 	esp_inbound_restart(mp, &iras);
   3259 done:
   3260 	ira_cleanup(&iras, B_TRUE);
   3261 }
   3262 
   3263 /*
   3264  * Restart ESP after the SA has been added.
   3265  */
   3266 static void
   3267 esp_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira)
   3268 {
   3269 	esph_t		*esph;
   3270 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
   3271 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   3272 
   3273 	esp2dbg(espstack, ("in ESP inbound_task"));
   3274 	ASSERT(espstack != NULL);
   3275 
   3276 	mp = ipsec_inbound_esp_sa(mp, ira, &esph);
   3277 	if (mp == NULL)
   3278 		return;
   3279 
   3280 	ASSERT(esph != NULL);
   3281 	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
   3282 	ASSERT(ira->ira_ipsec_esp_sa != NULL);
   3283 
   3284 	mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, ira);
   3285 	if (mp == NULL) {
   3286 		/*
   3287 		 * Either it failed or is pending. In the former case
   3288 		 * ipIfStatsInDiscards was increased.
   3289 		 */
   3290 		return;
   3291 	}
   3292 
   3293 	ip_input_post_ipsec(mp, ira);
   3294 }
   3295 
   3296 /*
   3297  * Now that weak-key passed, actually ADD the security association, and
   3298  * send back a reply ADD message.
   3299  */
   3300 static int
   3301 esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
   3302     int *diagnostic, ipsecesp_stack_t *espstack)
   3303 {
   3304 	isaf_t *primary = NULL, *secondary;
   3305 	boolean_t clone = B_FALSE, is_inbound = B_FALSE;
   3306 	ipsa_t *larval = NULL;
   3307 	ipsacq_t *acqrec;
   3308 	iacqf_t *acq_bucket;
   3309 	mblk_t *acq_msgs = NULL;
   3310 	int rc;
   3311 	mblk_t *lpkt;
   3312 	int error;
   3313 	ipsa_query_t sq;
   3314 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
   3315 
   3316 	/*
   3317 	 * Locate the appropriate table(s).
   3318 	 */
   3319 	sq.spp = &espstack->esp_sadb;	/* XXX */
   3320 	error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST,
   3321 	    IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
   3322 	    &sq, diagnostic);
   3323 	if (error)
   3324 		return (error);
   3325 
   3326 	/*
   3327 	 * Use the direction flags provided by the KMD to determine
   3328 	 * if the inbound or outbound table should be the primary
   3329 	 * for this SA. If these flags were absent then make this
   3330 	 * decision based on the addresses.
   3331 	 */
   3332 	if (sq.assoc->sadb_sa_flags & IPSA_F_INBOUND) {
   3333 		primary = sq.inbound;
   3334 		secondary = sq.outbound;
   3335 		is_inbound = B_TRUE;
   3336 		if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
   3337 			clone = B_TRUE;
   3338 	} else if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
   3339 		primary = sq.outbound;
   3340 		secondary = sq.inbound;
   3341 	}
   3342 
   3343 	if (primary == NULL) {
   3344 		/*
   3345 		 * The KMD did not set a direction flag, determine which
   3346 		 * table to insert the SA into based on addresses.
   3347 		 */
   3348 		switch (ksi->ks_in_dsttype) {
   3349 		case KS_IN_ADDR_MBCAST:
   3350 			clone = B_TRUE;	/* All mcast SAs can be bidirectional */
   3351 			sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
   3352 			/* FALLTHRU */
   3353 		/*
   3354 		 * If the source address is either one of mine, or unspecified
   3355 		 * (which is best summed up by saying "not 'not mine'"),
   3356 		 * then the association is potentially bi-directional,
   3357 		 * in that it can be used for inbound traffic and outbound
   3358 		 * traffic.  The best example of such an SA is a multicast
   3359 		 * SA (which allows me to receive the outbound traffic).
   3360 		 */
   3361 		case KS_IN_ADDR_ME:
   3362 			sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
   3363 			primary = sq.inbound;
   3364 			secondary = sq.outbound;
   3365 			if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
   3366 				clone = B_TRUE;
   3367 			is_inbound = B_TRUE;
   3368 			break;
   3369 		/*
   3370 		 * If the source address literally not mine (either
   3371 		 * unspecified or not mine), then this SA may have an
   3372 		 * address that WILL be mine after some configuration.
   3373 		 * We pay the price for this by making it a bi-directional
   3374 		 * SA.
   3375 		 */
   3376 		case KS_IN_ADDR_NOTME:
   3377 			sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
   3378 			primary = sq.outbound;
   3379 			secondary = sq.inbound;
   3380 			if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
   3381 				sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
   3382 				clone = B_TRUE;
   3383 			}
   3384 			break;
   3385 		default:
   3386 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
   3387 			return (EINVAL);
   3388 		}
   3389 	}
   3390 
   3391 	/*
   3392 	 * Find a ACQUIRE list entry if possible.  If we've added an SA that
   3393 	 * suits the needs of an ACQUIRE list entry, we can eliminate the
   3394 	 * ACQUIRE list entry and transmit the enqueued packets.  Use the
   3395 	 * high-bit of the sequence number to queue it.  Key off destination
   3396 	 * addr, and change acqrec's state.
   3397 	 */
   3398 
   3399 	if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
   3400 		acq_bucket = &(sq.sp->sdb_acq[sq.outhash]);
   3401 		mutex_enter(&acq_bucket->iacqf_lock);
   3402 		for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
   3403 		    acqrec = acqrec->ipsacq_next) {
   3404 			mutex_enter(&acqrec->ipsacq_lock);
   3405 			/*
   3406 			 * Q:  I only check sequence.  Should I check dst?
   3407 			 * A: Yes, check dest because those are the packets
   3408 			 *    that are queued up.
   3409 			 */
   3410 			if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
   3411 			    IPSA_ARE_ADDR_EQUAL(sq.dstaddr,
   3412 			    acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
   3413 				break;
   3414 			mutex_exit(&acqrec->ipsacq_lock);
   3415 		}
   3416 		if (acqrec != NULL) {
   3417 			/*
   3418 			 * AHA!  I found an ACQUIRE record for this SA.
   3419 			 * Grab the msg list, and free the acquire record.
   3420 			 * I already am holding the lock for this record,
   3421 			 * so all I have to do is free it.
   3422 			 */
   3423 			acq_msgs = acqrec->ipsacq_mp;
   3424 			acqrec->ipsacq_mp = NULL;
   3425 			mutex_exit(&acqrec->ipsacq_lock);
   3426 			sadb_destroy_acquire(acqrec,
   3427 			    espstack->ipsecesp_netstack);
   3428 		}
   3429 		mutex_exit(&acq_bucket->iacqf_lock);
   3430 	}
   3431 
   3432 	/*
   3433 	 * Find PF_KEY message, and see if I'm an update.  If so, find entry
   3434 	 * in larval list (if there).
   3435 	 */
   3436 	if (samsg->sadb_msg_type == SADB_UPDATE) {
   3437 		mutex_enter(&sq.inbound->isaf_lock);
   3438 		larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi,
   3439 		    ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family);
   3440 		mutex_exit(&sq.inbound->isaf_lock);
   3441 
   3442 		if ((larval == NULL) ||
   3443 		    (larval->ipsa_state != IPSA_STATE_LARVAL)) {
   3444 			*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
   3445 			if (larval != NULL) {
   3446 				IPSA_REFRELE(larval);
   3447 			}
   3448 			esp0dbg(("Larval update, but larval disappeared.\n"));
   3449 			return (ESRCH);
   3450 		} /* Else sadb_common_add unlinks it for me! */
   3451 	}
   3452 
   3453 	if (larval != NULL) {
   3454 		/*
   3455 		 * Hold again, because sadb_common_add() consumes a reference,
   3456 		 * and we don't want to clear_lpkt() without a reference.
   3457 		 */
   3458 		IPSA_REFHOLD(larval);
   3459 	}
   3460 
   3461 	rc = sadb_common_add(espstack->esp_pfkey_q,
   3462 	    mp, samsg, ksi, primary, secondary, larval, clone, is_inbound,
   3463 	    diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb);
   3464 
   3465 	if (larval != NULL) {
   3466 		if (rc == 0) {
   3467 			lpkt = sadb_clear_lpkt(larval);
   3468 			if (lpkt != NULL) {
   3469 				rc = !taskq_dispatch(esp_taskq, inbound_task,
   3470 				    lpkt, TQ_NOSLEEP);
   3471 			}
   3472 		}
   3473 		IPSA_REFRELE(larval);
   3474 	}
   3475 
   3476 	/*
   3477 	 * How much more stack will I create with all of these
   3478 	 * esp_outbound() calls?
   3479 	 */
   3480 
   3481 	/* Handle the packets queued waiting for the SA */
   3482 	while (acq_msgs != NULL) {
   3483 		mblk_t		*asyncmp;
   3484 		mblk_t		*data_mp;
   3485 		ip_xmit_attr_t	ixas;
   3486 		ill_t		*ill;
   3487 
   3488 		asyncmp = acq_msgs;
   3489 		acq_msgs = acq_msgs->b_next;
   3490 		asyncmp->b_next = NULL;
   3491 
   3492 		/*
   3493 		 * Extract the ip_xmit_attr_t from the first mblk.
   3494 		 * Verifies that the netstack and ill is still around; could
   3495 		 * have vanished while iked was doing its work.
   3496 		 * On succesful return we have a nce_t and the ill/ipst can't
   3497 		 * disappear until we do the nce_refrele in ixa_cleanup.
   3498 		 */
   3499 		data_mp = asyncmp->b_cont;
   3500 		asyncmp->b_cont = NULL;
   3501 		if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) {
   3502 			ESP_BUMP_STAT(espstack, out_discards);
   3503 			ip_drop_packet(data_mp, B_FALSE, NULL,
   3504 			    DROPPER(ipss, ipds_sadb_acquire_timeout),
   3505 			    &espstack->esp_dropper);
   3506 		} else if (rc != 0) {
   3507 			ill = ixas.ixa_nce->nce_ill;
   3508 			ESP_BUMP_STAT(espstack, out_discards);
   3509 			ip_drop_packet(data_mp, B_FALSE, ill,
   3510 			    DROPPER(ipss, ipds_sadb_acquire_timeout),
   3511 			    &espstack->esp_dropper);
   3512 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   3513 		} else {
   3514 			esp_outbound_finish(data_mp, &ixas);
   3515 		}
   3516 		ixa_cleanup(&ixas);
   3517 	}
   3518 
   3519 	return (rc);
   3520 }
   3521 
   3522 /*
   3523  * Process one of the queued messages (from ipsacq_mp) once the SA
   3524  * has been added.
   3525  */
   3526 static void
   3527 esp_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa)
   3528 {
   3529 	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
   3530 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   3531 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   3532 	ill_t		*ill = ixa->ixa_nce->nce_ill;
   3533 
   3534 	if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_ESP)) {
   3535 		ESP_BUMP_STAT(espstack, out_discards);
   3536 		ip_drop_packet(data_mp, B_FALSE, ill,
   3537 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
   3538 		    &espstack->esp_dropper);
   3539 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   3540 		return;
   3541 	}
   3542 
   3543 	data_mp = esp_outbound(data_mp, ixa);
   3544 	if (data_mp == NULL)
   3545 		return;
   3546 
   3547 	/* do AH processing if needed */
   3548 	data_mp = esp_do_outbound_ah(data_mp, ixa);
   3549 	if (data_mp == NULL)
   3550 		return;
   3551 
   3552 	(void) ip_output_post_ipsec(data_mp, ixa);
   3553 }
   3554 
   3555 /*
   3556  * Add new ESP security association.  This may become a generic AH/ESP
   3557  * routine eventually.
   3558  */
   3559 static int
   3560 esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
   3561 {
   3562 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
   3563 	sadb_address_t *srcext =
   3564 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
   3565 	sadb_address_t *dstext =
   3566 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
   3567 	sadb_address_t *isrcext =
   3568 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
   3569 	sadb_address_t *idstext =
   3570 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
   3571 	sadb_address_t *nttext_loc =
   3572 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
   3573 	sadb_address_t *nttext_rem =
   3574 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM];
   3575 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
   3576 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
   3577 	struct sockaddr_in *src, *dst;
   3578 	struct sockaddr_in *natt_loc, *natt_rem;
   3579 	struct sockaddr_in6 *natt_loc6, *natt_rem6;
   3580 	sadb_lifetime_t *soft =
   3581 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
   3582 	sadb_lifetime_t *hard =
   3583 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
   3584 	sadb_lifetime_t *idle =
   3585 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
   3586 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   3587 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   3588 
   3589 
   3590 
   3591 	/* I need certain extensions present for an ADD message. */
   3592 	if (srcext == NULL) {
   3593 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
   3594 		return (EINVAL);
   3595 	}
   3596 	if (dstext == NULL) {
   3597 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
   3598 		return (EINVAL);
   3599 	}
   3600 	if (isrcext == NULL && idstext != NULL) {
   3601 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
   3602 		return (EINVAL);
   3603 	}
   3604 	if (isrcext != NULL && idstext == NULL) {
   3605 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
   3606 		return (EINVAL);
   3607 	}
   3608 	if (assoc == NULL) {
   3609 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
   3610 		return (EINVAL);
   3611 	}
   3612 	if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) {
   3613 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY;
   3614 		return (EINVAL);
   3615 	}
   3616 
   3617 	src = (struct sockaddr_in *)(srcext + 1);
   3618 	dst = (struct sockaddr_in *)(dstext + 1);
   3619 	natt_loc = (struct sockaddr_in *)(nttext_loc + 1);
   3620 	natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1);
   3621 	natt_rem = (struct sockaddr_in *)(nttext_rem + 1);
   3622 	natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1);
   3623 
   3624 	/* Sundry ADD-specific reality checks. */
   3625 	/* XXX STATS :  Logging/stats here? */
   3626 
   3627 	if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) &&
   3628 	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
   3629 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
   3630 		return (EINVAL);
   3631 	}
   3632 	if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) {
   3633 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
   3634 		return (EINVAL);
   3635 	}
   3636 
   3637 #ifndef IPSEC_LATENCY_TEST
   3638 	if (assoc->sadb_sa_encrypt == SADB_EALG_NULL &&
   3639 	    assoc->sadb_sa_auth == SADB_AALG_NONE) {
   3640 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
   3641 		return (EINVAL);
   3642 	}
   3643 #endif
   3644 
   3645 	if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) {
   3646 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
   3647 		return (EINVAL);
   3648 	}
   3649 
   3650 	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
   3651 		return (EINVAL);
   3652 	}
   3653 	ASSERT(src->sin_family == dst->sin_family);
   3654 
   3655 	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) {
   3656 		if (nttext_loc == NULL) {
   3657 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
   3658 			return (EINVAL);
   3659 		}
   3660 
   3661 		if (natt_loc->sin_family == AF_INET6 &&
   3662 		    !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) {
   3663 			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC;
   3664 			return (EINVAL);
   3665 		}
   3666 	}
   3667 
   3668 	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) {
   3669 		if (nttext_rem == NULL) {
   3670 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
   3671 			return (EINVAL);
   3672 		}
   3673 		if (natt_rem->sin_family == AF_INET6 &&
   3674 		    !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) {
   3675 			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM;
   3676 			return (EINVAL);
   3677 		}
   3678 	}
   3679 
   3680 
   3681 	/* Stuff I don't support, for now.  XXX Diagnostic? */
   3682 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL)
   3683 		return (EOPNOTSUPP);
   3684 
   3685 	if ((*diagnostic = sadb_labelchk(ksi)) != 0)
   3686 		return (EINVAL);
   3687 
   3688 	/*
   3689 	 * XXX Policy :  I'm not checking identities at this time,
   3690 	 * but if I did, I'd do them here, before I sent
   3691 	 * the weak key check up to the algorithm.
   3692 	 */
   3693 
   3694 	mutex_enter(&ipss->ipsec_alg_lock);
   3695 
   3696 	/*
   3697 	 * First locate the authentication algorithm.
   3698 	 */
   3699 #ifdef IPSEC_LATENCY_TEST
   3700 	if (akey != NULL && assoc->sadb_sa_auth != SADB_AALG_NONE) {
   3701 #else
   3702 	if (akey != NULL) {
   3703 #endif
   3704 		ipsec_alginfo_t *aalg;
   3705 
   3706 		aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
   3707 		    [assoc->sadb_sa_auth];
   3708 		if (aalg == NULL || !ALG_VALID(aalg)) {
   3709 			mutex_exit(&ipss->ipsec_alg_lock);
   3710 			esp1dbg(espstack, ("Couldn't find auth alg #%d.\n",
   3711 			    assoc->sadb_sa_auth));
   3712 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
   3713 			return (EINVAL);
   3714 		}
   3715 
   3716 		/*
   3717 		 * Sanity check key sizes.
   3718 		 * Note: It's not possible to use SADB_AALG_NONE because
   3719 		 * this auth_alg is not defined with ALG_FLAG_VALID. If this
   3720 		 * ever changes, the same check for SADB_AALG_NONE and
   3721 		 * a auth_key != NULL should be made here ( see below).
   3722 		 */
   3723 		if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) {
   3724 			mutex_exit(&ipss->ipsec_alg_lock);
   3725 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
   3726 			return (EINVAL);
   3727 		}
   3728 		ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
   3729 
   3730 		/* check key and fix parity if needed */
   3731 		if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE,
   3732 		    diagnostic) != 0) {
   3733 			mutex_exit(&ipss->ipsec_alg_lock);
   3734 			return (EINVAL);
   3735 		}
   3736 	}
   3737 
   3738 	/*
   3739 	 * Then locate the encryption algorithm.
   3740 	 */
   3741 	if (ekey != NULL) {
   3742 		uint_t keybits;
   3743 		ipsec_alginfo_t *ealg;
   3744 
   3745 		ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
   3746 		    [assoc->sadb_sa_encrypt];
   3747 		if (ealg == NULL || !ALG_VALID(ealg)) {
   3748 			mutex_exit(&ipss->ipsec_alg_lock);
   3749 			esp1dbg(espstack, ("Couldn't find encr alg #%d.\n",
   3750 			    assoc->sadb_sa_encrypt));
   3751 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
   3752 			return (EINVAL);
   3753 		}
   3754 
   3755 		/*
   3756 		 * Sanity check key sizes. If the encryption algorithm is
   3757 		 * SADB_EALG_NULL but the encryption key is NOT
   3758 		 * NULL then complain.
   3759 		 *
   3760 		 * The keying material includes salt bits if required by
   3761 		 * algorithm and optionally the Initial IV, check the
   3762 		 * length of whats left.
   3763 		 */
   3764 		keybits = ekey->sadb_key_bits;
   3765 		keybits -= ekey->sadb_key_reserved;
   3766 		keybits -= SADB_8TO1(ealg->alg_saltlen);
   3767 		if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) ||
   3768 		    (!ipsec_valid_key_size(keybits, ealg))) {
   3769 			mutex_exit(&ipss->ipsec_alg_lock);
   3770 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
   3771 			return (EINVAL);
   3772 		}
   3773 		ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
   3774 
   3775 		/* check key */
   3776 		if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE,
   3777 		    diagnostic) != 0) {
   3778 			mutex_exit(&ipss->ipsec_alg_lock);
   3779 			return (EINVAL);
   3780 		}
   3781 	}
   3782 	mutex_exit(&ipss->ipsec_alg_lock);
   3783 
   3784 	return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
   3785 	    diagnostic, espstack));
   3786 }
   3787 
   3788 /*
   3789  * Update a security association.  Updates come in two varieties.  The first
   3790  * is an update of lifetimes on a non-larval SA.  The second is an update of
   3791  * a larval SA, which ends up looking a lot more like an add.
   3792  */
   3793 static int
   3794 esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
   3795     ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
   3796 {
   3797 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
   3798 	mblk_t    *buf_pkt;
   3799 	int rcode;
   3800 
   3801 	sadb_address_t *dstext =
   3802 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
   3803 
   3804 	if (dstext == NULL) {
   3805 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
   3806 		return (EINVAL);
   3807 	}
   3808 
   3809 	rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb,
   3810 	    diagnostic, espstack->esp_pfkey_q, esp_add_sa,
   3811 	    espstack->ipsecesp_netstack, sadb_msg_type);
   3812 
   3813 	if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) ||
   3814 	    (rcode != 0)) {
   3815 		return (rcode);
   3816 	}
   3817 
   3818 	HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec,
   3819 	    espstack->esp_dropper, buf_pkt);
   3820 
   3821 	return (rcode);
   3822 }
   3823 
   3824 /* XXX refactor me */
   3825 /*
   3826  * Delete a security association.  This is REALLY likely to be code common to
   3827  * both AH and ESP.  Find the association, then unlink it.
   3828  */
   3829 static int
   3830 esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
   3831     ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
   3832 {
   3833 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
   3834 	sadb_address_t *dstext =
   3835 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
   3836 	sadb_address_t *srcext =
   3837 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
   3838 	struct sockaddr_in *sin;
   3839 
   3840 	if (assoc == NULL) {
   3841 		if (dstext != NULL) {
   3842 			sin = (struct sockaddr_in *)(dstext + 1);
   3843 		} else if (srcext != NULL) {
   3844 			sin = (struct sockaddr_in *)(srcext + 1);
   3845 		} else {
   3846 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
   3847 			return (EINVAL);
   3848 		}
   3849 		return (sadb_purge_sa(mp, ksi,
   3850 		    (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 :
   3851 		    &espstack->esp_sadb.s_v4, diagnostic,
   3852 		    espstack->esp_pfkey_q));
   3853 	}
   3854 
   3855 	return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic,
   3856 	    espstack->esp_pfkey_q, sadb_msg_type));
   3857 }
   3858 
   3859 /* XXX refactor me */
   3860 /*
   3861  * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP
   3862  * messages.
   3863  */
   3864 static void
   3865 esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
   3866 {
   3867 	int error;
   3868 	sadb_msg_t *samsg;
   3869 
   3870 	/*
   3871 	 * Dump each fanout, bailing if error is non-zero.
   3872 	 */
   3873 
   3874 	error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
   3875 	    &espstack->esp_sadb.s_v4);
   3876 	if (error != 0)
   3877 		goto bail;
   3878 
   3879 	error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
   3880 	    &espstack->esp_sadb.s_v6);
   3881 bail:
   3882 	ASSERT(mp->b_cont != NULL);
   3883 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
   3884 	samsg->sadb_msg_errno = (uint8_t)error;
   3885 	sadb_pfkey_echo(espstack->esp_pfkey_q, mp,
   3886 	    (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
   3887 }
   3888 
   3889 /*
   3890  * First-cut reality check for an inbound PF_KEY message.
   3891  */
   3892 static boolean_t
   3893 esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
   3894     ipsecesp_stack_t *espstack)
   3895 {
   3896 	int diagnostic;
   3897 
   3898 	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
   3899 		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
   3900 		goto badmsg;
   3901 	}
   3902 	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
   3903 	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
   3904 		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
   3905 		goto badmsg;
   3906 	}
   3907 	return (B_FALSE);	/* False ==> no failures */
   3908 
   3909 badmsg:
   3910 	sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
   3911 	    ksi->ks_in_serial);
   3912 	return (B_TRUE);	/* True ==> failures */
   3913 }
   3914 
   3915 /*
   3916  * ESP parsing of PF_KEY messages.  Keysock did most of the really silly
   3917  * error cases.  What I receive is a fully-formed, syntactically legal
   3918  * PF_KEY message.  I then need to check semantics...
   3919  *
   3920  * This code may become common to AH and ESP.  Stay tuned.
   3921  *
   3922  * I also make the assumption that db_ref's are cool.  If this assumption
   3923  * is wrong, this means that someone other than keysock or me has been
   3924  * mucking with PF_KEY messages.
   3925  */
   3926 static void
   3927 esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack)
   3928 {
   3929 	mblk_t *msg = mp->b_cont;
   3930 	sadb_msg_t *samsg;
   3931 	keysock_in_t *ksi;
   3932 	int error;
   3933 	int diagnostic = SADB_X_DIAGNOSTIC_NONE;
   3934 
   3935 	ASSERT(msg != NULL);
   3936 
   3937 	samsg = (sadb_msg_t *)msg->b_rptr;
   3938 	ksi = (keysock_in_t *)mp->b_rptr;
   3939 
   3940 	/*
   3941 	 * If applicable, convert unspecified AF_INET6 to unspecified
   3942 	 * AF_INET.  And do other address reality checks.
   3943 	 */
   3944 	if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp,
   3945 	    espstack->ipsecesp_netstack) ||
   3946 	    esp_pfkey_reality_failures(mp, ksi, espstack)) {
   3947 		return;
   3948 	}
   3949 
   3950 	switch (samsg->sadb_msg_type) {
   3951 	case SADB_ADD:
   3952 		error = esp_add_sa(mp, ksi, &diagnostic,
   3953 		    espstack->ipsecesp_netstack);
   3954 		if (error != 0) {
   3955 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
   3956 			    diagnostic, ksi->ks_in_serial);
   3957 		}
   3958 		/* else esp_add_sa() took care of things. */
   3959 		break;
   3960 	case SADB_DELETE:
   3961 	case SADB_X_DELPAIR:
   3962 	case SADB_X_DELPAIR_STATE:
   3963 		error = esp_del_sa(mp, ksi, &diagnostic, espstack,
   3964 		    samsg->sadb_msg_type);
   3965 		if (error != 0) {
   3966 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
   3967 			    diagnostic, ksi->ks_in_serial);
   3968 		}
   3969 		/* Else esp_del_sa() took care of things. */
   3970 		break;
   3971 	case SADB_GET:
   3972 		error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb,
   3973 		    &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type);
   3974 		if (error != 0) {
   3975 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
   3976 			    diagnostic, ksi->ks_in_serial);
   3977 		}
   3978 		/* Else sadb_get_sa() took care of things. */
   3979 		break;
   3980 	case SADB_FLUSH:
   3981 		sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack);
   3982 		sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL);
   3983 		break;
   3984 	case SADB_REGISTER:
   3985 		/*
   3986 		 * Hmmm, let's do it!  Check for extensions (there should
   3987 		 * be none), extract the fields, call esp_register_out(),
   3988 		 * then either free or report an error.
   3989 		 *
   3990 		 * Keysock takes care of the PF_KEY bookkeeping for this.
   3991 		 */
   3992 		if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
   3993 		    ksi->ks_in_serial, espstack, msg_getcred(mp, NULL))) {
   3994 			freemsg(mp);
   3995 		} else {
   3996 			/*
   3997 			 * Only way this path hits is if there is a memory
   3998 			 * failure.  It will not return B_FALSE because of
   3999 			 * lack of esp_pfkey_q if I am in wput().
   4000 			 */
   4001 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM,
   4002 			    diagnostic, ksi->ks_in_serial);
   4003 		}
   4004 		break;
   4005 	case SADB_UPDATE:
   4006 	case SADB_X_UPDATEPAIR:
   4007 		/*
   4008 		 * Find a larval, if not there, find a full one and get
   4009 		 * strict.
   4010 		 */
   4011 		error = esp_update_sa(mp, ksi, &diagnostic, espstack,
   4012 		    samsg->sadb_msg_type);
   4013 		if (error != 0) {
   4014 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
   4015 			    diagnostic, ksi->ks_in_serial);
   4016 		}
   4017 		/* else esp_update_sa() took care of things. */
   4018 		break;
   4019 	case SADB_GETSPI:
   4020 		/*
   4021 		 * Reserve a new larval entry.
   4022 		 */
   4023 		esp_getspi(mp, ksi, espstack);
   4024 		break;
   4025 	case SADB_ACQUIRE:
   4026 		/*
   4027 		 * Find larval and/or ACQUIRE record and kill it (them), I'm
   4028 		 * most likely an error.  Inbound ACQUIRE messages should only
   4029 		 * have the base header.
   4030 		 */
   4031 		sadb_in_acquire(samsg, &espstack->esp_sadb,
   4032 		    espstack->esp_pfkey_q, espstack->ipsecesp_netstack);
   4033 		freemsg(mp);
   4034 		break;
   4035 	case SADB_DUMP:
   4036 		/*
   4037 		 * Dump all entries.
   4038 		 */
   4039 		esp_dump(mp, ksi, espstack);
   4040 		/* esp_dump will take care of the return message, etc. */
   4041 		break;
   4042 	case SADB_EXPIRE:
   4043 		/* Should never reach me. */
   4044 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP,
   4045 		    diagnostic, ksi->ks_in_serial);
   4046 		break;
   4047 	default:
   4048 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL,
   4049 		    SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
   4050 		break;
   4051 	}
   4052 }
   4053 
   4054 /*
   4055  * Handle case where PF_KEY says it can't find a keysock for one of my
   4056  * ACQUIRE messages.
   4057  */
   4058 static void
   4059 esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack)
   4060 {
   4061 	sadb_msg_t *samsg;
   4062 	keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
   4063 
   4064 	if (mp->b_cont == NULL) {
   4065 		freemsg(mp);
   4066 		return;
   4067 	}
   4068 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
   4069 
   4070 	/*
   4071 	 * If keysock can't find any registered, delete the acquire record
   4072 	 * immediately, and handle errors.
   4073 	 */
   4074 	if (samsg->sadb_msg_type == SADB_ACQUIRE) {
   4075 		samsg->sadb_msg_errno = kse->ks_err_errno;
   4076 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
   4077 		/*
   4078 		 * Use the write-side of the esp_pfkey_q
   4079 		 */
   4080 		sadb_in_acquire(samsg, &espstack->esp_sadb,
   4081 		    WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack);
   4082 	}
   4083 
   4084 	freemsg(mp);
   4085 }
   4086 
   4087 /*
   4088  * ESP module write put routine.
   4089  */
   4090 static void
   4091 ipsecesp_wput(queue_t *q, mblk_t *mp)
   4092 {
   4093 	ipsec_info_t *ii;
   4094 	struct iocblk *iocp;
   4095 	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;
   4096 
   4097 	esp3dbg(espstack, ("In esp_wput().\n"));
   4098 
   4099 	/* NOTE: Each case must take care of freeing or passing mp. */
   4100 	switch (mp->b_datap->db_type) {
   4101 	case M_CTL:
   4102 		if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
   4103 			/* Not big enough message. */
   4104 			freemsg(mp);
   4105 			break;
   4106 		}
   4107 		ii = (ipsec_info_t *)mp->b_rptr;
   4108 
   4109 		switch (ii->ipsec_info_type) {
   4110 		case KEYSOCK_OUT_ERR:
   4111 			esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n"));
   4112 			esp_keysock_no_socket(mp, espstack);
   4113 			break;
   4114 		case KEYSOCK_IN:
   4115 			ESP_BUMP_STAT(espstack, keysock_in);
   4116 			esp3dbg(espstack, ("Got KEYSOCK_IN message.\n"));
   4117 
   4118 			/* Parse the message. */
   4119 			esp_parse_pfkey(mp, espstack);
   4120 			break;
   4121 		case KEYSOCK_HELLO:
   4122 			sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp,
   4123 			    esp_ager, (void *)espstack, &espstack->esp_event,
   4124 			    SADB_SATYPE_ESP);
   4125 			break;
   4126 		default:
   4127 			esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n",
   4128 			    ii->ipsec_info_type));
   4129 			freemsg(mp);
   4130 			break;
   4131 		}
   4132 		break;
   4133 	case M_IOCTL:
   4134 		iocp = (struct iocblk *)mp->b_rptr;
   4135 		switch (iocp->ioc_cmd) {
   4136 		case ND_SET:
   4137 		case ND_GET:
   4138 			if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) {
   4139 				qreply(q, mp);
   4140 				return;
   4141 			} else {
   4142 				iocp->ioc_error = ENOENT;
   4143 			}
   4144 			/* FALLTHRU */
   4145 		default:
   4146 			/* We really don't support any other ioctls, do we? */
   4147 
   4148 			/* Return EINVAL */
   4149 			if (iocp->ioc_error != ENOENT)
   4150 				iocp->ioc_error = EINVAL;
   4151 			iocp->ioc_count = 0;
   4152 			mp->b_datap->db_type = M_IOCACK;
   4153 			qreply(q, mp);
   4154 			return;
   4155 		}
   4156 	default:
   4157 		esp3dbg(espstack,
   4158 		    ("Got default message, type %d, passing to IP.\n",
   4159 		    mp->b_datap->db_type));
   4160 		putnext(q, mp);
   4161 	}
   4162 }
   4163 
   4164 /*
   4165  * Wrapper to allow IP to trigger an ESP association failure message
   4166  * during inbound SA selection.
   4167  */
   4168 void
   4169 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
   4170     uint32_t spi, void *addr, int af, ip_recv_attr_t *ira)
   4171 {
   4172 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
   4173 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   4174 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   4175 
   4176 	if (espstack->ipsecesp_log_unknown_spi) {
   4177 		ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
   4178 		    addr, af, espstack->ipsecesp_netstack);
   4179 	}
   4180 
   4181 	ip_drop_packet(mp, B_TRUE, ira->ira_ill,
   4182 	    DROPPER(ipss, ipds_esp_no_sa),
   4183 	    &espstack->esp_dropper);
   4184 }
   4185 
   4186 /*
   4187  * Initialize the ESP input and output processing functions.
   4188  */
   4189 void
   4190 ipsecesp_init_funcs(ipsa_t *sa)
   4191 {
   4192 	if (sa->ipsa_output_func == NULL)
   4193 		sa->ipsa_output_func = esp_outbound;
   4194 	if (sa->ipsa_input_func == NULL)
   4195 		sa->ipsa_input_func = esp_inbound;
   4196 }
   4197