Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/types.h>
     27 #include <sys/stream.h>
     28 #include <sys/stropts.h>
     29 #include <sys/errno.h>
     30 #include <sys/strlog.h>
     31 #include <sys/tihdr.h>
     32 #include <sys/socket.h>
     33 #include <sys/ddi.h>
     34 #include <sys/sunddi.h>
     35 #include <sys/kmem.h>
     36 #include <sys/zone.h>
     37 #include <sys/sysmacros.h>
     38 #include <sys/cmn_err.h>
     39 #include <sys/vtrace.h>
     40 #include <sys/debug.h>
     41 #include <sys/atomic.h>
     42 #include <sys/strsun.h>
     43 #include <sys/random.h>
     44 #include <netinet/in.h>
     45 #include <net/if.h>
     46 #include <netinet/ip6.h>
     47 #include <net/pfkeyv2.h>
     48 #include <net/pfpolicy.h>
     49 
     50 #include <inet/common.h>
     51 #include <inet/mi.h>
     52 #include <inet/nd.h>
     53 #include <inet/ip.h>
     54 #include <inet/ip_impl.h>
     55 #include <inet/ip6.h>
     56 #include <inet/ip_if.h>
     57 #include <inet/ip_ndp.h>
     58 #include <inet/sadb.h>
     59 #include <inet/ipsec_info.h>
     60 #include <inet/ipsec_impl.h>
     61 #include <inet/ipsecesp.h>
     62 #include <inet/ipdrop.h>
     63 #include <inet/tcp.h>
     64 #include <sys/kstat.h>
     65 #include <sys/policy.h>
     66 #include <sys/strsun.h>
     67 #include <sys/strsubr.h>
     68 #include <inet/udp_impl.h>
     69 #include <sys/taskq.h>
     70 #include <sys/note.h>
     71 
     72 #include <sys/tsol/tnet.h>
     73 
/*
 * Table of ND variables supported by ipsecesp.  This is the template: each
 * stack instance gets its own copy in ipsecesp_stack_init(), and that copy
 * is registered with the stack's ipsecesp_g_nd by ipsecesp_param_register().
 * All of these are alterable, within the min/max values given, at run time
 * (ipsecesp_param_set() rejects values outside [min, max]).
 *
 * NOTE: the order of the entries matters.  The ipsecesp_* accessor macros
 * that follow this table select entries by array index, so inserting or
 * reordering rows requires updating those macros as well.
 */
static	ipsecespparam_t	lcl_param_arr[] = {
	/* min	max			value	name */
	{ 0,	3,			0,	"ipsecesp_debug"},
	{ 125,	32000, SADB_AGE_INTERVAL_DEFAULT, "ipsecesp_age_interval"},
	{ 1,	10,			1,	"ipsecesp_reap_delay"},
	{ 1,	SADB_MAX_REPLAY,	64,	"ipsecesp_replay_size"},
	{ 1,	300,			15,	"ipsecesp_acquire_timeout"},
	{ 1,	1800,			90,	"ipsecesp_larval_timeout"},
	/*
	 * Default lifetime values for ACQUIRE messages; these are the values
	 * ipsecesp_fill_defs() copies into an extended combination.
	 */
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_bytes"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_bytes"},
	{ 0,	0xffffffffU,	24000,	"ipsecesp_default_soft_addtime"},
	{ 0,	0xffffffffU,	28800,	"ipsecesp_default_hard_addtime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_soft_usetime"},
	{ 0,	0xffffffffU,	0,	"ipsecesp_default_hard_usetime"},
	{ 0,	1,		0,	"ipsecesp_log_unknown_spi"},
	{ 0,	2,		1,	"ipsecesp_padding_check"},
	{ 0,	600,		20,	"ipsecesp_nat_keepalive_interval"},
};
/*
 * Accessors for the per-stack parameter array.  Each macro names a member
 * path, so it is used as "espstack->ipsecesp_debug" and expands to
 * "espstack->ipsecesp_params[N].ipsecesp_param_value".  The index N must
 * match the position of the corresponding row in lcl_param_arr.
 */
#define	ipsecesp_debug	ipsecesp_params[0].ipsecesp_param_value
#define	ipsecesp_age_interval ipsecesp_params[1].ipsecesp_param_value
/* Upper bound (not the current value) of ipsecesp_age_interval. */
#define	ipsecesp_age_int_max	ipsecesp_params[1].ipsecesp_param_max
#define	ipsecesp_reap_delay	ipsecesp_params[2].ipsecesp_param_value
#define	ipsecesp_replay_size	ipsecesp_params[3].ipsecesp_param_value
#define	ipsecesp_acquire_timeout	\
	ipsecesp_params[4].ipsecesp_param_value
#define	ipsecesp_larval_timeout	\
	ipsecesp_params[5].ipsecesp_param_value
#define	ipsecesp_default_soft_bytes	\
	ipsecesp_params[6].ipsecesp_param_value
#define	ipsecesp_default_hard_bytes	\
	ipsecesp_params[7].ipsecesp_param_value
#define	ipsecesp_default_soft_addtime	\
	ipsecesp_params[8].ipsecesp_param_value
#define	ipsecesp_default_hard_addtime	\
	ipsecesp_params[9].ipsecesp_param_value
#define	ipsecesp_default_soft_usetime	\
	ipsecesp_params[10].ipsecesp_param_value
#define	ipsecesp_default_hard_usetime	\
	ipsecesp_params[11].ipsecesp_param_value
#define	ipsecesp_log_unknown_spi	\
	ipsecesp_params[12].ipsecesp_param_value
#define	ipsecesp_padding_check	\
	ipsecesp_params[13].ipsecesp_param_value
/* For ipsecesp_nat_keepalive_interval, see ipsecesp.h. */
    124 
    125 #define	esp0dbg(a)	printf a
    126 /* NOTE:  != 0 instead of > 0 so lint doesn't complain. */
    127 #define	esp1dbg(espstack, a)	if (espstack->ipsecesp_debug != 0) printf a
    128 #define	esp2dbg(espstack, a)	if (espstack->ipsecesp_debug > 1) printf a
    129 #define	esp3dbg(espstack, a)	if (espstack->ipsecesp_debug > 2) printf a
    130 
    131 static int ipsecesp_open(queue_t *, dev_t *, int, int, cred_t *);
    132 static int ipsecesp_close(queue_t *);
    133 static void ipsecesp_wput(queue_t *, mblk_t *);
    134 static void	*ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns);
    135 static void	ipsecesp_stack_fini(netstackid_t stackid, void *arg);
    136 static void esp_send_acquire(ipsacq_t *, mblk_t *, netstack_t *);
    137 
    138 static void esp_prepare_udp(netstack_t *, mblk_t *, ipha_t *);
    139 static void esp_outbound_finish(mblk_t *, ip_xmit_attr_t *);
    140 static void esp_inbound_restart(mblk_t *, ip_recv_attr_t *);
    141 
    142 static boolean_t esp_register_out(uint32_t, uint32_t, uint_t,
    143     ipsecesp_stack_t *, cred_t *);
    144 static boolean_t esp_strip_header(mblk_t *, boolean_t, uint32_t,
    145     kstat_named_t **, ipsecesp_stack_t *);
    146 static mblk_t *esp_submit_req_inbound(mblk_t *, ip_recv_attr_t *,
    147     ipsa_t *, uint_t);
    148 static mblk_t *esp_submit_req_outbound(mblk_t *, ip_xmit_attr_t *,
    149     ipsa_t *, uchar_t *, uint_t);
    150 
/*
 * Settable in /etc/system.  Passed to sadbp_init() as the hash size when a
 * stack's ESP SADB is created in ipsecesp_stack_init().
 */
uint32_t esp_hash_size = IPSEC_DEFAULT_HASH_SIZE;

/*
 * STREAMS module information shared by the read and write queues.
 * info.mi_idnum is also used elsewhere in this file as the module id for
 * sadb_retimeout() and ipsec_rl_strlog().
 * NOTE(review): the initializer is positional; see module_info(9S) for the
 * meaning of each slot (presumably id, name, min/max packet size, hi/lo
 * water marks) -- confirm before changing any value.
 */
static struct module_info info = {
	5137, "ipsecesp", 0, INFPSZ, 65536, 1024
};

/*
 * Read-side queue initialization: the first slot is putnext, so the read
 * side has no ESP-specific put routine of its own.
 */
static struct qinit rinit = {
	(pfi_t)putnext, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
	NULL
};

/*
 * Write-side queue initialization: same open/close routines as the read
 * side, with ipsecesp_wput() in the first slot.
 */
static struct qinit winit = {
	(pfi_t)ipsecesp_wput, NULL, ipsecesp_open, ipsecesp_close, NULL, &info,
	NULL
};

/* Exported streamtab tying the read and write qinit structures together. */
struct streamtab ipsecespinfo = {
	&rinit, &winit, NULL, NULL
};

/*
 * Module-global task queue; created in ipsecesp_ddi_init() and destroyed
 * in ipsecesp_ddi_destroy().
 */
static taskq_t *esp_taskq;
    173 
    174 /*
    175  * OTOH, this one is set at open/close, and I'm D_MTQPAIR for now.
    176  *
    177  * Question:	Do I need this, given that all instance's esps->esps_wq point
    178  *		to IP?
    179  *
    180  * Answer:	Yes, because I need to know which queue is BOUND to
    181  *		IPPROTO_ESP
    182  */
    183 
    184 /*
    185  * Stats.  This may eventually become a full-blown SNMP MIB once that spec
    186  * stabilizes.
    187  */
    188 
/*
 * Named kstats exported per stack instance as "esp_stat".
 *
 * The structure must consist solely of kstat_named_t members:
 * esp_kstat_init() derives the number of kstat entries from
 * sizeof (esp_kstats_t) / sizeof (kstat_named_t), and names each entry
 * after the member with the "esp_stat_" prefix removed.
 */
typedef struct esp_kstats_s {
	/* Refreshed from ipsec_nalgs[IPSEC_ALG_AUTH] by esp_kstat_update(). */
	kstat_named_t esp_stat_num_aalgs;
	kstat_named_t esp_stat_good_auth;
	kstat_named_t esp_stat_bad_auth;
	kstat_named_t esp_stat_bad_padding;
	kstat_named_t esp_stat_replay_failures;
	kstat_named_t esp_stat_replay_early_failures;
	kstat_named_t esp_stat_keysock_in;
	kstat_named_t esp_stat_out_requests;
	kstat_named_t esp_stat_acquire_requests;
	kstat_named_t esp_stat_bytes_expired;
	kstat_named_t esp_stat_out_discards;
	kstat_named_t esp_stat_crypto_sync;
	kstat_named_t esp_stat_crypto_async;
	kstat_named_t esp_stat_crypto_failures;
	/* Refreshed from ipsec_nalgs[IPSEC_ALG_ENCR] by esp_kstat_update(). */
	kstat_named_t esp_stat_num_ealgs;
	/* Bumped by esp_strip_header() when the pad length is implausible. */
	kstat_named_t esp_stat_bad_decrypt;
	kstat_named_t esp_stat_sa_port_renumbers;
} esp_kstats_t;
    208 
    209 /*
    210  * espstack->esp_kstats is equal to espstack->esp_ksp->ks_data if
    211  * kstat_create_netstack for espstack->esp_ksp succeeds, but when it
    212  * fails, it will be NULL. Note this is done for all stack instances,
    213  * so it *could* fail. hence a non-NULL checking is done for
    214  * ESP_BUMP_STAT and ESP_DEBUMP_STAT
    215  */
    216 #define	ESP_BUMP_STAT(espstack, x)					\
    217 do {									\
    218 	if (espstack->esp_kstats != NULL)				\
    219 		(espstack->esp_kstats->esp_stat_ ## x).value.ui64++;	\
    220 _NOTE(CONSTCOND)							\
    221 } while (0)
    222 
    223 #define	ESP_DEBUMP_STAT(espstack, x)					\
    224 do {									\
    225 	if (espstack->esp_kstats != NULL)				\
    226 		(espstack->esp_kstats->esp_stat_ ## x).value.ui64--;	\
    227 _NOTE(CONSTCOND)							\
    228 } while (0)
    229 
/*
 * Forward declaration: esp_kstat_init() below installs this function as
 * the kstat's ks_update routine before its definition appears.
 */
static int	esp_kstat_update(kstat_t *, int);
    231 
    232 static boolean_t
    233 esp_kstat_init(ipsecesp_stack_t *espstack, netstackid_t stackid)
    234 {
    235 	espstack->esp_ksp = kstat_create_netstack("ipsecesp", 0, "esp_stat",
    236 	    "net", KSTAT_TYPE_NAMED,
    237 	    sizeof (esp_kstats_t) / sizeof (kstat_named_t),
    238 	    KSTAT_FLAG_PERSISTENT, stackid);
    239 
    240 	if (espstack->esp_ksp == NULL || espstack->esp_ksp->ks_data == NULL)
    241 		return (B_FALSE);
    242 
    243 	espstack->esp_kstats = espstack->esp_ksp->ks_data;
    244 
    245 	espstack->esp_ksp->ks_update = esp_kstat_update;
    246 	espstack->esp_ksp->ks_private = (void *)(uintptr_t)stackid;
    247 
    248 #define	K64 KSTAT_DATA_UINT64
    249 #define	KI(x) kstat_named_init(&(espstack->esp_kstats->esp_stat_##x), #x, K64)
    250 
    251 	KI(num_aalgs);
    252 	KI(num_ealgs);
    253 	KI(good_auth);
    254 	KI(bad_auth);
    255 	KI(bad_padding);
    256 	KI(replay_failures);
    257 	KI(replay_early_failures);
    258 	KI(keysock_in);
    259 	KI(out_requests);
    260 	KI(acquire_requests);
    261 	KI(bytes_expired);
    262 	KI(out_discards);
    263 	KI(crypto_sync);
    264 	KI(crypto_async);
    265 	KI(crypto_failures);
    266 	KI(bad_decrypt);
    267 	KI(sa_port_renumbers);
    268 
    269 #undef KI
    270 #undef K64
    271 
    272 	kstat_install(espstack->esp_ksp);
    273 
    274 	return (B_TRUE);
    275 }
    276 
    277 static int
    278 esp_kstat_update(kstat_t *kp, int rw)
    279 {
    280 	esp_kstats_t *ekp;
    281 	netstackid_t	stackid = (zoneid_t)(uintptr_t)kp->ks_private;
    282 	netstack_t	*ns;
    283 	ipsec_stack_t	*ipss;
    284 
    285 	if ((kp == NULL) || (kp->ks_data == NULL))
    286 		return (EIO);
    287 
    288 	if (rw == KSTAT_WRITE)
    289 		return (EACCES);
    290 
    291 	ns = netstack_find_by_stackid(stackid);
    292 	if (ns == NULL)
    293 		return (-1);
    294 	ipss = ns->netstack_ipsec;
    295 	if (ipss == NULL) {
    296 		netstack_rele(ns);
    297 		return (-1);
    298 	}
    299 	ekp = (esp_kstats_t *)kp->ks_data;
    300 
    301 	mutex_enter(&ipss->ipsec_alg_lock);
    302 	ekp->esp_stat_num_aalgs.value.ui64 =
    303 	    ipss->ipsec_nalgs[IPSEC_ALG_AUTH];
    304 	ekp->esp_stat_num_ealgs.value.ui64 =
    305 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
    306 	mutex_exit(&ipss->ipsec_alg_lock);
    307 
    308 	netstack_rele(ns);
    309 	return (0);
    310 }
    311 
    312 #ifdef DEBUG
    313 /*
    314  * Debug routine, useful to see pre-encryption data.
    315  */
    316 static char *
    317 dump_msg(mblk_t *mp)
    318 {
    319 	char tmp_str[3], tmp_line[256];
    320 
    321 	while (mp != NULL) {
    322 		unsigned char *ptr;
    323 
    324 		printf("mblk address 0x%p, length %ld, db_ref %d "
    325 		    "type %d, base 0x%p, lim 0x%p\n",
    326 		    (void *) mp, (long)(mp->b_wptr - mp->b_rptr),
    327 		    mp->b_datap->db_ref, mp->b_datap->db_type,
    328 		    (void *)mp->b_datap->db_base, (void *)mp->b_datap->db_lim);
    329 		ptr = mp->b_rptr;
    330 
    331 		tmp_line[0] = '\0';
    332 		while (ptr < mp->b_wptr) {
    333 			uint_t diff;
    334 
    335 			diff = (ptr - mp->b_rptr);
    336 			if (!(diff & 0x1f)) {
    337 				if (strlen(tmp_line) > 0) {
    338 					printf("bytes: %s\n", tmp_line);
    339 					tmp_line[0] = '\0';
    340 				}
    341 			}
    342 			if (!(diff & 0x3))
    343 				(void) strcat(tmp_line, " ");
    344 			(void) sprintf(tmp_str, "%02x", *ptr);
    345 			(void) strcat(tmp_line, tmp_str);
    346 			ptr++;
    347 		}
    348 		if (strlen(tmp_line) > 0)
    349 			printf("bytes: %s\n", tmp_line);
    350 
    351 		mp = mp->b_cont;
    352 	}
    353 
    354 	return ("\n");
    355 }
    356 
    357 #else /* DEBUG */
    358 static char *
    359 dump_msg(mblk_t *mp)
    360 {
    361 	printf("Find value of mp %p.\n", mp);
    362 	return ("\n");
    363 }
    364 #endif /* DEBUG */
    365 
    366 /*
    367  * Don't have to lock age_interval, as only one thread will access it at
    368  * a time, because I control the one function that does with timeout().
    369  */
    370 static void
    371 esp_ager(void *arg)
    372 {
    373 	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
    374 	netstack_t	*ns = espstack->ipsecesp_netstack;
    375 	hrtime_t begin = gethrtime();
    376 
    377 	sadb_ager(&espstack->esp_sadb.s_v4, espstack->esp_pfkey_q,
    378 	    espstack->ipsecesp_reap_delay, ns);
    379 	sadb_ager(&espstack->esp_sadb.s_v6, espstack->esp_pfkey_q,
    380 	    espstack->ipsecesp_reap_delay, ns);
    381 
    382 	espstack->esp_event = sadb_retimeout(begin, espstack->esp_pfkey_q,
    383 	    esp_ager, espstack,
    384 	    &espstack->ipsecesp_age_interval, espstack->ipsecesp_age_int_max,
    385 	    info.mi_idnum);
    386 }
    387 
    388 /*
    389  * Get an ESP NDD parameter.
    390  */
    391 /* ARGSUSED */
    392 static int
    393 ipsecesp_param_get(q, mp, cp, cr)
    394 	queue_t	*q;
    395 	mblk_t	*mp;
    396 	caddr_t	cp;
    397 	cred_t *cr;
    398 {
    399 	ipsecespparam_t	*ipsecesppa = (ipsecespparam_t *)cp;
    400 	uint_t value;
    401 	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;
    402 
    403 	mutex_enter(&espstack->ipsecesp_param_lock);
    404 	value = ipsecesppa->ipsecesp_param_value;
    405 	mutex_exit(&espstack->ipsecesp_param_lock);
    406 
    407 	(void) mi_mpprintf(mp, "%u", value);
    408 	return (0);
    409 }
    410 
    411 /*
    412  * This routine sets an NDD variable in a ipsecespparam_t structure.
    413  */
    414 /* ARGSUSED */
    415 static int
    416 ipsecesp_param_set(q, mp, value, cp, cr)
    417 	queue_t	*q;
    418 	mblk_t	*mp;
    419 	char	*value;
    420 	caddr_t	cp;
    421 	cred_t *cr;
    422 {
    423 	ulong_t	new_value;
    424 	ipsecespparam_t	*ipsecesppa = (ipsecespparam_t *)cp;
    425 	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;
    426 
    427 	/*
    428 	 * Fail the request if the new value does not lie within the
    429 	 * required bounds.
    430 	 */
    431 	if (ddi_strtoul(value, NULL, 10, &new_value) != 0 ||
    432 	    new_value < ipsecesppa->ipsecesp_param_min ||
    433 	    new_value > ipsecesppa->ipsecesp_param_max) {
    434 		return (EINVAL);
    435 	}
    436 
    437 	/* Set the new value */
    438 	mutex_enter(&espstack->ipsecesp_param_lock);
    439 	ipsecesppa->ipsecesp_param_value = new_value;
    440 	mutex_exit(&espstack->ipsecesp_param_lock);
    441 	return (0);
    442 }
    443 
    444 /*
    445  * Using lifetime NDD variables, fill in an extended combination's
    446  * lifetime information.
    447  */
    448 void
    449 ipsecesp_fill_defs(sadb_x_ecomb_t *ecomb, netstack_t *ns)
    450 {
    451 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
    452 
    453 	ecomb->sadb_x_ecomb_soft_bytes = espstack->ipsecesp_default_soft_bytes;
    454 	ecomb->sadb_x_ecomb_hard_bytes = espstack->ipsecesp_default_hard_bytes;
    455 	ecomb->sadb_x_ecomb_soft_addtime =
    456 	    espstack->ipsecesp_default_soft_addtime;
    457 	ecomb->sadb_x_ecomb_hard_addtime =
    458 	    espstack->ipsecesp_default_hard_addtime;
    459 	ecomb->sadb_x_ecomb_soft_usetime =
    460 	    espstack->ipsecesp_default_soft_usetime;
    461 	ecomb->sadb_x_ecomb_hard_usetime =
    462 	    espstack->ipsecesp_default_hard_usetime;
    463 }
    464 
    465 /*
    466  * Initialize things for ESP at module load time.
    467  */
    468 boolean_t
    469 ipsecesp_ddi_init(void)
    470 {
    471 	esp_taskq = taskq_create("esp_taskq", 1, minclsyspri,
    472 	    IPSEC_TASKQ_MIN, IPSEC_TASKQ_MAX, 0);
    473 
    474 	/*
    475 	 * We want to be informed each time a stack is created or
    476 	 * destroyed in the kernel, so we can maintain the
    477 	 * set of ipsecesp_stack_t's.
    478 	 */
    479 	netstack_register(NS_IPSECESP, ipsecesp_stack_init, NULL,
    480 	    ipsecesp_stack_fini);
    481 
    482 	return (B_TRUE);
    483 }
    484 
    485 /*
    486  * Walk through the param array specified registering each element with the
    487  * named dispatch handler.
    488  */
    489 static boolean_t
    490 ipsecesp_param_register(IDP *ndp, ipsecespparam_t *espp, int cnt)
    491 {
    492 	for (; cnt-- > 0; espp++) {
    493 		if (espp->ipsecesp_param_name != NULL &&
    494 		    espp->ipsecesp_param_name[0]) {
    495 			if (!nd_load(ndp,
    496 			    espp->ipsecesp_param_name,
    497 			    ipsecesp_param_get, ipsecesp_param_set,
    498 			    (caddr_t)espp)) {
    499 				nd_free(ndp);
    500 				return (B_FALSE);
    501 			}
    502 		}
    503 	}
    504 	return (B_TRUE);
    505 }
    506 /*
    507  * Initialize things for ESP for each stack instance
    508  */
    509 static void *
    510 ipsecesp_stack_init(netstackid_t stackid, netstack_t *ns)
    511 {
    512 	ipsecesp_stack_t	*espstack;
    513 	ipsecespparam_t		*espp;
    514 
    515 	espstack = (ipsecesp_stack_t *)kmem_zalloc(sizeof (*espstack),
    516 	    KM_SLEEP);
    517 	espstack->ipsecesp_netstack = ns;
    518 
    519 	espp = (ipsecespparam_t *)kmem_alloc(sizeof (lcl_param_arr), KM_SLEEP);
    520 	espstack->ipsecesp_params = espp;
    521 	bcopy(lcl_param_arr, espp, sizeof (lcl_param_arr));
    522 
    523 	(void) ipsecesp_param_register(&espstack->ipsecesp_g_nd, espp,
    524 	    A_CNT(lcl_param_arr));
    525 
    526 	(void) esp_kstat_init(espstack, stackid);
    527 
    528 	espstack->esp_sadb.s_acquire_timeout =
    529 	    &espstack->ipsecesp_acquire_timeout;
    530 	espstack->esp_sadb.s_acqfn = esp_send_acquire;
    531 	sadbp_init("ESP", &espstack->esp_sadb, SADB_SATYPE_ESP, esp_hash_size,
    532 	    espstack->ipsecesp_netstack);
    533 
    534 	mutex_init(&espstack->ipsecesp_param_lock, NULL, MUTEX_DEFAULT, 0);
    535 
    536 	ip_drop_register(&espstack->esp_dropper, "IPsec ESP");
    537 	return (espstack);
    538 }
    539 
/*
 * Destroy things for ESP at module unload time.
 *
 * Undoes ipsecesp_ddi_init() in reverse order: the netstack registration
 * is removed first, then the global esp_taskq is destroyed.
 */
void
ipsecesp_ddi_destroy(void)
{
	netstack_unregister(NS_IPSECESP);
	taskq_destroy(esp_taskq);
}
    549 
    550 /*
    551  * Destroy things for ESP for one stack instance
    552  */
    553 static void
    554 ipsecesp_stack_fini(netstackid_t stackid, void *arg)
    555 {
    556 	ipsecesp_stack_t *espstack = (ipsecesp_stack_t *)arg;
    557 
    558 	if (espstack->esp_pfkey_q != NULL) {
    559 		(void) quntimeout(espstack->esp_pfkey_q, espstack->esp_event);
    560 	}
    561 	espstack->esp_sadb.s_acqfn = NULL;
    562 	espstack->esp_sadb.s_acquire_timeout = NULL;
    563 	sadbp_destroy(&espstack->esp_sadb, espstack->ipsecesp_netstack);
    564 	ip_drop_unregister(&espstack->esp_dropper);
    565 	mutex_destroy(&espstack->ipsecesp_param_lock);
    566 	nd_free(&espstack->ipsecesp_g_nd);
    567 
    568 	kmem_free(espstack->ipsecesp_params, sizeof (lcl_param_arr));
    569 	espstack->ipsecesp_params = NULL;
    570 	kstat_delete_netstack(espstack->esp_ksp, stackid);
    571 	espstack->esp_ksp = NULL;
    572 	espstack->esp_kstats = NULL;
    573 	kmem_free(espstack, sizeof (*espstack));
    574 }
    575 
    576 /*
    577  * ESP module open routine, which is here for keysock plumbing.
    578  * Keysock is pushed over {AH,ESP} which is an artifact from the Bad Old
    579  * Days of export control, and fears that ESP would not be allowed
    580  * to be shipped at all by default.  Eventually, keysock should
    581  * either access AH and ESP via modstubs or krtld dependencies, or
    582  * perhaps be folded in with AH and ESP into a single IPsec/netsec
    583  * module ("netsec" if PF_KEY provides more than AH/ESP keying tables).
    584  */
    585 /* ARGSUSED */
    586 static int
    587 ipsecesp_open(queue_t *q, dev_t *devp, int flag, int sflag, cred_t *credp)
    588 {
    589 	netstack_t		*ns;
    590 	ipsecesp_stack_t	*espstack;
    591 
    592 	if (secpolicy_ip_config(credp, B_FALSE) != 0)
    593 		return (EPERM);
    594 
    595 	if (q->q_ptr != NULL)
    596 		return (0);  /* Re-open of an already open instance. */
    597 
    598 	if (sflag != MODOPEN)
    599 		return (EINVAL);
    600 
    601 	ns = netstack_find_by_cred(credp);
    602 	ASSERT(ns != NULL);
    603 	espstack = ns->netstack_ipsecesp;
    604 	ASSERT(espstack != NULL);
    605 
    606 	q->q_ptr = espstack;
    607 	WR(q)->q_ptr = q->q_ptr;
    608 
    609 	qprocson(q);
    610 	return (0);
    611 }
    612 
    613 /*
    614  * ESP module close routine.
    615  */
    616 static int
    617 ipsecesp_close(queue_t *q)
    618 {
    619 	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;
    620 
    621 	/*
    622 	 * Clean up q_ptr, if needed.
    623 	 */
    624 	qprocsoff(q);
    625 
    626 	/* Keysock queue check is safe, because of OCEXCL perimeter. */
    627 
    628 	if (q == espstack->esp_pfkey_q) {
    629 		esp1dbg(espstack,
    630 		    ("ipsecesp_close:  Ummm... keysock is closing ESP.\n"));
    631 		espstack->esp_pfkey_q = NULL;
    632 		/* Detach qtimeouts. */
    633 		(void) quntimeout(q, espstack->esp_event);
    634 	}
    635 
    636 	netstack_rele(espstack->ipsecesp_netstack);
    637 	return (0);
    638 }
    639 
    640 /*
    641  * Add a number of bytes to what the SA has protected so far.  Return
    642  * B_TRUE if the SA can still protect that many bytes.
    643  *
    644  * Caller must REFRELE the passed-in assoc.  This function must REFRELE
    645  * any obtained peer SA.
    646  */
    647 static boolean_t
    648 esp_age_bytes(ipsa_t *assoc, uint64_t bytes, boolean_t inbound)
    649 {
    650 	ipsa_t *inassoc, *outassoc;
    651 	isaf_t *bucket;
    652 	boolean_t inrc, outrc, isv6;
    653 	sadb_t *sp;
    654 	int outhash;
    655 	netstack_t		*ns = assoc->ipsa_netstack;
    656 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
    657 
    658 	/* No peer?  No problem! */
    659 	if (!assoc->ipsa_haspeer) {
    660 		return (sadb_age_bytes(espstack->esp_pfkey_q, assoc, bytes,
    661 		    B_TRUE));
    662 	}
    663 
    664 	/*
    665 	 * Otherwise, we want to grab both the original assoc and its peer.
    666 	 * There might be a race for this, but if it's a real race, two
    667 	 * expire messages may occur.  We limit this by only sending the
    668 	 * expire message on one of the peers, we'll pick the inbound
    669 	 * arbitrarily.
    670 	 *
    671 	 * If we need tight synchronization on the peer SA, then we need to
    672 	 * reconsider.
    673 	 */
    674 
    675 	/* Use address length to select IPv6/IPv4 */
    676 	isv6 = (assoc->ipsa_addrfam == AF_INET6);
    677 	sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;
    678 
    679 	if (inbound) {
    680 		inassoc = assoc;
    681 		if (isv6) {
    682 			outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
    683 			    &inassoc->ipsa_dstaddr));
    684 		} else {
    685 			outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
    686 			    &inassoc->ipsa_dstaddr));
    687 		}
    688 		bucket = &sp->sdb_of[outhash];
    689 		mutex_enter(&bucket->isaf_lock);
    690 		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
    691 		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
    692 		    inassoc->ipsa_addrfam);
    693 		mutex_exit(&bucket->isaf_lock);
    694 		if (outassoc == NULL) {
    695 			/* Q: Do we wish to set haspeer == B_FALSE? */
    696 			esp0dbg(("esp_age_bytes: "
    697 			    "can't find peer for inbound.\n"));
    698 			return (sadb_age_bytes(espstack->esp_pfkey_q, inassoc,
    699 			    bytes, B_TRUE));
    700 		}
    701 	} else {
    702 		outassoc = assoc;
    703 		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
    704 		mutex_enter(&bucket->isaf_lock);
    705 		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
    706 		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
    707 		    outassoc->ipsa_addrfam);
    708 		mutex_exit(&bucket->isaf_lock);
    709 		if (inassoc == NULL) {
    710 			/* Q: Do we wish to set haspeer == B_FALSE? */
    711 			esp0dbg(("esp_age_bytes: "
    712 			    "can't find peer for outbound.\n"));
    713 			return (sadb_age_bytes(espstack->esp_pfkey_q, outassoc,
    714 			    bytes, B_TRUE));
    715 		}
    716 	}
    717 
    718 	inrc = sadb_age_bytes(espstack->esp_pfkey_q, inassoc, bytes, B_TRUE);
    719 	outrc = sadb_age_bytes(espstack->esp_pfkey_q, outassoc, bytes, B_FALSE);
    720 
    721 	/*
    722 	 * REFRELE any peer SA.
    723 	 *
    724 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
    725 	 * them in { }.
    726 	 */
    727 	if (inbound) {
    728 		IPSA_REFRELE(outassoc);
    729 	} else {
    730 		IPSA_REFRELE(inassoc);
    731 	}
    732 
    733 	return (inrc && outrc);
    734 }
    735 
    736 /*
    737  * Do incoming NAT-T manipulations for packet.
    738  * Returns NULL if the mblk chain is consumed.
    739  */
    740 static mblk_t *
    741 esp_fix_natt_checksums(mblk_t *data_mp, ipsa_t *assoc)
    742 {
    743 	ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
    744 	tcpha_t *tcpha;
    745 	udpha_t *udpha;
    746 	/* Initialize to our inbound cksum adjustment... */
    747 	uint32_t sum = assoc->ipsa_inbound_cksum;
    748 
    749 	switch (ipha->ipha_protocol) {
    750 	case IPPROTO_TCP:
    751 		tcpha = (tcpha_t *)(data_mp->b_rptr +
    752 		    IPH_HDR_LENGTH(ipha));
    753 
    754 #define	DOWN_SUM(x) (x) = ((x) & 0xFFFF) +	 ((x) >> 16)
    755 		sum += ~ntohs(tcpha->tha_sum) & 0xFFFF;
    756 		DOWN_SUM(sum);
    757 		DOWN_SUM(sum);
    758 		tcpha->tha_sum = ~htons(sum);
    759 		break;
    760 	case IPPROTO_UDP:
    761 		udpha = (udpha_t *)(data_mp->b_rptr + IPH_HDR_LENGTH(ipha));
    762 
    763 		if (udpha->uha_checksum != 0) {
    764 			/* Adujst if the inbound one was not zero. */
    765 			sum += ~ntohs(udpha->uha_checksum) & 0xFFFF;
    766 			DOWN_SUM(sum);
    767 			DOWN_SUM(sum);
    768 			udpha->uha_checksum = ~htons(sum);
    769 			if (udpha->uha_checksum == 0)
    770 				udpha->uha_checksum = 0xFFFF;
    771 		}
    772 #undef DOWN_SUM
    773 		break;
    774 	case IPPROTO_IP:
    775 		/*
    776 		 * This case is only an issue for self-encapsulated
    777 		 * packets.  So for now, fall through.
    778 		 */
    779 		break;
    780 	}
    781 	return (data_mp);
    782 }
    783 
    784 
    785 /*
    786  * Strip ESP header, check padding, and fix IP header.
 * Returns B_TRUE on success, B_FALSE if an error occurred.
    788  */
    789 static boolean_t
    790 esp_strip_header(mblk_t *data_mp, boolean_t isv4, uint32_t ivlen,
    791     kstat_named_t **counter, ipsecesp_stack_t *espstack)
    792 {
    793 	ipha_t *ipha;
    794 	ip6_t *ip6h;
    795 	uint_t divpoint;
    796 	mblk_t *scratch;
    797 	uint8_t nexthdr, padlen;
    798 	uint8_t lastpad;
    799 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
    800 	uint8_t *lastbyte;
    801 
    802 	/*
    803 	 * Strip ESP data and fix IP header.
    804 	 *
    805 	 * XXX In case the beginning of esp_inbound() changes to not do a
    806 	 * pullup, this part of the code can remain unchanged.
    807 	 */
    808 	if (isv4) {
    809 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ipha_t));
    810 		ipha = (ipha_t *)data_mp->b_rptr;
    811 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (esph_t) +
    812 		    IPH_HDR_LENGTH(ipha));
    813 		divpoint = IPH_HDR_LENGTH(ipha);
    814 	} else {
    815 		ASSERT((data_mp->b_wptr - data_mp->b_rptr) >= sizeof (ip6_t));
    816 		ip6h = (ip6_t *)data_mp->b_rptr;
    817 		divpoint = ip_hdr_length_v6(data_mp, ip6h);
    818 	}
    819 
    820 	scratch = data_mp;
    821 	while (scratch->b_cont != NULL)
    822 		scratch = scratch->b_cont;
    823 
    824 	ASSERT((scratch->b_wptr - scratch->b_rptr) >= 3);
    825 
    826 	/*
    827 	 * "Next header" and padding length are the last two bytes in the
    828 	 * ESP-protected datagram, thus the explicit - 1 and - 2.
    829 	 * lastpad is the last byte of the padding, which can be used for
    830 	 * a quick check to see if the padding is correct.
    831 	 */
    832 	lastbyte = scratch->b_wptr - 1;
    833 	nexthdr = *lastbyte--;
    834 	padlen = *lastbyte--;
    835 
    836 	if (isv4) {
    837 		/* Fix part of the IP header. */
    838 		ipha->ipha_protocol = nexthdr;
    839 		/*
    840 		 * Reality check the padlen.  The explicit - 2 is for the
    841 		 * padding length and the next-header bytes.
    842 		 */
    843 		if (padlen >= ntohs(ipha->ipha_length) - sizeof (ipha_t) - 2 -
    844 		    sizeof (esph_t) - ivlen) {
    845 			ESP_BUMP_STAT(espstack, bad_decrypt);
    846 			ipsec_rl_strlog(espstack->ipsecesp_netstack,
    847 			    info.mi_idnum, 0, 0,
    848 			    SL_ERROR | SL_WARN,
    849 			    "Corrupt ESP packet (padlen too big).\n");
    850 			esp1dbg(espstack, ("padlen (%d) is greater than:\n",
    851 			    padlen));
    852 			esp1dbg(espstack, ("pkt len(%d) - ip hdr - esp "
    853 			    "hdr - ivlen(%d) = %d.\n",
    854 			    ntohs(ipha->ipha_length), ivlen,
    855 			    (int)(ntohs(ipha->ipha_length) - sizeof (ipha_t) -
    856 			    2 - sizeof (esph_t) - ivlen)));
    857 			*counter = DROPPER(ipss, ipds_esp_bad_padlen);
    858 			return (B_FALSE);
    859 		}
    860 
    861 		/*
    862 		 * Fix the rest of the header.  The explicit - 2 is for the
    863 		 * padding length and the next-header bytes.
    864 		 */
    865 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) - padlen -
    866 		    2 - sizeof (esph_t) - ivlen);
    867 		ipha->ipha_hdr_checksum = 0;
    868 		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
    869 	} else {
    870 		if (ip6h->ip6_nxt == IPPROTO_ESP) {
    871 			ip6h->ip6_nxt = nexthdr;
    872 		} else {
    873 			ip_pkt_t ipp;
    874 
    875 			bzero(&ipp, sizeof (ipp));
    876 			(void) ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp,
    877 			    NULL);
    878 			if (ipp.ipp_dstopts != NULL) {
    879 				ipp.ipp_dstopts->ip6d_nxt = nexthdr;
    880 			} else if (ipp.ipp_rthdr != NULL) {
    881 				ipp.ipp_rthdr->ip6r_nxt = nexthdr;
    882 			} else if (ipp.ipp_hopopts != NULL) {
    883 				ipp.ipp_hopopts->ip6h_nxt = nexthdr;
    884 			} else {
    885 				/* Panic a DEBUG kernel. */
    886 				ASSERT(ipp.ipp_hopopts != NULL);
    887 				/* Otherwise, pretend it's IP + ESP. */
    888 				cmn_err(CE_WARN, "ESP IPv6 headers wrong.\n");
    889 				ip6h->ip6_nxt = nexthdr;
    890 			}
    891 		}
    892 
    893 		if (padlen >= ntohs(ip6h->ip6_plen) - 2 - sizeof (esph_t) -
    894 		    ivlen) {
    895 			ESP_BUMP_STAT(espstack, bad_decrypt);
    896 			ipsec_rl_strlog(espstack->ipsecesp_netstack,
    897 			    info.mi_idnum, 0, 0,
    898 			    SL_ERROR | SL_WARN,
    899 			    "Corrupt ESP packet (v6 padlen too big).\n");
    900 			esp1dbg(espstack, ("padlen (%d) is greater than:\n",
    901 			    padlen));
    902 			esp1dbg(espstack,
    903 			    ("pkt len(%u) - ip hdr - esp hdr - ivlen(%d) = "
    904 			    "%u.\n", (unsigned)(ntohs(ip6h->ip6_plen)
    905 			    + sizeof (ip6_t)), ivlen,
    906 			    (unsigned)(ntohs(ip6h->ip6_plen) - 2 -
    907 			    sizeof (esph_t) - ivlen)));
    908 			*counter = DROPPER(ipss, ipds_esp_bad_padlen);
    909 			return (B_FALSE);
    910 		}
    911 
    912 
    913 		/*
    914 		 * Fix the rest of the header.  The explicit - 2 is for the
    915 		 * padding length and the next-header bytes.  IPv6 is nice,
    916 		 * because there's no hdr checksum!
    917 		 */
    918 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) - padlen -
    919 		    2 - sizeof (esph_t) - ivlen);
    920 	}
    921 
    922 	if (espstack->ipsecesp_padding_check > 0 && padlen > 0) {
    923 		/*
    924 		 * Weak padding check: compare last-byte to length, they
    925 		 * should be equal.
    926 		 */
    927 		lastpad = *lastbyte--;
    928 
    929 		if (padlen != lastpad) {
    930 			ipsec_rl_strlog(espstack->ipsecesp_netstack,
    931 			    info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
    932 			    "Corrupt ESP packet (lastpad != padlen).\n");
    933 			esp1dbg(espstack,
    934 			    ("lastpad (%d) not equal to padlen (%d):\n",
    935 			    lastpad, padlen));
    936 			ESP_BUMP_STAT(espstack, bad_padding);
    937 			*counter = DROPPER(ipss, ipds_esp_bad_padding);
    938 			return (B_FALSE);
    939 		}
    940 
    941 		/*
    942 		 * Strong padding check: Check all pad bytes to see that
    943 		 * they're ascending.  Go backwards using a descending counter
    944 		 * to verify.  padlen == 1 is checked by previous block, so
    945 		 * only bother if we've more than 1 byte of padding.
    946 		 * Consequently, start the check one byte before the location
    947 		 * of "lastpad".
    948 		 */
    949 		if (espstack->ipsecesp_padding_check > 1) {
    950 			/*
    951 			 * This assert may have to become an if and a pullup
    952 			 * if we start accepting multi-dblk mblks. For now,
    953 			 * though, any packet here will have been pulled up in
    954 			 * esp_inbound.
    955 			 */
    956 			ASSERT(MBLKL(scratch) >= lastpad + 3);
    957 
    958 			/*
    959 			 * Use "--lastpad" because we already checked the very
    960 			 * last pad byte previously.
    961 			 */
    962 			while (--lastpad != 0) {
    963 				if (lastpad != *lastbyte) {
    964 					ipsec_rl_strlog(
    965 					    espstack->ipsecesp_netstack,
    966 					    info.mi_idnum, 0, 0,
    967 					    SL_ERROR | SL_WARN, "Corrupt ESP "
    968 					    "packet (bad padding).\n");
    969 					esp1dbg(espstack,
    970 					    ("padding not in correct"
    971 					    " format:\n"));
    972 					ESP_BUMP_STAT(espstack, bad_padding);
    973 					*counter = DROPPER(ipss,
    974 					    ipds_esp_bad_padding);
    975 					return (B_FALSE);
    976 				}
    977 				lastbyte--;
    978 			}
    979 		}
    980 	}
    981 
    982 	/* Trim off the padding. */
    983 	ASSERT(data_mp->b_cont == NULL);
    984 	data_mp->b_wptr -= (padlen + 2);
    985 
    986 	/*
    987 	 * Remove the ESP header.
    988 	 *
    989 	 * The above assertions about data_mp's size will make this work.
    990 	 *
    991 	 * XXX  Question:  If I send up and get back a contiguous mblk,
    992 	 * would it be quicker to bcopy over, or keep doing the dupb stuff?
    993 	 * I go with copying for now.
    994 	 */
    995 
    996 	if (IS_P2ALIGNED(data_mp->b_rptr, sizeof (uint32_t)) &&
    997 	    IS_P2ALIGNED(ivlen, sizeof (uint32_t))) {
    998 		uint8_t *start = data_mp->b_rptr;
    999 		uint32_t *src, *dst;
   1000 
   1001 		src = (uint32_t *)(start + divpoint);
   1002 		dst = (uint32_t *)(start + divpoint + sizeof (esph_t) + ivlen);
   1003 
   1004 		ASSERT(IS_P2ALIGNED(dst, sizeof (uint32_t)) &&
   1005 		    IS_P2ALIGNED(src, sizeof (uint32_t)));
   1006 
   1007 		do {
   1008 			src--;
   1009 			dst--;
   1010 			*dst = *src;
   1011 		} while (src != (uint32_t *)start);
   1012 
   1013 		data_mp->b_rptr = (uchar_t *)dst;
   1014 	} else {
   1015 		uint8_t *start = data_mp->b_rptr;
   1016 		uint8_t *src, *dst;
   1017 
   1018 		src = start + divpoint;
   1019 		dst = src + sizeof (esph_t) + ivlen;
   1020 
   1021 		do {
   1022 			src--;
   1023 			dst--;
   1024 			*dst = *src;
   1025 		} while (src != start);
   1026 
   1027 		data_mp->b_rptr = dst;
   1028 	}
   1029 
   1030 	esp2dbg(espstack, ("data_mp after inbound ESP adjustment:\n"));
   1031 	esp2dbg(espstack, (dump_msg(data_mp)));
   1032 
   1033 	return (B_TRUE);
   1034 }
   1035 
   1036 /*
   1037  * Updating use times can be tricky business if the ipsa_haspeer flag is
   1038  * set.  This function is called once in an SA's lifetime.
   1039  *
   1040  * Caller has to REFRELE "assoc" which is passed in.  This function has
   1041  * to REFRELE any peer SA that is obtained.
   1042  */
   1043 static void
   1044 esp_set_usetime(ipsa_t *assoc, boolean_t inbound)
   1045 {
   1046 	ipsa_t *inassoc, *outassoc;
   1047 	isaf_t *bucket;
   1048 	sadb_t *sp;
   1049 	int outhash;
   1050 	boolean_t isv6;
   1051 	netstack_t		*ns = assoc->ipsa_netstack;
   1052 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
   1053 
   1054 	/* No peer?  No problem! */
   1055 	if (!assoc->ipsa_haspeer) {
   1056 		sadb_set_usetime(assoc);
   1057 		return;
   1058 	}
   1059 
   1060 	/*
   1061 	 * Otherwise, we want to grab both the original assoc and its peer.
   1062 	 * There might be a race for this, but if it's a real race, the times
   1063 	 * will be out-of-synch by at most a second, and since our time
   1064 	 * granularity is a second, this won't be a problem.
   1065 	 *
   1066 	 * If we need tight synchronization on the peer SA, then we need to
   1067 	 * reconsider.
   1068 	 */
   1069 
   1070 	/* Use address length to select IPv6/IPv4 */
   1071 	isv6 = (assoc->ipsa_addrfam == AF_INET6);
   1072 	sp = isv6 ? &espstack->esp_sadb.s_v6 : &espstack->esp_sadb.s_v4;
   1073 
   1074 	if (inbound) {
   1075 		inassoc = assoc;
   1076 		if (isv6) {
   1077 			outhash = OUTBOUND_HASH_V6(sp, *((in6_addr_t *)
   1078 			    &inassoc->ipsa_dstaddr));
   1079 		} else {
   1080 			outhash = OUTBOUND_HASH_V4(sp, *((ipaddr_t *)
   1081 			    &inassoc->ipsa_dstaddr));
   1082 		}
   1083 		bucket = &sp->sdb_of[outhash];
   1084 		mutex_enter(&bucket->isaf_lock);
   1085 		outassoc = ipsec_getassocbyspi(bucket, inassoc->ipsa_spi,
   1086 		    inassoc->ipsa_srcaddr, inassoc->ipsa_dstaddr,
   1087 		    inassoc->ipsa_addrfam);
   1088 		mutex_exit(&bucket->isaf_lock);
   1089 		if (outassoc == NULL) {
   1090 			/* Q: Do we wish to set haspeer == B_FALSE? */
   1091 			esp0dbg(("esp_set_usetime: "
   1092 			    "can't find peer for inbound.\n"));
   1093 			sadb_set_usetime(inassoc);
   1094 			return;
   1095 		}
   1096 	} else {
   1097 		outassoc = assoc;
   1098 		bucket = INBOUND_BUCKET(sp, outassoc->ipsa_spi);
   1099 		mutex_enter(&bucket->isaf_lock);
   1100 		inassoc = ipsec_getassocbyspi(bucket, outassoc->ipsa_spi,
   1101 		    outassoc->ipsa_srcaddr, outassoc->ipsa_dstaddr,
   1102 		    outassoc->ipsa_addrfam);
   1103 		mutex_exit(&bucket->isaf_lock);
   1104 		if (inassoc == NULL) {
   1105 			/* Q: Do we wish to set haspeer == B_FALSE? */
   1106 			esp0dbg(("esp_set_usetime: "
   1107 			    "can't find peer for outbound.\n"));
   1108 			sadb_set_usetime(outassoc);
   1109 			return;
   1110 		}
   1111 	}
   1112 
   1113 	/* Update usetime on both. */
   1114 	sadb_set_usetime(inassoc);
   1115 	sadb_set_usetime(outassoc);
   1116 
   1117 	/*
   1118 	 * REFRELE any peer SA.
   1119 	 *
   1120 	 * Because of the multi-line macro nature of IPSA_REFRELE, keep
   1121 	 * them in { }.
   1122 	 */
   1123 	if (inbound) {
   1124 		IPSA_REFRELE(outassoc);
   1125 	} else {
   1126 		IPSA_REFRELE(inassoc);
   1127 	}
   1128 }
   1129 
   1130 /*
   1131  * Handle ESP inbound data for IPv4 and IPv6.
   1132  * On success returns B_TRUE, on failure returns B_FALSE and frees the
   1133  * mblk chain data_mp.
   1134  */
   1135 mblk_t *
   1136 esp_inbound(mblk_t *data_mp, void *arg, ip_recv_attr_t *ira)
   1137 {
   1138 	esph_t *esph = (esph_t *)arg;
   1139 	ipsa_t *ipsa = ira->ira_ipsec_esp_sa;
   1140 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
   1141 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   1142 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   1143 
   1144 	/*
   1145 	 * We may wish to check replay in-range-only here as an optimization.
   1146 	 * Include the reality check of ipsa->ipsa_replay >
   1147 	 * ipsa->ipsa_replay_wsize for times when it's the first N packets,
   1148 	 * where N == ipsa->ipsa_replay_wsize.
   1149 	 *
   1150 	 * Another check that may come here later is the "collision" check.
   1151 	 * If legitimate packets flow quickly enough, this won't be a problem,
   1152 	 * but collisions may cause authentication algorithm crunching to
   1153 	 * take place when it doesn't need to.
   1154 	 */
   1155 	if (!sadb_replay_peek(ipsa, esph->esph_replay)) {
   1156 		ESP_BUMP_STAT(espstack, replay_early_failures);
   1157 		IP_ESP_BUMP_STAT(ipss, in_discards);
   1158 		ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
   1159 		    DROPPER(ipss, ipds_esp_early_replay),
   1160 		    &espstack->esp_dropper);
   1161 		BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
   1162 		return (NULL);
   1163 	}
   1164 
   1165 	/*
   1166 	 * Adjust the IP header's payload length to reflect the removal
   1167 	 * of the ICV.
   1168 	 */
   1169 	if (!(ira->ira_flags & IRAF_IS_IPV4)) {
   1170 		ip6_t *ip6h = (ip6_t *)data_mp->b_rptr;
   1171 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) -
   1172 		    ipsa->ipsa_mac_len);
   1173 	} else {
   1174 		ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
   1175 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) -
   1176 		    ipsa->ipsa_mac_len);
   1177 	}
   1178 
   1179 	/* submit the request to the crypto framework */
   1180 	return (esp_submit_req_inbound(data_mp, ira, ipsa,
   1181 	    (uint8_t *)esph - data_mp->b_rptr));
   1182 }
   1183 
   1184 /*
   1185  * Perform the really difficult work of inserting the proposed situation.
   1186  * Called while holding the algorithm lock.
   1187  */
   1188 static void
   1189 esp_insert_prop(sadb_prop_t *prop, ipsacq_t *acqrec, uint_t combs,
   1190     netstack_t *ns)
   1191 {
   1192 	sadb_comb_t *comb = (sadb_comb_t *)(prop + 1);
   1193 	ipsec_action_t *ap;
   1194 	ipsec_prot_t *prot;
   1195 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   1196 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   1197 
   1198 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
   1199 
   1200 	prop->sadb_prop_exttype = SADB_EXT_PROPOSAL;
   1201 	prop->sadb_prop_len = SADB_8TO64(sizeof (sadb_prop_t));
   1202 	*(uint32_t *)(&prop->sadb_prop_replay) = 0;	/* Quick zero-out! */
   1203 
   1204 	prop->sadb_prop_replay = espstack->ipsecesp_replay_size;
   1205 
   1206 	/*
   1207 	 * Based upon algorithm properties, and what-not, prioritize a
   1208 	 * proposal, based on the ordering of the ESP algorithms in the
   1209 	 * alternatives in the policy rule or socket that was placed
   1210 	 * in the acquire record.
   1211 	 *
   1212 	 * For each action in policy list
   1213 	 *   Add combination.  If I've hit limit, return.
   1214 	 */
   1215 
   1216 	for (ap = acqrec->ipsacq_act; ap != NULL;
   1217 	    ap = ap->ipa_next) {
   1218 		ipsec_alginfo_t *ealg = NULL;
   1219 		ipsec_alginfo_t *aalg = NULL;
   1220 
   1221 		if (ap->ipa_act.ipa_type != IPSEC_POLICY_APPLY)
   1222 			continue;
   1223 
   1224 		prot = &ap->ipa_act.ipa_apply;
   1225 
   1226 		if (!(prot->ipp_use_esp))
   1227 			continue;
   1228 
   1229 		if (prot->ipp_esp_auth_alg != 0) {
   1230 			aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
   1231 			    [prot->ipp_esp_auth_alg];
   1232 			if (aalg == NULL || !ALG_VALID(aalg))
   1233 				continue;
   1234 		}
   1235 
   1236 		ASSERT(prot->ipp_encr_alg > 0);
   1237 		ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
   1238 		    [prot->ipp_encr_alg];
   1239 		if (ealg == NULL || !ALG_VALID(ealg))
   1240 			continue;
   1241 
   1242 		comb->sadb_comb_flags = 0;
   1243 		comb->sadb_comb_reserved = 0;
   1244 		comb->sadb_comb_encrypt = ealg->alg_id;
   1245 		comb->sadb_comb_encrypt_minbits =
   1246 		    MAX(prot->ipp_espe_minbits, ealg->alg_ef_minbits);
   1247 		comb->sadb_comb_encrypt_maxbits =
   1248 		    MIN(prot->ipp_espe_maxbits, ealg->alg_ef_maxbits);
   1249 
   1250 		if (aalg == NULL) {
   1251 			comb->sadb_comb_auth = 0;
   1252 			comb->sadb_comb_auth_minbits = 0;
   1253 			comb->sadb_comb_auth_maxbits = 0;
   1254 		} else {
   1255 			comb->sadb_comb_auth = aalg->alg_id;
   1256 			comb->sadb_comb_auth_minbits =
   1257 			    MAX(prot->ipp_espa_minbits, aalg->alg_ef_minbits);
   1258 			comb->sadb_comb_auth_maxbits =
   1259 			    MIN(prot->ipp_espa_maxbits, aalg->alg_ef_maxbits);
   1260 		}
   1261 
   1262 		/*
   1263 		 * The following may be based on algorithm
   1264 		 * properties, but in the meantime, we just pick
   1265 		 * some good, sensible numbers.  Key mgmt. can
   1266 		 * (and perhaps should) be the place to finalize
   1267 		 * such decisions.
   1268 		 */
   1269 
   1270 		/*
   1271 		 * No limits on allocations, since we really don't
   1272 		 * support that concept currently.
   1273 		 */
   1274 		comb->sadb_comb_soft_allocations = 0;
   1275 		comb->sadb_comb_hard_allocations = 0;
   1276 
   1277 		/*
   1278 		 * These may want to come from policy rule..
   1279 		 */
   1280 		comb->sadb_comb_soft_bytes =
   1281 		    espstack->ipsecesp_default_soft_bytes;
   1282 		comb->sadb_comb_hard_bytes =
   1283 		    espstack->ipsecesp_default_hard_bytes;
   1284 		comb->sadb_comb_soft_addtime =
   1285 		    espstack->ipsecesp_default_soft_addtime;
   1286 		comb->sadb_comb_hard_addtime =
   1287 		    espstack->ipsecesp_default_hard_addtime;
   1288 		comb->sadb_comb_soft_usetime =
   1289 		    espstack->ipsecesp_default_soft_usetime;
   1290 		comb->sadb_comb_hard_usetime =
   1291 		    espstack->ipsecesp_default_hard_usetime;
   1292 
   1293 		prop->sadb_prop_len += SADB_8TO64(sizeof (*comb));
   1294 		if (--combs == 0)
   1295 			break;	/* out of space.. */
   1296 		comb++;
   1297 	}
   1298 }
   1299 
   1300 /*
   1301  * Prepare and actually send the SADB_ACQUIRE message to PF_KEY.
   1302  */
   1303 static void
   1304 esp_send_acquire(ipsacq_t *acqrec, mblk_t *extended, netstack_t *ns)
   1305 {
   1306 	uint_t combs;
   1307 	sadb_msg_t *samsg;
   1308 	sadb_prop_t *prop;
   1309 	mblk_t *pfkeymp, *msgmp;
   1310 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   1311 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   1312 
   1313 	ESP_BUMP_STAT(espstack, acquire_requests);
   1314 
   1315 	if (espstack->esp_pfkey_q == NULL) {
   1316 		mutex_exit(&acqrec->ipsacq_lock);
   1317 		return;
   1318 	}
   1319 
   1320 	/* Set up ACQUIRE. */
   1321 	pfkeymp = sadb_setup_acquire(acqrec, SADB_SATYPE_ESP,
   1322 	    ns->netstack_ipsec);
   1323 	if (pfkeymp == NULL) {
   1324 		esp0dbg(("sadb_setup_acquire failed.\n"));
   1325 		mutex_exit(&acqrec->ipsacq_lock);
   1326 		return;
   1327 	}
   1328 	ASSERT(MUTEX_HELD(&ipss->ipsec_alg_lock));
   1329 	combs = ipss->ipsec_nalgs[IPSEC_ALG_AUTH] *
   1330 	    ipss->ipsec_nalgs[IPSEC_ALG_ENCR];
   1331 	msgmp = pfkeymp->b_cont;
   1332 	samsg = (sadb_msg_t *)(msgmp->b_rptr);
   1333 
   1334 	/* Insert proposal here. */
   1335 
   1336 	prop = (sadb_prop_t *)(((uint64_t *)samsg) + samsg->sadb_msg_len);
   1337 	esp_insert_prop(prop, acqrec, combs, ns);
   1338 	samsg->sadb_msg_len += prop->sadb_prop_len;
   1339 	msgmp->b_wptr += SADB_64TO8(samsg->sadb_msg_len);
   1340 
   1341 	mutex_exit(&ipss->ipsec_alg_lock);
   1342 
   1343 	/*
   1344 	 * Must mutex_exit() before sending PF_KEY message up, in
   1345 	 * order to avoid recursive mutex_enter() if there are no registered
   1346 	 * listeners.
   1347 	 *
   1348 	 * Once I've sent the message, I'm cool anyway.
   1349 	 */
   1350 	mutex_exit(&acqrec->ipsacq_lock);
   1351 	if (extended != NULL) {
   1352 		putnext(espstack->esp_pfkey_q, extended);
   1353 	}
   1354 	putnext(espstack->esp_pfkey_q, pfkeymp);
   1355 }
   1356 
   1357 /* XXX refactor me */
   1358 /*
   1359  * Handle the SADB_GETSPI message.  Create a larval SA.
   1360  */
   1361 static void
   1362 esp_getspi(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
   1363 {
   1364 	ipsa_t *newbie, *target;
   1365 	isaf_t *outbound, *inbound;
   1366 	int rc, diagnostic;
   1367 	sadb_sa_t *assoc;
   1368 	keysock_out_t *kso;
   1369 	uint32_t newspi;
   1370 
   1371 	/*
   1372 	 * Randomly generate a proposed SPI value
   1373 	 */
   1374 	if (cl_inet_getspi != NULL) {
   1375 		cl_inet_getspi(espstack->ipsecesp_netstack->netstack_stackid,
   1376 		    IPPROTO_ESP, (uint8_t *)&newspi, sizeof (uint32_t), NULL);
   1377 	} else {
   1378 		(void) random_get_pseudo_bytes((uint8_t *)&newspi,
   1379 		    sizeof (uint32_t));
   1380 	}
   1381 	newbie = sadb_getspi(ksi, newspi, &diagnostic,
   1382 	    espstack->ipsecesp_netstack, IPPROTO_ESP);
   1383 
   1384 	if (newbie == NULL) {
   1385 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM, diagnostic,
   1386 		    ksi->ks_in_serial);
   1387 		return;
   1388 	} else if (newbie == (ipsa_t *)-1) {
   1389 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
   1390 		    ksi->ks_in_serial);
   1391 		return;
   1392 	}
   1393 
   1394 	/*
   1395 	 * XXX - We may randomly collide.  We really should recover from this.
   1396 	 *	 Unfortunately, that could require spending way-too-much-time
   1397 	 *	 in here.  For now, let the user retry.
   1398 	 */
   1399 
   1400 	if (newbie->ipsa_addrfam == AF_INET6) {
   1401 		outbound = OUTBOUND_BUCKET_V6(&espstack->esp_sadb.s_v6,
   1402 		    *(uint32_t *)(newbie->ipsa_dstaddr));
   1403 		inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v6,
   1404 		    newbie->ipsa_spi);
   1405 	} else {
   1406 		ASSERT(newbie->ipsa_addrfam == AF_INET);
   1407 		outbound = OUTBOUND_BUCKET_V4(&espstack->esp_sadb.s_v4,
   1408 		    *(uint32_t *)(newbie->ipsa_dstaddr));
   1409 		inbound = INBOUND_BUCKET(&espstack->esp_sadb.s_v4,
   1410 		    newbie->ipsa_spi);
   1411 	}
   1412 
   1413 	mutex_enter(&outbound->isaf_lock);
   1414 	mutex_enter(&inbound->isaf_lock);
   1415 
   1416 	/*
   1417 	 * Check for collisions (i.e. did sadb_getspi() return with something
   1418 	 * that already exists?).
   1419 	 *
   1420 	 * Try outbound first.  Even though SADB_GETSPI is traditionally
   1421 	 * for inbound SAs, you never know what a user might do.
   1422 	 */
   1423 	target = ipsec_getassocbyspi(outbound, newbie->ipsa_spi,
   1424 	    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr, newbie->ipsa_addrfam);
   1425 	if (target == NULL) {
   1426 		target = ipsec_getassocbyspi(inbound, newbie->ipsa_spi,
   1427 		    newbie->ipsa_srcaddr, newbie->ipsa_dstaddr,
   1428 		    newbie->ipsa_addrfam);
   1429 	}
   1430 
   1431 	/*
   1432 	 * I don't have collisions elsewhere!
   1433 	 * (Nor will I because I'm still holding inbound/outbound locks.)
   1434 	 */
   1435 
   1436 	if (target != NULL) {
   1437 		rc = EEXIST;
   1438 		IPSA_REFRELE(target);
   1439 	} else {
   1440 		/*
   1441 		 * sadb_insertassoc() also checks for collisions, so
   1442 		 * if there's a colliding entry, rc will be set
   1443 		 * to EEXIST.
   1444 		 */
   1445 		rc = sadb_insertassoc(newbie, inbound);
   1446 		newbie->ipsa_hardexpiretime = gethrestime_sec();
   1447 		newbie->ipsa_hardexpiretime +=
   1448 		    espstack->ipsecesp_larval_timeout;
   1449 	}
   1450 
   1451 	/*
   1452 	 * Can exit outbound mutex.  Hold inbound until we're done
   1453 	 * with newbie.
   1454 	 */
   1455 	mutex_exit(&outbound->isaf_lock);
   1456 
   1457 	if (rc != 0) {
   1458 		mutex_exit(&inbound->isaf_lock);
   1459 		IPSA_REFRELE(newbie);
   1460 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, rc,
   1461 		    SADB_X_DIAGNOSTIC_NONE, ksi->ks_in_serial);
   1462 		return;
   1463 	}
   1464 
   1465 
   1466 	/* Can write here because I'm still holding the bucket lock. */
   1467 	newbie->ipsa_type = SADB_SATYPE_ESP;
   1468 
   1469 	/*
   1470 	 * Construct successful return message. We have one thing going
   1471 	 * for us in PF_KEY v2.  That's the fact that
   1472 	 *	sizeof (sadb_spirange_t) == sizeof (sadb_sa_t)
   1473 	 */
   1474 	assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SPIRANGE];
   1475 	assoc->sadb_sa_exttype = SADB_EXT_SA;
   1476 	assoc->sadb_sa_spi = newbie->ipsa_spi;
   1477 	*((uint64_t *)(&assoc->sadb_sa_replay)) = 0;
   1478 	mutex_exit(&inbound->isaf_lock);
   1479 
   1480 	/* Convert KEYSOCK_IN to KEYSOCK_OUT. */
   1481 	kso = (keysock_out_t *)ksi;
   1482 	kso->ks_out_len = sizeof (*kso);
   1483 	kso->ks_out_serial = ksi->ks_in_serial;
   1484 	kso->ks_out_type = KEYSOCK_OUT;
   1485 
   1486 	/*
   1487 	 * Can safely putnext() to esp_pfkey_q, because this is a turnaround
   1488 	 * from the esp_pfkey_q.
   1489 	 */
   1490 	putnext(espstack->esp_pfkey_q, mp);
   1491 }
   1492 
   1493 /*
   1494  * Insert the ESP header into a packet.  Duplicate an mblk, and insert a newly
   1495  * allocated mblk with the ESP header in between the two.
   1496  */
   1497 static boolean_t
   1498 esp_insert_esp(mblk_t *mp, mblk_t *esp_mp, uint_t divpoint,
   1499     ipsecesp_stack_t *espstack)
   1500 {
   1501 	mblk_t *split_mp = mp;
   1502 	uint_t wheretodiv = divpoint;
   1503 
   1504 	while ((split_mp->b_wptr - split_mp->b_rptr) < wheretodiv) {
   1505 		wheretodiv -= (split_mp->b_wptr - split_mp->b_rptr);
   1506 		split_mp = split_mp->b_cont;
   1507 		ASSERT(split_mp != NULL);
   1508 	}
   1509 
   1510 	if (split_mp->b_wptr - split_mp->b_rptr != wheretodiv) {
   1511 		mblk_t *scratch;
   1512 
   1513 		/* "scratch" is the 2nd half, split_mp is the first. */
   1514 		scratch = dupb(split_mp);
   1515 		if (scratch == NULL) {
   1516 			esp1dbg(espstack,
   1517 			    ("esp_insert_esp: can't allocate scratch.\n"));
   1518 			return (B_FALSE);
   1519 		}
   1520 		/* NOTE:  dupb() doesn't set b_cont appropriately. */
   1521 		scratch->b_cont = split_mp->b_cont;
   1522 		scratch->b_rptr += wheretodiv;
   1523 		split_mp->b_wptr = split_mp->b_rptr + wheretodiv;
   1524 		split_mp->b_cont = scratch;
   1525 	}
   1526 	/*
   1527 	 * At this point, split_mp is exactly "wheretodiv" bytes long, and
   1528 	 * holds the end of the pre-ESP part of the datagram.
   1529 	 */
   1530 	esp_mp->b_cont = split_mp->b_cont;
   1531 	split_mp->b_cont = esp_mp;
   1532 
   1533 	return (B_TRUE);
   1534 }
   1535 
   1536 /*
   1537  * Section 7 of RFC 3947 says:
   1538  *
   1539  * 7.  Recovering from the Expiring NAT Mappings
   1540  *
   1541  *    There are cases where NAT box decides to remove mappings that are still
   1542  *    alive (for example, when the keepalive interval is too long, or when the
   1543  *    NAT box is rebooted).  To recover from this, ends that are NOT behind
   1544  *    NAT SHOULD use the last valid UDP encapsulated IKE or IPsec packet from
   1545  *    the other end to determine which IP and port addresses should be used.
   1546  *    The host behind dynamic NAT MUST NOT do this, as otherwise it opens a
   1547  *    DoS attack possibility because the IP address or port of the other host
   1548  *    will not change (it is not behind NAT).
   1549  *
   1550  *    Keepalives cannot be used for these purposes, as they are not
   1551  *    authenticated, but any IKE authenticated IKE packet or ESP packet can be
   1552  *    used to detect whether the IP address or the port has changed.
   1553  *
   1554  * The following function will check an SA and its explicitly-set pair to see
   1555  * if the NAT-T remote port matches the received packet (which must have
   1556  * passed ESP authentication, see esp_in_done() for the caller context).  If
   1557  * there is a mismatch, the SAs are updated.  It is not important if we race
   1558  * with a transmitting thread, as if there is a transmitting thread, it will
   1559  * merely emit a packet that will most-likely be dropped.
   1560  *
   1561  * "ports" are ordered src,dst, and assoc is an inbound SA, where src should
   1562  * match ipsa_remote_nat_port and dst should match ipsa_local_nat_port.
   1563  */
   1564 #ifdef _LITTLE_ENDIAN
   1565 #define	FIRST_16(x) ((x) & 0xFFFF)
   1566 #define	NEXT_16(x) (((x) >> 16) & 0xFFFF)
   1567 #else
   1568 #define	FIRST_16(x) (((x) >> 16) & 0xFFFF)
   1569 #define	NEXT_16(x) ((x) & 0xFFFF)
   1570 #endif
   1571 static void
   1572 esp_port_freshness(uint32_t ports, ipsa_t *assoc)
   1573 {
   1574 	uint16_t remote = FIRST_16(ports);
   1575 	uint16_t local = NEXT_16(ports);
   1576 	ipsa_t *outbound_peer;
   1577 	isaf_t *bucket;
   1578 	ipsecesp_stack_t *espstack = assoc->ipsa_netstack->netstack_ipsecesp;
   1579 
   1580 	/* We found a conn_t, therefore local != 0. */
   1581 	ASSERT(local != 0);
   1582 	/* Assume an IPv4 SA. */
   1583 	ASSERT(assoc->ipsa_addrfam == AF_INET);
   1584 
   1585 	/*
   1586 	 * On-the-wire rport == 0 means something's very wrong.
   1587 	 * An unpaired SA is also useless to us.
   1588 	 * If we are behind the NAT, don't bother.
   1589 	 * A zero local NAT port defaults to 4500, so check that too.
   1590 	 * And, of course, if the ports already match, we don't need to
   1591 	 * bother.
   1592 	 */
   1593 	if (remote == 0 || assoc->ipsa_otherspi == 0 ||
   1594 	    (assoc->ipsa_flags & IPSA_F_BEHIND_NAT) ||
   1595 	    (assoc->ipsa_remote_nat_port == 0 &&
   1596 	    remote == htons(IPPORT_IKE_NATT)) ||
   1597 	    remote == assoc->ipsa_remote_nat_port)
   1598 		return;
   1599 
   1600 	/* Try and snag the peer.   NOTE:  Assume IPv4 for now. */
   1601 	bucket = OUTBOUND_BUCKET_V4(&(espstack->esp_sadb.s_v4),
   1602 	    assoc->ipsa_srcaddr[0]);
   1603 	mutex_enter(&bucket->isaf_lock);
   1604 	outbound_peer = ipsec_getassocbyspi(bucket, assoc->ipsa_otherspi,
   1605 	    assoc->ipsa_dstaddr, assoc->ipsa_srcaddr, AF_INET);
   1606 	mutex_exit(&bucket->isaf_lock);
   1607 
   1608 	/* We probably lost a race to a deleting or expiring thread. */
   1609 	if (outbound_peer == NULL)
   1610 		return;
   1611 
   1612 	/*
   1613 	 * Hold the mutexes for both SAs so we don't race another inbound
   1614 	 * thread.  A lock-entry order shouldn't matter, since all other
   1615 	 * per-ipsa locks are individually held-then-released.
   1616 	 *
   1617 	 * Luckily, this has nothing to do with the remote-NAT address,
   1618 	 * so we don't have to re-scribble the cached-checksum differential.
   1619 	 */
   1620 	mutex_enter(&outbound_peer->ipsa_lock);
   1621 	mutex_enter(&assoc->ipsa_lock);
   1622 	outbound_peer->ipsa_remote_nat_port = assoc->ipsa_remote_nat_port =
   1623 	    remote;
   1624 	mutex_exit(&assoc->ipsa_lock);
   1625 	mutex_exit(&outbound_peer->ipsa_lock);
   1626 	IPSA_REFRELE(outbound_peer);
   1627 	ESP_BUMP_STAT(espstack, sa_port_renumbers);
   1628 }
   1629 /*
   1630  * Finish processing of an inbound ESP packet after processing by the
   1631  * crypto framework.
   1632  * - Remove the ESP header.
   1633  * - Send packet back to IP.
   1634  * If authentication was performed on the packet, this function is called
   1635  * only if the authentication succeeded.
   1636  * On success returns B_TRUE, on failure returns B_FALSE and frees the
   1637  * mblk chain data_mp.
   1638  */
   1639 static mblk_t *
   1640 esp_in_done(mblk_t *data_mp, ip_recv_attr_t *ira, ipsec_crypto_t *ic)
   1641 {
   1642 	ipsa_t *assoc;
   1643 	uint_t espstart;
   1644 	uint32_t ivlen = 0;
   1645 	uint_t processed_len;
   1646 	esph_t *esph;
   1647 	kstat_named_t *counter;
   1648 	boolean_t is_natt;
   1649 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
   1650 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   1651 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   1652 
   1653 	assoc = ira->ira_ipsec_esp_sa;
   1654 	ASSERT(assoc != NULL);
   1655 
   1656 	is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
   1657 
   1658 	/* get the pointer to the ESP header */
   1659 	if (assoc->ipsa_encr_alg == SADB_EALG_NULL) {
   1660 		/* authentication-only ESP */
   1661 		espstart = ic->ic_crypto_data.cd_offset;
   1662 		processed_len = ic->ic_crypto_data.cd_length;
   1663 	} else {
   1664 		/* encryption present */
   1665 		ivlen = assoc->ipsa_iv_len;
   1666 		if (assoc->ipsa_auth_alg == SADB_AALG_NONE) {
   1667 			/* encryption-only ESP */
   1668 			espstart = ic->ic_crypto_data.cd_offset -
   1669 			    sizeof (esph_t) - assoc->ipsa_iv_len;
   1670 			processed_len = ic->ic_crypto_data.cd_length +
   1671 			    ivlen;
   1672 		} else {
   1673 			/* encryption with authentication */
   1674 			espstart = ic->ic_crypto_dual_data.dd_offset1;
   1675 			processed_len = ic->ic_crypto_dual_data.dd_len2 +
   1676 			    ivlen;
   1677 		}
   1678 	}
   1679 
   1680 	esph = (esph_t *)(data_mp->b_rptr + espstart);
   1681 
   1682 	if (assoc->ipsa_auth_alg != IPSA_AALG_NONE ||
   1683 	    (assoc->ipsa_flags & IPSA_F_COMBINED)) {
   1684 		/*
   1685 		 * Authentication passed if we reach this point.
   1686 		 * Packets with authentication will have the ICV
   1687 		 * after the crypto data. Adjust b_wptr before
   1688 		 * making padlen checks.
   1689 		 */
   1690 		ESP_BUMP_STAT(espstack, good_auth);
   1691 		data_mp->b_wptr -= assoc->ipsa_mac_len;
   1692 
   1693 		/*
   1694 		 * Check replay window here!
   1695 		 * For right now, assume keysock will set the replay window
   1696 		 * size to zero for SAs that have an unspecified sender.
   1697 		 * This may change...
   1698 		 */
   1699 
   1700 		if (!sadb_replay_check(assoc, esph->esph_replay)) {
   1701 			/*
   1702 			 * Log the event. As of now we print out an event.
   1703 			 * Do not print the replay failure number, or else
   1704 			 * syslog cannot collate the error messages.  Printing
   1705 			 * the replay number that failed opens a denial-of-
   1706 			 * service attack.
   1707 			 */
   1708 			ipsec_assocfailure(info.mi_idnum, 0, 0,
   1709 			    SL_ERROR | SL_WARN,
   1710 			    "Replay failed for ESP spi 0x%x, dst %s.\n",
   1711 			    assoc->ipsa_spi, assoc->ipsa_dstaddr,
   1712 			    assoc->ipsa_addrfam, espstack->ipsecesp_netstack);
   1713 			ESP_BUMP_STAT(espstack, replay_failures);
   1714 			counter = DROPPER(ipss, ipds_esp_replay);
   1715 			goto drop_and_bail;
   1716 		}
   1717 
   1718 		if (is_natt) {
   1719 			ASSERT(ira->ira_flags & IRAF_ESP_UDP_PORTS);
   1720 			ASSERT(ira->ira_esp_udp_ports != 0);
   1721 			esp_port_freshness(ira->ira_esp_udp_ports, assoc);
   1722 		}
   1723 	}
   1724 
   1725 	esp_set_usetime(assoc, B_TRUE);
   1726 
   1727 	if (!esp_age_bytes(assoc, processed_len, B_TRUE)) {
   1728 		/* The ipsa has hit hard expiration, LOG and AUDIT. */
   1729 		ipsec_assocfailure(info.mi_idnum, 0, 0,
   1730 		    SL_ERROR | SL_WARN,
   1731 		    "ESP association 0x%x, dst %s had bytes expire.\n",
   1732 		    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
   1733 		    espstack->ipsecesp_netstack);
   1734 		ESP_BUMP_STAT(espstack, bytes_expired);
   1735 		counter = DROPPER(ipss, ipds_esp_bytes_expire);
   1736 		goto drop_and_bail;
   1737 	}
   1738 
   1739 	/*
   1740 	 * Remove ESP header and padding from packet.  I hope the compiler
   1741 	 * spews "branch, predict taken" code for this.
   1742 	 */
   1743 
   1744 	if (esp_strip_header(data_mp, (ira->ira_flags & IRAF_IS_IPV4),
   1745 	    ivlen, &counter, espstack)) {
   1746 
   1747 		if (is_system_labeled() && assoc->ipsa_tsl != NULL) {
   1748 			if (!ip_recv_attr_replace_label(ira, assoc->ipsa_tsl)) {
   1749 				ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
   1750 				    DROPPER(ipss, ipds_ah_nomem),
   1751 				    &espstack->esp_dropper);
   1752 				BUMP_MIB(ira->ira_ill->ill_ip_mib,
   1753 				    ipIfStatsInDiscards);
   1754 				return (NULL);
   1755 			}
   1756 		}
   1757 		if (is_natt)
   1758 			return (esp_fix_natt_checksums(data_mp, assoc));
   1759 
   1760 		if (assoc->ipsa_state == IPSA_STATE_IDLE) {
   1761 			/*
   1762 			 * Cluster buffering case.  Tell caller that we're
   1763 			 * handling the packet.
   1764 			 */
   1765 			sadb_buf_pkt(assoc, data_mp, ira);
   1766 			return (NULL);
   1767 		}
   1768 
   1769 		return (data_mp);
   1770 	}
   1771 
   1772 	esp1dbg(espstack, ("esp_in_done: esp_strip_header() failed\n"));
   1773 drop_and_bail:
   1774 	IP_ESP_BUMP_STAT(ipss, in_discards);
   1775 	ip_drop_packet(data_mp, B_TRUE, ira->ira_ill, counter,
   1776 	    &espstack->esp_dropper);
   1777 	BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
   1778 	return (NULL);
   1779 }
   1780 
   1781 /*
   1782  * Called upon failing the inbound ICV check. The message passed as
   1783  * argument is freed.
   1784  */
   1785 static void
   1786 esp_log_bad_auth(mblk_t *mp, ip_recv_attr_t *ira)
   1787 {
   1788 	ipsa_t		*assoc = ira->ira_ipsec_esp_sa;
   1789 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
   1790 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   1791 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   1792 
   1793 	/*
   1794 	 * Log the event. Don't print to the console, block
   1795 	 * potential denial-of-service attack.
   1796 	 */
   1797 	ESP_BUMP_STAT(espstack, bad_auth);
   1798 
   1799 	ipsec_assocfailure(info.mi_idnum, 0, 0, SL_ERROR | SL_WARN,
   1800 	    "ESP Authentication failed for spi 0x%x, dst %s.\n",
   1801 	    assoc->ipsa_spi, assoc->ipsa_dstaddr, assoc->ipsa_addrfam,
   1802 	    espstack->ipsecesp_netstack);
   1803 
   1804 	IP_ESP_BUMP_STAT(ipss, in_discards);
   1805 	ip_drop_packet(mp, B_TRUE, ira->ira_ill,
   1806 	    DROPPER(ipss, ipds_esp_bad_auth),
   1807 	    &espstack->esp_dropper);
   1808 }
   1809 
   1810 
   1811 /*
   1812  * Invoked for outbound packets after ESP processing. If the packet
   1813  * also requires AH, performs the AH SA selection and AH processing.
   1814  * Returns B_TRUE if the AH processing was not needed or if it was
   1815  * performed successfully. Returns B_FALSE and consumes the passed mblk
   1816  * if AH processing was required but could not be performed.
   1817  *
   1818  * Returns data_mp unless data_mp was consumed/queued.
   1819  */
   1820 static mblk_t *
   1821 esp_do_outbound_ah(mblk_t *data_mp, ip_xmit_attr_t *ixa)
   1822 {
   1823 	ipsec_action_t *ap;
   1824 
   1825 	ap = ixa->ixa_ipsec_action;
   1826 	if (ap == NULL) {
   1827 		ipsec_policy_t *pp = ixa->ixa_ipsec_policy;
   1828 		ap = pp->ipsp_act;
   1829 	}
   1830 
   1831 	if (!ap->ipa_want_ah)
   1832 		return (data_mp);
   1833 
   1834 	/*
   1835 	 * Normally the AH SA would have already been put in place
   1836 	 * but it could have been flushed so we need to look for it.
   1837 	 */
   1838 	if (ixa->ixa_ipsec_ah_sa == NULL) {
   1839 		if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_AH)) {
   1840 			sadb_acquire(data_mp, ixa, B_TRUE, B_FALSE);
   1841 			return (NULL);
   1842 		}
   1843 	}
   1844 	ASSERT(ixa->ixa_ipsec_ah_sa != NULL);
   1845 
   1846 	data_mp = ixa->ixa_ipsec_ah_sa->ipsa_output_func(data_mp, ixa);
   1847 	return (data_mp);
   1848 }
   1849 
   1850 
   1851 /*
   1852  * Kernel crypto framework callback invoked after completion of async
   1853  * crypto requests for outbound packets.
   1854  */
   1855 static void
   1856 esp_kcf_callback_outbound(void *arg, int status)
   1857 {
   1858 	mblk_t		*mp = (mblk_t *)arg;
   1859 	mblk_t		*async_mp;
   1860 	netstack_t	*ns;
   1861 	ipsec_stack_t	*ipss;
   1862 	ipsecesp_stack_t *espstack;
   1863 	mblk_t		*data_mp;
   1864 	ip_xmit_attr_t	ixas;
   1865 	ipsec_crypto_t	*ic;
   1866 	ill_t		*ill;
   1867 
   1868 	/*
   1869 	 * First remove the ipsec_crypto_t mblk
   1870 	 * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
   1871 	 */
   1872 	async_mp = ipsec_remove_crypto_data(mp, &ic);
   1873 	ASSERT(async_mp != NULL);
   1874 
   1875 	/*
   1876 	 * Extract the ip_xmit_attr_t from the first mblk.
   1877 	 * Verifies that the netstack and ill is still around; could
   1878 	 * have vanished while kEf was doing its work.
   1879 	 * On succesful return we have a nce_t and the ill/ipst can't
   1880 	 * disappear until we do the nce_refrele in ixa_cleanup.
   1881 	 */
   1882 	data_mp = async_mp->b_cont;
   1883 	async_mp->b_cont = NULL;
   1884 	if (!ip_xmit_attr_from_mblk(async_mp, &ixas)) {
   1885 		/* Disappeared on us - no ill/ipst for MIB */
   1886 		/* We have nowhere to do stats since ixa_ipst could be NULL */
   1887 		if (ixas.ixa_nce != NULL) {
   1888 			ill = ixas.ixa_nce->nce_ill;
   1889 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   1890 			ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
   1891 		}
   1892 		freemsg(data_mp);
   1893 		goto done;
   1894 	}
   1895 	ns = ixas.ixa_ipst->ips_netstack;
   1896 	espstack = ns->netstack_ipsecesp;
   1897 	ipss = ns->netstack_ipsec;
   1898 	ill = ixas.ixa_nce->nce_ill;
   1899 
   1900 	if (status == CRYPTO_SUCCESS) {
   1901 		/*
   1902 		 * If a ICV was computed, it was stored by the
   1903 		 * crypto framework at the end of the packet.
   1904 		 */
   1905 		ipha_t *ipha = (ipha_t *)data_mp->b_rptr;
   1906 
   1907 		esp_set_usetime(ixas.ixa_ipsec_esp_sa, B_FALSE);
   1908 		/* NAT-T packet. */
   1909 		if (IPH_HDR_VERSION(ipha) == IP_VERSION &&
   1910 		    ipha->ipha_protocol == IPPROTO_UDP)
   1911 			esp_prepare_udp(ns, data_mp, ipha);
   1912 
   1913 		/* do AH processing if needed */
   1914 		data_mp = esp_do_outbound_ah(data_mp, &ixas);
   1915 		if (data_mp == NULL)
   1916 			goto done;
   1917 
   1918 		(void) ip_output_post_ipsec(data_mp, &ixas);
   1919 	} else {
   1920 		/* Outbound shouldn't see invalid MAC */
   1921 		ASSERT(status != CRYPTO_INVALID_MAC);
   1922 
   1923 		esp1dbg(espstack,
   1924 		    ("esp_kcf_callback_outbound: crypto failed with 0x%x\n",
   1925 		    status));
   1926 		ESP_BUMP_STAT(espstack, crypto_failures);
   1927 		ESP_BUMP_STAT(espstack, out_discards);
   1928 		ip_drop_packet(data_mp, B_FALSE, ill,
   1929 		    DROPPER(ipss, ipds_esp_crypto_failed),
   1930 		    &espstack->esp_dropper);
   1931 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   1932 	}
   1933 done:
   1934 	ixa_cleanup(&ixas);
   1935 	(void) ipsec_free_crypto_data(mp);
   1936 }
   1937 
   1938 /*
   1939  * Kernel crypto framework callback invoked after completion of async
   1940  * crypto requests for inbound packets.
   1941  */
   1942 static void
   1943 esp_kcf_callback_inbound(void *arg, int status)
   1944 {
   1945 	mblk_t		*mp = (mblk_t *)arg;
   1946 	mblk_t		*async_mp;
   1947 	netstack_t	*ns;
   1948 	ipsecesp_stack_t *espstack;
   1949 	ipsec_stack_t	*ipss;
   1950 	mblk_t		*data_mp;
   1951 	ip_recv_attr_t	iras;
   1952 	ipsec_crypto_t	*ic;
   1953 
   1954 	/*
   1955 	 * First remove the ipsec_crypto_t mblk
   1956 	 * Note that we need to ipsec_free_crypto_data(mp) once done with ic.
   1957 	 */
   1958 	async_mp = ipsec_remove_crypto_data(mp, &ic);
   1959 	ASSERT(async_mp != NULL);
   1960 
   1961 	/*
   1962 	 * Extract the ip_recv_attr_t from the first mblk.
   1963 	 * Verifies that the netstack and ill is still around; could
   1964 	 * have vanished while kEf was doing its work.
   1965 	 */
   1966 	data_mp = async_mp->b_cont;
   1967 	async_mp->b_cont = NULL;
   1968 	if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
   1969 		/* The ill or ip_stack_t disappeared on us */
   1970 		ip_drop_input("ip_recv_attr_from_mblk", data_mp, NULL);
   1971 		freemsg(data_mp);
   1972 		goto done;
   1973 	}
   1974 
   1975 	ns = iras.ira_ill->ill_ipst->ips_netstack;
   1976 	espstack = ns->netstack_ipsecesp;
   1977 	ipss = ns->netstack_ipsec;
   1978 
   1979 	if (status == CRYPTO_SUCCESS) {
   1980 		data_mp = esp_in_done(data_mp, &iras, ic);
   1981 		if (data_mp == NULL)
   1982 			goto done;
   1983 
   1984 		/* finish IPsec processing */
   1985 		ip_input_post_ipsec(data_mp, &iras);
   1986 	} else if (status == CRYPTO_INVALID_MAC) {
   1987 		esp_log_bad_auth(data_mp, &iras);
   1988 	} else {
   1989 		esp1dbg(espstack,
   1990 		    ("esp_kcf_callback: crypto failed with 0x%x\n",
   1991 		    status));
   1992 		ESP_BUMP_STAT(espstack, crypto_failures);
   1993 		IP_ESP_BUMP_STAT(ipss, in_discards);
   1994 		ip_drop_packet(data_mp, B_TRUE, iras.ira_ill,
   1995 		    DROPPER(ipss, ipds_esp_crypto_failed),
   1996 		    &espstack->esp_dropper);
   1997 		BUMP_MIB(iras.ira_ill->ill_ip_mib, ipIfStatsInDiscards);
   1998 	}
   1999 done:
   2000 	ira_cleanup(&iras, B_TRUE);
   2001 	(void) ipsec_free_crypto_data(mp);
   2002 }
   2003 
   2004 /*
   2005  * Invoked on crypto framework failure during inbound and outbound processing.
   2006  */
   2007 static void
   2008 esp_crypto_failed(mblk_t *data_mp, boolean_t is_inbound, int kef_rc,
   2009     ill_t *ill, ipsecesp_stack_t *espstack)
   2010 {
   2011 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
   2012 
   2013 	esp1dbg(espstack, ("crypto failed for %s ESP with 0x%x\n",
   2014 	    is_inbound ? "inbound" : "outbound", kef_rc));
   2015 	ip_drop_packet(data_mp, is_inbound, ill,
   2016 	    DROPPER(ipss, ipds_esp_crypto_failed),
   2017 	    &espstack->esp_dropper);
   2018 	ESP_BUMP_STAT(espstack, crypto_failures);
   2019 	if (is_inbound)
   2020 		IP_ESP_BUMP_STAT(ipss, in_discards);
   2021 	else
   2022 		ESP_BUMP_STAT(espstack, out_discards);
   2023 }
   2024 
   2025 /*
   2026  * A statement-equivalent macro, _cr MUST point to a modifiable
   2027  * crypto_call_req_t.
   2028  */
   2029 #define	ESP_INIT_CALLREQ(_cr, _mp, _callback)				\
   2030 	(_cr)->cr_flag = CRYPTO_SKIP_REQID|CRYPTO_ALWAYS_QUEUE;	\
   2031 	(_cr)->cr_callback_arg = (_mp);				\
   2032 	(_cr)->cr_callback_func = (_callback)
   2033 
   2034 #define	ESP_INIT_CRYPTO_MAC(mac, icvlen, icvbuf) {			\
   2035 	(mac)->cd_format = CRYPTO_DATA_RAW;				\
   2036 	(mac)->cd_offset = 0;						\
   2037 	(mac)->cd_length = icvlen;					\
   2038 	(mac)->cd_raw.iov_base = (char *)icvbuf;			\
   2039 	(mac)->cd_raw.iov_len = icvlen;					\
   2040 }
   2041 
   2042 #define	ESP_INIT_CRYPTO_DATA(data, mp, off, len) {			\
   2043 	if (MBLKL(mp) >= (len) + (off)) {				\
   2044 		(data)->cd_format = CRYPTO_DATA_RAW;			\
   2045 		(data)->cd_raw.iov_base = (char *)(mp)->b_rptr;		\
   2046 		(data)->cd_raw.iov_len = MBLKL(mp);			\
   2047 		(data)->cd_offset = off;				\
   2048 	} else {							\
   2049 		(data)->cd_format = CRYPTO_DATA_MBLK;			\
   2050 		(data)->cd_mp = mp;			       		\
   2051 		(data)->cd_offset = off;				\
   2052 	}								\
   2053 	(data)->cd_length = len;					\
   2054 }
   2055 
   2056 #define	ESP_INIT_CRYPTO_DUAL_DATA(data, mp, off1, len1, off2, len2) {	\
   2057 	(data)->dd_format = CRYPTO_DATA_MBLK;				\
   2058 	(data)->dd_mp = mp;						\
   2059 	(data)->dd_len1 = len1;						\
   2060 	(data)->dd_offset1 = off1;					\
   2061 	(data)->dd_len2 = len2;						\
   2062 	(data)->dd_offset2 = off2;					\
   2063 }
   2064 
   2065 /*
   2066  * Returns data_mp if successfully completed the request. Returns
   2067  * NULL if it failed (and increments InDiscards) or if it is pending.
   2068  */
   2069 static mblk_t *
   2070 esp_submit_req_inbound(mblk_t *esp_mp, ip_recv_attr_t *ira,
   2071     ipsa_t *assoc, uint_t esph_offset)
   2072 {
   2073 	uint_t auth_offset, msg_len, auth_len;
   2074 	crypto_call_req_t call_req, *callrp;
   2075 	mblk_t *mp;
   2076 	esph_t *esph_ptr;
   2077 	int kef_rc;
   2078 	uint_t icv_len = assoc->ipsa_mac_len;
   2079 	crypto_ctx_template_t auth_ctx_tmpl;
   2080 	boolean_t do_auth, do_encr, force;
   2081 	uint_t encr_offset, encr_len;
   2082 	uint_t iv_len = assoc->ipsa_iv_len;
   2083 	crypto_ctx_template_t encr_ctx_tmpl;
   2084 	ipsec_crypto_t	*ic, icstack;
   2085 	uchar_t *iv_ptr;
   2086 	netstack_t *ns = ira->ira_ill->ill_ipst->ips_netstack;
   2087 	ipsec_stack_t *ipss = ns->netstack_ipsec;
   2088 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   2089 
   2090 	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
   2091 	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
   2092 	force = (assoc->ipsa_flags & IPSA_F_ASYNC);
   2093 
   2094 #ifdef IPSEC_LATENCY_TEST
   2095 	kef_rc = CRYPTO_SUCCESS;
   2096 #else
   2097 	kef_rc = CRYPTO_FAILED;
   2098 #endif
   2099 
   2100 	/*
   2101 	 * An inbound packet is of the form:
   2102 	 * [IP,options,ESP,IV,data,ICV,pad]
   2103 	 */
   2104 	esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
   2105 	iv_ptr = (uchar_t *)(esph_ptr + 1);
   2106 	/* Packet length starting at IP header ending after ESP ICV. */
   2107 	msg_len = MBLKL(esp_mp);
   2108 
   2109 	encr_offset = esph_offset + sizeof (esph_t) + iv_len;
   2110 	encr_len = msg_len - encr_offset;
   2111 
   2112 	/*
   2113 	 * Counter mode algs need a nonce. This is setup in sadb_common_add().
   2114 	 * If for some reason we are using a SA which does not have a nonce
   2115 	 * then we must fail here.
   2116 	 */
   2117 	if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
   2118 	    (assoc->ipsa_nonce == NULL)) {
   2119 		ip_drop_packet(esp_mp, B_TRUE, ira->ira_ill,
   2120 		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
   2121 		return (NULL);
   2122 	}
   2123 
   2124 	if (force) {
   2125 		/* We are doing asynch; allocate mblks to hold state */
   2126 		if ((mp = ip_recv_attr_to_mblk(ira)) == NULL ||
   2127 		    (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
   2128 			BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
   2129 			ip_drop_input("ipIfStatsInDiscards", esp_mp,
   2130 			    ira->ira_ill);
   2131 			return (NULL);
   2132 		}
   2133 		linkb(mp, esp_mp);
   2134 		callrp = &call_req;
   2135 		ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_inbound);
   2136 	} else {
   2137 		/*
   2138 		 * If we know we are going to do sync then ipsec_crypto_t
   2139 		 * should be on the stack.
   2140 		 */
   2141 		ic = &icstack;
   2142 		bzero(ic, sizeof (*ic));
   2143 		callrp = NULL;
   2144 	}
   2145 
   2146 	if (do_auth) {
   2147 		/* authentication context template */
   2148 		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
   2149 		    auth_ctx_tmpl);
   2150 
   2151 		/* ICV to be verified */
   2152 		ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
   2153 		    icv_len, esp_mp->b_wptr - icv_len);
   2154 
   2155 		/* authentication starts at the ESP header */
   2156 		auth_offset = esph_offset;
   2157 		auth_len = msg_len - auth_offset - icv_len;
   2158 		if (!do_encr) {
   2159 			/* authentication only */
   2160 			/* initialize input data argument */
   2161 			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
   2162 			    esp_mp, auth_offset, auth_len);
   2163 
   2164 			/* call the crypto framework */
   2165 			kef_rc = crypto_mac_verify(&assoc->ipsa_amech,
   2166 			    &ic->ic_crypto_data,
   2167 			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
   2168 			    &ic->ic_crypto_mac, callrp);
   2169 		}
   2170 	}
   2171 
   2172 	if (do_encr) {
   2173 		/* encryption template */
   2174 		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
   2175 		    encr_ctx_tmpl);
   2176 
   2177 		/* Call the nonce update function. Also passes in IV */
   2178 		(assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, encr_len,
   2179 		    iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);
   2180 
   2181 		if (!do_auth) {
   2182 			/* decryption only */
   2183 			/* initialize input data argument */
   2184 			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
   2185 			    esp_mp, encr_offset, encr_len);
   2186 
   2187 			/* call the crypto framework */
   2188 			kef_rc = crypto_decrypt((crypto_mechanism_t *)
   2189 			    &ic->ic_cmm, &ic->ic_crypto_data,
   2190 			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
   2191 			    NULL, callrp);
   2192 		}
   2193 	}
   2194 
   2195 	if (do_auth && do_encr) {
   2196 		/* dual operation */
   2197 		/* initialize input data argument */
   2198 		ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
   2199 		    esp_mp, auth_offset, auth_len,
   2200 		    encr_offset, encr_len - icv_len);
   2201 
   2202 		/* specify IV */
   2203 		ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;
   2204 
   2205 		/* call the framework */
   2206 		kef_rc = crypto_mac_verify_decrypt(&assoc->ipsa_amech,
   2207 		    &assoc->ipsa_emech, &ic->ic_crypto_dual_data,
   2208 		    &assoc->ipsa_kcfauthkey, &assoc->ipsa_kcfencrkey,
   2209 		    auth_ctx_tmpl, encr_ctx_tmpl, &ic->ic_crypto_mac,
   2210 		    NULL, callrp);
   2211 	}
   2212 
   2213 	switch (kef_rc) {
   2214 	case CRYPTO_SUCCESS:
   2215 		ESP_BUMP_STAT(espstack, crypto_sync);
   2216 		esp_mp = esp_in_done(esp_mp, ira, ic);
   2217 		if (force) {
   2218 			/* Free mp after we are done with ic */
   2219 			mp = ipsec_free_crypto_data(mp);
   2220 			(void) ip_recv_attr_free_mblk(mp);
   2221 		}
   2222 		return (esp_mp);
   2223 	case CRYPTO_QUEUED:
   2224 		/* esp_kcf_callback_inbound() will be invoked on completion */
   2225 		ESP_BUMP_STAT(espstack, crypto_async);
   2226 		return (NULL);
   2227 	case CRYPTO_INVALID_MAC:
   2228 		if (force) {
   2229 			mp = ipsec_free_crypto_data(mp);
   2230 			esp_mp = ip_recv_attr_free_mblk(mp);
   2231 		}
   2232 		ESP_BUMP_STAT(espstack, crypto_sync);
   2233 		BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
   2234 		esp_log_bad_auth(esp_mp, ira);
   2235 		/* esp_mp was passed to ip_drop_packet */
   2236 		return (NULL);
   2237 	}
   2238 
   2239 	mp = ipsec_free_crypto_data(mp);
   2240 	esp_mp = ip_recv_attr_free_mblk(mp);
   2241 	BUMP_MIB(ira->ira_ill->ill_ip_mib, ipIfStatsInDiscards);
   2242 	esp_crypto_failed(esp_mp, B_TRUE, kef_rc, ira->ira_ill, espstack);
   2243 	/* esp_mp was passed to ip_drop_packet */
   2244 	return (NULL);
   2245 }
   2246 
   2247 /*
   2248  * Compute the IP and UDP checksums -- common code for both keepalives and
   2249  * actual ESP-in-UDP packets.  Be flexible with multiple mblks because ESP
   2250  * uses mblk-insertion to insert the UDP header.
   2251  * TODO - If there is an easy way to prep a packet for HW checksums, make
   2252  * it happen here.
   2253  * Note that this is used before both before calling ip_output_simple and
   2254  * in the esp datapath. The former could use IXAF_SET_ULP_CKSUM but not the
   2255  * latter.
   2256  */
   2257 static void
   2258 esp_prepare_udp(netstack_t *ns, mblk_t *mp, ipha_t *ipha)
   2259 {
   2260 	int offset;
   2261 	uint32_t cksum;
   2262 	uint16_t *arr;
   2263 	mblk_t *udpmp = mp;
   2264 	uint_t hlen = IPH_HDR_LENGTH(ipha);
   2265 
   2266 	ASSERT(MBLKL(mp) >= sizeof (ipha_t));
   2267 
   2268 	ipha->ipha_hdr_checksum = 0;
   2269 	ipha->ipha_hdr_checksum = ip_csum_hdr(ipha);
   2270 
   2271 	if (ns->netstack_udp->us_do_checksum) {
   2272 		ASSERT(MBLKL(udpmp) >= sizeof (udpha_t));
   2273 		/* arr points to the IP header. */
   2274 		arr = (uint16_t *)ipha;
   2275 		IP_STAT(ns->netstack_ip, ip_out_sw_cksum);
   2276 		IP_STAT_UPDATE(ns->netstack_ip, ip_out_sw_cksum_bytes,
   2277 		    ntohs(htons(ipha->ipha_length) - hlen));
   2278 		/* arr[6-9] are the IP addresses. */
   2279 		cksum = IP_UDP_CSUM_COMP + arr[6] + arr[7] + arr[8] + arr[9] +
   2280 		    ntohs(htons(ipha->ipha_length) - hlen);
   2281 		cksum = IP_CSUM(mp, hlen, cksum);
   2282 		offset = hlen + UDP_CHECKSUM_OFFSET;
   2283 		while (offset >= MBLKL(udpmp)) {
   2284 			offset -= MBLKL(udpmp);
   2285 			udpmp = udpmp->b_cont;
   2286 		}
   2287 		/* arr points to the UDP header's checksum field. */
   2288 		arr = (uint16_t *)(udpmp->b_rptr + offset);
   2289 		*arr = cksum;
   2290 	}
   2291 }
   2292 
   2293 /*
   2294  * taskq handler so we can send the NAT-T keepalive on a separate thread.
   2295  */
   2296 static void
   2297 actually_send_keepalive(void *arg)
   2298 {
   2299 	mblk_t *mp = (mblk_t *)arg;
   2300 	ip_xmit_attr_t ixas;
   2301 	netstack_t	*ns;
   2302 	netstackid_t	stackid;
   2303 
   2304 	stackid = (netstackid_t)(uintptr_t)mp->b_prev;
   2305 	mp->b_prev = NULL;
   2306 	ns = netstack_find_by_stackid(stackid);
   2307 	if (ns == NULL) {
   2308 		/* Disappeared */
   2309 		ip_drop_output("ipIfStatsOutDiscards", mp, NULL);
   2310 		freemsg(mp);
   2311 		return;
   2312 	}
   2313 
   2314 	bzero(&ixas, sizeof (ixas));
   2315 	ixas.ixa_zoneid = ALL_ZONES;
   2316 	ixas.ixa_cred = kcred;
   2317 	ixas.ixa_cpid = NOPID;
   2318 	ixas.ixa_tsl = NULL;
   2319 	ixas.ixa_ipst = ns->netstack_ip;
   2320 	/* No ULP checksum; done by esp_prepare_udp */
   2321 	ixas.ixa_flags = IXAF_IS_IPV4 | IXAF_NO_IPSEC;
   2322 
   2323 	(void) ip_output_simple(mp, &ixas);
   2324 	ixa_cleanup(&ixas);
   2325 	netstack_rele(ns);
   2326 }
   2327 
   2328 /*
   2329  * Send a one-byte UDP NAT-T keepalive.
   2330  */
   2331 void
   2332 ipsecesp_send_keepalive(ipsa_t *assoc)
   2333 {
   2334 	mblk_t		*mp;
   2335 	ipha_t		*ipha;
   2336 	udpha_t		*udpha;
   2337 	netstack_t	*ns = assoc->ipsa_netstack;
   2338 
   2339 	ASSERT(MUTEX_NOT_HELD(&assoc->ipsa_lock));
   2340 
   2341 	mp = allocb(sizeof (ipha_t) + sizeof (udpha_t) + 1, BPRI_HI);
   2342 	if (mp == NULL)
   2343 		return;
   2344 	ipha = (ipha_t *)mp->b_rptr;
   2345 	ipha->ipha_version_and_hdr_length = IP_SIMPLE_HDR_VERSION;
   2346 	ipha->ipha_type_of_service = 0;
   2347 	ipha->ipha_length = htons(sizeof (ipha_t) + sizeof (udpha_t) + 1);
   2348 	/* Use the low-16 of the SPI so we have some clue where it came from. */
   2349 	ipha->ipha_ident = *(((uint16_t *)(&assoc->ipsa_spi)) + 1);
   2350 	ipha->ipha_fragment_offset_and_flags = 0;  /* Too small to fragment! */
   2351 	ipha->ipha_ttl = 0xFF;
   2352 	ipha->ipha_protocol = IPPROTO_UDP;
   2353 	ipha->ipha_hdr_checksum = 0;
   2354 	ipha->ipha_src = assoc->ipsa_srcaddr[0];
   2355 	ipha->ipha_dst = assoc->ipsa_dstaddr[0];
   2356 	udpha = (udpha_t *)(ipha + 1);
   2357 	udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
   2358 	    assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
   2359 	udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
   2360 	    assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
   2361 	udpha->uha_length = htons(sizeof (udpha_t) + 1);
   2362 	udpha->uha_checksum = 0;
   2363 	mp->b_wptr = (uint8_t *)(udpha + 1);
   2364 	*(mp->b_wptr++) = 0xFF;
   2365 
   2366 	esp_prepare_udp(ns, mp, ipha);
   2367 
   2368 	/*
   2369 	 * We're holding an isaf_t bucket lock, so pawn off the actual
   2370 	 * packet transmission to another thread.  Just in case syncq
   2371 	 * processing causes a same-bucket packet to be processed.
   2372 	 */
   2373 	mp->b_prev = (mblk_t *)(uintptr_t)ns->netstack_stackid;
   2374 
   2375 	if (taskq_dispatch(esp_taskq, actually_send_keepalive, mp,
   2376 	    TQ_NOSLEEP) == 0) {
   2377 		/* Assume no memory if taskq_dispatch() fails. */
   2378 		mp->b_prev = NULL;
   2379 		ip_drop_packet(mp, B_FALSE, NULL,
   2380 		    DROPPER(ns->netstack_ipsec, ipds_esp_nomem),
   2381 		    &ns->netstack_ipsecesp->esp_dropper);
   2382 	}
   2383 }
   2384 
/*
 * Submit an outbound ESP packet to the kernel crypto framework for
 * encryption and/or ICV computation, as dictated by the SA.
 *
 * data_mp	packet; the mblk following the first one holds the ESP
 *		header (preceded by a UDP header for NAT-T SAs)
 * ixa		transmit attributes for the packet
 * assoc	ESP security association to use
 * icv_buf	where the computed ICV is to be stored
 * payload_len	number of bytes to encrypt
 *
 * Returns data_mp if the request completed synchronously and
 * successfully.  Returns NULL if the packet was dropped
 * (ipIfStatsOutDiscards is bumped on the allocation-failure and
 * crypto-failure paths) or if the request was queued, in which case
 * esp_kcf_callback_outbound() finishes the job.
 */
static mblk_t *
esp_submit_req_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa, ipsa_t *assoc,
    uchar_t *icv_buf, uint_t payload_len)
{
	uint_t auth_len;
	crypto_call_req_t call_req, *callrp;
	mblk_t *esp_mp;
	esph_t *esph_ptr;
	mblk_t *mp;
	int kef_rc = CRYPTO_FAILED;
	uint_t icv_len = assoc->ipsa_mac_len;
	crypto_ctx_template_t auth_ctx_tmpl;
	boolean_t do_auth, do_encr, force;
	uint_t iv_len = assoc->ipsa_iv_len;
	crypto_ctx_template_t encr_ctx_tmpl;
	boolean_t is_natt = ((assoc->ipsa_flags & IPSA_F_NATT) != 0);
	/* For NAT-T the ESP header sits behind a UDP header in esp_mp. */
	size_t esph_offset = (is_natt ? UDPH_SIZE : 0);
	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
	ipsec_crypto_t	*ic, icstack;
	uchar_t		*iv_ptr;
	crypto_data_t	*cd_ptr = NULL;
	ill_t		*ill = ixa->ixa_nce->nce_ill;
	ipsec_stack_t	*ipss = ns->netstack_ipsec;

	esp3dbg(espstack, ("esp_submit_req_outbound:%s",
	    is_natt ? "natt" : "not natt"));

	do_encr = assoc->ipsa_encr_alg != SADB_EALG_NULL;
	do_auth = assoc->ipsa_auth_alg != SADB_AALG_NONE;
	force = (assoc->ipsa_flags & IPSA_F_ASYNC);

	/*
	 * Default result for the case where neither operation below is
	 * submitted to the framework.
	 */
#ifdef IPSEC_LATENCY_TEST
	kef_rc = CRYPTO_SUCCESS;
#else
	kef_rc = CRYPTO_FAILED;
#endif

	/*
	 * Outbound IPsec packets are of the form:
	 * [IP,options] -> [ESP,IV] -> [data] -> [pad,ICV]
	 * unless it's NATT, then it's
	 * [IP,options] -> [udp][ESP,IV] -> [data] -> [pad,ICV]
	 * Get a pointer to the mblk containing the ESP header.
	 */
	ASSERT(data_mp->b_cont != NULL);
	esp_mp = data_mp->b_cont;
	esph_ptr = (esph_t *)(esp_mp->b_rptr + esph_offset);
	iv_ptr = (uchar_t *)(esph_ptr + 1);

	/*
	 * Combined mode algs need a nonce. This is setup in sadb_common_add().
	 * If for some reason we are using a SA which does not have a nonce
	 * then we must fail here.
	 */
	if ((assoc->ipsa_flags & IPSA_F_COUNTERMODE) &&
	    (assoc->ipsa_nonce == NULL)) {
		ip_drop_packet(data_mp, B_FALSE, NULL,
		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
		return (NULL);
	}

	if (force) {
		/* We are doing asynch; allocate mblks to hold state */
		if ((mp = ip_xmit_attr_to_mblk(ixa)) == NULL ||
		    (mp = ipsec_add_crypto_data(mp, &ic)) == NULL) {
			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
			ip_drop_output("ipIfStatsOutDiscards", data_mp, ill);
			freemsg(data_mp);
			return (NULL);
		}

		/* Resulting chain: crypto state -> attributes -> packet */
		linkb(mp, data_mp);
		callrp = &call_req;
		ESP_INIT_CALLREQ(callrp, mp, esp_kcf_callback_outbound);
	} else {
		/*
		 * If we know we are going to do sync then ipsec_crypto_t
		 * should be on the stack.
		 */
		ic = &icstack;
		bzero(ic, sizeof (*ic));
		callrp = NULL;
	}


	if (do_auth) {
		/* authentication context template */
		IPSEC_CTX_TMPL(assoc, ipsa_authtmpl, IPSEC_ALG_AUTH,
		    auth_ctx_tmpl);

		/* where to store the computed mac */
		ESP_INIT_CRYPTO_MAC(&ic->ic_crypto_mac,
		    icv_len, icv_buf);

		/* authentication starts at the ESP header */
		auth_len = payload_len + iv_len + sizeof (esph_t);
		if (!do_encr) {
			/* authentication only */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
			    esp_mp, esph_offset, auth_len);

			/* call the crypto framework */
			kef_rc = crypto_mac(&assoc->ipsa_amech,
			    &ic->ic_crypto_data,
			    &assoc->ipsa_kcfauthkey, auth_ctx_tmpl,
			    &ic->ic_crypto_mac, callrp);
		}
	}

	if (do_encr) {
		/* encryption context template */
		IPSEC_CTX_TMPL(assoc, ipsa_encrtmpl, IPSEC_ALG_ENCR,
		    encr_ctx_tmpl);
		/* Call the nonce update function. */
		(assoc->ipsa_noncefunc)(assoc, (uchar_t *)esph_ptr, payload_len,
		    iv_ptr, &ic->ic_cmm, &ic->ic_crypto_data);

		if (!do_auth) {
			/* encryption only, skip mblk that contains ESP hdr */
			/* initialize input data argument */
			ESP_INIT_CRYPTO_DATA(&ic->ic_crypto_data,
			    esp_mp->b_cont, 0, payload_len);

			/*
			 * For combined mode ciphers, the ciphertext is the same
			 * size as the clear text, the ICV should follow the
			 * ciphertext. To convince the kcf to allow in-line
			 * encryption, with an ICV, use ipsec_out_crypto_mac
			 * to point to the same buffer as the data. The calling
			 * function need to ensure the buffer is large enough to
			 * include the ICV.
			 *
			 * The IV is already written to the packet buffer, the
			 * nonce setup function copied it to the params struct
			 * for the cipher to use.
			 */
			if (assoc->ipsa_flags & IPSA_F_COMBINED) {
				bcopy(&ic->ic_crypto_data,
				    &ic->ic_crypto_mac,
				    sizeof (crypto_data_t));
				ic->ic_crypto_mac.cd_length =
				    payload_len + icv_len;
				cd_ptr = &ic->ic_crypto_mac;
			}

			/*
			 * call the crypto framework; cd_ptr stays NULL
			 * unless the combined-mode setup above ran
			 */
			kef_rc = crypto_encrypt((crypto_mechanism_t *)
			    &ic->ic_cmm, &ic->ic_crypto_data,
			    &assoc->ipsa_kcfencrkey, encr_ctx_tmpl,
			    cd_ptr, callrp);

		}
	}

	if (do_auth && do_encr) {
		/*
		 * Encryption and authentication:
		 * Pass the pointer to the mblk chain starting at the ESP
		 * header to the framework. Skip the ESP header mblk
		 * for encryption, which is reflected by an encryption
		 * offset equal to the length of that mblk. Start
		 * the authentication at the ESP header, i.e. use an
		 * authentication offset of esph_offset (zero unless the
		 * SA is NAT-T).
		 */
		ESP_INIT_CRYPTO_DUAL_DATA(&ic->ic_crypto_dual_data,
		    esp_mp, MBLKL(esp_mp), payload_len, esph_offset, auth_len);

		/* specify IV */
		ic->ic_crypto_dual_data.dd_miscdata = (char *)iv_ptr;

		/* call the framework */
		kef_rc = crypto_encrypt_mac(&assoc->ipsa_emech,
		    &assoc->ipsa_amech, NULL,
		    &assoc->ipsa_kcfencrkey, &assoc->ipsa_kcfauthkey,
		    encr_ctx_tmpl, auth_ctx_tmpl,
		    &ic->ic_crypto_dual_data,
		    &ic->ic_crypto_mac, callrp);
	}

	switch (kef_rc) {
	case CRYPTO_SUCCESS:
		ESP_BUMP_STAT(espstack, crypto_sync);
		esp_set_usetime(assoc, B_FALSE);
		if (force) {
			/* Completed synchronously; release the async state. */
			mp = ipsec_free_crypto_data(mp);
			data_mp = ip_xmit_attr_free_mblk(mp);
		}
		if (is_natt)
			esp_prepare_udp(ns, data_mp, (ipha_t *)data_mp->b_rptr);
		return (data_mp);
	case CRYPTO_QUEUED:
		/* esp_kcf_callback_outbound() will be invoked on completion */
		ESP_BUMP_STAT(espstack, crypto_async);
		return (NULL);
	}

	/* Any other return code is a failure: drop the packet. */
	if (force) {
		mp = ipsec_free_crypto_data(mp);
		data_mp = ip_xmit_attr_free_mblk(mp);
	}
	BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
	esp_crypto_failed(data_mp, B_FALSE, kef_rc, NULL, espstack);
	/* data_mp was passed to ip_drop_packet */
	return (NULL);
}
   2596 
   2597 /*
   2598  * Handle outbound IPsec processing for IPv4 and IPv6
   2599  *
   2600  * Returns data_mp if successfully completed the request. Returns
   2601  * NULL if it failed (and increments InDiscards) or if it is pending.
   2602  */
   2603 static mblk_t *
   2604 esp_outbound(mblk_t *data_mp, ip_xmit_attr_t *ixa)
   2605 {
   2606 	mblk_t *espmp, *tailmp;
   2607 	ipha_t *ipha;
   2608 	ip6_t *ip6h;
   2609 	esph_t *esph_ptr, *iv_ptr;
   2610 	uint_t af;
   2611 	uint8_t *nhp;
   2612 	uintptr_t divpoint, datalen, adj, padlen, i, alloclen;
   2613 	uintptr_t esplen = sizeof (esph_t);
   2614 	uint8_t protocol;
   2615 	ipsa_t *assoc;
   2616 	uint_t iv_len, block_size, mac_len = 0;
   2617 	uchar_t *icv_buf;
   2618 	udpha_t *udpha;
   2619 	boolean_t is_natt = B_FALSE;
   2620 	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
   2621 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   2622 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   2623 	ill_t		*ill = ixa->ixa_nce->nce_ill;
   2624 	boolean_t	need_refrele = B_FALSE;
   2625 
   2626 	ESP_BUMP_STAT(espstack, out_requests);
   2627 
   2628 	/*
   2629 	 * <sigh> We have to copy the message here, because TCP (for example)
   2630 	 * keeps a dupb() of the message lying around for retransmission.
   2631 	 * Since ESP changes the whole of the datagram, we have to create our
   2632 	 * own copy lest we clobber TCP's data.  Since we have to copy anyway,
   2633 	 * we might as well make use of msgpullup() and get the mblk into one
   2634 	 * contiguous piece!
   2635 	 */
   2636 	tailmp = msgpullup(data_mp, -1);
   2637 	if (tailmp == NULL) {
   2638 		esp0dbg(("esp_outbound: msgpullup() failed, "
   2639 		    "dropping packet.\n"));
   2640 		ip_drop_packet(data_mp, B_FALSE, ill,
   2641 		    DROPPER(ipss, ipds_esp_nomem),
   2642 		    &espstack->esp_dropper);
   2643 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2644 		return (NULL);
   2645 	}
   2646 	freemsg(data_mp);
   2647 	data_mp = tailmp;
   2648 
   2649 	assoc = ixa->ixa_ipsec_esp_sa;
   2650 	ASSERT(assoc != NULL);
   2651 
   2652 	/*
   2653 	 * Get the outer IP header in shape to escape this system..
   2654 	 */
   2655 	if (is_system_labeled() && (assoc->ipsa_otsl != NULL)) {
   2656 		/*
   2657 		 * Need to update packet with any CIPSO option and update
   2658 		 * ixa_tsl to capture the new label.
   2659 		 * We allocate a separate ixa for that purpose.
   2660 		 */
   2661 		ixa = ip_xmit_attr_duplicate(ixa);
   2662 		if (ixa == NULL) {
   2663 			ip_drop_packet(data_mp, B_FALSE, ill,
   2664 			    DROPPER(ipss, ipds_esp_nomem),
   2665 			    &espstack->esp_dropper);
   2666 			return (NULL);
   2667 		}
   2668 		need_refrele = B_TRUE;
   2669 
   2670 		label_hold(assoc->ipsa_otsl);
   2671 		ip_xmit_attr_replace_tsl(ixa, assoc->ipsa_otsl);
   2672 
   2673 		data_mp = sadb_whack_label(data_mp, assoc, ixa,
   2674 		    DROPPER(ipss, ipds_esp_nomem), &espstack->esp_dropper);
   2675 		if (data_mp == NULL) {
   2676 			/* Packet dropped by sadb_whack_label */
   2677 			ixa_refrele(ixa);
   2678 			return (NULL);
   2679 		}
   2680 	}
   2681 
   2682 	/*
   2683 	 * Reality check....
   2684 	 */
   2685 	ipha = (ipha_t *)data_mp->b_rptr;  /* So we can call esp_acquire(). */
   2686 
   2687 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
   2688 		ASSERT(IPH_HDR_VERSION(ipha) == IPV4_VERSION);
   2689 
   2690 		af = AF_INET;
   2691 		divpoint = IPH_HDR_LENGTH(ipha);
   2692 		datalen = ntohs(ipha->ipha_length) - divpoint;
   2693 		nhp = (uint8_t *)&ipha->ipha_protocol;
   2694 	} else {
   2695 		ip_pkt_t ipp;
   2696 
   2697 		ASSERT(IPH_HDR_VERSION(ipha) == IPV6_VERSION);
   2698 
   2699 		af = AF_INET6;
   2700 		ip6h = (ip6_t *)ipha;
   2701 		bzero(&ipp, sizeof (ipp));
   2702 		divpoint = ip_find_hdr_v6(data_mp, ip6h, B_FALSE, &ipp, NULL);
   2703 		if (ipp.ipp_dstopts != NULL &&
   2704 		    ipp.ipp_dstopts->ip6d_nxt != IPPROTO_ROUTING) {
   2705 			/*
   2706 			 * Destination options are tricky.  If we get in here,
   2707 			 * then we have a terminal header following the
   2708 			 * destination options.  We need to adjust backwards
   2709 			 * so we insert ESP BEFORE the destination options
   2710 			 * bag.  (So that the dstopts get encrypted!)
   2711 			 *
   2712 			 * Since this is for outbound packets only, we know
   2713 			 * that non-terminal destination options only precede
   2714 			 * routing headers.
   2715 			 */
   2716 			divpoint -= ipp.ipp_dstoptslen;
   2717 		}
   2718 		datalen = ntohs(ip6h->ip6_plen) + sizeof (ip6_t) - divpoint;
   2719 
   2720 		if (ipp.ipp_rthdr != NULL) {
   2721 			nhp = &ipp.ipp_rthdr->ip6r_nxt;
   2722 		} else if (ipp.ipp_hopopts != NULL) {
   2723 			nhp = &ipp.ipp_hopopts->ip6h_nxt;
   2724 		} else {
   2725 			ASSERT(divpoint == sizeof (ip6_t));
   2726 			/* It's probably IP + ESP. */
   2727 			nhp = &ip6h->ip6_nxt;
   2728 		}
   2729 	}
   2730 
   2731 	mac_len = assoc->ipsa_mac_len;
   2732 
   2733 	if (assoc->ipsa_flags & IPSA_F_NATT) {
   2734 		/* wedge in UDP header */
   2735 		is_natt = B_TRUE;
   2736 		esplen += UDPH_SIZE;
   2737 	}
   2738 
   2739 	/*
   2740 	 * Set up ESP header and encryption padding for ENCR PI request.
   2741 	 */
   2742 
   2743 	/* Determine the padding length.  Pad to 4-bytes for no-encryption. */
   2744 	if (assoc->ipsa_encr_alg != SADB_EALG_NULL) {
   2745 		iv_len = assoc->ipsa_iv_len;
   2746 		block_size = assoc->ipsa_datalen;
   2747 
   2748 		/*
   2749 		 * Pad the data to the length of the cipher block size.
   2750 		 * Include the two additional bytes (hence the - 2) for the
   2751 		 * padding length and the next header.  Take this into account
   2752 		 * when calculating the actual length of the padding.
   2753 		 */
   2754 		ASSERT(ISP2(iv_len));
   2755 		padlen = ((unsigned)(block_size - datalen - 2)) &
   2756 		    (block_size - 1);
   2757 	} else {
   2758 		iv_len = 0;
   2759 		padlen = ((unsigned)(sizeof (uint32_t) - datalen - 2)) &
   2760 		    (sizeof (uint32_t) - 1);
   2761 	}
   2762 
   2763 	/* Allocate ESP header and IV. */
   2764 	esplen += iv_len;
   2765 
   2766 	/*
   2767 	 * Update association byte-count lifetimes.  Don't forget to take
   2768 	 * into account the padding length and next-header (hence the + 2).
   2769 	 *
   2770 	 * Use the amount of data fed into the "encryption algorithm".  This
   2771 	 * is the IV, the data length, the padding length, and the final two
   2772 	 * bytes (padlen, and next-header).
   2773 	 *
   2774 	 */
   2775 
   2776 	if (!esp_age_bytes(assoc, datalen + padlen + iv_len + 2, B_FALSE)) {
   2777 		ip_drop_packet(data_mp, B_FALSE, ill,
   2778 		    DROPPER(ipss, ipds_esp_bytes_expire),
   2779 		    &espstack->esp_dropper);
   2780 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2781 		if (need_refrele)
   2782 			ixa_refrele(ixa);
   2783 		return (NULL);
   2784 	}
   2785 
   2786 	espmp = allocb(esplen, BPRI_HI);
   2787 	if (espmp == NULL) {
   2788 		ESP_BUMP_STAT(espstack, out_discards);
   2789 		esp1dbg(espstack, ("esp_outbound: can't allocate espmp.\n"));
   2790 		ip_drop_packet(data_mp, B_FALSE, ill,
   2791 		    DROPPER(ipss, ipds_esp_nomem),
   2792 		    &espstack->esp_dropper);
   2793 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2794 		if (need_refrele)
   2795 			ixa_refrele(ixa);
   2796 		return (NULL);
   2797 	}
   2798 	espmp->b_wptr += esplen;
   2799 	esph_ptr = (esph_t *)espmp->b_rptr;
   2800 
   2801 	if (is_natt) {
   2802 		esp3dbg(espstack, ("esp_outbound: NATT"));
   2803 
   2804 		udpha = (udpha_t *)espmp->b_rptr;
   2805 		udpha->uha_src_port = (assoc->ipsa_local_nat_port != 0) ?
   2806 		    assoc->ipsa_local_nat_port : htons(IPPORT_IKE_NATT);
   2807 		udpha->uha_dst_port = (assoc->ipsa_remote_nat_port != 0) ?
   2808 		    assoc->ipsa_remote_nat_port : htons(IPPORT_IKE_NATT);
   2809 		/*
   2810 		 * Set the checksum to 0, so that the esp_prepare_udp() call
   2811 		 * can do the right thing.
   2812 		 */
   2813 		udpha->uha_checksum = 0;
   2814 		esph_ptr = (esph_t *)(udpha + 1);
   2815 	}
   2816 
   2817 	esph_ptr->esph_spi = assoc->ipsa_spi;
   2818 
   2819 	esph_ptr->esph_replay = htonl(atomic_add_32_nv(&assoc->ipsa_replay, 1));
   2820 	if (esph_ptr->esph_replay == 0 && assoc->ipsa_replay_wsize != 0) {
   2821 		/*
   2822 		 * XXX We have replay counter wrapping.
   2823 		 * We probably want to nuke this SA (and its peer).
   2824 		 */
   2825 		ipsec_assocfailure(info.mi_idnum, 0, 0,
   2826 		    SL_ERROR | SL_CONSOLE | SL_WARN,
   2827 		    "Outbound ESP SA (0x%x, %s) has wrapped sequence.\n",
   2828 		    esph_ptr->esph_spi, assoc->ipsa_dstaddr, af,
   2829 		    espstack->ipsecesp_netstack);
   2830 
   2831 		ESP_BUMP_STAT(espstack, out_discards);
   2832 		sadb_replay_delete(assoc);
   2833 		ip_drop_packet(data_mp, B_FALSE, ill,
   2834 		    DROPPER(ipss, ipds_esp_replay),
   2835 		    &espstack->esp_dropper);
   2836 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2837 		if (need_refrele)
   2838 			ixa_refrele(ixa);
   2839 		return (NULL);
   2840 	}
   2841 
   2842 	iv_ptr = (esph_ptr + 1);
   2843 	/*
   2844 	 * iv_ptr points to the mblk which will contain the IV once we have
   2845 	 * written it there. This mblk will be part of a mblk chain that
   2846 	 * will make up the packet.
   2847 	 *
   2848 	 * For counter mode algorithms, the IV is a 64 bit quantity, it
   2849 	 * must NEVER repeat in the lifetime of the SA, otherwise an
   2850 	 * attacker who had recorded enough packets might be able to
   2851 	 * determine some clear text.
   2852 	 *
   2853 	 * To ensure this does not happen, the IV is stored in the SA and
   2854 	 * incremented for each packet, the IV is then copied into the
   2855 	 * "packet" for transmission to the receiving system. The IV will
   2856 	 * also be copied into the nonce, when the packet is encrypted.
   2857 	 *
   2858 	 * CBC mode algorithms use a random IV for each packet. We do not
   2859 	 * require the highest quality random bits, but for best security
   2860 	 * with CBC mode ciphers, the value must be unlikely to repeat and
   2861 	 * must not be known in advance to an adversary capable of influencing
   2862 	 * the clear text.
   2863 	 */
   2864 	if (!update_iv((uint8_t *)iv_ptr, espstack->esp_pfkey_q, assoc,
   2865 	    espstack)) {
   2866 		ip_drop_packet(data_mp, B_FALSE, ill,
   2867 		    DROPPER(ipss, ipds_esp_iv_wrap), &espstack->esp_dropper);
   2868 		if (need_refrele)
   2869 			ixa_refrele(ixa);
   2870 		return (NULL);
   2871 	}
   2872 
   2873 	/* Fix the IP header. */
   2874 	alloclen = padlen + 2 + mac_len;
   2875 	adj = alloclen + (espmp->b_wptr - espmp->b_rptr);
   2876 
   2877 	protocol = *nhp;
   2878 
   2879 	if (ixa->ixa_flags & IXAF_IS_IPV4) {
   2880 		ipha->ipha_length = htons(ntohs(ipha->ipha_length) + adj);
   2881 		if (is_natt) {
   2882 			*nhp = IPPROTO_UDP;
   2883 			udpha->uha_length = htons(ntohs(ipha->ipha_length) -
   2884 			    IPH_HDR_LENGTH(ipha));
   2885 		} else {
   2886 			*nhp = IPPROTO_ESP;
   2887 		}
   2888 		ipha->ipha_hdr_checksum = 0;
   2889 		ipha->ipha_hdr_checksum = (uint16_t)ip_csum_hdr(ipha);
   2890 	} else {
   2891 		ip6h->ip6_plen = htons(ntohs(ip6h->ip6_plen) + adj);
   2892 		*nhp = IPPROTO_ESP;
   2893 	}
   2894 
   2895 	/* I've got the two ESP mblks, now insert them. */
   2896 
   2897 	esp2dbg(espstack, ("data_mp before outbound ESP adjustment:\n"));
   2898 	esp2dbg(espstack, (dump_msg(data_mp)));
   2899 
   2900 	if (!esp_insert_esp(data_mp, espmp, divpoint, espstack)) {
   2901 		ESP_BUMP_STAT(espstack, out_discards);
   2902 		/* NOTE:  esp_insert_esp() only fails if there's no memory. */
   2903 		ip_drop_packet(data_mp, B_FALSE, ill,
   2904 		    DROPPER(ipss, ipds_esp_nomem),
   2905 		    &espstack->esp_dropper);
   2906 		freeb(espmp);
   2907 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2908 		if (need_refrele)
   2909 			ixa_refrele(ixa);
   2910 		return (NULL);
   2911 	}
   2912 
   2913 	/* Append padding (and leave room for ICV). */
   2914 	for (tailmp = data_mp; tailmp->b_cont != NULL; tailmp = tailmp->b_cont)
   2915 		;
   2916 	if (tailmp->b_wptr + alloclen > tailmp->b_datap->db_lim) {
   2917 		tailmp->b_cont = allocb(alloclen, BPRI_HI);
   2918 		if (tailmp->b_cont == NULL) {
   2919 			ESP_BUMP_STAT(espstack, out_discards);
   2920 			esp0dbg(("esp_outbound:  Can't allocate tailmp.\n"));
   2921 			ip_drop_packet(data_mp, B_FALSE, ill,
   2922 			    DROPPER(ipss, ipds_esp_nomem),
   2923 			    &espstack->esp_dropper);
   2924 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   2925 			if (need_refrele)
   2926 				ixa_refrele(ixa);
   2927 			return (NULL);
   2928 		}
   2929 		tailmp = tailmp->b_cont;
   2930 	}
   2931 
   2932 	/*
   2933 	 * If there's padding, N bytes of padding must be of the form 0x1,
   2934 	 * 0x2, 0x3... 0xN.
   2935 	 */
   2936 	for (i = 0; i < padlen; ) {
   2937 		i++;
   2938 		*tailmp->b_wptr++ = i;
   2939 	}
   2940 	*tailmp->b_wptr++ = i;
   2941 	*tailmp->b_wptr++ = protocol;
   2942 
   2943 	esp2dbg(espstack, ("data_Mp before encryption:\n"));
   2944 	esp2dbg(espstack, (dump_msg(data_mp)));
   2945 
   2946 	/*
   2947 	 * Okay.  I've set up the pre-encryption ESP.  Let's do it!
   2948 	 */
   2949 
   2950 	if (mac_len > 0) {
   2951 		ASSERT(tailmp->b_wptr + mac_len <= tailmp->b_datap->db_lim);
   2952 		icv_buf = tailmp->b_wptr;
   2953 		tailmp->b_wptr += mac_len;
   2954 	} else {
   2955 		icv_buf = NULL;
   2956 	}
   2957 
   2958 	data_mp = esp_submit_req_outbound(data_mp, ixa, assoc, icv_buf,
   2959 	    datalen + padlen + 2);
   2960 	if (need_refrele)
   2961 		ixa_refrele(ixa);
   2962 	return (data_mp);
   2963 }
   2964 
   2965 /*
   2966  * IP calls this to validate the ICMP errors that
   2967  * we got from the network.
   2968  */
   2969 mblk_t *
   2970 ipsecesp_icmp_error(mblk_t *data_mp, ip_recv_attr_t *ira)
   2971 {
   2972 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
   2973 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   2974 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   2975 
   2976 	/*
   2977 	 * Unless we get an entire packet back, this function is useless.
   2978 	 * Why?
   2979 	 *
   2980 	 * 1.)	Partial packets are useless, because the "next header"
   2981 	 *	is at the end of the decrypted ESP packet.  Without the
   2982 	 *	whole packet, this is useless.
   2983 	 *
   2984 	 * 2.)	If we every use a stateful cipher, such as a stream or a
   2985 	 *	one-time pad, we can't do anything.
   2986 	 *
   2987 	 * Since the chances of us getting an entire packet back are very
   2988 	 * very small, we discard here.
   2989 	 */
   2990 	IP_ESP_BUMP_STAT(ipss, in_discards);
   2991 	ip_drop_packet(data_mp, B_TRUE, ira->ira_ill,
   2992 	    DROPPER(ipss, ipds_esp_icmp),
   2993 	    &espstack->esp_dropper);
   2994 	return (NULL);
   2995 }
   2996 
   2997 /*
   2998  * Construct an SADB_REGISTER message with the current algorithms.
   2999  * This function gets called when 'ipsecalgs -s' is run or when
   3000  * in.iked (or other KMD) starts.
   3001  */
   3002 static boolean_t
   3003 esp_register_out(uint32_t sequence, uint32_t pid, uint_t serial,
   3004     ipsecesp_stack_t *espstack, cred_t *cr)
   3005 {
   3006 	mblk_t *pfkey_msg_mp, *keysock_out_mp;
   3007 	sadb_msg_t *samsg;
   3008 	sadb_supported_t *sasupp_auth = NULL;
   3009 	sadb_supported_t *sasupp_encr = NULL;
   3010 	sadb_alg_t *saalg;
   3011 	uint_t allocsize = sizeof (*samsg);
   3012 	uint_t i, numalgs_snap;
   3013 	int current_aalgs;
   3014 	ipsec_alginfo_t **authalgs;
   3015 	uint_t num_aalgs;
   3016 	int current_ealgs;
   3017 	ipsec_alginfo_t **encralgs;
   3018 	uint_t num_ealgs;
   3019 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
   3020 	sadb_sens_t *sens;
   3021 	size_t sens_len = 0;
   3022 	sadb_ext_t *nextext;
   3023 	ts_label_t *sens_tsl = NULL;
   3024 
   3025 	/* Allocate the KEYSOCK_OUT. */
   3026 	keysock_out_mp = sadb_keysock_out(serial);
   3027 	if (keysock_out_mp == NULL) {
   3028 		esp0dbg(("esp_register_out: couldn't allocate mblk.\n"));
   3029 		return (B_FALSE);
   3030 	}
   3031 
   3032 	if (is_system_labeled() && (cr != NULL)) {
   3033 		sens_tsl = crgetlabel(cr);
   3034 		if (sens_tsl != NULL) {
   3035 			sens_len = sadb_sens_len_from_label(sens_tsl);
   3036 			allocsize += sens_len;
   3037 		}
   3038 	}
   3039 
   3040 	/*
   3041 	 * Allocate the PF_KEY message that follows KEYSOCK_OUT.
   3042 	 */
   3043 
   3044 	mutex_enter(&ipss->ipsec_alg_lock);
   3045 	/*
   3046 	 * Fill SADB_REGISTER message's algorithm descriptors.  Hold
   3047 	 * down the lock while filling it.
   3048 	 *
   3049 	 * Return only valid algorithms, so the number of algorithms
   3050 	 * to send up may be less than the number of algorithm entries
   3051 	 * in the table.
   3052 	 */
   3053 	authalgs = ipss->ipsec_alglists[IPSEC_ALG_AUTH];
   3054 	for (num_aalgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
   3055 		if (authalgs[i] != NULL && ALG_VALID(authalgs[i]))
   3056 			num_aalgs++;
   3057 
   3058 	if (num_aalgs != 0) {
   3059 		allocsize += (num_aalgs * sizeof (*saalg));
   3060 		allocsize += sizeof (*sasupp_auth);
   3061 	}
   3062 	encralgs = ipss->ipsec_alglists[IPSEC_ALG_ENCR];
   3063 	for (num_ealgs = 0, i = 0; i < IPSEC_MAX_ALGS; i++)
   3064 		if (encralgs[i] != NULL && ALG_VALID(encralgs[i]))
   3065 			num_ealgs++;
   3066 
   3067 	if (num_ealgs != 0) {
   3068 		allocsize += (num_ealgs * sizeof (*saalg));
   3069 		allocsize += sizeof (*sasupp_encr);
   3070 	}
   3071 	keysock_out_mp->b_cont = allocb(allocsize, BPRI_HI);
   3072 	if (keysock_out_mp->b_cont == NULL) {
   3073 		mutex_exit(&ipss->ipsec_alg_lock);
   3074 		freemsg(keysock_out_mp);
   3075 		return (B_FALSE);
   3076 	}
   3077 	pfkey_msg_mp = keysock_out_mp->b_cont;
   3078 	pfkey_msg_mp->b_wptr += allocsize;
   3079 
   3080 	nextext = (sadb_ext_t *)(pfkey_msg_mp->b_rptr + sizeof (*samsg));
   3081 
   3082 	if (num_aalgs != 0) {
   3083 		sasupp_auth = (sadb_supported_t *)nextext;
   3084 		saalg = (sadb_alg_t *)(sasupp_auth + 1);
   3085 
   3086 		ASSERT(((ulong_t)saalg & 0x7) == 0);
   3087 
   3088 		numalgs_snap = 0;
   3089 		for (i = 0;
   3090 		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_aalgs));
   3091 		    i++) {
   3092 			if (authalgs[i] == NULL || !ALG_VALID(authalgs[i]))
   3093 				continue;
   3094 
   3095 			saalg->sadb_alg_id = authalgs[i]->alg_id;
   3096 			saalg->sadb_alg_ivlen = 0;
   3097 			saalg->sadb_alg_minbits	= authalgs[i]->alg_ef_minbits;
   3098 			saalg->sadb_alg_maxbits	= authalgs[i]->alg_ef_maxbits;
   3099 			saalg->sadb_x_alg_increment =
   3100 			    authalgs[i]->alg_increment;
   3101 			saalg->sadb_x_alg_saltbits = SADB_8TO1(
   3102 			    authalgs[i]->alg_saltlen);
   3103 			numalgs_snap++;
   3104 			saalg++;
   3105 		}
   3106 		ASSERT(numalgs_snap == num_aalgs);
   3107 #ifdef DEBUG
   3108 		/*
   3109 		 * Reality check to make sure I snagged all of the
   3110 		 * algorithms.
   3111 		 */
   3112 		for (; i < IPSEC_MAX_ALGS; i++) {
   3113 			if (authalgs[i] != NULL && ALG_VALID(authalgs[i])) {
   3114 				cmn_err(CE_PANIC, "esp_register_out()! "
   3115 				    "Missed aalg #%d.\n", i);
   3116 			}
   3117 		}
   3118 #endif /* DEBUG */
   3119 		nextext = (sadb_ext_t *)saalg;
   3120 	}
   3121 
   3122 	if (num_ealgs != 0) {
   3123 		sasupp_encr = (sadb_supported_t *)nextext;
   3124 		saalg = (sadb_alg_t *)(sasupp_encr + 1);
   3125 
   3126 		numalgs_snap = 0;
   3127 		for (i = 0;
   3128 		    ((i < IPSEC_MAX_ALGS) && (numalgs_snap < num_ealgs)); i++) {
   3129 			if (encralgs[i] == NULL || !ALG_VALID(encralgs[i]))
   3130 				continue;
   3131 			saalg->sadb_alg_id = encralgs[i]->alg_id;
   3132 			saalg->sadb_alg_ivlen = encralgs[i]->alg_ivlen;
   3133 			saalg->sadb_alg_minbits	= encralgs[i]->alg_ef_minbits;
   3134 			saalg->sadb_alg_maxbits	= encralgs[i]->alg_ef_maxbits;
   3135 			/*
   3136 			 * We could advertise the ICV length, except there
   3137 			 * is not a value in sadb_x_algb to do this.
   3138 			 * saalg->sadb_alg_maclen = encralgs[i]->alg_maclen;
   3139 			 */
   3140 			saalg->sadb_x_alg_increment =
   3141 			    encralgs[i]->alg_increment;
   3142 			saalg->sadb_x_alg_saltbits =
   3143 			    SADB_8TO1(encralgs[i]->alg_saltlen);
   3144 
   3145 			numalgs_snap++;
   3146 			saalg++;
   3147 		}
   3148 		ASSERT(numalgs_snap == num_ealgs);
   3149 #ifdef DEBUG
   3150 		/*
   3151 		 * Reality check to make sure I snagged all of the
   3152 		 * algorithms.
   3153 		 */
   3154 		for (; i < IPSEC_MAX_ALGS; i++) {
   3155 			if (encralgs[i] != NULL && ALG_VALID(encralgs[i])) {
   3156 				cmn_err(CE_PANIC, "esp_register_out()! "
   3157 				    "Missed ealg #%d.\n", i);
   3158 			}
   3159 		}
   3160 #endif /* DEBUG */
   3161 		nextext = (sadb_ext_t *)saalg;
   3162 	}
   3163 
   3164 	current_aalgs = num_aalgs;
   3165 	current_ealgs = num_ealgs;
   3166 
   3167 	mutex_exit(&ipss->ipsec_alg_lock);
   3168 
   3169 	if (sens_tsl != NULL) {
   3170 		sens = (sadb_sens_t *)nextext;
   3171 		sadb_sens_from_label(sens, SADB_EXT_SENSITIVITY,
   3172 		    sens_tsl, sens_len);
   3173 
   3174 		nextext = (sadb_ext_t *)(((uint8_t *)sens) + sens_len);
   3175 	}
   3176 
   3177 	/* Now fill the rest of the SADB_REGISTER message. */
   3178 
   3179 	samsg = (sadb_msg_t *)pfkey_msg_mp->b_rptr;
   3180 	samsg->sadb_msg_version = PF_KEY_V2;
   3181 	samsg->sadb_msg_type = SADB_REGISTER;
   3182 	samsg->sadb_msg_errno = 0;
   3183 	samsg->sadb_msg_satype = SADB_SATYPE_ESP;
   3184 	samsg->sadb_msg_len = SADB_8TO64(allocsize);
   3185 	samsg->sadb_msg_reserved = 0;
   3186 	/*
   3187 	 * Assume caller has sufficient sequence/pid number info.  If it's one
   3188 	 * from me over a new alg., I could give two hoots about sequence.
   3189 	 */
   3190 	samsg->sadb_msg_seq = sequence;
   3191 	samsg->sadb_msg_pid = pid;
   3192 
   3193 	if (sasupp_auth != NULL) {
   3194 		sasupp_auth->sadb_supported_len = SADB_8TO64(
   3195 		    sizeof (*sasupp_auth) + sizeof (*saalg) * current_aalgs);
   3196 		sasupp_auth->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
   3197 		sasupp_auth->sadb_supported_reserved = 0;
   3198 	}
   3199 
   3200 	if (sasupp_encr != NULL) {
   3201 		sasupp_encr->sadb_supported_len = SADB_8TO64(
   3202 		    sizeof (*sasupp_encr) + sizeof (*saalg) * current_ealgs);
   3203 		sasupp_encr->sadb_supported_exttype =
   3204 		    SADB_EXT_SUPPORTED_ENCRYPT;
   3205 		sasupp_encr->sadb_supported_reserved = 0;
   3206 	}
   3207 
   3208 	if (espstack->esp_pfkey_q != NULL)
   3209 		putnext(espstack->esp_pfkey_q, keysock_out_mp);
   3210 	else {
   3211 		freemsg(keysock_out_mp);
   3212 		return (B_FALSE);
   3213 	}
   3214 
   3215 	return (B_TRUE);
   3216 }
   3217 
   3218 /*
   3219  * Invoked when the algorithm table changes. Causes SADB_REGISTER
   3220  * messages continaining the current list of algorithms to be
   3221  * sent up to the ESP listeners.
   3222  */
   3223 void
   3224 ipsecesp_algs_changed(netstack_t *ns)
   3225 {
   3226 	ipsecesp_stack_t	*espstack = ns->netstack_ipsecesp;
   3227 
   3228 	/*
   3229 	 * Time to send a PF_KEY SADB_REGISTER message to ESP listeners
   3230 	 * everywhere.  (The function itself checks for NULL esp_pfkey_q.)
   3231 	 */
   3232 	(void) esp_register_out(0, 0, 0, espstack, NULL);
   3233 }
   3234 
   3235 /*
   3236  * Stub function that taskq_dispatch() invokes to take the mblk (in arg)
   3237  * and send() it into ESP and IP again.
   3238  */
   3239 static void
   3240 inbound_task(void *arg)
   3241 {
   3242 	mblk_t		*mp = (mblk_t *)arg;
   3243 	mblk_t		*async_mp;
   3244 	ip_recv_attr_t	iras;
   3245 
   3246 	async_mp = mp;
   3247 	mp = async_mp->b_cont;
   3248 	async_mp->b_cont = NULL;
   3249 	if (!ip_recv_attr_from_mblk(async_mp, &iras)) {
   3250 		/* The ill or ip_stack_t disappeared on us */
   3251 		ip_drop_input("ip_recv_attr_from_mblk", mp, NULL);
   3252 		freemsg(mp);
   3253 		goto done;
   3254 	}
   3255 
   3256 	esp_inbound_restart(mp, &iras);
   3257 done:
   3258 	ira_cleanup(&iras, B_TRUE);
   3259 }
   3260 
   3261 /*
   3262  * Restart ESP after the SA has been added.
   3263  */
   3264 static void
   3265 esp_inbound_restart(mblk_t *mp, ip_recv_attr_t *ira)
   3266 {
   3267 	esph_t		*esph;
   3268 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
   3269 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   3270 
   3271 	esp2dbg(espstack, ("in ESP inbound_task"));
   3272 	ASSERT(espstack != NULL);
   3273 
   3274 	mp = ipsec_inbound_esp_sa(mp, ira, &esph);
   3275 	if (mp == NULL)
   3276 		return;
   3277 
   3278 	ASSERT(esph != NULL);
   3279 	ASSERT(ira->ira_flags & IRAF_IPSEC_SECURE);
   3280 	ASSERT(ira->ira_ipsec_esp_sa != NULL);
   3281 
   3282 	mp = ira->ira_ipsec_esp_sa->ipsa_input_func(mp, esph, ira);
   3283 	if (mp == NULL) {
   3284 		/*
   3285 		 * Either it failed or is pending. In the former case
   3286 		 * ipIfStatsInDiscards was increased.
   3287 		 */
   3288 		return;
   3289 	}
   3290 
   3291 	ip_input_post_ipsec(mp, ira);
   3292 }
   3293 
   3294 /*
   3295  * Now that weak-key passed, actually ADD the security association, and
   3296  * send back a reply ADD message.
   3297  */
   3298 static int
   3299 esp_add_sa_finish(mblk_t *mp, sadb_msg_t *samsg, keysock_in_t *ksi,
   3300     int *diagnostic, ipsecesp_stack_t *espstack)
   3301 {
   3302 	isaf_t *primary = NULL, *secondary;
   3303 	boolean_t clone = B_FALSE, is_inbound = B_FALSE;
   3304 	ipsa_t *larval = NULL;
   3305 	ipsacq_t *acqrec;
   3306 	iacqf_t *acq_bucket;
   3307 	mblk_t *acq_msgs = NULL;
   3308 	int rc;
   3309 	mblk_t *lpkt;
   3310 	int error;
   3311 	ipsa_query_t sq;
   3312 	ipsec_stack_t	*ipss = espstack->ipsecesp_netstack->netstack_ipsec;
   3313 
   3314 	/*
   3315 	 * Locate the appropriate table(s).
   3316 	 */
   3317 	sq.spp = &espstack->esp_sadb;	/* XXX */
   3318 	error = sadb_form_query(ksi, IPSA_Q_SA|IPSA_Q_DST,
   3319 	    IPSA_Q_SA|IPSA_Q_DST|IPSA_Q_INBOUND|IPSA_Q_OUTBOUND,
   3320 	    &sq, diagnostic);
   3321 	if (error)
   3322 		return (error);
   3323 
   3324 	/*
   3325 	 * Use the direction flags provided by the KMD to determine
   3326 	 * if the inbound or outbound table should be the primary
   3327 	 * for this SA. If these flags were absent then make this
   3328 	 * decision based on the addresses.
   3329 	 */
   3330 	if (sq.assoc->sadb_sa_flags & IPSA_F_INBOUND) {
   3331 		primary = sq.inbound;
   3332 		secondary = sq.outbound;
   3333 		is_inbound = B_TRUE;
   3334 		if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND)
   3335 			clone = B_TRUE;
   3336 	} else if (sq.assoc->sadb_sa_flags & IPSA_F_OUTBOUND) {
   3337 		primary = sq.outbound;
   3338 		secondary = sq.inbound;
   3339 	}
   3340 
   3341 	if (primary == NULL) {
   3342 		/*
   3343 		 * The KMD did not set a direction flag, determine which
   3344 		 * table to insert the SA into based on addresses.
   3345 		 */
   3346 		switch (ksi->ks_in_dsttype) {
   3347 		case KS_IN_ADDR_MBCAST:
   3348 			clone = B_TRUE;	/* All mcast SAs can be bidirectional */
   3349 			sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
   3350 			/* FALLTHRU */
   3351 		/*
   3352 		 * If the source address is either one of mine, or unspecified
   3353 		 * (which is best summed up by saying "not 'not mine'"),
   3354 		 * then the association is potentially bi-directional,
   3355 		 * in that it can be used for inbound traffic and outbound
   3356 		 * traffic.  The best example of such an SA is a multicast
   3357 		 * SA (which allows me to receive the outbound traffic).
   3358 		 */
   3359 		case KS_IN_ADDR_ME:
   3360 			sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
   3361 			primary = sq.inbound;
   3362 			secondary = sq.outbound;
   3363 			if (ksi->ks_in_srctype != KS_IN_ADDR_NOTME)
   3364 				clone = B_TRUE;
   3365 			is_inbound = B_TRUE;
   3366 			break;
   3367 		/*
   3368 		 * If the source address literally not mine (either
   3369 		 * unspecified or not mine), then this SA may have an
   3370 		 * address that WILL be mine after some configuration.
   3371 		 * We pay the price for this by making it a bi-directional
   3372 		 * SA.
   3373 		 */
   3374 		case KS_IN_ADDR_NOTME:
   3375 			sq.assoc->sadb_sa_flags |= IPSA_F_OUTBOUND;
   3376 			primary = sq.outbound;
   3377 			secondary = sq.inbound;
   3378 			if (ksi->ks_in_srctype != KS_IN_ADDR_ME) {
   3379 				sq.assoc->sadb_sa_flags |= IPSA_F_INBOUND;
   3380 				clone = B_TRUE;
   3381 			}
   3382 			break;
   3383 		default:
   3384 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_DST;
   3385 			return (EINVAL);
   3386 		}
   3387 	}
   3388 
   3389 	/*
   3390 	 * Find a ACQUIRE list entry if possible.  If we've added an SA that
   3391 	 * suits the needs of an ACQUIRE list entry, we can eliminate the
   3392 	 * ACQUIRE list entry and transmit the enqueued packets.  Use the
   3393 	 * high-bit of the sequence number to queue it.  Key off destination
   3394 	 * addr, and change acqrec's state.
   3395 	 */
   3396 
   3397 	if (samsg->sadb_msg_seq & IACQF_LOWEST_SEQ) {
   3398 		acq_bucket = &(sq.sp->sdb_acq[sq.outhash]);
   3399 		mutex_enter(&acq_bucket->iacqf_lock);
   3400 		for (acqrec = acq_bucket->iacqf_ipsacq; acqrec != NULL;
   3401 		    acqrec = acqrec->ipsacq_next) {
   3402 			mutex_enter(&acqrec->ipsacq_lock);
   3403 			/*
   3404 			 * Q:  I only check sequence.  Should I check dst?
   3405 			 * A: Yes, check dest because those are the packets
   3406 			 *    that are queued up.
   3407 			 */
   3408 			if (acqrec->ipsacq_seq == samsg->sadb_msg_seq &&
   3409 			    IPSA_ARE_ADDR_EQUAL(sq.dstaddr,
   3410 			    acqrec->ipsacq_dstaddr, acqrec->ipsacq_addrfam))
   3411 				break;
   3412 			mutex_exit(&acqrec->ipsacq_lock);
   3413 		}
   3414 		if (acqrec != NULL) {
   3415 			/*
   3416 			 * AHA!  I found an ACQUIRE record for this SA.
   3417 			 * Grab the msg list, and free the acquire record.
   3418 			 * I already am holding the lock for this record,
   3419 			 * so all I have to do is free it.
   3420 			 */
   3421 			acq_msgs = acqrec->ipsacq_mp;
   3422 			acqrec->ipsacq_mp = NULL;
   3423 			mutex_exit(&acqrec->ipsacq_lock);
   3424 			sadb_destroy_acquire(acqrec,
   3425 			    espstack->ipsecesp_netstack);
   3426 		}
   3427 		mutex_exit(&acq_bucket->iacqf_lock);
   3428 	}
   3429 
   3430 	/*
   3431 	 * Find PF_KEY message, and see if I'm an update.  If so, find entry
   3432 	 * in larval list (if there).
   3433 	 */
   3434 	if (samsg->sadb_msg_type == SADB_UPDATE) {
   3435 		mutex_enter(&sq.inbound->isaf_lock);
   3436 		larval = ipsec_getassocbyspi(sq.inbound, sq.assoc->sadb_sa_spi,
   3437 		    ALL_ZEROES_PTR, sq.dstaddr, sq.dst->sin_family);
   3438 		mutex_exit(&sq.inbound->isaf_lock);
   3439 
   3440 		if ((larval == NULL) ||
   3441 		    (larval->ipsa_state != IPSA_STATE_LARVAL)) {
   3442 			*diagnostic = SADB_X_DIAGNOSTIC_SA_NOTFOUND;
   3443 			if (larval != NULL) {
   3444 				IPSA_REFRELE(larval);
   3445 			}
   3446 			esp0dbg(("Larval update, but larval disappeared.\n"));
   3447 			return (ESRCH);
   3448 		} /* Else sadb_common_add unlinks it for me! */
   3449 	}
   3450 
   3451 	lpkt = NULL;
   3452 	if (larval != NULL)
   3453 		lpkt = sadb_clear_lpkt(larval);
   3454 
   3455 	rc = sadb_common_add(espstack->esp_pfkey_q,
   3456 	    mp, samsg, ksi, primary, secondary, larval, clone, is_inbound,
   3457 	    diagnostic, espstack->ipsecesp_netstack, &espstack->esp_sadb);
   3458 
   3459 	if (lpkt != NULL) {
   3460 		if (rc == 0) {
   3461 			rc = !taskq_dispatch(esp_taskq, inbound_task,
   3462 			    lpkt, TQ_NOSLEEP);
   3463 		}
   3464 		if (rc != 0) {
   3465 			lpkt = ip_recv_attr_free_mblk(lpkt);
   3466 			ip_drop_packet(lpkt, B_TRUE, NULL,
   3467 			    DROPPER(ipss, ipds_sadb_inlarval_timeout),
   3468 			    &espstack->esp_dropper);
   3469 		}
   3470 	}
   3471 
   3472 	/*
   3473 	 * How much more stack will I create with all of these
   3474 	 * esp_outbound() calls?
   3475 	 */
   3476 
   3477 	/* Handle the packets queued waiting for the SA */
   3478 	while (acq_msgs != NULL) {
   3479 		mblk_t		*asyncmp;
   3480 		mblk_t		*data_mp;
   3481 		ip_xmit_attr_t	ixas;
   3482 		ill_t		*ill;
   3483 
   3484 		asyncmp = acq_msgs;
   3485 		acq_msgs = acq_msgs->b_next;
   3486 		asyncmp->b_next = NULL;
   3487 
   3488 		/*
   3489 		 * Extract the ip_xmit_attr_t from the first mblk.
   3490 		 * Verifies that the netstack and ill is still around; could
   3491 		 * have vanished while iked was doing its work.
   3492 		 * On succesful return we have a nce_t and the ill/ipst can't
   3493 		 * disappear until we do the nce_refrele in ixa_cleanup.
   3494 		 */
   3495 		data_mp = asyncmp->b_cont;
   3496 		asyncmp->b_cont = NULL;
   3497 		if (!ip_xmit_attr_from_mblk(asyncmp, &ixas)) {
   3498 			ESP_BUMP_STAT(espstack, out_discards);
   3499 			ip_drop_packet(data_mp, B_FALSE, NULL,
   3500 			    DROPPER(ipss, ipds_sadb_acquire_timeout),
   3501 			    &espstack->esp_dropper);
   3502 		} else if (rc != 0) {
   3503 			ill = ixas.ixa_nce->nce_ill;
   3504 			ESP_BUMP_STAT(espstack, out_discards);
   3505 			ip_drop_packet(data_mp, B_FALSE, ill,
   3506 			    DROPPER(ipss, ipds_sadb_acquire_timeout),
   3507 			    &espstack->esp_dropper);
   3508 			BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   3509 		} else {
   3510 			esp_outbound_finish(data_mp, &ixas);
   3511 		}
   3512 		ixa_cleanup(&ixas);
   3513 	}
   3514 
   3515 	return (rc);
   3516 }
   3517 
   3518 /*
   3519  * Process one of the queued messages (from ipsacq_mp) once the SA
   3520  * has been added.
   3521  */
   3522 static void
   3523 esp_outbound_finish(mblk_t *data_mp, ip_xmit_attr_t *ixa)
   3524 {
   3525 	netstack_t	*ns = ixa->ixa_ipst->ips_netstack;
   3526 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   3527 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   3528 	ill_t		*ill = ixa->ixa_nce->nce_ill;
   3529 
   3530 	if (!ipsec_outbound_sa(data_mp, ixa, IPPROTO_ESP)) {
   3531 		ESP_BUMP_STAT(espstack, out_discards);
   3532 		ip_drop_packet(data_mp, B_FALSE, ill,
   3533 		    DROPPER(ipss, ipds_sadb_acquire_timeout),
   3534 		    &espstack->esp_dropper);
   3535 		BUMP_MIB(ill->ill_ip_mib, ipIfStatsOutDiscards);
   3536 		return;
   3537 	}
   3538 
   3539 	data_mp = esp_outbound(data_mp, ixa);
   3540 	if (data_mp == NULL)
   3541 		return;
   3542 
   3543 	/* do AH processing if needed */
   3544 	data_mp = esp_do_outbound_ah(data_mp, ixa);
   3545 	if (data_mp == NULL)
   3546 		return;
   3547 
   3548 	(void) ip_output_post_ipsec(data_mp, ixa);
   3549 }
   3550 
   3551 /*
   3552  * Add new ESP security association.  This may become a generic AH/ESP
   3553  * routine eventually.
   3554  */
   3555 static int
   3556 esp_add_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic, netstack_t *ns)
   3557 {
   3558 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
   3559 	sadb_address_t *srcext =
   3560 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
   3561 	sadb_address_t *dstext =
   3562 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
   3563 	sadb_address_t *isrcext =
   3564 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_SRC];
   3565 	sadb_address_t *idstext =
   3566 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_INNER_DST];
   3567 	sadb_address_t *nttext_loc =
   3568 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_LOC];
   3569 	sadb_address_t *nttext_rem =
   3570 	    (sadb_address_t *)ksi->ks_in_extv[SADB_X_EXT_ADDRESS_NATT_REM];
   3571 	sadb_key_t *akey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_AUTH];
   3572 	sadb_key_t *ekey = (sadb_key_t *)ksi->ks_in_extv[SADB_EXT_KEY_ENCRYPT];
   3573 	struct sockaddr_in *src, *dst;
   3574 	struct sockaddr_in *natt_loc, *natt_rem;
   3575 	struct sockaddr_in6 *natt_loc6, *natt_rem6;
   3576 	sadb_lifetime_t *soft =
   3577 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_SOFT];
   3578 	sadb_lifetime_t *hard =
   3579 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_EXT_LIFETIME_HARD];
   3580 	sadb_lifetime_t *idle =
   3581 	    (sadb_lifetime_t *)ksi->ks_in_extv[SADB_X_EXT_LIFETIME_IDLE];
   3582 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   3583 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   3584 
   3585 
   3586 
   3587 	/* I need certain extensions present for an ADD message. */
   3588 	if (srcext == NULL) {
   3589 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SRC;
   3590 		return (EINVAL);
   3591 	}
   3592 	if (dstext == NULL) {
   3593 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
   3594 		return (EINVAL);
   3595 	}
   3596 	if (isrcext == NULL && idstext != NULL) {
   3597 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_SRC;
   3598 		return (EINVAL);
   3599 	}
   3600 	if (isrcext != NULL && idstext == NULL) {
   3601 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_INNER_DST;
   3602 		return (EINVAL);
   3603 	}
   3604 	if (assoc == NULL) {
   3605 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
   3606 		return (EINVAL);
   3607 	}
   3608 	if (ekey == NULL && assoc->sadb_sa_encrypt != SADB_EALG_NULL) {
   3609 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_EKEY;
   3610 		return (EINVAL);
   3611 	}
   3612 
   3613 	src = (struct sockaddr_in *)(srcext + 1);
   3614 	dst = (struct sockaddr_in *)(dstext + 1);
   3615 	natt_loc = (struct sockaddr_in *)(nttext_loc + 1);
   3616 	natt_loc6 = (struct sockaddr_in6 *)(nttext_loc + 1);
   3617 	natt_rem = (struct sockaddr_in *)(nttext_rem + 1);
   3618 	natt_rem6 = (struct sockaddr_in6 *)(nttext_rem + 1);
   3619 
   3620 	/* Sundry ADD-specific reality checks. */
   3621 	/* XXX STATS :  Logging/stats here? */
   3622 
   3623 	if ((assoc->sadb_sa_state != SADB_SASTATE_MATURE) &&
   3624 	    (assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE_ELSEWHERE)) {
   3625 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SASTATE;
   3626 		return (EINVAL);
   3627 	}
   3628 	if (assoc->sadb_sa_encrypt == SADB_EALG_NONE) {
   3629 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
   3630 		return (EINVAL);
   3631 	}
   3632 
   3633 #ifndef IPSEC_LATENCY_TEST
   3634 	if (assoc->sadb_sa_encrypt == SADB_EALG_NULL &&
   3635 	    assoc->sadb_sa_auth == SADB_AALG_NONE) {
   3636 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
   3637 		return (EINVAL);
   3638 	}
   3639 #endif
   3640 
   3641 	if (assoc->sadb_sa_flags & ~espstack->esp_sadb.s_addflags) {
   3642 		*diagnostic = SADB_X_DIAGNOSTIC_BAD_SAFLAGS;
   3643 		return (EINVAL);
   3644 	}
   3645 
   3646 	if ((*diagnostic = sadb_hardsoftchk(hard, soft, idle)) != 0) {
   3647 		return (EINVAL);
   3648 	}
   3649 	ASSERT(src->sin_family == dst->sin_family);
   3650 
   3651 	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_LOC) {
   3652 		if (nttext_loc == NULL) {
   3653 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_LOC;
   3654 			return (EINVAL);
   3655 		}
   3656 
   3657 		if (natt_loc->sin_family == AF_INET6 &&
   3658 		    !IN6_IS_ADDR_V4MAPPED(&natt_loc6->sin6_addr)) {
   3659 			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_LOC;
   3660 			return (EINVAL);
   3661 		}
   3662 	}
   3663 
   3664 	if (assoc->sadb_sa_flags & SADB_X_SAFLAGS_NATT_REM) {
   3665 		if (nttext_rem == NULL) {
   3666 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_NATT_REM;
   3667 			return (EINVAL);
   3668 		}
   3669 		if (natt_rem->sin_family == AF_INET6 &&
   3670 		    !IN6_IS_ADDR_V4MAPPED(&natt_rem6->sin6_addr)) {
   3671 			*diagnostic = SADB_X_DIAGNOSTIC_MALFORMED_NATT_REM;
   3672 			return (EINVAL);
   3673 		}
   3674 	}
   3675 
   3676 
   3677 	/* Stuff I don't support, for now.  XXX Diagnostic? */
   3678 	if (ksi->ks_in_extv[SADB_EXT_LIFETIME_CURRENT] != NULL)
   3679 		return (EOPNOTSUPP);
   3680 
   3681 	if ((*diagnostic = sadb_labelchk(ksi)) != 0)
   3682 		return (EINVAL);
   3683 
   3684 	/*
   3685 	 * XXX Policy :  I'm not checking identities at this time,
   3686 	 * but if I did, I'd do them here, before I sent
   3687 	 * the weak key check up to the algorithm.
   3688 	 */
   3689 
   3690 	mutex_enter(&ipss->ipsec_alg_lock);
   3691 
   3692 	/*
   3693 	 * First locate the authentication algorithm.
   3694 	 */
   3695 #ifdef IPSEC_LATENCY_TEST
   3696 	if (akey != NULL && assoc->sadb_sa_auth != SADB_AALG_NONE) {
   3697 #else
   3698 	if (akey != NULL) {
   3699 #endif
   3700 		ipsec_alginfo_t *aalg;
   3701 
   3702 		aalg = ipss->ipsec_alglists[IPSEC_ALG_AUTH]
   3703 		    [assoc->sadb_sa_auth];
   3704 		if (aalg == NULL || !ALG_VALID(aalg)) {
   3705 			mutex_exit(&ipss->ipsec_alg_lock);
   3706 			esp1dbg(espstack, ("Couldn't find auth alg #%d.\n",
   3707 			    assoc->sadb_sa_auth));
   3708 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AALG;
   3709 			return (EINVAL);
   3710 		}
   3711 
   3712 		/*
   3713 		 * Sanity check key sizes.
   3714 		 * Note: It's not possible to use SADB_AALG_NONE because
   3715 		 * this auth_alg is not defined with ALG_FLAG_VALID. If this
   3716 		 * ever changes, the same check for SADB_AALG_NONE and
   3717 		 * a auth_key != NULL should be made here ( see below).
   3718 		 */
   3719 		if (!ipsec_valid_key_size(akey->sadb_key_bits, aalg)) {
   3720 			mutex_exit(&ipss->ipsec_alg_lock);
   3721 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_AKEYBITS;
   3722 			return (EINVAL);
   3723 		}
   3724 		ASSERT(aalg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
   3725 
   3726 		/* check key and fix parity if needed */
   3727 		if (ipsec_check_key(aalg->alg_mech_type, akey, B_TRUE,
   3728 		    diagnostic) != 0) {
   3729 			mutex_exit(&ipss->ipsec_alg_lock);
   3730 			return (EINVAL);
   3731 		}
   3732 	}
   3733 
   3734 	/*
   3735 	 * Then locate the encryption algorithm.
   3736 	 */
   3737 	if (ekey != NULL) {
   3738 		uint_t keybits;
   3739 		ipsec_alginfo_t *ealg;
   3740 
   3741 		ealg = ipss->ipsec_alglists[IPSEC_ALG_ENCR]
   3742 		    [assoc->sadb_sa_encrypt];
   3743 		if (ealg == NULL || !ALG_VALID(ealg)) {
   3744 			mutex_exit(&ipss->ipsec_alg_lock);
   3745 			esp1dbg(espstack, ("Couldn't find encr alg #%d.\n",
   3746 			    assoc->sadb_sa_encrypt));
   3747 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EALG;
   3748 			return (EINVAL);
   3749 		}
   3750 
   3751 		/*
   3752 		 * Sanity check key sizes. If the encryption algorithm is
   3753 		 * SADB_EALG_NULL but the encryption key is NOT
   3754 		 * NULL then complain.
   3755 		 *
   3756 		 * The keying material includes salt bits if required by
   3757 		 * algorithm and optionally the Initial IV, check the
   3758 		 * length of whats left.
   3759 		 */
   3760 		keybits = ekey->sadb_key_bits;
   3761 		keybits -= ekey->sadb_key_reserved;
   3762 		keybits -= SADB_8TO1(ealg->alg_saltlen);
   3763 		if ((assoc->sadb_sa_encrypt == SADB_EALG_NULL) ||
   3764 		    (!ipsec_valid_key_size(keybits, ealg))) {
   3765 			mutex_exit(&ipss->ipsec_alg_lock);
   3766 			*diagnostic = SADB_X_DIAGNOSTIC_BAD_EKEYBITS;
   3767 			return (EINVAL);
   3768 		}
   3769 		ASSERT(ealg->alg_mech_type != CRYPTO_MECHANISM_INVALID);
   3770 
   3771 		/* check key */
   3772 		if (ipsec_check_key(ealg->alg_mech_type, ekey, B_FALSE,
   3773 		    diagnostic) != 0) {
   3774 			mutex_exit(&ipss->ipsec_alg_lock);
   3775 			return (EINVAL);
   3776 		}
   3777 	}
   3778 	mutex_exit(&ipss->ipsec_alg_lock);
   3779 
   3780 	return (esp_add_sa_finish(mp, (sadb_msg_t *)mp->b_cont->b_rptr, ksi,
   3781 	    diagnostic, espstack));
   3782 }
   3783 
   3784 /*
   3785  * Update a security association.  Updates come in two varieties.  The first
   3786  * is an update of lifetimes on a non-larval SA.  The second is an update of
   3787  * a larval SA, which ends up looking a lot more like an add.
   3788  */
   3789 static int
   3790 esp_update_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
   3791     ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
   3792 {
   3793 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
   3794 	mblk_t    *buf_pkt;
   3795 	int rcode;
   3796 
   3797 	sadb_address_t *dstext =
   3798 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
   3799 
   3800 	if (dstext == NULL) {
   3801 		*diagnostic = SADB_X_DIAGNOSTIC_MISSING_DST;
   3802 		return (EINVAL);
   3803 	}
   3804 
   3805 	rcode = sadb_update_sa(mp, ksi, &buf_pkt, &espstack->esp_sadb,
   3806 	    diagnostic, espstack->esp_pfkey_q, esp_add_sa,
   3807 	    espstack->ipsecesp_netstack, sadb_msg_type);
   3808 
   3809 	if ((assoc->sadb_sa_state != SADB_X_SASTATE_ACTIVE) ||
   3810 	    (rcode != 0)) {
   3811 		return (rcode);
   3812 	}
   3813 
   3814 	HANDLE_BUF_PKT(esp_taskq, espstack->ipsecesp_netstack->netstack_ipsec,
   3815 	    espstack->esp_dropper, buf_pkt);
   3816 
   3817 	return (rcode);
   3818 }
   3819 
   3820 /* XXX refactor me */
   3821 /*
   3822  * Delete a security association.  This is REALLY likely to be code common to
   3823  * both AH and ESP.  Find the association, then unlink it.
   3824  */
   3825 static int
   3826 esp_del_sa(mblk_t *mp, keysock_in_t *ksi, int *diagnostic,
   3827     ipsecesp_stack_t *espstack, uint8_t sadb_msg_type)
   3828 {
   3829 	sadb_sa_t *assoc = (sadb_sa_t *)ksi->ks_in_extv[SADB_EXT_SA];
   3830 	sadb_address_t *dstext =
   3831 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_DST];
   3832 	sadb_address_t *srcext =
   3833 	    (sadb_address_t *)ksi->ks_in_extv[SADB_EXT_ADDRESS_SRC];
   3834 	struct sockaddr_in *sin;
   3835 
   3836 	if (assoc == NULL) {
   3837 		if (dstext != NULL) {
   3838 			sin = (struct sockaddr_in *)(dstext + 1);
   3839 		} else if (srcext != NULL) {
   3840 			sin = (struct sockaddr_in *)(srcext + 1);
   3841 		} else {
   3842 			*diagnostic = SADB_X_DIAGNOSTIC_MISSING_SA;
   3843 			return (EINVAL);
   3844 		}
   3845 		return (sadb_purge_sa(mp, ksi,
   3846 		    (sin->sin_family == AF_INET6) ? &espstack->esp_sadb.s_v6 :
   3847 		    &espstack->esp_sadb.s_v4, diagnostic,
   3848 		    espstack->esp_pfkey_q));
   3849 	}
   3850 
   3851 	return (sadb_delget_sa(mp, ksi, &espstack->esp_sadb, diagnostic,
   3852 	    espstack->esp_pfkey_q, sadb_msg_type));
   3853 }
   3854 
   3855 /* XXX refactor me */
   3856 /*
   3857  * Convert the entire contents of all of ESP's SA tables into PF_KEY SADB_DUMP
   3858  * messages.
   3859  */
   3860 static void
   3861 esp_dump(mblk_t *mp, keysock_in_t *ksi, ipsecesp_stack_t *espstack)
   3862 {
   3863 	int error;
   3864 	sadb_msg_t *samsg;
   3865 
   3866 	/*
   3867 	 * Dump each fanout, bailing if error is non-zero.
   3868 	 */
   3869 
   3870 	error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
   3871 	    &espstack->esp_sadb.s_v4);
   3872 	if (error != 0)
   3873 		goto bail;
   3874 
   3875 	error = sadb_dump(espstack->esp_pfkey_q, mp, ksi,
   3876 	    &espstack->esp_sadb.s_v6);
   3877 bail:
   3878 	ASSERT(mp->b_cont != NULL);
   3879 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
   3880 	samsg->sadb_msg_errno = (uint8_t)error;
   3881 	sadb_pfkey_echo(espstack->esp_pfkey_q, mp,
   3882 	    (sadb_msg_t *)mp->b_cont->b_rptr, ksi, NULL);
   3883 }
   3884 
   3885 /*
   3886  * First-cut reality check for an inbound PF_KEY message.
   3887  */
   3888 static boolean_t
   3889 esp_pfkey_reality_failures(mblk_t *mp, keysock_in_t *ksi,
   3890     ipsecesp_stack_t *espstack)
   3891 {
   3892 	int diagnostic;
   3893 
   3894 	if (ksi->ks_in_extv[SADB_EXT_PROPOSAL] != NULL) {
   3895 		diagnostic = SADB_X_DIAGNOSTIC_PROP_PRESENT;
   3896 		goto badmsg;
   3897 	}
   3898 	if (ksi->ks_in_extv[SADB_EXT_SUPPORTED_AUTH] != NULL ||
   3899 	    ksi->ks_in_extv[SADB_EXT_SUPPORTED_ENCRYPT] != NULL) {
   3900 		diagnostic = SADB_X_DIAGNOSTIC_SUPP_PRESENT;
   3901 		goto badmsg;
   3902 	}
   3903 	return (B_FALSE);	/* False ==> no failures */
   3904 
   3905 badmsg:
   3906 	sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL, diagnostic,
   3907 	    ksi->ks_in_serial);
   3908 	return (B_TRUE);	/* True ==> failures */
   3909 }
   3910 
   3911 /*
   3912  * ESP parsing of PF_KEY messages.  Keysock did most of the really silly
   3913  * error cases.  What I receive is a fully-formed, syntactically legal
   3914  * PF_KEY message.  I then need to check semantics...
   3915  *
   3916  * This code may become common to AH and ESP.  Stay tuned.
   3917  *
   3918  * I also make the assumption that db_ref's are cool.  If this assumption
   3919  * is wrong, this means that someone other than keysock or me has been
   3920  * mucking with PF_KEY messages.
   3921  */
   3922 static void
   3923 esp_parse_pfkey(mblk_t *mp, ipsecesp_stack_t *espstack)
   3924 {
   3925 	mblk_t *msg = mp->b_cont;
   3926 	sadb_msg_t *samsg;
   3927 	keysock_in_t *ksi;
   3928 	int error;
   3929 	int diagnostic = SADB_X_DIAGNOSTIC_NONE;
   3930 
   3931 	ASSERT(msg != NULL);
   3932 
   3933 	samsg = (sadb_msg_t *)msg->b_rptr;
   3934 	ksi = (keysock_in_t *)mp->b_rptr;
   3935 
   3936 	/*
   3937 	 * If applicable, convert unspecified AF_INET6 to unspecified
   3938 	 * AF_INET.  And do other address reality checks.
   3939 	 */
   3940 	if (!sadb_addrfix(ksi, espstack->esp_pfkey_q, mp,
   3941 	    espstack->ipsecesp_netstack) ||
   3942 	    esp_pfkey_reality_failures(mp, ksi, espstack)) {
   3943 		return;
   3944 	}
   3945 
   3946 	switch (samsg->sadb_msg_type) {
   3947 	case SADB_ADD:
   3948 		error = esp_add_sa(mp, ksi, &diagnostic,
   3949 		    espstack->ipsecesp_netstack);
   3950 		if (error != 0) {
   3951 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
   3952 			    diagnostic, ksi->ks_in_serial);
   3953 		}
   3954 		/* else esp_add_sa() took care of things. */
   3955 		break;
   3956 	case SADB_DELETE:
   3957 	case SADB_X_DELPAIR:
   3958 	case SADB_X_DELPAIR_STATE:
   3959 		error = esp_del_sa(mp, ksi, &diagnostic, espstack,
   3960 		    samsg->sadb_msg_type);
   3961 		if (error != 0) {
   3962 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
   3963 			    diagnostic, ksi->ks_in_serial);
   3964 		}
   3965 		/* Else esp_del_sa() took care of things. */
   3966 		break;
   3967 	case SADB_GET:
   3968 		error = sadb_delget_sa(mp, ksi, &espstack->esp_sadb,
   3969 		    &diagnostic, espstack->esp_pfkey_q, samsg->sadb_msg_type);
   3970 		if (error != 0) {
   3971 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
   3972 			    diagnostic, ksi->ks_in_serial);
   3973 		}
   3974 		/* Else sadb_get_sa() took care of things. */
   3975 		break;
   3976 	case SADB_FLUSH:
   3977 		sadbp_flush(&espstack->esp_sadb, espstack->ipsecesp_netstack);
   3978 		sadb_pfkey_echo(espstack->esp_pfkey_q, mp, samsg, ksi, NULL);
   3979 		break;
   3980 	case SADB_REGISTER:
   3981 		/*
   3982 		 * Hmmm, let's do it!  Check for extensions (there should
   3983 		 * be none), extract the fields, call esp_register_out(),
   3984 		 * then either free or report an error.
   3985 		 *
   3986 		 * Keysock takes care of the PF_KEY bookkeeping for this.
   3987 		 */
   3988 		if (esp_register_out(samsg->sadb_msg_seq, samsg->sadb_msg_pid,
   3989 		    ksi->ks_in_serial, espstack, msg_getcred(mp, NULL))) {
   3990 			freemsg(mp);
   3991 		} else {
   3992 			/*
   3993 			 * Only way this path hits is if there is a memory
   3994 			 * failure.  It will not return B_FALSE because of
   3995 			 * lack of esp_pfkey_q if I am in wput().
   3996 			 */
   3997 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, ENOMEM,
   3998 			    diagnostic, ksi->ks_in_serial);
   3999 		}
   4000 		break;
   4001 	case SADB_UPDATE:
   4002 	case SADB_X_UPDATEPAIR:
   4003 		/*
   4004 		 * Find a larval, if not there, find a full one and get
   4005 		 * strict.
   4006 		 */
   4007 		error = esp_update_sa(mp, ksi, &diagnostic, espstack,
   4008 		    samsg->sadb_msg_type);
   4009 		if (error != 0) {
   4010 			sadb_pfkey_error(espstack->esp_pfkey_q, mp, error,
   4011 			    diagnostic, ksi->ks_in_serial);
   4012 		}
   4013 		/* else esp_update_sa() took care of things. */
   4014 		break;
   4015 	case SADB_GETSPI:
   4016 		/*
   4017 		 * Reserve a new larval entry.
   4018 		 */
   4019 		esp_getspi(mp, ksi, espstack);
   4020 		break;
   4021 	case SADB_ACQUIRE:
   4022 		/*
   4023 		 * Find larval and/or ACQUIRE record and kill it (them), I'm
   4024 		 * most likely an error.  Inbound ACQUIRE messages should only
   4025 		 * have the base header.
   4026 		 */
   4027 		sadb_in_acquire(samsg, &espstack->esp_sadb,
   4028 		    espstack->esp_pfkey_q, espstack->ipsecesp_netstack);
   4029 		freemsg(mp);
   4030 		break;
   4031 	case SADB_DUMP:
   4032 		/*
   4033 		 * Dump all entries.
   4034 		 */
   4035 		esp_dump(mp, ksi, espstack);
   4036 		/* esp_dump will take care of the return message, etc. */
   4037 		break;
   4038 	case SADB_EXPIRE:
   4039 		/* Should never reach me. */
   4040 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EOPNOTSUPP,
   4041 		    diagnostic, ksi->ks_in_serial);
   4042 		break;
   4043 	default:
   4044 		sadb_pfkey_error(espstack->esp_pfkey_q, mp, EINVAL,
   4045 		    SADB_X_DIAGNOSTIC_UNKNOWN_MSG, ksi->ks_in_serial);
   4046 		break;
   4047 	}
   4048 }
   4049 
   4050 /*
   4051  * Handle case where PF_KEY says it can't find a keysock for one of my
   4052  * ACQUIRE messages.
   4053  */
   4054 static void
   4055 esp_keysock_no_socket(mblk_t *mp, ipsecesp_stack_t *espstack)
   4056 {
   4057 	sadb_msg_t *samsg;
   4058 	keysock_out_err_t *kse = (keysock_out_err_t *)mp->b_rptr;
   4059 
   4060 	if (mp->b_cont == NULL) {
   4061 		freemsg(mp);
   4062 		return;
   4063 	}
   4064 	samsg = (sadb_msg_t *)mp->b_cont->b_rptr;
   4065 
   4066 	/*
   4067 	 * If keysock can't find any registered, delete the acquire record
   4068 	 * immediately, and handle errors.
   4069 	 */
   4070 	if (samsg->sadb_msg_type == SADB_ACQUIRE) {
   4071 		samsg->sadb_msg_errno = kse->ks_err_errno;
   4072 		samsg->sadb_msg_len = SADB_8TO64(sizeof (*samsg));
   4073 		/*
   4074 		 * Use the write-side of the esp_pfkey_q
   4075 		 */
   4076 		sadb_in_acquire(samsg, &espstack->esp_sadb,
   4077 		    WR(espstack->esp_pfkey_q), espstack->ipsecesp_netstack);
   4078 	}
   4079 
   4080 	freemsg(mp);
   4081 }
   4082 
   4083 /*
   4084  * ESP module write put routine.
   4085  */
   4086 static void
   4087 ipsecesp_wput(queue_t *q, mblk_t *mp)
   4088 {
   4089 	ipsec_info_t *ii;
   4090 	struct iocblk *iocp;
   4091 	ipsecesp_stack_t	*espstack = (ipsecesp_stack_t *)q->q_ptr;
   4092 
   4093 	esp3dbg(espstack, ("In esp_wput().\n"));
   4094 
   4095 	/* NOTE: Each case must take care of freeing or passing mp. */
   4096 	switch (mp->b_datap->db_type) {
   4097 	case M_CTL:
   4098 		if ((mp->b_wptr - mp->b_rptr) < sizeof (ipsec_info_t)) {
   4099 			/* Not big enough message. */
   4100 			freemsg(mp);
   4101 			break;
   4102 		}
   4103 		ii = (ipsec_info_t *)mp->b_rptr;
   4104 
   4105 		switch (ii->ipsec_info_type) {
   4106 		case KEYSOCK_OUT_ERR:
   4107 			esp1dbg(espstack, ("Got KEYSOCK_OUT_ERR message.\n"));
   4108 			esp_keysock_no_socket(mp, espstack);
   4109 			break;
   4110 		case KEYSOCK_IN:
   4111 			ESP_BUMP_STAT(espstack, keysock_in);
   4112 			esp3dbg(espstack, ("Got KEYSOCK_IN message.\n"));
   4113 
   4114 			/* Parse the message. */
   4115 			esp_parse_pfkey(mp, espstack);
   4116 			break;
   4117 		case KEYSOCK_HELLO:
   4118 			sadb_keysock_hello(&espstack->esp_pfkey_q, q, mp,
   4119 			    esp_ager, (void *)espstack, &espstack->esp_event,
   4120 			    SADB_SATYPE_ESP);
   4121 			break;
   4122 		default:
   4123 			esp2dbg(espstack, ("Got M_CTL from above of 0x%x.\n",
   4124 			    ii->ipsec_info_type));
   4125 			freemsg(mp);
   4126 			break;
   4127 		}
   4128 		break;
   4129 	case M_IOCTL:
   4130 		iocp = (struct iocblk *)mp->b_rptr;
   4131 		switch (iocp->ioc_cmd) {
   4132 		case ND_SET:
   4133 		case ND_GET:
   4134 			if (nd_getset(q, espstack->ipsecesp_g_nd, mp)) {
   4135 				qreply(q, mp);
   4136 				return;
   4137 			} else {
   4138 				iocp->ioc_error = ENOENT;
   4139 			}
   4140 			/* FALLTHRU */
   4141 		default:
   4142 			/* We really don't support any other ioctls, do we? */
   4143 
   4144 			/* Return EINVAL */
   4145 			if (iocp->ioc_error != ENOENT)
   4146 				iocp->ioc_error = EINVAL;
   4147 			iocp->ioc_count = 0;
   4148 			mp->b_datap->db_type = M_IOCACK;
   4149 			qreply(q, mp);
   4150 			return;
   4151 		}
   4152 	default:
   4153 		esp3dbg(espstack,
   4154 		    ("Got default message, type %d, passing to IP.\n",
   4155 		    mp->b_datap->db_type));
   4156 		putnext(q, mp);
   4157 	}
   4158 }
   4159 
   4160 /*
   4161  * Wrapper to allow IP to trigger an ESP association failure message
   4162  * during inbound SA selection.
   4163  */
   4164 void
   4165 ipsecesp_in_assocfailure(mblk_t *mp, char level, ushort_t sl, char *fmt,
   4166     uint32_t spi, void *addr, int af, ip_recv_attr_t *ira)
   4167 {
   4168 	netstack_t	*ns = ira->ira_ill->ill_ipst->ips_netstack;
   4169 	ipsecesp_stack_t *espstack = ns->netstack_ipsecesp;
   4170 	ipsec_stack_t	*ipss = ns->netstack_ipsec;
   4171 
   4172 	if (espstack->ipsecesp_log_unknown_spi) {
   4173 		ipsec_assocfailure(info.mi_idnum, 0, level, sl, fmt, spi,
   4174 		    addr, af, espstack->ipsecesp_netstack);
   4175 	}
   4176 
   4177 	ip_drop_packet(mp, B_TRUE, ira->ira_ill,
   4178 	    DROPPER(ipss, ipds_esp_no_sa),
   4179 	    &espstack->esp_dropper);
   4180 }
   4181 
   4182 /*
   4183  * Initialize the ESP input and output processing functions.
   4184  */
   4185 void
   4186 ipsecesp_init_funcs(ipsa_t *sa)
   4187 {
   4188 	if (sa->ipsa_output_func == NULL)
   4189 		sa->ipsa_output_func = esp_outbound;
   4190 	if (sa->ipsa_input_func == NULL)
   4191 		sa->ipsa_input_func = esp_inbound;
   4192 }
   4193