Home | History | Annotate | Download | only in ip
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 
     22 /*
     23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     24  * Use is subject to license terms.
     25  */
     26 
     27 /*
     28  * Functions to implement IP address -> link layer address (PSARC 2006/482)
     29  */
     30 #include <inet/ip2mac.h>
     31 #include <inet/ip2mac_impl.h>
     32 #include <sys/zone.h>
     33 #include <inet/ip_ndp.h>
     34 #include <inet/ip_if.h>
     35 #include <inet/ip6.h>
     36 
     37 /*
     38  * dispatch pending callbacks.
     39  */
     40 void
     41 ncec_cb_dispatch(ncec_t *ncec)
     42 {
     43 	ncec_cb_t *ncec_cb;
     44 	ip2mac_t ip2m;
     45 
     46 	mutex_enter(&ncec->ncec_lock);
     47 	if (list_is_empty(&ncec->ncec_cb)) {
     48 		mutex_exit(&ncec->ncec_lock);
     49 		return;
     50 	}
     51 	ncec_ip2mac_response(&ip2m, ncec);
     52 	ncec_cb_refhold_locked(ncec);
     53 	/*
     54 	 * IP does not hold internal locks like nce_lock across calls to
     55 	 * other subsystems for fear of recursive lock entry and lock
     56 	 * hierarchy violation. The caller may be holding locks across
     57 	 * the call to IP. (It would be ideal if no subsystem holds locks
     58 	 * across calls into another subsystem, especially if calls can
     59 	 * happen in either direction).
     60 	 */
     61 	ncec_cb = list_head(&ncec->ncec_cb);
     62 	for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) {
     63 		if (ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED)
     64 			continue;
     65 		ncec_cb->ncec_cb_flags |= NCE_CB_DISPATCHED;
     66 		mutex_exit(&ncec->ncec_lock);
     67 		(*ncec_cb->ncec_cb_func)(&ip2m, ncec_cb->ncec_cb_arg);
     68 		mutex_enter(&ncec->ncec_lock);
     69 	}
     70 	ncec_cb_refrele(ncec);
     71 	mutex_exit(&ncec->ncec_lock);
     72 }
     73 
     74 /*
     75  * fill up the ip2m response fields with inforamation from the nce.
     76  */
     77 void
     78 ncec_ip2mac_response(ip2mac_t *ip2m, ncec_t *ncec)
     79 {
     80 	boolean_t isv6 = (ncec->ncec_ipversion == IPV6_VERSION);
     81 	sin_t	*sin;
     82 	sin6_t	*sin6;
     83 	struct sockaddr_dl *sdl;
     84 
     85 	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
     86 	bzero(ip2m, sizeof (*ip2m));
     87 	if (NCE_ISREACHABLE(ncec) && !NCE_ISCONDEMNED(ncec))
     88 		ip2m->ip2mac_err = 0;
     89 	else
     90 		ip2m->ip2mac_err = ESRCH;
     91 	if (isv6) {
     92 		sin6 = (sin6_t *)&ip2m->ip2mac_pa;
     93 		sin6->sin6_family = AF_INET6;
     94 		sin6->sin6_addr = ncec->ncec_addr;
     95 	} else {
     96 		sin = (sin_t *)&ip2m->ip2mac_pa;
     97 		sin->sin_family = AF_INET;
     98 		IN6_V4MAPPED_TO_INADDR(&ncec->ncec_addr, &sin->sin_addr);
     99 	}
    100 	if (ip2m->ip2mac_err == 0) {
    101 		sdl = &ip2m->ip2mac_ha;
    102 		sdl->sdl_family = AF_LINK;
    103 		sdl->sdl_type = ncec->ncec_ill->ill_type;
    104 		/*
    105 		 * should we put ncec_ill->ill_name in there? why?
    106 		 * likewise for the sdl_index
    107 		 */
    108 		sdl->sdl_nlen = 0;
    109 		sdl->sdl_alen = ncec->ncec_ill->ill_phys_addr_length;
    110 		if (ncec->ncec_lladdr != NULL)
    111 			bcopy(ncec->ncec_lladdr, LLADDR(sdl), sdl->sdl_alen);
    112 	}
    113 }
    114 
    115 void
    116 ncec_cb_refhold_locked(ncec_t *ncec)
    117 {
    118 	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
    119 	ncec->ncec_cb_walker_cnt++;
    120 }
    121 
    122 void
    123 ncec_cb_refrele(ncec_t *ncec)
    124 {
    125 	ncec_cb_t *ncec_cb, *ncec_cb_next = NULL;
    126 
    127 	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
    128 	if (--ncec->ncec_cb_walker_cnt == 0) {
    129 		for (ncec_cb = list_head(&ncec->ncec_cb); ncec_cb != NULL;
    130 		    ncec_cb = ncec_cb_next) {
    131 
    132 			ncec_cb_next = list_next(&ncec->ncec_cb, ncec_cb);
    133 			if ((ncec_cb->ncec_cb_flags & NCE_CB_DISPATCHED) == 0)
    134 				continue;
    135 			list_remove(&ncec->ncec_cb, ncec_cb);
    136 			kmem_free(ncec_cb, sizeof (*ncec_cb));
    137 		}
    138 	}
    139 }
    140 
    141 /*
    142  * add a callback to the nce, so that the callback can be invoked
    143  * after address resolution succeeds/fails.
    144  */
    145 static ip2mac_id_t
    146 ncec_add_cb(ncec_t *ncec, ip2mac_callback_t *cb, void *cbarg)
    147 {
    148 	ncec_cb_t	*nce_cb;
    149 	ip2mac_id_t	ip2mid = NULL;
    150 
    151 	ASSERT(MUTEX_HELD(&ncec->ncec_lock));
    152 	if ((nce_cb = kmem_zalloc(sizeof (*nce_cb), KM_NOSLEEP)) == NULL)
    153 		return (ip2mid);
    154 	nce_cb->ncec_cb_func = cb;
    155 	nce_cb->ncec_cb_arg = cbarg;
    156 	/*
    157 	 * We identify the ncec_cb_t during cancellation by the address
    158 	 * of the nce_cb_t itself, and, as a short-cut for eliminating
    159 	 * clear mismatches, only look in the callback list of ncec's
    160 	 * whose address is equal to the nce_cb_id.
    161 	 */
    162 	nce_cb->ncec_cb_id = ncec; /* no refs! just an address */
    163 	list_insert_tail(&ncec->ncec_cb, nce_cb);
    164 	ip2mid = ncec;  /* this is the id to be used in ip2mac_cancel */
    165 
    166 	return (nce_cb);
    167 }
    168 
    169 /*
    170  * Resolve an IP address to a link-layer address using the data-structures
    171  * defined in PSARC 2006/482. If the current link-layer address for the
    172  * IP address is not known, the state-machine for resolving the resolution
    173  * will be triggered, and the callback function (*cb) will be invoked after
    174  * the resolution completes.
    175  */
    176 ip2mac_id_t
    177 ip2mac(uint_t op, ip2mac_t *ip2m, ip2mac_callback_t *cb, void *cbarg,
    178     zoneid_t zoneid)
    179 {
    180 	ncec_t		*ncec;
    181 	nce_t		*nce = NULL;
    182 	boolean_t	isv6;
    183 	ill_t		*ill;
    184 	netstack_t	*ns;
    185 	ip_stack_t	*ipst;
    186 	ip2mac_id_t	ip2mid = NULL;
    187 	sin_t		*sin;
    188 	sin6_t		*sin6;
    189 	int		err;
    190 	uint64_t	delta;
    191 	boolean_t	need_resolve = B_FALSE;
    192 
    193 	isv6 = (ip2m->ip2mac_pa.ss_family == AF_INET6);
    194 
    195 	ns = netstack_find_by_zoneid(zoneid);
    196 	if (ns == NULL) {
    197 		ip2m->ip2mac_err = EINVAL;
    198 		return (NULL);
    199 	}
    200 	/*
    201 	 * For exclusive stacks we reset the zoneid to zero
    202 	 * since IP uses the global zoneid in the exclusive stacks.
    203 	 */
    204 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
    205 		zoneid = GLOBAL_ZONEID;
    206 	ipst = ns->netstack_ip;
    207 	/*
    208 	 * find the ill from the ip2m->ip2mac_ifindex
    209 	 */
    210 	ill = ill_lookup_on_ifindex(ip2m->ip2mac_ifindex, isv6, ipst);
    211 	if (ill == NULL) {
    212 		ip2m->ip2mac_err = ENXIO;
    213 		netstack_rele(ns);
    214 		return (NULL);
    215 	}
    216 	if (isv6) {
    217 		sin6 = (sin6_t *)&ip2m->ip2mac_pa;
    218 		if (op == IP2MAC_LOOKUP) {
    219 			nce = nce_lookup_v6(ill, &sin6->sin6_addr);
    220 		} else {
    221 			err = nce_lookup_then_add_v6(ill, NULL,
    222 			    ill->ill_phys_addr_length,
    223 			    &sin6->sin6_addr, 0, ND_UNCHANGED, &nce);
    224 		}
    225 	} else  {
    226 		sin = (sin_t *)&ip2m->ip2mac_pa;
    227 		if (op == IP2MAC_LOOKUP) {
    228 			nce = nce_lookup_v4(ill, &sin->sin_addr.s_addr);
    229 		} else {
    230 			err = nce_lookup_then_add_v4(ill, NULL,
    231 			    ill->ill_phys_addr_length,
    232 			    &sin->sin_addr.s_addr, 0, ND_UNCHANGED, &nce);
    233 		}
    234 	}
    235 	if (op == IP2MAC_LOOKUP) {
    236 		if (nce == NULL) {
    237 			ip2m->ip2mac_err = ESRCH;
    238 			goto done;
    239 		}
    240 		ncec = nce->nce_common;
    241 		delta = TICK_TO_MSEC(ddi_get_lbolt64()) - ncec->ncec_last;
    242 		mutex_enter(&ncec->ncec_lock);
    243 		if (NCE_ISREACHABLE(ncec) &&
    244 		    delta < (uint64_t)ill->ill_reachable_time) {
    245 			ncec_ip2mac_response(ip2m, ncec);
    246 			ip2m->ip2mac_err = 0;
    247 		} else {
    248 			ip2m->ip2mac_err = ESRCH;
    249 		}
    250 		mutex_exit(&ncec->ncec_lock);
    251 		goto done;
    252 	} else {
    253 		if (err != 0 && err != EEXIST) {
    254 			ip2m->ip2mac_err = err;
    255 			goto done;
    256 		}
    257 	}
    258 	ncec = nce->nce_common;
    259 	delta = TICK_TO_MSEC(ddi_get_lbolt64()) - ncec->ncec_last;
    260 	mutex_enter(&ncec->ncec_lock);
    261 	if (NCE_ISCONDEMNED(ncec)) {
    262 		ip2m->ip2mac_err = ESRCH;
    263 	} else {
    264 		if (NCE_ISREACHABLE(ncec)) {
    265 			if (NCE_MYADDR(ncec) ||
    266 			    delta < (uint64_t)ill->ill_reachable_time) {
    267 				ncec_ip2mac_response(ip2m, ncec);
    268 				ip2m->ip2mac_err = 0;
    269 				mutex_exit(&ncec->ncec_lock);
    270 				goto done;
    271 			}
    272 			/*
    273 			 * Since we do not control the packet output
    274 			 * path for ip2mac() callers, we need to verify
    275 			 * if the existing information in the nce is
    276 			 * very old, and retrigger resolution if necessary.
    277 			 * We will not return the existing stale
    278 			 * information until it is verified through a
    279 			 * resolver request/response exchange.
    280 			 *
    281 			 * In the future, we may want to support extensions
    282 			 * that do additional callbacks on link-layer updates,
    283 			 * so that we can return the stale information but
    284 			 * also update the caller if the lladdr changes.
    285 			 */
    286 			ncec->ncec_rcnt = ill->ill_xmit_count;
    287 			ncec->ncec_state = ND_PROBE;
    288 			need_resolve = B_TRUE; /* reachable but very old nce */
    289 		} else if (ncec->ncec_state == ND_INITIAL) {
    290 			need_resolve = B_TRUE; /* ND_INITIAL nce */
    291 			ncec->ncec_state = ND_INCOMPLETE;
    292 		}
    293 		/*
    294 		 * NCE not known to be reachable in the recent past. We must
    295 		 * reconfirm the information before returning it to the caller
    296 		 */
    297 		if (ncec->ncec_rcnt > 0) {
    298 			/*
    299 			 * Still resolving this ncec, so we can queue the
    300 			 * callback information in ncec->ncec_cb
    301 			 */
    302 			ip2mid = ncec_add_cb(ncec, cb, cbarg);
    303 			ip2m->ip2mac_err = EINPROGRESS;
    304 		} else {
    305 			/*
    306 			 * No more retransmits allowed -- resolution failed.
    307 			 */
    308 			ip2m->ip2mac_err = ESRCH;
    309 		}
    310 	}
    311 	mutex_exit(&ncec->ncec_lock);
    312 done:
    313 	/*
    314 	 * if NCE_ISREACHABLE(ncec) but very old, or if it is ND_INITIAL,
    315 	 * trigger resolve.
    316 	 */
    317 	if (need_resolve)
    318 		ip_ndp_resolve(ncec);
    319 	if (nce != NULL)
    320 		nce_refrele(nce);
    321 	netstack_rele(ns);
    322 	ill_refrele(ill);
    323 	return (ip2mid);
    324 }
    325 
/*
 * Data passed to ncec_walk for canceling outstanding callbacks.
 * ip2mac_cancel() fills it in and ip2mac_cancel_callback() consumes it.
 */
typedef struct ip2mac_cancel_data_s {
	/* id of the callback to cancel, as passed to ip2mac_cancel() */
	ip2mac_id_t ip2m_cancel_id;
	/* result returned by ip2mac_cancel(): 0, or an errno such as EBUSY */
	int	ip2m_cancel_err;
} ip2mac_cancel_data_t;
    333 
    334 /*
    335  * callback invoked for each active ncec. If the ip2mac_id_t corresponds
    336  * to an active nce_cb_t in the ncec's callback list, we want to remove
    337  * the callback (if there are no walkers) or return EBUSY to the caller
    338  */
    339 static int
    340 ip2mac_cancel_callback(ncec_t *ncec, void *arg)
    341 {
    342 	ip2mac_cancel_data_t *ip2m_wdata = arg;
    343 	ncec_cb_t *ip2m_nce_cb = ip2m_wdata->ip2m_cancel_id;
    344 	ncec_cb_t *ncec_cb;
    345 
    346 	if (ip2m_nce_cb->ncec_cb_id != ncec)
    347 		return (0);
    348 
    349 	mutex_enter(&ncec->ncec_lock);
    350 	if (list_is_empty(&ncec->ncec_cb)) {
    351 		mutex_exit(&ncec->ncec_lock);
    352 		return (0);
    353 	}
    354 	/*
    355 	 * IP does not hold internal locks like nce_lock across calls to
    356 	 * other subsystems for fear of recursive lock entry and lock
    357 	 * hierarchy violation. The caller may be holding locks across
    358 	 * the call to IP. (It would be ideal if no subsystem holds locks
    359 	 * across calls into another subsystem, especially if calls can
    360 	 * happen in either direction).
    361 	 */
    362 	ncec_cb = list_head(&ncec->ncec_cb);
    363 	for (; ncec_cb != NULL; ncec_cb = list_next(&ncec->ncec_cb, ncec_cb)) {
    364 		if (ncec_cb != ip2m_nce_cb)
    365 			continue;
    366 		/*
    367 		 * If there are no walkers we can remove the nce_cb.
    368 		 * Otherwise the exiting walker will clean up.
    369 		 */
    370 		if (ncec->ncec_cb_walker_cnt == 0) {
    371 			list_remove(&ncec->ncec_cb, ncec_cb);
    372 		} else {
    373 			ip2m_wdata->ip2m_cancel_err = EBUSY;
    374 		}
    375 		break;
    376 	}
    377 	mutex_exit(&ncec->ncec_lock);
    378 	return (0);
    379 }
    380 
    381 /*
    382  * cancel an outstanding timeout set up via ip2mac
    383  */
    384 int
    385 ip2mac_cancel(ip2mac_id_t ip2mid, zoneid_t zoneid)
    386 {
    387 	netstack_t	*ns;
    388 	ip_stack_t	*ipst;
    389 	ip2mac_cancel_data_t ip2m_wdata;
    390 
    391 	ns = netstack_find_by_zoneid(zoneid);
    392 	if (ns == NULL) {
    393 		ip2m_wdata.ip2m_cancel_err = EINVAL;
    394 		return (ip2m_wdata.ip2m_cancel_err);
    395 	}
    396 	/*
    397 	 * For exclusive stacks we reset the zoneid to zero
    398 	 * since IP uses the global zoneid in the exclusive stacks.
    399 	 */
    400 	if (ns->netstack_stackid != GLOBAL_NETSTACKID)
    401 		zoneid = GLOBAL_ZONEID;
    402 	ipst = ns->netstack_ip;
    403 
    404 	ip2m_wdata.ip2m_cancel_id = ip2mid;
    405 	ip2m_wdata.ip2m_cancel_err = 0;
    406 	ncec_walk(NULL, ip2mac_cancel_callback, &ip2m_wdata, ipst);
    407 	/*
    408 	 * We may return EBUSY if a walk to dispatch callbacks is
    409 	 * in progress, in which case the caller needs to synchronize
    410 	 * with the registered callback function to make sure the
    411 	 * module does not exit when there is a callback pending.
    412 	 */
    413 	netstack_rele(ns);
    414 	return (ip2m_wdata.ip2m_cancel_err);
    415 }
    416