Home | History | Annotate | Download | only in ipf
      1 /*
      2  * Copyright (C) 1993-2001, 2003 by Darren Reed.
      3  *
      4  * See the IPFILTER.LICENCE file for details on licencing.
      5  *
      6  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
      7  * Use is subject to license terms.
      8  */
      9 
     10 #if !defined(lint)
     11 static const char sccsid[] = "@(#)ip_fil_solaris.c	1.7 07/22/06 (C) 1993-2000 Darren Reed";
     12 static const char rcsid[] = "@(#)$Id: ip_fil_solaris.c,v 2.62.2.19 2005/07/13 21:40:46 darrenr Exp $";
     13 #endif
     14 
     15 #include <sys/types.h>
     16 #include <sys/errno.h>
     17 #include <sys/param.h>
     18 #include <sys/cpuvar.h>
     19 #include <sys/open.h>
     20 #include <sys/ioctl.h>
     21 #include <sys/filio.h>
     22 #include <sys/systm.h>
     23 #include <sys/strsubr.h>
     24 #include <sys/cred.h>
     25 #include <sys/ddi.h>
     26 #include <sys/sunddi.h>
     27 #include <sys/ksynch.h>
     28 #include <sys/kmem.h>
     29 #include <sys/mkdev.h>
     30 #include <sys/protosw.h>
     31 #include <sys/socket.h>
     32 #include <sys/dditypes.h>
     33 #include <sys/cmn_err.h>
     34 #include <sys/zone.h>
     35 #include <net/if.h>
     36 #include <net/af.h>
     37 #include <net/route.h>
     38 #include <netinet/in.h>
     39 #include <netinet/in_systm.h>
     40 #include <netinet/ip.h>
     41 #include <netinet/ip_var.h>
     42 #include <netinet/tcp.h>
     43 #include <netinet/udp.h>
     44 #include <netinet/tcpip.h>
     45 #include <netinet/ip_icmp.h>
     46 #include "netinet/ip_compat.h"
     47 #ifdef	USE_INET6
     48 # include <netinet/icmp6.h>
     49 #endif
     50 #include "netinet/ip_fil.h"
     51 #include "netinet/ip_nat.h"
     52 #include "netinet/ip_frag.h"
     53 #include "netinet/ip_state.h"
     54 #include "netinet/ip_auth.h"
     55 #include "netinet/ip_proxy.h"
     56 #include "netinet/ipf_stack.h"
     57 #ifdef	IPFILTER_LOOKUP
     58 # include "netinet/ip_lookup.h"
     59 #endif
     60 #include <inet/ip_ire.h>
     61 
     62 #include <sys/md5.h>
     63 #include <sys/neti.h>
     64 
/*
 * Forward declarations.
 *
 * The static prototypes cover file-local helpers (ioctl support and packet
 * sending) and the net hook callbacks that iplattach() installs via
 * HOOK_INIT(): one NIC-event callback per address family plus in/out and
 * loopback in/out packet callbacks for IPv4 and IPv6.
 *
 * The two extern prototypes are iterator entry points defined outside this
 * section of the file.
 *
 * NOTE(review): frzerostats() is declared static here, but the SIOCFRZST
 * case in iplioctl() calls fr_zerostats() - confirm whether this prototype
 * is stale.
 */
static	int	frzerostats __P((caddr_t, ipf_stack_t *));
static	int	fr_setipfloopback __P((int, ipf_stack_t *));
static	int	fr_enableipf __P((ipf_stack_t *, int));
static	int	fr_send_ip __P((fr_info_t *fin, mblk_t *m, mblk_t **mp));
static	int	ipf_nic_event_v4 __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_nic_event_v6 __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook __P((hook_data_t, int, int, void *));
static	int	ipf_hook4_in __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook4_out __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook4_loop_out __P((hook_event_token_t, hook_data_t,
    void *));
static	int	ipf_hook4_loop_in __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook4 __P((hook_data_t, int, int, void *));
static	int	ipf_hook6_out __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook6_in __P((hook_event_token_t, hook_data_t, void *));
static	int	ipf_hook6_loop_out __P((hook_event_token_t, hook_data_t,
    void *));
static	int	ipf_hook6_loop_in __P((hook_event_token_t, hook_data_t,
    void *));
static	int     ipf_hook6 __P((hook_data_t, int, int, void *));
extern	int	ipf_geniter __P((ipftoken_t *, ipfgeniter_t *, ipf_stack_t *));
extern	int	ipf_frruleiter __P((void *, int, void *, ipf_stack_t *));
     87 
/*
 * Pointers to IP tunables, only built for releases before Solaris 10.
 *
 * They start out NULL.  iplattach() points them at the matching entries of
 * ip_param_arr[] (or at ip_g_forward for ip_forwarding on Solaris 8 and
 * later) and, when ifs_fr_control_forwarding has bit 0 set, switches
 * forwarding on through them.  ipldetach() switches forwarding off again
 * through the same pointers when bit 1 is set.
 *
 * The element type differs by release: u_long before Solaris 7, u_int from
 * Solaris 7 on, with ip_forwarding becoming a plain int (and ip6_forwarding
 * being added) from Solaris 8.
 */
#if SOLARIS2 < 10
#if SOLARIS2 >= 7
u_int		*ip_ttl_ptr = NULL;
u_int		*ip_mtudisc = NULL;
# if SOLARIS2 >= 8
int		*ip_forwarding = NULL;
u_int		*ip6_forwarding = NULL;
# else
u_int		*ip_forwarding = NULL;
# endif
#else
u_long		*ip_ttl_ptr = NULL;
u_long		*ip_mtudisc = NULL;
u_long		*ip_forwarding = NULL;
#endif
#endif
    104 
    105 
    106 /* ------------------------------------------------------------------------ */
    107 /* Function:    ipldetach                                                   */
    108 /* Returns:     int - 0 == success, else error.                             */
    109 /* Parameters:  Nil                                                         */
    110 /*                                                                          */
    111 /* This function is responsible for undoing anything that might have been   */
    112 /* done in a call to iplattach().  It must be able to clean up from a call  */
    113 /* to iplattach() that did not succeed.  Why might that happen?  Someone    */
    114 /* configures a table to be so large that we cannot allocate enough memory  */
    115 /* for it.                                                                  */
    116 /* ------------------------------------------------------------------------ */
    117 int ipldetach(ifs)
    118 ipf_stack_t *ifs;
    119 {
    120 
    121 	ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
    122 
    123 #if SOLARIS2 < 10
    124 
    125 	if (ifs->ifs_fr_control_forwarding & 2) {
    126 		if (ip_forwarding != NULL)
    127 			*ip_forwarding = 0;
    128 #if SOLARIS2 >= 8
    129 		if (ip6_forwarding != NULL)
    130 			*ip6_forwarding = 0;
    131 #endif
    132 	}
    133 #endif
    134 
    135 	/*
    136 	 * This lock needs to be dropped around the net_hook_unregister calls
    137 	 * because we can deadlock here with:
    138 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
    139 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (active hook running)
    140 	 */
    141 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
    142 
    143 #define	UNDO_HOOK(_f, _b, _e, _h)					\
    144 	do {								\
    145 		if (ifs->_f != NULL) {					\
    146 			if (ifs->_b) {					\
    147 				ifs->_b = (net_hook_unregister(ifs->_f,	\
    148 					   _e, ifs->_h) != 0);		\
    149 				if (!ifs->_b) {				\
    150 					hook_free(ifs->_h);		\
    151 					ifs->_h = NULL;			\
    152 				}					\
    153 			} else if (ifs->_h != NULL) {			\
    154 				hook_free(ifs->_h);			\
    155 				ifs->_h = NULL;				\
    156 			}						\
    157 		}							\
    158 		_NOTE(CONSTCOND)					\
    159 	} while (0)
    160 
    161 	/*
    162 	 * Remove IPv6 Hooks
    163 	 */
    164 	if (ifs->ifs_ipf_ipv6 != NULL) {
    165 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_in,
    166 			  NH_PHYSICAL_IN, ifs_ipfhook6_in);
    167 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_physical_out,
    168 			  NH_PHYSICAL_OUT, ifs_ipfhook6_out);
    169 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_nic_events,
    170 			  NH_NIC_EVENTS, ifs_ipfhook6_nicevents);
    171 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_in,
    172 			  NH_LOOPBACK_IN, ifs_ipfhook6_loop_in);
    173 		UNDO_HOOK(ifs_ipf_ipv6, ifs_hook6_loopback_out,
    174 			  NH_LOOPBACK_OUT, ifs_ipfhook6_loop_out);
    175 
    176 		if (net_protocol_release(ifs->ifs_ipf_ipv6) != 0)
    177 			goto detach_failed;
    178 		ifs->ifs_ipf_ipv6 = NULL;
    179         }
    180 
    181 	/*
    182 	 * Remove IPv4 Hooks
    183 	 */
    184 	if (ifs->ifs_ipf_ipv4 != NULL) {
    185 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_in,
    186 			  NH_PHYSICAL_IN, ifs_ipfhook4_in);
    187 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_physical_out,
    188 			  NH_PHYSICAL_OUT, ifs_ipfhook4_out);
    189 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_nic_events,
    190 			  NH_NIC_EVENTS, ifs_ipfhook4_nicevents);
    191 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_in,
    192 			  NH_LOOPBACK_IN, ifs_ipfhook4_loop_in);
    193 		UNDO_HOOK(ifs_ipf_ipv4, ifs_hook4_loopback_out,
    194 			  NH_LOOPBACK_OUT, ifs_ipfhook4_loop_out);
    195 
    196 		if (net_protocol_release(ifs->ifs_ipf_ipv4) != 0)
    197 			goto detach_failed;
    198 		ifs->ifs_ipf_ipv4 = NULL;
    199 	}
    200 
    201 #undef UNDO_HOOK
    202 
    203 #ifdef	IPFDEBUG
    204 	cmn_err(CE_CONT, "ipldetach()\n");
    205 #endif
    206 
    207 	WRITE_ENTER(&ifs->ifs_ipf_global);
    208 	fr_deinitialise(ifs);
    209 
    210 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE|FR_INACTIVE, ifs);
    211 	(void) frflush(IPL_LOGIPF, 0, FR_INQUE|FR_OUTQUE, ifs);
    212 
    213 	if (ifs->ifs_ipf_locks_done == 1) {
    214 		MUTEX_DESTROY(&ifs->ifs_ipf_timeoutlock);
    215 		MUTEX_DESTROY(&ifs->ifs_ipf_rw);
    216 		RW_DESTROY(&ifs->ifs_ipf_tokens);
    217 		RW_DESTROY(&ifs->ifs_ipf_ipidfrag);
    218 		ifs->ifs_ipf_locks_done = 0;
    219 	}
    220 
    221 	if (ifs->ifs_hook4_physical_in || ifs->ifs_hook4_physical_out ||
    222 	    ifs->ifs_hook4_nic_events || ifs->ifs_hook4_loopback_in ||
    223 	    ifs->ifs_hook4_loopback_out || ifs->ifs_hook6_nic_events ||
    224 	    ifs->ifs_hook6_physical_in || ifs->ifs_hook6_physical_out ||
    225 	    ifs->ifs_hook6_loopback_in || ifs->ifs_hook6_loopback_out)
    226 		return -1;
    227 
    228 	return 0;
    229 
    230 detach_failed:
    231 	WRITE_ENTER(&ifs->ifs_ipf_global);
    232 	return -1;
    233 }
    234 
    235 int iplattach(ifs)
    236 ipf_stack_t *ifs;
    237 {
    238 #if SOLARIS2 < 10
    239 	int i;
    240 #endif
    241 	netid_t id = ifs->ifs_netid;
    242 
    243 #ifdef	IPFDEBUG
    244 	cmn_err(CE_CONT, "iplattach()\n");
    245 #endif
    246 
    247 	ASSERT(rw_read_locked(&ifs->ifs_ipf_global.ipf_lk) == 0);
    248 	ifs->ifs_fr_flags = IPF_LOGGING;
    249 #ifdef _KERNEL
    250 	ifs->ifs_fr_update_ipid = 0;
    251 #else
    252 	ifs->ifs_fr_update_ipid = 1;
    253 #endif
    254 	ifs->ifs_fr_minttl = 4;
    255 	ifs->ifs_fr_icmpminfragmtu = 68;
    256 #if defined(IPFILTER_DEFAULT_BLOCK)
    257 	ifs->ifs_fr_pass = FR_BLOCK|FR_NOMATCH;
    258 #else
    259 	ifs->ifs_fr_pass = (IPF_DEFAULT_PASS)|FR_NOMATCH;
    260 #endif
    261 
    262 	bzero((char *)ifs->ifs_frcache, sizeof(ifs->ifs_frcache));
    263 	MUTEX_INIT(&ifs->ifs_ipf_rw, "ipf rw mutex");
    264 	MUTEX_INIT(&ifs->ifs_ipf_timeoutlock, "ipf timeout lock mutex");
    265 	RWLOCK_INIT(&ifs->ifs_ipf_ipidfrag, "ipf IP NAT-Frag rwlock");
    266 	RWLOCK_INIT(&ifs->ifs_ipf_tokens, "ipf token rwlock");
    267 	ifs->ifs_ipf_locks_done = 1;
    268 
    269 	if (fr_initialise(ifs) < 0)
    270 		return -1;
    271 
    272 	HOOK_INIT(ifs->ifs_ipfhook4_nicevents, ipf_nic_event_v4,
    273 		  "ipfilter_hook4_nicevents", ifs);
    274 	HOOK_INIT(ifs->ifs_ipfhook4_in, ipf_hook4_in,
    275 		  "ipfilter_hook4_in", ifs);
    276 	HOOK_INIT(ifs->ifs_ipfhook4_out, ipf_hook4_out,
    277 		  "ipfilter_hook4_out", ifs);
    278 	HOOK_INIT(ifs->ifs_ipfhook4_loop_in, ipf_hook4_loop_in,
    279 		  "ipfilter_hook4_loop_in", ifs);
    280 	HOOK_INIT(ifs->ifs_ipfhook4_loop_out, ipf_hook4_loop_out,
    281 		  "ipfilter_hook4_loop_out", ifs);
    282 
    283 	/*
    284 	 * If we hold this lock over all of the net_hook_register calls, we
    285 	 * can cause a deadlock to occur with the following lock ordering:
    286 	 * W(ipf_global)->R(hook_family)->W(hei_lock) (this code path) vs
    287 	 * R(hook_family)->R(hei_lock)->R(ipf_global) (packet path)
    288 	 */
    289 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
    290 
    291 	/*
    292 	 * Add IPv4 hooks
    293 	 */
    294 	ifs->ifs_ipf_ipv4 = net_protocol_lookup(id, NHF_INET);
    295 	if (ifs->ifs_ipf_ipv4 == NULL)
    296 		goto hookup_failed;
    297 
    298 	ifs->ifs_hook4_nic_events = (net_hook_register(ifs->ifs_ipf_ipv4,
    299 	    NH_NIC_EVENTS, ifs->ifs_ipfhook4_nicevents) == 0);
    300 	if (!ifs->ifs_hook4_nic_events)
    301 		goto hookup_failed;
    302 
    303 	ifs->ifs_hook4_physical_in = (net_hook_register(ifs->ifs_ipf_ipv4,
    304 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook4_in) == 0);
    305 	if (!ifs->ifs_hook4_physical_in)
    306 		goto hookup_failed;
    307 
    308 	ifs->ifs_hook4_physical_out = (net_hook_register(ifs->ifs_ipf_ipv4,
    309 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook4_out) == 0);
    310 	if (!ifs->ifs_hook4_physical_out)
    311 		goto hookup_failed;
    312 
    313 	if (ifs->ifs_ipf_loopback) {
    314 		ifs->ifs_hook4_loopback_in = (net_hook_register(
    315 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
    316 		    ifs->ifs_ipfhook4_loop_in) == 0);
    317 		if (!ifs->ifs_hook4_loopback_in)
    318 			goto hookup_failed;
    319 
    320 		ifs->ifs_hook4_loopback_out = (net_hook_register(
    321 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
    322 		    ifs->ifs_ipfhook4_loop_out) == 0);
    323 		if (!ifs->ifs_hook4_loopback_out)
    324 			goto hookup_failed;
    325 	}
    326 	/*
    327 	 * Add IPv6 hooks
    328 	 */
    329 	ifs->ifs_ipf_ipv6 = net_protocol_lookup(id, NHF_INET6);
    330 	if (ifs->ifs_ipf_ipv6 == NULL)
    331 		goto hookup_failed;
    332 
    333 	HOOK_INIT(ifs->ifs_ipfhook6_nicevents, ipf_nic_event_v6,
    334 		  "ipfilter_hook6_nicevents", ifs);
    335 	HOOK_INIT(ifs->ifs_ipfhook6_in, ipf_hook6_in,
    336 		  "ipfilter_hook6_in", ifs);
    337 	HOOK_INIT(ifs->ifs_ipfhook6_out, ipf_hook6_out,
    338 		  "ipfilter_hook6_out", ifs);
    339 	HOOK_INIT(ifs->ifs_ipfhook6_loop_in, ipf_hook6_loop_in,
    340 		  "ipfilter_hook6_loop_in", ifs);
    341 	HOOK_INIT(ifs->ifs_ipfhook6_loop_out, ipf_hook6_loop_out,
    342 		  "ipfilter_hook6_loop_out", ifs);
    343 
    344 	ifs->ifs_hook6_nic_events = (net_hook_register(ifs->ifs_ipf_ipv6,
    345 	    NH_NIC_EVENTS, ifs->ifs_ipfhook6_nicevents) == 0);
    346 	if (!ifs->ifs_hook6_nic_events)
    347 		goto hookup_failed;
    348 
    349 	ifs->ifs_hook6_physical_in = (net_hook_register(ifs->ifs_ipf_ipv6,
    350 	    NH_PHYSICAL_IN, ifs->ifs_ipfhook6_in) == 0);
    351 	if (!ifs->ifs_hook6_physical_in)
    352 		goto hookup_failed;
    353 
    354 	ifs->ifs_hook6_physical_out = (net_hook_register(ifs->ifs_ipf_ipv6,
    355 	    NH_PHYSICAL_OUT, ifs->ifs_ipfhook6_out) == 0);
    356 	if (!ifs->ifs_hook6_physical_out)
    357 		goto hookup_failed;
    358 
    359 	if (ifs->ifs_ipf_loopback) {
    360 		ifs->ifs_hook6_loopback_in = (net_hook_register(
    361 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
    362 		    ifs->ifs_ipfhook6_loop_in) == 0);
    363 		if (!ifs->ifs_hook6_loopback_in)
    364 			goto hookup_failed;
    365 
    366 		ifs->ifs_hook6_loopback_out = (net_hook_register(
    367 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
    368 		    ifs->ifs_ipfhook6_loop_out) == 0);
    369 		if (!ifs->ifs_hook6_loopback_out)
    370 			goto hookup_failed;
    371 	}
    372 
    373 	/*
    374 	 * Reacquire ipf_global, now it is safe.
    375 	 */
    376 	WRITE_ENTER(&ifs->ifs_ipf_global);
    377 
    378 /* Do not use private interface ip_params_arr[] in Solaris 10 */
    379 #if SOLARIS2 < 10
    380 
    381 #if SOLARIS2 >= 8
    382 	ip_forwarding = &ip_g_forward;
    383 #endif
    384 	/*
    385 	 * XXX - There is no terminator for this array, so it is not possible
    386 	 * to tell if what we are looking for is missing and go off the end
    387 	 * of the array.
    388 	 */
    389 
    390 #if SOLARIS2 <= 8
    391 	for (i = 0; ; i++) {
    392 		if (!strcmp(ip_param_arr[i].ip_param_name, "ip_def_ttl")) {
    393 			ip_ttl_ptr = &ip_param_arr[i].ip_param_value;
    394 		} else if (!strcmp(ip_param_arr[i].ip_param_name,
    395 			    "ip_path_mtu_discovery")) {
    396 			ip_mtudisc = &ip_param_arr[i].ip_param_value;
    397 		}
    398 #if SOLARIS2 < 8
    399 		else if (!strcmp(ip_param_arr[i].ip_param_name,
    400 			    "ip_forwarding")) {
    401 			ip_forwarding = &ip_param_arr[i].ip_param_value;
    402 		}
    403 #else
    404 		else if (!strcmp(ip_param_arr[i].ip_param_name,
    405 			    "ip6_forwarding")) {
    406 			ip6_forwarding = &ip_param_arr[i].ip_param_value;
    407 		}
    408 #endif
    409 
    410 		if (ip_mtudisc != NULL && ip_ttl_ptr != NULL &&
    411 #if SOLARIS2 >= 8
    412 		    ip6_forwarding != NULL &&
    413 #endif
    414 		    ip_forwarding != NULL)
    415 			break;
    416 	}
    417 #endif
    418 
    419 	if (ifs->ifs_fr_control_forwarding & 1) {
    420 		if (ip_forwarding != NULL)
    421 			*ip_forwarding = 1;
    422 #if SOLARIS2 >= 8
    423 		if (ip6_forwarding != NULL)
    424 			*ip6_forwarding = 1;
    425 #endif
    426 	}
    427 
    428 #endif
    429 
    430 	return 0;
    431 hookup_failed:
    432 	WRITE_ENTER(&ifs->ifs_ipf_global);
    433 	return -1;
    434 }
    435 
    436 static	int	fr_setipfloopback(set, ifs)
    437 int set;
    438 ipf_stack_t *ifs;
    439 {
    440 	if (ifs->ifs_ipf_ipv4 == NULL || ifs->ifs_ipf_ipv6 == NULL)
    441 		return EFAULT;
    442 
    443 	if (set && !ifs->ifs_ipf_loopback) {
    444 		ifs->ifs_ipf_loopback = 1;
    445 
    446 		ifs->ifs_hook4_loopback_in = (net_hook_register(
    447 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_IN,
    448 		    ifs->ifs_ipfhook4_loop_in) == 0);
    449 		if (!ifs->ifs_hook4_loopback_in)
    450 			return EINVAL;
    451 
    452 		ifs->ifs_hook4_loopback_out = (net_hook_register(
    453 		    ifs->ifs_ipf_ipv4, NH_LOOPBACK_OUT,
    454 		    ifs->ifs_ipfhook4_loop_out) == 0);
    455 		if (!ifs->ifs_hook4_loopback_out)
    456 			return EINVAL;
    457 
    458 		ifs->ifs_hook6_loopback_in = (net_hook_register(
    459 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_IN,
    460 		    ifs->ifs_ipfhook6_loop_in) == 0);
    461 		if (!ifs->ifs_hook6_loopback_in)
    462 			return EINVAL;
    463 
    464 		ifs->ifs_hook6_loopback_out = (net_hook_register(
    465 		    ifs->ifs_ipf_ipv6, NH_LOOPBACK_OUT,
    466 		    ifs->ifs_ipfhook6_loop_out) == 0);
    467 		if (!ifs->ifs_hook6_loopback_out)
    468 			return EINVAL;
    469 
    470 	} else if (!set && ifs->ifs_ipf_loopback) {
    471 		ifs->ifs_ipf_loopback = 0;
    472 
    473 		ifs->ifs_hook4_loopback_in =
    474 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
    475 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
    476 		if (ifs->ifs_hook4_loopback_in)
    477 			return EBUSY;
    478 
    479 		ifs->ifs_hook4_loopback_out =
    480 		    (net_hook_unregister(ifs->ifs_ipf_ipv4,
    481 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook4_loop_out) != 0);
    482 		if (ifs->ifs_hook4_loopback_out)
    483 			return EBUSY;
    484 
    485 		ifs->ifs_hook6_loopback_in =
    486 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
    487 		    NH_LOOPBACK_IN, ifs->ifs_ipfhook4_loop_in) != 0);
    488 		if (ifs->ifs_hook6_loopback_in)
    489 			return EBUSY;
    490 
    491 		ifs->ifs_hook6_loopback_out =
    492 		    (net_hook_unregister(ifs->ifs_ipf_ipv6,
    493 		    NH_LOOPBACK_OUT, ifs->ifs_ipfhook6_loop_out) != 0);
    494 		if (ifs->ifs_hook6_loopback_out)
    495 			return EBUSY;
    496 	}
    497 	return 0;
    498 }
    499 
    500 
    501 /*
    502  * Filter ioctl interface.
    503  */
    504 /*ARGSUSED*/
    505 int iplioctl(dev, cmd, data, mode, cp, rp)
    506 dev_t dev;
    507 int cmd;
    508 #if SOLARIS2 >= 7
    509 intptr_t data;
    510 #else
    511 int *data;
    512 #endif
    513 int mode;
    514 cred_t *cp;
    515 int *rp;
    516 {
    517 	int error = 0, tmp;
    518 	friostat_t fio;
    519 	minor_t unit;
    520 	u_int enable;
    521 	ipf_stack_t *ifs;
    522 
    523 #ifdef	IPFDEBUG
    524 	cmn_err(CE_CONT, "iplioctl(%x,%x,%x,%d,%x,%d)\n",
    525 		dev, cmd, data, mode, cp, rp);
    526 #endif
    527 	unit = getminor(dev);
    528 	if (IPL_LOGMAX < unit)
    529 		return ENXIO;
    530 
    531         /*
    532 	 * As we're calling ipf_find_stack in user space, from a given zone
    533 	 * to find the stack pointer for this zone, there is no need to have
    534 	 * a hold/refence count here.
    535 	 */
    536 	ifs = ipf_find_stack(crgetzoneid(cp));
    537 	ASSERT(ifs != NULL);
    538 
    539 	if (ifs->ifs_fr_running <= 0) {
    540 		if (unit != IPL_LOGIPF) {
    541 			return EIO;
    542 		}
    543 		if (cmd != SIOCIPFGETNEXT && cmd != SIOCIPFGET &&
    544 		    cmd != SIOCIPFSET && cmd != SIOCFRENB &&
    545 		    cmd != SIOCGETFS && cmd != SIOCGETFF) {
    546 			return EIO;
    547 		}
    548 	}
    549 
    550 	READ_ENTER(&ifs->ifs_ipf_global);
    551 	if (ifs->ifs_fr_enable_active != 0) {
    552 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
    553 		return EBUSY;
    554 	}
    555 
    556 	error = fr_ioctlswitch(unit, (caddr_t)data, cmd, mode, crgetuid(cp),
    557 			       curproc, ifs);
    558 	if (error != -1) {
    559 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
    560 		return error;
    561 	}
    562 	error = 0;
    563 
    564 	switch (cmd)
    565 	{
    566 	case SIOCFRENB :
    567 		if (!(mode & FWRITE))
    568 			error = EPERM;
    569 		else {
    570 			error = COPYIN((caddr_t)data, (caddr_t)&enable,
    571 				       sizeof(enable));
    572 			if (error != 0) {
    573 				error = EFAULT;
    574 				break;
    575 			}
    576 
    577 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
    578 			WRITE_ENTER(&ifs->ifs_ipf_global);
    579 
    580 			/*
    581 			 * We must recheck fr_enable_active here, since we've
    582 			 * dropped ifs_ipf_global from R in order to get it
    583 			 * exclusively.
    584 			 */
    585 			if (ifs->ifs_fr_enable_active == 0) {
    586 				ifs->ifs_fr_enable_active = 1;
    587 				error = fr_enableipf(ifs, enable);
    588 				ifs->ifs_fr_enable_active = 0;
    589 			}
    590 		}
    591 		break;
    592 	case SIOCIPFSET :
    593 		if (!(mode & FWRITE)) {
    594 			error = EPERM;
    595 			break;
    596 		}
    597 		/* FALLTHRU */
    598 	case SIOCIPFGETNEXT :
    599 	case SIOCIPFGET :
    600 		error = fr_ipftune(cmd, (void *)data, ifs);
    601 		break;
    602 	case SIOCSETFF :
    603 		if (!(mode & FWRITE))
    604 			error = EPERM;
    605 		else {
    606 			error = COPYIN((caddr_t)data,
    607 				       (caddr_t)&ifs->ifs_fr_flags,
    608 				       sizeof(ifs->ifs_fr_flags));
    609 			if (error != 0)
    610 				error = EFAULT;
    611 		}
    612 		break;
    613 	case SIOCIPFLP :
    614 		error = COPYIN((caddr_t)data, (caddr_t)&tmp,
    615 			       sizeof(tmp));
    616 		if (error != 0)
    617 			error = EFAULT;
    618 		else
    619 			error = fr_setipfloopback(tmp, ifs);
    620 		break;
    621 	case SIOCGETFF :
    622 		error = COPYOUT((caddr_t)&ifs->ifs_fr_flags, (caddr_t)data,
    623 				sizeof(ifs->ifs_fr_flags));
    624 		if (error != 0)
    625 			error = EFAULT;
    626 		break;
    627 	case SIOCFUNCL :
    628 		error = fr_resolvefunc((void *)data);
    629 		break;
    630 	case SIOCINAFR :
    631 	case SIOCRMAFR :
    632 	case SIOCADAFR :
    633 	case SIOCZRLST :
    634 		if (!(mode & FWRITE))
    635 			error = EPERM;
    636 		else
    637 			error = frrequest(unit, cmd, (caddr_t)data,
    638 					  ifs->ifs_fr_active, 1, ifs);
    639 		break;
    640 	case SIOCINIFR :
    641 	case SIOCRMIFR :
    642 	case SIOCADIFR :
    643 		if (!(mode & FWRITE))
    644 			error = EPERM;
    645 		else
    646 			error = frrequest(unit, cmd, (caddr_t)data,
    647 					  1 - ifs->ifs_fr_active, 1, ifs);
    648 		break;
    649 	case SIOCSWAPA :
    650 		if (!(mode & FWRITE))
    651 			error = EPERM;
    652 		else {
    653 			WRITE_ENTER(&ifs->ifs_ipf_mutex);
    654 			bzero((char *)ifs->ifs_frcache,
    655 			    sizeof (ifs->ifs_frcache));
    656 			error = COPYOUT((caddr_t)&ifs->ifs_fr_active,
    657 					(caddr_t)data,
    658 					sizeof(ifs->ifs_fr_active));
    659 			if (error != 0)
    660 				error = EFAULT;
    661 			else
    662 				ifs->ifs_fr_active = 1 - ifs->ifs_fr_active;
    663 			RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
    664 		}
    665 		break;
    666 	case SIOCGETFS :
    667 		fr_getstat(&fio, ifs);
    668 		error = fr_outobj((void *)data, &fio, IPFOBJ_IPFSTAT);
    669 		break;
    670 	case SIOCFRZST :
    671 		if (!(mode & FWRITE))
    672 			error = EPERM;
    673 		else
    674 			error = fr_zerostats((caddr_t)data, ifs);
    675 		break;
    676 	case	SIOCIPFFL :
    677 		if (!(mode & FWRITE))
    678 			error = EPERM;
    679 		else {
    680 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
    681 				       sizeof(tmp));
    682 			if (!error) {
    683 				tmp = frflush(unit, 4, tmp, ifs);
    684 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
    685 						sizeof(tmp));
    686 				if (error != 0)
    687 					error = EFAULT;
    688 			} else
    689 				error = EFAULT;
    690 		}
    691 		break;
    692 #ifdef USE_INET6
    693 	case	SIOCIPFL6 :
    694 		if (!(mode & FWRITE))
    695 			error = EPERM;
    696 		else {
    697 			error = COPYIN((caddr_t)data, (caddr_t)&tmp,
    698 				       sizeof(tmp));
    699 			if (!error) {
    700 				tmp = frflush(unit, 6, tmp, ifs);
    701 				error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
    702 						sizeof(tmp));
    703 				if (error != 0)
    704 					error = EFAULT;
    705 			} else
    706 				error = EFAULT;
    707 		}
    708 		break;
    709 #endif
    710 	case SIOCSTLCK :
    711 		error = COPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
    712 		if (error == 0) {
    713 			ifs->ifs_fr_state_lock = tmp;
    714 			ifs->ifs_fr_nat_lock = tmp;
    715 			ifs->ifs_fr_frag_lock = tmp;
    716 			ifs->ifs_fr_auth_lock = tmp;
    717 		} else
    718 			error = EFAULT;
    719 	break;
    720 #ifdef	IPFILTER_LOG
    721 	case	SIOCIPFFB :
    722 		if (!(mode & FWRITE))
    723 			error = EPERM;
    724 		else {
    725 			tmp = ipflog_clear(unit, ifs);
    726 			error = COPYOUT((caddr_t)&tmp, (caddr_t)data,
    727 				       sizeof(tmp));
    728 			if (error)
    729 				error = EFAULT;
    730 		}
    731 		break;
    732 #endif /* IPFILTER_LOG */
    733 	case SIOCFRSYN :
    734 		if (!(mode & FWRITE))
    735 			error = EPERM;
    736 		else {
    737 			RWLOCK_EXIT(&ifs->ifs_ipf_global);
    738 			WRITE_ENTER(&ifs->ifs_ipf_global);
    739 
    740 			frsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
    741 			fr_natifpsync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
    742 			fr_nataddrsync(0, NULL, NULL, ifs);
    743 			fr_statesync(IPFSYNC_RESYNC, 0, NULL, NULL, ifs);
    744 			error = 0;
    745 		}
    746 		break;
    747 	case SIOCGFRST :
    748 		error = fr_outobj((void *)data, fr_fragstats(ifs),
    749 				  IPFOBJ_FRAGSTAT);
    750 		break;
    751 	case FIONREAD :
    752 #ifdef	IPFILTER_LOG
    753 		tmp = (int)ifs->ifs_iplused[IPL_LOGIPF];
    754 
    755 		error = COPYOUT((caddr_t)&tmp, (caddr_t)data, sizeof(tmp));
    756 		if (error != 0)
    757 			error = EFAULT;
    758 #endif
    759 		break;
    760 	case SIOCIPFITER :
    761 		error = ipf_frruleiter((caddr_t)data, crgetuid(cp),
    762 				       curproc, ifs);
    763 		break;
    764 
    765 	case SIOCGENITER :
    766 		error = ipf_genericiter((caddr_t)data, crgetuid(cp),
    767 					curproc, ifs);
    768 		break;
    769 
    770 	case SIOCIPFDELTOK :
    771 		error = BCOPYIN((caddr_t)data, (caddr_t)&tmp, sizeof(tmp));
    772 		if (error != 0) {
    773 			error = EFAULT;
    774 		} else {
    775 			error = ipf_deltoken(tmp, crgetuid(cp), curproc, ifs);
    776 		}
    777 		break;
    778 
    779 	default :
    780 #ifdef	IPFDEBUG
    781 		cmn_err(CE_NOTE, "Unknown: cmd 0x%x data %p",
    782 			cmd, (void *)data);
    783 #endif
    784 		error = EINVAL;
    785 		break;
    786 	}
    787 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
    788 	return error;
    789 }
    790 
    791 
    792 static int fr_enableipf(ifs, enable)
    793 ipf_stack_t *ifs;
    794 int enable;
    795 {
    796 	int error;
    797 
    798 	if (!enable) {
    799 		error = ipldetach(ifs);
    800 		if (error == 0)
    801 			ifs->ifs_fr_running = -1;
    802 		return error;
    803 	}
    804 
    805 	if (ifs->ifs_fr_running > 0)
    806 		return 0;
    807 
    808 	error = iplattach(ifs);
    809 	if (error == 0) {
    810 		if (ifs->ifs_fr_timer_id == NULL) {
    811 			int hz = drv_usectohz(500000);
    812 
    813 			ifs->ifs_fr_timer_id = timeout(fr_slowtimer,
    814 						       (void *)ifs,
    815 						       hz);
    816 		}
    817 		ifs->ifs_fr_running = 1;
    818 	} else {
    819 		(void) ipldetach(ifs);
    820 	}
    821 	return error;
    822 }
    823 
    824 
    825 phy_if_t get_unit(name, v, ifs)
    826 char *name;
    827 int v;
    828 ipf_stack_t *ifs;
    829 {
    830 	net_handle_t nif;
    831 
    832   	if (v == 4)
    833  		nif = ifs->ifs_ipf_ipv4;
    834   	else if (v == 6)
    835  		nif = ifs->ifs_ipf_ipv6;
    836   	else
    837  		return 0;
    838 
    839  	return (net_phylookup(nif, name));
    840 }
    841 
    842 /*
    843  * routines below for saving IP headers to buffer
    844  */
    845 /*ARGSUSED*/
    846 int iplopen(devp, flags, otype, cred)
    847 dev_t *devp;
    848 int flags, otype;
    849 cred_t *cred;
    850 {
    851 	minor_t min = getminor(*devp);
    852 
    853 #ifdef	IPFDEBUG
    854 	cmn_err(CE_CONT, "iplopen(%x,%x,%x,%x)\n", devp, flags, otype, cred);
    855 #endif
    856 	if (!(otype & OTYP_CHR))
    857 		return ENXIO;
    858 
    859 	min = (IPL_LOGMAX < min) ? ENXIO : 0;
    860 	return min;
    861 }
    862 
    863 
    864 /*ARGSUSED*/
    865 int iplclose(dev, flags, otype, cred)
    866 dev_t dev;
    867 int flags, otype;
    868 cred_t *cred;
    869 {
    870 	minor_t	min = getminor(dev);
    871 
    872 #ifdef	IPFDEBUG
    873 	cmn_err(CE_CONT, "iplclose(%x,%x,%x,%x)\n", dev, flags, otype, cred);
    874 #endif
    875 
    876 	min = (IPL_LOGMAX < min) ? ENXIO : 0;
    877 	return min;
    878 }
    879 
    880 #ifdef	IPFILTER_LOG
    881 /*
    882  * iplread/ipllog
    883  * both of these must operate with at least splnet() lest they be
    884  * called during packet processing and cause an inconsistancy to appear in
    885  * the filter lists.
    886  */
    887 /*ARGSUSED*/
    888 int iplread(dev, uio, cp)
    889 dev_t dev;
    890 register struct uio *uio;
    891 cred_t *cp;
    892 {
    893 	ipf_stack_t *ifs;
    894 	int ret;
    895 
    896         /*
    897 	 * As we're calling ipf_find_stack in user space, from a given zone
    898 	 * to find the stack pointer for this zone, there is no need to have
    899 	 * a hold/refence count here.
    900 	 */
    901 	ifs = ipf_find_stack(crgetzoneid(cp));
    902 	ASSERT(ifs != NULL);
    903 
    904 # ifdef	IPFDEBUG
    905 	cmn_err(CE_CONT, "iplread(%x,%x,%x)\n", dev, uio, cp);
    906 # endif
    907 
    908 	if (ifs->ifs_fr_running < 1) {
    909 		return EIO;
    910 	}
    911 
    912 # ifdef	IPFILTER_SYNC
    913 	if (getminor(dev) == IPL_LOGSYNC) {
    914 		return ipfsync_read(uio);
    915 	}
    916 # endif
    917 
    918 	ret = ipflog_read(getminor(dev), uio, ifs);
    919 	return ret;
    920 }
    921 #endif /* IPFILTER_LOG */
    922 
    923 
    924 /*
    925  * iplread/ipllog
    926  * both of these must operate with at least splnet() lest they be
    927  * called during packet processing and cause an inconsistancy to appear in
    928  * the filter lists.
    929  */
    930 int iplwrite(dev, uio, cp)
    931 dev_t dev;
    932 register struct uio *uio;
    933 cred_t *cp;
    934 {
    935 	ipf_stack_t *ifs;
    936 
    937         /*
    938 	 * As we're calling ipf_find_stack in user space, from a given zone
    939 	 * to find the stack pointer for this zone, there is no need to have
    940 	 * a hold/refence count here.
    941 	 */
    942 	ifs = ipf_find_stack(crgetzoneid(cp));
    943 	ASSERT(ifs != NULL);
    944 
    945 #ifdef	IPFDEBUG
    946 	cmn_err(CE_CONT, "iplwrite(%x,%x,%x)\n", dev, uio, cp);
    947 #endif
    948 
    949 	if (ifs->ifs_fr_running < 1) {
    950 		return EIO;
    951 	}
    952 
    953 #ifdef	IPFILTER_SYNC
    954 	if (getminor(dev) == IPL_LOGSYNC)
    955 		return ipfsync_write(uio);
    956 #endif /* IPFILTER_SYNC */
    957 	dev = dev;	/* LINT */
    958 	uio = uio;	/* LINT */
    959 	cp = cp;	/* LINT */
    960 	return ENXIO;
    961 }
    962 
    963 
    964 /*
    965  * fr_send_reset - this could conceivably be a call to tcp_respond(), but that
    966  * requires a large amount of setting up and isn't any more efficient.
    967  */
    968 int fr_send_reset(fin)
    969 fr_info_t *fin;
    970 {
    971 	tcphdr_t *tcp, *tcp2;
    972 	int tlen, hlen;
    973 	mblk_t *m;
    974 #ifdef	USE_INET6
    975 	ip6_t *ip6;
    976 #endif
    977 	ip_t *ip;
    978 
    979 	tcp = fin->fin_dp;
    980 	if (tcp->th_flags & TH_RST)
    981 		return -1;
    982 
    983 #ifndef	IPFILTER_CKSUM
    984 	if (fr_checkl4sum(fin) == -1)
    985 		return -1;
    986 #endif
    987 
    988 	tlen = (tcp->th_flags & (TH_SYN|TH_FIN)) ? 1 : 0;
    989 #ifdef	USE_INET6
    990 	if (fin->fin_v == 6)
    991 		hlen = sizeof(ip6_t);
    992 	else
    993 #endif
    994 		hlen = sizeof(ip_t);
    995 	hlen += sizeof(*tcp2);
    996 	if ((m = (mblk_t *)allocb(hlen + 64, BPRI_HI)) == NULL)
    997 		return -1;
    998 
    999 	m->b_rptr += 64;
   1000 	MTYPE(m) = M_DATA;
   1001 	m->b_wptr = m->b_rptr + hlen;
   1002 	ip = (ip_t *)m->b_rptr;
   1003 	bzero((char *)ip, hlen);
   1004 	tcp2 = (struct tcphdr *)(m->b_rptr + hlen - sizeof(*tcp2));
   1005 	tcp2->th_dport = tcp->th_sport;
   1006 	tcp2->th_sport = tcp->th_dport;
   1007 	if (tcp->th_flags & TH_ACK) {
   1008 		tcp2->th_seq = tcp->th_ack;
   1009 		tcp2->th_flags = TH_RST;
   1010 	} else {
   1011 		tcp2->th_ack = ntohl(tcp->th_seq);
   1012 		tcp2->th_ack += tlen;
   1013 		tcp2->th_ack = htonl(tcp2->th_ack);
   1014 		tcp2->th_flags = TH_RST|TH_ACK;
   1015 	}
   1016 	tcp2->th_off = sizeof(struct tcphdr) >> 2;
   1017 
   1018 	ip->ip_v = fin->fin_v;
   1019 #ifdef	USE_INET6
   1020 	if (fin->fin_v == 6) {
   1021 		ip6 = (ip6_t *)m->b_rptr;
   1022 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
   1023 		ip6->ip6_src = fin->fin_dst6.in6;
   1024 		ip6->ip6_dst = fin->fin_src6.in6;
   1025 		ip6->ip6_plen = htons(sizeof(*tcp));
   1026 		ip6->ip6_nxt = IPPROTO_TCP;
   1027 		tcp2->th_sum = fr_cksum(m, (ip_t *)ip6, IPPROTO_TCP, tcp2);
   1028 	} else
   1029 #endif
   1030 	{
   1031 		ip->ip_src.s_addr = fin->fin_daddr;
   1032 		ip->ip_dst.s_addr = fin->fin_saddr;
   1033 		ip->ip_id = fr_nextipid(fin);
   1034 		ip->ip_hl = sizeof(*ip) >> 2;
   1035 		ip->ip_p = IPPROTO_TCP;
   1036 		ip->ip_len = sizeof(*ip) + sizeof(*tcp);
   1037 		ip->ip_tos = fin->fin_ip->ip_tos;
   1038 		tcp2->th_sum = fr_cksum(m, ip, IPPROTO_TCP, tcp2);
   1039 	}
   1040 	return fr_send_ip(fin, m, &m);
   1041 }
   1042 
   1043 /*
   1044  * Function:	fr_send_ip
   1045  * Returns:	 0: success
   1046  *		-1: failed
   1047  * Parameters:
   1048  *	fin: packet information
   1049  *	m: the message block where ip head starts
   1050  *
   1051  * Send a new packet through the IP stack.
   1052  *
   1053  * For IPv4 packets, ip_len must be in host byte order, and ip_v,
   1054  * ip_ttl, ip_off, and ip_sum are ignored (filled in by this
   1055  * function).
   1056  *
   1057  * For IPv6 packets, ip6_flow, ip6_vfc, and ip6_hlim are filled
   1058  * in by this function.
   1059  *
   1060  * All other portions of the packet must be in on-the-wire format.
   1061  */
   1062 /*ARGSUSED*/
   1063 static int fr_send_ip(fin, m, mpp)
   1064 fr_info_t *fin;
   1065 mblk_t *m, **mpp;
   1066 {
   1067 	qpktinfo_t qpi, *qpip;
   1068 	fr_info_t fnew;
   1069 	ip_t *ip;
   1070 	int i, hlen;
   1071 	ipf_stack_t *ifs = fin->fin_ifs;
   1072 
   1073 	ip = (ip_t *)m->b_rptr;
   1074 	bzero((char *)&fnew, sizeof(fnew));
   1075 
   1076 #ifdef	USE_INET6
   1077 	if (fin->fin_v == 6) {
   1078 		ip6_t *ip6;
   1079 
   1080 		ip6 = (ip6_t *)ip;
   1081 		ip6->ip6_vfc = 0x60;
   1082 		ip6->ip6_hlim = 127;
   1083 		fnew.fin_v = 6;
   1084 		hlen = sizeof(*ip6);
   1085 		fnew.fin_plen = ntohs(ip6->ip6_plen) + hlen;
   1086 	} else
   1087 #endif
   1088 	{
   1089 		fnew.fin_v = 4;
   1090 #if SOLARIS2 >= 10
   1091 		ip->ip_ttl = 255;
   1092 		if (net_getpmtuenabled(ifs->ifs_ipf_ipv4) == 1)
   1093 			ip->ip_off = htons(IP_DF);
   1094 #else
   1095 		if (ip_ttl_ptr != NULL)
   1096 			ip->ip_ttl = (u_char)(*ip_ttl_ptr);
   1097 		else
   1098 			ip->ip_ttl = 63;
   1099 		if (ip_mtudisc != NULL)
   1100 			ip->ip_off = htons(*ip_mtudisc ? IP_DF : 0);
   1101 		else
   1102 			ip->ip_off = htons(IP_DF);
   1103 #endif
   1104 		/*
   1105 		 * The dance with byte order and ip_len/ip_off is because in
   1106 		 * fr_fastroute, it expects them to be in host byte order but
   1107 		 * ipf_cksum expects them to be in network byte order.
   1108 		 */
   1109 		ip->ip_len = htons(ip->ip_len);
   1110 		ip->ip_sum = ipf_cksum((u_short *)ip, sizeof(*ip));
   1111 		ip->ip_len = ntohs(ip->ip_len);
   1112 		ip->ip_off = ntohs(ip->ip_off);
   1113 		hlen = sizeof(*ip);
   1114 		fnew.fin_plen = ip->ip_len;
   1115 	}
   1116 
   1117 	qpip = fin->fin_qpi;
   1118 	qpi.qpi_off = 0;
   1119 	qpi.qpi_ill = qpip->qpi_ill;
   1120 	qpi.qpi_m = m;
   1121 	qpi.qpi_data = ip;
   1122 	fnew.fin_qpi = &qpi;
   1123 	fnew.fin_ifp = fin->fin_ifp;
   1124 	fnew.fin_flx = FI_NOCKSUM;
   1125 	fnew.fin_m = m;
   1126 	fnew.fin_qfm = m;
   1127 	fnew.fin_ip = ip;
   1128 	fnew.fin_mp = mpp;
   1129 	fnew.fin_hlen = hlen;
   1130 	fnew.fin_dp = (char *)ip + hlen;
   1131 	fnew.fin_ifs = fin->fin_ifs;
   1132 	(void) fr_makefrip(hlen, ip, &fnew);
   1133 
   1134 	i = fr_fastroute(m, mpp, &fnew, NULL);
   1135 	return i;
   1136 }
   1137 
   1138 
   1139 int fr_send_icmp_err(type, fin, dst)
   1140 int type;
   1141 fr_info_t *fin;
   1142 int dst;
   1143 {
   1144 	struct in_addr dst4;
   1145 	struct icmp *icmp;
   1146 	qpktinfo_t *qpi;
   1147 	int hlen, code;
   1148 	phy_if_t phy;
   1149 	u_short sz;
   1150 #ifdef	USE_INET6
   1151 	mblk_t *mb;
   1152 #endif
   1153 	mblk_t *m;
   1154 #ifdef	USE_INET6
   1155 	ip6_t *ip6;
   1156 #endif
   1157 	ip_t *ip;
   1158 	ipf_stack_t *ifs = fin->fin_ifs;
   1159 
   1160 	if ((type < 0) || (type > ICMP_MAXTYPE))
   1161 		return -1;
   1162 
   1163 	code = fin->fin_icode;
   1164 #ifdef USE_INET6
   1165 	if ((code < 0) || (code >= ICMP_MAX_UNREACH))
   1166 		return -1;
   1167 #endif
   1168 
   1169 #ifndef	IPFILTER_CKSUM
   1170 	if (fr_checkl4sum(fin) == -1)
   1171 		return -1;
   1172 #endif
   1173 
   1174 	qpi = fin->fin_qpi;
   1175 
   1176 #ifdef	USE_INET6
   1177 	mb = fin->fin_qfm;
   1178 
   1179 	if (fin->fin_v == 6) {
   1180 		sz = sizeof(ip6_t);
   1181 		sz += MIN(mb->b_wptr - mb->b_rptr, 512);
   1182 		hlen = sizeof(ip6_t);
   1183 		type = icmptoicmp6types[type];
   1184 		if (type == ICMP6_DST_UNREACH)
   1185 			code = icmptoicmp6unreach[code];
   1186 	} else
   1187 #endif
   1188 	{
   1189 		if ((fin->fin_p == IPPROTO_ICMP) &&
   1190 		    !(fin->fin_flx & FI_SHORT))
   1191 			switch (ntohs(fin->fin_data[0]) >> 8)
   1192 			{
   1193 			case ICMP_ECHO :
   1194 			case ICMP_TSTAMP :
   1195 			case ICMP_IREQ :
   1196 			case ICMP_MASKREQ :
   1197 				break;
   1198 			default :
   1199 				return 0;
   1200 			}
   1201 
   1202 		sz = sizeof(ip_t) * 2;
   1203 		sz += 8;		/* 64 bits of data */
   1204 		hlen = sizeof(ip_t);
   1205 	}
   1206 
   1207 	sz += offsetof(struct icmp, icmp_ip);
   1208 	if ((m = (mblk_t *)allocb((size_t)sz + 64, BPRI_HI)) == NULL)
   1209 		return -1;
   1210 	MTYPE(m) = M_DATA;
   1211 	m->b_rptr += 64;
   1212 	m->b_wptr = m->b_rptr + sz;
   1213 	bzero((char *)m->b_rptr, (size_t)sz);
   1214 	ip = (ip_t *)m->b_rptr;
   1215 	ip->ip_v = fin->fin_v;
   1216 	icmp = (struct icmp *)(m->b_rptr + hlen);
   1217 	icmp->icmp_type = type & 0xff;
   1218 	icmp->icmp_code = code & 0xff;
   1219 	phy = (phy_if_t)qpi->qpi_ill;
   1220 	if (type == ICMP_UNREACH && (phy != 0) &&
   1221 	    fin->fin_icode == ICMP_UNREACH_NEEDFRAG)
   1222 		icmp->icmp_nextmtu = net_getmtu(ifs->ifs_ipf_ipv4, phy,0 );
   1223 
   1224 #ifdef	USE_INET6
   1225 	if (fin->fin_v == 6) {
   1226 		struct in6_addr dst6;
   1227 		int csz;
   1228 
   1229 		if (dst == 0) {
   1230 			ipf_stack_t *ifs = fin->fin_ifs;
   1231 
   1232 			if (fr_ifpaddr(6, FRI_NORMAL, (void *)phy,
   1233 				       (void *)&dst6, NULL, ifs) == -1) {
   1234 				FREE_MB_T(m);
   1235 				return -1;
   1236 			}
   1237 		} else
   1238 			dst6 = fin->fin_dst6.in6;
   1239 
   1240 		csz = sz;
   1241 		sz -= sizeof(ip6_t);
   1242 		ip6 = (ip6_t *)m->b_rptr;
   1243 		ip6->ip6_flow = ((ip6_t *)fin->fin_ip)->ip6_flow;
   1244 		ip6->ip6_plen = htons((u_short)sz);
   1245 		ip6->ip6_nxt = IPPROTO_ICMPV6;
   1246 		ip6->ip6_src = dst6;
   1247 		ip6->ip6_dst = fin->fin_src6.in6;
   1248 		sz -= offsetof(struct icmp, icmp_ip);
   1249 		bcopy((char *)mb->b_rptr, (char *)&icmp->icmp_ip, sz);
   1250 		icmp->icmp_cksum = csz - sizeof(ip6_t);
   1251 	} else
   1252 #endif
   1253 	{
   1254 		ip->ip_hl = sizeof(*ip) >> 2;
   1255 		ip->ip_p = IPPROTO_ICMP;
   1256 		ip->ip_id = fin->fin_ip->ip_id;
   1257 		ip->ip_tos = fin->fin_ip->ip_tos;
   1258 		ip->ip_len = (u_short)sz;
   1259 		if (dst == 0) {
   1260 			ipf_stack_t *ifs = fin->fin_ifs;
   1261 
   1262 			if (fr_ifpaddr(4, FRI_NORMAL, (void *)phy,
   1263 				       (void *)&dst4, NULL, ifs) == -1) {
   1264 				FREE_MB_T(m);
   1265 				return -1;
   1266 			}
   1267 		} else {
   1268 			dst4 = fin->fin_dst;
   1269 		}
   1270 		ip->ip_src = dst4;
   1271 		ip->ip_dst = fin->fin_src;
   1272 		bcopy((char *)fin->fin_ip, (char *)&icmp->icmp_ip,
   1273 		      sizeof(*fin->fin_ip));
   1274 		bcopy((char *)fin->fin_ip + fin->fin_hlen,
   1275 		      (char *)&icmp->icmp_ip + sizeof(*fin->fin_ip), 8);
   1276 		icmp->icmp_ip.ip_len = htons(icmp->icmp_ip.ip_len);
   1277 		icmp->icmp_ip.ip_off = htons(icmp->icmp_ip.ip_off);
   1278 		icmp->icmp_cksum = ipf_cksum((u_short *)icmp,
   1279 					     sz - sizeof(ip_t));
   1280 	}
   1281 
   1282 	/*
   1283 	 * Need to exit out of these so we don't recursively call rw_enter
   1284 	 * from fr_qout.
   1285 	 */
   1286 	return fr_send_ip(fin, m, &m);
   1287 }
   1288 
   1289 #include <sys/time.h>
   1290 #include <sys/varargs.h>
   1291 
   1292 #ifndef _KERNEL
   1293 #include <stdio.h>
   1294 #endif
   1295 
   1296 #define	NULLADDR_RATE_LIMIT 10	/* 10 seconds */
   1297 
   1298 
   1299 /*
   1300  * Print out warning message at rate-limited speed.
   1301  */
   1302 static void rate_limit_message(ipf_stack_t *ifs,
   1303 			       int rate, const char *message, ...)
   1304 {
   1305 	static time_t last_time = 0;
   1306 	time_t now;
   1307 	va_list args;
   1308 	char msg_buf[256];
   1309 	int  need_printed = 0;
   1310 
   1311 	now = ddi_get_time();
   1312 
   1313 	/* make sure, no multiple entries */
   1314 	ASSERT(MUTEX_NOT_HELD(&(ifs->ifs_ipf_rw.ipf_lk)));
   1315 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
   1316 	if (now - last_time >= rate) {
   1317 		need_printed = 1;
   1318 		last_time = now;
   1319 	}
   1320 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
   1321 
   1322 	if (need_printed) {
   1323 		va_start(args, message);
   1324 		(void)vsnprintf(msg_buf, 255, message, args);
   1325 		va_end(args);
   1326 #ifdef _KERNEL
   1327 		cmn_err(CE_WARN, msg_buf);
   1328 #else
   1329 		fprintf(std_err, msg_buf);
   1330 #endif
   1331 	}
   1332 }
   1333 
   1334 /*
   1335  * Return the first IP Address associated with an interface
   1336  * For IPv6, we walk through the list of logical interfaces and return
   1337  * the address of the first one that isn't a link-local interface.
   1338  * We can't assume that it is :1 because another link-local address
   1339  * may have been assigned there.
   1340  */
   1341 /*ARGSUSED*/
   1342 int fr_ifpaddr(v, atype, ifptr, inp, inpmask, ifs)
   1343 int v, atype;
   1344 void *ifptr;
   1345 struct in_addr  *inp, *inpmask;
   1346 ipf_stack_t *ifs;
   1347 {
   1348 	struct sockaddr_in6 v6addr[2];
   1349 	struct sockaddr_in v4addr[2];
   1350 	net_ifaddr_t type[2];
   1351 	net_handle_t net_data;
   1352 	phy_if_t phyif;
   1353 	void *array;
   1354 
   1355 	switch (v)
   1356 	{
   1357 	case 4:
   1358 		net_data = ifs->ifs_ipf_ipv4;
   1359 		array = v4addr;
   1360 		break;
   1361 	case 6:
   1362 		net_data = ifs->ifs_ipf_ipv6;
   1363 		array = v6addr;
   1364 		break;
   1365 	default:
   1366 		net_data = NULL;
   1367 		break;
   1368 	}
   1369 
   1370 	if (net_data == NULL)
   1371 		return -1;
   1372 
   1373 	phyif = (phy_if_t)ifptr;
   1374 
   1375 	switch (atype)
   1376 	{
   1377 	case FRI_PEERADDR :
   1378 		type[0] = NA_PEER;
   1379 		break;
   1380 
   1381 	case FRI_BROADCAST :
   1382 		type[0] = NA_BROADCAST;
   1383 		break;
   1384 
   1385 	default :
   1386 		type[0] = NA_ADDRESS;
   1387 		break;
   1388 	}
   1389 
   1390 	type[1] = NA_NETMASK;
   1391 
   1392 	if (v == 6) {
   1393 		lif_if_t idx = 0;
   1394 
   1395 		do {
   1396 			idx = net_lifgetnext(net_data, phyif, idx);
   1397 			if (net_getlifaddr(net_data, phyif, idx, 2, type,
   1398 					   array) < 0)
   1399 				return -1;
   1400 			if (!IN6_IS_ADDR_LINKLOCAL(&v6addr[0].sin6_addr) &&
   1401 			    !IN6_IS_ADDR_MULTICAST(&v6addr[0].sin6_addr))
   1402 				break;
   1403 		} while (idx != 0);
   1404 
   1405 		if (idx == 0)
   1406 			return -1;
   1407 
   1408 		return fr_ifpfillv6addr(atype, &v6addr[0], &v6addr[1],
   1409 					inp, inpmask);
   1410 	}
   1411 
   1412 	if (net_getlifaddr(net_data, phyif, 0, 2, type, array) < 0)
   1413 		return -1;
   1414 
   1415 	return fr_ifpfillv4addr(atype, &v4addr[0], &v4addr[1], inp, inpmask);
   1416 }
   1417 
   1418 
   1419 u_32_t fr_newisn(fin)
   1420 fr_info_t *fin;
   1421 {
   1422 	static int iss_seq_off = 0;
   1423 	u_char hash[16];
   1424 	u_32_t newiss;
   1425 	MD5_CTX ctx;
   1426 	ipf_stack_t *ifs = fin->fin_ifs;
   1427 
   1428 	/*
   1429 	 * Compute the base value of the ISS.  It is a hash
   1430 	 * of (saddr, sport, daddr, dport, secret).
   1431 	 */
   1432 	MD5Init(&ctx);
   1433 
   1434 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_src,
   1435 		  sizeof(fin->fin_fi.fi_src));
   1436 	MD5Update(&ctx, (u_char *) &fin->fin_fi.fi_dst,
   1437 		  sizeof(fin->fin_fi.fi_dst));
   1438 	MD5Update(&ctx, (u_char *) &fin->fin_dat, sizeof(fin->fin_dat));
   1439 
   1440 	MD5Update(&ctx, ifs->ifs_ipf_iss_secret, sizeof(ifs->ifs_ipf_iss_secret));
   1441 
   1442 	MD5Final(hash, &ctx);
   1443 
   1444 	bcopy(hash, &newiss, sizeof(newiss));
   1445 
   1446 	/*
   1447 	 * Now increment our "timer", and add it in to
   1448 	 * the computed value.
   1449 	 *
   1450 	 * XXX Use `addin'?
   1451 	 * XXX TCP_ISSINCR too large to use?
   1452 	 */
   1453 	iss_seq_off += 0x00010000;
   1454 	newiss += iss_seq_off;
   1455 	return newiss;
   1456 }
   1457 
   1458 
   1459 /* ------------------------------------------------------------------------ */
   1460 /* Function:    fr_nextipid                                                 */
   1461 /* Returns:     int - 0 == success, -1 == error (packet should be droppped) */
   1462 /* Parameters:  fin(I) - pointer to packet information                      */
   1463 /*                                                                          */
   1464 /* Returns the next IPv4 ID to use for this packet.                         */
   1465 /* ------------------------------------------------------------------------ */
   1466 u_short fr_nextipid(fin)
   1467 fr_info_t *fin;
   1468 {
   1469 	static u_short ipid = 0;
   1470 	u_short id;
   1471 	ipf_stack_t *ifs = fin->fin_ifs;
   1472 
   1473 	MUTEX_ENTER(&ifs->ifs_ipf_rw);
   1474 	if (fin->fin_pktnum != 0) {
   1475 		id = fin->fin_pktnum & 0xffff;
   1476 	} else {
   1477 		id = ipid++;
   1478 	}
   1479 	MUTEX_EXIT(&ifs->ifs_ipf_rw);
   1480 
   1481 	return id;
   1482 }
   1483 
   1484 
   1485 #ifndef IPFILTER_CKSUM
   1486 /* ARGSUSED */
   1487 #endif
   1488 INLINE void fr_checkv4sum(fin)
   1489 fr_info_t *fin;
   1490 {
   1491 #ifdef IPFILTER_CKSUM
   1492 	if (fr_checkl4sum(fin) == -1)
   1493 		fin->fin_flx |= FI_BAD;
   1494 #endif
   1495 }
   1496 
   1497 
   1498 #ifdef USE_INET6
   1499 # ifndef IPFILTER_CKSUM
   1500 /* ARGSUSED */
   1501 # endif
   1502 INLINE void fr_checkv6sum(fin)
   1503 fr_info_t *fin;
   1504 {
   1505 # ifdef IPFILTER_CKSUM
   1506 	if (fr_checkl4sum(fin) == -1)
   1507 		fin->fin_flx |= FI_BAD;
   1508 # endif
   1509 }
   1510 #endif /* USE_INET6 */
   1511 
   1512 
   1513 #if (SOLARIS2 < 7)
   1514 void fr_slowtimer()
   1515 #else
   1516 /*ARGSUSED*/
   1517 void fr_slowtimer __P((void *arg))
   1518 #endif
   1519 {
   1520 	ipf_stack_t *ifs = arg;
   1521 
   1522 	READ_ENTER(&ifs->ifs_ipf_global);
   1523 	if (ifs->ifs_fr_running != 1) {
   1524 		ifs->ifs_fr_timer_id = NULL;
   1525 		RWLOCK_EXIT(&ifs->ifs_ipf_global);
   1526 		return;
   1527 	}
   1528 	ipf_expiretokens(ifs);
   1529 	fr_fragexpire(ifs);
   1530 	fr_timeoutstate(ifs);
   1531 	fr_natexpire(ifs);
   1532 	fr_authexpire(ifs);
   1533 	ifs->ifs_fr_ticks++;
   1534 	if (ifs->ifs_fr_running == 1)
   1535 		ifs->ifs_fr_timer_id = timeout(fr_slowtimer, arg,
   1536 		    drv_usectohz(500000));
   1537 	else
   1538 		ifs->ifs_fr_timer_id = NULL;
   1539 	RWLOCK_EXIT(&ifs->ifs_ipf_global);
   1540 }
   1541 
   1542 
   1543 /* ------------------------------------------------------------------------ */
   1544 /* Function:    fr_pullup                                                   */
   1545 /* Returns:     NULL == pullup failed, else pointer to protocol header      */
   1546 /* Parameters:  m(I)   - pointer to buffer where data packet starts         */
   1547 /*              fin(I) - pointer to packet information                      */
   1548 /*              len(I) - number of bytes to pullup                          */
   1549 /*                                                                          */
   1550 /* Attempt to move at least len bytes (from the start of the buffer) into a */
   1551 /* single buffer for ease of access.  Operating system native functions are */
   1552 /* used to manage buffers - if necessary.  If the entire packet ends up in  */
   1553 /* a single buffer, set the FI_COALESCE flag even though fr_coalesce() has  */
   1554 /* not been called.  Both fin_ip and fin_dp are updated before exiting _IF_ */
   1555 /* and ONLY if the pullup succeeds.                                         */
   1556 /*                                                                          */
   1557 /* We assume that 'min' is a pointer to a buffer that is part of the chain  */
   1558 /* of buffers that starts at *fin->fin_mp.                                  */
   1559 /* ------------------------------------------------------------------------ */
   1560 void *fr_pullup(min, fin, len)
   1561 mb_t *min;
   1562 fr_info_t *fin;
   1563 int len;
   1564 {
   1565 	qpktinfo_t *qpi = fin->fin_qpi;
   1566 	int out = fin->fin_out, dpoff, ipoff;
   1567 	mb_t *m = min, *m1, *m2;
   1568 	char *ip;
   1569 	uint32_t start, stuff, end, value, flags;
   1570 	ipf_stack_t *ifs = fin->fin_ifs;
   1571 
   1572 	if (m == NULL)
   1573 		return NULL;
   1574 
   1575 	ip = (char *)fin->fin_ip;
   1576 	if ((fin->fin_flx & FI_COALESCE) != 0)
   1577 		return ip;
   1578 
   1579 	ipoff = fin->fin_ipoff;
   1580 	if (fin->fin_dp != NULL)
   1581 		dpoff = (char *)fin->fin_dp - (char *)ip;
   1582 	else
   1583 		dpoff = 0;
   1584 
   1585 	if (M_LEN(m) < len + ipoff) {
   1586 
   1587 		/*
   1588 		 * pfil_precheck ensures the IP header is on a 32bit
   1589 		 * aligned address so simply fail if that isn't currently
   1590 		 * the case (should never happen).
   1591 		 */
   1592 		int inc = 0;
   1593 
   1594 		if (ipoff > 0) {
   1595 			if ((ipoff & 3) != 0) {
   1596 				inc = 4 - (ipoff & 3);
   1597 				if (m->b_rptr - inc >= m->b_datap->db_base)
   1598 					m->b_rptr -= inc;
   1599 				else
   1600 					inc = 0;
   1601 			}
   1602 		}
   1603 
   1604 		/*
   1605 		 * XXX This is here as a work around for a bug with DEBUG
   1606 		 * XXX Solaris kernels.  The problem is b_prev is used by IP
   1607 		 * XXX code as a way to stash the phyint_index for a packet,
   1608 		 * XXX this doesn't get reset by IP but freeb does an ASSERT()
   1609 		 * XXX for both of these to be NULL.  See 6442390.
   1610 		 */
   1611 		m1 = m;
   1612 		m2 = m->b_prev;
   1613 
   1614 		do {
   1615 			m1->b_next = NULL;
   1616 			m1->b_prev = NULL;
   1617 			m1 = m1->b_cont;
   1618 		} while (m1);
   1619 
   1620 		/*
   1621 		 * Need to preserve checksum information by copying them
   1622 		 * to newmp which heads the pulluped message.
   1623 		 */
   1624 		hcksum_retrieve(m, NULL, NULL, &start, &stuff, &end,
   1625 		    &value, &flags);
   1626 
   1627 		if (pullupmsg(m, len + ipoff + inc) == 0) {
   1628 			ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[1]);
   1629 			FREE_MB_T(*fin->fin_mp);
   1630 			*fin->fin_mp = NULL;
   1631 			fin->fin_m = NULL;
   1632 			fin->fin_ip = NULL;
   1633 			fin->fin_dp = NULL;
   1634 			qpi->qpi_data = NULL;
   1635 			return NULL;
   1636 		}
   1637 
   1638 		(void) hcksum_assoc(m, NULL, NULL, start, stuff, end,
   1639 		    value, flags, 0);
   1640 
   1641 		m->b_prev = m2;
   1642 		m->b_rptr += inc;
   1643 		fin->fin_m = m;
   1644 		ip = MTOD(m, char *) + ipoff;
   1645 		qpi->qpi_data = ip;
   1646 	}
   1647 
   1648 	ATOMIC_INCL(ifs->ifs_frstats[out].fr_pull[0]);
   1649 	fin->fin_ip = (ip_t *)ip;
   1650 	if (fin->fin_dp != NULL)
   1651 		fin->fin_dp = (char *)fin->fin_ip + dpoff;
   1652 
   1653 	if (len == fin->fin_plen)
   1654 		fin->fin_flx |= FI_COALESCE;
   1655 	return ip;
   1656 }
   1657 
   1658 
   1659 /*
   1660  * Function:	fr_verifysrc
   1661  * Returns:	int (really boolean)
   1662  * Parameters:	fin - packet information
   1663  *
   1664  * Check whether the packet has a valid source address for the interface on
   1665  * which the packet arrived, implementing the "fr_chksrc" feature.
   1666  * Returns true iff the packet's source address is valid.
   1667  */
   1668 int fr_verifysrc(fin)
   1669 fr_info_t *fin;
   1670 {
   1671 	net_handle_t net_data_p;
   1672 	phy_if_t phy_ifdata_routeto;
   1673 	struct sockaddr	sin;
   1674 	ipf_stack_t *ifs = fin->fin_ifs;
   1675 
   1676 	if (fin->fin_v == 4) {
   1677 		net_data_p = ifs->ifs_ipf_ipv4;
   1678 	} else if (fin->fin_v == 6) {
   1679 		net_data_p = ifs->ifs_ipf_ipv6;
   1680 	} else {
   1681 		return (0);
   1682 	}
   1683 
   1684 	/* Get the index corresponding to the if name */
   1685 	sin.sa_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
   1686 	bcopy(&fin->fin_saddr, &sin.sa_data, sizeof (struct in_addr));
   1687 	phy_ifdata_routeto = net_routeto(net_data_p, &sin, NULL);
   1688 
   1689 	return (((phy_if_t)fin->fin_ifp == phy_ifdata_routeto) ? 1 : 0);
   1690 }
   1691 
   1692 
   1693 /*
   1694  * Function:	fr_fastroute
   1695  * Returns:	 0: success;
   1696  *		-1: failed
   1697  * Parameters:
   1698  *	mb: the message block where ip head starts
   1699  *	mpp: the pointer to the pointer of the orignal
   1700  *		packet message
   1701  *	fin: packet information
   1702  *	fdp: destination interface information
   1703  *	if it is NULL, no interface information provided.
   1704  *
   1705  * This function is for fastroute/to/dup-to rules. It calls
   1706  * pfil_make_lay2_packet to search route, make lay-2 header
   1707  * ,and identify output queue for the IP packet.
   1708  * The destination address depends on the following conditions:
   1709  * 1: for fastroute rule, fdp is passed in as NULL, so the
   1710  *	destination address is the IP Packet's destination address
   1711  * 2: for to/dup-to rule, if an ip address is specified after
   1712  *	the interface name, this address is the as destination
   1713  *	address. Otherwise IP Packet's destination address is used
   1714  */
   1715 int fr_fastroute(mb, mpp, fin, fdp)
   1716 mblk_t *mb, **mpp;
   1717 fr_info_t *fin;
   1718 frdest_t *fdp;
   1719 {
   1720         net_handle_t net_data_p;
   1721 	net_inject_t *inj;
   1722 	mblk_t *mp = NULL;
   1723 	frentry_t *fr = fin->fin_fr;
   1724 	qpktinfo_t *qpi;
   1725 	ip_t *ip;
   1726 
   1727 	struct sockaddr_in *sin;
   1728 	struct sockaddr_in6 *sin6;
   1729 	struct sockaddr *sinp;
   1730 	ipf_stack_t *ifs = fin->fin_ifs;
   1731 #ifndef	sparc
   1732 	u_short __iplen, __ipoff;
   1733 #endif
   1734 
   1735 	if (fin->fin_v == 4) {
   1736 		net_data_p = ifs->ifs_ipf_ipv4;
   1737 	} else if (fin->fin_v == 6) {
   1738 		net_data_p = ifs->ifs_ipf_ipv6;
   1739 	} else {
   1740 		return (-1);
   1741 	}
   1742 
   1743 	inj = net_inject_alloc(NETINFO_VERSION);
   1744 	if (inj == NULL)
   1745 		return -1;
   1746 
   1747 	ip = fin->fin_ip;
   1748 	qpi = fin->fin_qpi;
   1749 
   1750 	/*
   1751 	 * If this is a duplicate mblk then we want ip to point at that
   1752 	 * data, not the original, if and only if it is already pointing at
   1753 	 * the current mblk data.
   1754 	 *
   1755 	 * Otherwise, if it's not a duplicate, and we're not already pointing
   1756 	 * at the current mblk data, then we want to ensure that the data
   1757 	 * points at ip.
   1758 	 */
   1759 
   1760 	if ((ip == (ip_t *)qpi->qpi_m->b_rptr) && (qpi->qpi_m != mb)) {
   1761 		ip = (ip_t *)mb->b_rptr;
   1762 	} else if ((qpi->qpi_m == mb) && (ip != (ip_t *)qpi->qpi_m->b_rptr)) {
   1763 		qpi->qpi_m->b_rptr = (uchar_t *)ip;
   1764 		qpi->qpi_off = 0;
   1765 	}
   1766 
   1767 	/*
   1768 	 * If there is another M_PROTO, we don't want it
   1769 	 */
   1770 	if (*mpp != mb) {
   1771 		mp = unlinkb(*mpp);
   1772 		freeb(*mpp);
   1773 		*mpp = mp;
   1774 	}
   1775 
   1776 	sinp = (struct sockaddr *)&inj->ni_addr;
   1777 	sin = (struct sockaddr_in *)sinp;
   1778 	sin6 = (struct sockaddr_in6 *)sinp;
   1779 	bzero((char *)&inj->ni_addr, sizeof (inj->ni_addr));
   1780 	inj->ni_addr.ss_family = (fin->fin_v == 4) ? AF_INET : AF_INET6;
   1781 	inj->ni_packet = mb;
   1782 
   1783 	/*
   1784 	 * In case we're here due to "to <if>" being used with
   1785 	 * "keep state", check that we're going in the correct
   1786 	 * direction.
   1787 	 */
   1788 	if (fdp != NULL) {
   1789 		if ((fr != NULL) && (fdp->fd_ifp != NULL) &&
   1790 			(fin->fin_rev != 0) && (fdp == &fr->fr_tif))
   1791 			goto bad_fastroute;
   1792 		inj->ni_physical = (phy_if_t)fdp->fd_ifp;
   1793 		if (fin->fin_v == 4) {
   1794 			sin->sin_addr = fdp->fd_ip;
   1795 		} else {
   1796 			sin6->sin6_addr = fdp->fd_ip6.in6;
   1797 		}
   1798 	} else {
   1799 		if (fin->fin_v == 4) {
   1800 			sin->sin_addr = ip->ip_dst;
   1801 		} else {
   1802 			sin6->sin6_addr = ((ip6_t *)ip)->ip6_dst;
   1803 		}
   1804 		inj->ni_physical = net_routeto(net_data_p, sinp, NULL);
   1805 	}
   1806 
   1807 	/*
   1808 	 * Clear the hardware checksum flags from packets that we are doing
   1809 	 * input processing on as leaving them set will cause the outgoing
   1810 	 * NIC (if it supports hardware checksum) to calculate them anew,
   1811 	 * using the old (correct) checksums as the pseudo value to start
   1812 	 * from.
   1813 	 */
   1814 	if (fin->fin_out == 0) {
   1815 		DB_CKSUMFLAGS(mb) = 0;
   1816 	}
   1817 
   1818 	*mpp = mb;
   1819 
   1820 	if (fin->fin_out == 0) {
   1821 		void *saveifp;
   1822 		u_32_t pass;
   1823 
   1824 		saveifp = fin->fin_ifp;
   1825 		fin->fin_ifp = (void *)inj->ni_physical;
   1826 		fin->fin_flx &= ~FI_STATE;
   1827 		fin->fin_out = 1;
   1828 		(void) fr_acctpkt(fin, &pass);
   1829 		fin->fin_fr = NULL;
   1830 		if (!fr || !(fr->fr_flags & FR_RETMASK))
   1831 			(void) fr_checkstate(fin, &pass);
   1832 		if (fr_checknatout(fin, NULL) == -1)
   1833 			goto bad_fastroute;
   1834 		fin->fin_out = 0;
   1835 		fin->fin_ifp = saveifp;
   1836 	}
   1837 #ifndef	sparc
   1838 	if (fin->fin_v == 4) {
   1839 		__iplen = (u_short)ip->ip_len,
   1840 		__ipoff = (u_short)ip->ip_off;
   1841 
   1842 		ip->ip_len = htons(__iplen);
   1843 		ip->ip_off = htons(__ipoff);
   1844 	}
   1845 #endif
   1846 
   1847 	if (net_data_p) {
   1848 		if (net_inject(net_data_p, NI_DIRECT_OUT, inj) < 0) {
   1849 			net_inject_free(inj);
   1850 			return (-1);
   1851 		}
   1852 	}
   1853 
   1854 	ifs->ifs_fr_frouteok[0]++;
   1855 	net_inject_free(inj);
   1856 	return 0;
   1857 bad_fastroute:
   1858 	net_inject_free(inj);
   1859 	freemsg(mb);
   1860 	ifs->ifs_fr_frouteok[1]++;
   1861 	return -1;
   1862 }
   1863 
   1864 
   1865 /* ------------------------------------------------------------------------ */
   1866 /* Function:    ipf_hook4_out                                               */
   1867 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
   1868 /* Parameters:  event(I)     - pointer to event                             */
   1869 /*              info(I)      - pointer to hook information for firewalling  */
   1870 /*                                                                          */
   1871 /* Calling ipf_hook.                                                        */
   1872 /* ------------------------------------------------------------------------ */
   1873 /*ARGSUSED*/
   1874 int ipf_hook4_out(hook_event_token_t token, hook_data_t info, void *arg)
   1875 {
   1876 	return ipf_hook(info, 1, 0, arg);
   1877 }
   1878 /*ARGSUSED*/
   1879 int ipf_hook6_out(hook_event_token_t token, hook_data_t info, void *arg)
   1880 {
   1881 	return ipf_hook6(info, 1, 0, arg);
   1882 }
   1883 
   1884 /* ------------------------------------------------------------------------ */
   1885 /* Function:    ipf_hook4_in                                                */
   1886 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
   1887 /* Parameters:  event(I)     - pointer to event                             */
   1888 /*              info(I)      - pointer to hook information for firewalling  */
   1889 /*                                                                          */
   1890 /* Calling ipf_hook.                                                        */
   1891 /* ------------------------------------------------------------------------ */
   1892 /*ARGSUSED*/
   1893 int ipf_hook4_in(hook_event_token_t token, hook_data_t info, void *arg)
   1894 {
   1895 	return ipf_hook(info, 0, 0, arg);
   1896 }
   1897 /*ARGSUSED*/
   1898 int ipf_hook6_in(hook_event_token_t token, hook_data_t info, void *arg)
   1899 {
   1900 	return ipf_hook6(info, 0, 0, arg);
   1901 }
   1902 
   1903 
   1904 /* ------------------------------------------------------------------------ */
   1905 /* Function:    ipf_hook4_loop_out                                          */
   1906 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
   1907 /* Parameters:  event(I)     - pointer to event                             */
   1908 /*              info(I)      - pointer to hook information for firewalling  */
   1909 /*                                                                          */
   1910 /* Calling ipf_hook.                                                        */
   1911 /* ------------------------------------------------------------------------ */
   1912 /*ARGSUSED*/
   1913 int ipf_hook4_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
   1914 {
   1915 	return ipf_hook(info, 1, FI_NOCKSUM, arg);
   1916 }
   1917 /*ARGSUSED*/
   1918 int ipf_hook6_loop_out(hook_event_token_t token, hook_data_t info, void *arg)
   1919 {
   1920 	return ipf_hook6(info, 1, FI_NOCKSUM, arg);
   1921 }
   1922 
   1923 /* ------------------------------------------------------------------------ */
   1924 /* Function:    ipf_hook4_loop_in                                           */
   1925 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
   1926 /* Parameters:  event(I)     - pointer to event                             */
   1927 /*              info(I)      - pointer to hook information for firewalling  */
   1928 /*                                                                          */
   1929 /* Calling ipf_hook.                                                        */
   1930 /* ------------------------------------------------------------------------ */
   1931 /*ARGSUSED*/
   1932 int ipf_hook4_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
   1933 {
   1934 	return ipf_hook(info, 0, FI_NOCKSUM, arg);
   1935 }
   1936 /*ARGSUSED*/
   1937 int ipf_hook6_loop_in(hook_event_token_t token, hook_data_t info, void *arg)
   1938 {
   1939 	return ipf_hook6(info, 0, FI_NOCKSUM, arg);
   1940 }
   1941 
   1942 /* ------------------------------------------------------------------------ */
   1943 /* Function:    ipf_hook                                                    */
   1944 /* Returns:     int - 0 == packet ok, else problem, free packet if not done */
   1945 /* Parameters:  info(I)      - pointer to hook information for firewalling  */
   1946 /*              out(I)       - whether packet is going in or out            */
   1947 /*              loopback(I)  - whether packet is a loopback packet or not   */
   1948 /*                                                                          */
   1949 /* Stepping stone function between the IP mainline and IPFilter.  Extracts  */
   1950 /* parameters out of the info structure and forms them up to be useful for  */
   1951 /* calling ipfilter.                                                        */
   1952 /* ------------------------------------------------------------------------ */
   1953 int ipf_hook(hook_data_t info, int out, int loopback, void *arg)
   1954 {
   1955 	hook_pkt_event_t *fw;
   1956 	ipf_stack_t *ifs;
   1957 	qpktinfo_t qpi;
   1958 	int rval, hlen;
   1959 	u_short swap;
   1960 	phy_if_t phy;
   1961 	ip_t *ip;
   1962 
   1963 	ifs = arg;
   1964 	fw = (hook_pkt_event_t *)info;
   1965 
   1966 	ASSERT(fw != NULL);
   1967 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
   1968 
   1969 	ip = fw->hpe_hdr;
   1970 	swap = ntohs(ip->ip_len);
   1971 	ip->ip_len = swap;
   1972 	swap = ntohs(ip->ip_off);
   1973 	ip->ip_off = swap;
   1974 	hlen = IPH_HDR_LENGTH(ip);
   1975 
   1976 	qpi.qpi_m = fw->hpe_mb;
   1977 	qpi.qpi_data = fw->hpe_hdr;
   1978 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
   1979 	qpi.qpi_ill = (void *)phy;
   1980 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
   1981 	if (qpi.qpi_flags)
   1982 		qpi.qpi_flags |= FI_MBCAST;
   1983 	qpi.qpi_flags |= loopback;
   1984 
   1985 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
   1986 	    &qpi, fw->hpe_mp, ifs);
   1987 
   1988 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
   1989 	if (rval == 0 && *(fw->hpe_mp) == NULL)
   1990 		rval = 1;
   1991 
   1992 	/* Notify IP the packet mblk_t and IP header pointers. */
   1993 	fw->hpe_mb = qpi.qpi_m;
   1994 	fw->hpe_hdr = qpi.qpi_data;
   1995 	if (rval == 0) {
   1996 		ip = qpi.qpi_data;
   1997 		swap = ntohs(ip->ip_len);
   1998 		ip->ip_len = swap;
   1999 		swap = ntohs(ip->ip_off);
   2000 		ip->ip_off = swap;
   2001 	}
   2002 	return rval;
   2003 
   2004 }
   2005 int ipf_hook6(hook_data_t info, int out, int loopback, void *arg)
   2006 {
   2007 	hook_pkt_event_t *fw;
   2008 	int rval, hlen;
   2009 	qpktinfo_t qpi;
   2010 	phy_if_t phy;
   2011 
   2012 	fw = (hook_pkt_event_t *)info;
   2013 
   2014 	ASSERT(fw != NULL);
   2015 	phy = (out == 0) ? fw->hpe_ifp : fw->hpe_ofp;
   2016 
   2017 	hlen = sizeof (ip6_t);
   2018 
   2019 	qpi.qpi_m = fw->hpe_mb;
   2020 	qpi.qpi_data = fw->hpe_hdr;
   2021 	qpi.qpi_off = (char *)qpi.qpi_data - (char *)fw->hpe_mb->b_rptr;
   2022 	qpi.qpi_ill = (void *)phy;
   2023 	qpi.qpi_flags = fw->hpe_flags & (HPE_MULTICAST|HPE_BROADCAST);
   2024 	if (qpi.qpi_flags)
   2025 		qpi.qpi_flags |= FI_MBCAST;
   2026 	qpi.qpi_flags |= loopback;
   2027 
   2028 	rval = fr_check(fw->hpe_hdr, hlen, qpi.qpi_ill, out,
   2029 	    &qpi, fw->hpe_mp, arg);
   2030 
   2031 	/* For fastroute cases, fr_check returns 0 with mp set to NULL */
   2032 	if (rval == 0 && *(fw->hpe_mp) == NULL)
   2033 		rval = 1;
   2034 
   2035 	/* Notify IP the packet mblk_t and IP header pointers. */
   2036 	fw->hpe_mb = qpi.qpi_m;
   2037 	fw->hpe_hdr = qpi.qpi_data;
   2038 	return rval;
   2039 
   2040 }
   2041 
   2042 
   2043 /* ------------------------------------------------------------------------ */
   2044 /* Function:    ipf_nic_event_v4                                            */
   2045 /* Returns:     int - 0 == no problems encountered                          */
   2046 /* Parameters:  event(I)     - pointer to event                             */
   2047 /*              info(I)      - pointer to information about a NIC event     */
   2048 /*                                                                          */
   2049 /* Function to receive asynchronous NIC events from IP                      */
   2050 /* ------------------------------------------------------------------------ */
   2051 /*ARGSUSED*/
   2052 int ipf_nic_event_v4(hook_event_token_t event, hook_data_t info, void *arg)
   2053 {
   2054 	struct sockaddr_in *sin;
   2055 	hook_nic_event_t *hn;
   2056 	ipf_stack_t *ifs = arg;
   2057 	void *new_ifp = NULL;
   2058 
   2059 	if (ifs->ifs_fr_running <= 0)
   2060 		return (0);
   2061 
   2062 	hn = (hook_nic_event_t *)info;
   2063 
   2064 	switch (hn->hne_event)
   2065 	{
   2066 	case NE_PLUMB :
   2067 		frsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic, hn->hne_data,
   2068 		       ifs);
   2069 		fr_natifpsync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
   2070 			      hn->hne_data, ifs);
   2071 		fr_statesync(IPFSYNC_NEWIFP, 4, (void *)hn->hne_nic,
   2072 			     hn->hne_data, ifs);
   2073 		break;
   2074 
   2075 	case NE_UNPLUMB :
   2076 		frsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
   2077 		fr_natifpsync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL,
   2078 			      ifs);
   2079 		fr_statesync(IPFSYNC_OLDIFP, 4, (void *)hn->hne_nic, NULL, ifs);
   2080 		break;
   2081 
   2082 	case NE_ADDRESS_CHANGE :
   2083 		/*
   2084 		 * We only respond to events for logical interface 0 because
   2085 		 * IPFilter only uses the first address given to a network
   2086 		 * interface.  We check for hne_lif==1 because the netinfo
   2087 		 * code maps adds 1 to the lif number so that it can return
   2088 		 * 0 to indicate "no more lifs" when walking them.
   2089 		 */
   2090 		if (hn->hne_lif == 1) {
   2091 			frsync(IPFSYNC_RESYNC, 4, (void *)hn->hne_nic, NULL,
   2092 			    ifs);
   2093 			sin = hn->hne_data;
   2094 			fr_nataddrsync(4, (void *)hn->hne_nic, &sin->sin_addr,
   2095 			    ifs);
   2096 		}
   2097 		break;
   2098 
   2099 #if SOLARIS2 >= 10
   2100 	case NE_IFINDEX_CHANGE :
   2101 		WRITE_ENTER(&ifs->ifs_ipf_mutex);
   2102 
   2103 		if (hn->hne_data != NULL) {
   2104 			/*
   2105 			 * The netinfo passes interface index as int (hne_data should be
   2106 			 * handled as a pointer to int), which is always 32bit. We need to
   2107 			 * convert it to void pointer here, since interfaces are
   2108 			 * represented as pointers to void in IPF. The pointers are 64 bits
   2109 			 * long on 64bit platforms. Doing something like
   2110 			 *	(void *)((int) x)
   2111 			 * will throw warning:
   2112 			 *   "cast to pointer from integer of different size"
   2113 			 * during 64bit compilation.
   2114 			 *
   2115 			 * The line below uses (size_t) to typecast int to
   2116 			 * size_t, which might be 64bit/32bit (depending
   2117 			 * on architecture). Once we have proper 64bit/32bit
   2118 			 * type (size_t), we can safely convert it to void pointer.
   2119 			 */
   2120 			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
   2121 			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
   2122 			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
   2123 			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
   2124 		}
   2125 		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
   2126 		break;
   2127 #endif
   2128 
   2129 	default :
   2130 		break;
   2131 	}
   2132 
   2133 	return 0;
   2134 }
   2135 
   2136 
   2137 /* ------------------------------------------------------------------------ */
   2138 /* Function:    ipf_nic_event_v6                                            */
   2139 /* Returns:     int - 0 == no problems encountered                          */
   2140 /* Parameters:  event(I)     - pointer to event                             */
   2141 /*              info(I)      - pointer to information about a NIC event     */
   2142 /*                                                                          */
   2143 /* Function to receive asynchronous NIC events from IP                      */
   2144 /* ------------------------------------------------------------------------ */
   2145 /*ARGSUSED*/
   2146 int ipf_nic_event_v6(hook_event_token_t event, hook_data_t info, void *arg)
   2147 {
   2148 	struct sockaddr_in6 *sin6;
   2149 	hook_nic_event_t *hn;
   2150 	ipf_stack_t *ifs = arg;
   2151 	void *new_ifp = NULL;
   2152 
   2153 	if (ifs->ifs_fr_running <= 0)
   2154 		return (0);
   2155 
   2156 	hn = (hook_nic_event_t *)info;
   2157 
   2158 	switch (hn->hne_event)
   2159 	{
   2160 	case NE_PLUMB :
   2161 		frsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
   2162 		       hn->hne_data, ifs);
   2163 		fr_natifpsync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
   2164 			      hn->hne_data, ifs);
   2165 		fr_statesync(IPFSYNC_NEWIFP, 6, (void *)hn->hne_nic,
   2166 			     hn->hne_data, ifs);
   2167 		break;
   2168 
   2169 	case NE_UNPLUMB :
   2170 		frsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
   2171 		fr_natifpsync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL,
   2172 			      ifs);
   2173 		fr_statesync(IPFSYNC_OLDIFP, 6, (void *)hn->hne_nic, NULL, ifs);
   2174 		break;
   2175 
   2176 	case NE_ADDRESS_CHANGE :
   2177 		if (hn->hne_lif == 1) {
   2178 			sin6 = hn->hne_data;
   2179 			fr_nataddrsync(6, (void *)hn->hne_nic, &sin6->sin6_addr,
   2180 				       ifs);
   2181 		}
   2182 		break;
   2183 
   2184 #if SOLARIS2 >= 10
   2185 	case NE_IFINDEX_CHANGE :
   2186 		WRITE_ENTER(&ifs->ifs_ipf_mutex);
   2187 		if (hn->hne_data != NULL) {
   2188 			/*
   2189 			 * The netinfo passes interface index as int (hne_data should be
   2190 			 * handled as a pointer to int), which is always 32bit. We need to
   2191 			 * convert it to void pointer here, since interfaces are
   2192 			 * represented as pointers to void in IPF. The pointers are 64 bits
   2193 			 * long on 64bit platforms. Doing something like
   2194 			 *	(void *)((int) x)
   2195 			 * will throw warning:
   2196 			 *   "cast to pointer from integer of different size"
   2197 			 * during 64bit compilation.
   2198 			 *
   2199 			 * The line below uses (size_t) to typecast int to
   2200 			 * size_t, which might be 64bit/32bit (depending
   2201 			 * on architecture). Once we have proper 64bit/32bit
   2202 			 * type (size_t), we can safely convert it to void pointer.
   2203 			 */
   2204 			new_ifp = (void *)(size_t)*((int *)hn->hne_data);
   2205 			fr_ifindexsync((void *)hn->hne_nic, new_ifp, ifs);
   2206 			fr_natifindexsync((void *)hn->hne_nic, new_ifp, ifs);
   2207 			fr_stateifindexsync((void *)hn->hne_nic, new_ifp, ifs);
   2208 		}
   2209 		RWLOCK_EXIT(&ifs->ifs_ipf_mutex);
   2210 		break;
   2211 #endif
   2212 
   2213 	default :
   2214 		break;
   2215 	}
   2216 
   2217 	return 0;
   2218 }
   2219 
   2220 /*
   2221  * Functions fr_make_rst(), fr_make_icmp_v4(), fr_make_icmp_v6()
   2222  * are needed in Solaris kernel only. We don't need them in
   2223  * ipftest to pretend the ICMP/RST packet was sent as a response.
   2224  */
   2225 #if defined(_KERNEL) && (SOLARIS2 >= 10)
   2226 /* ------------------------------------------------------------------------ */
   2227 /* Function:    fr_make_rst                                                 */
   2228 /* Returns:     int - 0 on success, -1 on failure			    */
   2229 /* Parameters:  fin(I) - pointer to packet information                      */
   2230 /*                                                                          */
   2231 /* We must alter the original mblks passed to IPF from IP stack via	    */
/* FW_HOOKS. FW_HOOKS interface is powerful, but it has some limitations.   */
/* IPF can basically do only these things with an mblk holding the packet:  */
   2234 /*	leave it as it is (pass the packet)				    */
   2235 /*                                                                          */
   2236 /*	discard it (block the packet)					    */
   2237 /*                                                                          */
   2238 /*	alter it (i.e. NAT)						    */
   2239 /*                                                                          */
   2240 /* As you can see IPF can not simply discard the mblk and supply a new one  */
   2241 /* instead to IP stack via FW_HOOKS.					    */
   2242 /*                                                                          */
   2243 /* The return-rst action for packets coming via NIC is handled as follows:  */
   2244 /*	mblk with packet is discarded					    */
   2245 /*                                                                          */
   2246 /*	new mblk with RST response is constructed and injected to network   */
   2247 /*                                                                          */
   2248 /* IPF can't inject packets to loopback interface, this is just another	    */
   2249 /* limitation we have to deal with here. The only option to send RST	    */
   2250 /* response to offending TCP packet coming via loopback is to alter it.	    */
   2251 /*									    */
   2252 /* The fr_make_rst() function alters TCP SYN/FIN packet intercepted on	    */
   2253 /* loopback interface into TCP RST packet. fin->fin_mp is pointer to	    */
   2254 /* mblk L3 (IP) and L4 (TCP/UDP) packet headers.			    */
   2255 /* ------------------------------------------------------------------------ */
   2256 int fr_make_rst(fin)
   2257 fr_info_t *fin;
   2258 {
   2259 	uint16_t tmp_port;
   2260 	int rv = -1;
   2261 	uint32_t old_ack;
   2262 	tcphdr_t *tcp = NULL;
   2263 	struct in_addr tmp_src;
   2264 #ifdef USE_INET6
   2265 	struct in6_addr	tmp_src6;
   2266 #endif
   2267 
   2268 	ASSERT(fin->fin_p == IPPROTO_TCP);
   2269 
   2270 	/*
   2271 	 * We do not need to adjust chksum, since it is not being checked by
   2272 	 * Solaris IP stack for loopback clients.
   2273 	 */
   2274 	if ((fin->fin_v == 4) && (fin->fin_p == IPPROTO_TCP) &&
   2275 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
   2276 
   2277 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
   2278 			/* Swap IPv4 addresses. */
   2279 			tmp_src = fin->fin_ip->ip_src;
   2280 			fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
   2281 			fin->fin_ip->ip_dst = tmp_src;
   2282 
   2283 			rv = 0;
   2284 		}
   2285 		else
   2286 			tcp = NULL;
   2287 	}
   2288 #ifdef USE_INET6
   2289 	else if ((fin->fin_v == 6) && (fin->fin_p == IPPROTO_TCP) &&
   2290 	    ((tcp = (tcphdr_t *) fin->fin_dp) != NULL)) {
   2291 		/*
   2292 		 * We are relying on fact the next header is TCP, which is true
   2293 		 * for regular TCP packets coming in over loopback.
   2294 		 */
   2295 		if (tcp->th_flags & (TH_SYN | TH_FIN)) {
   2296 			/* Swap IPv6 addresses. */
   2297 			tmp_src6 = fin->fin_ip6->ip6_src;
   2298 			fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
   2299 			fin->fin_ip6->ip6_dst = tmp_src6;
   2300 
   2301 			rv = 0;
   2302 		}
   2303 		else
   2304 			tcp = NULL;
   2305 	}
   2306 #endif
   2307 
   2308 	if (tcp != NULL) {
   2309 		/*
   2310 		 * Adjust TCP header:
   2311 		 *	swap ports,
   2312 		 *	set flags,
   2313 		 *	set correct ACK number
   2314 		 */
   2315 		tmp_port = tcp->th_sport;
   2316 		tcp->th_sport = tcp->th_dport;
   2317 		tcp->th_dport = tmp_port;
   2318 		old_ack = tcp->th_ack;
   2319 		tcp->th_ack = htonl(ntohl(tcp->th_seq) + 1);
   2320 		tcp->th_seq = old_ack;
   2321 		tcp->th_flags = TH_RST | TH_ACK;
   2322 	}
   2323 
   2324 	return (rv);
   2325 }
   2326 
   2327 /* ------------------------------------------------------------------------ */
   2328 /* Function:    fr_make_icmp_v4                                             */
   2329 /* Returns:     int - 0 on success, -1 on failure			    */
   2330 /* Parameters:  fin(I) - pointer to packet information                      */
   2331 /*                                                                          */
   2332 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
   2333 /* what is going to happen here and why. Once you read the comment there,   */
   2334 /* continue here with next paragraph.					    */
   2335 /*									    */
   2336 /* To turn IPv4 packet into ICMPv4 response packet, these things must	    */
   2337 /* happen here:								    */
   2338 /*	(1) Original mblk is copied (duplicated).			    */
   2339 /*                                                                          */
   2340 /*	(2) ICMP header is created.					    */
   2341 /*                                                                          */
   2342 /*	(3) Link ICMP header with copy of original mblk, we have ICMPv4	    */
   2343 /*	    data ready then.						    */
   2344 /*                                                                          */
   2345 /*      (4) Swap IP addresses in original mblk and adjust IP header data.   */
   2346 /*                                                                          */
   2347 /*	(5) The mblk containing original packet is trimmed to contain IP    */
   2348 /*	    header only and ICMP chksum is computed.			    */
   2349 /*                                                                          */
   2350 /*	(6) The ICMP header we have from (3) is linked to original mblk,    */
   2351 /*	    which now contains new IP header. If original packet was spread */
   2352 /*	    over several mblks, only the first mblk is kept.		    */
   2353 /* ------------------------------------------------------------------------ */
   2354 static int fr_make_icmp_v4(fin)
   2355 fr_info_t *fin;
   2356 {
   2357 	struct in_addr tmp_src;
   2358 	tcphdr_t *tcp;
   2359 	struct icmp *icmp;
   2360 	mblk_t *mblk_icmp;
   2361 	mblk_t *mblk_ip;
   2362 	size_t icmp_pld_len;	/* octets to append to ICMP header */
   2363 	size_t orig_iphdr_len;	/* length of IP header only */
   2364 	uint32_t sum;
   2365 	uint16_t *buf;
   2366 	int len;
   2367 
   2368 
   2369 	if (fin->fin_v != 4)
   2370 		return (-1);
   2371 
   2372 	/*
   2373 	 * If we are dealing with TCP, then packet must be SYN/FIN to be routed
   2374 	 * by IP stack. If it is not SYN/FIN, then we must drop it silently.
   2375 	 */
   2376 	tcp = (tcphdr_t *) fin->fin_dp;
   2377 
   2378 	if ((fin->fin_p == IPPROTO_TCP) &&
   2379 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
   2380 		return (-1);
   2381 
   2382 	/*
   2383 	 * Step (1)
   2384 	 *
   2385 	 * Make copy of original mblk.
   2386 	 *
   2387 	 * We want to copy as much data as necessary, not less, not more.  The
   2388 	 * ICMPv4 payload length for unreachable messages is:
   2389 	 *	original IP header + 8 bytes of L4 (if there are any).
   2390 	 *
   2391 	 * We determine if there are at least 8 bytes of L4 data following IP
   2392 	 * header first.
   2393 	 */
   2394 	icmp_pld_len = (fin->fin_dlen > ICMPERR_ICMPHLEN) ?
   2395 		ICMPERR_ICMPHLEN : fin->fin_dlen;
   2396 	/*
   2397 	 * Since we don't want to copy more data than necessary, we must trim
   2398 	 * the original mblk here.  The right way (STREAMish) would be to use
   2399 	 * adjmsg() to trim it.  However we would have to calculate the length
   2400 	 * argument for adjmsg() from pointers we already have here.
   2401 	 *
   2402 	 * Since we have pointers and offsets, it's faster and easier for
   2403 	 * us to just adjust pointers by hand instead of using adjmsg().
   2404 	 */
   2405 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_dp;
   2406 	fin->fin_m->b_wptr += icmp_pld_len;
   2407 	icmp_pld_len = fin->fin_m->b_wptr - (unsigned char *) fin->fin_ip;
   2408 
   2409 	/*
   2410 	 * Also we don't want to copy any L2 stuff, which might precede IP
   2411 	 * header, so we have have to set b_rptr to point to the start of IP
   2412 	 * header.
   2413 	 */
   2414 	fin->fin_m->b_rptr += fin->fin_ipoff;
   2415 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
   2416 		return (-1);
   2417 	fin->fin_m->b_rptr -= fin->fin_ipoff;
   2418 
   2419 	/*
   2420 	 * Step (2)
   2421 	 *
   2422 	 * Create an ICMP header, which will be appened to original mblk later.
   2423 	 * ICMP header is just another mblk.
   2424 	 */
   2425 	mblk_icmp = (mblk_t *) allocb(ICMPERR_ICMPHLEN, BPRI_HI);
   2426 	if (mblk_icmp == NULL) {
   2427 		FREE_MB_T(mblk_ip);
   2428 		return (-1);
   2429 	}
   2430 
   2431 	MTYPE(mblk_icmp) = M_DATA;
   2432 	icmp = (struct icmp *) mblk_icmp->b_wptr;
   2433 	icmp->icmp_type = ICMP_UNREACH;
   2434 	icmp->icmp_code = fin->fin_icode & 0xFF;
   2435 	icmp->icmp_void = 0;
   2436 	icmp->icmp_cksum = 0;
   2437 	mblk_icmp->b_wptr += ICMPERR_ICMPHLEN;
   2438 
   2439 	/*
   2440 	 * Step (3)
   2441 	 *
   2442 	 * Complete ICMP packet - link ICMP header with L4 data from original
   2443 	 * IP packet.
   2444 	 */
   2445 	linkb(mblk_icmp, mblk_ip);
   2446 
   2447 	/*
   2448 	 * Step (4)
   2449 	 *
   2450 	 * Swap IP addresses and change IP header fields accordingly in
   2451 	 * original IP packet.
   2452 	 *
   2453 	 * There is a rule option return-icmp as a dest for physical
   2454 	 * interfaces. This option becomes useless for loopback, since IPF box
   2455 	 * uses same address as a loopback destination. We ignore the option
   2456 	 * here, the ICMP packet will always look like as it would have been
   2457 	 * sent from the original destination host.
   2458 	 */
   2459 	tmp_src = fin->fin_ip->ip_src;
   2460 	fin->fin_ip->ip_src = fin->fin_ip->ip_dst;
   2461 	fin->fin_ip->ip_dst = tmp_src;
   2462 	fin->fin_ip->ip_p = IPPROTO_ICMP;
   2463 	fin->fin_ip->ip_sum = 0;
   2464 
   2465 	/*
   2466 	 * Step (5)
   2467 	 *
   2468 	 * We trim the orignal mblk to hold IP header only.
   2469 	 */
   2470 	fin->fin_m->b_wptr = fin->fin_dp;
   2471 	orig_iphdr_len = fin->fin_m->b_wptr -
   2472 			    (fin->fin_m->b_rptr + fin->fin_ipoff);
   2473 	fin->fin_ip->ip_len = htons(icmp_pld_len + ICMPERR_ICMPHLEN +
   2474 			    orig_iphdr_len);
   2475 
   2476 	/*
   2477 	 * ICMP chksum calculation. The data we are calculating chksum for are
   2478 	 * spread over two mblks, therefore we have to use two for loops.
   2479 	 *
   2480 	 * First for loop computes chksum part for ICMP header.
   2481 	 */
   2482 	buf = (uint16_t *) icmp;
   2483 	len = ICMPERR_ICMPHLEN;
   2484 	for (sum = 0; len > 1; len -= 2)
   2485 		sum += *buf++;
   2486 
   2487 	/*
   2488 	 * Here we add chksum part for ICMP payload.
   2489 	 */
   2490 	len = icmp_pld_len;
   2491 	buf = (uint16_t *) mblk_ip->b_rptr;
   2492 	for (; len > 1; len -= 2)
   2493 		sum += *buf++;
   2494 
   2495 	/*
   2496 	 * Chksum is done.
   2497 	 */
   2498 	sum = (sum >> 16) + (sum & 0xffff);
   2499 	sum += (sum >> 16);
   2500 	icmp->icmp_cksum = ~sum;
   2501 
   2502 	/*
   2503 	 * Step (6)
   2504 	 *
   2505 	 * Release all packet mblks, except the first one.
   2506 	 */
   2507 	if (fin->fin_m->b_cont != NULL) {
   2508 		FREE_MB_T(fin->fin_m->b_cont);
   2509 	}
   2510 
   2511 	/*
   2512 	 * Append ICMP payload to first mblk, which already contains new IP
   2513 	 * header.
   2514 	 */
   2515 	linkb(fin->fin_m, mblk_icmp);
   2516 
   2517 	return (0);
   2518 }
   2519 
   2520 #ifdef USE_INET6
   2521 /* ------------------------------------------------------------------------ */
   2522 /* Function:    fr_make_icmp_v6                                             */
   2523 /* Returns:     int - 0 on success, -1 on failure			    */
   2524 /* Parameters:  fin(I) - pointer to packet information                      */
   2525 /*									    */
   2526 /* Please read comment at fr_make_icmp() wrapper function to get an idea    */
/* what is going to happen here and why. Once you read the comment there,   */
   2528 /* continue here with next paragraph.					    */
   2529 /*									    */
   2530 /* This function turns IPv6 packet (UDP, TCP, ...) into ICMPv6 response.    */
   2531 /* The algorithm is fairly simple:					    */
   2532 /*	1) We need to get copy of complete mblk.			    */
   2533 /*									    */
   2534 /*	2) New ICMPv6 header is created.				    */
   2535 /*									    */
   2536 /*	3) The copy of original mblk with packet is linked to ICMPv6	    */
   2537 /*	   header.							    */
   2538 /*									    */
   2539 /*	4) The checksum must be adjusted.				    */
   2540 /*									    */
   2541 /*	5) IP addresses in original mblk are swapped and IP header data	    */
   2542 /*	   are adjusted (protocol number).				    */
   2543 /*									    */
   2544 /*	6) Original mblk is trimmed to hold IPv6 header only, then it is    */
   2545 /*	   linked with the ICMPv6 data we got from (3).			    */
   2546 /* ------------------------------------------------------------------------ */
   2547 static int fr_make_icmp_v6(fin)
   2548 fr_info_t *fin;
   2549 {
   2550 	struct icmp6_hdr *icmp6;
   2551 	tcphdr_t *tcp;
   2552 	struct in6_addr	tmp_src6;
   2553 	size_t icmp_pld_len;
   2554 	mblk_t *mblk_ip, *mblk_icmp;
   2555 
   2556 	if (fin->fin_v != 6)
   2557 		return (-1);
   2558 
   2559 	/*
   2560 	 * If we are dealing with TCP, then packet must SYN/FIN to be routed by
   2561 	 * IP stack. If it is not SYN/FIN, then we must drop it silently.
   2562 	 */
   2563 	tcp = (tcphdr_t *) fin->fin_dp;
   2564 
   2565 	if ((fin->fin_p == IPPROTO_TCP) &&
   2566 	    ((tcp == NULL) || ((tcp->th_flags & (TH_SYN | TH_FIN)) == 0)))
   2567 		return (-1);
   2568 
   2569 	/*
   2570 	 * Step (1)
   2571 	 *
   2572 	 * We need to copy complete packet in case of IPv6, no trimming is
   2573 	 * needed (except the L2 headers).
   2574 	 */
   2575 	icmp_pld_len = M_LEN(fin->fin_m);
   2576 	fin->fin_m->b_rptr += fin->fin_ipoff;
   2577 	if ((mblk_ip = copyb(fin->fin_m)) == NULL)
   2578 		return (-1);
   2579 	fin->fin_m->b_rptr -= fin->fin_ipoff;
   2580 
   2581 	/*
   2582 	 * Step (2)
   2583 	 *
   2584 	 * Allocate and create ICMP header.
   2585 	 */
   2586 	mblk_icmp = (mblk_t *) allocb(sizeof (struct icmp6_hdr),
   2587 			BPRI_HI);
   2588 
   2589 	if (mblk_icmp == NULL)
   2590 		return (-1);
   2591 
   2592 	MTYPE(mblk_icmp) = M_DATA;
   2593 	icmp6 =  (struct icmp6_hdr *) mblk_icmp->b_wptr;
   2594 	icmp6->icmp6_type = ICMP6_DST_UNREACH;
   2595 	icmp6->icmp6_code = fin->fin_icode & 0xFF;
   2596 	icmp6->icmp6_data32[0] = 0;
   2597 	mblk_icmp->b_wptr += sizeof (struct icmp6_hdr);
   2598 
   2599 	/*
   2600 	 * Step (3)
   2601 	 *
   2602 	 * Link the copy of IP packet to ICMP header.
   2603 	 */
   2604 	linkb(mblk_icmp, mblk_ip);
   2605 
   2606 	/*
   2607 	 * Step (4)
   2608 	 *
   2609 	 * Calculate chksum - this is much more easier task than in case of
   2610 	 * IPv4  - ICMPv6 chksum only covers IP addresses, and payload length.
   2611 	 * We are making compensation just for change of packet length.
   2612 	 */
   2613 	icmp6->icmp6_cksum = icmp_pld_len + sizeof (struct icmp6_hdr);
   2614 
   2615 	/*
   2616 	 * Step (5)
   2617 	 *
   2618 	 * Swap IP addresses.
   2619 	 */
   2620 	tmp_src6 = fin->fin_ip6->ip6_src;
   2621 	fin->fin_ip6->ip6_src = fin->fin_ip6->ip6_dst;
   2622 	fin->fin_ip6->ip6_dst = tmp_src6;
   2623 
   2624 	/*
   2625 	 * and adjust IP header data.
   2626 	 */
   2627 	fin->fin_ip6->ip6_nxt = IPPROTO_ICMPV6;
   2628 	fin->fin_ip6->ip6_plen = htons(icmp_pld_len + sizeof (struct icmp6_hdr));
   2629 
   2630 	/*
   2631 	 * Step (6)
   2632 	 *
   2633 	 * We must release all linked mblks from original packet and keep only
   2634 	 * the first mblk with IP header to link ICMP data.
   2635 	 */
   2636 	fin->fin_m->b_wptr = (unsigned char *) fin->fin_ip6 + sizeof (ip6_t);
   2637 
   2638 	if (fin->fin_m->b_cont != NULL) {
   2639 		FREE_MB_T(fin->fin_m->b_cont);
   2640 	}
   2641 
   2642 	/*
   2643 	 * Append ICMP payload to IP header.
   2644 	 */
   2645 	linkb(fin->fin_m, mblk_icmp);
   2646 
   2647 	return (0);
   2648 }
   2649 #endif	/* USE_INET6 */
   2650 
   2651 /* ------------------------------------------------------------------------ */
   2652 /* Function:    fr_make_icmp                                                */
   2653 /* Returns:     int - 0 on success, -1 on failure			    */
   2654 /* Parameters:  fin(I) - pointer to packet information                      */
   2655 /*                                                                          */
   2656 /* We must alter the original mblks passed to IPF from IP stack via	    */
   2657 /* FW_HOOKS. The reasons why we must alter packet are discussed within	    */
   2658 /* comment at fr_make_rst() function.					    */
   2659 /*									    */
   2660 /* The fr_make_icmp() function acts as a wrapper, which passes the code	    */
   2661 /* execution to	fr_make_icmp_v4() or fr_make_icmp_v6() depending on	    */
   2662 /* protocol version. However there are some details, which are common to    */
   2663 /* both IP versions. The details are going to be explained here.	    */
   2664 /*                                                                          */
   2665 /* The packet looks as follows:						    */
   2666 /*    xxx | IP hdr | IP payload    ...	| 				    */
   2667 /*    ^   ^        ^            	^				    */
   2668 /*    |   |        |            	|				    */
   2669 /*    |   |        |		fin_m->b_wptr = fin->fin_dp + fin->fin_dlen */
   2670 /*    |   |        |							    */
   2671 /*    |   |        `- fin_m->fin_dp (in case of IPv4 points to L4 header)   */
   2672 /*    |   |								    */
   2673 /*    |   `- fin_m->b_rptr + fin_ipoff (fin_ipoff is most likely 0 in case  */
   2674 /*    |      of loopback)						    */
   2675 /*    |   								    */
   2676 /*    `- fin_m->b_rptr -  points to L2 header in case of physical NIC	    */
   2677 /*                                                                          */
   2678 /* All relevant IP headers are pulled up into the first mblk. It happened   */
   2679 /* well in advance before the matching rule was found (the rule, which took */
   2680 /* us here, to fr_make_icmp() function).				    */
   2681 /*                                                                          */
   2682 /* Both functions will turn packet passed in fin->fin_m mblk into a new	    */
   2683 /* packet. New packet will be represented as chain of mblks.		    */
   2684 /* orig mblk |- b_cont ---.						    */
   2685 /*    ^                    `-> ICMP hdr |- b_cont--.			    */
   2686 /*    |	                          ^	            `-> duped orig mblk	    */
   2687 /*    |                           |				^	    */
   2688 /*    `- The original mblk        |				|	    */
   2689 /*       will be trimmed to       |				|	    */
   2690 /*       to contain IP header     |				|	    */
   2691 /*       only                     |				|	    */
   2692 /*                                |				|	    */
   2693 /*                                `- This is newly		|           */
   2694 /*                                   allocated mblk to		|	    */
   2695 /*                                   hold ICMPv6 data.		|	    */
   2696 /*								|	    */
   2697 /*								|	    */
   2698 /*								|	    */
   2699 /*	    This is the copy of original mblk, it will contain -'	    */
/*	    original IP packet in case of ICMPv6. In case of		    */
   2701 /*	    ICMPv4 it will contain up to 8 bytes of IP payload		    */
   2702 /*	    (TCP/UDP/L4) data from original packet.			    */
   2703 /* ------------------------------------------------------------------------ */
   2704 int fr_make_icmp(fin)
   2705 fr_info_t *fin;
   2706 {
   2707 	int rv;
   2708 
   2709 	if (fin->fin_v == 4)
   2710 		rv = fr_make_icmp_v4(fin);
   2711 #ifdef USE_INET6
   2712 	else if (fin->fin_v == 6)
   2713 		rv = fr_make_icmp_v6(fin);
   2714 #endif
   2715 	else
   2716 		rv = -1;
   2717 
   2718 	return (rv);
   2719 }
   2720 #endif	/* _KERNEL && SOLARIS2 >= 10 */
   2721