Home | History | Annotate | Download | only in nca
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #ifndef	_INET_NCA_H
     27 #define	_INET_NCA_H
     28 
     29 #ifdef	__cplusplus
     30 extern "C" {
     31 #endif
     32 
     33 #include <sys/thread.h>
     34 #include <sys/door.h>
     35 #include <sys/disp.h>
     36 #include <sys/systm.h>
     37 #include <sys/processor.h>
     38 #include <sys/socket.h>
     39 #include <inet/common.h>
     40 #include <inet/ip.h>
     41 #include <inet/tcp.h>
     42 #include <inet/nca/ncadoorhdr.h>
     43 
     44 /*
     45  * The NCA debugging facilities provided via ADB and MDB depend on a
     46  * number of NCA implementation details.  In particular, note that:
     47  *
     48  *	* ADB macros *must* be revised whenever members are added or
     49  *	  removed from the following structures:
     50  *
     51  *		nca_conn_t connf_t nca_cpu_t dcb_t hcb_t nca_if_t nca_io2_t
     52  *		node_t nodef_t sqfan_t nca_squeue_t tb_t te_t ti_t tw_t
     53  *
     54  *	* ADB macros should be added when new core data structures are
     55  *	  added to NCA.  Generally, if you had to put it in here, you
     56  *	  need to write a macro for it.
     57  *
     58  *	* MDB has many dependencies on the way core data structures
     59  *	  are connected.  In general, if you break these dependencies,
     60  *	  the MDB NCA module will fail to build.  However, breakage
     61  *	  may go undetected (for instance, changing a linked list
     62  *	  into a circularly linked list).  If you have any doubts,
     63  *	  inspect the NCA module source before committing your changes.
     64  *
     65  *	* MDB depends on the following variables (and their current
     66  *	  semantics) in order to function correctly:
     67  *
     68  *		nca_conn_fanout nca_conn_fanout_size nca_gv nca_lru
     69  *		urihash filehash
     70  *
     71  *	  If you change the names or *semantics* of these variables,
     72  *	  you must modify the MDB module accordingly.
     73  *
     74  *	  In addition, you should consider whether the changes you've
     75  *	  made should be reflected in the MDB dcmds themselves.
     76  */
     77 
     78 /* The queue to make upcall on for NCAfs */
     79 extern queue_t *ncaupcallq;
     80 extern kmutex_t ncaupcallq_lock;
     81 
     82 extern int nca_logging_on;
     83 extern int nca_conn_fanout_size;
     84 extern boolean_t nca_deferred_oq_if;
     85 extern boolean_t nca_fanout_iq_if;
     86 
     87 /* Checksum pointer for no checksum */
     88 
     89 #define	NO_CKSUM (void *)-1
     90 
     91 /* undef any tcp.h:tcp_t members overloaded by the Solaris 8 tcp.h */
     92 
     93 #undef	tcp_last_rcv_lbolt
     94 #undef	tcp_state
     95 #undef	tcp_rto
     96 #undef	tcp_snd_ts_ok
     97 #undef	tcp_snd_ws_ok
     98 #undef	tcp_snxt
     99 #undef	tcp_swnd
    100 #undef	tcp_mss
    101 #undef	tcp_iss
    102 #undef	tcp_rnxt
    103 #undef	tcp_rwnd
    104 #undef	tcp_lport
    105 #undef	tcp_fport
    106 #undef	tcp_ports
    107 
    108 /* the iph_t is no longer defined in ip.h for Solaris 8 ? */
    109 
    110 /* Unaligned IP header */
    111 typedef struct iph_s {
    112 	uchar_t	iph_version_and_hdr_length;
    113 	uchar_t	iph_type_of_service;
    114 	uchar_t	iph_length[2];
    115 	uchar_t	iph_ident[2];
    116 	uchar_t	iph_fragment_offset_and_flags[2];
    117 	uchar_t	iph_ttl;
    118 	uchar_t	iph_protocol;
    119 	uchar_t	iph_hdr_checksum[2];
    120 	uchar_t	iph_src[4];
    121 	uchar_t	iph_dst[4];
    122 } iph_t;
    123 
    124 
    125 #define	true	B_TRUE			/* used with type boolean_t */
    126 #define	false	B_FALSE			/* used with type boolean_t */
    127 
    128 /*
    129  * Power of 2^N Primes useful for hashing for N of 0-28,
    130  * these primes are the nearest prime <= 2^N - 2^(N-2).
    131  */
    132 
    133 #define	P2Ps() {0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,	\
    134 		6143, 12281, 24571, 49139, 98299, 196597, 393209,	\
    135 		786431, 1572853, 3145721, 6291449, 12582893, 25165813,	\
    136 		50331599, 100663291, 201326557, 0}
    137 
    138 /*
    139  * Serialization queue type (move to strsubr.h (stream.h?) as a general
    140  * purpose lightweight mechanism for mblk_t serialization ?).
    141  */
    142 typedef struct nca_squeue_s {
    143 	uint16_t	sq_state;	/* state flags */
    144 	uint16_t	sq_count;	/* message count */
    145 	uint32_t	sq_type;	/* type flags */
    146 	processorid_t	sq_bind;	/* processor to bind to */
    147 	ddi_softintr_t	sq_softid;	/* softintr() id */
    148 	void		(*sq_init)();	/* initialize function */
    149 	void		*sq_init_arg;	/* initialize argument */
    150 	void		(*sq_proc)();	/* process function */
    151 	mblk_t		*sq_first;	/* first mblk chain or NULL */
    152 	mblk_t		*sq_last;	/* last mblk chain or NULL */
    153 	clock_t		sq_wait;	/* lbolts to wait after a fill() */
    154 	clock_t		sq_iwait;	/* lbolt after nointr() */
    155 	clock_t		sq_pwait;	/* lbolt after pause() */
    156 	int		sq_isintr;	/* is being or was serviced by */
    157 	timeout_id_t	sq_tid;		/* timer id of pending timeout() */
    158 	kcondvar_t	sq_async;	/* async thread blocks on */
    159 	kmutex_t	sq_lock;	/* lock before using any member */
    160 	clock_t		sq_awaken;	/* time async thread was awakened */
    161 	void		*sq_priv;	/* user defined private */
    162 	kt_did_t	sq_ktid;	/* kernel thread id */
    163 } nca_squeue_t;
    164 
    165 /*
    166  * State flags and message count (i.e. properties that change)
    167  * Note: The MDB NCA module depends on the values of these flags.
    168  */
    169 
    170 #define	SQS_CNT_TOOMANY	0x8000	/* message count toomany */
    171 
    172 /* nca_squeue_t state flags now only 16 bits */
    173 
    174 #define	SQS_PROC	0x0001	/* being processed */
    175 #define	SQS_WORKER	0x0002	/* worker thread */
    176 #define	SQS_ENTER	0x0004	/* enter thread */
    177 #define	SQS_FAST	0x0008	/* enter-fast thread */
    178 #define	SQS_PROXY	0x0010	/* proxy thread */
    179 #define	SQS_SOFTINTR	0x0020	/* softint thread */
    180 				/* 0x00C0 bits not used */
    181 
    182 #define	SQS_NOINTR	0x0100	/* no interrupt processing */
    183 #define	SQS_PAUSE	0x0200	/* paused */
    184 #define	SQS_INTRWAIT	0x0400	/* interrupt waiting */
    185 #define	SQS_NOPROC	0x0800	/* no processing */
    186 				/* 0x7000 bits not used */
    187 #define	SQS_EXIT	0x8000	/* worker(s) exit */
    188 
    189 /*
    190  * Type flags (i.e. properties that don't change).
    191  * Note: The MDB NCA module depends on the values of these flags.
    192  */
    193 
    194 #define	SQT_BIND_MASK	0xFF000000	/* bind flags mask */
    195 
    196 #define	SQT_KMEM	0x00000001	/* was kmem_alloc()ed */
    197 #define	SQT_DEFERRED	0x00000002	/* deferred processing */
    198 #define	SQT_SOFTINTR	0x00000004	/* use softintr() */
    199 
    200 #define	SQT_BIND_ANY	0x01000000	/* bind worker thread to any CPU */
    201 #define	SQT_BIND_TO	0x02000000	/* bind worker thread to speced CPU */
    202 
    203 #define	SQ_STATE_IS(sqp, flags) ((sqp)->sq_state & (flags))
    204 #define	SQ_TYPE_IS(sqp, flags) ((sqp)->sq_type & (flags))
    205 
    206 
    207 typedef struct sqfan_s {
    208 	uint32_t	flg;		/* flags only */
    209 	uint32_t	cnt;		/* vector count */
    210 	uint32_t	ix;		/* next sqv[] to process */
    211 	uint32_t	drain;		/* max mblk(s) draind per */
    212 	nca_squeue_t	**sqv;	/* pointer to nca_squeue_t pointer vector */
    213 } sqfan_t;
    214 
    215 #define	SQF_DIST_CNT	0x0001	/* sqfan_t dist by queue count */
    216 #define	SQF_DIST_IPv4	0x0002	/* sqfan_t dist by IPv4 src addr */
    217 
    218 /*
    219  * A multiphase timer is implemented using the te_t, tb_t, and ti_t structs.
    220  *
    221  * The multiple phases of timer entry execution are:
    222  *
    223  * 1) resource, execution is done from resource reclaim when the timer event
    224  *    is the freeing of the timed resource.
    225  *
    226  * 2) process, execution is done from process thread yield (idle/return).
    227  *
    228  * 3) time, execution is done from a timeout callback thread.
    229  *
    230  * Each of the phases has a separate timer fire time represented by the
    231  * ti_t members lbolt1, lbolt2, and lbolt3. Each lbolt is an absolute
    232  * lbolt value with lbolt1 <= lbolt2 <= lbolt3.
    233  */
    234 
    235 /*
    236  * te_t - timer entry.
    237  */
    238 
    239 typedef struct te_s {
    240 	struct te_s *prev;	/* prev te_t */
    241 	struct te_s *next;	/* next te_t */
    242 	struct tb_s *tbp;	/* pointer to timer bucket */
    243 	void	*ep;		/* pointer to encapsulating struct */
    244 } te_t;
    245 
    246 /*
    247  * tb_t - timer bucket.
    248  */
    249 
    250 typedef struct tb_s {
    251 	struct tb_s *next;	/* next tb_t in ascending time order */
    252 	clock_t	exec;		/* te_t lbolt exec value for bucket */
    253 	te_t	*head;		/* head of te_t list (first timer) */
    254 	te_t	*tail;		/* tail of te_t list (last timer) */
    255 } tb_t;
    256 
    257 /*
    258  * ti_t - timer state.
    259  */
    260 
    261 typedef struct ti_s {
    262 	clock_t	exec;		/* next te_t exec value (0 = NONE) */
    263 	clock_t	lbolt1;		/* phase1 lbolt1 (0 = NONE) */
    264 	clock_t	lbolt2;		/* phase2 lbolt2 (0 = NONE) */
    265 	clock_t	lbolt3;		/* phase3 lbolt3 (0 = NONE) */
    266 	tb_t	*head;		/* head of tb_t list (first timer bucket) */
    267 	tb_t	*tail;		/* tail of tb_t list (last timer bucket) */
    268 	timeout_id_t tid;	/* timer id of pending timeout() (0 = NONE) */
    269 	void	*ep;		/* pointer to encapsulating struct */
    270 } ti_t;
    271 
    272 #define	NCA_TI_INPROC	-1	/* Processing going on */
    273 #define	NCA_TI_NONE	0	/* no lbolt */
    274 
    275 /*
    276  * TIME_WAIT grounded doubly linked list of nca_conn_t's awaiting TIME_WAIT
    277  * expiration. This list is used for reclaim, reap, and timer based
    278  * processing.
    279  *
    280  * A multiphase timer is used:
    281  *
    282  * phase 1) reclaim of connections during connection allocation
    283  *
    284  * phase 2) reaping of connections during nca_squeue_t inq thread unwind
    285  *
    286  * phase 3) timeout of connections as a result of a timeout().
    287  *
    288  * Each of the phases has a separate timer fire lbolt represented by the
    289  * members lbolt1, lbolt2, and lbolt3; each is an absolute lbolt value
    290  * with lbolt1 <= lbolt2 <= lbolt3.
    291  */
    292 
    293 typedef struct tw_s {
    294 	clock_t	lbolt1;		/* phase1 lbolt value (0 = NONE) */
    295 	clock_t	lbolt2;		/* phase2 lbolt value  */
    296 	clock_t	lbolt3;		/* phase3 lbolt value  */
    297 	struct nca_conn_s *head;	/* Head of nca_conn_t list */
    298 	struct nca_conn_s *tail;	/* Tail of nca_conn_t list */
    299 	timeout_id_t tid;	/* Timer id of pending timeout() (0 = NONE) */
    300 	void	*ep;		/* pointer to encapsulating struct */
    301 } tw_t;
    302 
    303 #define	NCA_TW_NONE	0	/* no lbolt */
    304 
    305 #define	NCA_TW_MS	1000
    306 
    307 #define	NCA_TW_LBOLT MSEC_TO_TICK(NCA_TW_MS)
    308 
    309 #define	NCA_TW_LBOLTS(twp, future) {					\
    310 	clock_t	_lbolt = (future);					\
    311 	clock_t	_mod = _lbolt % NCA_TW_LBOLT;				\
    312 									\
    313 	if (_mod) {							\
    314 		/* Roundup to next TIME_WAIT bucket */			\
    315 		_lbolt += NCA_TW_LBOLT - _mod;				\
    316 	}								\
    317 	if ((twp)->lbolt1 != _lbolt) {					\
    318 		(twp)->lbolt1 = _lbolt;					\
    319 		_lbolt += NCA_TW_LBOLT;					\
    320 		(twp)->lbolt2 = _lbolt;					\
    321 		_lbolt += NCA_TW_LBOLT;					\
    322 		(twp)->lbolt3 = _lbolt;					\
    323 		if ((twp)->tid != 0) {					\
    324 			(void) untimeout((twp)->tid);			\
    325 			(twp)->tid = 0;					\
    326 		}							\
    327 		if ((_lbolt) != NCA_TW_NONE) {				\
    328 			(twp)->tid = timeout((pfv_t)nca_tw_fire, (twp),	\
    329 			    (twp)->lbolt3 - ddi_get_lbolt());		\
    330 		}							\
    331 	}								\
    332 }
    333 
    334 /*
    335  * The Node Fanout structure.
    336  *
    337  * The hash tables and their linkage (hashnext) are protected by the
    338  * per-bucket lock. Each node_t inserted in the list points back at
    339  * the nodef_t that heads the bucket (hashfanout).
    340  */
    341 
    342 typedef struct nodef_s {
    343 	struct node_s	*head;
    344 	kmutex_t	lock;
    345 } nodef_t;
    346 
    347 /*
    348  * A node_t is used to represent a cached byte-stream object. A node_t is
    349  * in one of four active states:
    350  *
    351  * 1) path != NULL, member of a node_t hash list with an object description
    352  *    (hashnext, size, path, pathsz members valid).
    353  *
    354  * 2) pp != NULL, 1) + phys pages allocated (pp, plrupn, plrunn members valid).
    355  *
    356  * 3) data != NULL, 2) + virt mapping allocated (data, datasz, vlrupn, vlrunn
    357  *    members valid).
    358  *
    359  * 4) cksum != NULL 3) + checksum mapping allocated
    360  */
    361 
    362 typedef struct node_s {
    363 	uint32_t 	ref;		/* ref (see below) state */
    364 	uint32_t 	cnt;		/* ref count */
    365 	int32_t		size;		/* object size (-1 = UNKNOWN) */
    366 	uint32_t	mss;		/* mblk(s) in size mss */
    367 	uint64_t	ctag;		/* usr defined cache tag, 0 => no tag */
    368 	ipaddr_t	laddr;		/* local IP, for virtual hosting */
    369 	uint16_t	lport;		/* local port, for virtual hosting */
    370 
    371 	struct node_s	*plrunn;	/* Phys LRU list next node_t */
    372 	struct node_s	*plrupn;	/* Phys LRU list previous node_t */
    373 	struct node_s	*vlrunn;	/* Virt LRU list next node_t */
    374 	struct node_s	*vlrupn;	/* Virt LRU list previous node_t */
    375 
    376 	nodef_t	*hashfanout;		/* hash bucket we're part of */
    377 	nodef_t	*ctaghashfanout;	/* ctaghash bucket we're part off */
    378 	struct node_s *hashnext;	/* hash list next node_t */
    379 	struct node_s *ctaghashnext;	/* ctaghash list next node_t */
    380 	struct nca_conn_s *connhead;	/* head of list of conn(s) in miss */
    381 	struct nca_conn_s *conntail;	/* tail of list of conn(s) in miss */
    382 	struct node_s *next;		/* needed if data is in chunks */
    383 	struct node_s *back;		/* needed if data is in chunks */
    384 
    385 	clock_t	expire;		/* lbolt node_t expires (0 = NOW, -1 = NEVER) */
    386 	time_t	lastmod;	/* HTTP "Last-Modified:" value */
    387 
    388 	mblk_t	*req;		/* whole HTTP request (including headers) */
    389 	int	reqsz;		/* size of above */
    390 	int	reqcontl;	/* HTTP "Content-Length:" value */
    391 	uint32_t rcv_cnt;	/* rcv_list byte count */
    392 	mblk_t	*rcv_head;	/* rcv_list head */
    393 	mblk_t	*rcv_tail;	/* rcv_list tail */
    394 	mblk_t	*rcv_ptr;	/* rcv_list pointer */
    395 
    396 	nca_squeue_t *sqp;	/* squeue node_t is being processed from */
    397 	char	*path;		/* URI path component */
    398 	int	pathsz;		/* size of above */
    399 	uint_t	method;		/* HTTP request method */
    400 	uint_t	version;	/* HTTP request version */
    401 	char	*reqhdr;	/* HTTP request header(s) */
    402 	int	reqhdrsz;	/* size of above */
    403 	char	*reqhost;	/* HTTP "Host:" string */
    404 	int	reqhostsz;	/* size of above */
    405 	char	*reqaccept;	/* HTTP "Accept:" string */
    406 	int	reqacceptsz;	/* size of above */
    407 	char	*reqacceptl;	/* HTTP "Accept-Language:" string */
    408 	int	reqacceptlsz;	/* size of above */
    409 
    410 	page_t	**pp;		/* page pointer vector for data */
    411 	char	*data;		/* data buffer */
    412 	int	datasz;		/* size of above */
    413 	uint16_t *cksum;	/* cksum() vector for data by mss */
    414 	size_t	cksumlen;	/* length of memory block for above vector */
    415 	uint_t	resbody;	/* HTTP response body at &data[resbody] */
    416 
    417 	int	hlen;		/* data buffer split header len */
    418 	int	fileoff;	/* file include offset */
    419 	int	filelen;	/* length of file */
    420 	struct node_s *fileback; /* head node_t of a file list (-1 for death) */
    421 	struct node_s *filenext; /* next node_t of a file list */
    422 	struct node_s *ctagback; /* head node_t of a ctag list */
    423 	struct node_s *ctagnext; /* next node_t of a ctag list */
    424 	vnode_t	*filevp;	/* vnode for the file */
    425 
    426 	kmutex_t lock;		/* serializes access to node_t */
    427 	frtn_t	frtn;		/* STREAMS free routine; always node_freeb() */
    428 	boolean_t headchunk;	/* true if this node is the head chunk */
    429 
    430 	/*
    431 	 * The following 4 fields are used to record node states when
    432 	 * upcalls are preempted. When preempted upcalls are not relevant,
    433 	 * these fields should have default value 0.
    434 	 */
    435 	uint8_t advise;		/* an interpreted advise from http */
    436 	boolean_t last_advisory; /* preempted upcall state -- advisory bit */
    437 	boolean_t advisory;	/* need advisory from httpd before use */
    438 	boolean_t first_upcall;	/* node in first upcall, a internal state */
    439 
    440 	kcondvar_t cv;		/* sync upcall/downcall process on a node */
    441 	int	onqueue;	/* == 1 if on miss_queue, debug aid */
    442 } node_t;
    443 
    444 /* Note: The MDB NCA module depends on the values of these flags. */
    445 
    446 #define	REF_URI		0x80000000 /* & ref = node_t URI hashed */
    447 #define	REF_PHYS	0x40000000 /* & ref = phys mapping in-use */
    448 #define	REF_VIRT	0x20000000 /* & ref = virt mapping in-use */
    449 #define	REF_CKSUM	0x10000000 /* & ref = checksum mapping in-use */
    450 #define	REF_KMEM	0x08000000 /* & ref = kmem mapped (PHYS|VIRT) */
    451 #define	REF_DONE	0x04000000 /* & ref = node_t fill is done */
    452 #define	REF_SAFED	0x02000000 /* & ref = node_t not safe for use */
    453 #define	REF_FILE	0x01000000 /* & ref = node_t filename hashed */
    454 #define	REF_RESP	0x00800000 /* & ref = node_t response header parsed */
    455 #define	REF_NOLRU	0x00400000 /* & ref = node_t not safe for lru reclaim */
    456 #define	REF_MISS	0x00200000 /* & ref = node_t is/will missed() proc */
    457 #define	REF_ONPLRU	0x00100000 /* & ref = node_t is on Phys LRU */
    458 #define	REF_ONVLRU	0x00080000 /* & ref = node_t is on Virt LRU */
    459 #define	REF_PREEMPT	0x00040000 /* & ref = node_t processing preempted */
    460 #define	REF_CTAG	0x00020000 /* & ref = node_t CTAG hashed */
    461 #define	REF_UPCALL	0x00010000 /* & ref = node_t upcall not yet complete */
    462 #define	REF_OWNED	0x00008000 /* & ref = node_t owned (won't be freed) */
    463 #define	REF_ERROR	0x00004000 /* & ref = node_t errored */
    464 #define	REF_VNODE	0x00002000 /* & ref = node_t vnode hashed */
    465 #define	REF_NCAFS	0x00001000 /* & ref = node_t is NCAfs required */
    466 #define	REF_SEGMAP	0x00000800 /* & ref = segmapped (PHYS|VIRT) */
    467 #define	REF_UNUSED	0x000007FF /* & ref = UNUSED */
    468 /*
    469  * Mappings where no seperate PHYS and VIRT, i.e. single mapping with a
    470  * virtual address e.g. REF_KMEM and REF_SEGMAP.
    471  */
    472 #define	REF_NOVIRT	(REF_KMEM | REF_SEGMAP)
    473 
    474 /* Is this node safe for reclaim ? */
    475 #define	REF_RECLAIM	(REF_SAFED | REF_NOLRU | REF_MISS)
    476 
    477 /*
    478  * NCA node_t reference counting is more complicated than nca_conn_t reference
    479  * counting because we pass parts of node_t's (masquerading as dblk
    480  * buffers) into the STREAMS subsystem which eventually get freed by
    481  * network drivers just like regular dblk buffers.  Also, unlike nca_conn_t's,
    482  * we may wish to keep a node_t around even after there are no outstanding
    483  * references, since it's possible that it will be requested again.
    484  *
    485  * Thus, the node_t reference count reflects the number of active codepaths
    486  * in Solaris making use of a given node_t -- each codepath that requires
    487  * that the node_t stick around once it drops the node_t lock must acquire
    488  * a reference via NODE_REFHOLD and drop that reference via NODE_REFRELE
    489  * when done.  Note that following a NODE_REFRELE the node that was
    490  * released may no longer exist and thus it should not be referenced unless
    491  * the codepath has another outstanding reference.  When a node_t is passed
    492  * into the STREAMS subsystem via desballoc() and related interfaces, a
    493  * NODE_REFHOLD should be placed on the node_t and the free routine should
    494  * be set to node_freeb(), which will in turn call NODE_REFRELE.
    495  *
    496  * The concept of node ownership allows NCA to express that it would like
    497  * this node to hang around, even if there are no "explicit" references to
    498  * it (the ownership counts as an implicit reference).  All "headchunk"
    499  * hashed nodes are owned when they are created.  If they subsequently
    500  * become disowned (currently via nca_node_del() or nca_reclaim_vlru()),
    501  * they may have some or all their resources freed (via node_fr()) as soon
    502  * as the last reference to them is removed.  Note that it's possible that
    503  * a disowned node may become of interest again before some or all of its
    504  * resources were reclaimed -- in this case, it must be reowned via
    505  * NODE_OWN.  Note that an unhashed node should never be owned, though it
    506  * of course may be held and released; this is because there is no sense
    507  * in owning a node which is merely temporary (i.e., not hashed somewhere).
    508  * Note that the corollary of this statement is not true -- that is, just
    509  * because a node is hashed does not mean it is owned (it may have been
    510  * disowned via nca_reclaim_vlru()) -- this is why code must always reown
    511  * hashed nodes if it's desirable to have them stick around.
    512  *
    513  * All four macros *must* be called with the node lock held.  However,
    514  * NODE_DISOWN and NODE_REFRELE return with the lock unlocked (if there is
    515  * still a lock at all), because the operation may have just removed the
    516  * final reference to a node and it may no longer exist.
    517  *
    518  * A version of NODE_REFRELE is provided which doesn't unlock the lock but
    519  * can only be used when the caller can guarantee that it's not the last ref
    520  * (e.g. the caller has another outstanding reference) as if it's the last
    521  * ref the node_t may no longer exist. The new macro is NODE_REFRELE_LOCKED.
    522  */
    523 
    524 #define	NODE_DISOWN(np) {						\
    525 									\
    526 	NODE_T_TRACE((np), NODE_T_TRACE_DISOWN);			\
    527 	ASSERT(mutex_owned(&(np)->lock));				\
    528 									\
    529 	if ((np)->ref & REF_OWNED) {					\
    530 		if ((np)->cnt == 0)	{				\
    531 			panic("nca NODE_DISOWN: %p has no references",	\
    532 			    (void *)(np));				\
    533 		}							\
    534 		(np)->ref &= ~REF_OWNED;				\
    535 		NODE_REFRELE(np);					\
    536 	} else {							\
    537 		mutex_exit(&(np)->lock);				\
    538 	}								\
    539 }
    540 
    541 #define	NODE_OWN(np) {							\
    542 									\
    543 	NODE_T_TRACE((np), NODE_T_TRACE_OWN);				\
    544 	ASSERT(mutex_owned(&(np)->lock));				\
    545 									\
    546 	if (!((np)->ref & REF_OWNED)) {					\
    547 		if ((np)->cnt == UINT_MAX)				\
    548 			panic(						\
    549 			    "nca NODE_OWN: %p has too many references",	\
    550 			    (void *)(np));				\
    551 		(np)->ref |= REF_OWNED;					\
    552 		(np)->cnt++;						\
    553 	}								\
    554 }
    555 
    556 #define	NODE_REFHOLD(np) {						\
    557 									\
    558 	NODE_T_TRACE((np), NODE_T_TRACE_REFHOLD | ((np)->cnt + 1));	\
    559 	ASSERT(mutex_owned(&(np)->lock));				\
    560 									\
    561 	if ((np)->cnt == UINT_MAX)					\
    562 		panic("nca NODE_REFHOLD: %p has too many references",	\
    563 		    (void *)(np));					\
    564 	(np)->cnt++;							\
    565 }
    566 
    567 #define	NODE_REFRELE(np) {						\
    568 									\
    569 	NODE_T_TRACE((np), NODE_T_TRACE_REFRELE | ((np)->cnt - 1));	\
    570 	ASSERT(mutex_owned(&(np)->lock));				\
    571 									\
    572 	if (((np)->ref & REF_OWNED) && (np)->cnt == 1)			\
    573 		panic(							\
    574 		    "nca NODE_REFRELE: %p has only OWNED reference",	\
    575 		    (void *)(np));					\
    576 	if ((np)->cnt == 0)						\
    577 		panic("nca NODE_REFRELE: %p has no references",		\
    578 		    (void *)(np));					\
    579 	(np)->cnt--;							\
    580 	if ((np)->cnt == 0) {						\
    581 		ASSERT(((np)->ref & REF_OWNED) == 0);			\
    582 		node_fr(np);		/* node_fr unlocks the lock */	\
    583 	} else {							\
    584 		mutex_exit(&(np)->lock);				\
    585 	}								\
    586 }
    587 
    588 #define	NODE_REFRELE_LOCKED(np) {					\
    589 	uint_t	_cnt = (np)->cnt;					\
    590 									\
    591 	NODE_T_TRACE((np), NODE_T_TRACE_REFRELE | (_cnt - 1));		\
    592 	ASSERT(mutex_owned(&(np)->lock));				\
    593 									\
    594 	if ((np)->ref & REF_OWNED)					\
    595 		_cnt--;							\
    596 	if (((np)->ref & REF_OWNED) && _cnt == 0)			\
    597 		panic("nca NODE_REFRELE_LOCKED: "			\
    598 		    "%p has only OWNED reference", (void *)(np));	\
    599 	if (_cnt == 0)							\
    600 		panic("nca NODE_REFRELEL_LOCKED: "			\
    601 		    "%p has no references", (void *)(np));		\
    602 	if (_cnt == 1)							\
    603 		panic("nca NODE_REFRELEL_LOCKED: "			\
    604 		    "%p has only one reference", (void *)(np));		\
    605 	(np)->cnt--;							\
    606 }
    607 
    608 
    609 /*
    610  * NODE_T_TRACE - trace node_t events.
    611  *
    612  * adb:
    613  * 32 bit
    614  *	*node_tp,0t8192-(((*node_tp)-node_tv)%0t48)/PXXDDnPnPnPnPnPnPnPnn
    615  *	node_tv,((*node_tp)-node_tv)%0t48/PXXDDnPnPnPnPnPnPnPnn
    616  *
    617  * 64 bit
    618  *	*node_tp,0t8192-(((*node_tp)-node_tv)%0t56)/PXXDDnXnXnXnXnXnXnXnn
    619  *	node_tv,((*node_tp)-node_tv)%0t56/PXXDDnXnXnXnXnXnXnXnn
    620  *
    621  * For incremental node tracing, note the value of node_tp (node_tp/X) after
    622  * a run, then replace that in the 2nd line for node_tv.
    623  */
    624 
    625 #define	NODE_T_STK_DEPTH	6
    626 
    627 struct node_ts {
    628 	node_t	*node;
    629 	unsigned action;
    630 	unsigned ref;
    631 	unsigned cnt;
    632 	int	cpu;
    633 	pc_t	stk[NODE_T_STK_DEPTH + 1];
    634 };
    635 
    636 #undef	NODE_T_TRACE_ON
    637 
    638 #ifdef	NODE_T_TRACE_ON
    639 
    640 #define	NODE_T_TRACE_ALLOC	0xFF000000	/* kmem_alloc() of */
    641 #define	NODE_T_TRACE_ADD	0xFE000000	/* node_add() */
    642 
    643 #define	NODE_T_TRACE_OWN	0xEF000000	/* node has been owned */
    644 #define	NODE_T_TRACE_DISOWN	0xEE000000	/* node has been disowned */
    645 #define	NODE_T_TRACE_DESBALLOC	0xED000000	/* desballoc() */
    646 #define	NODE_T_TRACE_REFRELE	0xEC000000	/* refrele */
    647 #define	NODE_T_TRACE_REFHOLD	0xEB000000	/* refhold */
    648 #define	NODE_T_TRACE_NODE_FR	0xEA000000	/* node_fr() */
    649 
    650 #define	NODE_T_TRACE_TEMPNODE	0xDF000000	/* node_temp() */
    651 #define	NODE_T_TRACE_REPLACE	0xDE000000	/* node_replace() */
    652 #define	NODE_T_TRACE_FLUSH	0xDD000000	/* node_flush() */
    653 #define	NODE_T_TRACE_DOWNCALL	0xDC000000	/* downcall_service() */
    654 #define	NODE_T_TRACE_DOWNCALL_2	0xDB000000	/* dcall_service->httpd_data */
    655 
    656 #define	NODE_T_TRACE_DATA	0xCF000000	/* httpd_data() */
    657 
    658 #define	NODE_T_TRACE_LRU	0xAF000000	/* nca_lru insert */
    659 #define	NODE_T_TRACE_HTTPD	0xAE000000	/* call nca_httpd() */
    660 #define	NODE_T_TRACE_MISS	0xAD000000	/* http_miss() */
    661 #define	NODE_T_TRACE_TEMP	0xAC000000	/* np != *npp */
    662 #define	NODE_T_TRACE_XMIT	0xAB000000	/* tcp_xmit() */
    663 #define	NODE_T_TRACE_MISSED	0xAA000000	/* nca_missed() */
    664 
    665 #define	NODE_T_TRACE_DEL	0x00000000	/* node_del() */
    666 
    667 #if defined(__i386) || defined(__amd64)
    668 #define	NODE_T_TRACE_STK() {						\
    669 	_ix = getpcstack(&_p->stk[0], NODE_T_STK_DEPTH + 1);		\
    670 	if (_ix < NODE_T_STK_DEPTH + 1) {				\
    671 		_p->stk[_ix + 1] = 0;					\
    672 	}								\
    673 }
    674 #else
    675 #define	NODE_T_TRACE_STK() {						\
    676 	_p->stk[0] = (pc_t)callee();					\
    677 	_ix = getpcstack(&_p->stk[1], NODE_T_STK_DEPTH);		\
    678 	if (_ix < NODE_T_STK_DEPTH) {					\
    679 		_p->stk[_ix + 1] = 0;					\
    680 	}								\
    681 }
    682 #endif
    683 
    684 #define	NODE_TV_SZ 8192
    685 
    686 extern struct node_ts node_tv[NODE_TV_SZ];
    687 extern struct node_ts *node_tp;
    688 
    689 #define	NODE_T_TRACE(p, a) {						\
    690 	struct node_ts *_p;						\
    691 	struct node_ts *_np;						\
    692 	int    _ix;							\
    693 									\
    694 	do {								\
    695 		_p = node_tp;						\
    696 		if ((_np = _p + 1) == &node_tv[NODE_TV_SZ])		\
    697 			_np = node_tv;					\
    698 	} while (casptr(&node_tp, _p, _np) != _p);			\
    699 	_p->node = (p);							\
    700 	_p->action = (a);						\
    701 	_p->ref = (p) ? (p)->ref : 0;					\
    702 	_p->cnt = (p) ? (p)->cnt : 0;					\
    703 	_p->cpu = CPU->cpu_seqid;					\
    704 	NODE_T_TRACE_STK();						\
    705 }
    706 
    707 #else	/* NODE_T_TRACE_ON */
    708 
    709 #define	NODE_T_TRACE(p, a)
    710 
    711 #endif	/* NODE_T_TRACE_ON */
    712 
    713 /*
    714  * DOOR_TRACE - trace door node_t events.
    715  *
    716  * adb:
    717  * 32 bit
    718  *	*door_tp,0t8192-(((*door_tp)-door_tv)%0t112)/5XnPnPnPnPnPnPnPn64cnn
    719  *	door_tv,((*door_tp)-door_tv)%0t112/5XnPnPnPnPnPnPnPn64cnn
    720  * 64 bit
    721  *	*door_tp,0t8192-(((*door_tp)-door_tv)%0t128)/PXPXXnXnXnXnXnXnXnXn64cnn
    722  *	door_tv,((*door_tp)-door_tv)%0t128/PXPXXnXnXnXnXnXnXnXn64cnn
    723  */
    724 
    725 #define	DOOR_STK_DEPTH	6
    726 
    727 struct door_ts {
    728 	struct nca_conn_s *cp;
    729 	unsigned action;
    730 	node_t	*np;
    731 	int	ref;
    732 	unsigned state;
    733 	pc_t	stk[DOOR_STK_DEPTH + 1];
    734 	char	data[64];
    735 };
    736 
    737 #undef	DOOR_TRACE_ON
    738 
    739 #ifdef	DOOR_TRACE_ON
    740 
    741 #define	DOOR_TRACE_UPCALL	0xF0000000	/* upcall() */
    742 #define	DOOR_TRACE_UPCALL_RAW	0xF1000000	/* upcall() RAW ? */
    743 #define	DOOR_TRACE_UPCALL_RET	0xFF000000	/* upcall() return */
    744 
    745 #define	DOOR_TRACE_DOWNCALL	0xE0000000	/* downcall() */
    746 #define	DOOR_TRACE_CONNECT	0xE1000000	/* connect() */
    747 #define	DOOR_TRACE_CONNECT_DATA	0xE2000000	/* connect() */
    748 #define	DOOR_TRACE_DIRECTFROM	0xE3000000	/* tee_splice() from */
    749 #define	DOOR_TRACE_DIRECTTO	0xE4000000	/* tee_splice() to */
    750 #define	DOOR_TRACE_DOWNCALL_RET	0xEF000000	/* downcall() return */
    751 
    752 #define	DOOR_TRACE_INIT		0x80000000	/* doorcall_init() */
    753 #define	DOOR_TRACE_INIT_RET	0x88000000	/* doorcall_init() return */
    754 
    755 #if defined(__i386) || defined(__amd64)	/* uses caller's _p and _ix */
    756 #define	DOOR_TRACE_STK() {						\
    757 	_ix = getpcstack(&_p->stk[0], DOOR_STK_DEPTH + 1);		\
    758 	if (_ix < DOOR_STK_DEPTH + 1) {					\
    759 		_p->stk[_ix] = 0;	/* 0 entry ends a short trace */ \
    760 	}								\
    761 }
    762 #else	/* other ISAs: stk[0] is taken from callee() */
    763 #define	DOOR_TRACE_STK() {						\
    764 	_p->stk[0] = (pc_t)callee();					\
    765 	_ix = getpcstack(&_p->stk[1], DOOR_STK_DEPTH);			\
    766 	if (_ix < DOOR_STK_DEPTH) {					\
    767 		_p->stk[_ix + 1] = 0;	/* + 1 skips the stk[0] slot */	\
    768 	}								\
    769 }
    770 #endif
    771 
    772 #define	DOOR_TV_SZ 8192
    773 
    774 extern struct door_ts door_tv[DOOR_TV_SZ];
    775 extern struct door_ts *door_tp;
    776 
    777 #define	DOOR_TRACE(io, d, d_sz, a) {				\
    778 	nca_conn_t *_cp = (io) ? (nca_conn_t *)(io)->cid : (nca_conn_t *)NULL; \
    779 	node_t *_req_np = _cp ? _cp->req_np : (node_t *)NULL;		\
    780 	struct door_ts *_p;						\
    781 	struct door_ts *_np;						\
    782 	int    _ix;							\
    783 	/* Claim *door_tp and step door_tp on, wrapping at the end. */	\
    784 	do {								\
    785 		_p = door_tp;						\
    786 		if ((_np = _p + 1) == &door_tv[DOOR_TV_SZ])		\
    787 			_np = door_tv;					\
    788 	} while (casptr(&door_tp, _p, _np) != _p);			\
    789 	_p->cp = _cp;							\
    790 	_p->np = _req_np;						\
    791 	_p->action = (a);						\
    792 	_p->ref = _req_np ? _req_np->ref : 0;				\
    793 	if ((io)) {	/* one bit per io flag, low 18 bits: length sum */ \
    794 		_p->state = ((io)->op == http_op ? 0x80000000 : 0) |	\
    795 			    ((io)->more ? 0x40000000 : 0) |		\
    796 			    ((io)->first ? 0x20000000 : 0) |		\
    797 			    ((io)->advisory ? 0x10000000 : 0) |		\
    798 			    ((io)->nocache ? 0x08000000 : 0) |		\
    799 			    ((io)->preempt ? 0x04000000 : 0) |		\
    800 			    ((io)->peer_len ? 0x02000000 : 0) |		\
    801 			    ((io)->local_len ? 0x01000000 : 0) |	\
    802 			    ((io)->data_len ? 0x00800000 : 0) |		\
    803 			    (((io)->direct_type << 20) & 0x00700000) |	\
    804 			    ((io)->direct_len ? 0x00080000 : 0) |	\
    805 			    ((io)->trailer_len ? 0x00040000 : 0) |	\
    806 			    (((io)->peer_len + (io)->local_len +	\
    807 			    (io)->data_len + (io)->direct_len +		\
    808 			    (io)->trailer_len) & 0x3FFFF);		\
    809 	} else {							\
    810 		_p->state = 0;						\
    811 	}								\
    812 	if ((d_sz)) {							\
    813 		int _n = MIN((d_sz), 63);				\
    814 		/* Keep at most 63 bytes of (d); zero the rest of data[]. */ \
    815 		bcopy((d), _p->data, _n);				\
    816 		bzero(&_p->data[_n], 64 - _n);				\
    817 	} else {							\
    818 		bzero(_p->data, 64);					\
    819 	}								\
    820 	DOOR_TRACE_STK();						\
    821 }
    822 
    823 #else	/* DOOR_TRACE_ON */
    824 
    825 #define	DOOR_TRACE(io, d, d_sz, a)
    826 
    827 #endif	/* DOOR_TRACE_ON */
    828 
    829 /*
    830  * NCA node LRU cache.  Defined here so that the NCA mdb module can use it.
    831  */
    832 typedef struct lru_s {
    833 	node_t		*phead;	/* Phys LRU list head (MRU) */
    834 	node_t		*ptail;	/* Phys LRU list tail (LRU) */
    835 	node_t		*vhead;	/* Virt LRU list head (MRU) */
    836 	node_t 		*vtail;	/* Virt LRU list tail (LRU) */
    837 
    838 	uint32_t	pcount;	/* Phys count of node_t members */
    839 	uint32_t	vcount;	/* Virt count of node_t members */
    840 
    841 	kmutex_t	lock;	/* Guarantee atomic access of above */
    842 } lru_t;
    843 
    844 /*
    845  * Per CPU instance structure.
    846  *
    847  * 32-bit adb: XXXnnDnnXXnnXXnnXDnnXXnn228+na
    848  * 64-bit adb: PPPnnD4+nnPPnnPPnnJDnnJ180+na
    849  */
    850 
    851 typedef struct nca_cpu_s {
    852 	/* NOTE(review): presumably nodes for the PERSIST_HDR_* strings */
    853 	node_t *persist_hdr_none;
    854 	node_t *persist_hdr_close;
    855 	node_t *persist_hdr_ka;
    856 
    857 	uint32_t dcb_readers;	/* dcb_list reader count | DCB_COUNT_USELOCK */
    858 
    859 	nca_squeue_t *if_inq;	/* if_t input nca_squeue_t */
    860 	nca_squeue_t *if_ouq;	/* if_t output nca_squeue_t */
    861 
    862 	ti_t	*tcp_ti;	/* TCP TIMER list */
    863 	tw_t	*tcp_tw;	/* TCP TIME_WAIT list */
    864 
    865 	ddi_softintr_t soft_id;	/* soft interrupt id for if_inq worker */
    866 	int	if_inq_cnt;	/* count of if_t.inq references */
    867 	/* pad is 256 bytes minus the sizeof of each member declared above */
    868 	char	pad[256 - sizeof (node_t *) - sizeof (node_t *) -
    869 		    sizeof (node_t *) - sizeof (uint32_t) -
    870 		    sizeof (nca_squeue_t *) - sizeof (nca_squeue_t *) -
    871 		    sizeof (ti_t *) - sizeof (tw_t *) -
    872 		    sizeof (ddi_softintr_t) - sizeof (int)];
    873 } nca_cpu_t;
    874 
    875 extern nca_cpu_t *nca_gv;	/* global per CPU state indexed by cpu_seqid */
    876 
    877 /*
    878  * hcb_t - host control block.
    879  *
    880  * Used early on in packet switching to select packets to be serviced by NCA
    881  * and optionally later on by the HTTP protocol layer to further select HTTP
    882  * request to be serviced.
    883  *
    884  * dcb_t - door control block.
    885  *
    886  * Used to associate one or more hcb_t(s) with a given httpd door instance.
    887  *
    888  * dcb_list - dcb_t global list, a singly linked grounded list of dcb_t's.
    889  *
    890  * Used to search for a hcb_t match, currently a singly linked grounded list
    891  * of dcb_t's with a linear walk of the list. While this is adequate for the
    892  * current httpd support (i.e. a single door) a move to either a hash or tree
    893  * will be required for multiple httpd instance support (i.e. multiple doors).
    894  *
    895  * The dcb_list is protected by a custom reader/writer lock, the motivation
    896  * for using a custom lock instead of a krwlock_t is that this lock is the
    897  * single hot spot in NCA (i.e. all in-bound packets must acquire this lock)
    898  * and a nonlocking atomic readers count scheme is used in the common case
    899  * (i.e. reader lock) with a fall-back to a conventional kmutex_t for writer
    900  * (i.e. ndd list add/delete).
    901  */
    902 
    903 typedef struct hcb_s {
    904 	struct hcb_s	*next;		/* Next hcb_t (none: NULL) */
    905 	ipaddr_t	addr;		/* IP address (any: INADDR_ANY or 0) */
    906 	uint16_t	port;		/* TCP port number */
    907 	char		*host;		/* Host: name (any: NULL) */
    908 	ssize_t		hostsz;		/* Size of above */
    909 	char		*root;		/* Document root ("/": NULL) */
    910 	ssize_t		rootsz;		/* Size of above */
    911 } hcb_t;
    912 
    913 typedef struct dcb_s {
    914 	struct dcb_s	*next;		/* Next dcb_t (none: NULL) */
    915 	char		*door;		/* Door file (default: NULL) */
    916 	ssize_t		doorsz;		/* Size of above */
    917 	door_handle_t	hand;		/* Door handle (default: NULL) */
    918 	hcb_t		list;		/* Head of a hcb_t list (any: NULL) */
    919 } dcb_t;
    920 
    921 extern dcb_t dcb_list;
    922 extern kmutex_t nca_dcb_lock;
    923 extern kcondvar_t nca_dcb_wait;
    924 extern kmutex_t nca_dcb_readers;
    925 
    926 #define	NOHANDLE ((door_handle_t)-1)
    927 
    928 #define	DCB_COUNT_USELOCK	0x80000000	/* writer flag bit */
    929 #define	DCB_COUNT_MASK		0x7FFFFFFF	/* all reader count bits */
    930 
    931 #define	DCB_RD_ENTER(cpu) {						\
    932 	uint32_t *rp;							\
    933 	/* (cpu) is assigned here and picks the per-CPU counter used. */ \
    934 	cpu = CPU->cpu_seqid;						\
    935 	rp = &nca_gv[cpu].dcb_readers;					\
    936 	while (atomic_add_32_nv(rp, 1) & DCB_COUNT_USELOCK) {		\
    937 		/* USELOCK is set: undo our count under the lock */	\
    938 		mutex_enter(&nca_dcb_lock);				\
    939 		if (atomic_add_32_nv(rp, -1) == DCB_COUNT_USELOCK &&	\
    940 		    CV_HAS_WAITERS(&nca_dcb_wait)) {			\
    941 			/* May be the last reader for this CPU */	\
    942 			cv_signal(&nca_dcb_wait);			\
    943 		}							\
    944 		mutex_exit(&nca_dcb_lock);				\
    945 		mutex_enter(&nca_dcb_readers);				\
    946 		/*							\
    947 		 * The enter above blocks until the writer exits the	\
    948 		 * readers lock.  If it did not block, the writer had	\
    949 		 * already exited while we were in nca_dcb_lock; that	\
    950 		 * could be optimized by checking USELOCK after the	\
    951 		 * decrement, but as this is an exceptional case not	\
    952 		 * in the fast-path we just take the hit of a needless	\
    953 		 * readers enter/exit, then retry the increment.	\
    954 		 */							\
    955 		mutex_exit(&nca_dcb_readers);				\
    956 	}								\
    957 }
    958 
    959 #define	DCB_RD_EXIT(cpu) {						\
    960 	uint32_t *rp = &nca_gv[cpu].dcb_readers;			\
    961 	/* Result == USELOCK: count is now zero and the flag is set. */	\
    962 	if (atomic_add_32_nv(rp, -1) == DCB_COUNT_USELOCK) {		\
    963 		mutex_enter(&nca_dcb_lock);				\
    964 		if (CV_HAS_WAITERS(&nca_dcb_wait)) {			\
    965 			/* May be the last reader for this CPU */	\
    966 			cv_signal(&nca_dcb_wait);			\
    967 		}							\
    968 		mutex_exit(&nca_dcb_lock);				\
    969 	}								\
    970 }
    971 
    972 #define	DCB_WR_ENTER() {						\
    973 	int cpu;							\
    974 	int readers;							\
    975 	/* nca_dcb_readers stays held on exit from this macro. */	\
    976 	mutex_enter(&nca_dcb_readers);					\
    977 	mutex_enter(&nca_dcb_lock);					\
    978 	for (;;) {							\
    979 		readers = 0;						\
    980 		for (cpu = 0; cpu < max_ncpus; cpu++) {			\
    981 			int new;					\
    982 			uint32_t *rp = &nca_gv[cpu].dcb_readers;	\
    983 			int old = *rp;					\
    984 			/* Flag already set: only tally the count. */	\
    985 			if (old & DCB_COUNT_USELOCK) {			\
    986 				readers += old & DCB_COUNT_MASK;	\
    987 				continue;				\
    988 			}						\
    989 			new = old | DCB_COUNT_USELOCK;			\
    990 			while (cas32(rp, old, new) != old) {		\
    991 				old = *rp;	/* lost the race, retry */ \
    992 				new = old | DCB_COUNT_USELOCK;		\
    993 			}						\
    994 			readers += (new & DCB_COUNT_MASK);		\
    995 		}							\
    996 		if (readers == 0)	/* no counted readers remain */	\
    997 			break;						\
    998 		cv_wait(&nca_dcb_wait, &nca_dcb_lock);			\
    999 	}								\
   1000 	mutex_exit(&nca_dcb_lock);					\
   1001 }
   1002 
   1003 #define	DCB_WR_EXIT() {							\
   1004 	int cpu;							\
   1005 	/* Clear USELOCK on every per-CPU count, then drop both locks. */ \
   1006 	mutex_enter(&nca_dcb_lock);					\
   1007 	for (cpu = 0; cpu < max_ncpus; cpu++) {				\
   1008 		int new;						\
   1009 		uint32_t *rp = &nca_gv[cpu].dcb_readers;		\
   1010 		int old = *rp;						\
   1011 									\
   1012 		new = old & ~DCB_COUNT_USELOCK;				\
   1013 		while (cas32(rp, old, new) != old) {			\
   1014 			old = *rp;	/* count changed, retry */	\
   1015 			new = old & ~DCB_COUNT_USELOCK;			\
   1016 		}							\
   1017 	}								\
   1018 	mutex_exit(&nca_dcb_lock);					\
   1019 	mutex_exit(&nca_dcb_readers);					\
   1020 }
   1021 
   1022 typedef struct nca_door_s {
   1023 	door_handle_t	handle;		/* The door handle */
   1024 	char		*name;		/* The door name */
   1025 	kmutex_t	lock;		/* The door lock */
   1026 	kcondvar_t	cv_writer;	/* condvar for thread waiting */
   1027 					/* to do door_init */
   1028 	kcondvar_t	cv_reader;	/* condvar for thread waiting */
   1029 					/* for a door_init to finish */
   1030 	uint32_t	upcalls;	/* Number of upcalls in progress */
   1031 	boolean_t	init_waiting;	/* door_init thread wanting to */
   1032 					/* be exclusive */
   1033 } nca_door_t;
   1034 
   1035 /*
   1036  * if_t - interface per instance data.
   1037  */
   1038 
   1039 typedef struct if_s {
   1040 
   1041 	boolean_t dev;		/* is a device instance */
   1042 
   1043 	queue_t	*rqp;		/* our read-side STREAMS queue */
   1044 	queue_t	*wqp;		/* our write-side STREAMS queue */
   1045 
   1046 	/* DLPI M_DATA IP fastpath template */
   1047 	size_t	mac_length;
   1048 	mblk_t	*mac_mp;
   1049 	int32_t	mac_mtu;
   1050 	int32_t	mac_addr_len;
   1051 
   1052 	uint32_t ip_ident;	/* our IP ident value */
   1053 
   1054 	boolean_t hwcksum;	/* underlying NIC supports checksum offload */
   1055 
   1056 	nca_squeue_t *inq;		/* in-bound nca_squeue_t */
   1057 	nca_squeue_t *ouq;		/* out-bound nca_squeue_t */
   1058 
   1059 	/*
   1060 	 * All if_t's that are associated with a CPU and have a default
   1061 	 * router on link are chained in a circular linked list.
   1062 	 */
   1063 	struct if_s *next_if;
   1064 	struct if_s *prev_if;
   1065 	ipaddr_t local_addr;	/* This interface's IP address. */
   1066 	uchar_t router_ether_addr[6];
   1067 
   1068 	uint_t	hdr_ioc_id;	/* id of DL_IOC_HDR_INFO M_IOCTL sent down */
   1069 	boolean_t info_req_pending;
   1070 
   1071 	int32_t	capab_state;	/* Capability probe state */
   1072 
   1073 	/* Bound local address of a NCAfs instance. */
   1074 	struct sockaddr_in	bound_addr;
   1075 } if_t;
   1076 
   1077 /*
   1078  * connf_t - connection fanout data.
   1079  *
   1080  * The hash tables and their linkage (hashnextp, hashprevp) are protected
   1081  * by the per-bucket lock. Each nca_conn_t inserted in the list points back at
   1082  * the connf_t that heads the bucket.
   1083  */
   1084 
   1085 typedef struct connf_s {
   1086 	uint32_t	max;	/* NOTE(review): meaning not shown here */
   1087 	struct nca_conn_s	*head;	/* first nca_conn_t in this bucket */
   1088 	kmutex_t	lock;	/* per-bucket lock for the hash linkage */
   1089 } connf_t;
   1090 
   1091 #ifdef	CONNP_T_TRACE_ON
   1092 
   1093 #define	CONNP_TV_SZ 32
   1094 
   1095 /*
   1096  * Per nca_conn_t packet tracing.
   1097  */
   1098 typedef struct connp_s {
   1099 	clock_t		lbolt;
   1100 	clock_t		tcp_ti;
   1101 	int32_t		len : 16,	/* these bit-fields total 32 bits */
   1102 			dir : 1,
   1103 			state : 4,
   1104 			flags : 6,
   1105 			xmit_np : 1,
   1106 			xmit_head : 1,
   1107 			unsent : 1,
   1108 			tail_unsent : 1,
   1109 			direct : 1;
   1110 	uint32_t	state1;
   1111 	uint32_t	state2;
   1112 	uint32_t	seq;	/* NOTE(review): seq..swnd look like TCP */
   1113 	uint32_t	ack;	/* sequence/window values - confirm */
   1114 	uint32_t	snxt;
   1115 	uint32_t	swnd;
   1116 } connp_t;
   1117 
   1118 #endif	/* CONNP_T_TRACE_ON */
   1119 
   1120 /*
   1121  * nca_conn_t - connection per instance data.
   1122  *
   1123  * Note: hashlock is used to provide atomic access to all nca_conn_t members
   1124  * above it. All other members are protected by the per CPU inq nca_squeue_t
   1125  * which is used to serialize access to all nca_conn_t's per interface.
   1126  *
   1127  * Note: the nca_conn_t can have up to 3 NODE_REFHOLDs:
   1128  *
   1129  *	1) if req_np != NULL then a NODE_REFHOLD(req_np) was done:
   1130  *
   1131  *	    1.1) if http_refed then a NODE_REFHOLD(req_np) was done
   1132  *
   1133  *	    1.2) if http_frefed then a NODE_REFHOLD(req_np->fileback) was done
   1134  *
   1135  *
   1136  * TODO: reorder elements in fast-path code access order.
   1137  *
   1138  * Dnn4XnXXDnnDnnXXXnnXXXnnUXnnXXXnnXXnnDDXXXDXDXDXnnDnnXXDDnXXXDDnnXXXDDnn
   1139  * XXXDDnnXXXDDnnXXXDDnnXXnnDXXnn
   1140  * b+++DDnAnDDDDDnnDnnUnnUUDXDUnnDnn20xnnXnnddnnUUUnnXXUnXXnnUUUnn
   1141  * DDDDDDnnUUnnXXUXUnn4UD4Unn4UnUUnn
   1142  * 64-bit: Xnn4+4pnnppEnEnn3pnn3pnnEJnnXXnnuunn4+ppnnXX3pD4+pD4+pD4+pnnEnnppnnD
   1143  */
   1144 
   1145 #define	TCP_XMIT_MAX_IX	5		/* Max xmit descriptors */
   1146 
   1147 typedef struct nca_conn_s {
   1148 
   1149 	int32_t ref;			/* Reference counter */
   1150 
   1151 	te_t	tcp_ti;			/* TCP TIMER timer entry */
   1152 
   1153 	struct nca_conn_s	*twnext;	/* TIME_WAIT next */
   1154 	struct nca_conn_s	*twprev;	/* TIME_WAIT prev */
   1155 	clock_t	twlbolt;		/* TIME_WAIT lbolt */
   1156 
   1157 	clock_t create;			/* Create lbolt time */
   1158 
   1159 	connf_t	*hashfanout;		/* Hash bucket we're part of */
   1160 	struct nca_conn_s	*hashnext;	/* Hash chain next */
   1161 	struct nca_conn_s	*hashprev;	/* Hash chain prev */
   1162 
   1163 	struct nca_conn_s	*bindnext;	/* Next conn_s in bind list. */
   1164 	struct nca_conn_s	*bindprev;	/* Prev conn_s in bind list. */
   1165 	void		*tbf;		/* Pointer to bind hash list struct. */
   1166 	/*
   1167 	 * Note: atomic access of members above is guaranteed by the
   1168 	 * hashfanout->lock of the hash bucket that the nca_conn_t is in.
   1169 	 */
   1170 
   1171 	size_t	mac_length;		/* MAC prepend length */
   1172 	mblk_t	*mac_mp;		/* MAC prepend data */
   1173 
   1174 	ipaddr_t	laddr;		/* Local address */
   1175 	ipaddr_t	faddr;		/* Remote address. 0 => not connected */
   1176 
   1177 	union {
   1178 		struct {
   1179 			uint16_t u_fport; /* Remote port */
   1180 			uint16_t u_lport; /* Local port */
   1181 		} u_ports1;
   1182 		uint32_t u_ports2;	/* Rem port, local port */
   1183 					/* Used for TCP_MATCH performance */
   1184 	} u_port;
   1185 #define	conn_lport	u_port.u_ports1.u_lport
   1186 #define	conn_fport	u_port.u_ports1.u_fport
   1187 #define	conn_ports	u_port.u_ports2
   1188 
   1189 	if_t	*ifp;			/* Interface for this connection */
   1190 	nca_squeue_t *inq;		/* Per CPU inq for this connection */
   1191 
   1192 	uint32_t req_tag;		/* nca_io_t request tag (0 == NONE) */
   1193 	int	req_parse;		/* HTTP request parse state */
   1194 	node_t	*req_np;		/* HTTP request node_t */
   1195 	mblk_t	*req_mp;		/* HTTP request mblk_t */
   1196 	char	*reqpath;		/* HTTP request URI path component */
   1197 	int	reqpathsz;		/* size of above */
   1198 	char	*reqrefer;		/* HTTP "Referer:" string */
   1199 	int	reqrefersz;		/* size of above */
   1200 	char	*requagent;		/* HTTP "User-Agent:" string */
   1201 	int	requagentsz;		/* size of above */
   1202 	struct nca_conn_s *nodenext;	/* Node_t nca_conn_t list */
   1203 
   1204 	clock_t	http_count;		/* HTTP Keep-Alive request count */
   1205 
   1206 	/*
   1207 	 * req_np xmit state used across calls to tcp_xmit(). A reference
   1208 	 * to the req_np and to any indirect node_t (i.e. file/ctag) ...
   1209 	 */
   1210 	node_t	*xmit_refed;		/* have a ref to the uri node_t */
   1211 	node_t	*xmit_cur;		/* current node to transmit */
   1212 
   1213 	int	xmit_ix;		/* current xmit[] index */
   1214 	int	xmit_pix;		/* past end xmit[] index */
   1215 
   1216 	struct {
   1217 		node_t	*np;		/* node_t pointer for ref */
   1218 		char	*dp;		/* data pointer */
   1219 		uint16_t *cp;		/* cksum array */
   1220 		int	sz;		/* remaining data to xmit */
   1221 		int	iso;		/* initial segment offset (if any) */
   1222 		node_t	*refed;		/* have a ref to the node_t */
   1223 		int	dsz;		/* remaining data for current segment */
   1224 		caddr_t	*dvp;		/* data segment virtual pointer */
   1225 	} xmit[TCP_XMIT_MAX_IX];
   1226 
   1227 	/*
   1228 	 * Connection NCA_IO_DIRECT_SPLICE & NCA_IO_DIRECT_TEE reference,
   1229 	 * see http_direct_splice and http_direct_tee below for type of send.
   1230 	 */
   1231 	struct nca_conn_s	*direct; /* nca_conn_t to send recv data too */
   1232 	mblk_t		*direct_mp;	 /* mblk_t to use for tcp_close() */
   1233 
   1234 	/*
   1235 	 * nca_conn_t state.
   1236 	 */
   1237 
   1238 	int32_t	tcp_state;
   1239 
   1240 	uint32_t
   1241 		tcp_urp_last_valid : 1,	/* Is tcp_urp_last valid? */
   1242 		tcp_hard_binding : 1,	/* If we've started a full bind */
   1243 		tcp_hard_bound : 1,	/* If we've done a full bind with IP */
   1244 		tcp_fin_acked : 1,	/* Has our FIN been acked? */
   1245 
   1246 		tcp_fin_rcvd : 1,	/* Have we seen a FIN? */
   1247 		tcp_fin_sent : 1,	/* Have we sent our FIN yet? */
   1248 		tcp_ordrel_done : 1,	/* Have we sent the ord_rel upstream? */
   1249 		tcp_flow_stopped : 1,	/* Have we flow controlled xmitter? */
   1250 
   1251 		tcp_debug : 1,		/* SO_DEBUG "socket" option. */
   1252 		tcp_dontroute : 1,	/* SO_DONTROUTE "socket" option. */
   1253 		tcp_broadcast : 1,	/* SO_BROADCAST "socket" option. */
   1254 		tcp_useloopback : 1,	/* SO_USELOOPBACK "socket" option. */
   1255 
   1256 		tcp_oobinline : 1,	/* SO_OOBINLINE "socket" option. */
   1257 		tcp_dgram_errind : 1,	/* SO_DGRAM_ERRIND option */
   1258 		tcp_detached : 1,	/* If we're detached from a stream */
   1259 		tcp_bind_pending : 1,	/* Client is waiting for bind ack */
   1260 
   1261 		tcp_unbind_pending : 1, /* Client sent T_UNBIND_REQ */
   1262 		tcp_deferred_clean_death : 1,
   1263 					/* defer tcp endpoint cleanup etc. */
   1264 		tcp_co_wakeq_done : 1,	/* A strwakeq() has been done */
   1265 		tcp_co_wakeq_force : 1,	/* A strwakeq() must be done */
   1266 
   1267 		tcp_co_norm : 1,	/* In normal mode, putnext() done */
   1268 		tcp_co_wakeq_need : 1,	/* A strwakeq() needs to be done */
   1269 		tcp_snd_ws_ok : 1,	/* Received WSCALE from peer */
   1270 		tcp_snd_ts_ok : 1,	/* Received TSTAMP from peer */
   1271 
   1272 		tcp_linger : 1,		/* SO_LINGER turned on */
   1273 		tcp_zero_win_probe: 1,	/* Zero win probing is in progress */
   1274 		tcp_loopback: 1,	/* src and dst are the same machine */
   1275 		tcp_localnet: 1,	/* src and dst are on the same subnet */
   1276 
   1277 		tcp_syn_defense: 1,	/* For defense against SYN attack */
   1278 #define	tcp_dontdrop	tcp_syn_defense
   1279 		tcp_set_timer : 1,
   1280 		tcp_1_junk_fill_thru_bit_31 : 2;
   1281 
   1282 	uint32_t
   1283 		tcp_active_open: 1,	/* This is a active open */
   1284 		tcp_timeout : 1,	/* qbufcall failed, qtimeout pending */
   1285 		tcp_rexmit : 1,		/* TCP is retransmitting */
   1286 		tcp_snd_sack_ok : 1,	/* Can use SACK for this connection */
   1287 
   1288 		tcp_bind_proxy_addr : 1,	/* proxy addr is being used */
   1289 		tcp_recvdstaddr : 1,	/* return T_EXTCONN_IND with dst addr */
   1290 		tcp_refed : 1,		/* nca_conn_t refed by TCP */
   1291 		tcp_time_wait_comp : 1, /* TIME_WAIT compressed nca_conn_t */
   1292 
   1293 		tcp_close : 1,		/* nca_conn_t close */
   1294 		http_persist : 3,	/* HTTP persistent connection state */
   1295 
   1296 		deferred_xmit_end : 1,	/* xmit_end() deferred to xmit() */
   1297 		http_direct_splice : 1,	/* have a connection to splice too */
   1298 		http_direct_tee : 1,	/* have a connection to tee too */
   1299 
   1300 		tcp_2_junk_fill_thru_bit_31 : 17;
   1301 /*
   1302  * Note: all nca_conn_t members to be accessed by a tcp_time_wait_comp
   1303  * nca_conn_t must be above this point !!!
   1304  */
   1305 
   1306 	uchar_t	tcp_timer_backoff;	/* Backoff shift count. */
   1307 	clock_t tcp_last_recv_time;	/* Last time we receive a segment. */
   1308 	clock_t	tcp_dack_set_time;	/* When delayed ACK timer is set. */
   1309 
   1310 	int	tcp_ip_hdr_len;		/* Byte len of our current IP header */
   1311 	clock_t	tcp_first_timer_threshold;  /* When to prod IP */
   1312 	clock_t	tcp_second_timer_threshold; /* When to give up completely */
   1313 	clock_t	tcp_first_ctimer_threshold; /* 1st threshold while connecting */
   1314 	clock_t tcp_second_ctimer_threshold; /* 2nd ... while connecting */
   1315 
   1316 	clock_t	tcp_last_rcv_lbolt; /* lbolt on last packet, used for PAWS */
   1317 
   1318 
   1319 	uint32_t tcp_obsegs;		/* Outbound segments on this stream */
   1320 
   1321 	uint32_t tcp_mss;		/* Max segment size */
   1322 	uint32_t tcp_naglim;		/* Tunable nagle limit */
   1323 	int32_t	tcp_hdr_len;		/* Byte len of combined TCP/IP hdr */
   1324 	tcph_t	*tcp_tcph;		/* tcp header within combined hdr */
   1325 	int32_t	tcp_tcp_hdr_len;	/* tcp header len within combined */
   1326 	uint32_t	tcp_valid_bits;
   1327 #define	TCP_ISS_VALID	0x1	/* Is the tcp_iss seq num active? */
   1328 #define	TCP_FSS_VALID	0x2	/* Is the tcp_fss seq num active? */
   1329 #define	TCP_URG_VALID	0x4	/* If the tcp_urg seq num active? */
   1330 
   1331 	int32_t	tcp_xmit_hiwater;	/* Send buffer high water mark. */
   1332 
   1333 	union {				/* template ip header */
   1334 		ipha_t	tcp_u_ipha;
   1335 		char	tcp_u_buf[IP_SIMPLE_HDR_LENGTH+TCP_MIN_HEADER_LENGTH];
   1336 		double	tcp_u_aligner;
   1337 	} tcp_u;
   1338 #define	tcp_ipha	tcp_u.tcp_u_ipha
   1339 #define	tcp_iphc	tcp_u.tcp_u_buf
   1340 
   1341 	uint32_t tcp_sum;		/* checksum to compensate for source */
   1342 					/* routed packets. Host byte order */
   1343 
   1344 	uint16_t tcp_last_sent_len;	/* Record length for nagle */
   1345 	uint16_t tcp_dupack_cnt;	/* # of consequtive duplicate acks */
   1346 
   1347 	uint32_t tcp_rnxt;		/* Seq we expect to recv next */
   1348 	uint32_t tcp_rwnd;		/* Current receive window */
   1349 	uint32_t tcp_rwnd_max;		/* Maximum receive window */
   1350 
   1351 	mblk_t	*tcp_rcv_head;		/* Queued until push, urgent data or */
   1352 	mblk_t	*tcp_rcv_tail;		/* the count exceeds */
   1353 	uint32_t tcp_rcv_cnt;		/* tcp_rcv_push_wait. */
   1354 
   1355 	mblk_t	*tcp_reass_head;	/* Out of order reassembly list head */
   1356 	mblk_t	*tcp_reass_tail;	/* Out of order reassembly list tail */
   1357 
   1358 	uint32_t tcp_cwnd_ssthresh;	/* Congestion window */
   1359 	uint32_t tcp_cwnd_max;
   1360 	uint32_t tcp_csuna;		/* Clear (no rexmits in window) suna */
   1361 
   1362 	int	tcp_rttv_updates;
   1363 	clock_t	tcp_rto;		/* Round trip timeout */
   1364 	clock_t	tcp_rtt_sa;		/* Round trip smoothed average */
   1365 	clock_t	tcp_rtt_sd;		/* Round trip smoothed deviation */
   1366 	clock_t	tcp_rtt_update;		/* Round trip update(s) */
   1367 	clock_t tcp_ms_we_have_waited;	/* Total retrans time */
   1368 
   1369 	uint32_t tcp_swl1;		/* These help us avoid using stale */
   1370 	uint32_t tcp_swl2;		/*  packets to update state */
   1371 
   1372 	mblk_t	*tcp_xmit_head;		/* Head of rexmit list */
   1373 	mblk_t	*tcp_xmit_last;		/* last valid data seen by tcp_wput */
   1374 	uint32_t tcp_unsent;		/* # of bytes in hand that are unsent */
   1375 	mblk_t	*tcp_xmit_tail;		/* Last rexmit data sent */
   1376 	uint32_t tcp_xmit_tail_unsent;	/* # of unsent bytes in xmit_tail */
   1377 
   1378 	uint32_t tcp_snxt;		/* Senders next seq num */
   1379 	uint32_t tcp_suna;		/* Sender unacknowledged */
   1380 	uint32_t tcp_rexmit_nxt;	/* Next rexmit seq num */
   1381 	uint32_t tcp_rexmit_max;	/* Max retran seq num */
   1382 	int32_t	tcp_snd_burst;		/* Send burst factor */
   1383 	uint32_t tcp_swnd;		/* Senders window (relative to suna) */
   1384 	uint32_t tcp_cwnd;		/* Congestion window */
   1385 	int32_t tcp_cwnd_cnt;		/* cwnd cnt in congestion avoidance */
   1386 	uint32_t tcp_ackonly;		/* Senders last ack seq num */
   1387 
   1388 	uint32_t tcp_irs;		/* Initial recv seq num */
   1389 	uint32_t tcp_iss;		/* Initial send seq num */
   1390 	uint32_t tcp_fss;		/* Final/fin send seq num */
   1391 	uint32_t tcp_urg;		/* Urgent data seq num */
   1392 
   1393 	uint32_t tcp_rack;		/* Seq # we have acked */
   1394 	uint32_t tcp_rack_cnt;		/* # of bytes we have deferred ack */
   1395 
   1396 	uint32_t tcp_max_swnd;		/* Maximum swnd we have seen */
   1397 	int64_t	tcp_rexmit_fire_time;
   1398 	int64_t	tcp_dack_fire_time;
   1399 	int64_t tcp_ka_fire_time;
   1400 	int64_t	tcp_http_ka_fire_time;
   1401 
   1402 	int32_t	tcp_keepalive_intrvl;	/* Zero means don't bother */
   1403 	int32_t	tcp_ka_probe_sent;
   1404 	int32_t tcp_ka_last_intrvl;
   1405 
   1406 #define	TCP_DACK_TIMER		0x1
   1407 #define	TCP_REXMIT_TIMER	0x2
   1408 #define	TCP_KA_TIMER		0x4
   1409 #define	TCP_HTTP_KA_TIMER	0x8
   1410 	int16_t		tcp_running_timer;
   1411 	int16_t		tcp_pending_timer;
   1412 
   1413 #ifdef	CONNP_T_TRACE_ON
   1414 	connp_t *pkt_tp;		/* Packet tracing pointer */
   1415 	connp_t	pkt_tv[CONNP_TV_SZ];	/* Packet tracing vector */
   1416 #endif	/* CONNP_T_TRACE_ON */
   1417 
   1418 } nca_conn_t;
   1419 
   1420 /*
   1421  * Active stack support parameters to control what ports NCA can use.
   1422  * They are declared in ncaproto.c
   1423  */
   1424 extern struct nca_tbf_s *nca_tcp_port;
   1425 extern in_port_t tcp_lo_port;
   1426 extern in_port_t tcp_hi_port;
   1427 
   1428 /*
   1429  * nca_conn_t.http_persist values and corresponding HTTP header strings are
   1430  * used to determine the connection persistent state of a connection and
   1431  * any HTTP header which needs to be sent.
   1432  */
   1433 
   1434 #define	PERSIST_NONE		0	/* Not persistent */
   1435 
   1436 #define	PERSIST_CLOSE		1	/* Was persistent, send close header */
   1437 #define	PERSIST_TRUE		2	/* Connection is HTTP persistent */
   1438 #define	PERSIST_KA		3	/* Persistent, send Keep-Alive header */
   1439 #define	PERSIST_UPCALL		4	/* Insert "Connection: close" on */
   1440 					/* upcall and clear flag */
   1441 
   1442 #define	PERSIST_HDR_NONE	"\r\n"
   1443 #define	PERSIST_HDR_CLOSE	"Connection: close\r\n\r\n"
   1444 #define	PERSIST_HDR_KA		"Connection: Keep-Alive\r\n\r\n"
   1445 
   1446 /*
   1447  * nca_conn_t nca_squeue_ctl() flag values:
   1448  */
   1449 
   1450 #define	CONN_MISS_DONE		0x0001	/* The conn miss processing is done */
   1451 #define	IF_TIME_WAIT		0x0002	/* A TIME_WAIT has fired */
   1452 #define	IF_TCP_TIMER		0x0003	/* A TCP TIMER has fired */
   1453 #define	NCA_CONN_TCP_TIMER	0x0004	/* A TCP TIMER needs to be execed */
   1454 #define	IF_TCP_CONNECT		0x0005	/* TCP connection request */
   1455 #define	IF_TCP_SEND		0x0006	/* A new send request. */
   1456 
   1457 #define	IF_TCP_DIRECT_TO	0x0010	/* A TCP direct i/o, step 1 */
   1458 #define	IF_TCP_DIRECT_FROM	0x0012	/* A TCP direct i/o, step 2 */
   1459 #define	IF_TCP_DIRECT_TEE	0x0001	/* If a tee else a splice */
   1460 #define	IF_TCP_DIRECT_CLOSE	0x001F	/* A TCP direct i/o close */
   1461 
   1462 #define	NCA_CONN_T_STK_DEPTH	7	/* max stack backtrace depth */
   1463 
   1464 struct conn_ts {
   1465 	nca_conn_t	*conn;	/* connection handed to NCA_CONN_T_TRACE() */
   1466 	unsigned action;	/* action value handed to NCA_CONN_T_TRACE() */
   1467 	int	ref;		/* conn->ref when traced */
   1468 	int	cpu;		/* CPU->cpu_seqid when traced */
   1469 	pc_t	stk[NCA_CONN_T_STK_DEPTH + 1];	/* from NCA_CONN_T_TRACE_STK() */
   1470 };
   1471 
   1472 #undef	NCA_CONN_T_TRACE_ON
   1473 
   1474 #ifdef	NCA_CONN_T_TRACE_ON
   1475 
   1476 /*
   1477  * adb:
   1478  * 32 bit
   1479  *	*conn_tp,0t4096-(((*conn_tp)-con_tv)%0t48)/PXDDnPnPnPnPnPnPnPnPnn
   1480  *	con_tv,((*conn_tp)-con_tv)%0t48/PXDDnPnPnPnPnPnPnPnPnn
   1481  * 64 bit
   1482  *	*conn_tp,0t4096-(((*conn_tp)-con_tv)%0t56)/PXDDnXnXnXnXnXnXnXnXnn
   1483  *	con_tv,((*conn_tp)-con_tv)%0t56/PXDDnXnXnXnXnXnXnXnXnn
   1484  */
   1485 
   1486 #define	NCA_CONN_T_REFINIT	0x10000000	/* CONN_REF init() |ref value */
   1487 #define	NCA_CONN_T_REFINIT1	0x11000000	/* CONN_REF init() |ref value */
   1488 #define	NCA_CONN_T_REFINIT2	0x12000000	/* CONN_REF init() |ref value */
   1489 #define	NCA_CONN_T_REFNOTCP	0x13000000 /* CONN_REF no longer tcp_refed */
   1490 #define	NCA_CONN_T_REFHOLD	0x1A000000	/* CONN_REFHOLD() | ref value */
   1491 #define	NCA_CONN_T_REFRELE	0x1F000000	/* CONN_REFRELE() | ref value */
   1492 
   1493 #define	NCA_CONN_T_HTTPCALL	0x20000000	/* call http() | rbytes */
   1494 #define	NCA_CONN_T_HTTPRET1	0x21000000	/* return http() */
   1495 #define	NCA_CONN_T_HTTPRET2	0x22000000	/* return ! http() */
   1496 
   1497 #define	NCA_CONN_T_MISSDONE	0x30000000	/* CONN_MISS_DONE */
   1498 #define	NCA_CONN_T_TCPTIMER	0x31000000	/* NCA_CONN_TCP_TIMER */
   1499 #define	NCA_CONN_T_XMIT_END	0x32000000	/* xmit_end() | tcp_unsent */
   1500 #define	NCA_CONN_T_XMIT_BAD	0x33000000 /* xmit_end() bad state |tcp_state */
   1501 #define	NCA_CONN_T_XMIT_DEF	0x34000000	/* xmit_end() deferred */
   1502 #define	NCA_CONN_T_TIME_WAIT 0x35000000	/* done: tcp_state == TCPS_TIME_WAIT */
   1503 #define	NCA_CONN_T_PKT_IN	0x36000000	/* tcp_input() | flags */
   1504 #define	NCA_CONN_T_PKT_OUT	0x37000000	/* tcp_input() | flags */
   1505 
   1506 #define	NCA_CONN_T_DIRECT	0x40000000	/* tcp_direct() from conn_t */
   1507 #define	NCA_CONN_T_DIRECT1	0x41000000	/* tcp_direct() to conn_t */
   1508 #define	NCA_CONN_T_DIRECT2	0x42000000	/* IF_TCP_DIRECT_TO | TEE */
   1509 #define	NCA_CONN_T_DIRECT3	0x43000000	/* IF_TCP_DIRECT_FROM | TEE */
   1510 #define	NCA_CONN_T_DIRECT4	0x44000000	/* tcp_close() */
   1511 #define	NCA_CONN_T_DIRECT5	0x45000000	/* IF_TCP_DIRECT_CLOSE */
   1512 						/* from|tcp_state */
   1513 #define	NCA_CONN_T_DIRECT6	0x46000000	/* IF_TCP_DIRECT_CLOSE to */
   1514 
   1515 #if defined(__i386) || defined(__amd64)	/* uses caller's _p and _ix */
   1516 #define	NCA_CONN_T_TRACE_STK() {					\
   1517 	_ix = getpcstack(&_p->stk[0], NCA_CONN_T_STK_DEPTH + 1);	\
   1518 	if (_ix < NCA_CONN_T_STK_DEPTH + 1) {				\
   1519 		_p->stk[_ix] = 0;	/* 0 entry ends a short trace */ \
   1520 	}								\
   1521 }
   1522 #else	/* other ISAs: stk[0] is taken from callee() */
   1523 #define	NCA_CONN_T_TRACE_STK() {					\
   1524 	_p->stk[0] = (pc_t)callee();					\
   1525 	_ix = getpcstack(&_p->stk[1], NCA_CONN_T_STK_DEPTH);		\
   1526 	if (_ix < NCA_CONN_T_STK_DEPTH) {				\
   1527 		_p->stk[_ix + 1] = 0;	/* + 1 skips the stk[0] slot */	\
   1528 	}								\
   1529 }
   1530 #endif
   1531 
   1532 #define	CON_TV_SZ 4096
   1533 
   1534 extern struct conn_ts con_tv[CON_TV_SZ];
   1535 extern struct conn_ts *conn_tp;
   1536 
   1537 #define	NCA_CONN_T_TRACE(p, a) {					\
   1538 	struct conn_ts *_p;						\
   1539 	struct conn_ts *_np;						\
   1540 	int    _ix;							\
   1541 									\
   1542 	do {								\
   1543 		_p = conn_tp;					\
   1544 		if ((_np = _p + 1) == &con_tv[CON_TV_SZ])	\
   1545 			_np = con_tv;				\
   1546 	} while (casptr(&conn_tp, _p, _np) != _p);			\
   1547 	_p->conn = (p);							\
   1548 	_p->action = (a);						\
   1549 	_p->ref = (p)->ref;						\
   1550 	_p->cpu = CPU->cpu_seqid;					\
   1551 	NCA_CONN_T_TRACE_STK();						\
   1552 }
   1553 
   1554 #else	/* NCA_CONN_T_TRACE_ON */
   1555 
   1556 #define	NCA_CONN_T_TRACE(p, a)
   1557 
   1558 #endif	/* NCA_CONN_T_TRACE_ON */
   1559 
   1560 
   1561 #define	CONN_REFHOLD(connp) {						\
   1562 									\
   1563 	NCA_CONN_T_TRACE((connp), NCA_CONN_T_REFHOLD | ((connp)->ref + 1)); \
   1564 									\
   1565 	if ((connp)->ref <= 0)						\
   1566 		panic("nca CONN_REFHOLD: %p has no references",		\
   1567 		    (void *)(connp));					\
   1568 	(connp)->ref++;							\
   1569 }
   1570 
   1571 #define	CONN_REFRELE(connp) {						\
   1572 									\
   1573 	NCA_CONN_T_TRACE((connp), NCA_CONN_T_REFRELE | ((connp)->ref - 1)); \
   1574 									\
   1575 	if ((connp)->tcp_refed) {					\
   1576 		if ((connp)->ref == 1)					\
   1577 			panic("nca CONN_REFRELE: %p "			\
   1578 			    "has only tcp_refed reference",		\
   1579 			    (void *)(connp));				\
   1580 		if ((connp)->ref < 1)					\
   1581 			panic("nca CONN_REFRELE: %p has no references",	\
   1582 			    (void *)(connp));				\
   1583 	} else {							\
   1584 		if ((connp)->ref <= 0)					\
   1585 			panic("nca CONN_REFRELE: %p has no references",	\
   1586 			    (void *)(connp));				\
   1587 	}								\
   1588 	(connp)->ref--;							\
   1589 	if ((connp)->ref == 0) {					\
   1590 		/* Last ref of a nca_conn_t, so free it */		\
   1591 		kmutex_t *lock = &(connp)->hashfanout->lock;		\
   1592 		mutex_enter(lock);					\
   1593 		nca_conn_free(connp);					\
   1594 		/* Note: nca_conn_free exits lock */			\
   1595 	}								\
   1596 }
   1597 
   1598 /*
   1599  * The nca_io2_shadow_t is used by the kernel to contain a copy of a user-
   1600  * land nca_io2_t and the user-land nca_io2_t address and size.
   1601  */
   1602 
   1603 typedef struct nca_io2_shadow_s {
   1604 	nca_io2_t	io;		/* copy of user-land nca_io2_t */
   1605 	void		*data_ptr;	/* copy of door_arg_t.data_ptr */
   1606 	size_t		data_size;	/* copy of door_arg_t.data_size */
   1607 } nca_io2_shadow_t;
   1608 
   1609 #define	SHADOW_NONE	0x00		/* nca_io2_t.shadow NONE */
   1610 #define	SHADOW_DOORSRV	0x01		/* nca_io2_t.shadow door_srv() */
   1611 #define	SHADOW_NCAFS	0x02		/* nca_io2_t.shadow NCAfs */
   1612 
   1613 
   1614 /*
   1615  * Given a ptr to a nca_io2_t, a field and the field_length, write data
   1616  * into buffer (Note: word aligned offsets).
   1617  */
   1618 #define	NCA_IO_WDATA(val, vsize, p, n_used, len, off)		\
   1619 	/*CONSTCOND*/						\
   1620 	if ((val) == NULL) {					\
   1621 		(p)->len = vsize;				\
   1622 		(p)->off = 0;					\
   1623 	} else {						\
   1624 		(p)->len = (vsize);				\
   1625 		(p)->off = ((n_used) + sizeof (uint32_t) - 1) &	\
   1626 				(~(sizeof (uint32_t) - 1));	\
   1627 		bcopy((char *)(val),				\
   1628 		    ((char *)(p) + (p)->off), (vsize));		\
   1629 		(n_used) = (p)->off + (p)->len;			\
   1630 	}
   1631 
   1632 /*
   1633  * Given a ptr to an nca_io2_t, a field length member name, append data to
   1634  * it in the buffer. Note: must be the last field a WDATA() was done for.
   1635  *
   1636  * Note: a NULL NCA_IO_WDATA() can be followed by a NCA_IO_ADATA() only if
   1637  *		vsize was == -1.
   1638  *
   1639  */
   1640 #define	NCA_IO_ADATA(val, vsize, p, n_used, len, off)		\
   1641 	if ((p)->len == -1) {					\
   1642 		(p)->len = 0;					\
   1643 		(p)->off = ((n_used) + sizeof (uint32_t) - 1) &	\
   1644 		(~(sizeof (uint32_t) - 1));			\
   1645 	}							\
   1646 	bcopy((char *)(val), ((char *)(p) + \
   1647 	    (p)->off + (p)->len), (vsize));			\
   1648 	(p)->len += (vsize);					\
   1649 	(n_used) += (vsize);
   1650 
   1651 /*
   1652  * Given a ptr to a nca_io2_t and a field construct a pointer.
   1653  */
   1654 #define	NCA_IO_PDATA(p, off) ((char *)(p) + (p)->off)
   1655 
   1656 
   1657 #ifndef	isdigit
   1658 #define	isdigit(c) ((c) >= '0' && (c) <= '9')
   1659 #endif
   1660 
   1661 #ifndef	tolower
   1662 #define	tolower(c) ((c) >= 'A' && (c) <= 'Z' ? (c) | 0x20 : (c))
   1663 #endif
   1664 
   1665 #ifndef	isalpha
   1666 #define	isalpha(c) (((c) >= 'A' && (c) <= 'Z') || ((c) >= 'a' && (c) <= 'z'))
   1667 #endif
   1668 
   1669 #ifndef	isspace
   1670 #define	isspace(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || \
   1671 		    (c) == '\r' || (c) == '\f' || (c) == '\013')
   1672 #endif
   1673 
   1674 extern char *strnchr(const char *, int, size_t);
   1675 extern char *strnstr(const char *, const char *, size_t);
   1676 extern char *strncasestr(const char *, const char *, size_t);
   1677 extern char *strrncasestr(const char *, const char *, size_t);
   1678 extern int atoin(const char *, size_t);
   1679 extern int digits(int);
   1680 
   1681 extern void nca_conn_free(nca_conn_t *);
   1682 extern void nca_logit_off(void);
   1683 extern void node_fr(node_t *);
   1684 
   1685 extern nca_squeue_t *nca_squeue_init(nca_squeue_t *, uint32_t,
   1686     processorid_t, void (*)(), void *, void (*)(), clock_t, pri_t);
   1687 extern void nca_squeue_fini(nca_squeue_t *);
   1688 extern void nca_squeue_enter(nca_squeue_t *, mblk_t *, void *);
   1689 extern void nca_squeue_fill(nca_squeue_t *, mblk_t *, void *);
   1690 extern mblk_t *nca_squeue_remove(nca_squeue_t *);
   1691 extern void nca_squeue_worker(nca_squeue_t *);
   1692 extern mblk_t *nca_squeue_ctl(mblk_t *, void *, unsigned short);
   1693 extern void nca_squeue_signal(nca_squeue_t *);
   1694 extern void nca_squeue_exit(nca_squeue_t *);
   1695 extern void sqfan_init(sqfan_t *, uint32_t, uint32_t, uint32_t);
   1696 extern nca_squeue_t *sqfan_ixinit(sqfan_t *, uint32_t, nca_squeue_t *, uint32_t,
   1697     processorid_t, void (*)(), void *, void (*)(), clock_t, pri_t);
   1698 extern void sqfan_fini(sqfan_t *);
   1699 extern void sqfan_fill(sqfan_t *, mblk_t *, void *);
   1700 extern mblk_t *sqfan_remove(sqfan_t *);
   1701 extern void nca_squeue_nointr(nca_squeue_t *, mblk_t *, void *, int);
   1702 extern void nca_squeue_pause(nca_squeue_t *, mblk_t *, void *, int, boolean_t);
   1703 extern void nca_squeue_willproxy(nca_squeue_t *);
   1704 extern void nca_squeue_proxy(nca_squeue_t *, nca_squeue_t *);
   1705 extern void nca_squeue_bind(nca_squeue_t *, uint32_t, processorid_t);
   1706 
   1707 extern int nca_tcp_clean_death(nca_conn_t *, int);
   1708 extern nca_conn_t *nca_tcp_connect(ipaddr_t, in_port_t, boolean_t);
   1709 extern void nca_tcp_send(nca_conn_t *, mblk_t *);
   1710 extern void nca_tcp_direct(nca_conn_t *, nca_conn_t *, uint32_t);
   1711 
   1712 /* Functions prototypes from ncadoorsrv.c */
   1713 extern node_t *nca_node_flush(node_t *);
   1714 extern void nca_downcall_service(void *, door_arg_t *, void (**)(void *,
   1715     void *), void **, int *);
   1716 extern node_t *ctag_lookup(uint64_t, unsigned *);
   1717 extern node_t *node_replace(node_t *, nca_conn_t *);
   1718 extern node_t *node_temp(node_t *, nca_conn_t *);
   1719 extern void find_ctags(node_t *, nca_io2_t *, int *);
   1720 extern void nca_ncafs_srv(nca_io2_t *, struct uio *, queue_t *);
   1721 extern boolean_t nca_reclaim_vlru(void);
   1722 extern boolean_t nca_reclaim_plru(boolean_t, boolean_t);
   1723 
   1724 /*
   1725  * NCA_COUNTER() is used to add a signed long value to an unsigned long
   1726  * counter, in general these counters are used to maintain NCA state.
   1727  *
   1728  * NCA_DEBUG_COUNTER() is used like NCA_COUNTER() but for counters used
   1729  * to maintain additional debug state, by default these counters aren't
   1730  * updated unless the global value nca_debug_counter is set to a value
   1731  * other then zero.
   1732  *
   1733  * Also, if NCA_COUNTER_TRACE is defined a time ordered wrapping trace
   1734  * buffer is maintained with hrtime_t stamps, counter address, value to
   1735  * add, and new value entries for all NCA_COUNTER() and NCA_DEBUG_COUNTER()
   1736  * use.
   1737  */
   1738 
   1739 #undef	NCA_COUNTER_TRACE
   1740 
   1741 #ifdef	NCA_COUNTER_TRACE
   1742 
   1743 #define	NCA_COUNTER_TRACE_SZ	1024
   1744 
   1745 typedef struct nca_counter_s {
   1746 	hrtime_t	t;
   1747 	unsigned long	*p;
   1748 	unsigned long	v;
   1749 	unsigned long	nv;
   1750 } nca_counter_t;
   1751 
   1752 extern nca_counter_t nca_counter_tv[];
   1753 extern nca_counter_t *nca_counter_tp;
   1754 
   1755 #define	NCA_COUNTER(_p, _v) {						\
   1756 	unsigned long	*p = _p;					\
   1757 	long		v = _v;						\
   1758 	unsigned long	_nv;						\
   1759 	nca_counter_t	*_otp;						\
   1760 	nca_counter_t	*_ntp;						\
   1761 									\
   1762 	_nv = atomic_add_long_nv(p, v);					\
   1763 	do {								\
   1764 		_otp = nca_counter_tp;					\
   1765 		_ntp = _otp + 1;					\
   1766 		if (_ntp == &nca_counter_tv[NCA_COUNTER_TRACE_SZ])	\
   1767 			_ntp = nca_counter_tv;				\
   1768 	} while (casptr((void *)&nca_counter_tp, (void *)_otp,		\
   1769 	    (void *)_ntp) != (void *)_otp);				\
   1770 	_ntp->t = gethrtime();						\
   1771 	_ntp->p = p;							\
   1772 	_ntp->v = v;							\
   1773 	_ntp->nv = _nv;							\
   1774 }
   1775 
   1776 #else	/* NCA_COUNTER_TRACE */
   1777 
   1778 #define	NCA_COUNTER(p, v) atomic_add_long((p), (v))
   1779 
   1780 #endif	/* NCA_COUNTER_TRACE */
   1781 
   1782 
   1783 /*
   1784  * This is the buf used in upcall to httpd.
   1785  */
   1786 typedef struct {
   1787 	uintptr_t	tid;
   1788 	char		*buf;
   1789 } http_buf_table_t;
   1790 
   1791 /*
   1792  * URI and filename hash, a simple static hash bucket array of singly
   1793  * linked grounded lists is used with a hashing algorithm which has
   1794  * proven to have good distribution properties for strings of ...
   1795  *
   1796  * Note: NCA_HASH_SZ must be a prime number.
   1797  */
   1798 
   1799 #define	NCA_HASH_SZ	8053
   1800 #define	NCA_HASH_MASK	0xFFFFFF
   1801 #define	HASH_IX(s, l, hix, hsz) { \
   1802 	char *cp = (s); \
   1803 	int len = (l); \
   1804 			\
   1805 	(hix) = 0; \
   1806 	while (len-- > 0) { \
   1807 		(hix) = (hix) * 33 + *cp++; \
   1808 		(hix) &= NCA_HASH_MASK; \
   1809 	} \
   1810 	(hix) %= (hsz); \
   1811 }
   1812 
   1813 /*
   1814  * CTAG hash.
   1815  */
   1816 #define	NCA_CTAGHASH_SZ	4096
   1817 #define	CTAGHASH_IX(t, ix) ((ix) = (t) % NCA_CTAGHASH_SZ)
   1818 
   1819 /*
   1820  * VNODE hash.
   1821  *
   1822  * Note: NCA_VNODEHASH_SZ must be a P2Ps() value.
   1823  */
   1824 #define	NCA_VNODEHASH_SZ 12281
   1825 #define	VNODEHASH_IX(p, ix) ((ix) = (((uintptr_t)p >> 27) ^ \
   1826 	((uintptr_t)p >> 17) ^ ((uintptr_t)p >> 11) ^ (uintptr_t)p) % \
   1827 	ncavnodehash_sz)
   1828 
   1829 extern pgcnt_t nca_ppmax;
   1830 extern pgcnt_t nca_vpmax;
   1831 extern pgcnt_t nca_pplim;
   1832 extern pgcnt_t nca_vplim;
   1833 extern pgcnt_t nca_ppmem;
   1834 extern pgcnt_t nca_vpmem;
   1835 extern ssize_t nca_kbmem;
   1836 extern ssize_t nca_spmem;
   1837 extern ssize_t nca_ckmem;
   1838 extern ssize_t nca_mbmem;
   1839 extern ssize_t nca_cbmem;
   1840 extern ssize_t nca_lbmem;
   1841 extern size_t  nca_maxkmem;
   1842 extern uint32_t nca_use_segmap;
   1843 
   1844 extern ulong_t nca_hits;
   1845 extern ulong_t nca_file;
   1846 extern ulong_t nca_ctag;
   1847 extern ulong_t nca_miss;
   1848 
   1849 extern ulong_t nca_hit304;
   1850 extern ulong_t nca_hitnoV;
   1851 extern ulong_t nca_hitnoVfast;
   1852 extern ulong_t nca_hitnoVtemp;
   1853 
   1854 extern ulong_t nca_filehits;
   1855 extern ulong_t nca_filenoV;
   1856 extern ulong_t nca_filenoVfast;
   1857 extern ulong_t nca_filemiss;
   1858 
   1859 extern ulong_t nca_missURI;
   1860 extern ulong_t nca_missQ;
   1861 extern ulong_t nca_missSAFE;
   1862 extern ulong_t nca_missnoV;
   1863 extern ulong_t nca_missnotcp;
   1864 extern ulong_t nca_missfail;
   1865 extern ulong_t nca_misstemp;
   1866 extern ulong_t nca_missnohash;
   1867 extern ulong_t nca_missclean;
   1868 extern ulong_t nca_missadvisory;
   1869 extern ulong_t nca_missadvNoA;
   1870 extern ulong_t nca_missERROR;
   1871 
   1872 extern ulong_t nca_ERROR;
   1873 extern ulong_t nca_flushnode;
   1874 extern ulong_t nca_replacenode;
   1875 extern ulong_t nca_tempnode;
   1876 
   1877 extern ulong_t nca_fail304;
   1878 
   1879 extern ulong_t nca_nocache1;
   1880 extern ulong_t nca_nocache2;
   1881 extern ulong_t nca_nocache3;
   1882 extern ulong_t nca_nocache4;
   1883 extern ulong_t nca_nocache5;
   1884 extern ulong_t nca_nocache6;
   1885 extern ulong_t nca_nocache6nomp;
   1886 extern ulong_t nca_nocache7;
   1887 extern ulong_t nca_nocache8;
   1888 extern ulong_t nca_nocache9;
   1889 extern ulong_t nca_nocache10;
   1890 extern ulong_t nca_nocache11;
   1891 extern ulong_t nca_nocache12;
   1892 extern ulong_t nca_nocache13;
   1893 extern ulong_t nca_nocache14;
   1894 extern ulong_t nca_nocache15;
   1895 extern ulong_t nca_nodes;
   1896 extern ulong_t nca_desballoc;
   1897 
   1898 extern ulong_t nca_plrucnt;
   1899 extern ulong_t nca_vlrucnt;
   1900 extern ulong_t nca_rpcall;
   1901 extern ulong_t nca_rvcall;
   1902 extern ulong_t nca_rpbusy;
   1903 extern ulong_t nca_rvbusy;
   1904 extern ulong_t nca_rpfail;
   1905 extern ulong_t nca_rpempty;
   1906 extern ulong_t nca_rvempty;
   1907 extern ulong_t nca_rpdone;
   1908 extern ulong_t nca_rvdone;
   1909 extern ulong_t nca_rmdone;
   1910 extern ulong_t nca_rkdone;
   1911 extern ulong_t nca_rsdone;
   1912 extern ulong_t nca_rndone;
   1913 extern ulong_t nca_rpnone;
   1914 extern ulong_t nca_rvnone;
   1915 extern ulong_t nca_rmnone;
   1916 extern ulong_t nca_rknone;
   1917 extern ulong_t nca_rsnone;
   1918 extern ulong_t nca_rnh;
   1919 extern ulong_t nca_ref[];
   1920 extern ulong_t nca_vmap_rpcall;
   1921 
   1922 extern ulong_t nca_node_kmem_fail1;
   1923 extern ulong_t nca_node_kmem_fail2;
   1924 
   1925 extern ulong_t doorsrv_nopreempt;
   1926 extern ulong_t doorsrv_badconnect;
   1927 extern ulong_t doorsrv_invaladvise;
   1928 extern ulong_t doorsrv_notupcall;
   1929 extern ulong_t doorsrv_badadvise;
   1930 extern ulong_t doorsrv_cksum;
   1931 extern ulong_t doorsrv_error;
   1932 extern ulong_t doorsrv_op;
   1933 extern ulong_t doorsrv_badtee;
   1934 extern ulong_t doorsrv_badio;
   1935 extern ulong_t doorsrv_sz;
   1936 
   1937 extern ulong_t nca_allocfail;
   1938 extern ulong_t nca_mapinfail;
   1939 extern ulong_t nca_mapinfail1;
   1940 extern ulong_t nca_mapinfail2;
   1941 extern ulong_t nca_mapinfail3;
   1942 
   1943 extern ulong_t nca_httpd_http;
   1944 extern ulong_t nca_httpd_badsz;
   1945 extern ulong_t nca_httpd_nosz;
   1946 extern ulong_t nca_httpd_filename;
   1947 extern ulong_t nca_httpd_filename1;
   1948 extern ulong_t nca_httpd_filename2;
   1949 extern ulong_t nca_httpd_trailer;
   1950 extern ulong_t nca_httpd_preempt;
   1951 extern ulong_t nca_httpd_downcall;
   1952 extern ulong_t nca_early_downcall;
   1953 extern ulong_t nca_httpd_more;
   1954 
/*
 * Logging counters.
 *
 * NOTE(review): unlike the counters declared around them, these are
 * written without "extern", so every file that includes this header gets
 * a tentative definition of each one.  That only links when the
 * toolchain merges common symbols.  Before adding "extern" here, confirm
 * that exactly one source file defines them - TODO confirm.
 */
   1955 ulong_t nca_logit_noupcall;
   1956 
   1957 ulong_t nca_logit;
   1958 ulong_t nca_logit_nomp;
   1959 ulong_t nca_logit_no;
   1960 ulong_t nca_logit_NULL;
   1961 ulong_t nca_logit_fail;
   1962 
   1963 ulong_t nca_logit_flush_NULL1;
   1964 ulong_t nca_logit_flush_NULL2;
   1965 
   1966 ulong_t nca_logger_NULL1;
   1967 ulong_t nca_logger_NULL2;
   1968 
   1969 ulong_t nca_log_buf_alloc_NULL;
   1970 ulong_t nca_log_buf_alloc_fail;
   1971 ulong_t nca_log_buf_alloc_part;
   1972 
   1973 ulong_t nca_log_buf_dup;
   1974 
   1975 extern ulong_t nca_upcalls;
   1976 extern ulong_t nca_ncafs_upcalls;
   1977 
   1978 extern ulong_t nca_conn_count;
   1979 extern ulong_t nca_conn_kmem;
   1980 extern ulong_t nca_conn_kmem_fail;
   1981 extern ulong_t nca_conn_allocb_fail;
   1982 extern ulong_t nca_conn_tw;
   1983 extern ulong_t nca_conn_tw1;
   1984 extern ulong_t nca_conn_tw2;
   1985 extern ulong_t nca_conn_reinit_cnt;
   1986 extern ulong_t nca_conn_NULL1;
   1987 extern ulong_t nca_conn_Q0;
   1988 extern ulong_t nca_conn_FLAGS;
   1989 
   1990 extern ulong_t tcpwronginq;
   1991 extern ulong_t ipsendup;
   1992 extern ulong_t ipwrongcpu;
   1993 extern ulong_t iponcpu;
   1994 
   1995 extern ulong_t nca_tcp_xmit_null;
   1996 extern ulong_t nca_tcp_xmit_null1;
   1997 
   1998 extern ulong_t tw_on;
   1999 extern ulong_t tw_fire;
   2000 extern ulong_t tw_fire1;
   2001 extern ulong_t tw_fire2;
   2002 extern ulong_t tw_fire3;
   2003 extern ulong_t tw_add;
   2004 extern ulong_t tw_add1;
   2005 extern ulong_t tw_delete;
   2006 extern ulong_t tw_reclaim;
   2007 extern ulong_t tw_reap;
   2008 extern ulong_t tw_reap1;
   2009 extern ulong_t tw_reap2;
   2010 extern ulong_t tw_reap3;
   2011 extern ulong_t tw_reap4;
   2012 extern ulong_t tw_reap5;
   2013 extern ulong_t tw_timer;
   2014 extern ulong_t tw_timer1;
   2015 extern ulong_t tw_timer2;
   2016 extern ulong_t tw_timer3;
   2017 extern ulong_t tw_timer4;
   2018 extern ulong_t tw_timer5;
   2019 
   2020 extern ulong_t ti_on;
   2021 extern ulong_t ti_fire;
   2022 extern ulong_t ti_fire1;
   2023 extern ulong_t ti_fire2;
   2024 extern ulong_t ti_fire3;
   2025 extern ulong_t ti_fire4;
   2026 extern ulong_t ti_add;
   2027 extern ulong_t ti_add1;
   2028 extern ulong_t ti_add2;
   2029 extern ulong_t ti_add3;
   2030 extern ulong_t ti_add4;
   2031 extern ulong_t ti_add5;
   2032 extern ulong_t ti_add_reuse;
   2033 extern ulong_t ti_delete;
   2034 extern ulong_t ti_delete1;
   2035 extern ulong_t ti_delete2;
   2036 extern ulong_t ti_reap;
   2037 extern ulong_t ti_reap1;
   2038 extern ulong_t ti_reap2;
   2039 extern ulong_t ti_reap3;
   2040 extern ulong_t ti_reap4;
   2041 extern ulong_t ti_reap5;
   2042 extern ulong_t ti_timer;
   2043 extern ulong_t ti_timer1;
   2044 extern ulong_t ti_timer2;
   2045 extern ulong_t ti_timer3;
   2046 extern ulong_t ti_timer4;
   2047 extern ulong_t ti_timer5;
   2048 extern ulong_t ti_timer6;
   2049 
   2050 extern uint32_t nca_conn_q;
   2051 extern uint32_t nca_conn_q0;
   2052 extern uint32_t nca_conn_req_max_q;
   2053 extern uint32_t nca_conn_req_max_q0;
   2054 
   2055 extern char nca_resp_500[];
   2056 extern ssize_t nca_resp_500_sz;
   2057 
   2058 extern uint32_t ncaurihash_sz;
   2059 extern uint32_t ncafilehash_sz;
   2060 extern uint32_t ncactaghash_sz;
   2061 extern uint32_t ncavnodehash_sz;
   2062 extern nodef_t *ncaurihash;
   2063 extern nodef_t *ncafilehash;
   2064 extern nodef_t *ncavnodehash;
   2065 extern nodef_t *ncactaghash;
   2066 extern char nca_httpd_door_path[];
   2067 extern char nca_httpd_downdoor_path[];
   2068 extern door_handle_t nca_downcall_door_hand;
   2069 extern uint32_t n_http_buf_size;
   2070 extern door_handle_t nca_httpd_door_hand;
   2071 extern sqfan_t nca_miss_fanout1;
   2072 extern sqfan_t nca_miss_fanout2;
   2073 extern nca_door_t nca_httpd_door;
   2074 extern int nca_downdoor_created;
   2075 extern int n_http_buf_table;
   2076 extern http_buf_table_t *g_http_buf_table;
   2077 extern struct kmem_cache *node_cache;
   2078 #ifdef DEBUG
   2079 extern node_t *nca_http_response(nca_conn_t *, const char *, int, char *, int,
   2080 		    uint_t, const char *);
   2081 extern node_t *nca_http_response_node(nca_conn_t *, const char *, int, node_t *,
   2082 		    const char *);
   2083 #else
   2084 extern node_t *nca_http_response(nca_conn_t *, const char *, int, char *, int,
   2085 		    uint_t);
   2086 extern node_t *nca_http_response_node(nca_conn_t *, const char *, int,
   2087     node_t *);
   2088 #endif
   2089 extern void nca_node_del(node_t *);
   2090 extern void nca_node_uncache(node_t *);
   2091 extern node_t *nca_node_add(char *, int, nodef_t *, int);
   2092 extern node_t *node_create(int, boolean_t, char *, int);
   2093 extern void nca_reclaim_phys(node_t *, boolean_t, boolean_t);
   2094 extern boolean_t nca_http_pmap(node_t *);
   2095 extern boolean_t nca_http_vmap(node_t *, int);
   2096 extern time_t nca_http_date(char *);
   2097 extern node_t *nca_httpd_data(node_t *, nca_conn_t *, nca_io2_t *, int);
   2098 extern void nca_missed(node_t *, mblk_t *, nca_squeue_t *);
   2099 extern void nca_miss_conn_mv(node_t *, nca_conn_t *);
   2100 extern void nca_miss_conn_fr(node_t *, nca_conn_t *);
   2101 extern void nca_http_logit(nca_conn_t *);
   2102 extern void nca_http_error(nca_conn_t *);
   2103 extern void nca_node_xmit(node_t *, nca_conn_t *);
   2104 
   2105 /*
   2106  * It contains data for forwarding data to application programs.
   2107  * For door case, doorhandle is the upcall door handle and listenerq
   2108  * is NULL; for ncafs, listenerq is the upcall listener queue and
   2109  * doorhandle is NULL. listenning is always B_TRUE for door and it is
   2110  * B_TRUE for ncafs only after the listen system call has been issued.
   2111  */
   2112 typedef struct nca_listener_s {
   2113 	boolean_t	listenning;	/* is ready for accepting connection */
   2114 	door_handle_t	doorhandle;	/* door handle or NULL for ncafs */
   2115 	queue_t		*listenerq;	/* upcall queue or NULL for door */
   2116 } nca_listener_t;
   2117 
   2118 /*
   2119  * Returned values of nca_isnca_data.
   2120  * NOT_NCA_DATA:	not NCA data.
   2121  * NCA_DATA_ANY_ADDR:	NCA data, matches INADDR_ANY.
   2122  * NCA_DATA_ADDR:	NCA data, match an IP address.
   2123  */
   2124 #define	NOT_NCA_DATA		0
   2125 #define	NCA_DATA_ANY_ADDR	1
   2126 #define	NCA_DATA_ADDR		2
   2127 
   2128 extern uint32_t ipportrehashcount1;
   2129 extern uint32_t ipportrehashcount2;
   2130 extern uint32_t ipportbucketcnt;
   2131 extern uint32_t ipporttablesize;
   2132 extern uint32_t ncafscount;
   2133 extern uint32_t doorcount;
   2134 extern int	ip_virtual_hosting;
   2135 
   2136 extern nca_listener_t *nca_listener_find(ipaddr_t, uint16_t);
   2137 extern nca_listener_t *nca_listener_find2(ipaddr_t, uint16_t);
   2138 extern int		nca_isnca_data(ipaddr_t, uint16_t);
   2139 extern int		nca_listener_add(ipaddr_t, uint16_t, void *, boolean_t);
   2140 extern int		nca_listener_del(ipaddr_t, uint16_t);
   2141 extern void		nca_listener_report(mblk_t *);
   2142 
   2143 #ifdef	__cplusplus
   2144 }
   2145 #endif
   2146 
   2147 #endif	/* _INET_NCA_H */
   2148