Home | History | Annotate | Download | only in sockfs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  */
     25 
     26 #include <sys/strsubr.h>
     27 #include <sys/strsun.h>
     28 #include <sys/param.h>
     29 #include <sys/sysmacros.h>
     30 #include <vm/seg_map.h>
     31 #include <vm/seg_kpm.h>
     32 #include <sys/condvar_impl.h>
     33 #include <sys/sendfile.h>
     34 #include <fs/sockfs/nl7c.h>
     35 #include <fs/sockfs/nl7curi.h>
     36 #include <fs/sockfs/socktpi_impl.h>
     37 
     38 #include <inet/common.h>
     39 #include <inet/ip.h>
     40 #include <inet/ip6.h>
     41 #include <inet/tcp.h>
     42 #include <inet/led.h>
     43 #include <inet/mi.h>
     44 
     45 #include <inet/nca/ncadoorhdr.h>
     46 #include <inet/nca/ncalogd.h>
     47 #include <inet/nca/ncandd.h>
     48 
     49 #include <sys/promif.h>
     50 
     51 /*
     52  * Some externs:
     53  */
     54 
     55 extern boolean_t	nl7c_logd_enabled;
     56 extern void		nl7c_logd_log(uri_desc_t *, uri_desc_t *,
     57 			    time_t, ipaddr_t);
     58 extern boolean_t	nl7c_close_addr(struct sonode *);
     59 extern struct sonode	*nl7c_addr2portso(void *);
     60 extern uri_desc_t	*nl7c_http_cond(uri_desc_t *, uri_desc_t *);
     61 
     62 /*
     63  * Various global tuneables:
     64  */
     65 
clock_t		nl7c_uri_ttl = -1;	/* TTL in seconds (-1 == infinite) */

boolean_t	nl7c_use_kmem = B_FALSE; /* Force use of kmem (no segmap) */

uint64_t	nl7c_file_prefetch = 1; /* File cache prefetch pages */

/*
 * nl7c_uri_max is the cache size limit; besides being a tuneable it is
 * lowered by uri_kmc_reclaim() each time the kmem reclaim callback runs.
 * nl7c_uri_bytes is a running total rather than a tuneable, it is reduced
 * with atomic_add_64() in nl7c_uri_inactive() when kmem backed response
 * data is freed.
 */
uint64_t	nl7c_uri_max = 0;	/* Maximum bytes (0 == infinite) */
uint64_t	nl7c_uri_bytes = 0;	/* Bytes of kmem used by URIs */
     74 
     75 /*
     76  * Locals:
     77  */
     78 
     79 static int	uri_rd_response(struct sonode *, uri_desc_t *,
     80 		    uri_rd_t *, boolean_t);
     81 static int	uri_response(struct sonode *, uri_desc_t *);
     82 
     83 /*
     84  * HTTP scheme functions called from nl7chttp.c:
     85  */
     86 
     87 boolean_t nl7c_http_request(char **, char *, uri_desc_t *, struct sonode *);
     88 boolean_t nl7c_http_response(char **, char *, uri_desc_t *, struct sonode *);
     89 boolean_t nl7c_http_cmp(void *, void *);
     90 mblk_t *nl7c_http_persist(struct sonode *);
     91 void nl7c_http_free(void *arg);
     92 void nl7c_http_init(void);
     93 
     94 /*
     95  * Counters that need to move to kstat and/or be removed:
     96  */
     97 
/*
 * Event counters. Where they are bumped in the code visible here
 * (nl7c_uri_purge, nl7c_uri_expire, nl7c_uri_reclaim_calls) it is with a
 * plain "++" rather than an atomic operation; volatile does not make that
 * increment atomic, so totals may undercount if bumped concurrently.
 */
volatile uint64_t nl7c_uri_request = 0;
volatile uint64_t nl7c_uri_hit = 0;
volatile uint64_t nl7c_uri_pass = 0;
volatile uint64_t nl7c_uri_miss = 0;
volatile uint64_t nl7c_uri_temp = 0;
volatile uint64_t nl7c_uri_more = 0;
volatile uint64_t nl7c_uri_data = 0;
volatile uint64_t nl7c_uri_sendfilev = 0;
volatile uint64_t nl7c_uri_reclaim_calls = 0;	/* nl7c_uri_reclaim() calls */
volatile uint64_t nl7c_uri_reclaim_cnt = 0;
volatile uint64_t nl7c_uri_pass_urifail = 0;
volatile uint64_t nl7c_uri_pass_dupbfail = 0;
volatile uint64_t nl7c_uri_more_get = 0;
volatile uint64_t nl7c_uri_pass_method = 0;
volatile uint64_t nl7c_uri_pass_option = 0;
volatile uint64_t nl7c_uri_more_eol = 0;
volatile uint64_t nl7c_uri_more_http = 0;
volatile uint64_t nl7c_uri_pass_http = 0;
volatile uint64_t nl7c_uri_pass_addfail = 0;
volatile uint64_t nl7c_uri_pass_temp = 0;
volatile uint64_t nl7c_uri_expire = 0;	/* uri_lookup() dropped expired URI */
volatile uint64_t nl7c_uri_purge = 0;	/* uri_lookup() dropped on nocache */
volatile uint64_t nl7c_uri_NULL1 = 0;
volatile uint64_t nl7c_uri_NULL2 = 0;
volatile uint64_t nl7c_uri_close = 0;
volatile uint64_t nl7c_uri_temp_close = 0;
volatile uint64_t nl7c_uri_free = 0;
volatile uint64_t nl7c_uri_temp_free = 0;
volatile uint64_t nl7c_uri_temp_mk = 0;
volatile uint64_t nl7c_uri_rd_EAGAIN = 0;
    128 
    129 /*
    130  * Various kmem_cache_t's:
    131  */
    132 
/* All four caches are created in nl7c_uri_init(). */
kmem_cache_t *nl7c_uri_kmc;		/* uri_desc_t, reclaim uri_kmc_reclaim */
kmem_cache_t *nl7c_uri_rd_kmc;		/* uri_rd_t */
static kmem_cache_t *uri_desb_kmc;	/* uri_desb_t */
static kmem_cache_t *uri_segmap_kmc;	/* uri_segmap_t */
    137 
    138 static void uri_kmc_reclaim(void *);
    139 
    140 static void nl7c_uri_reclaim(void);
    141 
    142 /*
    143  * The URI hash is a dynamically sized A/B bucket hash, when the current
    144  * hash's average bucket chain length exceeds URI_HASH_AVRG a new hash of
    145  * the next P2Ps[] size is created.
    146  *
 * All lookups are done in the current hash then the new hash (if any).
 * If there is a new hash then, when a current hash bucket chain is
 * examined, any uri_desc_t members will be migrated to the new hash; when
 * the last uri_desc_t has been migrated the new hash will become the
 * current and the previous current hash will be freed, leaving a single
 * hash.
    153  *
 * uri_hash_t - hash bucket (chain) type, contained in the uri_hash_ab[]
 * and can be accessed only after acquiring the uri_hash_access lock (for
 * READER or WRITER) then acquiring the lock uri_hash_t.lock; the uri_hash_t
 * and all linked uri_desc_t.hash members are then protected. Note, a
 * REF_HOLD() is placed on all uri_desc_t uri_hash_t list members.
    159  *
    160  * uri_hash_access - rwlock for all uri_hash_* variables, READER for read
    161  * access and WRITER for write access. Note, WRITER is only required for
    162  * hash geometry changes.
    163  *
    164  * uri_hash_which - which uri_hash_ab[] is the current hash.
    165  *
    166  * uri_hash_n[] - the P2Ps[] index for each uri_hash_ab[].
    167  *
    168  * uri_hash_sz[] - the size for each uri_hash_ab[].
    169  *
    170  * uri_hash_cnt[] - the total uri_desc_t members for each uri_hash_ab[].
    171  *
    172  * uri_hash_overflow[] - the uri_hash_cnt[] for each uri_hash_ab[] when
    173  * a new uri_hash_ab[] needs to be created.
    174  *
    175  * uri_hash_ab[] - the uri_hash_t entries.
    176  *
    177  * uri_hash_lru[] - the last uri_hash_ab[] walked for lru reclaim.
    178  */
    179 
/* One hash bucket: the head of a singly linked chain plus its lock. */
typedef struct uri_hash_s {
	struct uri_desc_s	*list;		/* Chain head, linked by .hash */
	kmutex_t		lock;		/* Held while walking/editing */
} uri_hash_t;
    184 
#define	URI_HASH_AVRG	5	/* Desired average hash chain length */
#define	URI_HASH_N_INIT	9	/* P2Ps[] initial index */

/*
 * A/B hash state, every array is indexed by 0 or 1. uri_hash_which names
 * the current table, the other slot is the "new" table while a resize is
 * in progress (its uri_hash_ab[] entry is NULL otherwise).
 */
static krwlock_t	uri_hash_access;	/* Guards all uri_hash_* */
static uint32_t		uri_hash_which = 0;	/* Index of current hash */
static uint32_t		uri_hash_n[2] = {URI_HASH_N_INIT, 0}; /* P2Ps[] ix */
static uint32_t		uri_hash_sz[2] = {0, 0};	/* Bucket count */
static uint32_t		uri_hash_cnt[2] = {0, 0};	/* URIs hashed */
static uint32_t		uri_hash_overflow[2] = {0, 0};	/* Resize trigger */
static uri_hash_t	*uri_hash_ab[2] = {NULL, NULL};	/* Bucket arrays */
static uri_hash_t	*uri_hash_lru[2] = {NULL, NULL}; /* Reclaim cursor */
    196 
/*
 * Primes for N of 3 - 24 where P is the first prime less than
 * (2^(N-1))+(2^(N-2)); these primes have been found to be useful for
 * prime sized hash tables.
 *
 * The table is indexed directly by N, so entries 0 - 2 are unused
 * placeholders and the trailing 0 marks the end of the usable sizes.
 */

static const int P2Ps[] = {
	0, 0, 0, 5, 11, 23, 47, 89, 191, 383, 761, 1531, 3067,
	6143, 12281, 24571, 49139, 98299, 196597, 393209,
	786431, 1572853, 3145721, 6291449, 12582893, 0};
    206 
    207 /*
    208  * Hash macros:
    209  *
 *    H2A(char *cp, char *ep, char c) - convert the escaped octet (ASCII)
 *    hex multichar of the format "%HH" pointed to by *cp to a char and
 *    return it in c; *ep points just past the end of the (char *). On
 *    return *cp will point to the last char consumed.
    214  *
 *    URI_HASH(unsigned hix, char *cp, char *ep) - hash the char(s) from
 *    *cp to *ep into the unsigned hix; neither cp nor ep is modified.
    217  *
    218  *    URI_HASH_IX(unsigned hix, int which) - convert the hash value hix to
    219  *    a hash index 0 - (uri_hash_sz[which] - 1).
    220  *
    221  *    URI_HASH_MIGRATE(from, hp, to) - migrate the uri_hash_t *hp list
    222  *    uri_desc_t members from hash from to hash to.
    223  *
 *    URI_HASH_UNLINK(cur, new, hp, puri, uri) - unlink the uri_desc_t
 *    *uri which is a member of the uri_hash_t *hp list with a previous
 *    list member of *puri for the uri_hash_ab[] cur. After unlinking
 *    check for cur hash empty, if so make new cur. Note, as this macro
 *    can change a hash chain it needs to be run under hash_access as
 *    RW_WRITER. Further, as it can change the new hash to cur, any access
 *    to the hash state must be done after either dropping locks and
 *    starting over or making sure the global state is as consistent after
 *    as before.
    233  */
    234 
    235 #define	H2A(cp, ep, c) {						\
    236 	int	_h = 2;							\
    237 	int	_n = 0;							\
    238 	char	_hc;							\
    239 									\
    240 	while (_h > 0 && ++(cp) < (ep)) {				\
    241 		if (_h == 1)						\
    242 			_n *= 0x10;					\
    243 		_hc = *(cp);						\
    244 		if (_hc >= '0' && _hc <= '9')				\
    245 			_n += _hc - '0';				\
    246 		else if (_hc >= 'a' || _hc <= 'f')			\
    247 			_n += _hc - 'W';				\
    248 		else if (_hc >= 'A' || _hc <= 'F')			\
    249 			_n += _hc - '7';				\
    250 		_h--;							\
    251 	}								\
    252 	(c) = _n;							\
    253 }
    254 
    255 #define	URI_HASH(hv, cp, ep) {						\
    256 	char	*_s = (cp);						\
    257 	char	_c;							\
    258 									\
    259 	while (_s < (ep)) {						\
    260 		if ((_c = *_s) == '%') {				\
    261 			H2A(_s, (ep), _c);				\
    262 		}							\
    263 		CHASH(hv, _c);						\
    264 		_s++;							\
    265 	}								\
    266 }
    267 
    268 #define	URI_HASH_IX(hix, which) (hix) = (hix) % (uri_hash_sz[(which)])
    269 
    270 #define	URI_HASH_MIGRATE(from, hp, to) {				\
    271 	uri_desc_t	*_nuri;						\
    272 	uint32_t	_nhix;						\
    273 	uri_hash_t	*_nhp;						\
    274 									\
    275 	mutex_enter(&(hp)->lock);					\
    276 	while ((_nuri = (hp)->list) != NULL) {				\
    277 		(hp)->list = _nuri->hash;				\
    278 		atomic_add_32(&uri_hash_cnt[(from)], -1);		\
    279 		atomic_add_32(&uri_hash_cnt[(to)], 1);			\
    280 		_nhix = _nuri->hvalue;					\
    281 		URI_HASH_IX(_nhix, to);					\
    282 		_nhp = &uri_hash_ab[(to)][_nhix];			\
    283 		mutex_enter(&_nhp->lock);				\
    284 		_nuri->hash = _nhp->list;				\
    285 		_nhp->list = _nuri;					\
    286 		_nuri->hit = 0;						\
    287 		mutex_exit(&_nhp->lock);				\
    288 	}								\
    289 	mutex_exit(&(hp)->lock);					\
    290 }
    291 
    292 #define	URI_HASH_UNLINK(cur, new, hp, puri, uri) {			\
    293 	if ((puri) != NULL) {						\
    294 		(puri)->hash = (uri)->hash;				\
    295 	} else {							\
    296 		(hp)->list = (uri)->hash;				\
    297 	}								\
    298 	if (atomic_add_32_nv(&uri_hash_cnt[(cur)], -1) == 0 &&		\
    299 	    uri_hash_ab[(new)] != NULL) {				\
    300 		kmem_free(uri_hash_ab[cur],				\
    301 		    sizeof (uri_hash_t) * uri_hash_sz[cur]);		\
    302 		uri_hash_ab[(cur)] = NULL;				\
    303 		uri_hash_lru[(cur)] = NULL;				\
    304 		uri_hash_which = (new);					\
    305 	} else {							\
    306 		uri_hash_lru[(cur)] = (hp);				\
    307 	}								\
    308 }
    309 
    310 void
    311 nl7c_uri_init(void)
    312 {
    313 	uint32_t	cur = uri_hash_which;
    314 
    315 	rw_init(&uri_hash_access, NULL, RW_DEFAULT, NULL);
    316 
    317 	uri_hash_sz[cur] = P2Ps[URI_HASH_N_INIT];
    318 	uri_hash_overflow[cur] = P2Ps[URI_HASH_N_INIT] * URI_HASH_AVRG;
    319 	uri_hash_ab[cur] = kmem_zalloc(sizeof (uri_hash_t) * uri_hash_sz[cur],
    320 	    KM_SLEEP);
    321 	uri_hash_lru[cur] = uri_hash_ab[cur];
    322 
    323 	nl7c_uri_kmc = kmem_cache_create("NL7C_uri_kmc", sizeof (uri_desc_t),
    324 	    0, NULL, NULL, uri_kmc_reclaim, NULL, NULL, 0);
    325 
    326 	nl7c_uri_rd_kmc = kmem_cache_create("NL7C_uri_rd_kmc",
    327 	    sizeof (uri_rd_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
    328 
    329 	uri_desb_kmc = kmem_cache_create("NL7C_uri_desb_kmc",
    330 	    sizeof (uri_desb_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
    331 
    332 	uri_segmap_kmc = kmem_cache_create("NL7C_uri_segmap_kmc",
    333 	    sizeof (uri_segmap_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
    334 
    335 	nl7c_http_init();
    336 }
    337 
    338 #define	CV_SZ	16
    339 
    340 void
    341 nl7c_mi_report_hash(mblk_t *mp)
    342 {
    343 	uri_hash_t	*hp, *pend;
    344 	uri_desc_t	*uri;
    345 	uint32_t	cur;
    346 	uint32_t	new;
    347 	int		n, nz, tot;
    348 	uint32_t	cv[CV_SZ + 1];
    349 
    350 	rw_enter(&uri_hash_access, RW_READER);
    351 	cur = uri_hash_which;
    352 	new = cur ? 0 : 1;
    353 next:
    354 	for (n = 0; n <= CV_SZ; n++)
    355 		cv[n] = 0;
    356 	nz = 0;
    357 	tot = 0;
    358 	hp = &uri_hash_ab[cur][0];
    359 	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
    360 	while (hp < pend) {
    361 		n = 0;
    362 		for (uri = hp->list; uri != NULL; uri = uri->hash) {
    363 			n++;
    364 		}
    365 		tot += n;
    366 		if (n > 0)
    367 			nz++;
    368 		if (n > CV_SZ)
    369 			n = CV_SZ;
    370 		cv[n]++;
    371 		hp++;
    372 	}
    373 
    374 	(void) mi_mpprintf(mp, "\nHash=%s, Buckets=%d, "
    375 	    "Avrg=%d\nCount by bucket:", cur != new ? "CUR" : "NEW",
    376 	    uri_hash_sz[cur], nz != 0 ? ((tot * 10 + 5) / nz) / 10 : 0);
    377 	(void) mi_mpprintf(mp, "Free=%d", cv[0]);
    378 	for (n = 1; n < CV_SZ; n++) {
    379 		int	pn = 0;
    380 		char	pv[5];
    381 		char	*pp = pv;
    382 
    383 		for (pn = n; pn < 1000; pn *= 10)
    384 			*pp++ = ' ';
    385 		*pp = 0;
    386 		(void) mi_mpprintf(mp, "%s%d=%d", pv, n, cv[n]);
    387 	}
    388 	(void) mi_mpprintf(mp, "Long=%d", cv[CV_SZ]);
    389 
    390 	if (cur != new && uri_hash_ab[new] != NULL) {
    391 		cur = new;
    392 		goto next;
    393 	}
    394 	rw_exit(&uri_hash_access);
    395 }
    396 
    397 void
    398 nl7c_mi_report_uri(mblk_t *mp)
    399 {
    400 	uri_hash_t	*hp;
    401 	uri_desc_t	*uri;
    402 	uint32_t	cur;
    403 	uint32_t	new;
    404 	int		ix;
    405 	int		ret;
    406 	char		sc;
    407 
    408 	rw_enter(&uri_hash_access, RW_READER);
    409 	cur = uri_hash_which;
    410 	new = cur ? 0 : 1;
    411 next:
    412 	for (ix = 0; ix < uri_hash_sz[cur]; ix++) {
    413 		hp = &uri_hash_ab[cur][ix];
    414 		mutex_enter(&hp->lock);
    415 		uri = hp->list;
    416 		while (uri != NULL) {
    417 			sc = *(uri->path.ep);
    418 			*(uri->path.ep) = 0;
    419 			ret = mi_mpprintf(mp, "%s: %d %d %d",
    420 			    uri->path.cp, (int)uri->resplen,
    421 			    (int)uri->respclen, (int)uri->count);
    422 			*(uri->path.ep) = sc;
    423 			if (ret == -1) break;
    424 			uri = uri->hash;
    425 		}
    426 		mutex_exit(&hp->lock);
    427 		if (ret == -1) break;
    428 	}
    429 	if (ret != -1 && cur != new && uri_hash_ab[new] != NULL) {
    430 		cur = new;
    431 		goto next;
    432 	}
    433 	rw_exit(&uri_hash_access);
    434 }
    435 
    436 /*
    437  * The uri_desc_t ref_t inactive function called on the last REF_RELE(),
    438  * free all resources contained in the uri_desc_t. Note, the uri_desc_t
    439  * will be freed by REF_RELE() on return.
    440  */
    441 
    442 void
    443 nl7c_uri_inactive(uri_desc_t *uri)
    444 {
    445 	int64_t	 bytes = 0;
    446 
    447 	if (uri->tail) {
    448 		uri_rd_t *rdp = &uri->response;
    449 		uri_rd_t *free = NULL;
    450 
    451 		while (rdp) {
    452 			if (rdp->off == -1) {
    453 				bytes += rdp->sz;
    454 				kmem_free(rdp->data.kmem, rdp->sz);
    455 			} else {
    456 				VN_RELE(rdp->data.vnode);
    457 			}
    458 			rdp = rdp->next;
    459 			if (free != NULL) {
    460 				kmem_cache_free(nl7c_uri_rd_kmc, free);
    461 			}
    462 			free = rdp;
    463 		}
    464 	}
    465 	if (bytes) {
    466 		atomic_add_64(&nl7c_uri_bytes, -bytes);
    467 	}
    468 	if (uri->scheme != NULL) {
    469 		nl7c_http_free(uri->scheme);
    470 	}
    471 	if (uri->reqmp) {
    472 		freeb(uri->reqmp);
    473 	}
    474 }
    475 
    476 /*
    477  * The reclaim is called by the kmem subsystem when kmem is running
    478  * low. More work is needed to determine the best reclaim policy, for
    479  * now we just manipulate the nl7c_uri_max global maximum bytes threshold
    480  * value using a simple arithmetic backoff of the value every time this
    481  * function is called then call uri_reclaim() to enforce it.
    482  *
    483  * Note, this value remains in place and enforced for all subsequent
    484  * URI request/response processing.
    485  *
    486  * Note, nl7c_uri_max is currently initialized to 0 or infinite such that
    487  * the first call here set it to the current uri_bytes value then backoff
    488  * from there.
    489  *
    490  * XXX how do we determine when to increase nl7c_uri_max ???
    491  */
    492 
    493 /*ARGSUSED*/
    494 static void
    495 uri_kmc_reclaim(void *arg)
    496 {
    497 	uint64_t new_max;
    498 
    499 	if ((new_max = nl7c_uri_max) == 0) {
    500 		/* Currently infinite, initialize to current bytes used */
    501 		nl7c_uri_max = nl7c_uri_bytes;
    502 		new_max = nl7c_uri_bytes;
    503 	}
    504 	if (new_max > 1) {
    505 		/* Lower max_bytes to 93% of current value */
    506 		new_max >>= 1;			/* 50% */
    507 		new_max += (new_max >> 1);	/* 75% */
    508 		new_max += (new_max >> 2);	/* 93% */
    509 		if (new_max < nl7c_uri_max)
    510 			nl7c_uri_max = new_max;
    511 		else
    512 			nl7c_uri_max = 1;
    513 	}
    514 	nl7c_uri_reclaim();
    515 }
    516 
    517 /*
    518  * Delete a uri_desc_t from the URI hash.
    519  */
    520 
    521 static void
    522 uri_delete(uri_desc_t *del)
    523 {
    524 	uint32_t	hix;
    525 	uri_hash_t	*hp;
    526 	uri_desc_t	*uri;
    527 	uri_desc_t	*puri;
    528 	uint32_t	cur;
    529 	uint32_t	new;
    530 
    531 	ASSERT(del->hash != URI_TEMP);
    532 	rw_enter(&uri_hash_access, RW_WRITER);
    533 	cur = uri_hash_which;
    534 	new = cur ? 0 : 1;
    535 next:
    536 	puri = NULL;
    537 	hix = del->hvalue;
    538 	URI_HASH_IX(hix, cur);
    539 	hp = &uri_hash_ab[cur][hix];
    540 	for (uri = hp->list; uri != NULL; uri = uri->hash) {
    541 		if (uri != del) {
    542 			puri = uri;
    543 			continue;
    544 		}
    545 		/*
    546 		 * Found the URI, unlink from the hash chain,
    547 		 * drop locks, ref release it.
    548 		 */
    549 		URI_HASH_UNLINK(cur, new, hp, puri, uri);
    550 		rw_exit(&uri_hash_access);
    551 		REF_RELE(uri);
    552 		return;
    553 	}
    554 	if (cur != new && uri_hash_ab[new] != NULL) {
    555 		/*
    556 		 * Not found in current hash and have a new hash so
    557 		 * check the new hash next.
    558 		 */
    559 		cur = new;
    560 		goto next;
    561 	}
    562 	rw_exit(&uri_hash_access);
    563 }
    564 
    565 /*
    566  * Add a uri_desc_t to the URI hash.
    567  */
    568 
    569 static void
    570 uri_add(uri_desc_t *uri, krw_t rwlock, boolean_t nonblocking)
    571 {
    572 	uint32_t	hix;
    573 	uri_hash_t	*hp;
    574 	uint32_t	cur = uri_hash_which;
    575 	uint32_t	new = cur ? 0 : 1;
    576 
    577 	/*
    578 	 * Caller of uri_add() must hold the uri_hash_access rwlock.
    579 	 */
    580 	ASSERT((rwlock == RW_READER && RW_READ_HELD(&uri_hash_access)) ||
    581 	    (rwlock == RW_WRITER && RW_WRITE_HELD(&uri_hash_access)));
    582 	/*
    583 	 * uri_add() always succeeds so add a hash ref to the URI now.
    584 	 */
    585 	REF_HOLD(uri);
    586 again:
    587 	hix = uri->hvalue;
    588 	URI_HASH_IX(hix, cur);
    589 	if (uri_hash_ab[new] == NULL &&
    590 	    uri_hash_cnt[cur] < uri_hash_overflow[cur]) {
    591 		/*
    592 		 * Easy case, no new hash and current hasn't overflowed,
    593 		 * add URI to current hash and return.
    594 		 *
    595 		 * Note, the check for uri_hash_cnt[] above aren't done
    596 		 * atomictally, i.e. multiple threads can be in this code
    597 		 * as RW_READER and update the cnt[], this isn't a problem
    598 		 * as the check is only advisory.
    599 		 */
    600 	fast:
    601 		atomic_add_32(&uri_hash_cnt[cur], 1);
    602 		hp = &uri_hash_ab[cur][hix];
    603 		mutex_enter(&hp->lock);
    604 		uri->hash = hp->list;
    605 		hp->list = uri;
    606 		mutex_exit(&hp->lock);
    607 		rw_exit(&uri_hash_access);
    608 		return;
    609 	}
    610 	if (uri_hash_ab[new] == NULL) {
    611 		/*
    612 		 * Need a new a or b hash, if not already RW_WRITER
    613 		 * try to upgrade our lock to writer.
    614 		 */
    615 		if (rwlock != RW_WRITER && ! rw_tryupgrade(&uri_hash_access)) {
    616 			/*
    617 			 * Upgrade failed, we can't simple exit and reenter
    618 			 * the lock as after the exit and before the reenter
    619 			 * the whole world can change so just wait for writer
    620 			 * then do everything again.
    621 			 */
    622 			if (nonblocking) {
    623 				/*
    624 				 * Can't block, use fast-path above.
    625 				 *
    626 				 * XXX should have a background thread to
    627 				 * handle new ab[] in this case so as to
    628 				 * not overflow the cur hash to much.
    629 				 */
    630 				goto fast;
    631 			}
    632 			rw_exit(&uri_hash_access);
    633 			rwlock = RW_WRITER;
    634 			rw_enter(&uri_hash_access, rwlock);
    635 			cur = uri_hash_which;
    636 			new = cur ? 0 : 1;
    637 			goto again;
    638 		}
    639 		rwlock = RW_WRITER;
    640 		if (uri_hash_ab[new] == NULL) {
    641 			/*
    642 			 * Still need a new hash, allocate and initialize
    643 			 * the new hash.
    644 			 */
    645 			uri_hash_n[new] = uri_hash_n[cur] + 1;
    646 			if (uri_hash_n[new] == 0) {
    647 				/*
    648 				 * No larger P2Ps[] value so use current,
    649 				 * i.e. 2 of the largest are better than 1 ?
    650 				 */
    651 				uri_hash_n[new] = uri_hash_n[cur];
    652 				cmn_err(CE_NOTE, "NL7C: hash index overflow");
    653 			}
    654 			uri_hash_sz[new] = P2Ps[uri_hash_n[new]];
    655 			ASSERT(uri_hash_cnt[new] == 0);
    656 			uri_hash_overflow[new] = uri_hash_sz[new] *
    657 			    URI_HASH_AVRG;
    658 			uri_hash_ab[new] = kmem_zalloc(sizeof (uri_hash_t) *
    659 			    uri_hash_sz[new], nonblocking ? KM_NOSLEEP :
    660 			    KM_SLEEP);
    661 			if (uri_hash_ab[new] == NULL) {
    662 				/*
    663 				 * Alloc failed, use fast-path above.
    664 				 *
    665 				 * XXX should have a background thread to
    666 				 * handle new ab[] in this case so as to
    667 				 * not overflow the cur hash to much.
    668 				 */
    669 				goto fast;
    670 			}
    671 			uri_hash_lru[new] = uri_hash_ab[new];
    672 		}
    673 	}
    674 	/*
    675 	 * Hashed against current hash so migrate any current hash chain
    676 	 * members, if any.
    677 	 *
    678 	 * Note, the hash chain list can be checked for a non empty list
    679 	 * outside of the hash chain list lock as the hash chain struct
    680 	 * can't be destroyed while in the uri_hash_access rwlock, worst
    681 	 * case is that a non empty list is found and after acquiring the
    682 	 * lock another thread beats us to it (i.e. migrated the list).
    683 	 */
    684 	hp = &uri_hash_ab[cur][hix];
    685 	if (hp->list != NULL) {
    686 		URI_HASH_MIGRATE(cur, hp, new);
    687 	}
    688 	/*
    689 	 * If new hash has overflowed before current hash has been
    690 	 * completely migrated then walk all current hash chains and
    691 	 * migrate list members now.
    692 	 */
    693 	if (atomic_add_32_nv(&uri_hash_cnt[new], 1) >= uri_hash_overflow[new]) {
    694 		for (hix = 0; hix < uri_hash_sz[cur]; hix++) {
    695 			hp = &uri_hash_ab[cur][hix];
    696 			if (hp->list != NULL) {
    697 				URI_HASH_MIGRATE(cur, hp, new);
    698 			}
    699 		}
    700 	}
    701 	/*
    702 	 * Add URI to new hash.
    703 	 */
    704 	hix = uri->hvalue;
    705 	URI_HASH_IX(hix, new);
    706 	hp = &uri_hash_ab[new][hix];
    707 	mutex_enter(&hp->lock);
    708 	uri->hash = hp->list;
    709 	hp->list = uri;
    710 	mutex_exit(&hp->lock);
    711 	/*
    712 	 * Last, check to see if last cur hash chain has been
    713 	 * migrated, if so free cur hash and make new hash cur.
    714 	 */
    715 	if (uri_hash_cnt[cur] == 0) {
    716 		/*
    717 		 * If we don't already hold the uri_hash_access rwlock for
    718 		 * RW_WRITE try to upgrade to RW_WRITE and if successful
    719 		 * check again and to see if still need to do the free.
    720 		 */
    721 		if ((rwlock == RW_WRITER || rw_tryupgrade(&uri_hash_access)) &&
    722 		    uri_hash_cnt[cur] == 0 && uri_hash_ab[new] != 0) {
    723 			kmem_free(uri_hash_ab[cur],
    724 			    sizeof (uri_hash_t) * uri_hash_sz[cur]);
    725 			uri_hash_ab[cur] = NULL;
    726 			uri_hash_lru[cur] = NULL;
    727 			uri_hash_which = new;
    728 		}
    729 	}
    730 	rw_exit(&uri_hash_access);
    731 }
    732 
    733 /*
    734  * Lookup a uri_desc_t in the URI hash, if found free the request uri_desc_t
    735  * and return the found uri_desc_t with a REF_HOLD() placed on it. Else, if
    736  * add B_TRUE use the request URI to create a new hash entry. Else if add
    737  * B_FALSE ...
    738  */
    739 
    740 static uri_desc_t *
    741 uri_lookup(uri_desc_t *ruri, boolean_t add, boolean_t nonblocking)
    742 {
    743 	uint32_t	hix;
    744 	uri_hash_t	*hp;
    745 	uri_desc_t	*uri;
    746 	uri_desc_t	*puri;
    747 	uint32_t	cur;
    748 	uint32_t	new;
    749 	char		*rcp = ruri->path.cp;
    750 	char		*rep = ruri->path.ep;
    751 
    752 again:
    753 	rw_enter(&uri_hash_access, RW_READER);
    754 	cur = uri_hash_which;
    755 	new = cur ? 0 : 1;
    756 nexthash:
    757 	puri = NULL;
    758 	hix = ruri->hvalue;
    759 	URI_HASH_IX(hix, cur);
    760 	hp = &uri_hash_ab[cur][hix];
    761 	mutex_enter(&hp->lock);
    762 	for (uri = hp->list; uri != NULL; uri = uri->hash) {
    763 		char	*ap = uri->path.cp;
    764 		char	*bp = rcp;
    765 		char	a, b;
    766 
    767 		/* Compare paths */
    768 		while (bp < rep && ap < uri->path.ep) {
    769 			if ((a = *ap) == '%') {
    770 				/* Escaped hex multichar, convert it */
    771 				H2A(ap, uri->path.ep, a);
    772 			}
    773 			if ((b = *bp) == '%') {
    774 				/* Escaped hex multichar, convert it */
    775 				H2A(bp, rep, b);
    776 			}
    777 			if (a != b) {
    778 				/* Char's don't match */
    779 				goto nexturi;
    780 			}
    781 			ap++;
    782 			bp++;
    783 		}
    784 		if (bp != rep || ap != uri->path.ep) {
    785 			/* Not same length */
    786 			goto nexturi;
    787 		}
    788 		ap = uri->auth.cp;
    789 		bp = ruri->auth.cp;
    790 		if (ap != NULL) {
    791 			if (bp == NULL) {
    792 				/* URI has auth request URI doesn't */
    793 				goto nexturi;
    794 			}
    795 			while (bp < ruri->auth.ep && ap < uri->auth.ep) {
    796 				if ((a = *ap) == '%') {
    797 					/* Escaped hex multichar, convert it */
    798 					H2A(ap, uri->path.ep, a);
    799 				}
    800 				if ((b = *bp) == '%') {
    801 					/* Escaped hex multichar, convert it */
    802 					H2A(bp, rep, b);
    803 				}
    804 				if (a != b) {
    805 					/* Char's don't match */
    806 					goto nexturi;
    807 				}
    808 				ap++;
    809 				bp++;
    810 			}
    811 			if (bp != ruri->auth.ep || ap != uri->auth.ep) {
    812 				/* Not same length */
    813 				goto nexturi;
    814 			}
    815 		} else if (bp != NULL) {
    816 			/* URI doesn't have auth and request URI does */
    817 			goto nexturi;
    818 		}
    819 		/*
    820 		 * Have a path/auth match so before any other processing
    821 		 * of requested URI, check for expire or request no cache
    822 		 * purge.
    823 		 */
    824 		if (uri->expire >= 0 && uri->expire <= ddi_get_lbolt() ||
    825 		    ruri->nocache) {
    826 			/*
    827 			 * URI has expired or request specified to not use
    828 			 * the cached version, unlink the URI from the hash
    829 			 * chain, release all locks, release the hash ref
    830 			 * on the URI, and last look it up again.
    831 			 *
    832 			 * Note, this will cause all variants of the named
    833 			 * URI to be purged.
    834 			 */
    835 			if (puri != NULL) {
    836 				puri->hash = uri->hash;
    837 			} else {
    838 				hp->list = uri->hash;
    839 			}
    840 			mutex_exit(&hp->lock);
    841 			atomic_add_32(&uri_hash_cnt[cur], -1);
    842 			rw_exit(&uri_hash_access);
    843 			if (ruri->nocache)
    844 				nl7c_uri_purge++;
    845 			else
    846 				nl7c_uri_expire++;
    847 			REF_RELE(uri);
    848 			goto again;
    849 		}
    850 		if (uri->scheme != NULL) {
    851 			/*
    852 			 * URI has scheme private qualifier(s), if request
    853 			 * URI doesn't or if no match skip this URI.
    854 			 */
    855 			if (ruri->scheme == NULL ||
    856 			    ! nl7c_http_cmp(uri->scheme, ruri->scheme))
    857 				goto nexturi;
    858 		} else if (ruri->scheme != NULL) {
    859 			/*
    860 			 * URI doesn't have scheme private qualifiers but
    861 			 * request URI does, no match, skip this URI.
    862 			 */
    863 			goto nexturi;
    864 		}
    865 		/*
    866 		 * Have a match, ready URI for return, first put a reference
    867 		 * hold on the URI, if this URI is currently being processed
    868 		 * then have to wait for the processing to be completed and
    869 		 * redo the lookup, else return it.
    870 		 */
    871 		REF_HOLD(uri);
    872 		mutex_enter(&uri->proclock);
    873 		if (uri->proc != NULL) {
    874 			/* The URI is being processed, wait for completion */
    875 			mutex_exit(&hp->lock);
    876 			rw_exit(&uri_hash_access);
    877 			if (! nonblocking &&
    878 			    cv_wait_sig(&uri->waiting, &uri->proclock)) {
    879 				/*
    880 				 * URI has been processed but things may
    881 				 * have changed while we were away so do
    882 				 * most everything again.
    883 				 */
    884 				mutex_exit(&uri->proclock);
    885 				REF_RELE(uri);
    886 				goto again;
    887 			} else {
    888 				/*
    889 				 * A nonblocking socket or an interrupted
    890 				 * cv_wait_sig() in the first case can't
    891 				 * block waiting for the processing of the
    892 				 * uri hash hit uri to complete, in both
    893 				 * cases just return failure to lookup.
    894 				 */
    895 				mutex_exit(&uri->proclock);
    896 				REF_RELE(uri);
    897 				return (NULL);
    898 			}
    899 		}
    900 		mutex_exit(&uri->proclock);
    901 		uri->hit++;
    902 		mutex_exit(&hp->lock);
    903 		rw_exit(&uri_hash_access);
    904 		return (uri);
    905 	nexturi:
    906 		puri = uri;
    907 	}
    908 	mutex_exit(&hp->lock);
    909 	if (cur != new && uri_hash_ab[new] != NULL) {
    910 		/*
    911 		 * Not found in current hash and have a new hash so
    912 		 * check the new hash next.
    913 		 */
    914 		cur = new;
    915 		goto nexthash;
    916 	}
    917 add:
    918 	if (! add) {
    919 		/* Lookup only so return failure */
    920 		rw_exit(&uri_hash_access);
    921 		return (NULL);
    922 	}
    923 	/*
    924 	 * URI not hashed, finish intialization of the
    925 	 * request URI, add it to the hash, return it.
    926 	 */
    927 	ruri->hit = 0;
    928 	ruri->expire = -1;
    929 	ruri->response.sz = 0;
    930 	ruri->proc = (struct sonode *)~NULL;
    931 	cv_init(&ruri->waiting, NULL, CV_DEFAULT, NULL);
    932 	mutex_init(&ruri->proclock, NULL, MUTEX_DEFAULT, NULL);
    933 	uri_add(ruri, RW_READER, nonblocking);
    934 	/* uri_add() has done rw_exit(&uri_hash_access) */
    935 	return (ruri);
    936 }
    937 
    938 /*
    939  * Reclaim URIs until max cache size threshold has been reached.
    940  *
    941  * A CLOCK based reclaim modified with a history (hit counter) counter.
    942  */
    943 
    944 static void
    945 nl7c_uri_reclaim(void)
    946 {
    947 	uri_hash_t	*hp, *start, *pend;
    948 	uri_desc_t	*uri;
    949 	uri_desc_t	*puri;
    950 	uint32_t	cur;
    951 	uint32_t	new;
    952 
    953 	nl7c_uri_reclaim_calls++;
    954 again:
    955 	rw_enter(&uri_hash_access, RW_WRITER);
    956 	cur = uri_hash_which;
    957 	new = cur ? 0 : 1;
    958 next:
    959 	hp = uri_hash_lru[cur];
    960 	start = hp;
    961 	pend = &uri_hash_ab[cur][uri_hash_sz[cur]];
    962 	while (nl7c_uri_bytes > nl7c_uri_max) {
    963 		puri = NULL;
    964 		for (uri = hp->list; uri != NULL; uri = uri->hash) {
    965 			if (uri->hit != 0) {
    966 				/*
    967 				 * Decrement URI activity counter and skip.
    968 				 */
    969 				uri->hit--;
    970 				puri = uri;
    971 				continue;
    972 			}
    973 			if (uri->proc != NULL) {
    974 				/*
    975 				 * Currently being processed by a socket, skip.
    976 				 */
    977 				continue;
    978 			}
    979 			/*
    980 			 * Found a candidate, no hit(s) since added or last
    981 			 * reclaim pass, unlink from it's hash chain, update
    982 			 * lru scan pointer, drop lock, ref release it.
    983 			 */
    984 			URI_HASH_UNLINK(cur, new, hp, puri, uri);
    985 			if (cur == uri_hash_which) {
    986 				if (++hp == pend) {
    987 					/* Wrap pointer */
    988 					hp = uri_hash_ab[cur];
    989 				}
    990 				uri_hash_lru[cur] = hp;
    991 			}
    992 			rw_exit(&uri_hash_access);
    993 			REF_RELE(uri);
    994 			nl7c_uri_reclaim_cnt++;
    995 			goto again;
    996 		}
    997 		if (++hp == pend) {
    998 			/* Wrap pointer */
    999 			hp = uri_hash_ab[cur];
   1000 		}
   1001 		if (hp == start) {
   1002 			if (cur != new && uri_hash_ab[new] != NULL) {
   1003 				/*
   1004 				 * Done with the current hash and have a
   1005 				 * new hash so check the new hash next.
   1006 				 */
   1007 				cur = new;
   1008 				goto next;
   1009 			}
   1010 		}
   1011 	}
   1012 	rw_exit(&uri_hash_access);
   1013 }
   1014 
   1015 /*
   1016  * Called for a socket which is being freed prior to close, e.g. errored.
   1017  */
   1018 
   1019 void
   1020 nl7c_urifree(struct sonode *so)
   1021 {
   1022 	sotpi_info_t *sti = SOTOTPI(so);
   1023 	uri_desc_t *uri = (uri_desc_t *)sti->sti_nl7c_uri;
   1024 
   1025 	sti->sti_nl7c_uri = NULL;
   1026 	if (uri->hash != URI_TEMP) {
   1027 		uri_delete(uri);
   1028 		mutex_enter(&uri->proclock);
   1029 		uri->proc = NULL;
   1030 		if (CV_HAS_WAITERS(&uri->waiting)) {
   1031 			cv_broadcast(&uri->waiting);
   1032 		}
   1033 		mutex_exit(&uri->proclock);
   1034 		nl7c_uri_free++;
   1035 	} else {
   1036 		/* No proclock as uri exclusively owned by so */
   1037 		uri->proc = NULL;
   1038 		nl7c_uri_temp_free++;
   1039 	}
   1040 	REF_RELE(uri);
   1041 }
   1042 
   1043 /*
   1044  * ...
   1045  *
   1046  *	< 0	need more data
   1047  *
   1048  *	  0	parse complete
   1049  *
   1050  *	> 0	parse error
   1051  */
   1052 
   1053 volatile uint64_t nl7c_resp_pfail = 0;
   1054 volatile uint64_t nl7c_resp_ntemp = 0;
   1055 volatile uint64_t nl7c_resp_pass = 0;
   1056 
   1057 static int
   1058 nl7c_resp_parse(struct sonode *so, uri_desc_t *uri, char *data, int sz)
   1059 {
   1060 	if (! nl7c_http_response(&data, &data[sz], uri, so)) {
   1061 		if (data == NULL) {
   1062 			/* Parse fail */
   1063 			goto pfail;
   1064 		}
   1065 		/* More data */
   1066 		data = NULL;
   1067 	} else if (data == NULL) {
   1068 		goto pass;
   1069 	}
   1070 	if (uri->hash != URI_TEMP && uri->nocache) {
   1071 		/*
   1072 		 * After response parse now no cache,
   1073 		 * delete it from cache, wakeup any
   1074 		 * waiters on this URI, make URI_TEMP.
   1075 		 */
   1076 		uri_delete(uri);
   1077 		mutex_enter(&uri->proclock);
   1078 		if (CV_HAS_WAITERS(&uri->waiting)) {
   1079 			cv_broadcast(&uri->waiting);
   1080 		}
   1081 		mutex_exit(&uri->proclock);
   1082 		uri->hash = URI_TEMP;
   1083 		nl7c_uri_temp_mk++;
   1084 	}
   1085 	if (data == NULL) {
   1086 		/* More data needed */
   1087 		return (-1);
   1088 	}
   1089 	/* Success */
   1090 	return (0);
   1091 
   1092 pfail:
   1093 	nl7c_resp_pfail++;
   1094 	return (EINVAL);
   1095 
   1096 pass:
   1097 	nl7c_resp_pass++;
   1098 	return (ENOTSUP);
   1099 }
   1100 
   1101 /*
   1102  * Called to sink application response data, the processing of the data
   1103  * is the same for a cached or temp URI (i.e. a URI for which we aren't
   1104  * going to cache the URI but want to parse it for detecting response
   1105  * data end such that for a persistent connection we can parse the next
   1106  * request).
   1107  *
   1108  * On return 0 is returned for sink success, > 0 on error, and < 0 on
   1109  * no so URI (note, data not sinked).
   1110  */
   1111 
   1112 int
   1113 nl7c_data(struct sonode *so, uio_t *uio)
   1114 {
   1115 	sotpi_info_t	*sti = SOTOTPI(so);
   1116 	uri_desc_t	*uri = (uri_desc_t *)sti->sti_nl7c_uri;
   1117 	iovec_t		*iov;
   1118 	int		cnt;
   1119 	int		sz = uio->uio_resid;
   1120 	char		*data, *alloc;
   1121 	char		*bp;
   1122 	uri_rd_t	*rdp;
   1123 	boolean_t	first;
   1124 	int		error, perror;
   1125 
   1126 	nl7c_uri_data++;
   1127 
   1128 	if (uri == NULL) {
   1129 		/* Socket & NL7C out of sync, disable NL7C */
   1130 		sti->sti_nl7c_flags = 0;
   1131 		nl7c_uri_NULL1++;
   1132 		return (-1);
   1133 	}
   1134 
   1135 	if (sti->sti_nl7c_flags & NL7C_WAITWRITE) {
   1136 		sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
   1137 		first = B_TRUE;
   1138 	} else {
   1139 		first = B_FALSE;
   1140 	}
   1141 
   1142 	alloc = kmem_alloc(sz, KM_SLEEP);
   1143 	URI_RD_ADD(uri, rdp, sz, -1);
   1144 	if (rdp == NULL) {
   1145 		error = ENOMEM;
   1146 		goto fail;
   1147 	}
   1148 
   1149 	if (uri->hash != URI_TEMP && uri->count > nca_max_cache_size) {
   1150 		uri_delete(uri);
   1151 		uri->hash = URI_TEMP;
   1152 	}
   1153 	data = alloc;
   1154 	alloc = NULL;
   1155 	rdp->data.kmem = data;
   1156 	atomic_add_64(&nl7c_uri_bytes, sz);
   1157 
   1158 	bp = data;
   1159 	while (uio->uio_resid > 0) {
   1160 		iov = uio->uio_iov;
   1161 		if ((cnt = iov->iov_len) == 0) {
   1162 			goto next;
   1163 		}
   1164 		cnt = MIN(cnt, uio->uio_resid);
   1165 		error = xcopyin(iov->iov_base, bp, cnt);
   1166 		if (error)
   1167 			goto fail;
   1168 
   1169 		iov->iov_base += cnt;
   1170 		iov->iov_len -= cnt;
   1171 		uio->uio_resid -= cnt;
   1172 		uio->uio_loffset += cnt;
   1173 		bp += cnt;
   1174 	next:
   1175 		uio->uio_iov++;
   1176 		uio->uio_iovcnt--;
   1177 	}
   1178 
   1179 	/* Successfull sink of data, response parse the data */
   1180 	perror = nl7c_resp_parse(so, uri, data, sz);
   1181 
   1182 	/* Send the data out the connection */
   1183 	error = uri_rd_response(so, uri, rdp, first);
   1184 	if (error)
   1185 		goto fail;
   1186 
   1187 	/* Success */
   1188 	if (perror == 0 &&
   1189 	    ((uri->respclen == URI_LEN_NOVALUE &&
   1190 	    uri->resplen == URI_LEN_NOVALUE) ||
   1191 	    uri->count >= uri->resplen)) {
   1192 		/*
   1193 		 * No more data needed and no pending response
   1194 		 * data or current data count >= response length
   1195 		 * so close the URI processing for this so.
   1196 		 */
   1197 		nl7c_close(so);
   1198 		if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
   1199 			/* Not a persistent connection */
   1200 			sti->sti_nl7c_flags = 0;
   1201 		}
   1202 	}
   1203 
   1204 	return (0);
   1205 
   1206 fail:
   1207 	if (alloc != NULL) {
   1208 		kmem_free(alloc, sz);
   1209 	}
   1210 	sti->sti_nl7c_flags = 0;
   1211 	nl7c_urifree(so);
   1212 
   1213 	return (error);
   1214 }
   1215 
   1216 /*
   1217  * Called to read data from file "*fp" at offset "*off" of length "*len"
   1218  * for a maximum of "*max_rem" bytes.
   1219  *
   1220  * On success a pointer to the kmem_alloc()ed file data is returned, "*off"
   1221  * and "*len" are updated for the acutal number of bytes read and "*max_rem"
   1222  * is updated with the number of bytes remaining to be read.
   1223  *
   1224  * Else, "NULL" is returned.
   1225  */
   1226 
   1227 static char *
   1228 nl7c_readfile(file_t *fp, u_offset_t *off, int *len, int max, int *ret)
   1229 {
   1230 	vnode_t	*vp = fp->f_vnode;
   1231 	int	flg = 0;
   1232 	size_t	size = MIN(*len, max);
   1233 	char	*data;
   1234 	int	error;
   1235 	uio_t	uio;
   1236 	iovec_t	iov;
   1237 
   1238 	(void) VOP_RWLOCK(vp, flg, NULL);
   1239 
   1240 	if (*off > MAXOFFSET_T) {
   1241 		VOP_RWUNLOCK(vp, flg, NULL);
   1242 		*ret = EFBIG;
   1243 		return (NULL);
   1244 	}
   1245 
   1246 	if (*off + size > MAXOFFSET_T)
   1247 		size = (ssize32_t)(MAXOFFSET_T - *off);
   1248 
   1249 	data = kmem_alloc(size, KM_SLEEP);
   1250 
   1251 	iov.iov_base = data;
   1252 	iov.iov_len = size;
   1253 	uio.uio_loffset = *off;
   1254 	uio.uio_iov = &iov;
   1255 	uio.uio_iovcnt = 1;
   1256 	uio.uio_resid = size;
   1257 	uio.uio_segflg = UIO_SYSSPACE;
   1258 	uio.uio_llimit = MAXOFFSET_T;
   1259 	uio.uio_fmode = fp->f_flag;
   1260 
   1261 	error = VOP_READ(vp, &uio, fp->f_flag, fp->f_cred, NULL);
   1262 	VOP_RWUNLOCK(vp, flg, NULL);
   1263 	*ret = error;
   1264 	if (error) {
   1265 		kmem_free(data, size);
   1266 		return (NULL);
   1267 	}
   1268 	*len = size;
   1269 	*off += size;
   1270 	return (data);
   1271 }
   1272 
   1273 /*
   1274  * Called to sink application response sendfilev, as with nl7c_data() above
   1275  * all the data will be processed by NL7C unless there's an error.
   1276  */
   1277 
   1278 int
   1279 nl7c_sendfilev(struct sonode *so, u_offset_t *fileoff, sendfilevec_t *sfvp,
   1280 	int sfvc, ssize_t *xfer)
   1281 {
   1282 	sotpi_info_t	*sti = SOTOTPI(so);
   1283 	uri_desc_t	*uri = (uri_desc_t *)sti->sti_nl7c_uri;
   1284 	file_t		*fp = NULL;
   1285 	vnode_t		*vp = NULL;
   1286 	char		*data = NULL;
   1287 	u_offset_t	off;
   1288 	int		len;
   1289 	int		cnt;
   1290 	int		total_count = 0;
   1291 	char		*alloc;
   1292 	uri_rd_t	*rdp;
   1293 	int		max;
   1294 	int		perror;
   1295 	int		error = 0;
   1296 	boolean_t	first = B_TRUE;
   1297 
   1298 	nl7c_uri_sendfilev++;
   1299 
   1300 	if (uri == NULL) {
   1301 		/* Socket & NL7C out of sync, disable NL7C */
   1302 		sti->sti_nl7c_flags = 0;
   1303 		nl7c_uri_NULL2++;
   1304 		return (0);
   1305 	}
   1306 
   1307 	if (sti->sti_nl7c_flags & NL7C_WAITWRITE)
   1308 		sti->sti_nl7c_flags &= ~NL7C_WAITWRITE;
   1309 
   1310 	while (sfvc-- > 0) {
   1311 		/*
   1312 		 * off - the current sfv read file offset or user address.
   1313 		 *
   1314 		 * len - the current sfv length in bytes.
   1315 		 *
   1316 		 * cnt - number of bytes kmem_alloc()ed.
   1317 		 *
   1318 		 * alloc - the kmem_alloc()ed buffer of size "cnt".
   1319 		 *
   1320 		 * data - copy of "alloc" used for post alloc references.
   1321 		 *
   1322 		 * fp - the current sfv file_t pointer.
   1323 		 *
   1324 		 * vp - the current "*vp" vnode_t pointer.
   1325 		 *
   1326 		 * Note, for "data" and "fp" and "vp" a NULL value is used
   1327 		 * when not allocated such that the common failure path "fail"
   1328 		 * is used.
   1329 		 */
   1330 		off = sfvp->sfv_off;
   1331 		len = sfvp->sfv_len;
   1332 		cnt = len;
   1333 
   1334 		if (len == 0) {
   1335 			sfvp++;
   1336 			continue;
   1337 		}
   1338 
   1339 		if (sfvp->sfv_fd == SFV_FD_SELF) {
   1340 			/*
   1341 			 * User memory, copyin() all the bytes.
   1342 			 */
   1343 			alloc = kmem_alloc(cnt, KM_SLEEP);
   1344 			error = xcopyin((caddr_t)(uintptr_t)off, alloc, cnt);
   1345 			if (error)
   1346 				goto fail;
   1347 		} else {
   1348 			/*
   1349 			 * File descriptor, prefetch some bytes.
   1350 			 */
   1351 			if ((fp = getf(sfvp->sfv_fd)) == NULL) {
   1352 				error = EBADF;
   1353 				goto fail;
   1354 			}
   1355 			if ((fp->f_flag & FREAD) == 0) {
   1356 				error = EACCES;
   1357 				goto fail;
   1358 			}
   1359 			vp = fp->f_vnode;
   1360 			if (vp->v_type != VREG) {
   1361 				error = EINVAL;
   1362 				goto fail;
   1363 			}
   1364 			VN_HOLD(vp);
   1365 
   1366 			/* Read max_rem bytes from file for prefetch */
   1367 			if (nl7c_use_kmem) {
   1368 				max = cnt;
   1369 			} else {
   1370 				max = MAXBSIZE * nl7c_file_prefetch;
   1371 			}
   1372 			alloc = nl7c_readfile(fp, &off, &cnt, max, &error);
   1373 			if (alloc == NULL)
   1374 				goto fail;
   1375 
   1376 			releasef(sfvp->sfv_fd);
   1377 			fp = NULL;
   1378 		}
   1379 		URI_RD_ADD(uri, rdp, cnt, -1);
   1380 		if (rdp == NULL) {
   1381 			error = ENOMEM;
   1382 			goto fail;
   1383 		}
   1384 		data = alloc;
   1385 		alloc = NULL;
   1386 		rdp->data.kmem = data;
   1387 		total_count += cnt;
   1388 		if (uri->hash != URI_TEMP && total_count > nca_max_cache_size) {
   1389 			uri_delete(uri);
   1390 			uri->hash = URI_TEMP;
   1391 		}
   1392 
   1393 		/* Response parse */
   1394 		perror = nl7c_resp_parse(so, uri, data, len);
   1395 
   1396 		/* Send kmem data out the connection */
   1397 		error = uri_rd_response(so, uri, rdp, first);
   1398 
   1399 		if (error)
   1400 			goto fail;
   1401 
   1402 		if (sfvp->sfv_fd != SFV_FD_SELF) {
   1403 			/*
   1404 			 * File descriptor, if any bytes left save vnode_t.
   1405 			 */
   1406 			if (len > cnt) {
   1407 				/* More file data so add it */
   1408 				URI_RD_ADD(uri, rdp, len - cnt, off);
   1409 				if (rdp == NULL) {
   1410 					error = ENOMEM;
   1411 					goto fail;
   1412 				}
   1413 				rdp->data.vnode = vp;
   1414 
   1415 				/* Send vnode data out the connection */
   1416 				error = uri_rd_response(so, uri, rdp, first);
   1417 			} else {
   1418 				/* All file data fit in the prefetch */
   1419 				VN_RELE(vp);
   1420 			}
   1421 			*fileoff += len;
   1422 			vp = NULL;
   1423 		}
   1424 		*xfer += len;
   1425 		sfvp++;
   1426 
   1427 		if (first)
   1428 			first = B_FALSE;
   1429 	}
   1430 	if (total_count > 0) {
   1431 		atomic_add_64(&nl7c_uri_bytes, total_count);
   1432 	}
   1433 	if (perror == 0 &&
   1434 	    ((uri->respclen == URI_LEN_NOVALUE &&
   1435 	    uri->resplen == URI_LEN_NOVALUE) ||
   1436 	    uri->count >= uri->resplen)) {
   1437 		/*
   1438 		 * No more data needed and no pending response
   1439 		 * data or current data count >= response length
   1440 		 * so close the URI processing for this so.
   1441 		 */
   1442 		nl7c_close(so);
   1443 		if (! (sti->sti_nl7c_flags & NL7C_SOPERSIST)) {
   1444 			/* Not a persistent connection */
   1445 			sti->sti_nl7c_flags = 0;
   1446 		}
   1447 	}
   1448 
   1449 	return (0);
   1450 
   1451 fail:
   1452 	if (error == EPIPE)
   1453 		tsignal(curthread, SIGPIPE);
   1454 
   1455 	if (alloc != NULL)
   1456 		kmem_free(data, len);
   1457 
   1458 	if (vp != NULL)
   1459 		VN_RELE(vp);
   1460 
   1461 	if (fp != NULL)
   1462 		releasef(sfvp->sfv_fd);
   1463 
   1464 	if (total_count > 0) {
   1465 		atomic_add_64(&nl7c_uri_bytes, total_count);
   1466 	}
   1467 
   1468 	sti->sti_nl7c_flags = 0;
   1469 	nl7c_urifree(so);
   1470 
   1471 	return (error);
   1472 }
   1473 
   1474 /*
   1475  * Called for a socket which is closing or when an application has
   1476  * completed sending all the response data (i.e. for a persistent
   1477  * connection called once for each completed application response).
   1478  */
   1479 
   1480 void
   1481 nl7c_close(struct sonode *so)
   1482 {
   1483 	sotpi_info_t	*sti = SOTOTPI(so);
   1484 	uri_desc_t 	*uri = (uri_desc_t *)sti->sti_nl7c_uri;
   1485 
   1486 	if (uri == NULL) {
   1487 		/*
   1488 		 * No URI being processed so might be a listen()er
   1489 		 * if so do any cleanup, else nothing more to do.
   1490 		 */
   1491 		if (so->so_state & SS_ACCEPTCONN) {
   1492 			(void) nl7c_close_addr(so);
   1493 		}
   1494 		return;
   1495 	}
   1496 	sti->sti_nl7c_uri = NULL;
   1497 	if (uri->hash != URI_TEMP) {
   1498 		mutex_enter(&uri->proclock);
   1499 		uri->proc = NULL;
   1500 		if (CV_HAS_WAITERS(&uri->waiting)) {
   1501 			cv_broadcast(&uri->waiting);
   1502 		}
   1503 		mutex_exit(&uri->proclock);
   1504 		nl7c_uri_close++;
   1505 	} else {
   1506 		/* No proclock as uri exclusively owned by so */
   1507 		uri->proc = NULL;
   1508 		nl7c_uri_temp_close++;
   1509 	}
   1510 	REF_RELE(uri);
   1511 	if (nl7c_uri_max > 0 && nl7c_uri_bytes > nl7c_uri_max) {
   1512 		nl7c_uri_reclaim();
   1513 	}
   1514 }
   1515 
   1516 /*
   1517  * The uri_segmap_t ref_t inactive function called on the last REF_RELE(),
   1518  * release the segmap mapping. Note, the uri_segmap_t will be freed by
   1519  * REF_RELE() on return.
   1520  */
   1521 
   1522 void
   1523 uri_segmap_inactive(uri_segmap_t *smp)
   1524 {
   1525 	if (!segmap_kpm) {
   1526 		(void) segmap_fault(kas.a_hat, segkmap, smp->base,
   1527 		    smp->len, F_SOFTUNLOCK, S_OTHER);
   1528 	}
   1529 	(void) segmap_release(segkmap, smp->base, SM_DONTNEED);
   1530 	VN_RELE(smp->vp);
   1531 }
   1532 
   1533 /*
   1534  * The call-back for desballoc()ed mblk_t's, if a segmap mapped mblk_t
   1535  * release the reference, one per desballoc() of a segmap page, if a rd_t
   1536  * mapped mblk_t release the reference, one per desballoc() of a uri_desc_t,
   1537  * last kmem free the uri_desb_t.
   1538  */
   1539 
   1540 static void
   1541 uri_desb_free(uri_desb_t *desb)
   1542 {
   1543 	if (desb->segmap != NULL) {
   1544 		REF_RELE(desb->segmap);
   1545 	}
   1546 	REF_RELE(desb->uri);
   1547 	kmem_cache_free(uri_desb_kmc, desb);
   1548 }
   1549 
   1550 /*
   1551  * Segmap map up to a page of a uri_rd_t file descriptor.
   1552  */
   1553 
   1554 uri_segmap_t *
   1555 uri_segmap_map(uri_rd_t *rdp, int bytes)
   1556 {
   1557 	uri_segmap_t	*segmap = kmem_cache_alloc(uri_segmap_kmc, KM_SLEEP);
   1558 	int		len = MIN(rdp->sz, MAXBSIZE);
   1559 
   1560 	if (len > bytes)
   1561 		len = bytes;
   1562 
   1563 	REF_INIT(segmap, 1, uri_segmap_inactive, uri_segmap_kmc);
   1564 	segmap->len = len;
   1565 	VN_HOLD(rdp->data.vnode);
   1566 	segmap->vp = rdp->data.vnode;
   1567 
   1568 	segmap->base = segmap_getmapflt(segkmap, segmap->vp, rdp->off, len,
   1569 	    segmap_kpm ? SM_FAULT : 0, S_READ);
   1570 
   1571 	if (segmap_fault(kas.a_hat, segkmap, segmap->base, len,
   1572 	    F_SOFTLOCK, S_READ) != 0) {
   1573 		REF_RELE(segmap);
   1574 		return (NULL);
   1575 	}
   1576 	return (segmap);
   1577 }
   1578 
   1579 /*
   1580  * Chop up the kernel virtual memory area *data of size *sz bytes for
   1581  * a maximum of *bytes bytes into an besballoc()ed mblk_t chain using
   1582  * the given template uri_desb_t *temp of max_mblk bytes per.
   1583  *
   1584  * The values of *data, *sz, and *bytes are updated on return, the
   1585  * mblk_t chain is returned.
   1586  */
   1587 
   1588 static mblk_t *
   1589 uri_desb_chop(
   1590 	char 		**data,
   1591 	size_t		*sz,
   1592 	int 		*bytes,
   1593 	uri_desb_t 	*temp,
   1594 	int		max_mblk,
   1595 	char		*eoh,
   1596 	mblk_t		*persist
   1597 )
   1598 {
   1599 	char		*ldata = *data;
   1600 	size_t		lsz = *sz;
   1601 	int		lbytes = bytes ? *bytes : lsz;
   1602 	uri_desb_t	*desb;
   1603 	mblk_t		*mp = NULL;
   1604 	mblk_t		*nmp, *pmp = NULL;
   1605 	int		msz;
   1606 
   1607 	if (lbytes == 0 && lsz == 0)
   1608 		return (NULL);
   1609 
   1610 	while (lbytes > 0 && lsz > 0) {
   1611 		msz = MIN(lbytes, max_mblk);
   1612 		msz = MIN(msz, lsz);
   1613 		if (persist && eoh >= ldata && eoh < &ldata[msz]) {
   1614 			msz = (eoh - ldata);
   1615 			pmp = persist;
   1616 			persist = NULL;
   1617 			if (msz == 0) {
   1618 				nmp = pmp;
   1619 				pmp = NULL;
   1620 				goto zero;
   1621 			}
   1622 		}
   1623 		desb = kmem_cache_alloc(uri_desb_kmc, KM_SLEEP);
   1624 		REF_HOLD(temp->uri);
   1625 		if (temp->segmap) {
   1626 			REF_HOLD(temp->segmap);
   1627 		}
   1628 		bcopy(temp, desb, sizeof (*desb));
   1629 		desb->frtn.free_arg = (caddr_t)desb;
   1630 		nmp = desballoc((uchar_t *)ldata, msz, BPRI_HI, &desb->frtn);
   1631 		if (nmp == NULL) {
   1632 			if (temp->segmap) {
   1633 				REF_RELE(temp->segmap);
   1634 			}
   1635 			REF_RELE(temp->uri);
   1636 			if (mp != NULL) {
   1637 				mp->b_next = NULL;
   1638 				freemsg(mp);
   1639 			}
   1640 			if (persist != NULL) {
   1641 				freeb(persist);
   1642 			}
   1643 			return (NULL);
   1644 		}
   1645 		nmp->b_wptr += msz;
   1646 	zero:
   1647 		if (mp != NULL) {
   1648 			mp->b_next->b_cont = nmp;
   1649 		} else {
   1650 			mp = nmp;
   1651 		}
   1652 		if (pmp != NULL) {
   1653 			nmp->b_cont = pmp;
   1654 			nmp = pmp;
   1655 			pmp = NULL;
   1656 		}
   1657 		mp->b_next = nmp;
   1658 		ldata += msz;
   1659 		lsz -= msz;
   1660 		lbytes -= msz;
   1661 	}
   1662 	*data = ldata;
   1663 	*sz = lsz;
   1664 	if (bytes)
   1665 		*bytes = lbytes;
   1666 	return (mp);
   1667 }
   1668 
   1669 /*
   1670  * Experimential noqwait (i.e. no canput()/qwait() checks), just send
   1671  * the entire mblk_t chain down without flow-control checks.
   1672  */
   1673 
   1674 static int
   1675 kstrwritempnoqwait(struct vnode *vp, mblk_t *mp)
   1676 {
   1677 	struct stdata *stp;
   1678 	int error = 0;
   1679 
   1680 	ASSERT(vp->v_stream);
   1681 	stp = vp->v_stream;
   1682 
   1683 	/* Fast check of flags before acquiring the lock */
   1684 	if (stp->sd_flag & (STWRERR|STRHUP|STPLEX)) {
   1685 		mutex_enter(&stp->sd_lock);
   1686 		error = strgeterr(stp, STWRERR|STRHUP|STPLEX, 0);
   1687 		mutex_exit(&stp->sd_lock);
   1688 		if (error != 0) {
   1689 			if (!(stp->sd_flag & STPLEX) &&
   1690 			    (stp->sd_wput_opt & SW_SIGPIPE)) {
   1691 				error = EPIPE;
   1692 			}
   1693 			return (error);
   1694 		}
   1695 	}
   1696 	putnext(stp->sd_wrq, mp);
   1697 	return (0);
   1698 }
   1699 
   1700 /*
   1701  * Send the URI uri_desc_t *uri response uri_rd_t *rdp out the socket_t *so.
   1702  */
   1703 
   1704 static int
   1705 uri_rd_response(struct sonode *so,
   1706     uri_desc_t *uri,
   1707     uri_rd_t *rdp,
   1708     boolean_t first)
   1709 {
   1710 	vnode_t		*vp = SOTOV(so);
   1711 	int		max_mblk = (int)vp->v_stream->sd_maxblk;
   1712 	int		wsz;
   1713 	mblk_t		*mp, *wmp, *persist;
   1714 	int		write_bytes;
   1715 	uri_rd_t	rd;
   1716 	uri_desb_t	desb;
   1717 	uri_segmap_t	*segmap = NULL;
   1718 	char		*segmap_data;
   1719 	size_t		segmap_sz;
   1720 	int		error;
   1721 	int		fflg = ((so->so_state & SS_NDELAY) ? FNDELAY : 0) |
   1722 	    ((so->so_state & SS_NONBLOCK) ? FNONBLOCK : 0);
   1723 
   1724 
   1725 	/* Initialize template uri_desb_t */
   1726 	desb.frtn.free_func = uri_desb_free;
   1727 	desb.frtn.free_arg = NULL;
   1728 	desb.uri = uri;
   1729 
   1730 	/* Get a local copy of the rd_t */
   1731 	bcopy(rdp, &rd, sizeof (rd));
   1732 	do {
   1733 		if (first) {
   1734 			/*
   1735 			 * For first kstrwrite() enough data to get
   1736 			 * things going, note non blocking version of
   1737 			 * kstrwrite() will be used below.
   1738 			 */
   1739 			write_bytes = P2ROUNDUP((max_mblk * 4),
   1740 			    MAXBSIZE * nl7c_file_prefetch);
   1741 		} else {
   1742 			if ((write_bytes = so->so_sndbuf) == 0)
   1743 				write_bytes = vp->v_stream->sd_qn_maxpsz;
   1744 			ASSERT(write_bytes > 0);
   1745 			write_bytes = P2ROUNDUP(write_bytes, MAXBSIZE);
   1746 		}
   1747 		/*
   1748 		 * Chop up to a write_bytes worth of data.
   1749 		 */
   1750 		wmp = NULL;
   1751 		wsz = write_bytes;
   1752 		do {
   1753 			if (rd.sz == 0)
   1754 				break;
   1755 			if (rd.off == -1) {
   1756 				if (uri->eoh >= rd.data.kmem &&
   1757 				    uri->eoh < &rd.data.kmem[rd.sz]) {
   1758 					persist = nl7c_http_persist(so);
   1759 				} else {
   1760 					persist = NULL;
   1761 				}
   1762 				desb.segmap = NULL;
   1763 				mp = uri_desb_chop(&rd.data.kmem, &rd.sz,
   1764 				    &wsz, &desb, max_mblk, uri->eoh, persist);
   1765 				if (mp == NULL) {
   1766 					error = ENOMEM;
   1767 					goto invalidate;
   1768 				}
   1769 			} else {
   1770 				if (segmap == NULL) {
   1771 					segmap = uri_segmap_map(&rd,
   1772 					    write_bytes);
   1773 					if (segmap == NULL) {
   1774 						error = ENOMEM;
   1775 						goto invalidate;
   1776 					}
   1777 					desb.segmap = segmap;
   1778 					segmap_data = segmap->base;
   1779 					segmap_sz = segmap->len;
   1780 				}
   1781 				mp = uri_desb_chop(&segmap_data, &segmap_sz,
   1782 				    &wsz, &desb, max_mblk, NULL, NULL);
   1783 				if (mp == NULL) {
   1784 					error = ENOMEM;
   1785 					goto invalidate;
   1786 				}
   1787 				if (segmap_sz == 0) {
   1788 					rd.sz -= segmap->len;
   1789 					rd.off += segmap->len;
   1790 					REF_RELE(segmap);
   1791 					segmap = NULL;
   1792 				}
   1793 			}
   1794 			if (wmp == NULL) {
   1795 				wmp = mp;
   1796 			} else {
   1797 				wmp->b_next->b_cont = mp;
   1798 				wmp->b_next = mp->b_next;
   1799 				mp->b_next = NULL;
   1800 			}
   1801 		} while (wsz > 0 && rd.sz > 0);
   1802 
   1803 		wmp->b_next = NULL;
   1804 		if (first) {
   1805 			/* First kstrwrite(), use noqwait */
   1806 			if ((error = kstrwritempnoqwait(vp, wmp)) != 0)
   1807 				goto invalidate;
   1808 			/*
   1809 			 * For the rest of the kstrwrite()s use SO_SNDBUF
   1810 			 * worth of data at a time, note these kstrwrite()s
   1811 			 * may (will) block one or more times.
   1812 			 */
   1813 			first = B_FALSE;
   1814 		} else {
   1815 			if ((error = kstrwritemp(vp, wmp, fflg)) != 0) {
   1816 				if (error == EAGAIN) {
   1817 					nl7c_uri_rd_EAGAIN++;
   1818 					if ((error =
   1819 					    kstrwritempnoqwait(vp, wmp)) != 0)
   1820 						goto invalidate;
   1821 				} else
   1822 					goto invalidate;
   1823 			}
   1824 		}
   1825 	} while (rd.sz > 0);
   1826 
   1827 	return (0);
   1828 
   1829 invalidate:
   1830 	if (segmap) {
   1831 		REF_RELE(segmap);
   1832 	}
   1833 	if (wmp)
   1834 		freemsg(wmp);
   1835 
   1836 	return (error);
   1837 }
   1838 
   1839 /*
   1840  * Send the URI uri_desc_t *uri response out the socket_t *so.
   1841  */
   1842 
   1843 static int
   1844 uri_response(struct sonode *so, uri_desc_t *uri)
   1845 {
   1846 	uri_rd_t	*rdp = &uri->response;
   1847 	boolean_t	first = B_TRUE;
   1848 	int		error;
   1849 
   1850 	while (rdp != NULL) {
   1851 		error = uri_rd_response(so, uri, rdp, first);
   1852 		if (error != 0) {
   1853 			goto invalidate;
   1854 		}
   1855 		first = B_FALSE;
   1856 		rdp = rdp->next;
   1857 	}
   1858 	return (0);
   1859 
   1860 invalidate:
   1861 	uri_delete(uri);
   1862 	return (error);
   1863 }
   1864 
   1865 /*
 * The pchars[] array is indexed by a char to determine if it's a
 * valid URI path component character where:
   1868  *
   1869  *    pchar       = unreserved | escaped |
   1870  *                  ":" | "@" | "&" | "=" | "+" | "$" | ","
   1871  *
   1872  *    unreserved  = alphanum | mark
   1873  *
   1874  *    alphanum    = alpha | digit
   1875  *
   1876  *    alpha       = lowalpha | upalpha
   1877  *
   1878  *    lowalpha    = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" |
   1879  *                  "i" | "j" | "k" | "l" | "m" | "n" | "o" | "p" |
   1880  *                  "q" | "r" | "s" | "t" | "u" | "v" | "w" | "x" |
   1881  *                  "y" | "z"
   1882  *
   1883  *    upalpha     = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" |
   1884  *                  "I" | "J" | "K" | "L" | "M" | "N" | "O" | "P" |
   1885  *                  "Q" | "R" | "S" | "T" | "U" | "V" | "W" | "X" |
   1886  *                  "Y" | "Z"
   1887  *
   1888  *    digit       = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" |
   1889  *                  "8" | "9"
   1890  *
   1891  *    mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
   1892  *
   1893  *    escaped     = "%" hex hex
   1894  *    hex         = digit | "A" | "B" | "C" | "D" | "E" | "F" |
   1895  *                  "a" | "b" | "c" | "d" | "e" | "f"
   1896  */
   1897 
   1898 static char pchars[] = {
   1899     0, 0, 0, 0, 0, 0, 0, 0,	/* 0x00 - 0x07 */
   1900     0, 0, 0, 0, 0, 0, 0, 0,	/* 0x08 - 0x0F */
   1901     0, 0, 0, 0, 0, 0, 0, 0,	/* 0x10 - 0x17 */
   1902     0, 0, 0, 0, 0, 0, 0, 0,	/* 0x18 - 0x1F */
   1903     0, 1, 0, 0, 1, 1, 1, 1,	/* 0x20 - 0x27 */
   1904     0, 0, 1, 1, 1, 1, 1, 1,	/* 0x28 - 0x2F */
   1905     1, 1, 1, 1, 1, 1, 1, 1,	/* 0x30 - 0x37 */
   1906     1, 1, 1, 0, 0, 1, 0, 0,	/* 0x38 - 0x3F */
   1907     1, 1, 1, 1, 1, 1, 1, 1,	/* 0x40 - 0x47 */
   1908     1, 1, 1, 1, 1, 1, 1, 1,	/* 0x48 - 0x4F */
   1909     1, 1, 1, 1, 1, 1, 1, 1,	/* 0x50 - 0x57 */
   1910     1, 1, 1, 0, 0, 0, 0, 1,	/* 0x58 - 0x5F */
   1911     0, 1, 1, 1, 1, 1, 1, 1,	/* 0x60 - 0x67 */
   1912     1, 1, 1, 1, 1, 1, 1, 1,	/* 0x68 - 0x6F */
   1913     1, 1, 1, 1, 1, 1, 1, 1,	/* 0x70 - 0x77 */
   1914     1, 1, 1, 0, 0, 0, 1, 0	/* 0x78 - 0x7F */
   1915 };
   1916 
   1917 #define	PCHARS_MASK 0x7F
   1918 
/*
 * This is the main L7 request message parse, we are called each time
 * new data is available for a socket, each time a single buffer of the
 * entire message to date is given.
 *
 * Here we parse the request looking for the URI, parse it, and if a
 * supported scheme call the scheme parser to complete the parse of any
 * headers which may further qualify the identity of the requested object
 * then look it up in the URI hash.
 *
 * Return B_TRUE for more processing.
 *
 * Note, at this time the parser supports the generic message format as
 * specified in RFC 822 with potential limitations as specified in RFC
 * 2616 for HTTP messages.
 *
 * Note, the caller supports an mblk_t chain, for now the parser(s)
 * require the complete header in a single mblk_t. This is the common
 * case and certainly for high performance environments, if at a future
 * date mblk_t chains are important the parse can be revised to process
 * mblk_t chains.
 */
   1941 
   1942 boolean_t
   1943 nl7c_parse(struct sonode *so, boolean_t nonblocking, boolean_t *ret)
   1944 {
   1945 	sotpi_info_t *sti = SOTOTPI(so);
   1946 	char	*cp = (char *)sti->sti_nl7c_rcv_mp->b_rptr;
   1947 	char	*ep = (char *)sti->sti_nl7c_rcv_mp->b_wptr;
   1948 	char	*get = "GET ";
   1949 	char	*post = "POST ";
   1950 	char	c;
   1951 	char	*uris;
   1952 	uri_desc_t *uri = NULL;
   1953 	uri_desc_t *ruri = NULL;
   1954 	mblk_t	*reqmp;
   1955 	uint32_t hv = 0;
   1956 
   1957 	if ((reqmp = dupb(sti->sti_nl7c_rcv_mp)) == NULL) {
   1958 		nl7c_uri_pass_dupbfail++;
   1959 		goto pass;
   1960 	}
   1961 	/*
   1962 	 * Allocate and initialize minimumal state for the request
   1963 	 * uri_desc_t, in the cache hit case this uri_desc_t will
   1964 	 * be freed.
   1965 	 */
   1966 	uri = kmem_cache_alloc(nl7c_uri_kmc, KM_SLEEP);
   1967 	REF_INIT(uri, 1, nl7c_uri_inactive, nl7c_uri_kmc);
   1968 	uri->hash = NULL;
   1969 	uri->tail = NULL;
   1970 	uri->scheme = NULL;
   1971 	uri->count = 0;
   1972 	uri->reqmp = reqmp;
   1973 
   1974 	/*
   1975 	 * Set request time to current time.
   1976 	 */
   1977 	sti->sti_nl7c_rtime = gethrestime_sec();
   1978 
   1979 	/*
   1980 	 * Parse the Request-Line for the URI.
   1981 	 *
   1982 	 * For backwards HTTP version compatable reasons skip any leading
   1983 	 * CRLF (or CR or LF) line terminator(s) preceding Request-Line.
   1984 	 */
   1985 	while (cp < ep && (*cp == '\r' || *cp == '\n')) {
   1986 		cp++;
   1987 	}
   1988 	while (cp < ep && *get == *cp) {
   1989 		get++;
   1990 		cp++;
   1991 	}
   1992 	if (*get != 0) {
   1993 		/* Note a "GET", check for "POST" */
   1994 		while (cp < ep && *post == *cp) {
   1995 			post++;
   1996 			cp++;
   1997 		}
   1998 		if (*post != 0) {
   1999 			if (cp == ep) {
   2000 				nl7c_uri_more_get++;
   2001 				goto more;
   2002 			}
   2003 			/* Not a "GET" or a "POST", just pass */
   2004 			nl7c_uri_pass_method++;
   2005 			goto pass;
   2006 		}
   2007 		/* "POST", don't cache but still may want to parse */
   2008 		uri->hash = URI_TEMP;
   2009 	}
   2010 	/*
   2011 	 * Skip over URI path char(s) and save start and past end pointers.
   2012 	 */
   2013 	uris = cp;
   2014 	while (cp < ep && (c = *cp) != ' ' && c != '\r') {
   2015 		if (c == '?') {
   2016 			/* Don't cache but still may want to parse */
   2017 			uri->hash = URI_TEMP;
   2018 		}
   2019 		CHASH(hv, c);
   2020 		cp++;
   2021 	}
   2022 	if (c != '\r' && cp == ep) {
   2023 		nl7c_uri_more_eol++;
   2024 		goto more;
   2025 	}
   2026 	/*
   2027 	 * Request-Line URI parsed, pass the rest of the request on
   2028 	 * to the the http scheme parse.
   2029 	 */
   2030 	uri->path.cp = uris;
   2031 	uri->path.ep = cp;
   2032 	uri->hvalue = hv;
   2033 	if (! nl7c_http_request(&cp, ep, uri, so) || cp == NULL) {
   2034 		/*
   2035 		 * Parse not successful or pass on request, the pointer
   2036 		 * to the parse pointer "cp" is overloaded such that ! NULL
   2037 		 * for more data and NULL for bad parse of request or pass.
   2038 		 */
   2039 		if (cp != NULL) {
   2040 			nl7c_uri_more_http++;
   2041 			goto more;
   2042 		}
   2043 		nl7c_uri_pass_http++;
   2044 		goto pass;
   2045 	}
   2046 	if (uri->nocache) {
   2047 		uri->hash = URI_TEMP;
   2048 		(void) uri_lookup(uri, B_FALSE, nonblocking);
   2049 	} else if (uri->hash == URI_TEMP) {
   2050 		uri->nocache = B_TRUE;
   2051 		(void) uri_lookup(uri, B_FALSE, nonblocking);
   2052 	}
   2053 
   2054 	if (uri->hash == URI_TEMP) {
   2055 		if (sti->sti_nl7c_flags & NL7C_SOPERSIST) {
   2056 			/* Temporary URI so skip hash processing */
   2057 			nl7c_uri_request++;
   2058 			nl7c_uri_temp++;
   2059 			goto temp;
   2060 		}
   2061 		/* Not persistent so not interested in the response */
   2062 		nl7c_uri_pass_temp++;
   2063 		goto pass;
   2064 	}
   2065 	/*
   2066 	 * Check the URI hash for a cached response, save the request
   2067 	 * uri in case we need it below.
   2068 	 */
   2069 	ruri = uri;
   2070 	if ((uri = uri_lookup(uri, B_TRUE, nonblocking)) == NULL) {
   2071 		/*
   2072 		 * Failed to lookup due to nonblocking wait required,
   2073 		 * interrupted cv_wait_sig(), KM_NOSLEEP memory alloc
   2074 		 * failure, ... Just pass on this request.
   2075 		 */
   2076 		nl7c_uri_pass_addfail++;
   2077 		goto pass;
   2078 	}
   2079 	nl7c_uri_request++;
   2080 	if (uri->response.sz > 0) {
   2081 		/*
   2082 		 * We have the response cached, update recv mblk rptr
   2083 		 * to reflect the data consumed in parse.
   2084 		 */
   2085 		mblk_t	*mp = sti->sti_nl7c_rcv_mp;
   2086 
   2087 		if (cp == (char *)mp->b_wptr) {
   2088 			sti->sti_nl7c_rcv_mp = mp->b_cont;
   2089 			mp->b_cont = NULL;
   2090 			freeb(mp);
   2091 		} else {
   2092 			mp->b_rptr = (unsigned char *)cp;
   2093 		}
   2094 		nl7c_uri_hit++;
   2095 		/* If logging enabled log request */
   2096 		if (nl7c_logd_enabled) {
   2097 			ipaddr_t faddr;
   2098 
   2099 			if (so->so_family == AF_INET) {
   2100 				/* Only support IPv4 addrs */
   2101 				faddr = ((struct sockaddr_in *)
   2102 				    sti->sti_faddr_sa) ->sin_addr.s_addr;
   2103 			} else {
   2104 				faddr = 0;
   2105 			}
   2106 			/* XXX need to pass response type, e.g. 200, 304 */
   2107 			nl7c_logd_log(ruri, uri, sti->sti_nl7c_rtime, faddr);
   2108 		}
   2109 
   2110 		/* If conditional request check for substitute response */
   2111 		if (ruri->conditional) {
   2112 			uri = nl7c_http_cond(ruri, uri);
   2113 		}
   2114 
   2115 		/*
   2116 		 * Release reference on request URI, send the response out
   2117 		 * the socket, release reference on response uri, set the
   2118 		 * *ret value to B_TRUE to indicate request was consumed
   2119 		 * then return B_FALSE to indcate no more data needed.
   2120 		 */
   2121 		REF_RELE(ruri);
   2122 		(void) uri_response(so, uri);
   2123 		REF_RELE(uri);
   2124 		*ret = B_TRUE;
   2125 		return (B_FALSE);
   2126 	}
   2127 	/*
   2128 	 * Miss the cache, the request URI is in the cache waiting for
   2129 	 * application write-side data to fill it.
   2130 	 */
   2131 	nl7c_uri_miss++;
   2132 temp:
   2133 	/*
   2134 	 * A miss or temp URI for which response data is needed, link
   2135 	 * uri to so and so to uri, set WAITWRITE in the so such that
   2136 	 * read-side processing is suspended (so the next read() gets
   2137 	 * the request data) until a write() is processed by NL7C.
   2138 	 *
   2139 	 * Note, sti->sti_nl7c_uri now owns the REF_INIT() ref.
   2140 	 */
   2141 	uri->proc = so;
   2142 	sti->sti_nl7c_uri = uri;
   2143 	sti->sti_nl7c_flags |= NL7C_WAITWRITE;
   2144 	*ret = B_FALSE;
   2145 	return (B_FALSE);
   2146 
   2147 more:
   2148 	/* More data is needed, note fragmented recv not supported */
   2149 	nl7c_uri_more++;
   2150 
   2151 pass:
   2152 	/* Pass on this request */
   2153 	nl7c_uri_pass++;
   2154 	nl7c_uri_request++;
   2155 	if (ruri != NULL) {
   2156 		REF_RELE(ruri);
   2157 	}
   2158 	if (uri) {
   2159 		REF_RELE(uri);
   2160 	}
   2161 	sti->sti_nl7c_flags = 0;
   2162 	*ret = B_FALSE;
   2163 	return (B_FALSE);
   2164 }
   2165