Home | History | Annotate | Download | only in io
      1      0    stevel /*
      2      0    stevel  * CDDL HEADER START
      3      0    stevel  *
      4      0    stevel  * The contents of this file are subject to the terms of the
      5   2958  dr146992  * Common Development and Distribution License (the "License").
      6   2958  dr146992  * You may not use this file except in compliance with the License.
      7      0    stevel  *
      8      0    stevel  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9      0    stevel  * or http://www.opensolaris.org/os/licensing.
     10      0    stevel  * See the License for the specific language governing permissions
     11      0    stevel  * and limitations under the License.
     12      0    stevel  *
     13      0    stevel  * When distributing Covered Code, include this CDDL HEADER in each
     14      0    stevel  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15      0    stevel  * If applicable, add the following below this CDDL HEADER, with the
     16      0    stevel  * fields enclosed by brackets "[]" replaced with your own identifying
     17      0    stevel  * information: Portions Copyright [yyyy] [name of copyright owner]
     18      0    stevel  *
     19      0    stevel  * CDDL HEADER END
     20      0    stevel  */
     21      0    stevel /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
     22      0    stevel /*	  All Rights Reserved  	*/
     23      0    stevel 
     24   8752     Peter /*
     25   8752     Peter  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     26      0    stevel  * Use is subject to license terms.
     27      0    stevel  */
     28      0    stevel 
     29      0    stevel #include <sys/types.h>
     30      0    stevel #include <sys/param.h>
     31      0    stevel #include <sys/thread.h>
     32      0    stevel #include <sys/sysmacros.h>
     33      0    stevel #include <sys/stropts.h>
     34      0    stevel #include <sys/stream.h>
     35      0    stevel #include <sys/strsubr.h>
     36      0    stevel #include <sys/strsun.h>
     37      0    stevel #include <sys/conf.h>
     38      0    stevel #include <sys/debug.h>
     39      0    stevel #include <sys/cmn_err.h>
     40      0    stevel #include <sys/kmem.h>
     41      0    stevel #include <sys/atomic.h>
     42      0    stevel #include <sys/errno.h>
     43      0    stevel #include <sys/vtrace.h>
     44      0    stevel #include <sys/ftrace.h>
     45      0    stevel #include <sys/ontrap.h>
     46      0    stevel #include <sys/multidata.h>
     47      0    stevel #include <sys/multidata_impl.h>
     48      0    stevel #include <sys/sdt.h>
     49   1110      meem #include <sys/strft.h>
     50      0    stevel 
     51      0    stevel #ifdef DEBUG
     52      0    stevel #include <sys/kmem_impl.h>
     53      0    stevel #endif
     54      0    stevel 
     55      0    stevel /*
     56      0    stevel  * This file contains all the STREAMS utility routines that may
     57      0    stevel  * be used by modules and drivers.
     58      0    stevel  */
     59      0    stevel 
     60      0    stevel /*
     61      0    stevel  * STREAMS message allocator: principles of operation
     62      0    stevel  *
     63      0    stevel  * The streams message allocator consists of all the routines that
     64      0    stevel  * allocate, dup and free streams messages: allocb(), [d]esballoc[a],
     65      0    stevel  * dupb(), freeb() and freemsg().  What follows is a high-level view
     66      0    stevel  * of how the allocator works.
     67      0    stevel  *
     68      0    stevel  * Every streams message consists of one or more mblks, a dblk, and data.
     69      0    stevel  * All mblks for all types of messages come from a common mblk_cache.
     70      0    stevel  * The dblk and data come in several flavors, depending on how the
     71      0    stevel  * message is allocated:
     72      0    stevel  *
     73      0    stevel  * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of
     74      0    stevel  *     fixed-size dblk/data caches. For message sizes that are multiples of
     75      0    stevel  *     PAGESIZE, dblks are allocated separately from the buffer.
     76      0    stevel  *     The associated buffer is allocated by the constructor using kmem_alloc().
     77      0    stevel  *     For all other message sizes, dblk and its associated data is allocated
     78      0    stevel  *     as a single contiguous chunk of memory.
     79      0    stevel  *     Objects in these caches consist of a dblk plus its associated data.
     80      0    stevel  *     allocb() determines the nearest-size cache by table lookup:
     81      0    stevel  *     the dblk_cache[] array provides the mapping from size to dblk cache.
     82      0    stevel  *
     83      0    stevel  * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by
     84      0    stevel  *     kmem_alloc()'ing a buffer for the data and supplying that
     85      0    stevel  *     buffer to gesballoc(), described below.
     86      0    stevel  *
     87      0    stevel  * (3) The four flavors of [d]esballoc[a] are all implemented by a
     88      0    stevel  *     common routine, gesballoc() ("generic esballoc").  gesballoc()
     89      0    stevel  *     allocates a dblk from the global dblk_esb_cache and sets db_base,
     90      0    stevel  *     db_lim and db_frtnp to describe the caller-supplied buffer.
     91      0    stevel  *
     92      0    stevel  * While there are several routines to allocate messages, there is only
     93      0    stevel  * one routine to free messages: freeb().  freeb() simply invokes the
     94      0    stevel  * dblk's free method, dbp->db_free(), which is set at allocation time.
     95      0    stevel  *
     96      0    stevel  * dupb() creates a new reference to a message by allocating a new mblk,
     97      0    stevel  * incrementing the dblk reference count and setting the dblk's free
     98      0    stevel  * method to dblk_decref().  The dblk's original free method is retained
     99      0    stevel  * in db_lastfree.  dblk_decref() decrements the reference count on each
    100      0    stevel  * freeb().  If this is not the last reference it just frees the mblk;
    101      0    stevel  * if this *is* the last reference, it restores db_free to db_lastfree,
    102      0    stevel  * sets db_mblk to the current mblk (see below), and invokes db_lastfree.
    103      0    stevel  *
    104      0    stevel  * The implementation makes aggressive use of kmem object caching for
    105      0    stevel  * maximum performance.  This makes the code simple and compact, but
    106      0    stevel  * also a bit abstruse in some places.  The invariants that constitute a
    107      0    stevel  * message's constructed state, described below, are more subtle than usual.
    108      0    stevel  *
    109      0    stevel  * Every dblk has an "attached mblk" as part of its constructed state.
    110      0    stevel  * The mblk is allocated by the dblk's constructor and remains attached
    111      0    stevel  * until the message is either dup'ed or pulled up.  In the dupb() case
    112      0    stevel  * the mblk association doesn't matter until the last free, at which time
    113      0    stevel  * dblk_decref() attaches the last mblk to the dblk.  pullupmsg() affects
    114      0    stevel  * the mblk association because it swaps the leading mblks of two messages,
    115      0    stevel  * so it is responsible for swapping their db_mblk pointers accordingly.
    116      0    stevel  * From a constructed-state viewpoint it doesn't matter that a dblk's
    117      0    stevel  * attached mblk can change while the message is allocated; all that
    118      0    stevel  * matters is that the dblk has *some* attached mblk when it's freed.
    119      0    stevel  *
    120      0    stevel  * The sizes of the allocb() small-message caches are not magical.
    121      0    stevel  * They represent a good trade-off between internal and external
    122      0    stevel  * fragmentation for current workloads.  They should be reevaluated
    123      0    stevel  * periodically, especially if allocations larger than DBLK_MAX_CACHE
    124      0    stevel  * become common.  We use 64-byte alignment so that dblks don't
    125      0    stevel  * straddle cache lines unnecessarily.
    126      0    stevel  */
    127      0    stevel #define	DBLK_MAX_CACHE		73728
    128      0    stevel #define	DBLK_CACHE_ALIGN	64
    129      0    stevel #define	DBLK_MIN_SIZE		8
    130      0    stevel #define	DBLK_SIZE_SHIFT		3
    131      0    stevel 
    132      0    stevel #ifdef _BIG_ENDIAN
    133      0    stevel #define	DBLK_RTFU_SHIFT(field)	\
    134      0    stevel 	(8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field))
    135      0    stevel #else
    136      0    stevel #define	DBLK_RTFU_SHIFT(field)	\
    137      0    stevel 	(8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref))
    138      0    stevel #endif
    139      0    stevel 
    140      0    stevel #define	DBLK_RTFU(ref, type, flags, uioflag)	\
    141      0    stevel 	(((ref) << DBLK_RTFU_SHIFT(db_ref)) | \
    142      0    stevel 	((type) << DBLK_RTFU_SHIFT(db_type)) | \
    143      0    stevel 	(((flags) | (ref - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \
    144      0    stevel 	((uioflag) << DBLK_RTFU_SHIFT(db_struioflag)))
    145      0    stevel #define	DBLK_RTFU_REF_MASK	(DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref))
    146      0    stevel #define	DBLK_RTFU_WORD(dbp)	(*((uint32_t *)&(dbp)->db_ref))
    147      0    stevel #define	MBLK_BAND_FLAG_WORD(mp)	(*((uint32_t *)&(mp)->b_band))
    148      0    stevel 
    149      0    stevel static size_t dblk_sizes[] = {
    150      0    stevel #ifdef _LP64
    151   6712     tomee 	16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3856,
    152   6712     tomee 	8192, 12048, 16384, 20240, 24576, 28432, 32768, 36624,
    153   6712     tomee 	40960, 44816, 49152, 53008, 57344, 61200, 65536, 69392,
    154      0    stevel #else
    155   6712     tomee 	64, 128, 320, 576, 1088, 1536, 1984, 2624, 3904,
    156   6712     tomee 	8192, 12096, 16384, 20288, 24576, 28480, 32768, 36672,
    157   6712     tomee 	40960, 44864, 49152, 53056, 57344, 61248, 65536, 69440,
    158      0    stevel #endif
    159      0    stevel 	DBLK_MAX_CACHE, 0
    160      0    stevel };
    161      0    stevel 
    162      0    stevel static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE];
    163      0    stevel static struct kmem_cache *mblk_cache;
    164      0    stevel static struct kmem_cache *dblk_esb_cache;
    165      0    stevel static struct kmem_cache *fthdr_cache;
    166      0    stevel static struct kmem_cache *ftblk_cache;
    167      0    stevel 
    168      0    stevel static void dblk_lastfree(mblk_t *mp, dblk_t *dbp);
    169      0    stevel static mblk_t *allocb_oversize(size_t size, int flags);
    170      0    stevel static int allocb_tryhard_fails;
    171      0    stevel static void frnop_func(void *arg);
    172      0    stevel frtn_t frnop = { frnop_func };
    173      0    stevel static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp);
    174      0    stevel 
    175      0    stevel static boolean_t rwnext_enter(queue_t *qp);
    176      0    stevel static void rwnext_exit(queue_t *qp);
    177      0    stevel 
    178      0    stevel /*
    179      0    stevel  * Patchable mblk/dblk kmem_cache flags.
    180      0    stevel  */
    181      0    stevel int dblk_kmem_flags = 0;
    182      0    stevel int mblk_kmem_flags = 0;
    183      0    stevel 
    184      0    stevel static int
    185      0    stevel dblk_constructor(void *buf, void *cdrarg, int kmflags)
    186      0    stevel {
    187      0    stevel 	dblk_t *dbp = buf;
    188      0    stevel 	ssize_t msg_size = (ssize_t)cdrarg;
    189      0    stevel 	size_t index;
    190      0    stevel 
    191      0    stevel 	ASSERT(msg_size != 0);
    192      0    stevel 
    193      0    stevel 	index = (msg_size - 1) >> DBLK_SIZE_SHIFT;
    194      0    stevel 
    195    577      meem 	ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT));
    196      0    stevel 
    197      0    stevel 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
    198      0    stevel 		return (-1);
    199      0    stevel 	if ((msg_size & PAGEOFFSET) == 0) {
    200      0    stevel 		dbp->db_base = kmem_alloc(msg_size, kmflags);
    201      0    stevel 		if (dbp->db_base == NULL) {
    202      0    stevel 			kmem_cache_free(mblk_cache, dbp->db_mblk);
    203      0    stevel 			return (-1);
    204      0    stevel 		}
    205      0    stevel 	} else {
    206      0    stevel 		dbp->db_base = (unsigned char *)&dbp[1];
    207      0    stevel 	}
    208      0    stevel 
    209      0    stevel 	dbp->db_mblk->b_datap = dbp;
    210      0    stevel 	dbp->db_cache = dblk_cache[index];
    211      0    stevel 	dbp->db_lim = dbp->db_base + msg_size;
    212      0    stevel 	dbp->db_free = dbp->db_lastfree = dblk_lastfree;
    213      0    stevel 	dbp->db_frtnp = NULL;
    214      0    stevel 	dbp->db_fthdr = NULL;
    215      0    stevel 	dbp->db_credp = NULL;
    216      0    stevel 	dbp->db_cpid = -1;
    217      0    stevel 	dbp->db_struioflag = 0;
    218      0    stevel 	dbp->db_struioun.cksum.flags = 0;
    219      0    stevel 	return (0);
    220      0    stevel }
    221      0    stevel 
    222      0    stevel /*ARGSUSED*/
    223      0    stevel static int
    224      0    stevel dblk_esb_constructor(void *buf, void *cdrarg, int kmflags)
    225      0    stevel {
    226      0    stevel 	dblk_t *dbp = buf;
    227      0    stevel 
    228      0    stevel 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
    229      0    stevel 		return (-1);
    230      0    stevel 	dbp->db_mblk->b_datap = dbp;
    231      0    stevel 	dbp->db_cache = dblk_esb_cache;
    232      0    stevel 	dbp->db_fthdr = NULL;
    233      0    stevel 	dbp->db_credp = NULL;
    234      0    stevel 	dbp->db_cpid = -1;
    235      0    stevel 	dbp->db_struioflag = 0;
    236      0    stevel 	dbp->db_struioun.cksum.flags = 0;
    237      0    stevel 	return (0);
    238      0    stevel }
    239      0    stevel 
    240      0    stevel static int
    241      0    stevel bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags)
    242      0    stevel {
    243      0    stevel 	dblk_t *dbp = buf;
    244   8752     Peter 	bcache_t *bcp = cdrarg;
    245      0    stevel 
    246      0    stevel 	if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL)
    247      0    stevel 		return (-1);
    248      0    stevel 
    249   8752     Peter 	dbp->db_base = kmem_cache_alloc(bcp->buffer_cache, kmflags);
    250   8752     Peter 	if (dbp->db_base == NULL) {
    251      0    stevel 		kmem_cache_free(mblk_cache, dbp->db_mblk);
    252      0    stevel 		return (-1);
    253      0    stevel 	}
    254      0    stevel 
    255      0    stevel 	dbp->db_mblk->b_datap = dbp;
    256      0    stevel 	dbp->db_cache = (void *)bcp;
    257      0    stevel 	dbp->db_lim = dbp->db_base + bcp->size;
    258      0    stevel 	dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree;
    259      0    stevel 	dbp->db_frtnp = NULL;
    260      0    stevel 	dbp->db_fthdr = NULL;
    261      0    stevel 	dbp->db_credp = NULL;
    262      0    stevel 	dbp->db_cpid = -1;
    263      0    stevel 	dbp->db_struioflag = 0;
    264      0    stevel 	dbp->db_struioun.cksum.flags = 0;
    265      0    stevel 	return (0);
    266      0    stevel }
    267      0    stevel 
    268      0    stevel /*ARGSUSED*/
    269      0    stevel static void
    270      0    stevel dblk_destructor(void *buf, void *cdrarg)
    271      0    stevel {
    272      0    stevel 	dblk_t *dbp = buf;
    273      0    stevel 	ssize_t msg_size = (ssize_t)cdrarg;
    274      0    stevel 
    275      0    stevel 	ASSERT(dbp->db_mblk->b_datap == dbp);
    276      0    stevel 	ASSERT(msg_size != 0);
    277      0    stevel 	ASSERT(dbp->db_struioflag == 0);
    278      0    stevel 	ASSERT(dbp->db_struioun.cksum.flags == 0);
    279      0    stevel 
    280      0    stevel 	if ((msg_size & PAGEOFFSET) == 0) {
    281      0    stevel 		kmem_free(dbp->db_base, msg_size);
    282      0    stevel 	}
    283      0    stevel 
    284      0    stevel 	kmem_cache_free(mblk_cache, dbp->db_mblk);
    285      0    stevel }
    286      0    stevel 
    287      0    stevel static void
    288      0    stevel bcache_dblk_destructor(void *buf, void *cdrarg)
    289      0    stevel {
    290      0    stevel 	dblk_t *dbp = buf;
    291   8752     Peter 	bcache_t *bcp = cdrarg;
    292      0    stevel 
    293      0    stevel 	kmem_cache_free(bcp->buffer_cache, dbp->db_base);
    294      0    stevel 
    295      0    stevel 	ASSERT(dbp->db_mblk->b_datap == dbp);
    296      0    stevel 	ASSERT(dbp->db_struioflag == 0);
    297      0    stevel 	ASSERT(dbp->db_struioun.cksum.flags == 0);
    298      0    stevel 
    299      0    stevel 	kmem_cache_free(mblk_cache, dbp->db_mblk);
    300   8752     Peter }
    301   8752     Peter 
    302   8752     Peter /* ARGSUSED */
    303   8752     Peter static int
    304   8752     Peter ftblk_constructor(void *buf, void *cdrarg, int kmflags)
    305   8752     Peter {
    306   8752     Peter 	ftblk_t *fbp = buf;
    307   8752     Peter 	int i;
    308   8752     Peter 
    309   8752     Peter 	bzero(fbp, sizeof (ftblk_t));
    310   8752     Peter 	if (str_ftstack != 0) {
    311   8752     Peter 		for (i = 0; i < FTBLK_EVNTS; i++)
    312   8752     Peter 			fbp->ev[i].stk = kmem_alloc(sizeof (ftstk_t), kmflags);
    313   8752     Peter 	}
    314   8752     Peter 
    315   8752     Peter 	return (0);
    316   8752     Peter }
    317   8752     Peter 
    318   8752     Peter /* ARGSUSED */
    319   8752     Peter static void
    320   8752     Peter ftblk_destructor(void *buf, void *cdrarg)
    321   8752     Peter {
    322   8752     Peter 	ftblk_t *fbp = buf;
    323   8752     Peter 	int i;
    324   8752     Peter 
    325   8752     Peter 	if (str_ftstack != 0) {
    326   8752     Peter 		for (i = 0; i < FTBLK_EVNTS; i++) {
    327   8752     Peter 			if (fbp->ev[i].stk != NULL) {
    328   8752     Peter 				kmem_free(fbp->ev[i].stk, sizeof (ftstk_t));
    329   8752     Peter 				fbp->ev[i].stk = NULL;
    330   8752     Peter 			}
    331   8752     Peter 		}
    332   8752     Peter 	}
    333   8752     Peter }
    334   8752     Peter 
    335   8752     Peter static int
    336   8752     Peter fthdr_constructor(void *buf, void *cdrarg, int kmflags)
    337   8752     Peter {
    338   8752     Peter 	fthdr_t *fhp = buf;
    339   8752     Peter 
    340   8752     Peter 	return (ftblk_constructor(&fhp->first, cdrarg, kmflags));
    341   8752     Peter }
    342   8752     Peter 
    343   8752     Peter static void
    344   8752     Peter fthdr_destructor(void *buf, void *cdrarg)
    345   8752     Peter {
    346   8752     Peter 	fthdr_t *fhp = buf;
    347   8752     Peter 
    348   8752     Peter 	ftblk_destructor(&fhp->first, cdrarg);
    349      0    stevel }
    350      0    stevel 
    351      0    stevel void
    352      0    stevel streams_msg_init(void)
    353      0    stevel {
    354      0    stevel 	char name[40];
    355      0    stevel 	size_t size;
    356      0    stevel 	size_t lastsize = DBLK_MIN_SIZE;
    357      0    stevel 	size_t *sizep;
    358      0    stevel 	struct kmem_cache *cp;
    359      0    stevel 	size_t tot_size;
    360      0    stevel 	int offset;
    361      0    stevel 
    362   8752     Peter 	mblk_cache = kmem_cache_create("streams_mblk", sizeof (mblk_t), 32,
    363   8752     Peter 	    NULL, NULL, NULL, NULL, NULL, mblk_kmem_flags);
    364      0    stevel 
    365      0    stevel 	for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) {
    366      0    stevel 
    367      0    stevel 		if ((offset = (size & PAGEOFFSET)) != 0) {
    368      0    stevel 			/*
    369      0    stevel 			 * We are in the middle of a page, dblk should
    370      0    stevel 			 * be allocated on the same page
    371      0    stevel 			 */
    372      0    stevel 			tot_size = size + sizeof (dblk_t);
    373      0    stevel 			ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t))
    374   6707    brutus 			    < PAGESIZE);
    375      0    stevel 			ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0);
    376      0    stevel 
    377      0    stevel 		} else {
    378      0    stevel 
    379      0    stevel 			/*
    380      0    stevel 			 * buf size is multiple of page size, dblk and
    381      0    stevel 			 * buffer are allocated separately.
    382      0    stevel 			 */
    383      0    stevel 
    384      0    stevel 			ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0);
    385      0    stevel 			tot_size = sizeof (dblk_t);
    386      0    stevel 		}
    387      0    stevel 
    388      0    stevel 		(void) sprintf(name, "streams_dblk_%ld", size);
    389   8752     Peter 		cp = kmem_cache_create(name, tot_size, DBLK_CACHE_ALIGN,
    390   8752     Peter 		    dblk_constructor, dblk_destructor, NULL, (void *)(size),
    391   8752     Peter 		    NULL, dblk_kmem_flags);
    392      0    stevel 
    393      0    stevel 		while (lastsize <= size) {
    394      0    stevel 			dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp;
    395      0    stevel 			lastsize += DBLK_MIN_SIZE;
    396      0    stevel 		}
    397      0    stevel 	}
    398      0    stevel 
    399   8752     Peter 	dblk_esb_cache = kmem_cache_create("streams_dblk_esb", sizeof (dblk_t),
    400   8752     Peter 	    DBLK_CACHE_ALIGN, dblk_esb_constructor, dblk_destructor, NULL,
    401   8752     Peter 	    (void *)sizeof (dblk_t), NULL, dblk_kmem_flags);
    402   8752     Peter 	fthdr_cache = kmem_cache_create("streams_fthdr", sizeof (fthdr_t), 32,
    403   8752     Peter 	    fthdr_constructor, fthdr_destructor, NULL, NULL, NULL, 0);
    404   8752     Peter 	ftblk_cache = kmem_cache_create("streams_ftblk", sizeof (ftblk_t), 32,
    405   8752     Peter 	    ftblk_constructor, ftblk_destructor, NULL, NULL, NULL, 0);
    406      0    stevel 
    407      0    stevel 	/* Initialize Multidata caches */
    408      0    stevel 	mmd_init();
    409   3932  ss146032 
    410   3932  ss146032 	/* initialize throttling queue for esballoc */
    411   3932  ss146032 	esballoc_queue_init();
    412      0    stevel }
    413      0    stevel 
    414      0    stevel /*ARGSUSED*/
    415      0    stevel mblk_t *
    416      0    stevel allocb(size_t size, uint_t pri)
    417      0    stevel {
    418      0    stevel 	dblk_t *dbp;
    419      0    stevel 	mblk_t *mp;
    420      0    stevel 	size_t index;
    421      0    stevel 
    422      0    stevel 	index =  (size - 1)  >> DBLK_SIZE_SHIFT;
    423      0    stevel 
    424      0    stevel 	if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
    425      0    stevel 		if (size != 0) {
    426      0    stevel 			mp = allocb_oversize(size, KM_NOSLEEP);
    427      0    stevel 			goto out;
    428      0    stevel 		}
    429      0    stevel 		index = 0;
    430      0    stevel 	}
    431      0    stevel 
    432      0    stevel 	if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) {
    433      0    stevel 		mp = NULL;
    434      0    stevel 		goto out;
    435      0    stevel 	}
    436      0    stevel 
    437      0    stevel 	mp = dbp->db_mblk;
    438      0    stevel 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
    439      0    stevel 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
    440      0    stevel 	mp->b_rptr = mp->b_wptr = dbp->db_base;
    441      0    stevel 	mp->b_queue = NULL;
    442      0    stevel 	MBLK_BAND_FLAG_WORD(mp) = 0;
    443      0    stevel 	STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size);
    444      0    stevel out:
    445      0    stevel 	FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp);
    446      0    stevel 
    447      0    stevel 	return (mp);
    448      0    stevel }
    449      0    stevel 
    450   8778      Erik /*
    451   8778      Erik  * Allocate an mblk taking db_credp and db_cpid from the template.
    452   8778      Erik  * Allow the cred to be NULL.
    453   8778      Erik  */
    454      0    stevel mblk_t *
    455      0    stevel allocb_tmpl(size_t size, const mblk_t *tmpl)
    456      0    stevel {
    457      0    stevel 	mblk_t *mp = allocb(size, 0);
    458      0    stevel 
    459      0    stevel 	if (mp != NULL) {
    460   8778      Erik 		dblk_t *src = tmpl->b_datap;
    461   8778      Erik 		dblk_t *dst = mp->b_datap;
    462  10163       Ken 		cred_t *cr;
    463  10163       Ken 		pid_t cpid;
    464  10163       Ken 
    465  10163       Ken 		cr = msg_getcred(tmpl, &cpid);
    466      0    stevel 		if (cr != NULL)
    467   8778      Erik 			crhold(dst->db_credp = cr);
    468  10163       Ken 		dst->db_cpid = cpid;
    469   8778      Erik 		dst->db_type = src->db_type;
    470   8778      Erik 	}
    471   8778      Erik 	return (mp);
    472   8778      Erik }
    473   8778      Erik 
    474   8778      Erik mblk_t *
    475   8778      Erik allocb_cred(size_t size, cred_t *cr, pid_t cpid)
    476      0    stevel {
    477      0    stevel 	mblk_t *mp = allocb(size, 0);
    478      0    stevel 
    479   8778      Erik 	ASSERT(cr != NULL);
    480   8778      Erik 	if (mp != NULL) {
    481   8778      Erik 		dblk_t *dbp = mp->b_datap;
    482   8778      Erik 
    483   8778      Erik 		crhold(dbp->db_credp = cr);
    484   8778      Erik 		dbp->db_cpid = cpid;
    485   8778      Erik 	}
    486   8778      Erik 	return (mp);
    487   8778      Erik }
    488   8778      Erik 
    489   8778      Erik mblk_t *
    490   8778      Erik allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr, pid_t cpid)
    491      0    stevel {
    492      0    stevel 	mblk_t *mp = allocb_wait(size, 0, flags, error);
    493      0    stevel 
    494   8778      Erik 	ASSERT(cr != NULL);
    495   8778      Erik 	if (mp != NULL) {
    496   8778      Erik 		dblk_t *dbp = mp->b_datap;
    497   8778      Erik 
    498   8778      Erik 		crhold(dbp->db_credp = cr);
    499   8778      Erik 		dbp->db_cpid = cpid;
    500   8778      Erik 	}
    501   8778      Erik 
    502   8778      Erik 	return (mp);
    503   8778      Erik }
    504   8778      Erik 
    505   8778      Erik /*
    506   8778      Erik  * Extract the db_cred (and optionally db_cpid) from a message.
    507   8778      Erik  * We find the first mblk which has a non-NULL db_cred and use that.
    508   8778      Erik  * If none found we return NULL.
    509   8778      Erik  * Does NOT get a hold on the cred.
    510   8778      Erik  */
    511   8778      Erik cred_t *
    512   8778      Erik msg_getcred(const mblk_t *mp, pid_t *cpidp)
    513   8778      Erik {
    514   8778      Erik 	cred_t *cr = NULL;
    515   8778      Erik 	cred_t *cr2;
    516  10163       Ken 	mblk_t *mp2;
    517   8778      Erik 
    518   8778      Erik 	while (mp != NULL) {
    519   8778      Erik 		dblk_t *dbp = mp->b_datap;
    520   8778      Erik 
    521   8778      Erik 		cr = dbp->db_credp;
    522   8778      Erik 		if (cr == NULL) {
    523   8778      Erik 			mp = mp->b_cont;
    524   8778      Erik 			continue;
    525   8778      Erik 		}
    526   8778      Erik 		if (cpidp != NULL)
    527   8778      Erik 			*cpidp = dbp->db_cpid;
    528   8778      Erik 
    529   8778      Erik #ifdef DEBUG
    530   8778      Erik 		/*
    531   8778      Erik 		 * Normally there should at most one db_credp in a message.
    532   8778      Erik 		 * But if there are multiple (as in the case of some M_IOC*
    533   8778      Erik 		 * and some internal messages in TCP/IP bind logic) then
    534   8778      Erik 		 * they must be identical in the normal case.
    535   8778      Erik 		 * However, a socket can be shared between different uids
    536   8778      Erik 		 * in which case data queued in TCP would be from different
    537   8778      Erik 		 * creds. Thus we can only assert for the zoneid being the
    538   8778      Erik 		 * same. Due to Multi-level Level Ports for TX, some
    539   8778      Erik 		 * cred_t can have a NULL cr_zone, and we skip the comparison
    540   8778      Erik 		 * in that case.
    541   8778      Erik 		 */
    542  10163       Ken 		mp2 = mp->b_cont;
    543  10163       Ken 		while (mp2 != NULL) {
    544  10163       Ken 			cr2 = DB_CRED(mp2);
    545  10163       Ken 			if (cr2 != NULL) {
    546  10163       Ken 				DTRACE_PROBE2(msg__getcred,
    547  10163       Ken 				    cred_t *, cr, cred_t *, cr2);
    548  10163       Ken 				ASSERT(crgetzoneid(cr) == crgetzoneid(cr2) ||
    549  10163       Ken 				    crgetzone(cr) == NULL ||
    550  10163       Ken 				    crgetzone(cr2) == NULL);
    551  10163       Ken 			}
    552  10163       Ken 			mp2 = mp2->b_cont;
    553   8778      Erik 		}
    554   8778      Erik #endif
    555   8778      Erik 		return (cr);
    556   8778      Erik 	}
    557   8778      Erik 	if (cpidp != NULL)
    558   8778      Erik 		*cpidp = NOPID;
    559   8778      Erik 	return (NULL);
    560   8778      Erik }
    561   8778      Erik 
    562   8778      Erik /*
    563   8778      Erik  * Variant of msg_getcred which, when a cred is found
    564   8778      Erik  * 1. Returns with a hold on the cred
    565   8778      Erik  * 2. Clears the first cred in the mblk.
    566   8778      Erik  * This is more efficient to use than a msg_getcred() + crhold() when
    567   8778      Erik  * the message is freed after the cred has been extracted.
    568   8778      Erik  *
    569   8778      Erik  * The caller is responsible for ensuring that there is no other reference
    570   8778      Erik  * on the message since db_credp can not be cleared when there are other
    571   8778      Erik  * references.
    572   8778      Erik  */
    573   8778      Erik cred_t *
    574   8778      Erik msg_extractcred(mblk_t *mp, pid_t *cpidp)
    575   8778      Erik {
    576   8778      Erik 	cred_t *cr = NULL;
    577   8778      Erik 	cred_t *cr2;
    578  10163       Ken 	mblk_t *mp2;
    579   8778      Erik 
    580   8778      Erik 	while (mp != NULL) {
    581   8778      Erik 		dblk_t *dbp = mp->b_datap;
    582   8778      Erik 
    583   8778      Erik 		cr = dbp->db_credp;
    584   8778      Erik 		if (cr == NULL) {
    585   8778      Erik 			mp = mp->b_cont;
    586   8778      Erik 			continue;
    587   8778      Erik 		}
    588   8778      Erik 		ASSERT(dbp->db_ref == 1);
    589   8778      Erik 		dbp->db_credp = NULL;
    590   8778      Erik 		if (cpidp != NULL)
    591   8778      Erik 			*cpidp = dbp->db_cpid;
    592   8778      Erik #ifdef DEBUG
    593   8778      Erik 		/*
    594   8778      Erik 		 * Normally there should at most one db_credp in a message.
    595   8778      Erik 		 * But if there are multiple (as in the case of some M_IOC*
    596   8778      Erik 		 * and some internal messages in TCP/IP bind logic) then
    597   8778      Erik 		 * they must be identical in the normal case.
    598   8778      Erik 		 * However, a socket can be shared between different uids
    599   8778      Erik 		 * in which case data queued in TCP would be from different
    600   8778      Erik 		 * creds. Thus we can only assert for the zoneid being the
    601   8778      Erik 		 * same. Due to Multi-level Level Ports for TX, some
    602   8778      Erik 		 * cred_t can have a NULL cr_zone, and we skip the comparison
    603   8778      Erik 		 * in that case.
    604   8778      Erik 		 */
    605  10163       Ken 		mp2 = mp->b_cont;
    606  10163       Ken 		while (mp2 != NULL) {
    607  10163       Ken 			cr2 = DB_CRED(mp2);
    608  10163       Ken 			if (cr2 != NULL) {
    609  10163       Ken 				DTRACE_PROBE2(msg__extractcred,
    610  10163       Ken 				    cred_t *, cr, cred_t *, cr2);
    611  10163       Ken 				ASSERT(crgetzoneid(cr) == crgetzoneid(cr2) ||
    612  10163       Ken 				    crgetzone(cr) == NULL ||
    613  10163       Ken 				    crgetzone(cr2) == NULL);
    614  10163       Ken 			}
    615  10163       Ken 			mp2 = mp2->b_cont;
    616   8778      Erik 		}
    617   8778      Erik #endif
    618   8778      Erik 		return (cr);
    619   8778      Erik 	}
    620   8778      Erik 	return (NULL);
    621   8778      Erik }
    622   8778      Erik /*
    623   8778      Erik  * Get the label for a message. Uses the first mblk in the message
    624   8778      Erik  * which has a non-NULL db_credp.
    625   8778      Erik  * Returns NULL if there is no credp.
    626   8778      Erik  */
    627   8778      Erik extern struct ts_label_s *
    628   8778      Erik msg_getlabel(const mblk_t *mp)
    629   8778      Erik {
    630   8778      Erik 	cred_t *cr = msg_getcred(mp, NULL);
    631   8778      Erik 
    632   8778      Erik 	if (cr == NULL)
    633   8778      Erik 		return (NULL);
    634   8778      Erik 
    635   8778      Erik 	return (crgetlabel(cr));
    636      0    stevel }
    637      0    stevel 
    638      0    stevel void
    639      0    stevel freeb(mblk_t *mp)
    640      0    stevel {
    641      0    stevel 	dblk_t *dbp = mp->b_datap;
    642      0    stevel 
    643      0    stevel 	ASSERT(dbp->db_ref > 0);
    644      0    stevel 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
    645      0    stevel 	FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp);
    646      0    stevel 
    647      0    stevel 	STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
    648      0    stevel 
    649      0    stevel 	dbp->db_free(mp, dbp);
    650      0    stevel }
    651      0    stevel 
    652      0    stevel void
    653      0    stevel freemsg(mblk_t *mp)
    654      0    stevel {
    655      0    stevel 	FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp);
    656      0    stevel 	while (mp) {
    657      0    stevel 		dblk_t *dbp = mp->b_datap;
    658      0    stevel 		mblk_t *mp_cont = mp->b_cont;
    659      0    stevel 
    660      0    stevel 		ASSERT(dbp->db_ref > 0);
    661      0    stevel 		ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
    662      0    stevel 
    663      0    stevel 		STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref);
    664      0    stevel 
    665      0    stevel 		dbp->db_free(mp, dbp);
    666      0    stevel 		mp = mp_cont;
    667      0    stevel 	}
    668      0    stevel }
    669      0    stevel 
    670      0    stevel /*
    671      0    stevel  * Reallocate a block for another use.  Try hard to use the old block.
    672      0    stevel  * If the old data is wanted (copy), leave b_wptr at the end of the data,
    673      0    stevel  * otherwise return b_wptr = b_rptr.
    674      0    stevel  *
    675      0    stevel  * This routine is private and unstable.
    676      0    stevel  */
    677      0    stevel mblk_t	*
    678      0    stevel reallocb(mblk_t *mp, size_t size, uint_t copy)
    679      0    stevel {
    680      0    stevel 	mblk_t		*mp1;
    681      0    stevel 	unsigned char	*old_rptr;
    682      0    stevel 	ptrdiff_t	cur_size;
    683      0    stevel 
    684      0    stevel 	if (mp == NULL)
    685      0    stevel 		return (allocb(size, BPRI_HI));
    686      0    stevel 
    687      0    stevel 	cur_size = mp->b_wptr - mp->b_rptr;
    688      0    stevel 	old_rptr = mp->b_rptr;
    689      0    stevel 
    690      0    stevel 	ASSERT(mp->b_datap->db_ref != 0);
    691      0    stevel 
    692      0    stevel 	if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) {
    693      0    stevel 		/*
    694      0    stevel 		 * If the data is wanted and it will fit where it is, no
    695      0    stevel 		 * work is required.
    696      0    stevel 		 */
    697      0    stevel 		if (copy && mp->b_datap->db_lim - mp->b_rptr >= size)
    698      0    stevel 			return (mp);
    699      0    stevel 
    700      0    stevel 		mp->b_wptr = mp->b_rptr = mp->b_datap->db_base;
    701      0    stevel 		mp1 = mp;
    702      0    stevel 	} else if ((mp1 = allocb_tmpl(size, mp)) != NULL) {
    703      0    stevel 		/* XXX other mp state could be copied too, db_flags ... ? */
    704      0    stevel 		mp1->b_cont = mp->b_cont;
    705      0    stevel 	} else {
    706      0    stevel 		return (NULL);
    707      0    stevel 	}
    708      0    stevel 
    709      0    stevel 	if (copy) {
    710      0    stevel 		bcopy(old_rptr, mp1->b_rptr, cur_size);
    711      0    stevel 		mp1->b_wptr = mp1->b_rptr + cur_size;
    712      0    stevel 	}
    713      0    stevel 
    714      0    stevel 	if (mp != mp1)
    715      0    stevel 		freeb(mp);
    716      0    stevel 
    717      0    stevel 	return (mp1);
    718      0    stevel }
    719      0    stevel 
    720      0    stevel static void
    721      0    stevel dblk_lastfree(mblk_t *mp, dblk_t *dbp)
    722      0    stevel {
    723      0    stevel 	ASSERT(dbp->db_mblk == mp);
    724      0    stevel 	if (dbp->db_fthdr != NULL)
    725      0    stevel 		str_ftfree(dbp);
    726      0    stevel 
    727      0    stevel 	/* set credp and projid to be 'unspecified' before returning to cache */
    728      0    stevel 	if (dbp->db_credp != NULL) {
    729      0    stevel 		crfree(dbp->db_credp);
    730      0    stevel 		dbp->db_credp = NULL;
    731      0    stevel 	}
    732      0    stevel 	dbp->db_cpid = -1;
    733      0    stevel 
    734      0    stevel 	/* Reset the struioflag and the checksum flag fields */
    735      0    stevel 	dbp->db_struioflag = 0;
    736      0    stevel 	dbp->db_struioun.cksum.flags = 0;
    737      0    stevel 
    738   6707    brutus 	/* and the COOKED and/or UIOA flag(s) */
    739   6707    brutus 	dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA);
    740    898      kais 
    741      0    stevel 	kmem_cache_free(dbp->db_cache, dbp);
    742      0    stevel }
    743      0    stevel 
    744      0    stevel static void
    745      0    stevel dblk_decref(mblk_t *mp, dblk_t *dbp)
    746      0    stevel {
    747      0    stevel 	if (dbp->db_ref != 1) {
    748      0    stevel 		uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp),
    749      0    stevel 		    -(1 << DBLK_RTFU_SHIFT(db_ref)));
    750      0    stevel 		/*
    751      0    stevel 		 * atomic_add_32_nv() just decremented db_ref, so we no longer
    752      0    stevel 		 * have a reference to the dblk, which means another thread
    753      0    stevel 		 * could free it.  Therefore we cannot examine the dblk to
    754      0    stevel 		 * determine whether ours was the last reference.  Instead,
    755      0    stevel 		 * we extract the new and minimum reference counts from rtfu.
    756      0    stevel 		 * Note that all we're really saying is "if (ref != refmin)".
    757      0    stevel 		 */
    758      0    stevel 		if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) !=
    759      0    stevel 		    ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) {
    760      0    stevel 			kmem_cache_free(mblk_cache, mp);
    761      0    stevel 			return;
    762      0    stevel 		}
    763      0    stevel 	}
    764      0    stevel 	dbp->db_mblk = mp;
    765      0    stevel 	dbp->db_free = dbp->db_lastfree;
    766      0    stevel 	dbp->db_lastfree(mp, dbp);
    767      0    stevel }
    768      0    stevel 
    769      0    stevel mblk_t *
    770      0    stevel dupb(mblk_t *mp)
    771      0    stevel {
    772      0    stevel 	dblk_t *dbp = mp->b_datap;
    773      0    stevel 	mblk_t *new_mp;
    774      0    stevel 	uint32_t oldrtfu, newrtfu;
    775      0    stevel 
    776      0    stevel 	if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL)
    777      0    stevel 		goto out;
    778      0    stevel 
    779      0    stevel 	new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL;
    780      0    stevel 	new_mp->b_rptr = mp->b_rptr;
    781      0    stevel 	new_mp->b_wptr = mp->b_wptr;
    782      0    stevel 	new_mp->b_datap = dbp;
    783      0    stevel 	new_mp->b_queue = NULL;
    784      0    stevel 	MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp);
    785      0    stevel 
    786      0    stevel 	STR_FTEVENT_MBLK(mp, caller(), FTEV_DUPB, dbp->db_ref);
    787      0    stevel 
    788   3163   georges 	dbp->db_free = dblk_decref;
    789      0    stevel 	do {
    790      0    stevel 		ASSERT(dbp->db_ref > 0);
    791      0    stevel 		oldrtfu = DBLK_RTFU_WORD(dbp);
    792      0    stevel 		newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref));
    793      0    stevel 		/*
    794      0    stevel 		 * If db_ref is maxed out we can't dup this message anymore.
    795      0    stevel 		 */
    796      0    stevel 		if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) {
    797      0    stevel 			kmem_cache_free(mblk_cache, new_mp);
    798      0    stevel 			new_mp = NULL;
    799      0    stevel 			goto out;
    800      0    stevel 		}
    801      0    stevel 	} while (cas32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != oldrtfu);
    802      0    stevel 
    803      0    stevel out:
    804      0    stevel 	FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp);
    805      0    stevel 	return (new_mp);
    806      0    stevel }
    807      0    stevel 
    808      0    stevel static void
    809      0    stevel dblk_lastfree_desb(mblk_t *mp, dblk_t *dbp)
    810      0    stevel {
    811      0    stevel 	frtn_t *frp = dbp->db_frtnp;
    812      0    stevel 
    813      0    stevel 	ASSERT(dbp->db_mblk == mp);
    814      0    stevel 	frp->free_func(frp->free_arg);
    815      0    stevel 	if (dbp->db_fthdr != NULL)
    816      0    stevel 		str_ftfree(dbp);
    817      0    stevel 
    818      0    stevel 	/* set credp and projid to be 'unspecified' before returning to cache */
    819      0    stevel 	if (dbp->db_credp != NULL) {
    820      0    stevel 		crfree(dbp->db_credp);
    821      0    stevel 		dbp->db_credp = NULL;
    822      0    stevel 	}
    823      0    stevel 	dbp->db_cpid = -1;
    824      0    stevel 	dbp->db_struioflag = 0;
    825      0    stevel 	dbp->db_struioun.cksum.flags = 0;
    826      0    stevel 
    827      0    stevel 	kmem_cache_free(dbp->db_cache, dbp);
    828      0    stevel }
    829      0    stevel 
    830      0    stevel /*ARGSUSED*/
    831      0    stevel static void
    832      0    stevel frnop_func(void *arg)
    833      0    stevel {
    834      0    stevel }
    835      0    stevel 
    836      0    stevel /*
    837      0    stevel  * Generic esballoc used to implement the four flavors: [d]esballoc[a].
    838      0    stevel  */
    839      0    stevel static mblk_t *
    840      0    stevel gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp,
    841      0    stevel 	void (*lastfree)(mblk_t *, dblk_t *), int kmflags)
    842      0    stevel {
    843      0    stevel 	dblk_t *dbp;
    844      0    stevel 	mblk_t *mp;
    845      0    stevel 
    846      0    stevel 	ASSERT(base != NULL && frp != NULL);
    847      0    stevel 
    848      0    stevel 	if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) {
    849      0    stevel 		mp = NULL;
    850      0    stevel 		goto out;
    851      0    stevel 	}
    852      0    stevel 
    853      0    stevel 	mp = dbp->db_mblk;
    854      0    stevel 	dbp->db_base = base;
    855      0    stevel 	dbp->db_lim = base + size;
    856      0    stevel 	dbp->db_free = dbp->db_lastfree = lastfree;
    857      0    stevel 	dbp->db_frtnp = frp;
    858      0    stevel 	DBLK_RTFU_WORD(dbp) = db_rtfu;
    859      0    stevel 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
    860      0    stevel 	mp->b_rptr = mp->b_wptr = base;
    861      0    stevel 	mp->b_queue = NULL;
    862      0    stevel 	MBLK_BAND_FLAG_WORD(mp) = 0;
    863      0    stevel 
    864      0    stevel out:
    865      0    stevel 	FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp);
    866      0    stevel 	return (mp);
    867      0    stevel }
    868      0    stevel 
    869      0    stevel /*ARGSUSED*/
    870      0    stevel mblk_t *
    871      0    stevel esballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
    872      0    stevel {
    873      0    stevel 	mblk_t *mp;
    874      0    stevel 
    875      0    stevel 	/*
    876      0    stevel 	 * Note that this is structured to allow the common case (i.e.
    877      0    stevel 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
    878      0    stevel 	 * call optimization.
    879      0    stevel 	 */
    880      0    stevel 	if (!str_ftnever) {
    881      0    stevel 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
    882      0    stevel 		    frp, freebs_enqueue, KM_NOSLEEP);
    883      0    stevel 
    884      0    stevel 		if (mp != NULL)
    885      0    stevel 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
    886      0    stevel 		return (mp);
    887      0    stevel 	}
    888      0    stevel 
    889      0    stevel 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
    890      0    stevel 	    frp, freebs_enqueue, KM_NOSLEEP));
    891      0    stevel }
    892      0    stevel 
    893      0    stevel /*
    894      0    stevel  * Same as esballoc() but sleeps waiting for memory.
    895      0    stevel  */
    896      0    stevel /*ARGSUSED*/
    897      0    stevel mblk_t *
    898      0    stevel esballoc_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
    899      0    stevel {
    900      0    stevel 	mblk_t *mp;
    901      0    stevel 
    902      0    stevel 	/*
    903      0    stevel 	 * Note that this is structured to allow the common case (i.e.
    904      0    stevel 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
    905      0    stevel 	 * call optimization.
    906      0    stevel 	 */
    907      0    stevel 	if (!str_ftnever) {
    908      0    stevel 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
    909      0    stevel 		    frp, freebs_enqueue, KM_SLEEP);
    910      0    stevel 
    911      0    stevel 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size);
    912      0    stevel 		return (mp);
    913      0    stevel 	}
    914      0    stevel 
    915      0    stevel 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
    916      0    stevel 	    frp, freebs_enqueue, KM_SLEEP));
    917      0    stevel }
    918      0    stevel 
    919      0    stevel /*ARGSUSED*/
    920      0    stevel mblk_t *
    921      0    stevel desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
    922      0    stevel {
    923      0    stevel 	mblk_t *mp;
    924      0    stevel 
    925      0    stevel 	/*
    926      0    stevel 	 * Note that this is structured to allow the common case (i.e.
    927      0    stevel 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
    928      0    stevel 	 * call optimization.
    929      0    stevel 	 */
    930      0    stevel 	if (!str_ftnever) {
    931      0    stevel 		mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
    932   6707    brutus 		    frp, dblk_lastfree_desb, KM_NOSLEEP);
    933      0    stevel 
    934      0    stevel 		if (mp != NULL)
    935      0    stevel 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size);
    936      0    stevel 		return (mp);
    937      0    stevel 	}
    938      0    stevel 
    939      0    stevel 	return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0),
    940      0    stevel 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
    941      0    stevel }
    942      0    stevel 
    943      0    stevel /*ARGSUSED*/
    944      0    stevel mblk_t *
    945      0    stevel esballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
    946      0    stevel {
    947      0    stevel 	mblk_t *mp;
    948      0    stevel 
    949      0    stevel 	/*
    950      0    stevel 	 * Note that this is structured to allow the common case (i.e.
    951      0    stevel 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
    952      0    stevel 	 * call optimization.
    953      0    stevel 	 */
    954      0    stevel 	if (!str_ftnever) {
    955      0    stevel 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
    956      0    stevel 		    frp, freebs_enqueue, KM_NOSLEEP);
    957      0    stevel 
    958      0    stevel 		if (mp != NULL)
    959      0    stevel 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size);
    960      0    stevel 		return (mp);
    961      0    stevel 	}
    962      0    stevel 
    963      0    stevel 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
    964      0    stevel 	    frp, freebs_enqueue, KM_NOSLEEP));
    965      0    stevel }
    966      0    stevel 
    967      0    stevel /*ARGSUSED*/
    968      0    stevel mblk_t *
    969      0    stevel desballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp)
    970      0    stevel {
    971      0    stevel 	mblk_t *mp;
    972      0    stevel 
    973      0    stevel 	/*
    974      0    stevel 	 * Note that this is structured to allow the common case (i.e.
    975      0    stevel 	 * STREAMS flowtracing disabled) to call gesballoc() with tail
    976      0    stevel 	 * call optimization.
    977      0    stevel 	 */
    978      0    stevel 	if (!str_ftnever) {
    979      0    stevel 		mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
    980      0    stevel 		    frp, dblk_lastfree_desb, KM_NOSLEEP);
    981      0    stevel 
    982      0    stevel 		if (mp != NULL)
    983      0    stevel 			STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOCA, size);
    984      0    stevel 		return (mp);
    985      0    stevel 	}
    986      0    stevel 
    987      0    stevel 	return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0),
    988      0    stevel 	    frp, dblk_lastfree_desb, KM_NOSLEEP));
    989      0    stevel }
    990      0    stevel 
    991      0    stevel static void
    992      0    stevel bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp)
    993      0    stevel {
    994      0    stevel 	bcache_t *bcp = dbp->db_cache;
    995      0    stevel 
    996      0    stevel 	ASSERT(dbp->db_mblk == mp);
    997      0    stevel 	if (dbp->db_fthdr != NULL)
    998      0    stevel 		str_ftfree(dbp);
    999      0    stevel 
   1000      0    stevel 	/* set credp and projid to be 'unspecified' before returning to cache */
   1001      0    stevel 	if (dbp->db_credp != NULL) {
   1002      0    stevel 		crfree(dbp->db_credp);
   1003      0    stevel 		dbp->db_credp = NULL;
   1004      0    stevel 	}
   1005      0    stevel 	dbp->db_cpid = -1;
   1006      0    stevel 	dbp->db_struioflag = 0;
   1007      0    stevel 	dbp->db_struioun.cksum.flags = 0;
   1008      0    stevel 
   1009      0    stevel 	mutex_enter(&bcp->mutex);
   1010      0    stevel 	kmem_cache_free(bcp->dblk_cache, dbp);
   1011      0    stevel 	bcp->alloc--;
   1012      0    stevel 
   1013      0    stevel 	if (bcp->alloc == 0 && bcp->destroy != 0) {
   1014      0    stevel 		kmem_cache_destroy(bcp->dblk_cache);
   1015      0    stevel 		kmem_cache_destroy(bcp->buffer_cache);
   1016      0    stevel 		mutex_exit(&bcp->mutex);
   1017      0    stevel 		mutex_destroy(&bcp->mutex);
   1018      0    stevel 		kmem_free(bcp, sizeof (bcache_t));
   1019      0    stevel 	} else {
   1020      0    stevel 		mutex_exit(&bcp->mutex);
   1021      0    stevel 	}
   1022      0    stevel }
   1023      0    stevel 
   1024      0    stevel bcache_t *
   1025      0    stevel bcache_create(char *name, size_t size, uint_t align)
   1026      0    stevel {
   1027      0    stevel 	bcache_t *bcp;
   1028      0    stevel 	char buffer[255];
   1029      0    stevel 
   1030      0    stevel 	ASSERT((align & (align - 1)) == 0);
   1031      0    stevel 
   1032   8752     Peter 	if ((bcp = kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) == NULL)
   1033   8752     Peter 		return (NULL);
   1034      0    stevel 
   1035      0    stevel 	bcp->size = size;
   1036      0    stevel 	bcp->align = align;
   1037      0    stevel 	bcp->alloc = 0;
   1038      0    stevel 	bcp->destroy = 0;
   1039      0    stevel 
   1040      0    stevel 	mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL);
   1041      0    stevel 
   1042      0    stevel 	(void) sprintf(buffer, "%s_buffer_cache", name);
   1043      0    stevel 	bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL,
   1044      0    stevel 	    NULL, NULL, NULL, 0);
   1045      0    stevel 	(void) sprintf(buffer, "%s_dblk_cache", name);
   1046      0    stevel 	bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t),
   1047      0    stevel 	    DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor,
   1048   6707    brutus 	    NULL, (void *)bcp, NULL, 0);
   1049      0    stevel 
   1050      0    stevel 	return (bcp);
   1051      0    stevel }
   1052      0    stevel 
   1053      0    stevel void
   1054      0    stevel bcache_destroy(bcache_t *bcp)
   1055      0    stevel {
   1056      0    stevel 	ASSERT(bcp != NULL);
   1057      0    stevel 
   1058      0    stevel 	mutex_enter(&bcp->mutex);
   1059      0    stevel 	if (bcp->alloc == 0) {
   1060      0    stevel 		kmem_cache_destroy(bcp->dblk_cache);
   1061      0    stevel 		kmem_cache_destroy(bcp->buffer_cache);
   1062      0    stevel 		mutex_exit(&bcp->mutex);
   1063      0    stevel 		mutex_destroy(&bcp->mutex);
   1064      0    stevel 		kmem_free(bcp, sizeof (bcache_t));
   1065      0    stevel 	} else {
   1066      0    stevel 		bcp->destroy++;
   1067      0    stevel 		mutex_exit(&bcp->mutex);
   1068      0    stevel 	}
   1069      0    stevel }
   1070      0    stevel 
   1071      0    stevel /*ARGSUSED*/
   1072      0    stevel mblk_t *
   1073      0    stevel bcache_allocb(bcache_t *bcp, uint_t pri)
   1074      0    stevel {
   1075      0    stevel 	dblk_t *dbp;
   1076      0    stevel 	mblk_t *mp = NULL;
   1077      0    stevel 
   1078      0    stevel 	ASSERT(bcp != NULL);
   1079      0    stevel 
   1080      0    stevel 	mutex_enter(&bcp->mutex);
   1081      0    stevel 	if (bcp->destroy != 0) {
   1082      0    stevel 		mutex_exit(&bcp->mutex);
   1083      0    stevel 		goto out;
   1084      0    stevel 	}
   1085      0    stevel 
   1086      0    stevel 	if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) {
   1087      0    stevel 		mutex_exit(&bcp->mutex);
   1088      0    stevel 		goto out;
   1089      0    stevel 	}
   1090      0    stevel 	bcp->alloc++;
   1091      0    stevel 	mutex_exit(&bcp->mutex);
   1092      0    stevel 
   1093      0    stevel 	ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0);
   1094      0    stevel 
   1095      0    stevel 	mp = dbp->db_mblk;
   1096      0    stevel 	DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
   1097      0    stevel 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
   1098      0    stevel 	mp->b_rptr = mp->b_wptr = dbp->db_base;
   1099      0    stevel 	mp->b_queue = NULL;
   1100      0    stevel 	MBLK_BAND_FLAG_WORD(mp) = 0;
   1101      0    stevel 	STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size);
   1102      0    stevel out:
   1103      0    stevel 	FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp);
   1104      0    stevel 
   1105      0    stevel 	return (mp);
   1106      0    stevel }
   1107      0    stevel 
   1108      0    stevel static void
   1109      0    stevel dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp)
   1110      0    stevel {
   1111      0    stevel 	ASSERT(dbp->db_mblk == mp);
   1112      0    stevel 	if (dbp->db_fthdr != NULL)
   1113      0    stevel 		str_ftfree(dbp);
   1114      0    stevel 
   1115      0    stevel 	/* set credp and projid to be 'unspecified' before returning to cache */
   1116      0    stevel 	if (dbp->db_credp != NULL) {
   1117      0    stevel 		crfree(dbp->db_credp);
   1118      0    stevel 		dbp->db_credp = NULL;
   1119      0    stevel 	}
   1120      0    stevel 	dbp->db_cpid = -1;
   1121      0    stevel 	dbp->db_struioflag = 0;
   1122      0    stevel 	dbp->db_struioun.cksum.flags = 0;
   1123      0    stevel 
   1124      0    stevel 	kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base);
   1125      0    stevel 	kmem_cache_free(dbp->db_cache, dbp);
   1126      0    stevel }
   1127      0    stevel 
   1128      0    stevel static mblk_t *
   1129      0    stevel allocb_oversize(size_t size, int kmflags)
   1130      0    stevel {
   1131      0    stevel 	mblk_t *mp;
   1132      0    stevel 	void *buf;
   1133      0    stevel 
   1134      0    stevel 	size = P2ROUNDUP(size, DBLK_CACHE_ALIGN);
   1135      0    stevel 	if ((buf = kmem_alloc(size, kmflags)) == NULL)
   1136      0    stevel 		return (NULL);
   1137      0    stevel 	if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0),
   1138      0    stevel 	    &frnop, dblk_lastfree_oversize, kmflags)) == NULL)
   1139      0    stevel 		kmem_free(buf, size);
   1140      0    stevel 
   1141      0    stevel 	if (mp != NULL)
   1142      0    stevel 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size);
   1143      0    stevel 
   1144      0    stevel 	return (mp);
   1145      0    stevel }
   1146      0    stevel 
   1147      0    stevel mblk_t *
   1148      0    stevel allocb_tryhard(size_t target_size)
   1149      0    stevel {
   1150      0    stevel 	size_t size;
   1151      0    stevel 	mblk_t *bp;
   1152      0    stevel 
   1153      0    stevel 	for (size = target_size; size < target_size + 512;
   1154      0    stevel 	    size += DBLK_CACHE_ALIGN)
   1155      0    stevel 		if ((bp = allocb(size, BPRI_HI)) != NULL)
   1156      0    stevel 			return (bp);
   1157      0    stevel 	allocb_tryhard_fails++;
   1158      0    stevel 	return (NULL);
   1159      0    stevel }
   1160      0    stevel 
   1161      0    stevel /*
   1162      0    stevel  * This routine is consolidation private for STREAMS internal use
   1163      0    stevel  * This routine may only be called from sync routines (i.e., not
   1164      0    stevel  * from put or service procedures).  It is located here (rather
   1165      0    stevel  * than strsubr.c) so that we don't have to expose all of the
   1166      0    stevel  * allocb() implementation details in header files.
   1167      0    stevel  */
   1168      0    stevel mblk_t *
   1169      0    stevel allocb_wait(size_t size, uint_t pri, uint_t flags, int *error)
   1170      0    stevel {
   1171      0    stevel 	dblk_t *dbp;
   1172      0    stevel 	mblk_t *mp;
   1173      0    stevel 	size_t index;
   1174      0    stevel 
   1175      0    stevel 	index = (size -1) >> DBLK_SIZE_SHIFT;
   1176      0    stevel 
   1177      0    stevel 	if (flags & STR_NOSIG) {
   1178      0    stevel 		if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) {
   1179      0    stevel 			if (size != 0) {
   1180      0    stevel 				mp = allocb_oversize(size, KM_SLEEP);
   1181      0    stevel 				FTRACE_1("allocb_wait (NOSIG): mp=0x%lx",
   1182      0    stevel 				    (uintptr_t)mp);
   1183      0    stevel 				return (mp);
   1184      0    stevel 			}
   1185      0    stevel 			index = 0;
   1186      0    stevel 		}
   1187      0    stevel 
   1188      0    stevel 		dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP);
   1189      0    stevel 		mp = dbp->db_mblk;
   1190      0    stevel 		DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0);
   1191      0    stevel 		mp->b_next = mp->b_prev = mp->b_cont = NULL;
   1192      0    stevel 		mp->b_rptr = mp->b_wptr = dbp->db_base;
   1193      0    stevel 		mp->b_queue = NULL;
   1194      0    stevel 		MBLK_BAND_FLAG_WORD(mp) = 0;
   1195      0    stevel 		STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size);
   1196      0    stevel 
   1197      0    stevel 		FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp);
   1198      0    stevel 
   1199      0    stevel 	} else {
   1200      0    stevel 		while ((mp = allocb(size, pri)) == NULL) {
   1201      0    stevel 			if ((*error = strwaitbuf(size, BPRI_HI)) != 0)
   1202      0    stevel 				return (NULL);
   1203      0    stevel 		}
   1204      0    stevel 	}
   1205      0    stevel 
   1206      0    stevel 	return (mp);
   1207      0    stevel }
   1208      0    stevel 
   1209      0    stevel /*
   1210      0    stevel  * Call function 'func' with 'arg' when a class zero block can
   1211      0    stevel  * be allocated with priority 'pri'.
   1212      0    stevel  */
   1213      0    stevel bufcall_id_t
   1214      0    stevel esbbcall(uint_t pri, void (*func)(void *), void *arg)
   1215      0    stevel {
   1216      0    stevel 	return (bufcall(1, pri, func, arg));
   1217      0    stevel }
   1218      0    stevel 
   1219      0    stevel /*
   1220      0    stevel  * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials
   1221      0    stevel  * ioc_id, rval and error of the struct ioctl to set up an ioctl call.
   1222      0    stevel  * This provides consistency for all internal allocators of ioctl.
   1223      0    stevel  */
   1224      0    stevel mblk_t *
   1225      0    stevel mkiocb(uint_t cmd)
   1226      0    stevel {
   1227      0    stevel 	struct iocblk	*ioc;
   1228      0    stevel 	mblk_t		*mp;
   1229      0    stevel 
   1230      0    stevel 	/*
   1231      0    stevel 	 * Allocate enough space for any of the ioctl related messages.
   1232      0    stevel 	 */
   1233      0    stevel 	if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL)
   1234      0    stevel 		return (NULL);
   1235      0    stevel 
   1236      0    stevel 	bzero(mp->b_rptr, sizeof (union ioctypes));
   1237      0    stevel 
   1238      0    stevel 	/*
   1239      0    stevel 	 * Set the mblk_t information and ptrs correctly.
   1240      0    stevel 	 */
   1241      0    stevel 	mp->b_wptr += sizeof (struct iocblk);
   1242      0    stevel 	mp->b_datap->db_type = M_IOCTL;
   1243      0    stevel 
   1244      0    stevel 	/*
   1245      0    stevel 	 * Fill in the fields.
   1246      0    stevel 	 */
   1247      0    stevel 	ioc		= (struct iocblk *)mp->b_rptr;
   1248      0    stevel 	ioc->ioc_cmd	= cmd;
   1249      0    stevel 	ioc->ioc_cr	= kcred;
   1250      0    stevel 	ioc->ioc_id	= getiocseqno();
   1251      0    stevel 	ioc->ioc_flag	= IOC_NATIVE;
   1252      0    stevel 	return (mp);
   1253      0    stevel }
   1254      0    stevel 
   1255      0    stevel /*
   1256      0    stevel  * test if block of given size can be allocated with a request of
   1257      0    stevel  * the given priority.
   1258      0    stevel  * 'pri' is no longer used, but is retained for compatibility.
   1259      0    stevel  */
   1260      0    stevel /* ARGSUSED */
   1261      0    stevel int
   1262      0    stevel testb(size_t size, uint_t pri)
   1263      0    stevel {
   1264      0    stevel 	return ((size + sizeof (dblk_t)) <= kmem_avail());
   1265      0    stevel }
   1266      0    stevel 
   1267      0    stevel /*
   1268      0    stevel  * Call function 'func' with argument 'arg' when there is a reasonably
   1269      0    stevel  * good chance that a block of size 'size' can be allocated.
   1270      0    stevel  * 'pri' is no longer used, but is retained for compatibility.
   1271      0    stevel  */
   1272      0    stevel /* ARGSUSED */
   1273      0    stevel bufcall_id_t
   1274      0    stevel bufcall(size_t size, uint_t pri, void (*func)(void *), void *arg)
   1275      0    stevel {
   1276      0    stevel 	static long bid = 1;	/* always odd to save checking for zero */
   1277      0    stevel 	bufcall_id_t bc_id;
   1278      0    stevel 	struct strbufcall *bcp;
   1279      0    stevel 
   1280      0    stevel 	if ((bcp = kmem_alloc(sizeof (strbufcall_t), KM_NOSLEEP)) == NULL)
   1281      0    stevel 		return (0);
   1282      0    stevel 
   1283      0    stevel 	bcp->bc_func = func;
   1284      0    stevel 	bcp->bc_arg = arg;
   1285      0    stevel 	bcp->bc_size = size;
   1286      0    stevel 	bcp->bc_next = NULL;
   1287      0    stevel 	bcp->bc_executor = NULL;
   1288      0    stevel 
   1289      0    stevel 	mutex_enter(&strbcall_lock);
   1290      0    stevel 	/*
   1291      0    stevel 	 * After bcp is linked into strbcalls and strbcall_lock is dropped there
   1292      0    stevel 	 * should be no references to bcp since it may be freed by
   1293      0    stevel 	 * runbufcalls(). Since bcp_id field is returned, we save its value in
   1294      0    stevel 	 * the local var.
   1295      0    stevel 	 */
   1296      0    stevel 	bc_id = bcp->bc_id = (bufcall_id_t)(bid += 2);	/* keep it odd */
   1297      0    stevel 
   1298      0    stevel 	/*
   1299      0    stevel 	 * add newly allocated stream event to existing
   1300      0    stevel 	 * linked list of events.
   1301      0    stevel 	 */
   1302      0    stevel 	if (strbcalls.bc_head == NULL) {
   1303      0    stevel 		strbcalls.bc_head = strbcalls.bc_tail = bcp;
   1304      0    stevel 	} else {
   1305      0    stevel 		strbcalls.bc_tail->bc_next = bcp;
   1306      0    stevel 		strbcalls.bc_tail = bcp;
   1307      0    stevel 	}
   1308      0    stevel 
   1309      0    stevel 	cv_signal(&strbcall_cv);
   1310      0    stevel 	mutex_exit(&strbcall_lock);
   1311      0    stevel 	return (bc_id);
   1312      0    stevel }
   1313      0    stevel 
   1314      0    stevel /*
   1315      0    stevel  * Cancel a bufcall request.
   1316      0    stevel  */
   1317      0    stevel void
   1318      0    stevel unbufcall(bufcall_id_t id)
   1319      0    stevel {
   1320      0    stevel 	strbufcall_t *bcp, *pbcp;
   1321      0    stevel 
   1322      0    stevel 	mutex_enter(&strbcall_lock);
   1323      0    stevel again:
   1324      0    stevel 	pbcp = NULL;
   1325      0    stevel 	for (bcp = strbcalls.bc_head; bcp; bcp = bcp->bc_next) {
   1326      0    stevel 		if (id == bcp->bc_id)
   1327      0    stevel 			break;
   1328      0    stevel 		pbcp = bcp;
   1329      0    stevel 	}
   1330      0    stevel 	if (bcp) {
   1331      0    stevel 		if (bcp->bc_executor != NULL) {
   1332      0    stevel 			if (bcp->bc_executor != curthread) {
   1333      0    stevel 				cv_wait(&bcall_cv, &strbcall_lock);
   1334      0    stevel 				goto again;
   1335      0    stevel 			}
   1336      0    stevel 		} else {
   1337      0    stevel 			if (pbcp)
   1338      0    stevel 				pbcp->bc_next = bcp->bc_next;
   1339      0    stevel 			else
   1340      0    stevel 				strbcalls.bc_head = bcp->bc_next;
   1341      0    stevel 			if (bcp == strbcalls.bc_tail)
   1342      0    stevel 				strbcalls.bc_tail = pbcp;
   1343      0    stevel 			kmem_free(bcp, sizeof (strbufcall_t));
   1344      0    stevel 		}
   1345      0    stevel 	}
   1346      0    stevel 	mutex_exit(&strbcall_lock);
   1347      0    stevel }
   1348      0    stevel 
   1349      0    stevel /*
   1350      0    stevel  * Duplicate a message block by block (uses dupb), returning
   1351      0    stevel  * a pointer to the duplicate message.
   1352      0    stevel  * Returns a non-NULL value only if the entire message
   1353      0    stevel  * was dup'd.
   1354      0    stevel  */
   1355      0    stevel mblk_t *
   1356      0    stevel dupmsg(mblk_t *bp)
   1357      0    stevel {
   1358      0    stevel 	mblk_t *head, *nbp;
   1359      0    stevel 
   1360      0    stevel 	if (!bp || !(nbp = head = dupb(bp)))
   1361      0    stevel 		return (NULL);
   1362      0    stevel 
   1363      0    stevel 	while (bp->b_cont) {
   1364      0    stevel 		if (!(nbp->b_cont = dupb(bp->b_cont))) {
   1365      0    stevel 			freemsg(head);
   1366      0    stevel 			return (NULL);
   1367      0    stevel 		}
   1368      0    stevel 		nbp = nbp->b_cont;
   1369      0    stevel 		bp = bp->b_cont;
   1370      0    stevel 	}
   1371      0    stevel 	return (head);
   1372      0    stevel }
   1373      0    stevel 
   1374      0    stevel #define	DUPB_NOLOAN(bp) \
   1375      0    stevel 	((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? \
   1376      0    stevel 	copyb((bp)) : dupb((bp)))
   1377      0    stevel 
   1378      0    stevel mblk_t *
   1379      0    stevel dupmsg_noloan(mblk_t *bp)
   1380      0    stevel {
   1381      0    stevel 	mblk_t *head, *nbp;
   1382      0    stevel 
   1383      0    stevel 	if (bp == NULL || DB_TYPE(bp) != M_DATA ||
   1384      0    stevel 	    ((nbp = head = DUPB_NOLOAN(bp)) == NULL))
   1385      0    stevel 		return (NULL);
   1386      0    stevel 
   1387      0    stevel 	while (bp->b_cont) {
   1388      0    stevel 		if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) {
   1389      0    stevel 			freemsg(head);
   1390      0    stevel 			return (NULL);
   1391      0    stevel 		}
   1392      0    stevel 		nbp = nbp->b_cont;
   1393      0    stevel 		bp = bp->b_cont;
   1394      0    stevel 	}
   1395      0    stevel 	return (head);
   1396      0    stevel }
   1397      0    stevel 
   1398      0    stevel /*
   1399      0    stevel  * Copy data from message and data block to newly allocated message and
   1400      0    stevel  * data block. Returns new message block pointer, or NULL if error.
   1401      0    stevel  * The alignment of rptr (w.r.t. word alignment) will be the same in the copy
   1402      0    stevel  * as in the original even when db_base is not word aligned. (bug 1052877)
   1403      0    stevel  */
   1404      0    stevel mblk_t *
   1405      0    stevel copyb(mblk_t *bp)
   1406      0    stevel {
   1407      0    stevel 	mblk_t	*nbp;
   1408      0    stevel 	dblk_t	*dp, *ndp;
   1409      0    stevel 	uchar_t *base;
   1410      0    stevel 	size_t	size;
   1411      0    stevel 	size_t	unaligned;
   1412      0    stevel 
   1413      0    stevel 	ASSERT(bp->b_wptr >= bp->b_rptr);
   1414      0    stevel 
   1415      0    stevel 	dp = bp->b_datap;
   1416      0    stevel 	if (dp->db_fthdr != NULL)
   1417      0    stevel 		STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0);
   1418      0    stevel 
   1419      0    stevel 	/*
   1420      0    stevel 	 * Special handling for Multidata message; this should be
   1421      0    stevel 	 * removed once a copy-callback routine is made available.
   1422      0    stevel 	 */
   1423      0    stevel 	if (dp->db_type == M_MULTIDATA) {
   1424      0    stevel 		cred_t *cr;
   1425      0    stevel 
   1426      0    stevel 		if ((nbp = mmd_copy(bp, KM_NOSLEEP)) == NULL)
   1427      0    stevel 			return (NULL);
   1428      0    stevel 
   1429      0    stevel 		nbp->b_flag = bp->b_flag;
   1430      0    stevel 		nbp->b_band = bp->b_band;
   1431      0    stevel 		ndp = nbp->b_datap;
   1432      0    stevel 
   1433      0    stevel 		/* See comments below on potential issues. */
   1434      0    stevel 		STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
   1435      0    stevel 
   1436      0    stevel 		ASSERT(ndp->db_type == dp->db_type);
   1437      0    stevel 		cr = dp->db_credp;
   1438      0    stevel 		if (cr != NULL)
   1439      0    stevel 			crhold(ndp->db_credp = cr);
   1440      0    stevel 		ndp->db_cpid = dp->db_cpid;
   1441      0    stevel 		return (nbp);
   1442      0    stevel 	}
   1443      0    stevel 
   1444      0    stevel 	size = dp->db_lim - dp->db_base;
   1445      0    stevel 	unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t));
   1446      0    stevel 	if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL)
   1447      0    stevel 		return (NULL);
   1448      0    stevel 	nbp->b_flag = bp->b_flag;
   1449      0    stevel 	nbp->b_band = bp->b_band;
   1450      0    stevel 	ndp = nbp->b_datap;
   1451      0    stevel 
   1452      0    stevel 	/*
   1453      0    stevel 	 * Well, here is a potential issue.  If we are trying to
   1454      0    stevel 	 * trace a flow, and we copy the message, we might lose
   1455      0    stevel 	 * information about where this message might have been.
   1456      0    stevel 	 * So we should inherit the FT data.  On the other hand,
   1457      0    stevel 	 * a user might be interested only in alloc to free data.
   1458      0    stevel 	 * So I guess the real answer is to provide a tunable.
   1459      0    stevel 	 */
   1460      0    stevel 	STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1);
   1461      0    stevel 
   1462      0    stevel 	base = ndp->db_base + unaligned;
   1463      0    stevel 	bcopy(dp->db_base, ndp->db_base + unaligned, size);
   1464      0    stevel 
   1465      0    stevel 	nbp->b_rptr = base + (bp->b_rptr - dp->db_base);
   1466      0    stevel 	nbp->b_wptr = nbp->b_rptr + MBLKL(bp);
   1467      0    stevel 
   1468      0    stevel 	return (nbp);
   1469      0    stevel }
   1470      0    stevel 
   1471      0    stevel /*
   1472      0    stevel  * Copy data from message to newly allocated message using new
   1473      0    stevel  * data blocks.  Returns a pointer to the new message, or NULL if error.
   1474      0    stevel  */
   1475      0    stevel mblk_t *
   1476      0    stevel copymsg(mblk_t *bp)
   1477      0    stevel {
   1478      0    stevel 	mblk_t *head, *nbp;
   1479      0    stevel 
   1480      0    stevel 	if (!bp || !(nbp = head = copyb(bp)))
   1481      0    stevel 		return (NULL);
   1482      0    stevel 
   1483      0    stevel 	while (bp->b_cont) {
   1484      0    stevel 		if (!(nbp->b_cont = copyb(bp->b_cont))) {
   1485      0    stevel 			freemsg(head);
   1486      0    stevel 			return (NULL);
   1487      0    stevel 		}
   1488      0    stevel 		nbp = nbp->b_cont;
   1489      0    stevel 		bp = bp->b_cont;
   1490      0    stevel 	}
   1491      0    stevel 	return (head);
   1492      0    stevel }
   1493      0    stevel 
   1494      0    stevel /*
   1495      0    stevel  * link a message block to tail of message
   1496      0    stevel  */
   1497      0    stevel void
   1498      0    stevel linkb(mblk_t *mp, mblk_t *bp)
   1499      0    stevel {
   1500      0    stevel 	ASSERT(mp && bp);
   1501      0    stevel 
   1502      0    stevel 	for (; mp->b_cont; mp = mp->b_cont)
   1503      0    stevel 		;
   1504      0    stevel 	mp->b_cont = bp;
   1505      0    stevel }
   1506      0    stevel 
   1507      0    stevel /*
   1508      0    stevel  * unlink a message block from head of message
   1509      0    stevel  * return pointer to new message.
   1510      0    stevel  * NULL if message becomes empty.
   1511      0    stevel  */
   1512      0    stevel mblk_t *
   1513      0    stevel unlinkb(mblk_t *bp)
   1514      0    stevel {
   1515      0    stevel 	mblk_t *bp1;
   1516      0    stevel 
   1517      0    stevel 	bp1 = bp->b_cont;
   1518      0    stevel 	bp->b_cont = NULL;
   1519      0    stevel 	return (bp1);
   1520      0    stevel }
   1521      0    stevel 
   1522      0    stevel /*
   1523      0    stevel  * remove a message block "bp" from message "mp"
   1524      0    stevel  *
   1525      0    stevel  * Return pointer to new message or NULL if no message remains.
   1526      0    stevel  * Return -1 if bp is not found in message.
   1527      0    stevel  */
   1528      0    stevel mblk_t *
   1529      0    stevel rmvb(mblk_t *mp, mblk_t *bp)
   1530      0    stevel {
   1531      0    stevel 	mblk_t *tmp;
   1532      0    stevel 	mblk_t *lastp = NULL;
   1533      0    stevel 
   1534      0    stevel 	ASSERT(mp && bp);
   1535      0    stevel 	for (tmp = mp; tmp; tmp = tmp->b_cont) {
   1536      0    stevel 		if (tmp == bp) {
   1537      0    stevel 			if (lastp)
   1538      0    stevel 				lastp->b_cont = tmp->b_cont;
   1539      0    stevel 			else
   1540      0    stevel 				mp = tmp->b_cont;
   1541      0    stevel 			tmp->b_cont = NULL;
   1542      0    stevel 			return (mp);
   1543      0    stevel 		}
   1544      0    stevel 		lastp = tmp;
   1545      0    stevel 	}
   1546      0    stevel 	return ((mblk_t *)-1);
   1547      0    stevel }
   1548      0    stevel 
   1549      0    stevel /*
   1550      0    stevel  * Concatenate and align first len bytes of common
   1551      0    stevel  * message type.  Len == -1, means concat everything.
   1552      0    stevel  * Returns 1 on success, 0 on failure
   1553      0    stevel  * After the pullup, mp points to the pulled up data.
   1554      0    stevel  */
   1555      0    stevel int
   1556      0    stevel pullupmsg(mblk_t *mp, ssize_t len)
   1557      0    stevel {
   1558      0    stevel 	mblk_t *bp, *b_cont;
   1559      0    stevel 	dblk_t *dbp;
   1560      0    stevel 	ssize_t n;
   1561      0    stevel 
   1562      0    stevel 	ASSERT(mp->b_datap->db_ref > 0);
   1563      0    stevel 	ASSERT(mp->b_next == NULL && mp->b_prev == NULL);
   1564      0    stevel 
   1565      0    stevel 	/*
   1566      0    stevel 	 * We won't handle Multidata message, since it contains
   1567      0    stevel 	 * metadata which this function has no knowledge of; we
   1568      0    stevel 	 * assert on DEBUG, and return failure otherwise.
   1569      0    stevel 	 */
   1570      0    stevel 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
   1571      0    stevel 	if (mp->b_datap->db_type == M_MULTIDATA)
   1572      0    stevel 		return (0);
   1573      0    stevel 
   1574      0    stevel 	if (len == -1) {
   1575      0    stevel 		if (mp->b_cont == NULL && str_aligned(mp->b_rptr))
   1576      0    stevel 			return (1);
   1577      0    stevel 		len = xmsgsize(mp);
   1578      0    stevel 	} else {
   1579      0    stevel 		ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr;
   1580      0    stevel 		ASSERT(first_mblk_len >= 0);
   1581      0    stevel 		/*
   1582      0    stevel 		 * If the length is less than that of the first mblk,
   1583      0    stevel 		 * we want to pull up the message into an aligned mblk.
   1584      0    stevel 		 * Though not part of the spec, some callers assume it.
   1585      0    stevel 		 */
   1586      0    stevel 		if (len <= first_mblk_len) {
   1587      0    stevel 			if (str_aligned(mp->b_rptr))
   1588      0    stevel 				return (1);
   1589      0    stevel 			len = first_mblk_len;
   1590      0    stevel 		} else if (xmsgsize(mp) < len)
   1591      0    stevel 			return (0);
   1592      0    stevel 	}
   1593      0    stevel 
   1594      0    stevel 	if ((bp = allocb_tmpl(len, mp)) == NULL)
   1595      0    stevel 		return (0);
   1596      0    stevel 
   1597      0    stevel 	dbp = bp->b_datap;
   1598      0    stevel 	*bp = *mp;		/* swap mblks so bp heads the old msg... */
   1599      0    stevel 	mp->b_datap = dbp;	/* ... and mp heads the new message */
   1600      0    stevel 	mp->b_datap->db_mblk = mp;
   1601      0    stevel 	bp->b_datap->db_mblk = bp;
   1602      0    stevel 	mp->b_rptr = mp->b_wptr = dbp->db_base;
   1603      0    stevel 
   1604      0    stevel 	do {
   1605      0    stevel 		ASSERT(bp->b_datap->db_ref > 0);
   1606      0    stevel 		ASSERT(bp->b_wptr >= bp->b_rptr);
   1607      0    stevel 		n = MIN(bp->b_wptr - bp->b_rptr, len);
   1608  11042      Erik 		ASSERT(n >= 0);		/* allow zero-length mblk_t's */
   1609  11042      Erik 		if (n > 0)
   1610  11042      Erik 			bcopy(bp->b_rptr, mp->b_wptr, (size_t)n);
   1611      0    stevel 		mp->b_wptr += n;
   1612      0    stevel 		bp->b_rptr += n;
   1613      0    stevel 		len -= n;
   1614      0    stevel 		if (bp->b_rptr != bp->b_wptr)
   1615      0    stevel 			break;
   1616      0    stevel 		b_cont = bp->b_cont;
   1617      0    stevel 		freeb(bp);
   1618      0    stevel 		bp = b_cont;
   1619      0    stevel 	} while (len && bp);
   1620      0    stevel 
   1621      0    stevel 	mp->b_cont = bp;	/* tack on whatever wasn't pulled up */
   1622      0    stevel 
   1623      0    stevel 	return (1);
   1624      0    stevel }
   1625      0    stevel 
   1626      0    stevel /*
   1627      0    stevel  * Concatenate and align at least the first len bytes of common message
   1628      0    stevel  * type.  Len == -1 means concatenate everything.  The original message is
   1629      0    stevel  * unaltered.  Returns a pointer to a new message on success, otherwise
   1630      0    stevel  * returns NULL.
   1631      0    stevel  */
   1632      0    stevel mblk_t *
   1633      0    stevel msgpullup(mblk_t *mp, ssize_t len)
   1634      0    stevel {
   1635      0    stevel 	mblk_t	*newmp;
   1636      0    stevel 	ssize_t	totlen;
   1637      0    stevel 	ssize_t	n;
   1638      0    stevel 
   1639      0    stevel 	/*
   1640      0    stevel 	 * We won't handle Multidata message, since it contains
   1641      0    stevel 	 * metadata which this function has no knowledge of; we
   1642      0    stevel 	 * assert on DEBUG, and return failure otherwise.
   1643      0    stevel 	 */
   1644      0    stevel 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
   1645      0    stevel 	if (mp->b_datap->db_type == M_MULTIDATA)
   1646      0    stevel 		return (NULL);
   1647      0    stevel 
   1648      0    stevel 	totlen = xmsgsize(mp);
   1649      0    stevel 
   1650      0    stevel 	if ((len > 0) && (len > totlen))
   1651      0    stevel 		return (NULL);
   1652      0    stevel 
   1653      0    stevel 	/*
   1654      0    stevel 	 * Copy all of the first msg type into one new mblk, then dupmsg
   1655      0    stevel 	 * and link the rest onto this.
   1656      0    stevel 	 */
   1657      0    stevel 
   1658      0    stevel 	len = totlen;
   1659      0    stevel 
   1660      0    stevel 	if ((newmp = allocb_tmpl(len, mp)) == NULL)
   1661      0    stevel 		return (NULL);
   1662      0    stevel 
   1663      0    stevel 	newmp->b_flag = mp->b_flag;
   1664      0    stevel 	newmp->b_band = mp->b_band;
   1665      0    stevel 
   1666      0    stevel 	while (len > 0) {
   1667      0    stevel 		n = mp->b_wptr - mp->b_rptr;
   1668      0    stevel 		ASSERT(n >= 0);		/* allow zero-length mblk_t's */
   1669      0    stevel 		if (n > 0)
   1670      0    stevel 			bcopy(mp->b_rptr, newmp->b_wptr, n);
   1671      0    stevel 		newmp->b_wptr += n;
   1672      0    stevel 		len -= n;
   1673      0    stevel 		mp = mp->b_cont;
   1674      0    stevel 	}
   1675      0    stevel 
   1676      0    stevel 	if (mp != NULL) {
   1677      0    stevel 		newmp->b_cont = dupmsg(mp);
   1678      0    stevel 		if (newmp->b_cont == NULL) {
   1679      0    stevel 			freemsg(newmp);
   1680      0    stevel 			return (NULL);
   1681      0    stevel 		}
   1682      0    stevel 	}
   1683      0    stevel 
   1684      0    stevel 	return (newmp);
   1685      0    stevel }
   1686      0    stevel 
   1687      0    stevel /*
   1688      0    stevel  * Trim bytes from message
   1689      0    stevel  *  len > 0, trim from head
   1690      0    stevel  *  len < 0, trim from tail
   1691      0    stevel  * Returns 1 on success, 0 on failure.
   1692      0    stevel  */
   1693      0    stevel int
   1694      0    stevel adjmsg(mblk_t *mp, ssize_t len)
   1695      0    stevel {
   1696      0    stevel 	mblk_t *bp;
   1697      0    stevel 	mblk_t *save_bp = NULL;
   1698      0    stevel 	mblk_t *prev_bp;
   1699      0    stevel 	mblk_t *bcont;
   1700      0    stevel 	unsigned char type;
   1701      0    stevel 	ssize_t n;
   1702      0    stevel 	int fromhead;
   1703      0    stevel 	int first;
   1704      0    stevel 
   1705      0    stevel 	ASSERT(mp != NULL);
   1706      0    stevel 	/*
   1707      0    stevel 	 * We won't handle Multidata message, since it contains
   1708      0    stevel 	 * metadata which this function has no knowledge of; we
   1709      0    stevel 	 * assert on DEBUG, and return failure otherwise.
   1710      0    stevel 	 */
   1711      0    stevel 	ASSERT(mp->b_datap->db_type != M_MULTIDATA);
   1712      0    stevel 	if (mp->b_datap->db_type == M_MULTIDATA)
   1713      0    stevel 		return (0);
   1714      0    stevel 
   1715      0    stevel 	if (len < 0) {
   1716      0    stevel 		fromhead = 0;
   1717      0    stevel 		len = -len;
   1718      0    stevel 	} else {
   1719      0    stevel 		fromhead = 1;
   1720      0    stevel 	}
   1721      0    stevel 
   1722      0    stevel 	if (xmsgsize(mp) < len)
   1723      0    stevel 		return (0);
   1724      0    stevel 
   1725      0    stevel 	if (fromhead) {
   1726      0    stevel 		first = 1;
   1727      0    stevel 		while (len) {
   1728      0    stevel 			ASSERT(mp->b_wptr >= mp->b_rptr);
   1729      0    stevel 			n = MIN(mp->b_wptr - mp->b_rptr, len);
   1730      0    stevel 			mp->b_rptr += n;
   1731      0    stevel 			len -= n;
   1732      0    stevel 
   1733      0    stevel 			/*
   1734      0    stevel 			 * If this is not the first zero length
   1735      0    stevel 			 * message remove it
   1736      0    stevel 			 */
   1737      0    stevel 			if (!first && (mp->b_wptr == mp->b_rptr)) {
   1738      0    stevel 				bcont = mp->b_cont;
   1739      0    stevel 				freeb(mp);
   1740      0    stevel 				mp = save_bp->b_cont = bcont;
   1741      0    stevel 			} else {
   1742      0    stevel 				save_bp = mp;
   1743      0    stevel 				mp = mp->b_cont;
   1744      0    stevel 			}
   1745      0    stevel 			first = 0;
   1746      0    stevel 		}
   1747      0    stevel 	} else {
   1748      0    stevel 		type = mp->b_datap->db_type;
   1749      0    stevel 		while (len) {
   1750      0    stevel 			bp = mp;
   1751      0    stevel 			save_bp = NULL;
   1752      0    stevel 
   1753      0    stevel 			/*
   1754      0    stevel 			 * Find the last message of same type
   1755      0    stevel 			 */
   1756      0    stevel 			while (bp && bp->b_datap->db_type == type) {
   1757      0    stevel 				ASSERT(bp->b_wptr >= bp->b_rptr);
   1758      0    stevel 				prev_bp = save_bp;
   1759      0    stevel 				save_bp = bp;
   1760      0    stevel 				bp = bp->b_cont;
   1761      0    stevel 			}
   1762      0    stevel 			if (save_bp == NULL)
   1763      0    stevel 				break;
   1764      0    stevel 			n = MIN(save_bp->b_wptr - save_bp->b_rptr, len);
   1765      0    stevel 			save_bp->b_wptr -= n;
   1766      0    stevel 			len -= n;
   1767      0    stevel 
   1768      0    stevel 			/*
   1769      0    stevel 			 * If this is not the first message
   1770      0    stevel 			 * and we have taken away everything
   1771      0    stevel 			 * from this message, remove it
   1772      0    stevel 			 */
   1773      0    stevel 
   1774      0    stevel 			if ((save_bp != mp) &&
   1775   6707    brutus 			    (save_bp->b_wptr == save_bp->b_rptr)) {
   1776      0    stevel 				bcont = save_bp->b_cont;
   1777      0    stevel 				freeb(save_bp);
   1778      0    stevel 				prev_bp->b_cont = bcont;
   1779      0    stevel 			}
   1780      0    stevel 		}
   1781      0    stevel 	}
   1782      0    stevel 	return (1);
   1783      0    stevel }
   1784      0    stevel 
   1785      0    stevel /*
   1786      0    stevel  * get number of data bytes in message
   1787      0    stevel  */
   1788      0    stevel size_t
   1789      0    stevel msgdsize(mblk_t *bp)
   1790      0    stevel {
   1791      0    stevel 	size_t count = 0;
   1792      0    stevel 
   1793      0    stevel 	for (; bp; bp = bp->b_cont)
   1794      0    stevel 		if (bp->b_datap->db_type == M_DATA) {
   1795      0    stevel 			ASSERT(bp->b_wptr >= bp->b_rptr);
   1796      0    stevel 			count += bp->b_wptr - bp->b_rptr;
   1797      0    stevel 		}
   1798      0    stevel 	return (count);
   1799      0    stevel }
   1800      0    stevel 
   1801      0    stevel /*
   1802      0    stevel  * Get a message off head of queue
   1803      0    stevel  *
   1804      0    stevel  * If queue has no buffers then mark queue
   1805      0    stevel  * with QWANTR. (queue wants to be read by
   1806      0    stevel  * someone when data becomes available)
   1807      0    stevel  *
   1808      0    stevel  * If there is something to take off then do so.
   1809      0    stevel  * If queue falls below hi water mark turn off QFULL
   1810      0    stevel  * flag.  Decrement weighted count of queue.
   1811      0    stevel  * Also turn off QWANTR because queue is being read.
   1812      0    stevel  *
   1813      0    stevel  * The queue count is maintained on a per-band basis.
   1814      0    stevel  * Priority band 0 (normal messages) uses q_count,
   1815      0    stevel  * q_lowat, etc.  Non-zero priority bands use the
   1816      0    stevel  * fields in their respective qband structures
   1817      0    stevel  * (qb_count, qb_lowat, etc.)  All messages appear
   1818      0    stevel  * on the same list, linked via their b_next pointers.
   1819      0    stevel  * q_first is the head of the list.  q_count does
   1820      0    stevel  * not reflect the size of all the messages on the
   1821      0    stevel  * queue.  It only reflects those messages in the
   1822      0    stevel  * normal band of flow.  The one exception to this
   1823      0    stevel  * deals with high priority messages.  They are in
   1824      0    stevel  * their own conceptual "band", but are accounted
   1825      0    stevel  * against q_count.
   1826      0    stevel  *
   1827      0    stevel  * If queue count is below the lo water mark and QWANTW
   1828      0    stevel  * is set, enable the closest backq which has a service
   1829      0    stevel  * procedure and turn off the QWANTW flag.
   1830      0    stevel  *
   1831      0    stevel  * getq could be built on top of rmvq, but isn't because
   1832      0    stevel  * of performance considerations.
   1833      0    stevel  *
   1834      0    stevel  * A note on the use of q_count and q_mblkcnt:
   1835      0    stevel  *   q_count is the traditional byte count for messages that
   1836      0    stevel  *   have been put on a queue.  Documentation tells us that
   1837      0    stevel  *   we shouldn't rely on that count, but some drivers/modules
   1838      0    stevel  *   do.  What was needed, however, is a mechanism to prevent
   1839      0    stevel  *   runaway streams from consuming all of the resources,
   1840      0    stevel  *   and particularly be able to flow control zero-length
   1841      0    stevel  *   messages.  q_mblkcnt is used for this purpose.  It
   1842      0    stevel  *   counts the number of mblk's that are being put on
   1843      0    stevel  *   the queue.  The intention here, is that each mblk should
   1844      0    stevel  *   contain one byte of data and, for the purpose of
   1845      0    stevel  *   flow-control, logically does.  A queue will become
   1846      0    stevel  *   full when EITHER of these values (q_count and q_mblkcnt)
   1847      0    stevel  *   reach the highwater mark.  It will clear when BOTH
   1848      0    stevel  *   of them drop below the highwater mark.  And it will
   1849      0    stevel  *   backenable when BOTH of them drop below the lowwater
   1850      0    stevel  *   mark.
   1851      0    stevel  *   With this algorithm, a driver/module might be able
   1852      0    stevel  *   to find a reasonably accurate q_count, and the
   1853      0    stevel  *   framework can still try and limit resource usage.
   1854      0    stevel  */
   1855      0    stevel mblk_t *
   1856      0    stevel getq(queue_t *q)
   1857      0    stevel {
   1858      0    stevel 	mblk_t *bp;
   1859    235   micheng 	uchar_t band = 0;
   1860      0    stevel 
   1861   6769   ja97890 	bp = getq_noenab(q, 0);
   1862      0    stevel 	if (bp != NULL)
   1863      0    stevel 		band = bp->b_band;
   1864      0    stevel 
   1865      0    stevel 	/*
   1866      0    stevel 	 * Inlined from qbackenable().
   1867      0    stevel 	 * Quick check without holding the lock.
   1868      0    stevel 	 */
   1869      0    stevel 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
   1870      0    stevel 		return (bp);
   1871      0    stevel 
   1872      0    stevel 	qbackenable(q, band);
   1873      0    stevel 	return (bp);
   1874      0    stevel }
   1875      0    stevel 
   1876      0    stevel /*
   1877    741  masputra  * Calculate number of data bytes in a single data message block taking
   1878    741  masputra  * multidata messages into account.
   1879    741  masputra  */
   1880    741  masputra 
   1881    741  masputra #define	ADD_MBLK_SIZE(mp, size) 					\
   1882    741  masputra 	if (DB_TYPE(mp) != M_MULTIDATA) {				\
   1883    741  masputra 		(size) += MBLKL(mp);					\
   1884    741  masputra 	} else {							\
   1885    741  masputra 		uint_t	pinuse;						\
   1886    741  masputra 									\
   1887    741  masputra 		mmd_getsize(mmd_getmultidata(mp), NULL, &pinuse);	\
   1888    741  masputra 		(size) += pinuse;					\
   1889    741  masputra 	}
   1890    741  masputra 
   1891    741  masputra /*
   1892   6769   ja97890  * Returns the number of bytes in a message (a message is defined as a
   1893   6769   ja97890  * chain of mblks linked by b_cont). If a non-NULL mblkcnt is supplied we
   1894   6769   ja97890  * also return the number of distinct mblks in the message.
   1895   6769   ja97890  */
   1896   6769   ja97890 int
   1897   6769   ja97890 mp_cont_len(mblk_t *bp, int *mblkcnt)
   1898   6769   ja97890 {
   1899   6769   ja97890 	mblk_t	*mp;
   1900   6769   ja97890 	int	mblks = 0;
   1901   6769   ja97890 	int	bytes = 0;
   1902   6769   ja97890 
   1903   6769   ja97890 	for (mp = bp; mp != NULL; mp = mp->b_cont) {
   1904   6769   ja97890 		ADD_MBLK_SIZE(mp, bytes);
   1905   6769   ja97890 		mblks++;
   1906   6769   ja97890 	}
   1907   6769   ja97890 
   1908   6769   ja97890 	if (mblkcnt != NULL)
   1909   6769   ja97890 		*mblkcnt = mblks;
   1910   6769   ja97890 
   1911   6769   ja97890 	return (bytes);
   1912   6769   ja97890 }
   1913   6769   ja97890 
   1914   6769   ja97890 /*
   1915      0    stevel  * Like getq() but does not backenable.  This is used by the stream
   1916      0    stevel  * head when a putback() is likely.  The caller must call qbackenable()
   1917      0    stevel  * after it is done with accessing the queue.
   1918   6769   ja97890  * The rbytes argument to getq_noenab() allows callers to specify
   1919   6769   ja97890  * the maximum number of bytes to return. If the current amount on the
   1920   6769   ja97890  * queue is less than this then the entire message will be returned.
   1921   6769   ja97890  * A value of 0 returns the entire message and is equivalent to the old
   1922   6769   ja97890  * default behaviour prior to the addition of the rbytes argument.
   1923   6769   ja97890  */
   1924   6769   ja97890 mblk_t *
   1925   6769   ja97890 getq_noenab(queue_t *q, ssize_t rbytes)
   1926   6769   ja97890 {
   1927   6769   ja97890 	mblk_t *bp, *mp1;
   1928   6769   ja97890 	mblk_t *mp2 = NULL;
   1929      0    stevel 	qband_t *qbp;
   1930      0    stevel 	kthread_id_t freezer;
   1931      0    stevel 	int	bytecnt = 0, mblkcnt = 0;
   1932      0    stevel 
   1933      0    stevel 	/* freezestr should allow its caller to call getq/putq */
   1934      0    stevel 	freezer = STREAM(q)->sd_freezer;
   1935      0    stevel 	if (freezer == curthread) {
   1936      0    stevel 		ASSERT(frozenstr(q));
   1937      0    stevel 		ASSERT(MUTEX_HELD(QLOCK(q)));
   1938      0    stevel 	} else
   1939      0    stevel 		mutex_enter(QLOCK(q));
   1940      0    stevel 
   1941      0    stevel 	if ((bp = q->q_first) == 0) {
   1942      0    stevel 		q->q_flag |= QWANTR;
   1943      0    stevel 	} else {
   1944   6769   ja97890 		/*
   1945   6769   ja97890 		 * If the caller supplied a byte threshold and there is
   1946   6769   ja97890 		 * more than this amount on the queue then break up the
   1947   6769   ja97890 		 * the message appropriately.  We can only safely do
   1948   6769   ja97890 		 * this for M_DATA messages.
   1949   6769   ja97890 		 */
   1950   6769   ja97890 		if ((DB_TYPE(bp) == M_DATA) && (rbytes > 0) &&
   1951   6769   ja97890 		    (q->q_count > rbytes)) {
   1952   6769   ja97890 			/*
   1953   6769   ja97890 			 * Inline version of mp_cont_len() which terminates
   1954   6769   ja97890 			 * when we meet or exceed rbytes.
   1955   6769   ja97890 			 */
   1956   6769   ja97890 			for (mp1 = bp; mp1 != NULL; mp1 = mp1->b_cont) {
   1957   6769   ja97890 				mblkcnt++;
   1958   6769   ja97890 				ADD_MBLK_SIZE(mp1, bytecnt);
   1959   6769   ja97890 				if (bytecnt  >= rbytes)
   1960   6769   ja97890 					break;
   1961   6769   ja97890 			}
   1962   6769   ja97890 			/*
   1963   6769   ja97890 			 * We need to account for the following scenarios:
   1964   6769   ja97890 			 *
   1965   6769   ja97890 			 * 1) Too much data in the first message:
   1966   6769   ja97890 			 *	mp1 will be the mblk which puts us over our
   1967   6769   ja97890 			 *	byte limit.
   1968   6769   ja97890 			 * 2) Not enough data in the first message:
   1969   6769   ja97890 			 *	mp1 will be NULL.
   1970   6769   ja97890 			 * 3) Exactly the right amount of data contained within
   1971   6769   ja97890 			 *    whole mblks:
   1972   6769   ja97890 			 *	mp1->b_cont will be where we break the message.
   1973   6769   ja97890 			 */
   1974   6769   ja97890 			if (bytecnt > rbytes) {
   1975   6769   ja97890 				/*
   1976   6769   ja97890 				 * Dup/copy mp1 and put what we don't need
   1977   6769   ja97890 				 * back onto the queue. Adjust the read/write
   1978   6769   ja97890 				 * and continuation pointers appropriately
   1979   6769   ja97890 				 * and decrement the current mblk count to
   1980   6769   ja97890 				 * reflect we are putting an mblk back onto
   1981   6769   ja97890 				 * the queue.
   1982   6769   ja97890 				 * When adjusting the message pointers, it's
   1983   6769   ja97890 				 * OK to use the existing bytecnt and the
   1984   6769   ja97890 				 * requested amount (rbytes) to calculate the
   1985   6769   ja97890 				 * the new write offset (b_wptr) of what we
   1986   6769   ja97890 				 * are taking. However, we  cannot use these
   1987   6769   ja97890 				 * values when calculating the read offset of
   1988   6769   ja97890 				 * the mblk we are putting back on the queue.
   1989   6769   ja97890 				 * This is because the begining (b_rptr) of the
   1990   6769   ja97890 				 * mblk represents some arbitrary point within
   1991   6769   ja97890 				 * the message.
   1992   6769   ja97890 				 * It's simplest to do this by advancing b_rptr
   1993   6769   ja97890 				 * by the new length of mp1 as we don't have to
   1994   6769   ja97890 				 * remember any intermediate state.
   1995   6769   ja97890 				 */
   1996   6769   ja97890 				ASSERT(mp1 != NULL);
   1997   6769   ja97890 				mblkcnt--;
   1998   6769   ja97890 				if ((mp2 = dupb(mp1)) == NULL &&
   1999   6769   ja97890 				    (mp2 = copyb(mp1)) == NULL) {
   2000   6769   ja97890 					bytecnt = mblkcnt = 0;
   2001   6769   ja97890 					goto dup_failed;
   2002   6769   ja97890 				}
   2003   6769   ja97890 				mp2->b_cont = mp1->b_cont;
   2004   6769   ja97890 				mp1->b_wptr -= bytecnt - rbytes;
   2005   6769   ja97890 				mp2->b_rptr += mp1->b_wptr - mp1->b_rptr;
   2006   6769   ja97890 				mp1->b_cont = NULL;
   2007   6769   ja97890 				bytecnt = rbytes;
   2008   6769   ja97890 			} else {
   2009   6769   ja97890 				/*
   2010   6769   ja97890 				 * Either there is not enough data in the first
   2011   6769   ja97890 				 * message or there is no excess data to deal
   2012   6769   ja97890 				 * with. If mp1 is NULL, we are taking the
   2013   6769   ja97890 				 * whole message. No need to do anything.
   2014   6769   ja97890 				 * Otherwise we assign mp1->b_cont to mp2 as
   2015   6769   ja97890 				 * we will be putting this back onto the head of
   2016   6769   ja97890 				 * the queue.
   2017   6769   ja97890 				 */
   2018   6769   ja97890 				if (mp1 != NULL) {
   2019   6769   ja97890 					mp2 = mp1->b_cont;
   2020   6769   ja97890 					mp1->b_cont = NULL;
   2021   6769   ja97890 				}
   2022   6769   ja97890 			}
   2023   6769   ja97890 			/*
   2024   6769   ja97890 			 * If mp2 is not NULL then we have part of the message
   2025   6769   ja97890 			 * to put back onto the queue.
   2026   6769   ja97890 			 */
   2027   6769   ja97890 			if (mp2 != NULL) {
   2028   6769   ja97890 				if ((mp2->b_next = bp->b_next) == NULL)
   2029   6769   ja97890 					q->q_last = mp2;
   2030   6769   ja97890 				else
   2031   6769   ja97890 					bp->b_next->b_prev = mp2;
   2032   6769   ja97890 				q->q_first = mp2;
   2033   6769   ja97890 			} else {
   2034   6769   ja97890 				if ((q->q_first = bp->b_next) == NULL)
   2035   6769   ja97890 					q->q_last = NULL;
   2036   6769   ja97890 				else
   2037   6769   ja97890 					q->q_first->b_prev = NULL;
   2038   6769   ja97890 			}
   2039   6769   ja97890 		} else {
   2040   6769   ja97890 			/*
   2041   6769   ja97890 			 * Either no byte threshold was supplied, there is
   2042   6769   ja97890 			 * not enough on the queue or we failed to
   2043   6769   ja97890 			 * duplicate/copy a data block. In these cases we
   2044   6769   ja97890 			 * just take the entire first message.
   2045   6769   ja97890 			 */
   2046   6769   ja97890 dup_failed:
   2047   6769   ja97890 			bytecnt = mp_cont_len(bp, &mblkcnt);
   2048   6769   ja97890 			if ((q->q_first = bp->b_next) == NULL)
   2049   6769   ja97890 				q->q_last = NULL;
   2050   6769   ja97890 			else
   2051   6769   ja97890 				q->q_first->b_prev = NULL;
   2052   6769   ja97890 		}
   2053      0    stevel 		if (bp->b_band == 0) {
   2054      0    stevel 			q->q_count -= bytecnt;
   2055      0    stevel 			q->q_mblkcnt -= mblkcnt;
   2056   5360  rk129064 			if (q->q_mblkcnt == 0 || ((q->q_count < q->q_hiwat) &&
   2057   5360  rk129064 			    (q->q_mblkcnt < q->q_hiwat))) {
   2058      0    stevel 				q->q_flag &= ~QFULL;
   2059      0    stevel 			}
   2060      0    stevel 		} else {
   2061      0    stevel 			int i;
   2062      0    stevel 
   2063      0    stevel 			ASSERT(bp->b_band <= q->q_nband);
   2064      0    stevel 			ASSERT(q->q_bandp != NULL);
   2065      0    stevel 			ASSERT(MUTEX_HELD(QLOCK(q)));
   2066      0    stevel 			qbp = q->q_bandp;
   2067      0    stevel 			i = bp->b_band;
   2068      0    stevel 			while (--i > 0)
   2069      0    stevel 				qbp = qbp->qb_next;
   2070      0    stevel 			if (qbp->qb_first == qbp->qb_last) {
   2071      0    stevel 				qbp->qb_first = NULL;
   2072      0    stevel 				qbp->qb_last = NULL;
   2073      0    stevel 			} else {
   2074      0    stevel 				qbp->qb_first = bp->b_next;
   2075      0    stevel 			}
   2076      0    stevel 			qbp->qb_count -= bytecnt;
   2077      0    stevel 			qbp->qb_mblkcnt -= mblkcnt;
   2078   5360  rk129064 			if (qbp->qb_mblkcnt == 0 ||
   2079   5360  rk129064 			    ((qbp->qb_count < qbp->qb_hiwat) &&
   2080   5360  rk129064 			    (qbp->qb_mblkcnt < qbp->qb_hiwat))) {
   2081      0    stevel 				qbp->qb_flag &= ~QB_FULL;
   2082      0    stevel 			}
   2083      0    stevel 		}
   2084      0    stevel 		q->q_flag &= ~QWANTR;
   2085      0    stevel 		bp->b_next = NULL;
   2086      0    stevel 		bp->b_prev = NULL;
   2087      0    stevel 	}
   2088      0    stevel 	if (freezer != curthread)
   2089      0    stevel 		mutex_exit(QLOCK(q));
   2090      0    stevel 
   2091      0    stevel 	STR_FTEVENT_MSG(bp, q, FTEV_GETQ, NULL);
   2092      0    stevel 
   2093      0    stevel 	return (bp);
   2094      0    stevel }
   2095      0    stevel 
   2096      0    stevel /*
   2097      0    stevel  * Determine if a backenable is needed after removing a message in the
   2098      0    stevel  * specified band.
   2099      0    stevel  * NOTE: This routine assumes that something like getq_noenab() has been
   2100      0    stevel  * already called.
   2101      0    stevel  *
   2102      0    stevel  * For the read side it is ok to hold sd_lock across calling this (and the
   2103      0    stevel  * stream head often does).
   2104      0    stevel  * But for the write side strwakeq might be invoked and it acquires sd_lock.
   2105      0    stevel  */
   2106      0    stevel void
   2107    235   micheng qbackenable(queue_t *q, uchar_t band)
   2108      0    stevel {
   2109      0    stevel 	int backenab = 0;
   2110      0    stevel 	qband_t *qbp;
   2111      0    stevel 	kthread_id_t freezer;
   2112      0    stevel 
   2113      0    stevel 	ASSERT(q);
   2114      0    stevel 	ASSERT((q->q_flag & QREADR) || MUTEX_NOT_HELD(&STREAM(q)->sd_lock));
   2115      0    stevel 
   2116      0    stevel 	/*
   2117      0    stevel 	 * Quick check without holding the lock.
   2118      0    stevel 	 * OK since after getq() has lowered the q_count these flags
   2119      0    stevel 	 * would not change unless either the qbackenable() is done by
   2120      0    stevel 	 * another thread (which is ok) or the queue has gotten QFULL
   2121      0    stevel 	 * in which case another backenable will take place when the queue
   2122      0    stevel 	 * drops below q_lowat.
   2123      0    stevel 	 */
   2124      0    stevel 	if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0)
   2125      0    stevel 		return;
   2126      0    stevel 
   2127      0    stevel 	/* freezestr should allow its caller to call getq/putq */
   2128      0    stevel 	freezer = STREAM(q)->sd_freezer;
   2129      0    stevel 	if (freezer == curthread) {
   2130      0    stevel 		ASSERT(frozenstr(q));
   2131      0    stevel 		ASSERT(MUTEX_HELD(QLOCK(q)));
   2132      0    stevel 	} else
   2133      0    stevel 		mutex_enter(QLOCK(q));
   2134      0    stevel 
   2135      0    stevel 	if (band == 0) {
   2136      0    stevel 		if (q->q_lowat == 0 || (q->q_count < q->q_lowat &&
   2137      0    stevel 		    q->q_mblkcnt < q->q_lowat)) {
   2138      0    stevel 			backenab = q->q_flag & (QWANTW|QWANTWSYNC);
   2139      0    stevel 		}
   2140      0    stevel 	} else {
   2141      0    stevel 		int i;
   2142      0    stevel 
   2143      0    stevel 		ASSERT((unsigned)band <= q->q_nband);
   2144      0    stevel 		ASSERT(q->q_bandp != NULL);
   2145      0    stevel 
   2146      0    stevel 		qbp = q->q_bandp;
   2147      0    stevel 		i = band;
   2148      0    stevel 		while (--i > 0)
   2149      0    stevel 			qbp = qbp->qb_next;
   2150      0    stevel 
   2151      0    stevel 		if (qbp->qb_lowat == 0 || (qbp->qb_count < qbp->qb_lowat &&
   2152      0    stevel 		    qbp->qb_mblkcnt < qbp->qb_lowat)) {
   2153      0    stevel 			backenab = qbp->qb_flag & QB_WANTW;
   2154      0    stevel 		}
   2155      0    stevel 	}
   2156      0    stevel 
   2157      0    stevel 	if (backenab == 0) {
   2158      0    stevel 		if (freezer != curthread)
   2159      0    stevel 			mutex_exit(QLOCK(q));
   2160      0    stevel 		return;
   2161      0    stevel 	}
   2162      0    stevel 
   2163      0    stevel 	/* Have to drop the lock across strwakeq and backenable */
   2164      0    stevel 	if (backenab & QWANTWSYNC)
   2165      0    stevel 		q->q_flag &= ~QWANTWSYNC;
   2166      0    stevel 	if (backenab & (QWANTW|QB_WANTW)) {
   2167      0    stevel 		if (band != 0)
   2168      0    stevel 			qbp->qb_flag &= ~QB_WANTW;
   2169      0    stevel 		else {
   2170      0    stevel 			q->q_flag &= ~QWANTW;
   2171      0    stevel 		}
   2172      0    stevel 	}
   2173      0    stevel 
   2174      0    stevel 	if (freezer != curthread)
   2175      0    stevel 		mutex_exit(QLOCK(q));
   2176      0    stevel 
   2177      0    stevel 	if (backenab & QWANTWSYNC)
   2178      0    stevel 		strwakeq(q, QWANTWSYNC);
   2179      0    stevel 	if (backenab & (QWANTW|QB_WANTW))
   2180      0    stevel 		backenable(q, band);
   2181      0    stevel }
   2182      0    stevel 
   2183      0    stevel /*
   2184      0    stevel  * Remove a message from a queue.  The queue count and other
   2185      0    stevel  * flow control parameters are adjusted and the back queue
   2186      0    stevel  * enabled if necessary.
   2187      0    stevel  *
   2188      0    stevel  * rmvq can be called with the stream frozen, but other utility functions
   2189      0    stevel  * holding QLOCK, and by streams modules without any locks/frozen.
   2190      0    stevel  */
   2191      0    stevel void
   2192      0    stevel rmvq(queue_t *q, mblk_t *mp)
   2193      0    stevel {
   2194      0    stevel 	ASSERT(mp != NULL);
   2195      0    stevel 
   2196      0    stevel 	rmvq_noenab(q, mp);
   2197      0    stevel 	if (curthread != STREAM(q)->sd_freezer && MUTEX_HELD(QLOCK(q))) {
   2198      0    stevel 		/*
   2199      0    stevel 		 * qbackenable can handle a frozen stream but not a "random"
   2200      0    stevel 		 * qlock being held. Drop lock across qbackenable.
   2201      0    stevel 		 */
   2202      0    stevel 		mutex_exit(QLOCK(q));
   2203      0    stevel 		qbackenable(q, mp->b_band);
   2204      0    stevel 		mutex_enter(QLOCK(q));
   2205      0    stevel 	} else {
   2206      0    stevel 		qbackenable(q, mp->b_band);
   2207      0    stevel 	}
   2208      0    stevel }
   2209      0    stevel 
   2210      0    stevel /*
   2211      0    stevel  * Like rmvq() but without any backenabling.
   2212      0    stevel  * This exists to handle SR_CONSOL_DATA in strrput().
   2213      0    stevel  */
   2214      0    stevel void
   2215      0    stevel rmvq_noenab(queue_t *q, mblk_t *mp)
   2216      0    stevel {
   2217      0    stevel 	int i;
   2218      0    stevel 	qband_t *qbp = NULL;
   2219      0    stevel 	kthread_id_t freezer;
   2220      0    stevel 	int	bytecnt = 0, mblkcnt = 0;
   2221      0    stevel 
   2222      0    stevel 	freezer = STREAM(q)->sd_freezer;
   2223      0    stevel 	if (freezer == curthread) {
   2224      0    stevel 		ASSERT(frozenstr(q));
   2225      0    stevel 		ASSERT(MUTEX_HELD(QLOCK(q)));
   2226      0    stevel 	} else if (MUTEX_HELD(QLOCK(q))) {
   2227      0    stevel 		/* Don't drop lock on exit */
   2228      0    stevel 		freezer = curthread;
   2229      0    stevel 	} else
   2230      0    stevel 		mutex_enter(QLOCK(q));
   2231      0    stevel 
   2232      0    stevel 	ASSERT(mp->b_band <= q->q_nband);
   2233      0    stevel 	if (mp->b_band != 0) {		/* Adjust band pointers */
   2234      0    stevel 		ASSERT(q->q_bandp != NULL);
   2235      0    stevel 		qbp = q->q_bandp;
   2236      0    stevel 		i = mp->b_band;
   2237      0    stevel 		while (--i > 0)
   2238      0    stevel 			qbp = qbp->qb_next;
   2239      0    stevel 		if (mp == qbp->qb_first) {
   2240      0    stevel 			if (mp->b_next && mp->b_band == mp->b_next->b_band)
   2241      0    stevel 				qbp->qb_first = mp->b_next;
   2242      0    stevel 			else
   2243      0    stevel 				qbp->qb_first = NULL;
   2244      0    stevel 		}
   2245      0    stevel 		if (mp == qbp->qb_last) {
   2246      0    stevel 			if (mp->b_prev && mp->b_band == mp->b_prev->b_band)
   2247      0    stevel 				qbp->qb_last = mp->b_prev;
   2248      0    stevel 			else
   2249      0    stevel 				qbp->qb_last = NULL;
   2250      0    stevel 		}
   2251      0    stevel 	}
   2252      0    stevel 
   2253      0    stevel 	/*
   2254      0    stevel 	 * Remove the message from the list.
   2255      0    stevel 	 */
   2256      0    stevel 	if (mp->b_prev)
   2257      0    stevel 		mp->b_prev->b_next = mp->b_next;
   2258      0    stevel 	else
   2259      0    stevel 		q->q_first = mp->b_next;
   2260      0    stevel 	if (mp->b_next)
   2261      0    stevel 		mp->b_next->b_prev = mp->b_prev;
   2262      0    stevel 	else
   2263      0    stevel 		q->q_last = mp->b_prev;
   2264      0    stevel 	mp->b_next = NULL;
   2265      0    stevel 	mp->b_prev = NULL;
   2266      0    stevel 
   2267      0    stevel 	/* Get the size of the message for q_count accounting */
   2268   6769   ja97890 	bytecnt = mp_cont_len(mp, &mblkcnt);
   2269      0    stevel 
   2270      0    stevel 	if (mp->b_band == 0) {		/* Perform q_count accounting */
   2271      0    stevel 		q->q_count -= bytecnt;
   2272      0    stevel 		q->q_mblkcnt -= mblkcnt;
   2273   5360  rk129064 		if (q->q_mblkcnt == 0 || ((q->q_count < q->q_hiwat) &&
   2274   5360  rk129064 		    (q->q_mblkcnt < q->q_hiwat))) {
   2275      0    stevel 			q->q_flag &= ~QFULL;
   2276      0    stevel 		}
   2277      0    stevel 	} else {			/* Perform qb_count accounting */
   2278      0    stevel 		qbp->qb_count -= bytecnt;
   2279      0    stevel 		qbp->qb_mblkcnt -= mblkcnt;
   2280   5360  rk129064 		if (qbp->qb_mblkcnt == 0 || ((qbp->qb_count < qbp->qb_hiwat) &&
   2281   5360  rk129064 		    (qbp->qb_mblkcnt < qbp->qb_hiwat))) {
   2282      0    stevel 			qbp->qb_flag &= ~QB_FULL;
   2283      0    stevel 		}
   2284      0    stevel 	}
   2285      0    stevel 	if (freezer != curthread)
   2286      0    stevel 		mutex_exit(QLOCK(q));
   2287      0    stevel 
   2288      0    stevel 	STR_FTEVENT_MSG(mp, q, FTEV_RMVQ, NULL);
   2289      0    stevel }
   2290      0    stevel 
   2291      0    stevel /*
   2292      0    stevel  * Empty a queue.
   2293      0    stevel  * If flag is set, remove all messages.  Otherwise, remove
   2294      0    stevel  * only non-control messages.  If queue falls below its low
   2295      0    stevel  * water mark, and QWANTW is set, enable the nearest upstream
   2296      0    stevel  * service procedure.
   2297      0    stevel  *
   2298      0    stevel  * Historical note: when merging the M_FLUSH code in strrput with this
   2299      0    stevel  * code one difference was discovered. flushq did not have a check
   2300      0    stevel  * for q_lowat == 0 in the backenabling test.
   2301      0    stevel  *
   2302      0    stevel  * pcproto_flag specifies whether or not a M_PCPROTO message should be flushed
   2303      0    stevel  * if one exists on the queue.
   2304      0    stevel  */
   2305      0    stevel void
   2306      0    stevel flushq_common(queue_t *q, int flag, int pcproto_flag)
   2307      0    stevel {
   2308      0    stevel 	mblk_t *mp, *nmp;
   2309      0    stevel 	qband_t *qbp;
   2310      0    stevel 	int backenab = 0;
   2311      0    stevel 	unsigned char bpri;
   2312      0    stevel 	unsigned char	qbf[NBAND];	/* band flushing backenable flags */
   2313      0    stevel 
   2314      0    stevel 	if (q->q_first == NULL)
   2315      0    stevel 		return;
   2316      0    stevel 
   2317      0    stevel 	mutex_enter(QLOCK(q));
   2318      0    stevel 	mp = q->q_first;
   2319      0    stevel 	q->q_first = NULL;
   2320      0    stevel 	q->q_last = NULL;
   2321      0    stevel 	q->q_count = 0;
   2322      0    stevel 	q->q_mblkcnt = 0;
   2323      0    stevel 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
   2324      0    stevel 		qbp->qb_first = NULL;
   2325      0    stevel 		qbp->qb_last = NULL;
   2326      0    stevel 		qbp->qb_count = 0;
   2327      0    stevel 		qbp->qb_mblkcnt = 0;
   2328      0    stevel 		qbp->qb_flag &= ~QB_FULL;
   2329      0    stevel 	}
   2330      0    stevel 	q->q_flag &= ~QFULL;
   2331      0    stevel 	mutex_exit(QLOCK(q));
   2332      0    stevel 	while (mp) {
   2333      0    stevel 		nmp = mp->b_next;
   2334      0    stevel 		mp->b_next = mp->b_prev = NULL;
   2335      0    stevel 
   2336      0    stevel 		STR_FTEVENT_MBLK(mp, q, FTEV_FLUSHQ, NULL);
   2337      0    stevel 
   2338      0    stevel 		if (pcproto_flag && (mp->b_datap->db_type == M_PCPROTO))
   2339      0    stevel 			(void) putq(q, mp);
   2340      0    stevel 		else if (flag || datamsg(mp->b_datap->db_type))
   2341      0    stevel 			freemsg(mp);
   2342      0    stevel 		else
   2343      0    stevel 			(void) putq(q, mp);
   2344      0    stevel 		mp = nmp;
   2345      0    stevel 	}
   2346      0    stevel 	bpri = 1;
   2347      0    stevel 	mutex_enter(QLOCK(q));
   2348      0    stevel 	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
   2349      0    stevel 		if ((qbp->qb_flag & QB_WANTW) &&
   2350      0    stevel 		    (((qbp->qb_count < qbp->qb_lowat) &&
   2351      0    stevel 		    (qbp->qb_mblkcnt < qbp->qb_lowat)) ||
   2352      0    stevel 		    qbp->qb_lowat == 0)) {
   2353      0    stevel 			qbp->qb_flag &= ~QB_WANTW;
   2354      0    stevel 			backenab = 1;
   2355      0    stevel 			qbf[bpri] = 1;
   2356      0    stevel 		} else
   2357      0    stevel 			qbf[bpri] = 0;
   2358      0    stevel 		bpri++;
   2359      0    stevel 	}
   2360      0    stevel 	ASSERT(bpri == (unsigned char)(q->q_nband + 1));
   2361      0    stevel 	if ((q->q_flag & QWANTW) &&
   2362      0    stevel 	    (((q->q_count < q->q_lowat) &&
   2363      0    stevel 	    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
   2364      0    stevel 		q->q_flag &= ~QWANTW;
   2365      0    stevel 		backenab = 1;
   2366      0    stevel 		qbf[0] = 1;
   2367      0    stevel 	} else
   2368      0    stevel 		qbf[0] = 0;
   2369      0    stevel 
   2370      0    stevel 	/*
   2371      0    stevel 	 * If any band can now be written to, and there is a writer
   2372      0    stevel 	 * for that band, then backenable the closest service procedure.
   2373      0    stevel 	 */
   2374      0    stevel 	if (backenab) {
   2375      0    stevel 		mutex_exit(QLOCK(q));
   2376      0    stevel 		for (bpri = q->q_nband; bpri != 0; bpri--)
   2377      0    stevel 			if (qbf[bpri])
   2378    235   micheng 				backenable(q, bpri);
   2379      0    stevel 		if (qbf[0])
   2380      0    stevel 			backenable(q, 0);
   2381      0    stevel 	} else
   2382      0    stevel 		mutex_exit(QLOCK(q));
   2383      0    stevel }
   2384      0    stevel 
   2385      0    stevel /*
   2386      0    stevel  * The real flushing takes place in flushq_common. This is done so that
   2387      0    stevel  * a flag which specifies whether or not M_PCPROTO messages should be flushed
   2388      0    stevel  * or not. Currently the only place that uses this flag is the stream head.
   2389      0    stevel  */
   2390      0    stevel void
   2391      0    stevel flushq(queue_t *q, int flag)
   2392      0    stevel {
   2393      0    stevel 	flushq_common(q, flag, 0);
   2394      0    stevel }
   2395      0    stevel 
   2396      0    stevel /*
   2397      0    stevel  * Flush the queue of messages of the given priority band.
   2398      0    stevel  * There is some duplication of code between flushq and flushband.
   2399      0    stevel  * This is because we want to optimize the code as much as possible.
   2400      0    stevel  * The assumption is that there will be more messages in the normal
   2401      0    stevel  * (priority 0) band than in any other.
   2402      0    stevel  *
   2403      0    stevel  * Historical note: when merging the M_FLUSH code in strrput with this
   2404      0    stevel  * code one difference was discovered. flushband had an extra check for
   2405      0    stevel  * did not have a check for (mp->b_datap->db_type < QPCTL) in the band 0
   2406      0    stevel  * case. That check does not match the man page for flushband and was not
   2407      0    stevel  * in the strrput flush code hence it was removed.
   2408      0    stevel  */
   2409      0    stevel void
   2410      0    stevel flushband(queue_t *q, unsigned char pri, int flag)
   2411      0    stevel {
   2412      0    stevel 	mblk_t *mp;
   2413      0    stevel 	mblk_t *nmp;
   2414      0    stevel 	mblk_t *last;
   2415      0    stevel 	qband_t *qbp;
   2416      0    stevel 	int band;
   2417      0    stevel 
   2418      0    stevel 	ASSERT((flag == FLUSHDATA) || (flag == FLUSHALL));
   2419      0    stevel 	if (pri > q->q_nband) {
   2420      0    stevel 		return;
   2421      0    stevel 	}
   2422      0    stevel 	mutex_enter(QLOCK(q));
   2423      0    stevel 	if (pri == 0) {
   2424      0    stevel 		mp = q->q_first;
   2425      0    stevel 		q->q_first = NULL;
   2426      0    stevel 		q->q_last = NULL;
   2427      0    stevel 		q->q_count = 0;
   2428      0    stevel 		q->q_mblkcnt = 0;
   2429      0    stevel 		for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
   2430      0    stevel 			qbp->qb_first = NULL;
   2431      0    stevel 			qbp->qb_last = NULL;
   2432      0    stevel 			qbp->qb_count = 0;
   2433      0    stevel 			qbp->qb_mblkcnt = 0;
   2434      0    stevel 			qbp->qb_flag &= ~QB_FULL;
   2435      0    stevel 		}
   2436      0    stevel 		q->q_flag &= ~QFULL;
   2437      0    stevel 		mutex_exit(QLOCK(q));
   2438      0    stevel 		while (mp) {
   2439      0    stevel 			nmp = mp->b_next;
   2440      0    stevel 			mp->b_next = mp->b_prev = NULL;
   2441      0    stevel 			if ((mp->b_band == 0) &&
   2442   6707    brutus 			    ((flag == FLUSHALL) ||
   2443   6707    brutus 			    datamsg(mp->b_datap->db_type)))
   2444      0    stevel 				freemsg(mp);
   2445      0    stevel 			else
   2446      0    stevel 				(void) putq(q, mp);
   2447      0    stevel 			mp = nmp;
   2448      0    stevel 		}
   2449      0    stevel 		mutex_enter(QLOCK(q));
   2450      0    stevel 		if ((q->q_flag & QWANTW) &&
   2451      0    stevel 		    (((q->q_count < q->q_lowat) &&
   2452      0    stevel 		    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
   2453      0    stevel 			q->q_flag &= ~QWANTW;
   2454      0    stevel 			mutex_exit(QLOCK(q));
   2455      0    stevel 
   2456    235   micheng 			backenable(q, pri);
   2457      0    stevel 		} else
   2458      0    stevel 			mutex_exit(QLOCK(q));
   2459      0    stevel 	} else {	/* pri != 0 */
   2460      0    stevel 		boolean_t flushed = B_FALSE;
   2461      0    stevel 		band = pri;
   2462      0    stevel 
   2463      0    stevel 		ASSERT(MUTEX_HELD(QLOCK(q)));
   2464      0    stevel 		qbp = q->q_bandp;
   2465      0    stevel 		while (--band > 0)
   2466      0    stevel 			qbp = qbp->qb_next;
   2467      0    stevel 		mp = qbp->qb_first;
   2468      0    stevel 		if (mp == NULL) {
   2469      0    stevel 			mutex_exit(QLOCK(q));
   2470      0    stevel 			return;
   2471      0    stevel 		}
   2472      0    stevel 		last = qbp->qb_last->b_next;
   2473      0    stevel 		/*
   2474      0    stevel 		 * rmvq_noenab() and freemsg() are called for each mblk that
   2475      0    stevel 		 * meets the criteria.  The loop is executed until the last
   2476      0    stevel 		 * mblk has been processed.
   2477      0    stevel 		 */
   2478      0    stevel 		while (mp != last) {
   2479      0    stevel 			ASSERT(mp->b_band == pri);
   2480      0    stevel 			nmp = mp->b_next;
   2481      0    stevel 			if (flag == FLUSHALL || datamsg(mp->b_datap->db_type)) {
   2482      0    stevel 				rmvq_noenab(q, mp);
   2483      0    stevel 				freemsg(mp);
   2484      0    stevel 				flushed = B_TRUE;
   2485      0    stevel 			}
   2486      0    stevel 			mp = nmp;
   2487      0    stevel 		}
   2488      0    stevel 		mutex_exit(QLOCK(q));
   2489      0    stevel 
   2490      0    stevel 		/*
   2491      0    stevel 		 * If any mblk(s) has been freed, we know that qbackenable()
   2492      0    stevel 		 * will need to be called.
   2493      0    stevel 		 */
   2494      0    stevel 		if (flushed)
   2495    235   micheng 			qbackenable(q, pri);
   2496      0    stevel 	}
   2497      0    stevel }
   2498      0    stevel 
   2499      0    stevel /*
   2500      0    stevel  * Return 1 if the queue is not full.  If the queue is full, return
   2501      0    stevel  * 0 (may not put message) and set QWANTW flag (caller wants to write
   2502      0    stevel  * to the queue).
   2503      0    stevel  */
   2504      0    stevel int
   2505      0    stevel canput(queue_t *q)
   2506      0    stevel {
   2507      0    stevel 	TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUT_IN, "canput:%p", q);
   2508      0    stevel 
   2509      0    stevel 	/* this is for loopback transports, they should not do a canput */
   2510      0    stevel 	ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(q->q_nfsrv));
   2511      0    stevel 
   2512      0    stevel 	/* Find next forward module that has a service procedure */
   2513      0    stevel 	q = q->q_nfsrv;
   2514      0    stevel 
   2515      0    stevel 	if (!(q->q_flag & QFULL)) {
   2516      0    stevel 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
   2517      0    stevel 		return (1);
   2518      0    stevel 	}
   2519      0    stevel 	mutex_enter(QLOCK(q));
   2520      0    stevel 	if (q->q_flag & QFULL) {
   2521      0    stevel 		q->q_flag |= QWANTW;
   2522      0    stevel 		mutex_exit(QLOCK(q));
   2523      0    stevel 		TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 0);
   2524      0    stevel 		return (0);
   2525      0    stevel 	}
   2526      0    stevel 	mutex_exit(QLOCK(q));
   2527      0    stevel 	TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1);
   2528      0    stevel 	return (1);
   2529      0    stevel }
   2530      0    stevel 
   2531      0    stevel /*
   2532      0    stevel  * This is the new canput for use with priority bands.  Return 1 if the
   2533      0    stevel  * band is not full.  If the band is full, return 0 (may not put message)
   2534      0    stevel  * and set QWANTW(QB_WANTW) flag for zero(non-zero) band (caller wants to
   2535      0    stevel  * write to the queue).
   2536      0    stevel  */
   2537      0    stevel int
   2538      0    stevel bcanput(queue_t *q, unsigned char pri)
   2539      0    stevel {
   2540      0    stevel 	qband_t *qbp;
   2541      0    stevel 
   2542      0    stevel 	TRACE_2(TR_FAC_STREAMS_FR, TR_BCANPUT_IN, "bcanput:%p %p", q, pri);
   2543      0    stevel 	if (!q)
   2544      0    stevel 		return (0);
   2545      0    stevel 
   2546      0    stevel 	/* Find next forward module that has a service procedure */
   2547      0    stevel 	q = q->q_nfsrv;
   2548      0    stevel 
   2549      0    stevel 	mutex_enter(QLOCK(q));
   2550      0    stevel 	if (pri == 0) {
   2551      0    stevel 		if (q->q_flag & QFULL) {
   2552      0    stevel 			q->q_flag |= QWANTW;
   2553      0    stevel 			mutex_exit(QLOCK(q));
   2554      0    stevel 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
   2555   6707    brutus 			    "bcanput:%p %X %d", q, pri, 0);
   2556      0    stevel 			return (0);
   2557      0    stevel 		}
   2558      0    stevel 	} else {	/* pri != 0 */
   2559      0    stevel 		if (pri > q->q_nband) {
   2560      0    stevel 			/*
   2561      0    stevel 			 * No band exists yet, so return success.
   2562      0    stevel 			 */
   2563      0    stevel 			mutex_exit(QLOCK(q));
   2564      0    stevel 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
   2565   6707    brutus 			    "bcanput:%p %X %d", q, pri, 1);
   2566      0    stevel 			return (1);
   2567      0    stevel 		}
   2568      0    stevel 		qbp = q->q_bandp;
   2569      0    stevel 		while (--pri)
   2570      0    stevel 			qbp = qbp->qb_next;
   2571      0    stevel 		if (qbp->qb_flag & QB_FULL) {
   2572      0    stevel 			qbp->qb_flag |= QB_WANTW;
   2573      0    stevel 			mutex_exit(QLOCK(q));
   2574      0    stevel 			TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
   2575   6707    brutus 			    "bcanput:%p %X %d", q, pri, 0);
   2576      0    stevel 			return (0);
   2577      0    stevel 		}
   2578      0    stevel 	}
   2579      0    stevel 	mutex_exit(QLOCK(q));
   2580      0    stevel 	TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT,
   2581   6707    brutus 	    "bcanput:%p %X %d", q, pri, 1);
   2582      0    stevel 	return (1);
   2583      0    stevel }
   2584      0    stevel 
   2585      0    stevel /*
   2586      0    stevel  * Put a message on a queue.
   2587      0    stevel  *
   2588      0    stevel  * Messages are enqueued on a priority basis.  The priority classes
   2589      0    stevel  * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0),
   2590      0    stevel  * and B_NORMAL (type < QPCTL && band == 0).
   2591      0    stevel  *
   2592      0    stevel  * Add appropriate weighted data block sizes to queue count.
   2593      0    stevel  * If queue hits high water mark then set QFULL flag.
   2594      0    stevel  *
   2595      0    stevel  * If QNOENAB is not set (putq is allowed to enable the queue),
   2596      0    stevel  * enable the queue only if the message is PRIORITY,
   2597      0    stevel  * or the QWANTR flag is set (indicating that the service procedure
   2598      0    stevel  * is ready to read the queue.  This implies that a service
   2599      0    stevel  * procedure must NEVER put a high priority message back on its own
   2600      0    stevel  * queue, as this would result in an infinite loop (!).
   2601      0    stevel  */
   2602      0    stevel int
   2603      0    stevel putq(queue_t *q, mblk_t *bp)
   2604      0    stevel {
   2605      0    stevel 	mblk_t *tmp;
   2606      0    stevel 	qband_t *qbp = NULL;
   2607      0    stevel 	int mcls = (int)queclass(bp);
   2608      0    stevel 	kthread_id_t freezer;
   2609      0    stevel 	int	bytecnt = 0, mblkcnt = 0;
   2610      0    stevel 
   2611      0    stevel 	freezer = STREAM(q)->sd_freezer;
   2612      0    stevel 	if (freezer == curthread) {
   2613      0    stevel 		ASSERT(frozenstr(q));
   2614      0    stevel 		ASSERT(MUTEX_HELD(QLOCK(q)));
   2615      0    stevel 	} else
   2616      0    stevel 		mutex_enter(QLOCK(q));
   2617      0    stevel 
   2618      0    stevel 	/*
   2619      0    stevel 	 * Make sanity checks and if qband structure is not yet
   2620      0    stevel 	 * allocated, do so.
   2621      0    stevel 	 */
   2622      0    stevel 	if (mcls == QPCTL) {
   2623      0    stevel 		if (bp->b_band != 0)
   2624      0    stevel 			bp->b_band = 0;		/* force to be correct */
   2625      0    stevel 	} else if (bp->b_band != 0) {
   2626      0    stevel 		int i;
   2627      0    stevel 		qband_t **qbpp;
   2628      0    stevel 
   2629      0    stevel 		if (bp->b_band > q->q_nband) {
   2630      0    stevel 
   2631      0    stevel 			/*
   2632      0    stevel 			 * The qband structure for this priority band is
   2633      0    stevel 			 * not on the queue yet, so we have to allocate
   2634      0    stevel 			 * one on the fly.  It would be wasteful to
   2635      0    stevel 			 * associate the qband structures with every
   2636      0    stevel 			 * queue when the queues are allocated.  This is
   2637      0    stevel 			 * because most queues will only need the normal
   2638      0    stevel 			 * band of flow which can be described entirely
   2639      0    stevel 			 * by the queue itself.
   2640      0    stevel 			 */
   2641      0    stevel 			qbpp = &q->q_bandp;
   2642      0    stevel 			while (*qbpp)
   2643      0    stevel 				qbpp = &(*qbpp)->qb_next;
   2644      0    stevel 			while (bp->b_band > q->q_nband) {
   2645      0    stevel 				if ((*qbpp = allocband()) == NULL) {
   2646      0    stevel 					if (freezer != curthread)
   2647      0    stevel 						mutex_exit(QLOCK(q));
   2648      0    stevel 					return (0);
   2649      0    stevel 				}
   2650      0    stevel 				(*qbpp)->qb_hiwat = q->q_hiwat;
   2651      0    stevel 				(*qbpp)->qb_lowat = q->q_lowat;
   2652      0    stevel 				q->q_nband++;
   2653      0    stevel 				qbpp = &(*qbpp)->qb_next;
   2654      0    stevel 			}
   2655      0    stevel 		}
   2656      0    stevel 		ASSERT(MUTEX_HELD(QLOCK(q)));
   2657      0    stevel 		qbp = q->q_bandp;
   2658      0    stevel 		i = bp->b_band;
   2659      0    stevel 		while (--i)
   2660      0    stevel 			qbp = qbp->qb_next;
   2661      0    stevel 	}
   2662      0    stevel 
   2663      0    stevel 	/*
   2664      0    stevel 	 * If queue is empty, add the message and initialize the pointers.
   2665      0    stevel 	 * Otherwise, adjust message pointers and queue pointers based on
   2666      0    stevel 	 * the type of the message and where it belongs on the queue.  Some
   2667      0    stevel 	 * code is duplicated to minimize the number of conditionals and
   2668      0    stevel 	 * hopefully minimize the amount of time this routine takes.
   2669      0    stevel 	 */
   2670      0    stevel 	if (!q->q_first) {
   2671      0    stevel 		bp->b_next = NULL;
   2672      0    stevel 		bp->b_prev = NULL;
   2673      0    stevel 		q->q_first = bp;
   2674      0    stevel 		q->q_last = bp;
   2675      0    stevel 		if (qbp) {
   2676      0    stevel 			qbp->qb_first = bp;
   2677      0    stevel 			qbp->qb_last = bp;
   2678      0    stevel 		}
   2679      0    stevel 	} else if (!qbp) {	/* bp->b_band == 0 */
   2680      0    stevel 
   2681      0    stevel 		/*
   2682      0    stevel 		 * If queue class of message is less than or equal to
   2683      0    stevel 		 * that of the last one on the queue, tack on to the end.
   2684      0    stevel 		 */
   2685      0    stevel 		tmp = q->q_last;
   2686      0    stevel 		if (mcls <= (int)queclass(tmp)) {
   2687      0    stevel 			bp->b_next = NULL;
   2688      0    stevel 			bp->b_prev = tmp;
   2689      0    stevel 			tmp->b_next = bp;
   2690      0    stevel 			q->q_last = bp;
   2691      0    stevel 		} else {
   2692      0    stevel 			tmp = q->q_first;
   2693      0    stevel 			while ((int)queclass(tmp) >= mcls)
   2694      0    stevel 				tmp = tmp->b_next;
   2695      0    stevel 
   2696      0    stevel 			/*
   2697      0    stevel 			 * Insert bp before tmp.
   2698      0    stevel 			 */
   2699      0    stevel 			bp->b_next = tmp;
   2700      0    stevel 			bp->b_prev = tmp->b_prev;
   2701      0    stevel 			if (tmp->b_prev)
   2702      0    stevel 				tmp->b_prev->b_next = bp;
   2703      0    stevel 			else
   2704      0    stevel 				q->q_first = bp;
   2705      0    stevel 			tmp->b_prev = bp;
   2706      0    stevel 		}
   2707      0    stevel 	} else {		/* bp->b_band != 0 */
   2708      0    stevel 		if (qbp->qb_first) {
   2709      0    stevel 			tmp = qbp->qb_last;
   2710      0    stevel 
   2711      0    stevel 			/*
   2712      0    stevel 			 * Insert bp after the last message in this band.
   2713      0    stevel 			 */
   2714      0    stevel 			bp->b_next = tmp->b_next;
   2715      0    stevel 			if (tmp->b_next)
   2716      0    stevel 				tmp->b_next->b_prev = bp;
   2717      0    stevel 			else
   2718      0    stevel 				q->q_last = bp;
   2719      0    stevel 			bp->b_prev = tmp;
   2720      0    stevel 			tmp->b_next = bp;
   2721      0    stevel 		} else {
   2722      0    stevel 			tmp = q->q_last;
   2723      0    stevel 			if ((mcls < (int)queclass(tmp)) ||
   2724      0    stevel 			    (bp->b_band <= tmp->b_band)) {
   2725      0    stevel 
   2726      0    stevel 				/*
   2727      0    stevel 				 * Tack bp on end of queue.
   2728      0    stevel 				 */
   2729      0    stevel 				bp->b_next = NULL;
   2730      0    stevel 				bp->b_prev = tmp;
   2731      0    stevel 				tmp->b_next = bp;
   2732      0    stevel 				q->q_last = bp;
   2733      0    stevel 			} else {
   2734      0    stevel 				tmp = q->q_first;
   2735      0    stevel 				while (tmp->b_datap->db_type >= QPCTL)
   2736      0    stevel 					tmp = tmp->b_next;
   2737      0    stevel 				while (tmp->b_band >= bp->b_band)
   2738      0    stevel 					tmp = tmp->b_next;
   2739      0    stevel 
   2740      0    stevel 				/*
   2741      0    stevel 				 * Insert bp before tmp.
   2742      0    stevel 				 */
   2743      0    stevel 				bp->b_next = tmp;
   2744      0    stevel 				bp->b_prev = tmp->b_prev;
   2745      0    stevel 				if (tmp->b_prev)
   2746      0    stevel 					tmp->b_prev->b_next = bp;
   2747      0    stevel 				else
   2748      0    stevel 					q->q_first = bp;
   2749      0    stevel 				tmp->b_prev = bp;
   2750      0    stevel 			}
   2751      0    stevel 			qbp->qb_first = bp;
   2752      0    stevel 		}
   2753      0    stevel 		qbp->qb_last = bp;
   2754      0    stevel 	}
   2755      0    stevel 
   2756      0    stevel 	/* Get message byte count for q_count accounting */
   2757   6769   ja97890 	bytecnt = mp_cont_len(bp, &mblkcnt);
   2758    741  masputra 
   2759      0    stevel 	if (qbp) {
   2760      0    stevel 		qbp->qb_count += bytecnt;
   2761      0    stevel 		qbp->qb_mblkcnt += mblkcnt;
   2762      0    stevel 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
   2763      0    stevel 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
   2764      0    stevel 			qbp->qb_flag |= QB_FULL;
   2765      0    stevel 		}
   2766      0    stevel 	} else {
   2767      0    stevel 		q->q_count += bytecnt;
   2768      0    stevel 		q->q_mblkcnt += mblkcnt;
   2769      0    stevel 		if ((q->q_count >= q->q_hiwat) ||
   2770      0    stevel 		    (q->q_mblkcnt >= q->q_hiwat)) {
   2771      0    stevel 			q->q_flag |= QFULL;
   2772      0    stevel 		}
   2773      0    stevel 	}
   2774      0    stevel 
   2775      0    stevel 	STR_FTEVENT_MSG(bp, q, FTEV_PUTQ, NULL);
   2776      0    stevel 
   2777      0    stevel 	if ((mcls > QNORM) ||
   2778      0    stevel 	    (canenable(q) && (q->q_flag & QWANTR || bp->b_band)))
   2779      0    stevel 		qenable_locked(q);
   2780      0    stevel 	ASSERT(MUTEX_HELD(QLOCK(q)));
   2781      0    stevel 	if (freezer != curthread)
   2782      0    stevel 		mutex_exit(QLOCK(q));
   2783      0    stevel 
   2784      0    stevel 	return (1);
   2785      0    stevel }
   2786      0    stevel 
   2787      0    stevel /*
   2788      0    stevel  * Put stuff back at beginning of Q according to priority order.
   2789      0    stevel  * See comment on putq above for details.
   2790      0    stevel  */
   2791      0    stevel int
   2792      0    stevel putbq(queue_t *q, mblk_t *bp)
   2793      0    stevel {
   2794      0    stevel 	mblk_t *tmp;
   2795      0    stevel 	qband_t *qbp = NULL;
   2796      0    stevel 	int mcls = (int)queclass(bp);
   2797      0    stevel 	kthread_id_t freezer;
   2798      0    stevel 	int	bytecnt = 0, mblkcnt = 0;
   2799      0    stevel 
   2800      0    stevel 	ASSERT(q && bp);
   2801      0    stevel 	ASSERT(bp->b_next == NULL);
   2802      0    stevel 	freezer = STREAM(q)->sd_freezer;
   2803      0    stevel 	if (freezer == curthread) {
   2804      0    stevel 		ASSERT(frozenstr(q));
   2805      0    stevel 		ASSERT(MUTEX_HELD(QLOCK(q)));
   2806      0    stevel 	} else
   2807      0    stevel 		mutex_enter(QLOCK(q));
   2808      0    stevel 
   2809      0    stevel 	/*
   2810      0    stevel 	 * Make sanity checks and if qband structure is not yet
   2811      0    stevel 	 * allocated, do so.
   2812      0    stevel 	 */
   2813      0    stevel 	if (mcls == QPCTL) {
   2814      0    stevel 		if (bp->b_band != 0)
   2815      0    stevel 			bp->b_band = 0;		/* force to be correct */
   2816      0    stevel 	} else if (bp->b_band != 0) {
   2817      0    stevel 		int i;
   2818      0    stevel 		qband_t **qbpp;
   2819      0    stevel 
   2820      0    stevel 		if (bp->b_band > q->q_nband) {
   2821      0    stevel 			qbpp = &q->q_bandp;
   2822      0    stevel 			while (*qbpp)
   2823      0    stevel 				qbpp = &(*qbpp)->qb_next;
   2824      0    stevel 			while (bp->b_band > q->q_nband) {
   2825      0    stevel 				if ((*qbpp = allocband()) == NULL) {
   2826      0    stevel 					if (freezer != curthread)
   2827      0    stevel 						mutex_exit(QLOCK(q));
   2828      0    stevel 					return (0);
   2829      0    stevel 				}
   2830      0    stevel 				(*qbpp)->qb_hiwat = q->q_hiwat;
   2831      0    stevel 				(*qbpp)->qb_lowat = q->q_lowat;
   2832      0    stevel 				q->q_nband++;
   2833      0    stevel 				qbpp = &(*qbpp)->qb_next;
   2834      0    stevel 			}
   2835      0    stevel 		}
   2836      0    stevel 		qbp = q->q_bandp;
   2837      0    stevel 		i = bp->b_band;
   2838      0    stevel 		while (--i)
   2839      0    stevel 			qbp = qbp->qb_next;
   2840      0    stevel 	}
   2841      0    stevel 
   2842      0    stevel 	/*
   2843      0    stevel 	 * If queue is empty or if message is high priority,
   2844      0    stevel 	 * place on the front of the queue.
   2845      0    stevel 	 */
   2846      0    stevel 	tmp = q->q_first;
   2847      0    stevel 	if ((!tmp) || (mcls == QPCTL)) {
   2848      0    stevel 		bp->b_next = tmp;
   2849      0    stevel 		if (tmp)
   2850      0    stevel 			tmp->b_prev = bp;
   2851      0    stevel 		else
   2852      0    stevel 			q->q_last = bp;
   2853      0    stevel 		q->q_first = bp;
   2854      0    stevel 		bp->b_prev = NULL;
   2855      0    stevel 		if (qbp) {
   2856      0    stevel 			qbp->qb_first = bp;
   2857      0    stevel 			qbp->qb_last = bp;
   2858      0    stevel 		}
   2859      0    stevel 	} else if (qbp) {	/* bp->b_band != 0 */
   2860      0    stevel 		tmp = qbp->qb_first;
   2861      0    stevel 		if (tmp) {
   2862      0    stevel 
   2863      0    stevel 			/*
   2864      0    stevel 			 * Insert bp before the first message in this band.
   2865      0    stevel 			 */
   2866      0    stevel 			bp->b_next = tmp;
   2867      0    stevel 			bp->b_prev = tmp->b_prev;
   2868      0    stevel 			if (tmp->b_prev)
   2869      0    stevel 				tmp->b_prev->b_next = bp;
   2870      0    stevel 			else
   2871      0    stevel 				q->q_first = bp;
   2872      0    stevel 			tmp->b_prev = bp;
   2873      0    stevel 		} else {
   2874      0    stevel 			tmp = q->q_last;
   2875      0    stevel 			if ((mcls < (int)queclass(tmp)) ||
   2876      0    stevel 			    (bp->b_band < tmp->b_band)) {
   2877      0    stevel 
   2878      0    stevel 				/*
   2879      0    stevel 				 * Tack bp on end of queue.
   2880      0    stevel 				 */
   2881      0    stevel 				bp->b_next = NULL;
   2882      0    stevel 				bp->b_prev = tmp;
   2883      0    stevel 				tmp->b_next = bp;
   2884      0    stevel 				q->q_last = bp;
   2885      0    stevel 			} else {
   2886      0    stevel 				tmp = q->q_first;
   2887      0    stevel 				while (tmp->b_datap->db_type >= QPCTL)
   2888      0    stevel 					tmp = tmp->b_next;
   2889      0    stevel 				while (tmp->b_band > bp->b_band)
   2890      0    stevel 					tmp = tmp->b_next;
   2891      0    stevel 
   2892      0    stevel 				/*
   2893      0    stevel 				 * Insert bp before tmp.
   2894      0    stevel 				 */
   2895      0    stevel 				bp->b_next = tmp;
   2896      0    stevel 				bp->b_prev = tmp->b_prev;
   2897      0    stevel 				if (tmp->b_prev)
   2898      0    stevel 					tmp->b_prev->b_next = bp;
   2899      0    stevel 				else
   2900      0    stevel 					q->q_first = bp;
   2901      0    stevel 				tmp->b_prev = bp;
   2902      0    stevel 			}
   2903      0    stevel 			qbp->qb_last = bp;
   2904      0    stevel 		}
   2905      0    stevel 		qbp->qb_first = bp;
   2906      0    stevel 	} else {		/* bp->b_band == 0 && !QPCTL */
   2907      0    stevel 
   2908      0    stevel 		/*
   2909      0    stevel 		 * If the queue class or band is less than that of the last
   2910      0    stevel 		 * message on the queue, tack bp on the end of the queue.
   2911      0    stevel 		 */
   2912      0    stevel 		tmp = q->q_last;
   2913      0    stevel 		if ((mcls < (int)queclass(tmp)) || (bp->b_band < tmp->b_band)) {
   2914      0    stevel 			bp->b_next = NULL;
   2915      0    stevel 			bp->b_prev = tmp;
   2916      0    stevel 			tmp->b_next = bp;
   2917      0    stevel 			q->q_last = bp;
   2918      0    stevel 		} else {
   2919      0    stevel 			tmp = q->q_first;
   2920      0    stevel 			while (tmp->b_datap->db_type >= QPCTL)
   2921      0    stevel 				tmp = tmp->b_next;
   2922      0    stevel 			while (tmp->b_band > bp->b_band)
   2923      0    stevel 				tmp = tmp->b_next;
   2924      0    stevel 
   2925      0    stevel 			/*
   2926      0    stevel 			 * Insert bp before tmp.
   2927      0    stevel 			 */
   2928      0    stevel 			bp->b_next = tmp;
   2929      0    stevel 			bp->b_prev = tmp->b_prev;
   2930      0    stevel 			if (tmp->b_prev)
   2931      0    stevel 				tmp->b_prev->b_next = bp;
   2932      0    stevel 			else
   2933      0    stevel 				q->q_first = bp;
   2934      0    stevel 			tmp->b_prev = bp;
   2935      0    stevel 		}
   2936      0    stevel 	}
   2937      0    stevel 
   2938      0    stevel 	/* Get message byte count for q_count accounting */
   2939   6769   ja97890 	bytecnt = mp_cont_len(bp, &mblkcnt);
   2940   6769   ja97890 
   2941      0    stevel 	if (qbp) {
   2942      0    stevel 		qbp->qb_count += bytecnt;
   2943      0    stevel 		qbp->qb_mblkcnt += mblkcnt;
   2944      0    stevel 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
   2945      0    stevel 		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
   2946      0    stevel 			qbp->qb_flag |= QB_FULL;
   2947      0    stevel 		}
   2948      0    stevel 	} else {
   2949      0    stevel 		q->q_count += bytecnt;
   2950      0    stevel 		q->q_mblkcnt += mblkcnt;
   2951      0    stevel 		if ((q->q_count >= q->q_hiwat) ||
   2952      0    stevel 		    (q->q_mblkcnt >= q->q_hiwat)) {
   2953      0    stevel 			q->q_flag |= QFULL;
   2954      0    stevel 		}
   2955      0    stevel 	}
   2956      0    stevel 
   2957      0    stevel 	STR_FTEVENT_MSG(bp, q, FTEV_PUTBQ, NULL);
   2958      0    stevel 
   2959      0    stevel 	if ((mcls > QNORM) || (canenable(q) && (q->q_flag & QWANTR)))
   2960      0    stevel 		qenable_locked(q);
   2961      0    stevel 	ASSERT(MUTEX_HELD(QLOCK(q)));
   2962      0    stevel 	if (freezer != curthread)
   2963      0    stevel 		mutex_exit(QLOCK(q));
   2964      0    stevel 
   2965      0    stevel 	return (1);
   2966      0    stevel }
   2967      0    stevel 
   2968      0    stevel /*
   2969      0    stevel  * Insert a message before an existing message on the queue.  If the
   2970      0    stevel  * existing message is NULL, the new message is placed on the end of
   2971      0    stevel  * the queue.  The queue class of the new message is ignored.  However,
   2972      0    stevel  * the priority band of the new message must adhere to the following
   2973      0    stevel  * ordering:
   2974      0    stevel  *
   2975      0    stevel  *	emp->b_prev->b_band >= mp->b_band >= emp->b_band.
   2976      0    stevel  *
   2977      0    stevel  * All flow control parameters are updated.
   2978      0    stevel  *
   2979      0    stevel  * insq can be called with the stream frozen, by other utility functions
   2980      0    stevel  * holding QLOCK, and by streams modules without any locks/frozen.
   2981      0    stevel  */
   2982      0    stevel int
   2983      0    stevel insq(queue_t *q, mblk_t *emp, mblk_t *mp)
   2984      0    stevel {
   2985      0    stevel 	mblk_t *tmp;
   2986      0    stevel 	qband_t *qbp = NULL;
   2987      0    stevel 	int mcls = (int)queclass(mp);
   2988      0    stevel 	kthread_id_t freezer;
   2989      0    stevel 	int	bytecnt = 0, mblkcnt = 0;
   2990      0    stevel 
   2991      0    stevel 	freezer = STREAM(q)->sd_freezer;
   2992      0    stevel 	if (freezer == curthread) {
   2993      0    stevel 		ASSERT(frozenstr(q));
   2994      0    stevel 		ASSERT(MUTEX_HELD(QLOCK(q)));
   2995      0    stevel 	} else if (MUTEX_HELD(QLOCK(q))) {
   2996      0    stevel 		/* Don't drop lock on exit */
   2997      0    stevel 		freezer = curthread;
   2998      0    stevel 	} else
   2999      0    stevel 		mutex_enter(QLOCK(q));
   3000      0    stevel 
   3001      0    stevel 	if (mcls == QPCTL) {
   3002      0    stevel 		if (mp->b_band != 0)
   3003      0    stevel 			mp->b_band = 0;		/* force to be correct */
   3004      0    stevel 		if (emp && emp->b_prev &&
   3005      0    stevel 		    (emp->b_prev->b_datap->db_type < QPCTL))
   3006      0    stevel 			goto badord;
   3007      0    stevel 	}
   3008      0    stevel 	if (emp) {
   3009      0    stevel 		if (((mcls == QNORM) && (mp->b_band < emp->b_band)) ||
   3010      0    stevel 		    (emp->b_prev && (emp->b_prev->b_datap->db_type < QPCTL) &&
   3011      0    stevel 		    (emp->b_prev->b_band < mp->b_band))) {
   3012      0    stevel 			goto badord;
   3013      0    stevel 		}
   3014      0    stevel 	} else {
   3015      0    stevel 		tmp = q->q_last;
   3016      0    stevel 		if (tmp && (mcls == QNORM) && (mp->b_band > tmp->b_band)) {
   3017      0    stevel badord:
   3018      0    stevel 			cmn_err(CE_WARN,
   3019      0    stevel 			    "insq: attempt to insert message out of order "
   3020      0    stevel 			    "on q %p", (void *)q);
   3021      0    stevel 			if (freezer != curthread)
   3022      0    stevel 				mutex_exit(QLOCK(q));
   3023      0    stevel 			return (0);
   3024      0    stevel 		}
   3025      0    stevel 	}
   3026      0    stevel 
   3027      0    stevel 	if (mp->b_band != 0) {
   3028      0    stevel 		int i;
   3029      0    stevel 		qband_t **qbpp;
   3030      0    stevel 
   3031      0    stevel 		if (mp->b_band > q->q_nband) {
   3032      0    stevel 			qbpp = &q->q_bandp;
   3033      0    stevel 			while (*qbpp)
   3034      0    stevel 				qbpp = &(*qbpp)->qb_next;
   3035      0    stevel 			while (mp->b_band > q->q_nband) {
   3036      0    stevel 				if ((*qbpp = allocband()) == NULL) {
   3037      0    stevel 					if (freezer != curthread)
   3038      0    stevel 						mutex_exit(QLOCK(q));
   3039      0    stevel 					return (0);
   3040      0    stevel 				}
   3041      0    stevel 				(*qbpp)->qb_hiwat = q->q_hiwat;
   3042      0    stevel 				(*qbpp)->qb_lowat = q->q_lowat;
   3043      0    stevel 				q->q_nband++;
   3044      0    stevel 				qbpp = &(*qbpp)->qb_next;
   3045      0    stevel 			}
   3046      0    stevel 		}
   3047      0    stevel 		qbp = q->q_bandp;
   3048      0    stevel 		i = mp->b_band;
   3049      0    stevel 		while (--i)
   3050      0    stevel 			qbp = qbp->qb_next;
   3051      0    stevel 	}
   3052      0    stevel 
   3053      0    stevel 	if ((mp->b_next = emp) != NULL) {
   3054      0    stevel 		if ((mp->b_prev = emp->b_prev) != NULL)
   3055      0    stevel 			emp->b_prev->b_next = mp;
   3056      0    stevel 		else
   3057      0    stevel 			q->q_first = mp;
   3058      0    stevel 		emp->b_prev = mp;
   3059      0    stevel 	} else {
   3060      0    stevel 		if ((mp->b_prev = q->q_last) != NULL)
   3061      0    stevel 			q->q_last->b_next = mp;
   3062      0    stevel 		else
   3063      0    stevel 			q->q_first = mp;
   3064      0    stevel 		q->q_last = mp;
   3065      0    stevel 	}
   3066      0    stevel 
   3067      0    stevel 	/* Get mblk and byte count for q_count accounting */
   3068   6769   ja97890 	bytecnt = mp_cont_len(mp, &mblkcnt);
   3069      0    stevel 
   3070      0    stevel 	if (qbp) {	/* adjust qband pointers and count */
   3071      0    stevel 		if (!qbp->qb_first) {
   3072      0    stevel 			qbp->qb_first = mp;
   3073      0    stevel 			qbp->qb_last = mp;
   3074      0    stevel 		} else {
   3075      0    stevel 			if (mp->b_prev == NULL || (mp->b_prev != NULL &&
   3076      0    stevel 			    (mp->b_prev->b_band != mp->b_band)))
   3077      0    stevel 				qbp->qb_first = mp;
   3078      0    stevel 			else if (mp->b_next == NULL || (mp->b_next != NULL &&
   3079      0    stevel 			    (mp->b_next->b_band != mp->b_band)))
   3080      0    stevel 				qbp->qb_last = mp;
   3081      0    stevel 		}
   3082      0    stevel 		qbp->qb_count += bytecnt;
   3083      0    stevel 		qbp->qb_mblkcnt += mblkcnt;
   3084      0    stevel 		if ((qbp->qb_count >= qbp->qb_hiwat) ||
   3085      0    stevel 		    (qbp->qb_mblkcnt >= qbp-