1 0 stevel /* 2 0 stevel * CDDL HEADER START 3 0 stevel * 4 0 stevel * The contents of this file are subject to the terms of the 5 2958 dr146992 * Common Development and Distribution License (the "License"). 6 2958 dr146992 * You may not use this file except in compliance with the License. 7 0 stevel * 8 0 stevel * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 0 stevel * or http://www.opensolaris.org/os/licensing. 10 0 stevel * See the License for the specific language governing permissions 11 0 stevel * and limitations under the License. 12 0 stevel * 13 0 stevel * When distributing Covered Code, include this CDDL HEADER in each 14 0 stevel * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 0 stevel * If applicable, add the following below this CDDL HEADER, with the 16 0 stevel * fields enclosed by brackets "[]" replaced with your own identifying 17 0 stevel * information: Portions Copyright [yyyy] [name of copyright owner] 18 0 stevel * 19 0 stevel * CDDL HEADER END 20 0 stevel */ 21 0 stevel /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 22 0 stevel /* All Rights Reserved */ 23 0 stevel 24 8752 Peter /* 25 8752 Peter * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 26 0 stevel * Use is subject to license terms. 
27 0 stevel */ 28 0 stevel 29 0 stevel #include <sys/types.h> 30 0 stevel #include <sys/param.h> 31 0 stevel #include <sys/thread.h> 32 0 stevel #include <sys/sysmacros.h> 33 0 stevel #include <sys/stropts.h> 34 0 stevel #include <sys/stream.h> 35 0 stevel #include <sys/strsubr.h> 36 0 stevel #include <sys/strsun.h> 37 0 stevel #include <sys/conf.h> 38 0 stevel #include <sys/debug.h> 39 0 stevel #include <sys/cmn_err.h> 40 0 stevel #include <sys/kmem.h> 41 0 stevel #include <sys/atomic.h> 42 0 stevel #include <sys/errno.h> 43 0 stevel #include <sys/vtrace.h> 44 0 stevel #include <sys/ftrace.h> 45 0 stevel #include <sys/ontrap.h> 46 0 stevel #include <sys/multidata.h> 47 0 stevel #include <sys/multidata_impl.h> 48 0 stevel #include <sys/sdt.h> 49 1110 meem #include <sys/strft.h> 50 0 stevel 51 0 stevel #ifdef DEBUG 52 0 stevel #include <sys/kmem_impl.h> 53 0 stevel #endif 54 0 stevel 55 0 stevel /* 56 0 stevel * This file contains all the STREAMS utility routines that may 57 0 stevel * be used by modules and drivers. 58 0 stevel */ 59 0 stevel 60 0 stevel /* 61 0 stevel * STREAMS message allocator: principles of operation 62 0 stevel * 63 0 stevel * The streams message allocator consists of all the routines that 64 0 stevel * allocate, dup and free streams messages: allocb(), [d]esballoc[a], 65 0 stevel * dupb(), freeb() and freemsg(). What follows is a high-level view 66 0 stevel * of how the allocator works. 67 0 stevel * 68 0 stevel * Every streams message consists of one or more mblks, a dblk, and data. 69 0 stevel * All mblks for all types of messages come from a common mblk_cache. 70 0 stevel * The dblk and data come in several flavors, depending on how the 71 0 stevel * message is allocated: 72 0 stevel * 73 0 stevel * (1) mblks up to DBLK_MAX_CACHE size are allocated from a collection of 74 0 stevel * fixed-size dblk/data caches. For message sizes that are multiples of 75 0 stevel * PAGESIZE, dblks are allocated separately from the buffer. 
76 0 stevel * The associated buffer is allocated by the constructor using kmem_alloc(). 77 0 stevel * For all other message sizes, dblk and its associated data is allocated 78 0 stevel * as a single contiguous chunk of memory. 79 0 stevel * Objects in these caches consist of a dblk plus its associated data. 80 0 stevel * allocb() determines the nearest-size cache by table lookup: 81 0 stevel * the dblk_cache[] array provides the mapping from size to dblk cache. 82 0 stevel * 83 0 stevel * (2) Large messages (size > DBLK_MAX_CACHE) are constructed by 84 0 stevel * kmem_alloc()'ing a buffer for the data and supplying that 85 0 stevel * buffer to gesballoc(), described below. 86 0 stevel * 87 0 stevel * (3) The four flavors of [d]esballoc[a] are all implemented by a 88 0 stevel * common routine, gesballoc() ("generic esballoc"). gesballoc() 89 0 stevel * allocates a dblk from the global dblk_esb_cache and sets db_base, 90 0 stevel * db_lim and db_frtnp to describe the caller-supplied buffer. 91 0 stevel * 92 0 stevel * While there are several routines to allocate messages, there is only 93 0 stevel * one routine to free messages: freeb(). freeb() simply invokes the 94 0 stevel * dblk's free method, dbp->db_free(), which is set at allocation time. 95 0 stevel * 96 0 stevel * dupb() creates a new reference to a message by allocating a new mblk, 97 0 stevel * incrementing the dblk reference count and setting the dblk's free 98 0 stevel * method to dblk_decref(). The dblk's original free method is retained 99 0 stevel * in db_lastfree. dblk_decref() decrements the reference count on each 100 0 stevel * freeb(). If this is not the last reference it just frees the mblk; 101 0 stevel * if this *is* the last reference, it restores db_free to db_lastfree, 102 0 stevel * sets db_mblk to the current mblk (see below), and invokes db_lastfree. 103 0 stevel * 104 0 stevel * The implementation makes aggressive use of kmem object caching for 105 0 stevel * maximum performance. 
This makes the code simple and compact, but 106 0 stevel * also a bit abstruse in some places. The invariants that constitute a 107 0 stevel * message's constructed state, described below, are more subtle than usual. 108 0 stevel * 109 0 stevel * Every dblk has an "attached mblk" as part of its constructed state. 110 0 stevel * The mblk is allocated by the dblk's constructor and remains attached 111 0 stevel * until the message is either dup'ed or pulled up. In the dupb() case 112 0 stevel * the mblk association doesn't matter until the last free, at which time 113 0 stevel * dblk_decref() attaches the last mblk to the dblk. pullupmsg() affects 114 0 stevel * the mblk association because it swaps the leading mblks of two messages, 115 0 stevel * so it is responsible for swapping their db_mblk pointers accordingly. 116 0 stevel * From a constructed-state viewpoint it doesn't matter that a dblk's 117 0 stevel * attached mblk can change while the message is allocated; all that 118 0 stevel * matters is that the dblk has *some* attached mblk when it's freed. 119 0 stevel * 120 0 stevel * The sizes of the allocb() small-message caches are not magical. 121 0 stevel * They represent a good trade-off between internal and external 122 0 stevel * fragmentation for current workloads. They should be reevaluated 123 0 stevel * periodically, especially if allocations larger than DBLK_MAX_CACHE 124 0 stevel * become common. We use 64-byte alignment so that dblks don't 125 0 stevel * straddle cache lines unnecessarily. 
126 0 stevel */ 127 0 stevel #define DBLK_MAX_CACHE 73728 128 0 stevel #define DBLK_CACHE_ALIGN 64 129 0 stevel #define DBLK_MIN_SIZE 8 130 0 stevel #define DBLK_SIZE_SHIFT 3 131 0 stevel 132 0 stevel #ifdef _BIG_ENDIAN 133 0 stevel #define DBLK_RTFU_SHIFT(field) \ 134 0 stevel (8 * (&((dblk_t *)0)->db_struioflag - &((dblk_t *)0)->field)) 135 0 stevel #else 136 0 stevel #define DBLK_RTFU_SHIFT(field) \ 137 0 stevel (8 * (&((dblk_t *)0)->field - &((dblk_t *)0)->db_ref)) 138 0 stevel #endif 139 0 stevel 140 0 stevel #define DBLK_RTFU(ref, type, flags, uioflag) \ 141 0 stevel (((ref) << DBLK_RTFU_SHIFT(db_ref)) | \ 142 0 stevel ((type) << DBLK_RTFU_SHIFT(db_type)) | \ 143 0 stevel (((flags) | (ref - 1)) << DBLK_RTFU_SHIFT(db_flags)) | \ 144 0 stevel ((uioflag) << DBLK_RTFU_SHIFT(db_struioflag))) 145 0 stevel #define DBLK_RTFU_REF_MASK (DBLK_REFMAX << DBLK_RTFU_SHIFT(db_ref)) 146 0 stevel #define DBLK_RTFU_WORD(dbp) (*((uint32_t *)&(dbp)->db_ref)) 147 0 stevel #define MBLK_BAND_FLAG_WORD(mp) (*((uint32_t *)&(mp)->b_band)) 148 0 stevel 149 0 stevel static size_t dblk_sizes[] = { 150 0 stevel #ifdef _LP64 151 6712 tomee 16, 80, 144, 208, 272, 336, 528, 1040, 1488, 1936, 2576, 3856, 152 6712 tomee 8192, 12048, 16384, 20240, 24576, 28432, 32768, 36624, 153 6712 tomee 40960, 44816, 49152, 53008, 57344, 61200, 65536, 69392, 154 0 stevel #else 155 6712 tomee 64, 128, 320, 576, 1088, 1536, 1984, 2624, 3904, 156 6712 tomee 8192, 12096, 16384, 20288, 24576, 28480, 32768, 36672, 157 6712 tomee 40960, 44864, 49152, 53056, 57344, 61248, 65536, 69440, 158 0 stevel #endif 159 0 stevel DBLK_MAX_CACHE, 0 160 0 stevel }; 161 0 stevel 162 0 stevel static struct kmem_cache *dblk_cache[DBLK_MAX_CACHE / DBLK_MIN_SIZE]; 163 0 stevel static struct kmem_cache *mblk_cache; 164 0 stevel static struct kmem_cache *dblk_esb_cache; 165 0 stevel static struct kmem_cache *fthdr_cache; 166 0 stevel static struct kmem_cache *ftblk_cache; 167 0 stevel 168 0 stevel static void dblk_lastfree(mblk_t *mp, 
dblk_t *dbp); 169 0 stevel static mblk_t *allocb_oversize(size_t size, int flags); 170 0 stevel static int allocb_tryhard_fails; 171 0 stevel static void frnop_func(void *arg); 172 0 stevel frtn_t frnop = { frnop_func }; 173 0 stevel static void bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp); 174 0 stevel 175 0 stevel static boolean_t rwnext_enter(queue_t *qp); 176 0 stevel static void rwnext_exit(queue_t *qp); 177 0 stevel 178 0 stevel /* 179 0 stevel * Patchable mblk/dblk kmem_cache flags. 180 0 stevel */ 181 0 stevel int dblk_kmem_flags = 0; 182 0 stevel int mblk_kmem_flags = 0; 183 0 stevel 184 0 stevel static int 185 0 stevel dblk_constructor(void *buf, void *cdrarg, int kmflags) 186 0 stevel { 187 0 stevel dblk_t *dbp = buf; 188 0 stevel ssize_t msg_size = (ssize_t)cdrarg; 189 0 stevel size_t index; 190 0 stevel 191 0 stevel ASSERT(msg_size != 0); 192 0 stevel 193 0 stevel index = (msg_size - 1) >> DBLK_SIZE_SHIFT; 194 0 stevel 195 577 meem ASSERT(index < (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)); 196 0 stevel 197 0 stevel if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL) 198 0 stevel return (-1); 199 0 stevel if ((msg_size & PAGEOFFSET) == 0) { 200 0 stevel dbp->db_base = kmem_alloc(msg_size, kmflags); 201 0 stevel if (dbp->db_base == NULL) { 202 0 stevel kmem_cache_free(mblk_cache, dbp->db_mblk); 203 0 stevel return (-1); 204 0 stevel } 205 0 stevel } else { 206 0 stevel dbp->db_base = (unsigned char *)&dbp[1]; 207 0 stevel } 208 0 stevel 209 0 stevel dbp->db_mblk->b_datap = dbp; 210 0 stevel dbp->db_cache = dblk_cache[index]; 211 0 stevel dbp->db_lim = dbp->db_base + msg_size; 212 0 stevel dbp->db_free = dbp->db_lastfree = dblk_lastfree; 213 0 stevel dbp->db_frtnp = NULL; 214 0 stevel dbp->db_fthdr = NULL; 215 0 stevel dbp->db_credp = NULL; 216 0 stevel dbp->db_cpid = -1; 217 0 stevel dbp->db_struioflag = 0; 218 0 stevel dbp->db_struioun.cksum.flags = 0; 219 0 stevel return (0); 220 0 stevel } 221 0 stevel 222 0 stevel /*ARGSUSED*/ 223 0 stevel 
static int 224 0 stevel dblk_esb_constructor(void *buf, void *cdrarg, int kmflags) 225 0 stevel { 226 0 stevel dblk_t *dbp = buf; 227 0 stevel 228 0 stevel if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL) 229 0 stevel return (-1); 230 0 stevel dbp->db_mblk->b_datap = dbp; 231 0 stevel dbp->db_cache = dblk_esb_cache; 232 0 stevel dbp->db_fthdr = NULL; 233 0 stevel dbp->db_credp = NULL; 234 0 stevel dbp->db_cpid = -1; 235 0 stevel dbp->db_struioflag = 0; 236 0 stevel dbp->db_struioun.cksum.flags = 0; 237 0 stevel return (0); 238 0 stevel } 239 0 stevel 240 0 stevel static int 241 0 stevel bcache_dblk_constructor(void *buf, void *cdrarg, int kmflags) 242 0 stevel { 243 0 stevel dblk_t *dbp = buf; 244 8752 Peter bcache_t *bcp = cdrarg; 245 0 stevel 246 0 stevel if ((dbp->db_mblk = kmem_cache_alloc(mblk_cache, kmflags)) == NULL) 247 0 stevel return (-1); 248 0 stevel 249 8752 Peter dbp->db_base = kmem_cache_alloc(bcp->buffer_cache, kmflags); 250 8752 Peter if (dbp->db_base == NULL) { 251 0 stevel kmem_cache_free(mblk_cache, dbp->db_mblk); 252 0 stevel return (-1); 253 0 stevel } 254 0 stevel 255 0 stevel dbp->db_mblk->b_datap = dbp; 256 0 stevel dbp->db_cache = (void *)bcp; 257 0 stevel dbp->db_lim = dbp->db_base + bcp->size; 258 0 stevel dbp->db_free = dbp->db_lastfree = bcache_dblk_lastfree; 259 0 stevel dbp->db_frtnp = NULL; 260 0 stevel dbp->db_fthdr = NULL; 261 0 stevel dbp->db_credp = NULL; 262 0 stevel dbp->db_cpid = -1; 263 0 stevel dbp->db_struioflag = 0; 264 0 stevel dbp->db_struioun.cksum.flags = 0; 265 0 stevel return (0); 266 0 stevel } 267 0 stevel 268 0 stevel /*ARGSUSED*/ 269 0 stevel static void 270 0 stevel dblk_destructor(void *buf, void *cdrarg) 271 0 stevel { 272 0 stevel dblk_t *dbp = buf; 273 0 stevel ssize_t msg_size = (ssize_t)cdrarg; 274 0 stevel 275 0 stevel ASSERT(dbp->db_mblk->b_datap == dbp); 276 0 stevel ASSERT(msg_size != 0); 277 0 stevel ASSERT(dbp->db_struioflag == 0); 278 0 stevel ASSERT(dbp->db_struioun.cksum.flags 
== 0); 279 0 stevel 280 0 stevel if ((msg_size & PAGEOFFSET) == 0) { 281 0 stevel kmem_free(dbp->db_base, msg_size); 282 0 stevel } 283 0 stevel 284 0 stevel kmem_cache_free(mblk_cache, dbp->db_mblk); 285 0 stevel } 286 0 stevel 287 0 stevel static void 288 0 stevel bcache_dblk_destructor(void *buf, void *cdrarg) 289 0 stevel { 290 0 stevel dblk_t *dbp = buf; 291 8752 Peter bcache_t *bcp = cdrarg; 292 0 stevel 293 0 stevel kmem_cache_free(bcp->buffer_cache, dbp->db_base); 294 0 stevel 295 0 stevel ASSERT(dbp->db_mblk->b_datap == dbp); 296 0 stevel ASSERT(dbp->db_struioflag == 0); 297 0 stevel ASSERT(dbp->db_struioun.cksum.flags == 0); 298 0 stevel 299 0 stevel kmem_cache_free(mblk_cache, dbp->db_mblk); 300 8752 Peter } 301 8752 Peter 302 8752 Peter /* ARGSUSED */ 303 8752 Peter static int 304 8752 Peter ftblk_constructor(void *buf, void *cdrarg, int kmflags) 305 8752 Peter { 306 8752 Peter ftblk_t *fbp = buf; 307 8752 Peter int i; 308 8752 Peter 309 8752 Peter bzero(fbp, sizeof (ftblk_t)); 310 8752 Peter if (str_ftstack != 0) { 311 8752 Peter for (i = 0; i < FTBLK_EVNTS; i++) 312 8752 Peter fbp->ev[i].stk = kmem_alloc(sizeof (ftstk_t), kmflags); 313 8752 Peter } 314 8752 Peter 315 8752 Peter return (0); 316 8752 Peter } 317 8752 Peter 318 8752 Peter /* ARGSUSED */ 319 8752 Peter static void 320 8752 Peter ftblk_destructor(void *buf, void *cdrarg) 321 8752 Peter { 322 8752 Peter ftblk_t *fbp = buf; 323 8752 Peter int i; 324 8752 Peter 325 8752 Peter if (str_ftstack != 0) { 326 8752 Peter for (i = 0; i < FTBLK_EVNTS; i++) { 327 8752 Peter if (fbp->ev[i].stk != NULL) { 328 8752 Peter kmem_free(fbp->ev[i].stk, sizeof (ftstk_t)); 329 8752 Peter fbp->ev[i].stk = NULL; 330 8752 Peter } 331 8752 Peter } 332 8752 Peter } 333 8752 Peter } 334 8752 Peter 335 8752 Peter static int 336 8752 Peter fthdr_constructor(void *buf, void *cdrarg, int kmflags) 337 8752 Peter { 338 8752 Peter fthdr_t *fhp = buf; 339 8752 Peter 340 8752 Peter return (ftblk_constructor(&fhp->first, cdrarg, 
kmflags)); 341 8752 Peter } 342 8752 Peter 343 8752 Peter static void 344 8752 Peter fthdr_destructor(void *buf, void *cdrarg) 345 8752 Peter { 346 8752 Peter fthdr_t *fhp = buf; 347 8752 Peter 348 8752 Peter ftblk_destructor(&fhp->first, cdrarg); 349 0 stevel } 350 0 stevel 351 0 stevel void 352 0 stevel streams_msg_init(void) 353 0 stevel { 354 0 stevel char name[40]; 355 0 stevel size_t size; 356 0 stevel size_t lastsize = DBLK_MIN_SIZE; 357 0 stevel size_t *sizep; 358 0 stevel struct kmem_cache *cp; 359 0 stevel size_t tot_size; 360 0 stevel int offset; 361 0 stevel 362 8752 Peter mblk_cache = kmem_cache_create("streams_mblk", sizeof (mblk_t), 32, 363 8752 Peter NULL, NULL, NULL, NULL, NULL, mblk_kmem_flags); 364 0 stevel 365 0 stevel for (sizep = dblk_sizes; (size = *sizep) != 0; sizep++) { 366 0 stevel 367 0 stevel if ((offset = (size & PAGEOFFSET)) != 0) { 368 0 stevel /* 369 0 stevel * We are in the middle of a page, dblk should 370 0 stevel * be allocated on the same page 371 0 stevel */ 372 0 stevel tot_size = size + sizeof (dblk_t); 373 0 stevel ASSERT((offset + sizeof (dblk_t) + sizeof (kmem_slab_t)) 374 6707 brutus < PAGESIZE); 375 0 stevel ASSERT((tot_size & (DBLK_CACHE_ALIGN - 1)) == 0); 376 0 stevel 377 0 stevel } else { 378 0 stevel 379 0 stevel /* 380 0 stevel * buf size is multiple of page size, dblk and 381 0 stevel * buffer are allocated separately. 
382 0 stevel */ 383 0 stevel 384 0 stevel ASSERT((size & (DBLK_CACHE_ALIGN - 1)) == 0); 385 0 stevel tot_size = sizeof (dblk_t); 386 0 stevel } 387 0 stevel 388 0 stevel (void) sprintf(name, "streams_dblk_%ld", size); 389 8752 Peter cp = kmem_cache_create(name, tot_size, DBLK_CACHE_ALIGN, 390 8752 Peter dblk_constructor, dblk_destructor, NULL, (void *)(size), 391 8752 Peter NULL, dblk_kmem_flags); 392 0 stevel 393 0 stevel while (lastsize <= size) { 394 0 stevel dblk_cache[(lastsize - 1) >> DBLK_SIZE_SHIFT] = cp; 395 0 stevel lastsize += DBLK_MIN_SIZE; 396 0 stevel } 397 0 stevel } 398 0 stevel 399 8752 Peter dblk_esb_cache = kmem_cache_create("streams_dblk_esb", sizeof (dblk_t), 400 8752 Peter DBLK_CACHE_ALIGN, dblk_esb_constructor, dblk_destructor, NULL, 401 8752 Peter (void *)sizeof (dblk_t), NULL, dblk_kmem_flags); 402 8752 Peter fthdr_cache = kmem_cache_create("streams_fthdr", sizeof (fthdr_t), 32, 403 8752 Peter fthdr_constructor, fthdr_destructor, NULL, NULL, NULL, 0); 404 8752 Peter ftblk_cache = kmem_cache_create("streams_ftblk", sizeof (ftblk_t), 32, 405 8752 Peter ftblk_constructor, ftblk_destructor, NULL, NULL, NULL, 0); 406 0 stevel 407 0 stevel /* Initialize Multidata caches */ 408 0 stevel mmd_init(); 409 3932 ss146032 410 3932 ss146032 /* initialize throttling queue for esballoc */ 411 3932 ss146032 esballoc_queue_init(); 412 0 stevel } 413 0 stevel 414 0 stevel /*ARGSUSED*/ 415 0 stevel mblk_t * 416 0 stevel allocb(size_t size, uint_t pri) 417 0 stevel { 418 0 stevel dblk_t *dbp; 419 0 stevel mblk_t *mp; 420 0 stevel size_t index; 421 0 stevel 422 0 stevel index = (size - 1) >> DBLK_SIZE_SHIFT; 423 0 stevel 424 0 stevel if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) { 425 0 stevel if (size != 0) { 426 0 stevel mp = allocb_oversize(size, KM_NOSLEEP); 427 0 stevel goto out; 428 0 stevel } 429 0 stevel index = 0; 430 0 stevel } 431 0 stevel 432 0 stevel if ((dbp = kmem_cache_alloc(dblk_cache[index], KM_NOSLEEP)) == NULL) { 433 0 stevel mp = NULL; 
434 0 stevel goto out; 435 0 stevel } 436 0 stevel 437 0 stevel mp = dbp->db_mblk; 438 0 stevel DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0); 439 0 stevel mp->b_next = mp->b_prev = mp->b_cont = NULL; 440 0 stevel mp->b_rptr = mp->b_wptr = dbp->db_base; 441 0 stevel mp->b_queue = NULL; 442 0 stevel MBLK_BAND_FLAG_WORD(mp) = 0; 443 0 stevel STR_FTALLOC(&dbp->db_fthdr, FTEV_ALLOCB, size); 444 0 stevel out: 445 0 stevel FTRACE_1("allocb(): mp=0x%p", (uintptr_t)mp); 446 0 stevel 447 0 stevel return (mp); 448 0 stevel } 449 0 stevel 450 8778 Erik /* 451 8778 Erik * Allocate an mblk taking db_credp and db_cpid from the template. 452 8778 Erik * Allow the cred to be NULL. 453 8778 Erik */ 454 0 stevel mblk_t * 455 0 stevel allocb_tmpl(size_t size, const mblk_t *tmpl) 456 0 stevel { 457 0 stevel mblk_t *mp = allocb(size, 0); 458 0 stevel 459 0 stevel if (mp != NULL) { 460 8778 Erik dblk_t *src = tmpl->b_datap; 461 8778 Erik dblk_t *dst = mp->b_datap; 462 10163 Ken cred_t *cr; 463 10163 Ken pid_t cpid; 464 10163 Ken 465 10163 Ken cr = msg_getcred(tmpl, &cpid); 466 0 stevel if (cr != NULL) 467 8778 Erik crhold(dst->db_credp = cr); 468 10163 Ken dst->db_cpid = cpid; 469 8778 Erik dst->db_type = src->db_type; 470 8778 Erik } 471 8778 Erik return (mp); 472 8778 Erik } 473 8778 Erik 474 8778 Erik mblk_t * 475 8778 Erik allocb_cred(size_t size, cred_t *cr, pid_t cpid) 476 0 stevel { 477 0 stevel mblk_t *mp = allocb(size, 0); 478 0 stevel 479 8778 Erik ASSERT(cr != NULL); 480 8778 Erik if (mp != NULL) { 481 8778 Erik dblk_t *dbp = mp->b_datap; 482 8778 Erik 483 8778 Erik crhold(dbp->db_credp = cr); 484 8778 Erik dbp->db_cpid = cpid; 485 8778 Erik } 486 8778 Erik return (mp); 487 8778 Erik } 488 8778 Erik 489 8778 Erik mblk_t * 490 8778 Erik allocb_cred_wait(size_t size, uint_t flags, int *error, cred_t *cr, pid_t cpid) 491 0 stevel { 492 0 stevel mblk_t *mp = allocb_wait(size, 0, flags, error); 493 0 stevel 494 8778 Erik ASSERT(cr != NULL); 495 8778 Erik if (mp != NULL) { 496 
8778 Erik dblk_t *dbp = mp->b_datap; 497 8778 Erik 498 8778 Erik crhold(dbp->db_credp = cr); 499 8778 Erik dbp->db_cpid = cpid; 500 8778 Erik } 501 8778 Erik 502 8778 Erik return (mp); 503 8778 Erik } 504 8778 Erik 505 8778 Erik /* 506 8778 Erik * Extract the db_cred (and optionally db_cpid) from a message. 507 8778 Erik * We find the first mblk which has a non-NULL db_cred and use that. 508 8778 Erik * If none found we return NULL. 509 8778 Erik * Does NOT get a hold on the cred. 510 8778 Erik */ 511 8778 Erik cred_t * 512 8778 Erik msg_getcred(const mblk_t *mp, pid_t *cpidp) 513 8778 Erik { 514 8778 Erik cred_t *cr = NULL; 515 8778 Erik cred_t *cr2; 516 10163 Ken mblk_t *mp2; 517 8778 Erik 518 8778 Erik while (mp != NULL) { 519 8778 Erik dblk_t *dbp = mp->b_datap; 520 8778 Erik 521 8778 Erik cr = dbp->db_credp; 522 8778 Erik if (cr == NULL) { 523 8778 Erik mp = mp->b_cont; 524 8778 Erik continue; 525 8778 Erik } 526 8778 Erik if (cpidp != NULL) 527 8778 Erik *cpidp = dbp->db_cpid; 528 8778 Erik 529 8778 Erik #ifdef DEBUG 530 8778 Erik /* 531 8778 Erik * Normally there should at most one db_credp in a message. 532 8778 Erik * But if there are multiple (as in the case of some M_IOC* 533 8778 Erik * and some internal messages in TCP/IP bind logic) then 534 8778 Erik * they must be identical in the normal case. 535 8778 Erik * However, a socket can be shared between different uids 536 8778 Erik * in which case data queued in TCP would be from different 537 8778 Erik * creds. Thus we can only assert for the zoneid being the 538 8778 Erik * same. Due to Multi-level Level Ports for TX, some 539 8778 Erik * cred_t can have a NULL cr_zone, and we skip the comparison 540 8778 Erik * in that case. 
541 8778 Erik */ 542 10163 Ken mp2 = mp->b_cont; 543 10163 Ken while (mp2 != NULL) { 544 10163 Ken cr2 = DB_CRED(mp2); 545 10163 Ken if (cr2 != NULL) { 546 10163 Ken DTRACE_PROBE2(msg__getcred, 547 10163 Ken cred_t *, cr, cred_t *, cr2); 548 10163 Ken ASSERT(crgetzoneid(cr) == crgetzoneid(cr2) || 549 10163 Ken crgetzone(cr) == NULL || 550 10163 Ken crgetzone(cr2) == NULL); 551 10163 Ken } 552 10163 Ken mp2 = mp2->b_cont; 553 8778 Erik } 554 8778 Erik #endif 555 8778 Erik return (cr); 556 8778 Erik } 557 8778 Erik if (cpidp != NULL) 558 8778 Erik *cpidp = NOPID; 559 8778 Erik return (NULL); 560 8778 Erik } 561 8778 Erik 562 8778 Erik /* 563 8778 Erik * Variant of msg_getcred which, when a cred is found 564 8778 Erik * 1. Returns with a hold on the cred 565 8778 Erik * 2. Clears the first cred in the mblk. 566 8778 Erik * This is more efficient to use than a msg_getcred() + crhold() when 567 8778 Erik * the message is freed after the cred has been extracted. 568 8778 Erik * 569 8778 Erik * The caller is responsible for ensuring that there is no other reference 570 8778 Erik * on the message since db_credp can not be cleared when there are other 571 8778 Erik * references. 572 8778 Erik */ 573 8778 Erik cred_t * 574 8778 Erik msg_extractcred(mblk_t *mp, pid_t *cpidp) 575 8778 Erik { 576 8778 Erik cred_t *cr = NULL; 577 8778 Erik cred_t *cr2; 578 10163 Ken mblk_t *mp2; 579 8778 Erik 580 8778 Erik while (mp != NULL) { 581 8778 Erik dblk_t *dbp = mp->b_datap; 582 8778 Erik 583 8778 Erik cr = dbp->db_credp; 584 8778 Erik if (cr == NULL) { 585 8778 Erik mp = mp->b_cont; 586 8778 Erik continue; 587 8778 Erik } 588 8778 Erik ASSERT(dbp->db_ref == 1); 589 8778 Erik dbp->db_credp = NULL; 590 8778 Erik if (cpidp != NULL) 591 8778 Erik *cpidp = dbp->db_cpid; 592 8778 Erik #ifdef DEBUG 593 8778 Erik /* 594 8778 Erik * Normally there should at most one db_credp in a message. 
595 8778 Erik * But if there are multiple (as in the case of some M_IOC* 596 8778 Erik * and some internal messages in TCP/IP bind logic) then 597 8778 Erik * they must be identical in the normal case. 598 8778 Erik * However, a socket can be shared between different uids 599 8778 Erik * in which case data queued in TCP would be from different 600 8778 Erik * creds. Thus we can only assert for the zoneid being the 601 8778 Erik * same. Due to Multi-level Level Ports for TX, some 602 8778 Erik * cred_t can have a NULL cr_zone, and we skip the comparison 603 8778 Erik * in that case. 604 8778 Erik */ 605 10163 Ken mp2 = mp->b_cont; 606 10163 Ken while (mp2 != NULL) { 607 10163 Ken cr2 = DB_CRED(mp2); 608 10163 Ken if (cr2 != NULL) { 609 10163 Ken DTRACE_PROBE2(msg__extractcred, 610 10163 Ken cred_t *, cr, cred_t *, cr2); 611 10163 Ken ASSERT(crgetzoneid(cr) == crgetzoneid(cr2) || 612 10163 Ken crgetzone(cr) == NULL || 613 10163 Ken crgetzone(cr2) == NULL); 614 10163 Ken } 615 10163 Ken mp2 = mp2->b_cont; 616 8778 Erik } 617 8778 Erik #endif 618 8778 Erik return (cr); 619 8778 Erik } 620 8778 Erik return (NULL); 621 8778 Erik } 622 8778 Erik /* 623 8778 Erik * Get the label for a message. Uses the first mblk in the message 624 8778 Erik * which has a non-NULL db_credp. 625 8778 Erik * Returns NULL if there is no credp. 
626 8778 Erik */ 627 8778 Erik extern struct ts_label_s * 628 8778 Erik msg_getlabel(const mblk_t *mp) 629 8778 Erik { 630 8778 Erik cred_t *cr = msg_getcred(mp, NULL); 631 8778 Erik 632 8778 Erik if (cr == NULL) 633 8778 Erik return (NULL); 634 8778 Erik 635 8778 Erik return (crgetlabel(cr)); 636 0 stevel } 637 0 stevel 638 0 stevel void 639 0 stevel freeb(mblk_t *mp) 640 0 stevel { 641 0 stevel dblk_t *dbp = mp->b_datap; 642 0 stevel 643 0 stevel ASSERT(dbp->db_ref > 0); 644 0 stevel ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 645 0 stevel FTRACE_1("freeb(): mp=0x%lx", (uintptr_t)mp); 646 0 stevel 647 0 stevel STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref); 648 0 stevel 649 0 stevel dbp->db_free(mp, dbp); 650 0 stevel } 651 0 stevel 652 0 stevel void 653 0 stevel freemsg(mblk_t *mp) 654 0 stevel { 655 0 stevel FTRACE_1("freemsg(): mp=0x%lx", (uintptr_t)mp); 656 0 stevel while (mp) { 657 0 stevel dblk_t *dbp = mp->b_datap; 658 0 stevel mblk_t *mp_cont = mp->b_cont; 659 0 stevel 660 0 stevel ASSERT(dbp->db_ref > 0); 661 0 stevel ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 662 0 stevel 663 0 stevel STR_FTEVENT_MBLK(mp, caller(), FTEV_FREEB, dbp->db_ref); 664 0 stevel 665 0 stevel dbp->db_free(mp, dbp); 666 0 stevel mp = mp_cont; 667 0 stevel } 668 0 stevel } 669 0 stevel 670 0 stevel /* 671 0 stevel * Reallocate a block for another use. Try hard to use the old block. 672 0 stevel * If the old data is wanted (copy), leave b_wptr at the end of the data, 673 0 stevel * otherwise return b_wptr = b_rptr. 674 0 stevel * 675 0 stevel * This routine is private and unstable. 
676 0 stevel */ 677 0 stevel mblk_t * 678 0 stevel reallocb(mblk_t *mp, size_t size, uint_t copy) 679 0 stevel { 680 0 stevel mblk_t *mp1; 681 0 stevel unsigned char *old_rptr; 682 0 stevel ptrdiff_t cur_size; 683 0 stevel 684 0 stevel if (mp == NULL) 685 0 stevel return (allocb(size, BPRI_HI)); 686 0 stevel 687 0 stevel cur_size = mp->b_wptr - mp->b_rptr; 688 0 stevel old_rptr = mp->b_rptr; 689 0 stevel 690 0 stevel ASSERT(mp->b_datap->db_ref != 0); 691 0 stevel 692 0 stevel if (mp->b_datap->db_ref == 1 && MBLKSIZE(mp) >= size) { 693 0 stevel /* 694 0 stevel * If the data is wanted and it will fit where it is, no 695 0 stevel * work is required. 696 0 stevel */ 697 0 stevel if (copy && mp->b_datap->db_lim - mp->b_rptr >= size) 698 0 stevel return (mp); 699 0 stevel 700 0 stevel mp->b_wptr = mp->b_rptr = mp->b_datap->db_base; 701 0 stevel mp1 = mp; 702 0 stevel } else if ((mp1 = allocb_tmpl(size, mp)) != NULL) { 703 0 stevel /* XXX other mp state could be copied too, db_flags ... ? */ 704 0 stevel mp1->b_cont = mp->b_cont; 705 0 stevel } else { 706 0 stevel return (NULL); 707 0 stevel } 708 0 stevel 709 0 stevel if (copy) { 710 0 stevel bcopy(old_rptr, mp1->b_rptr, cur_size); 711 0 stevel mp1->b_wptr = mp1->b_rptr + cur_size; 712 0 stevel } 713 0 stevel 714 0 stevel if (mp != mp1) 715 0 stevel freeb(mp); 716 0 stevel 717 0 stevel return (mp1); 718 0 stevel } 719 0 stevel 720 0 stevel static void 721 0 stevel dblk_lastfree(mblk_t *mp, dblk_t *dbp) 722 0 stevel { 723 0 stevel ASSERT(dbp->db_mblk == mp); 724 0 stevel if (dbp->db_fthdr != NULL) 725 0 stevel str_ftfree(dbp); 726 0 stevel 727 0 stevel /* set credp and projid to be 'unspecified' before returning to cache */ 728 0 stevel if (dbp->db_credp != NULL) { 729 0 stevel crfree(dbp->db_credp); 730 0 stevel dbp->db_credp = NULL; 731 0 stevel } 732 0 stevel dbp->db_cpid = -1; 733 0 stevel 734 0 stevel /* Reset the struioflag and the checksum flag fields */ 735 0 stevel dbp->db_struioflag = 0; 736 0 stevel 
dbp->db_struioun.cksum.flags = 0; 737 0 stevel 738 6707 brutus /* and the COOKED and/or UIOA flag(s) */ 739 6707 brutus dbp->db_flags &= ~(DBLK_COOKED | DBLK_UIOA); 740 898 kais 741 0 stevel kmem_cache_free(dbp->db_cache, dbp); 742 0 stevel } 743 0 stevel 744 0 stevel static void 745 0 stevel dblk_decref(mblk_t *mp, dblk_t *dbp) 746 0 stevel { 747 0 stevel if (dbp->db_ref != 1) { 748 0 stevel uint32_t rtfu = atomic_add_32_nv(&DBLK_RTFU_WORD(dbp), 749 0 stevel -(1 << DBLK_RTFU_SHIFT(db_ref))); 750 0 stevel /* 751 0 stevel * atomic_add_32_nv() just decremented db_ref, so we no longer 752 0 stevel * have a reference to the dblk, which means another thread 753 0 stevel * could free it. Therefore we cannot examine the dblk to 754 0 stevel * determine whether ours was the last reference. Instead, 755 0 stevel * we extract the new and minimum reference counts from rtfu. 756 0 stevel * Note that all we're really saying is "if (ref != refmin)". 757 0 stevel */ 758 0 stevel if (((rtfu >> DBLK_RTFU_SHIFT(db_ref)) & DBLK_REFMAX) != 759 0 stevel ((rtfu >> DBLK_RTFU_SHIFT(db_flags)) & DBLK_REFMIN)) { 760 0 stevel kmem_cache_free(mblk_cache, mp); 761 0 stevel return; 762 0 stevel } 763 0 stevel } 764 0 stevel dbp->db_mblk = mp; 765 0 stevel dbp->db_free = dbp->db_lastfree; 766 0 stevel dbp->db_lastfree(mp, dbp); 767 0 stevel } 768 0 stevel 769 0 stevel mblk_t * 770 0 stevel dupb(mblk_t *mp) 771 0 stevel { 772 0 stevel dblk_t *dbp = mp->b_datap; 773 0 stevel mblk_t *new_mp; 774 0 stevel uint32_t oldrtfu, newrtfu; 775 0 stevel 776 0 stevel if ((new_mp = kmem_cache_alloc(mblk_cache, KM_NOSLEEP)) == NULL) 777 0 stevel goto out; 778 0 stevel 779 0 stevel new_mp->b_next = new_mp->b_prev = new_mp->b_cont = NULL; 780 0 stevel new_mp->b_rptr = mp->b_rptr; 781 0 stevel new_mp->b_wptr = mp->b_wptr; 782 0 stevel new_mp->b_datap = dbp; 783 0 stevel new_mp->b_queue = NULL; 784 0 stevel MBLK_BAND_FLAG_WORD(new_mp) = MBLK_BAND_FLAG_WORD(mp); 785 0 stevel 786 0 stevel STR_FTEVENT_MBLK(mp, 
caller(), FTEV_DUPB, dbp->db_ref); 787 0 stevel 788 3163 georges dbp->db_free = dblk_decref; 789 0 stevel do { 790 0 stevel ASSERT(dbp->db_ref > 0); 791 0 stevel oldrtfu = DBLK_RTFU_WORD(dbp); 792 0 stevel newrtfu = oldrtfu + (1 << DBLK_RTFU_SHIFT(db_ref)); 793 0 stevel /* 794 0 stevel * If db_ref is maxed out we can't dup this message anymore. 795 0 stevel */ 796 0 stevel if ((oldrtfu & DBLK_RTFU_REF_MASK) == DBLK_RTFU_REF_MASK) { 797 0 stevel kmem_cache_free(mblk_cache, new_mp); 798 0 stevel new_mp = NULL; 799 0 stevel goto out; 800 0 stevel } 801 0 stevel } while (cas32(&DBLK_RTFU_WORD(dbp), oldrtfu, newrtfu) != oldrtfu); 802 0 stevel 803 0 stevel out: 804 0 stevel FTRACE_1("dupb(): new_mp=0x%lx", (uintptr_t)new_mp); 805 0 stevel return (new_mp); 806 0 stevel } 807 0 stevel 808 0 stevel static void 809 0 stevel dblk_lastfree_desb(mblk_t *mp, dblk_t *dbp) 810 0 stevel { 811 0 stevel frtn_t *frp = dbp->db_frtnp; 812 0 stevel 813 0 stevel ASSERT(dbp->db_mblk == mp); 814 0 stevel frp->free_func(frp->free_arg); 815 0 stevel if (dbp->db_fthdr != NULL) 816 0 stevel str_ftfree(dbp); 817 0 stevel 818 0 stevel /* set credp and projid to be 'unspecified' before returning to cache */ 819 0 stevel if (dbp->db_credp != NULL) { 820 0 stevel crfree(dbp->db_credp); 821 0 stevel dbp->db_credp = NULL; 822 0 stevel } 823 0 stevel dbp->db_cpid = -1; 824 0 stevel dbp->db_struioflag = 0; 825 0 stevel dbp->db_struioun.cksum.flags = 0; 826 0 stevel 827 0 stevel kmem_cache_free(dbp->db_cache, dbp); 828 0 stevel } 829 0 stevel 830 0 stevel /*ARGSUSED*/ 831 0 stevel static void 832 0 stevel frnop_func(void *arg) 833 0 stevel { 834 0 stevel } 835 0 stevel 836 0 stevel /* 837 0 stevel * Generic esballoc used to implement the four flavors: [d]esballoc[a]. 
838 0 stevel */ 839 0 stevel static mblk_t * 840 0 stevel gesballoc(unsigned char *base, size_t size, uint32_t db_rtfu, frtn_t *frp, 841 0 stevel void (*lastfree)(mblk_t *, dblk_t *), int kmflags) 842 0 stevel { 843 0 stevel dblk_t *dbp; 844 0 stevel mblk_t *mp; 845 0 stevel 846 0 stevel ASSERT(base != NULL && frp != NULL); 847 0 stevel 848 0 stevel if ((dbp = kmem_cache_alloc(dblk_esb_cache, kmflags)) == NULL) { 849 0 stevel mp = NULL; 850 0 stevel goto out; 851 0 stevel } 852 0 stevel 853 0 stevel mp = dbp->db_mblk; 854 0 stevel dbp->db_base = base; 855 0 stevel dbp->db_lim = base + size; 856 0 stevel dbp->db_free = dbp->db_lastfree = lastfree; 857 0 stevel dbp->db_frtnp = frp; 858 0 stevel DBLK_RTFU_WORD(dbp) = db_rtfu; 859 0 stevel mp->b_next = mp->b_prev = mp->b_cont = NULL; 860 0 stevel mp->b_rptr = mp->b_wptr = base; 861 0 stevel mp->b_queue = NULL; 862 0 stevel MBLK_BAND_FLAG_WORD(mp) = 0; 863 0 stevel 864 0 stevel out: 865 0 stevel FTRACE_1("gesballoc(): mp=0x%lx", (uintptr_t)mp); 866 0 stevel return (mp); 867 0 stevel } 868 0 stevel 869 0 stevel /*ARGSUSED*/ 870 0 stevel mblk_t * 871 0 stevel esballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp) 872 0 stevel { 873 0 stevel mblk_t *mp; 874 0 stevel 875 0 stevel /* 876 0 stevel * Note that this is structured to allow the common case (i.e. 877 0 stevel * STREAMS flowtracing disabled) to call gesballoc() with tail 878 0 stevel * call optimization. 879 0 stevel */ 880 0 stevel if (!str_ftnever) { 881 0 stevel mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0), 882 0 stevel frp, freebs_enqueue, KM_NOSLEEP); 883 0 stevel 884 0 stevel if (mp != NULL) 885 0 stevel STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size); 886 0 stevel return (mp); 887 0 stevel } 888 0 stevel 889 0 stevel return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0), 890 0 stevel frp, freebs_enqueue, KM_NOSLEEP)); 891 0 stevel } 892 0 stevel 893 0 stevel /* 894 0 stevel * Same as esballoc() but sleeps waiting for memory. 
895 0 stevel */ 896 0 stevel /*ARGSUSED*/ 897 0 stevel mblk_t * 898 0 stevel esballoc_wait(unsigned char *base, size_t size, uint_t pri, frtn_t *frp) 899 0 stevel { 900 0 stevel mblk_t *mp; 901 0 stevel 902 0 stevel /* 903 0 stevel * Note that this is structured to allow the common case (i.e. 904 0 stevel * STREAMS flowtracing disabled) to call gesballoc() with tail 905 0 stevel * call optimization. 906 0 stevel */ 907 0 stevel if (!str_ftnever) { 908 0 stevel mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0), 909 0 stevel frp, freebs_enqueue, KM_SLEEP); 910 0 stevel 911 0 stevel STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOC, size); 912 0 stevel return (mp); 913 0 stevel } 914 0 stevel 915 0 stevel return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0), 916 0 stevel frp, freebs_enqueue, KM_SLEEP)); 917 0 stevel } 918 0 stevel 919 0 stevel /*ARGSUSED*/ 920 0 stevel mblk_t * 921 0 stevel desballoc(unsigned char *base, size_t size, uint_t pri, frtn_t *frp) 922 0 stevel { 923 0 stevel mblk_t *mp; 924 0 stevel 925 0 stevel /* 926 0 stevel * Note that this is structured to allow the common case (i.e. 927 0 stevel * STREAMS flowtracing disabled) to call gesballoc() with tail 928 0 stevel * call optimization. 929 0 stevel */ 930 0 stevel if (!str_ftnever) { 931 0 stevel mp = gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0), 932 6707 brutus frp, dblk_lastfree_desb, KM_NOSLEEP); 933 0 stevel 934 0 stevel if (mp != NULL) 935 0 stevel STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOC, size); 936 0 stevel return (mp); 937 0 stevel } 938 0 stevel 939 0 stevel return (gesballoc(base, size, DBLK_RTFU(1, M_DATA, 0, 0), 940 0 stevel frp, dblk_lastfree_desb, KM_NOSLEEP)); 941 0 stevel } 942 0 stevel 943 0 stevel /*ARGSUSED*/ 944 0 stevel mblk_t * 945 0 stevel esballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp) 946 0 stevel { 947 0 stevel mblk_t *mp; 948 0 stevel 949 0 stevel /* 950 0 stevel * Note that this is structured to allow the common case (i.e. 
951 0 stevel * STREAMS flowtracing disabled) to call gesballoc() with tail 952 0 stevel * call optimization. 953 0 stevel */ 954 0 stevel if (!str_ftnever) { 955 0 stevel mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0), 956 0 stevel frp, freebs_enqueue, KM_NOSLEEP); 957 0 stevel 958 0 stevel if (mp != NULL) 959 0 stevel STR_FTALLOC(&DB_FTHDR(mp), FTEV_ESBALLOCA, size); 960 0 stevel return (mp); 961 0 stevel } 962 0 stevel 963 0 stevel return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0), 964 0 stevel frp, freebs_enqueue, KM_NOSLEEP)); 965 0 stevel } 966 0 stevel 967 0 stevel /*ARGSUSED*/ 968 0 stevel mblk_t * 969 0 stevel desballoca(unsigned char *base, size_t size, uint_t pri, frtn_t *frp) 970 0 stevel { 971 0 stevel mblk_t *mp; 972 0 stevel 973 0 stevel /* 974 0 stevel * Note that this is structured to allow the common case (i.e. 975 0 stevel * STREAMS flowtracing disabled) to call gesballoc() with tail 976 0 stevel * call optimization. 977 0 stevel */ 978 0 stevel if (!str_ftnever) { 979 0 stevel mp = gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0), 980 0 stevel frp, dblk_lastfree_desb, KM_NOSLEEP); 981 0 stevel 982 0 stevel if (mp != NULL) 983 0 stevel STR_FTALLOC(&DB_FTHDR(mp), FTEV_DESBALLOCA, size); 984 0 stevel return (mp); 985 0 stevel } 986 0 stevel 987 0 stevel return (gesballoc(base, size, DBLK_RTFU(2, M_DATA, 0, 0), 988 0 stevel frp, dblk_lastfree_desb, KM_NOSLEEP)); 989 0 stevel } 990 0 stevel 991 0 stevel static void 992 0 stevel bcache_dblk_lastfree(mblk_t *mp, dblk_t *dbp) 993 0 stevel { 994 0 stevel bcache_t *bcp = dbp->db_cache; 995 0 stevel 996 0 stevel ASSERT(dbp->db_mblk == mp); 997 0 stevel if (dbp->db_fthdr != NULL) 998 0 stevel str_ftfree(dbp); 999 0 stevel 1000 0 stevel /* set credp and projid to be 'unspecified' before returning to cache */ 1001 0 stevel if (dbp->db_credp != NULL) { 1002 0 stevel crfree(dbp->db_credp); 1003 0 stevel dbp->db_credp = NULL; 1004 0 stevel } 1005 0 stevel dbp->db_cpid = -1; 1006 0 stevel 
dbp->db_struioflag = 0; 1007 0 stevel dbp->db_struioun.cksum.flags = 0; 1008 0 stevel 1009 0 stevel mutex_enter(&bcp->mutex); 1010 0 stevel kmem_cache_free(bcp->dblk_cache, dbp); 1011 0 stevel bcp->alloc--; 1012 0 stevel 1013 0 stevel if (bcp->alloc == 0 && bcp->destroy != 0) { 1014 0 stevel kmem_cache_destroy(bcp->dblk_cache); 1015 0 stevel kmem_cache_destroy(bcp->buffer_cache); 1016 0 stevel mutex_exit(&bcp->mutex); 1017 0 stevel mutex_destroy(&bcp->mutex); 1018 0 stevel kmem_free(bcp, sizeof (bcache_t)); 1019 0 stevel } else { 1020 0 stevel mutex_exit(&bcp->mutex); 1021 0 stevel } 1022 0 stevel } 1023 0 stevel 1024 0 stevel bcache_t * 1025 0 stevel bcache_create(char *name, size_t size, uint_t align) 1026 0 stevel { 1027 0 stevel bcache_t *bcp; 1028 0 stevel char buffer[255]; 1029 0 stevel 1030 0 stevel ASSERT((align & (align - 1)) == 0); 1031 0 stevel 1032 8752 Peter if ((bcp = kmem_alloc(sizeof (bcache_t), KM_NOSLEEP)) == NULL) 1033 8752 Peter return (NULL); 1034 0 stevel 1035 0 stevel bcp->size = size; 1036 0 stevel bcp->align = align; 1037 0 stevel bcp->alloc = 0; 1038 0 stevel bcp->destroy = 0; 1039 0 stevel 1040 0 stevel mutex_init(&bcp->mutex, NULL, MUTEX_DRIVER, NULL); 1041 0 stevel 1042 0 stevel (void) sprintf(buffer, "%s_buffer_cache", name); 1043 0 stevel bcp->buffer_cache = kmem_cache_create(buffer, size, align, NULL, NULL, 1044 0 stevel NULL, NULL, NULL, 0); 1045 0 stevel (void) sprintf(buffer, "%s_dblk_cache", name); 1046 0 stevel bcp->dblk_cache = kmem_cache_create(buffer, sizeof (dblk_t), 1047 0 stevel DBLK_CACHE_ALIGN, bcache_dblk_constructor, bcache_dblk_destructor, 1048 6707 brutus NULL, (void *)bcp, NULL, 0); 1049 0 stevel 1050 0 stevel return (bcp); 1051 0 stevel } 1052 0 stevel 1053 0 stevel void 1054 0 stevel bcache_destroy(bcache_t *bcp) 1055 0 stevel { 1056 0 stevel ASSERT(bcp != NULL); 1057 0 stevel 1058 0 stevel mutex_enter(&bcp->mutex); 1059 0 stevel if (bcp->alloc == 0) { 1060 0 stevel kmem_cache_destroy(bcp->dblk_cache); 1061 0 
stevel kmem_cache_destroy(bcp->buffer_cache); 1062 0 stevel mutex_exit(&bcp->mutex); 1063 0 stevel mutex_destroy(&bcp->mutex); 1064 0 stevel kmem_free(bcp, sizeof (bcache_t)); 1065 0 stevel } else { 1066 0 stevel bcp->destroy++; 1067 0 stevel mutex_exit(&bcp->mutex); 1068 0 stevel } 1069 0 stevel } 1070 0 stevel 1071 0 stevel /*ARGSUSED*/ 1072 0 stevel mblk_t * 1073 0 stevel bcache_allocb(bcache_t *bcp, uint_t pri) 1074 0 stevel { 1075 0 stevel dblk_t *dbp; 1076 0 stevel mblk_t *mp = NULL; 1077 0 stevel 1078 0 stevel ASSERT(bcp != NULL); 1079 0 stevel 1080 0 stevel mutex_enter(&bcp->mutex); 1081 0 stevel if (bcp->destroy != 0) { 1082 0 stevel mutex_exit(&bcp->mutex); 1083 0 stevel goto out; 1084 0 stevel } 1085 0 stevel 1086 0 stevel if ((dbp = kmem_cache_alloc(bcp->dblk_cache, KM_NOSLEEP)) == NULL) { 1087 0 stevel mutex_exit(&bcp->mutex); 1088 0 stevel goto out; 1089 0 stevel } 1090 0 stevel bcp->alloc++; 1091 0 stevel mutex_exit(&bcp->mutex); 1092 0 stevel 1093 0 stevel ASSERT(((uintptr_t)(dbp->db_base) & (bcp->align - 1)) == 0); 1094 0 stevel 1095 0 stevel mp = dbp->db_mblk; 1096 0 stevel DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0); 1097 0 stevel mp->b_next = mp->b_prev = mp->b_cont = NULL; 1098 0 stevel mp->b_rptr = mp->b_wptr = dbp->db_base; 1099 0 stevel mp->b_queue = NULL; 1100 0 stevel MBLK_BAND_FLAG_WORD(mp) = 0; 1101 0 stevel STR_FTALLOC(&dbp->db_fthdr, FTEV_BCALLOCB, bcp->size); 1102 0 stevel out: 1103 0 stevel FTRACE_1("bcache_allocb(): mp=0x%p", (uintptr_t)mp); 1104 0 stevel 1105 0 stevel return (mp); 1106 0 stevel } 1107 0 stevel 1108 0 stevel static void 1109 0 stevel dblk_lastfree_oversize(mblk_t *mp, dblk_t *dbp) 1110 0 stevel { 1111 0 stevel ASSERT(dbp->db_mblk == mp); 1112 0 stevel if (dbp->db_fthdr != NULL) 1113 0 stevel str_ftfree(dbp); 1114 0 stevel 1115 0 stevel /* set credp and projid to be 'unspecified' before returning to cache */ 1116 0 stevel if (dbp->db_credp != NULL) { 1117 0 stevel crfree(dbp->db_credp); 1118 0 stevel 
dbp->db_credp = NULL; 1119 0 stevel } 1120 0 stevel dbp->db_cpid = -1; 1121 0 stevel dbp->db_struioflag = 0; 1122 0 stevel dbp->db_struioun.cksum.flags = 0; 1123 0 stevel 1124 0 stevel kmem_free(dbp->db_base, dbp->db_lim - dbp->db_base); 1125 0 stevel kmem_cache_free(dbp->db_cache, dbp); 1126 0 stevel } 1127 0 stevel 1128 0 stevel static mblk_t * 1129 0 stevel allocb_oversize(size_t size, int kmflags) 1130 0 stevel { 1131 0 stevel mblk_t *mp; 1132 0 stevel void *buf; 1133 0 stevel 1134 0 stevel size = P2ROUNDUP(size, DBLK_CACHE_ALIGN); 1135 0 stevel if ((buf = kmem_alloc(size, kmflags)) == NULL) 1136 0 stevel return (NULL); 1137 0 stevel if ((mp = gesballoc(buf, size, DBLK_RTFU(1, M_DATA, 0, 0), 1138 0 stevel &frnop, dblk_lastfree_oversize, kmflags)) == NULL) 1139 0 stevel kmem_free(buf, size); 1140 0 stevel 1141 0 stevel if (mp != NULL) 1142 0 stevel STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBIG, size); 1143 0 stevel 1144 0 stevel return (mp); 1145 0 stevel } 1146 0 stevel 1147 0 stevel mblk_t * 1148 0 stevel allocb_tryhard(size_t target_size) 1149 0 stevel { 1150 0 stevel size_t size; 1151 0 stevel mblk_t *bp; 1152 0 stevel 1153 0 stevel for (size = target_size; size < target_size + 512; 1154 0 stevel size += DBLK_CACHE_ALIGN) 1155 0 stevel if ((bp = allocb(size, BPRI_HI)) != NULL) 1156 0 stevel return (bp); 1157 0 stevel allocb_tryhard_fails++; 1158 0 stevel return (NULL); 1159 0 stevel } 1160 0 stevel 1161 0 stevel /* 1162 0 stevel * This routine is consolidation private for STREAMS internal use 1163 0 stevel * This routine may only be called from sync routines (i.e., not 1164 0 stevel * from put or service procedures). It is located here (rather 1165 0 stevel * than strsubr.c) so that we don't have to expose all of the 1166 0 stevel * allocb() implementation details in header files. 
1167 0 stevel */ 1168 0 stevel mblk_t * 1169 0 stevel allocb_wait(size_t size, uint_t pri, uint_t flags, int *error) 1170 0 stevel { 1171 0 stevel dblk_t *dbp; 1172 0 stevel mblk_t *mp; 1173 0 stevel size_t index; 1174 0 stevel 1175 0 stevel index = (size -1) >> DBLK_SIZE_SHIFT; 1176 0 stevel 1177 0 stevel if (flags & STR_NOSIG) { 1178 0 stevel if (index >= (DBLK_MAX_CACHE >> DBLK_SIZE_SHIFT)) { 1179 0 stevel if (size != 0) { 1180 0 stevel mp = allocb_oversize(size, KM_SLEEP); 1181 0 stevel FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", 1182 0 stevel (uintptr_t)mp); 1183 0 stevel return (mp); 1184 0 stevel } 1185 0 stevel index = 0; 1186 0 stevel } 1187 0 stevel 1188 0 stevel dbp = kmem_cache_alloc(dblk_cache[index], KM_SLEEP); 1189 0 stevel mp = dbp->db_mblk; 1190 0 stevel DBLK_RTFU_WORD(dbp) = DBLK_RTFU(1, M_DATA, 0, 0); 1191 0 stevel mp->b_next = mp->b_prev = mp->b_cont = NULL; 1192 0 stevel mp->b_rptr = mp->b_wptr = dbp->db_base; 1193 0 stevel mp->b_queue = NULL; 1194 0 stevel MBLK_BAND_FLAG_WORD(mp) = 0; 1195 0 stevel STR_FTALLOC(&DB_FTHDR(mp), FTEV_ALLOCBW, size); 1196 0 stevel 1197 0 stevel FTRACE_1("allocb_wait (NOSIG): mp=0x%lx", (uintptr_t)mp); 1198 0 stevel 1199 0 stevel } else { 1200 0 stevel while ((mp = allocb(size, pri)) == NULL) { 1201 0 stevel if ((*error = strwaitbuf(size, BPRI_HI)) != 0) 1202 0 stevel return (NULL); 1203 0 stevel } 1204 0 stevel } 1205 0 stevel 1206 0 stevel return (mp); 1207 0 stevel } 1208 0 stevel 1209 0 stevel /* 1210 0 stevel * Call function 'func' with 'arg' when a class zero block can 1211 0 stevel * be allocated with priority 'pri'. 1212 0 stevel */ 1213 0 stevel bufcall_id_t 1214 0 stevel esbbcall(uint_t pri, void (*func)(void *), void *arg) 1215 0 stevel { 1216 0 stevel return (bufcall(1, pri, func, arg)); 1217 0 stevel } 1218 0 stevel 1219 0 stevel /* 1220 0 stevel * Allocates an iocblk (M_IOCTL) block. Properly sets the credentials 1221 0 stevel * ioc_id, rval and error of the struct ioctl to set up an ioctl call. 
1222 0 stevel * This provides consistency for all internal allocators of ioctl. 1223 0 stevel */ 1224 0 stevel mblk_t * 1225 0 stevel mkiocb(uint_t cmd) 1226 0 stevel { 1227 0 stevel struct iocblk *ioc; 1228 0 stevel mblk_t *mp; 1229 0 stevel 1230 0 stevel /* 1231 0 stevel * Allocate enough space for any of the ioctl related messages. 1232 0 stevel */ 1233 0 stevel if ((mp = allocb(sizeof (union ioctypes), BPRI_MED)) == NULL) 1234 0 stevel return (NULL); 1235 0 stevel 1236 0 stevel bzero(mp->b_rptr, sizeof (union ioctypes)); 1237 0 stevel 1238 0 stevel /* 1239 0 stevel * Set the mblk_t information and ptrs correctly. 1240 0 stevel */ 1241 0 stevel mp->b_wptr += sizeof (struct iocblk); 1242 0 stevel mp->b_datap->db_type = M_IOCTL; 1243 0 stevel 1244 0 stevel /* 1245 0 stevel * Fill in the fields. 1246 0 stevel */ 1247 0 stevel ioc = (struct iocblk *)mp->b_rptr; 1248 0 stevel ioc->ioc_cmd = cmd; 1249 0 stevel ioc->ioc_cr = kcred; 1250 0 stevel ioc->ioc_id = getiocseqno(); 1251 0 stevel ioc->ioc_flag = IOC_NATIVE; 1252 0 stevel return (mp); 1253 0 stevel } 1254 0 stevel 1255 0 stevel /* 1256 0 stevel * test if block of given size can be allocated with a request of 1257 0 stevel * the given priority. 1258 0 stevel * 'pri' is no longer used, but is retained for compatibility. 1259 0 stevel */ 1260 0 stevel /* ARGSUSED */ 1261 0 stevel int 1262 0 stevel testb(size_t size, uint_t pri) 1263 0 stevel { 1264 0 stevel return ((size + sizeof (dblk_t)) <= kmem_avail()); 1265 0 stevel } 1266 0 stevel 1267 0 stevel /* 1268 0 stevel * Call function 'func' with argument 'arg' when there is a reasonably 1269 0 stevel * good chance that a block of size 'size' can be allocated. 1270 0 stevel * 'pri' is no longer used, but is retained for compatibility. 
1271 0 stevel */ 1272 0 stevel /* ARGSUSED */ 1273 0 stevel bufcall_id_t 1274 0 stevel bufcall(size_t size, uint_t pri, void (*func)(void *), void *arg) 1275 0 stevel { 1276 0 stevel static long bid = 1; /* always odd to save checking for zero */ 1277 0 stevel bufcall_id_t bc_id; 1278 0 stevel struct strbufcall *bcp; 1279 0 stevel 1280 0 stevel if ((bcp = kmem_alloc(sizeof (strbufcall_t), KM_NOSLEEP)) == NULL) 1281 0 stevel return (0); 1282 0 stevel 1283 0 stevel bcp->bc_func = func; 1284 0 stevel bcp->bc_arg = arg; 1285 0 stevel bcp->bc_size = size; 1286 0 stevel bcp->bc_next = NULL; 1287 0 stevel bcp->bc_executor = NULL; 1288 0 stevel 1289 0 stevel mutex_enter(&strbcall_lock); 1290 0 stevel /* 1291 0 stevel * After bcp is linked into strbcalls and strbcall_lock is dropped there 1292 0 stevel * should be no references to bcp since it may be freed by 1293 0 stevel * runbufcalls(). Since bcp_id field is returned, we save its value in 1294 0 stevel * the local var. 1295 0 stevel */ 1296 0 stevel bc_id = bcp->bc_id = (bufcall_id_t)(bid += 2); /* keep it odd */ 1297 0 stevel 1298 0 stevel /* 1299 0 stevel * add newly allocated stream event to existing 1300 0 stevel * linked list of events. 1301 0 stevel */ 1302 0 stevel if (strbcalls.bc_head == NULL) { 1303 0 stevel strbcalls.bc_head = strbcalls.bc_tail = bcp; 1304 0 stevel } else { 1305 0 stevel strbcalls.bc_tail->bc_next = bcp; 1306 0 stevel strbcalls.bc_tail = bcp; 1307 0 stevel } 1308 0 stevel 1309 0 stevel cv_signal(&strbcall_cv); 1310 0 stevel mutex_exit(&strbcall_lock); 1311 0 stevel return (bc_id); 1312 0 stevel } 1313 0 stevel 1314 0 stevel /* 1315 0 stevel * Cancel a bufcall request. 
1316 0 stevel */ 1317 0 stevel void 1318 0 stevel unbufcall(bufcall_id_t id) 1319 0 stevel { 1320 0 stevel strbufcall_t *bcp, *pbcp; 1321 0 stevel 1322 0 stevel mutex_enter(&strbcall_lock); 1323 0 stevel again: 1324 0 stevel pbcp = NULL; 1325 0 stevel for (bcp = strbcalls.bc_head; bcp; bcp = bcp->bc_next) { 1326 0 stevel if (id == bcp->bc_id) 1327 0 stevel break; 1328 0 stevel pbcp = bcp; 1329 0 stevel } 1330 0 stevel if (bcp) { 1331 0 stevel if (bcp->bc_executor != NULL) { 1332 0 stevel if (bcp->bc_executor != curthread) { 1333 0 stevel cv_wait(&bcall_cv, &strbcall_lock); 1334 0 stevel goto again; 1335 0 stevel } 1336 0 stevel } else { 1337 0 stevel if (pbcp) 1338 0 stevel pbcp->bc_next = bcp->bc_next; 1339 0 stevel else 1340 0 stevel strbcalls.bc_head = bcp->bc_next; 1341 0 stevel if (bcp == strbcalls.bc_tail) 1342 0 stevel strbcalls.bc_tail = pbcp; 1343 0 stevel kmem_free(bcp, sizeof (strbufcall_t)); 1344 0 stevel } 1345 0 stevel } 1346 0 stevel mutex_exit(&strbcall_lock); 1347 0 stevel } 1348 0 stevel 1349 0 stevel /* 1350 0 stevel * Duplicate a message block by block (uses dupb), returning 1351 0 stevel * a pointer to the duplicate message. 1352 0 stevel * Returns a non-NULL value only if the entire message 1353 0 stevel * was dup'd. 1354 0 stevel */ 1355 0 stevel mblk_t * 1356 0 stevel dupmsg(mblk_t *bp) 1357 0 stevel { 1358 0 stevel mblk_t *head, *nbp; 1359 0 stevel 1360 0 stevel if (!bp || !(nbp = head = dupb(bp))) 1361 0 stevel return (NULL); 1362 0 stevel 1363 0 stevel while (bp->b_cont) { 1364 0 stevel if (!(nbp->b_cont = dupb(bp->b_cont))) { 1365 0 stevel freemsg(head); 1366 0 stevel return (NULL); 1367 0 stevel } 1368 0 stevel nbp = nbp->b_cont; 1369 0 stevel bp = bp->b_cont; 1370 0 stevel } 1371 0 stevel return (head); 1372 0 stevel } 1373 0 stevel 1374 0 stevel #define DUPB_NOLOAN(bp) \ 1375 0 stevel ((((bp)->b_datap->db_struioflag & STRUIO_ZC) != 0) ? 
\ 1376 0 stevel copyb((bp)) : dupb((bp))) 1377 0 stevel 1378 0 stevel mblk_t * 1379 0 stevel dupmsg_noloan(mblk_t *bp) 1380 0 stevel { 1381 0 stevel mblk_t *head, *nbp; 1382 0 stevel 1383 0 stevel if (bp == NULL || DB_TYPE(bp) != M_DATA || 1384 0 stevel ((nbp = head = DUPB_NOLOAN(bp)) == NULL)) 1385 0 stevel return (NULL); 1386 0 stevel 1387 0 stevel while (bp->b_cont) { 1388 0 stevel if ((nbp->b_cont = DUPB_NOLOAN(bp->b_cont)) == NULL) { 1389 0 stevel freemsg(head); 1390 0 stevel return (NULL); 1391 0 stevel } 1392 0 stevel nbp = nbp->b_cont; 1393 0 stevel bp = bp->b_cont; 1394 0 stevel } 1395 0 stevel return (head); 1396 0 stevel } 1397 0 stevel 1398 0 stevel /* 1399 0 stevel * Copy data from message and data block to newly allocated message and 1400 0 stevel * data block. Returns new message block pointer, or NULL if error. 1401 0 stevel * The alignment of rptr (w.r.t. word alignment) will be the same in the copy 1402 0 stevel * as in the original even when db_base is not word aligned. (bug 1052877) 1403 0 stevel */ 1404 0 stevel mblk_t * 1405 0 stevel copyb(mblk_t *bp) 1406 0 stevel { 1407 0 stevel mblk_t *nbp; 1408 0 stevel dblk_t *dp, *ndp; 1409 0 stevel uchar_t *base; 1410 0 stevel size_t size; 1411 0 stevel size_t unaligned; 1412 0 stevel 1413 0 stevel ASSERT(bp->b_wptr >= bp->b_rptr); 1414 0 stevel 1415 0 stevel dp = bp->b_datap; 1416 0 stevel if (dp->db_fthdr != NULL) 1417 0 stevel STR_FTEVENT_MBLK(bp, caller(), FTEV_COPYB, 0); 1418 0 stevel 1419 0 stevel /* 1420 0 stevel * Special handling for Multidata message; this should be 1421 0 stevel * removed once a copy-callback routine is made available. 
1422 0 stevel */ 1423 0 stevel if (dp->db_type == M_MULTIDATA) { 1424 0 stevel cred_t *cr; 1425 0 stevel 1426 0 stevel if ((nbp = mmd_copy(bp, KM_NOSLEEP)) == NULL) 1427 0 stevel return (NULL); 1428 0 stevel 1429 0 stevel nbp->b_flag = bp->b_flag; 1430 0 stevel nbp->b_band = bp->b_band; 1431 0 stevel ndp = nbp->b_datap; 1432 0 stevel 1433 0 stevel /* See comments below on potential issues. */ 1434 0 stevel STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1); 1435 0 stevel 1436 0 stevel ASSERT(ndp->db_type == dp->db_type); 1437 0 stevel cr = dp->db_credp; 1438 0 stevel if (cr != NULL) 1439 0 stevel crhold(ndp->db_credp = cr); 1440 0 stevel ndp->db_cpid = dp->db_cpid; 1441 0 stevel return (nbp); 1442 0 stevel } 1443 0 stevel 1444 0 stevel size = dp->db_lim - dp->db_base; 1445 0 stevel unaligned = P2PHASE((uintptr_t)dp->db_base, sizeof (uint_t)); 1446 0 stevel if ((nbp = allocb_tmpl(size + unaligned, bp)) == NULL) 1447 0 stevel return (NULL); 1448 0 stevel nbp->b_flag = bp->b_flag; 1449 0 stevel nbp->b_band = bp->b_band; 1450 0 stevel ndp = nbp->b_datap; 1451 0 stevel 1452 0 stevel /* 1453 0 stevel * Well, here is a potential issue. If we are trying to 1454 0 stevel * trace a flow, and we copy the message, we might lose 1455 0 stevel * information about where this message might have been. 1456 0 stevel * So we should inherit the FT data. On the other hand, 1457 0 stevel * a user might be interested only in alloc to free data. 1458 0 stevel * So I guess the real answer is to provide a tunable. 
1459 0 stevel */ 1460 0 stevel STR_FTEVENT_MBLK(nbp, caller(), FTEV_COPYB, 1); 1461 0 stevel 1462 0 stevel base = ndp->db_base + unaligned; 1463 0 stevel bcopy(dp->db_base, ndp->db_base + unaligned, size); 1464 0 stevel 1465 0 stevel nbp->b_rptr = base + (bp->b_rptr - dp->db_base); 1466 0 stevel nbp->b_wptr = nbp->b_rptr + MBLKL(bp); 1467 0 stevel 1468 0 stevel return (nbp); 1469 0 stevel } 1470 0 stevel 1471 0 stevel /* 1472 0 stevel * Copy data from message to newly allocated message using new 1473 0 stevel * data blocks. Returns a pointer to the new message, or NULL if error. 1474 0 stevel */ 1475 0 stevel mblk_t * 1476 0 stevel copymsg(mblk_t *bp) 1477 0 stevel { 1478 0 stevel mblk_t *head, *nbp; 1479 0 stevel 1480 0 stevel if (!bp || !(nbp = head = copyb(bp))) 1481 0 stevel return (NULL); 1482 0 stevel 1483 0 stevel while (bp->b_cont) { 1484 0 stevel if (!(nbp->b_cont = copyb(bp->b_cont))) { 1485 0 stevel freemsg(head); 1486 0 stevel return (NULL); 1487 0 stevel } 1488 0 stevel nbp = nbp->b_cont; 1489 0 stevel bp = bp->b_cont; 1490 0 stevel } 1491 0 stevel return (head); 1492 0 stevel } 1493 0 stevel 1494 0 stevel /* 1495 0 stevel * link a message block to tail of message 1496 0 stevel */ 1497 0 stevel void 1498 0 stevel linkb(mblk_t *mp, mblk_t *bp) 1499 0 stevel { 1500 0 stevel ASSERT(mp && bp); 1501 0 stevel 1502 0 stevel for (; mp->b_cont; mp = mp->b_cont) 1503 0 stevel ; 1504 0 stevel mp->b_cont = bp; 1505 0 stevel } 1506 0 stevel 1507 0 stevel /* 1508 0 stevel * unlink a message block from head of message 1509 0 stevel * return pointer to new message. 1510 0 stevel * NULL if message becomes empty. 
1511 0 stevel */ 1512 0 stevel mblk_t * 1513 0 stevel unlinkb(mblk_t *bp) 1514 0 stevel { 1515 0 stevel mblk_t *bp1; 1516 0 stevel 1517 0 stevel bp1 = bp->b_cont; 1518 0 stevel bp->b_cont = NULL; 1519 0 stevel return (bp1); 1520 0 stevel } 1521 0 stevel 1522 0 stevel /* 1523 0 stevel * remove a message block "bp" from message "mp" 1524 0 stevel * 1525 0 stevel * Return pointer to new message or NULL if no message remains. 1526 0 stevel * Return -1 if bp is not found in message. 1527 0 stevel */ 1528 0 stevel mblk_t * 1529 0 stevel rmvb(mblk_t *mp, mblk_t *bp) 1530 0 stevel { 1531 0 stevel mblk_t *tmp; 1532 0 stevel mblk_t *lastp = NULL; 1533 0 stevel 1534 0 stevel ASSERT(mp && bp); 1535 0 stevel for (tmp = mp; tmp; tmp = tmp->b_cont) { 1536 0 stevel if (tmp == bp) { 1537 0 stevel if (lastp) 1538 0 stevel lastp->b_cont = tmp->b_cont; 1539 0 stevel else 1540 0 stevel mp = tmp->b_cont; 1541 0 stevel tmp->b_cont = NULL; 1542 0 stevel return (mp); 1543 0 stevel } 1544 0 stevel lastp = tmp; 1545 0 stevel } 1546 0 stevel return ((mblk_t *)-1); 1547 0 stevel } 1548 0 stevel 1549 0 stevel /* 1550 0 stevel * Concatenate and align first len bytes of common 1551 0 stevel * message type. Len == -1, means concat everything. 1552 0 stevel * Returns 1 on success, 0 on failure 1553 0 stevel * After the pullup, mp points to the pulled up data. 1554 0 stevel */ 1555 0 stevel int 1556 0 stevel pullupmsg(mblk_t *mp, ssize_t len) 1557 0 stevel { 1558 0 stevel mblk_t *bp, *b_cont; 1559 0 stevel dblk_t *dbp; 1560 0 stevel ssize_t n; 1561 0 stevel 1562 0 stevel ASSERT(mp->b_datap->db_ref > 0); 1563 0 stevel ASSERT(mp->b_next == NULL && mp->b_prev == NULL); 1564 0 stevel 1565 0 stevel /* 1566 0 stevel * We won't handle Multidata message, since it contains 1567 0 stevel * metadata which this function has no knowledge of; we 1568 0 stevel * assert on DEBUG, and return failure otherwise. 
1569 0 stevel */ 1570 0 stevel ASSERT(mp->b_datap->db_type != M_MULTIDATA); 1571 0 stevel if (mp->b_datap->db_type == M_MULTIDATA) 1572 0 stevel return (0); 1573 0 stevel 1574 0 stevel if (len == -1) { 1575 0 stevel if (mp->b_cont == NULL && str_aligned(mp->b_rptr)) 1576 0 stevel return (1); 1577 0 stevel len = xmsgsize(mp); 1578 0 stevel } else { 1579 0 stevel ssize_t first_mblk_len = mp->b_wptr - mp->b_rptr; 1580 0 stevel ASSERT(first_mblk_len >= 0); 1581 0 stevel /* 1582 0 stevel * If the length is less than that of the first mblk, 1583 0 stevel * we want to pull up the message into an aligned mblk. 1584 0 stevel * Though not part of the spec, some callers assume it. 1585 0 stevel */ 1586 0 stevel if (len <= first_mblk_len) { 1587 0 stevel if (str_aligned(mp->b_rptr)) 1588 0 stevel return (1); 1589 0 stevel len = first_mblk_len; 1590 0 stevel } else if (xmsgsize(mp) < len) 1591 0 stevel return (0); 1592 0 stevel } 1593 0 stevel 1594 0 stevel if ((bp = allocb_tmpl(len, mp)) == NULL) 1595 0 stevel return (0); 1596 0 stevel 1597 0 stevel dbp = bp->b_datap; 1598 0 stevel *bp = *mp; /* swap mblks so bp heads the old msg... */ 1599 0 stevel mp->b_datap = dbp; /* ... 
and mp heads the new message */ 1600 0 stevel mp->b_datap->db_mblk = mp; 1601 0 stevel bp->b_datap->db_mblk = bp; 1602 0 stevel mp->b_rptr = mp->b_wptr = dbp->db_base; 1603 0 stevel 1604 0 stevel do { 1605 0 stevel ASSERT(bp->b_datap->db_ref > 0); 1606 0 stevel ASSERT(bp->b_wptr >= bp->b_rptr); 1607 0 stevel n = MIN(bp->b_wptr - bp->b_rptr, len); 1608 11042 Erik ASSERT(n >= 0); /* allow zero-length mblk_t's */ 1609 11042 Erik if (n > 0) 1610 11042 Erik bcopy(bp->b_rptr, mp->b_wptr, (size_t)n); 1611 0 stevel mp->b_wptr += n; 1612 0 stevel bp->b_rptr += n; 1613 0 stevel len -= n; 1614 0 stevel if (bp->b_rptr != bp->b_wptr) 1615 0 stevel break; 1616 0 stevel b_cont = bp->b_cont; 1617 0 stevel freeb(bp); 1618 0 stevel bp = b_cont; 1619 0 stevel } while (len && bp); 1620 0 stevel 1621 0 stevel mp->b_cont = bp; /* tack on whatever wasn't pulled up */ 1622 0 stevel 1623 0 stevel return (1); 1624 0 stevel } 1625 0 stevel 1626 0 stevel /* 1627 0 stevel * Concatenate and align at least the first len bytes of common message 1628 0 stevel * type. Len == -1 means concatenate everything. The original message is 1629 0 stevel * unaltered. Returns a pointer to a new message on success, otherwise 1630 0 stevel * returns NULL. 1631 0 stevel */ 1632 0 stevel mblk_t * 1633 0 stevel msgpullup(mblk_t *mp, ssize_t len) 1634 0 stevel { 1635 0 stevel mblk_t *newmp; 1636 0 stevel ssize_t totlen; 1637 0 stevel ssize_t n; 1638 0 stevel 1639 0 stevel /* 1640 0 stevel * We won't handle Multidata message, since it contains 1641 0 stevel * metadata which this function has no knowledge of; we 1642 0 stevel * assert on DEBUG, and return failure otherwise. 
1643 0 stevel */ 1644 0 stevel ASSERT(mp->b_datap->db_type != M_MULTIDATA); 1645 0 stevel if (mp->b_datap->db_type == M_MULTIDATA) 1646 0 stevel return (NULL); 1647 0 stevel 1648 0 stevel totlen = xmsgsize(mp); 1649 0 stevel 1650 0 stevel if ((len > 0) && (len > totlen)) 1651 0 stevel return (NULL); 1652 0 stevel 1653 0 stevel /* 1654 0 stevel * Copy all of the first msg type into one new mblk, then dupmsg 1655 0 stevel * and link the rest onto this. 1656 0 stevel */ 1657 0 stevel 1658 0 stevel len = totlen; 1659 0 stevel 1660 0 stevel if ((newmp = allocb_tmpl(len, mp)) == NULL) 1661 0 stevel return (NULL); 1662 0 stevel 1663 0 stevel newmp->b_flag = mp->b_flag; 1664 0 stevel newmp->b_band = mp->b_band; 1665 0 stevel 1666 0 stevel while (len > 0) { 1667 0 stevel n = mp->b_wptr - mp->b_rptr; 1668 0 stevel ASSERT(n >= 0); /* allow zero-length mblk_t's */ 1669 0 stevel if (n > 0) 1670 0 stevel bcopy(mp->b_rptr, newmp->b_wptr, n); 1671 0 stevel newmp->b_wptr += n; 1672 0 stevel len -= n; 1673 0 stevel mp = mp->b_cont; 1674 0 stevel } 1675 0 stevel 1676 0 stevel if (mp != NULL) { 1677 0 stevel newmp->b_cont = dupmsg(mp); 1678 0 stevel if (newmp->b_cont == NULL) { 1679 0 stevel freemsg(newmp); 1680 0 stevel return (NULL); 1681 0 stevel } 1682 0 stevel } 1683 0 stevel 1684 0 stevel return (newmp); 1685 0 stevel } 1686 0 stevel 1687 0 stevel /* 1688 0 stevel * Trim bytes from message 1689 0 stevel * len > 0, trim from head 1690 0 stevel * len < 0, trim from tail 1691 0 stevel * Returns 1 on success, 0 on failure. 
1692 0 stevel */ 1693 0 stevel int 1694 0 stevel adjmsg(mblk_t *mp, ssize_t len) 1695 0 stevel { 1696 0 stevel mblk_t *bp; 1697 0 stevel mblk_t *save_bp = NULL; 1698 0 stevel mblk_t *prev_bp; 1699 0 stevel mblk_t *bcont; 1700 0 stevel unsigned char type; 1701 0 stevel ssize_t n; 1702 0 stevel int fromhead; 1703 0 stevel int first; 1704 0 stevel 1705 0 stevel ASSERT(mp != NULL); 1706 0 stevel /* 1707 0 stevel * We won't handle Multidata message, since it contains 1708 0 stevel * metadata which this function has no knowledge of; we 1709 0 stevel * assert on DEBUG, and return failure otherwise. 1710 0 stevel */ 1711 0 stevel ASSERT(mp->b_datap->db_type != M_MULTIDATA); 1712 0 stevel if (mp->b_datap->db_type == M_MULTIDATA) 1713 0 stevel return (0); 1714 0 stevel 1715 0 stevel if (len < 0) { 1716 0 stevel fromhead = 0; 1717 0 stevel len = -len; 1718 0 stevel } else { 1719 0 stevel fromhead = 1; 1720 0 stevel } 1721 0 stevel 1722 0 stevel if (xmsgsize(mp) < len) 1723 0 stevel return (0); 1724 0 stevel 1725 0 stevel if (fromhead) { 1726 0 stevel first = 1; 1727 0 stevel while (len) { 1728 0 stevel ASSERT(mp->b_wptr >= mp->b_rptr); 1729 0 stevel n = MIN(mp->b_wptr - mp->b_rptr, len); 1730 0 stevel mp->b_rptr += n; 1731 0 stevel len -= n; 1732 0 stevel 1733 0 stevel /* 1734 0 stevel * If this is not the first zero length 1735 0 stevel * message remove it 1736 0 stevel */ 1737 0 stevel if (!first && (mp->b_wptr == mp->b_rptr)) { 1738 0 stevel bcont = mp->b_cont; 1739 0 stevel freeb(mp); 1740 0 stevel mp = save_bp->b_cont = bcont; 1741 0 stevel } else { 1742 0 stevel save_bp = mp; 1743 0 stevel mp = mp->b_cont; 1744 0 stevel } 1745 0 stevel first = 0; 1746 0 stevel } 1747 0 stevel } else { 1748 0 stevel type = mp->b_datap->db_type; 1749 0 stevel while (len) { 1750 0 stevel bp = mp; 1751 0 stevel save_bp = NULL; 1752 0 stevel 1753 0 stevel /* 1754 0 stevel * Find the last message of same type 1755 0 stevel */ 1756 0 stevel while (bp && bp->b_datap->db_type == type) { 1757 0 
stevel ASSERT(bp->b_wptr >= bp->b_rptr); 1758 0 stevel prev_bp = save_bp; 1759 0 stevel save_bp = bp; 1760 0 stevel bp = bp->b_cont; 1761 0 stevel } 1762 0 stevel if (save_bp == NULL) 1763 0 stevel break; 1764 0 stevel n = MIN(save_bp->b_wptr - save_bp->b_rptr, len); 1765 0 stevel save_bp->b_wptr -= n; 1766 0 stevel len -= n; 1767 0 stevel 1768 0 stevel /* 1769 0 stevel * If this is not the first message 1770 0 stevel * and we have taken away everything 1771 0 stevel * from this message, remove it 1772 0 stevel */ 1773 0 stevel 1774 0 stevel if ((save_bp != mp) && 1775 6707 brutus (save_bp->b_wptr == save_bp->b_rptr)) { 1776 0 stevel bcont = save_bp->b_cont; 1777 0 stevel freeb(save_bp); 1778 0 stevel prev_bp->b_cont = bcont; 1779 0 stevel } 1780 0 stevel } 1781 0 stevel } 1782 0 stevel return (1); 1783 0 stevel } 1784 0 stevel 1785 0 stevel /* 1786 0 stevel * get number of data bytes in message 1787 0 stevel */ 1788 0 stevel size_t 1789 0 stevel msgdsize(mblk_t *bp) 1790 0 stevel { 1791 0 stevel size_t count = 0; 1792 0 stevel 1793 0 stevel for (; bp; bp = bp->b_cont) 1794 0 stevel if (bp->b_datap->db_type == M_DATA) { 1795 0 stevel ASSERT(bp->b_wptr >= bp->b_rptr); 1796 0 stevel count += bp->b_wptr - bp->b_rptr; 1797 0 stevel } 1798 0 stevel return (count); 1799 0 stevel } 1800 0 stevel 1801 0 stevel /* 1802 0 stevel * Get a message off head of queue 1803 0 stevel * 1804 0 stevel * If queue has no buffers then mark queue 1805 0 stevel * with QWANTR. (queue wants to be read by 1806 0 stevel * someone when data becomes available) 1807 0 stevel * 1808 0 stevel * If there is something to take off then do so. 1809 0 stevel * If queue falls below hi water mark turn off QFULL 1810 0 stevel * flag. Decrement weighted count of queue. 1811 0 stevel * Also turn off QWANTR because queue is being read. 1812 0 stevel * 1813 0 stevel * The queue count is maintained on a per-band basis. 1814 0 stevel * Priority band 0 (normal messages) uses q_count, 1815 0 stevel * q_lowat, etc. 
Non-zero priority bands use the 1816 0 stevel * fields in their respective qband structures 1817 0 stevel * (qb_count, qb_lowat, etc.) All messages appear 1818 0 stevel * on the same list, linked via their b_next pointers. 1819 0 stevel * q_first is the head of the list. q_count does 1820 0 stevel * not reflect the size of all the messages on the 1821 0 stevel * queue. It only reflects those messages in the 1822 0 stevel * normal band of flow. The one exception to this 1823 0 stevel * deals with high priority messages. They are in 1824 0 stevel * their own conceptual "band", but are accounted 1825 0 stevel * against q_count. 1826 0 stevel * 1827 0 stevel * If queue count is below the lo water mark and QWANTW 1828 0 stevel * is set, enable the closest backq which has a service 1829 0 stevel * procedure and turn off the QWANTW flag. 1830 0 stevel * 1831 0 stevel * getq could be built on top of rmvq, but isn't because 1832 0 stevel * of performance considerations. 1833 0 stevel * 1834 0 stevel * A note on the use of q_count and q_mblkcnt: 1835 0 stevel * q_count is the traditional byte count for messages that 1836 0 stevel * have been put on a queue. Documentation tells us that 1837 0 stevel * we shouldn't rely on that count, but some drivers/modules 1838 0 stevel * do. What was needed, however, is a mechanism to prevent 1839 0 stevel * runaway streams from consuming all of the resources, 1840 0 stevel * and particularly be able to flow control zero-length 1841 0 stevel * messages. q_mblkcnt is used for this purpose. It 1842 0 stevel * counts the number of mblk's that are being put on 1843 0 stevel * the queue. The intention here, is that each mblk should 1844 0 stevel * contain one byte of data and, for the purpose of 1845 0 stevel * flow-control, logically does. A queue will become 1846 0 stevel * full when EITHER of these values (q_count and q_mblkcnt) 1847 0 stevel * reach the highwater mark. 
It will clear when BOTH 1848 0 stevel * of them drop below the highwater mark. And it will 1849 0 stevel * backenable when BOTH of them drop below the lowwater 1850 0 stevel * mark. 1851 0 stevel * With this algorithm, a driver/module might be able 1852 0 stevel * to find a reasonably accurate q_count, and the 1853 0 stevel * framework can still try and limit resource usage. 1854 0 stevel */ 1855 0 stevel mblk_t * 1856 0 stevel getq(queue_t *q) 1857 0 stevel { 1858 0 stevel mblk_t *bp; 1859 235 micheng uchar_t band = 0; 1860 0 stevel 1861 6769 ja97890 bp = getq_noenab(q, 0); 1862 0 stevel if (bp != NULL) 1863 0 stevel band = bp->b_band; 1864 0 stevel 1865 0 stevel /* 1866 0 stevel * Inlined from qbackenable(). 1867 0 stevel * Quick check without holding the lock. 1868 0 stevel */ 1869 0 stevel if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0) 1870 0 stevel return (bp); 1871 0 stevel 1872 0 stevel qbackenable(q, band); 1873 0 stevel return (bp); 1874 0 stevel } 1875 0 stevel 1876 0 stevel /* 1877 741 masputra * Calculate number of data bytes in a single data message block taking 1878 741 masputra * multidata messages into account. 1879 741 masputra */ 1880 741 masputra 1881 741 masputra #define ADD_MBLK_SIZE(mp, size) \ 1882 741 masputra if (DB_TYPE(mp) != M_MULTIDATA) { \ 1883 741 masputra (size) += MBLKL(mp); \ 1884 741 masputra } else { \ 1885 741 masputra uint_t pinuse; \ 1886 741 masputra \ 1887 741 masputra mmd_getsize(mmd_getmultidata(mp), NULL, &pinuse); \ 1888 741 masputra (size) += pinuse; \ 1889 741 masputra } 1890 741 masputra 1891 741 masputra /* 1892 6769 ja97890 * Returns the number of bytes in a message (a message is defined as a 1893 6769 ja97890 * chain of mblks linked by b_cont). If a non-NULL mblkcnt is supplied we 1894 6769 ja97890 * also return the number of distinct mblks in the message. 
1895 6769 ja97890 */ 1896 6769 ja97890 int 1897 6769 ja97890 mp_cont_len(mblk_t *bp, int *mblkcnt) 1898 6769 ja97890 { 1899 6769 ja97890 mblk_t *mp; 1900 6769 ja97890 int mblks = 0; 1901 6769 ja97890 int bytes = 0; 1902 6769 ja97890 1903 6769 ja97890 for (mp = bp; mp != NULL; mp = mp->b_cont) { 1904 6769 ja97890 ADD_MBLK_SIZE(mp, bytes); 1905 6769 ja97890 mblks++; 1906 6769 ja97890 } 1907 6769 ja97890 1908 6769 ja97890 if (mblkcnt != NULL) 1909 6769 ja97890 *mblkcnt = mblks; 1910 6769 ja97890 1911 6769 ja97890 return (bytes); 1912 6769 ja97890 } 1913 6769 ja97890 1914 6769 ja97890 /* 1915 0 stevel * Like getq() but does not backenable. This is used by the stream 1916 0 stevel * head when a putback() is likely. The caller must call qbackenable() 1917 0 stevel * after it is done with accessing the queue. 1918 6769 ja97890 * The rbytes arguments to getq_noneab() allows callers to specify a 1919 6769 ja97890 * the maximum number of bytes to return. If the current amount on the 1920 6769 ja97890 * queue is less than this then the entire message will be returned. 1921 6769 ja97890 * A value of 0 returns the entire message and is equivalent to the old 1922 6769 ja97890 * default behaviour prior to the addition of the rbytes argument. 
 */
/*
 * q:      queue to take the first message from.
 * rbytes: byte limit; only honoured when it is > 0, the first mblk is
 *         M_DATA and q_count exceeds it.
 *
 * Returns the message removed from the head of the queue, or NULL if the
 * queue was empty (in which case QWANTR is set on the queue).
 *
 * QLOCK(q) is taken here unless the calling thread is the one that froze
 * the stream, in which case it must already hold it.
 */
mblk_t *
getq_noenab(queue_t *q, ssize_t rbytes)
{
	mblk_t *bp, *mp1;
	mblk_t *mp2 = NULL;
	qband_t *qbp;
	kthread_id_t freezer;
	int	bytecnt = 0, mblkcnt = 0;

	/* freezestr should allow its caller to call getq/putq */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	if ((bp = q->q_first) == 0) {
		/* Nothing queued: record that a reader is waiting. */
		q->q_flag |= QWANTR;
	} else {
		/*
		 * If the caller supplied a byte threshold and there is
		 * more than this amount on the queue then break up
		 * the message appropriately.  We can only safely do
		 * this for M_DATA messages.
		 */
		if ((DB_TYPE(bp) == M_DATA) && (rbytes > 0) &&
		    (q->q_count > rbytes)) {
			/*
			 * Inline version of mp_cont_len() which terminates
			 * when we meet or exceed rbytes.
			 */
			for (mp1 = bp; mp1 != NULL; mp1 = mp1->b_cont) {
				mblkcnt++;
				ADD_MBLK_SIZE(mp1, bytecnt);
				if (bytecnt  >= rbytes)
					break;
			}
			/*
			 * We need to account for the following scenarios:
			 *
			 * 1) Too much data in the first message:
			 *	mp1 will be the mblk which puts us over our
			 *	byte limit.
			 * 2) Not enough data in the first message:
			 *	mp1 will be NULL.
			 * 3) Exactly the right amount of data contained within
			 *	whole mblks:
			 *	mp1->b_cont will be where we break the message.
			 */
			if (bytecnt > rbytes) {
				/*
				 * Dup/copy mp1 and put what we don't need
				 * back onto the queue. Adjust the read/write
				 * and continuation pointers appropriately
				 * and decrement the current mblk count to
				 * reflect we are putting an mblk back onto
				 * the queue.
				 * When adjusting the message pointers, it's
				 * OK to use the existing bytecnt and the
				 * requested amount (rbytes) to calculate
				 * the new write offset (b_wptr) of what we
				 * are taking. However, we cannot use these
				 * values when calculating the read offset of
				 * the mblk we are putting back on the queue.
				 * This is because the beginning (b_rptr) of
				 * the mblk represents some arbitrary point
				 * within the message.
				 * It's simplest to do this by advancing b_rptr
				 * by the new length of mp1 as we don't have to
				 * remember any intermediate state.
				 */
				ASSERT(mp1 != NULL);
				mblkcnt--;
				if ((mp2 = dupb(mp1)) == NULL &&
				    (mp2 = copyb(mp1)) == NULL) {
					/*
					 * Could neither dup nor copy: reset
					 * the counters and fall back to
					 * taking the whole message.
					 */
					bytecnt = mblkcnt = 0;
					goto dup_failed;
				}
				mp2->b_cont = mp1->b_cont;
				mp1->b_wptr -= bytecnt - rbytes;
				mp2->b_rptr += mp1->b_wptr - mp1->b_rptr;
				mp1->b_cont = NULL;
				bytecnt = rbytes;
			} else {
				/*
				 * Either there is not enough data in the first
				 * message or there is no excess data to deal
				 * with. If mp1 is NULL, we are taking the
				 * whole message. No need to do anything.
				 * Otherwise we assign mp1->b_cont to mp2 as
				 * we will be putting this back onto the head of
				 * the queue.
				 */
				if (mp1 != NULL) {
					mp2 = mp1->b_cont;
					mp1->b_cont = NULL;
				}
			}
			/*
			 * If mp2 is not NULL then we have part of the message
			 * to put back onto the queue.
			 */
			if (mp2 != NULL) {
				/* mp2 takes bp's place at the head. */
				if ((mp2->b_next = bp->b_next) == NULL)
					q->q_last = mp2;
				else
					bp->b_next->b_prev = mp2;
				q->q_first = mp2;
			} else {
				/* Whole message taken: unlink bp. */
				if ((q->q_first = bp->b_next) == NULL)
					q->q_last = NULL;
				else
					q->q_first->b_prev = NULL;
			}
		} else {
			/*
			 * Either no byte threshold was supplied, there is
			 * not enough on the queue or we failed to
			 * duplicate/copy a data block. In these cases we
			 * just take the entire first message.
			 */
dup_failed:
			bytecnt = mp_cont_len(bp, &mblkcnt);
			if ((q->q_first = bp->b_next) == NULL)
				q->q_last = NULL;
			else
				q->q_first->b_prev = NULL;
		}
		/*
		 * Flow-control accounting: subtract what was taken and
		 * clear the "full" flag once no mblks remain or both
		 * counters are below the high water mark.
		 */
		if (bp->b_band == 0) {
			q->q_count -= bytecnt;
			q->q_mblkcnt -= mblkcnt;
			if (q->q_mblkcnt == 0 || ((q->q_count < q->q_hiwat) &&
			    (q->q_mblkcnt < q->q_hiwat))) {
				q->q_flag &= ~QFULL;
			}
		} else {
			int i;

			ASSERT(bp->b_band <= q->q_nband);
			ASSERT(q->q_bandp != NULL);
			ASSERT(MUTEX_HELD(QLOCK(q)));
			/* Walk to the qband structure for bp's band. */
			qbp = q->q_bandp;
			i = bp->b_band;
			while (--i > 0)
				qbp = qbp->qb_next;
			/*
			 * NOTE(review): these band pointers are derived from
			 * bp->b_next only.  If the split path above left mp2
			 * at the head of the queue for a banded M_DATA
			 * message, qb_first/qb_last would not refer to it --
			 * confirm whether that combination can occur.
			 */
			if (qbp->qb_first == qbp->qb_last) {
				qbp->qb_first = NULL;
				qbp->qb_last = NULL;
			} else {
				qbp->qb_first = bp->b_next;
			}
			qbp->qb_count -= bytecnt;
			qbp->qb_mblkcnt -= mblkcnt;
			if (qbp->qb_mblkcnt == 0 ||
			    ((qbp->qb_count < qbp->qb_hiwat) &&
			    (qbp->qb_mblkcnt < qbp->qb_hiwat))) {
				qbp->qb_flag &= ~QB_FULL;
			}
		}
		/* The queue is being read, and bp is now off the list. */
		q->q_flag &= ~QWANTR;
		bp->b_next = NULL;
		bp->b_prev = NULL;
	}
	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	STR_FTEVENT_MSG(bp, q, FTEV_GETQ, NULL);

	return (bp);
}

/*
 * Determine if a backenable is needed after removing a message in the
 * specified band.
2099 0 stevel * NOTE: This routine assumes that something like getq_noenab() has been 2100 0 stevel * already called. 2101 0 stevel * 2102 0 stevel * For the read side it is ok to hold sd_lock across calling this (and the 2103 0 stevel * stream head often does). 2104 0 stevel * But for the write side strwakeq might be invoked and it acquires sd_lock. 2105 0 stevel */ 2106 0 stevel void 2107 235 micheng qbackenable(queue_t *q, uchar_t band) 2108 0 stevel { 2109 0 stevel int backenab = 0; 2110 0 stevel qband_t *qbp; 2111 0 stevel kthread_id_t freezer; 2112 0 stevel 2113 0 stevel ASSERT(q); 2114 0 stevel ASSERT((q->q_flag & QREADR) || MUTEX_NOT_HELD(&STREAM(q)->sd_lock)); 2115 0 stevel 2116 0 stevel /* 2117 0 stevel * Quick check without holding the lock. 2118 0 stevel * OK since after getq() has lowered the q_count these flags 2119 0 stevel * would not change unless either the qbackenable() is done by 2120 0 stevel * another thread (which is ok) or the queue has gotten QFULL 2121 0 stevel * in which case another backenable will take place when the queue 2122 0 stevel * drops below q_lowat. 
2123 0 stevel */ 2124 0 stevel if (band == 0 && (q->q_flag & (QWANTW|QWANTWSYNC)) == 0) 2125 0 stevel return; 2126 0 stevel 2127 0 stevel /* freezestr should allow its caller to call getq/putq */ 2128 0 stevel freezer = STREAM(q)->sd_freezer; 2129 0 stevel if (freezer == curthread) { 2130 0 stevel ASSERT(frozenstr(q)); 2131 0 stevel ASSERT(MUTEX_HELD(QLOCK(q))); 2132 0 stevel } else 2133 0 stevel mutex_enter(QLOCK(q)); 2134 0 stevel 2135 0 stevel if (band == 0) { 2136 0 stevel if (q->q_lowat == 0 || (q->q_count < q->q_lowat && 2137 0 stevel q->q_mblkcnt < q->q_lowat)) { 2138 0 stevel backenab = q->q_flag & (QWANTW|QWANTWSYNC); 2139 0 stevel } 2140 0 stevel } else { 2141 0 stevel int i; 2142 0 stevel 2143 0 stevel ASSERT((unsigned)band <= q->q_nband); 2144 0 stevel ASSERT(q->q_bandp != NULL); 2145 0 stevel 2146 0 stevel qbp = q->q_bandp; 2147 0 stevel i = band; 2148 0 stevel while (--i > 0) 2149 0 stevel qbp = qbp->qb_next; 2150 0 stevel 2151 0 stevel if (qbp->qb_lowat == 0 || (qbp->qb_count < qbp->qb_lowat && 2152 0 stevel qbp->qb_mblkcnt < qbp->qb_lowat)) { 2153 0 stevel backenab = qbp->qb_flag & QB_WANTW; 2154 0 stevel } 2155 0 stevel } 2156 0 stevel 2157 0 stevel if (backenab == 0) { 2158 0 stevel if (freezer != curthread) 2159 0 stevel mutex_exit(QLOCK(q)); 2160 0 stevel return; 2161 0 stevel } 2162 0 stevel 2163 0 stevel /* Have to drop the lock across strwakeq and backenable */ 2164 0 stevel if (backenab & QWANTWSYNC) 2165 0 stevel q->q_flag &= ~QWANTWSYNC; 2166 0 stevel if (backenab & (QWANTW|QB_WANTW)) { 2167 0 stevel if (band != 0) 2168 0 stevel qbp->qb_flag &= ~QB_WANTW; 2169 0 stevel else { 2170 0 stevel q->q_flag &= ~QWANTW; 2171 0 stevel } 2172 0 stevel } 2173 0 stevel 2174 0 stevel if (freezer != curthread) 2175 0 stevel mutex_exit(QLOCK(q)); 2176 0 stevel 2177 0 stevel if (backenab & QWANTWSYNC) 2178 0 stevel strwakeq(q, QWANTWSYNC); 2179 0 stevel if (backenab & (QWANTW|QB_WANTW)) 2180 0 stevel backenable(q, band); 2181 0 stevel } 2182 0 stevel 2183 
0 stevel /* 2184 0 stevel * Remove a message from a queue. The queue count and other 2185 0 stevel * flow control parameters are adjusted and the back queue 2186 0 stevel * enabled if necessary. 2187 0 stevel * 2188 0 stevel * rmvq can be called with the stream frozen, but other utility functions 2189 0 stevel * holding QLOCK, and by streams modules without any locks/frozen. 2190 0 stevel */ 2191 0 stevel void 2192 0 stevel rmvq(queue_t *q, mblk_t *mp) 2193 0 stevel { 2194 0 stevel ASSERT(mp != NULL); 2195 0 stevel 2196 0 stevel rmvq_noenab(q, mp); 2197 0 stevel if (curthread != STREAM(q)->sd_freezer && MUTEX_HELD(QLOCK(q))) { 2198 0 stevel /* 2199 0 stevel * qbackenable can handle a frozen stream but not a "random" 2200 0 stevel * qlock being held. Drop lock across qbackenable. 2201 0 stevel */ 2202 0 stevel mutex_exit(QLOCK(q)); 2203 0 stevel qbackenable(q, mp->b_band); 2204 0 stevel mutex_enter(QLOCK(q)); 2205 0 stevel } else { 2206 0 stevel qbackenable(q, mp->b_band); 2207 0 stevel } 2208 0 stevel } 2209 0 stevel 2210 0 stevel /* 2211 0 stevel * Like rmvq() but without any backenabling. 2212 0 stevel * This exists to handle SR_CONSOL_DATA in strrput(). 
2213 0 stevel */ 2214 0 stevel void 2215 0 stevel rmvq_noenab(queue_t *q, mblk_t *mp) 2216 0 stevel { 2217 0 stevel int i; 2218 0 stevel qband_t *qbp = NULL; 2219 0 stevel kthread_id_t freezer; 2220 0 stevel int bytecnt = 0, mblkcnt = 0; 2221 0 stevel 2222 0 stevel freezer = STREAM(q)->sd_freezer; 2223 0 stevel if (freezer == curthread) { 2224 0 stevel ASSERT(frozenstr(q)); 2225 0 stevel ASSERT(MUTEX_HELD(QLOCK(q))); 2226 0 stevel } else if (MUTEX_HELD(QLOCK(q))) { 2227 0 stevel /* Don't drop lock on exit */ 2228 0 stevel freezer = curthread; 2229 0 stevel } else 2230 0 stevel mutex_enter(QLOCK(q)); 2231 0 stevel 2232 0 stevel ASSERT(mp->b_band <= q->q_nband); 2233 0 stevel if (mp->b_band != 0) { /* Adjust band pointers */ 2234 0 stevel ASSERT(q->q_bandp != NULL); 2235 0 stevel qbp = q->q_bandp; 2236 0 stevel i = mp->b_band; 2237 0 stevel while (--i > 0) 2238 0 stevel qbp = qbp->qb_next; 2239 0 stevel if (mp == qbp->qb_first) { 2240 0 stevel if (mp->b_next && mp->b_band == mp->b_next->b_band) 2241 0 stevel qbp->qb_first = mp->b_next; 2242 0 stevel else 2243 0 stevel qbp->qb_first = NULL; 2244 0 stevel } 2245 0 stevel if (mp == qbp->qb_last) { 2246 0 stevel if (mp->b_prev && mp->b_band == mp->b_prev->b_band) 2247 0 stevel qbp->qb_last = mp->b_prev; 2248 0 stevel else 2249 0 stevel qbp->qb_last = NULL; 2250 0 stevel } 2251 0 stevel } 2252 0 stevel 2253 0 stevel /* 2254 0 stevel * Remove the message from the list. 
2255 0 stevel */ 2256 0 stevel if (mp->b_prev) 2257 0 stevel mp->b_prev->b_next = mp->b_next; 2258 0 stevel else 2259 0 stevel q->q_first = mp->b_next; 2260 0 stevel if (mp->b_next) 2261 0 stevel mp->b_next->b_prev = mp->b_prev; 2262 0 stevel else 2263 0 stevel q->q_last = mp->b_prev; 2264 0 stevel mp->b_next = NULL; 2265 0 stevel mp->b_prev = NULL; 2266 0 stevel 2267 0 stevel /* Get the size of the message for q_count accounting */ 2268 6769 ja97890 bytecnt = mp_cont_len(mp, &mblkcnt); 2269 0 stevel 2270 0 stevel if (mp->b_band == 0) { /* Perform q_count accounting */ 2271 0 stevel q->q_count -= bytecnt; 2272 0 stevel q->q_mblkcnt -= mblkcnt; 2273 5360 rk129064 if (q->q_mblkcnt == 0 || ((q->q_count < q->q_hiwat) && 2274 5360 rk129064 (q->q_mblkcnt < q->q_hiwat))) { 2275 0 stevel q->q_flag &= ~QFULL; 2276 0 stevel } 2277 0 stevel } else { /* Perform qb_count accounting */ 2278 0 stevel qbp->qb_count -= bytecnt; 2279 0 stevel qbp->qb_mblkcnt -= mblkcnt; 2280 5360 rk129064 if (qbp->qb_mblkcnt == 0 || ((qbp->qb_count < qbp->qb_hiwat) && 2281 5360 rk129064 (qbp->qb_mblkcnt < qbp->qb_hiwat))) { 2282 0 stevel qbp->qb_flag &= ~QB_FULL; 2283 0 stevel } 2284 0 stevel } 2285 0 stevel if (freezer != curthread) 2286 0 stevel mutex_exit(QLOCK(q)); 2287 0 stevel 2288 0 stevel STR_FTEVENT_MSG(mp, q, FTEV_RMVQ, NULL); 2289 0 stevel } 2290 0 stevel 2291 0 stevel /* 2292 0 stevel * Empty a queue. 2293 0 stevel * If flag is set, remove all messages. Otherwise, remove 2294 0 stevel * only non-control messages. If queue falls below its low 2295 0 stevel * water mark, and QWANTW is set, enable the nearest upstream 2296 0 stevel * service procedure. 2297 0 stevel * 2298 0 stevel * Historical note: when merging the M_FLUSH code in strrput with this 2299 0 stevel * code one difference was discovered. flushq did not have a check 2300 0 stevel * for q_lowat == 0 in the backenabling test. 
 *
 * pcproto_flag specifies whether or not a M_PCPROTO message should be flushed
 * if one exists on the queue.  When it is non-zero, M_PCPROTO messages are
 * put back on the queue instead of being freed.
 */
void
flushq_common(queue_t *q, int flag, int pcproto_flag)
{
	mblk_t *mp, *nmp;
	qband_t *qbp;
	int backenab = 0;
	unsigned char bpri;
	unsigned char	qbf[NBAND];	/* band flushing backenable flags */

	/* Unlocked peek: an empty queue needs no work. */
	if (q->q_first == NULL)
		return;

	/*
	 * Detach the whole message list and zero every counter under the
	 * lock; the detached list is then processed without the lock held.
	 */
	mutex_enter(QLOCK(q));
	mp = q->q_first;
	q->q_first = NULL;
	q->q_last = NULL;
	q->q_count = 0;
	q->q_mblkcnt = 0;
	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
		qbp->qb_first = NULL;
		qbp->qb_last = NULL;
		qbp->qb_count = 0;
		qbp->qb_mblkcnt = 0;
		qbp->qb_flag &= ~QB_FULL;
	}
	q->q_flag &= ~QFULL;
	mutex_exit(QLOCK(q));
	while (mp) {
		nmp = mp->b_next;
		mp->b_next = mp->b_prev = NULL;

		STR_FTEVENT_MBLK(mp, q, FTEV_FLUSHQ, NULL);

		/*
		 * Messages that survive the flush are re-queued with putq();
		 * its return value is deliberately ignored.
		 */
		if (pcproto_flag && (mp->b_datap->db_type == M_PCPROTO))
			(void) putq(q, mp);
		else if (flag || datamsg(mp->b_datap->db_type))
			freemsg(mp);
		else
			(void) putq(q, mp);
		mp = nmp;
	}
	/*
	 * Work out, per band, whether a waiting writer can now be
	 * backenabled.  qbf[1..q_nband] describe the priority bands in
	 * list order; qbf[0] describes band 0.
	 */
	bpri = 1;
	mutex_enter(QLOCK(q));
	for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
		if ((qbp->qb_flag & QB_WANTW) &&
		    (((qbp->qb_count < qbp->qb_lowat) &&
		    (qbp->qb_mblkcnt < qbp->qb_lowat)) ||
		    qbp->qb_lowat == 0)) {
			qbp->qb_flag &= ~QB_WANTW;
			backenab = 1;
			qbf[bpri] = 1;
		} else
			qbf[bpri] = 0;
		bpri++;
	}
	ASSERT(bpri == (unsigned char)(q->q_nband + 1));
	if ((q->q_flag & QWANTW) &&
	    (((q->q_count < q->q_lowat) &&
	    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
		q->q_flag &= ~QWANTW;
		backenab = 1;
		qbf[0] = 1;
	} else
		qbf[0] = 0;

	/*
	 * If any band can now be written to, and there is a writer
	 * for that band, then backenable the closest service procedure.
	 * The lock is released before calling backenable(); bands are
	 * visited from the highest number down to band 0.
	 */
	if (backenab) {
		mutex_exit(QLOCK(q));
		for (bpri = q->q_nband; bpri != 0; bpri--)
			if (qbf[bpri])
				backenable(q, bpri);
		if (qbf[0])
			backenable(q, 0);
	} else
		mutex_exit(QLOCK(q));
}

/*
 * The real flushing takes place in flushq_common. This is done so that
 * a flag can specify whether or not M_PCPROTO messages should be flushed.
 * Currently the only place that uses this flag is the stream head.
 * flushq() itself always passes 0 for that flag.
 */
void
flushq(queue_t *q, int flag)
{
	flushq_common(q, flag, 0);
}

/*
 * Flush the queue of messages of the given priority band.
 * There is some duplication of code between flushq and flushband.
 * This is because we want to optimize the code as much as possible.
 * The assumption is that there will be more messages in the normal
 * (priority 0) band than in any other.
 *
 * Historical note: when merging the M_FLUSH code in strrput with this
 * code one difference was discovered.
 * flushband had an extra check for (mp->b_datap->db_type < QPCTL) in
 * the band 0 case. That check does not match the man page for flushband
 * and was not in the strrput flush code hence it was removed.
 *
 * q:    queue to flush.
 * pri:  priority band to flush; a band greater than q_nband is ignored.
 * flag: FLUSHDATA or FLUSHALL (asserted).
 */
void
flushband(queue_t *q, unsigned char pri, int flag)
{
	mblk_t *mp;
	mblk_t *nmp;
	mblk_t *last;
	qband_t *qbp;
	int band;

	ASSERT((flag == FLUSHDATA) || (flag == FLUSHALL));
	/* Checked before QLOCK is taken: no such band, nothing to flush. */
	if (pri > q->q_nband) {
		return;
	}
	mutex_enter(QLOCK(q));
	if (pri == 0) {
		/*
		 * Band 0: detach the entire message list and reset all
		 * counters under the lock, then free the band 0 messages
		 * that qualify and putq() everything else back.
		 */
		mp = q->q_first;
		q->q_first = NULL;
		q->q_last = NULL;
		q->q_count = 0;
		q->q_mblkcnt = 0;
		for (qbp = q->q_bandp; qbp; qbp = qbp->qb_next) {
			qbp->qb_first = NULL;
			qbp->qb_last = NULL;
			qbp->qb_count = 0;
			qbp->qb_mblkcnt = 0;
			qbp->qb_flag &= ~QB_FULL;
		}
		q->q_flag &= ~QFULL;
		mutex_exit(QLOCK(q));
		while (mp) {
			nmp = mp->b_next;
			mp->b_next = mp->b_prev = NULL;
			if ((mp->b_band == 0) &&
			    ((flag == FLUSHALL) ||
			    datamsg(mp->b_datap->db_type)))
				freemsg(mp);
			else
				(void) putq(q, mp);
			mp = nmp;
		}
		/*
		 * Backenable if a writer is waiting and the queue is now
		 * below its low water mark (or has none).
		 */
		mutex_enter(QLOCK(q));
		if ((q->q_flag & QWANTW) &&
		    (((q->q_count < q->q_lowat) &&
		    (q->q_mblkcnt < q->q_lowat)) || q->q_lowat == 0)) {
			q->q_flag &= ~QWANTW;
			mutex_exit(QLOCK(q));

			backenable(q, pri);
		} else
			mutex_exit(QLOCK(q));
	} else {	/* pri != 0 */
		boolean_t flushed = B_FALSE;
		band = pri;

		ASSERT(MUTEX_HELD(QLOCK(q)));
		/* Walk to the qband structure for the requested band. */
		qbp = q->q_bandp;
		while (--band > 0)
			qbp = qbp->qb_next;
		mp = qbp->qb_first;
		if (mp == NULL) {
			mutex_exit(QLOCK(q));
			return;
		}
		/* First message past this band; marks the end of the walk. */
		last = qbp->qb_last->b_next;
		/*
		 * rmvq_noenab() and freemsg() are called for each mblk that
		 * meets the criteria.  The loop is executed until the last
		 * mblk has been processed.  QLOCK stays held throughout;
		 * rmvq_noenab() is called with it held.
		 */
		while (mp != last) {
			ASSERT(mp->b_band == pri);
			nmp = mp->b_next;
			if (flag == FLUSHALL || datamsg(mp->b_datap->db_type)) {
				rmvq_noenab(q, mp);
				freemsg(mp);
				flushed = B_TRUE;
			}
			mp = nmp;
		}
		mutex_exit(QLOCK(q));

		/*
		 * If any mblk(s) has been freed, we know that qbackenable()
		 * will need to be called.
		 */
		if (flushed)
			qbackenable(q, pri);
	}
}

/*
 * Return 1 if the queue is not full.  If the queue is full, return
 * 0 (may not put message) and set QWANTW flag (caller wants to write
 * to the queue).
2503 0 stevel */ 2504 0 stevel int 2505 0 stevel canput(queue_t *q) 2506 0 stevel { 2507 0 stevel TRACE_1(TR_FAC_STREAMS_FR, TR_CANPUT_IN, "canput:%p", q); 2508 0 stevel 2509 0 stevel /* this is for loopback transports, they should not do a canput */ 2510 0 stevel ASSERT(STRMATED(q->q_stream) || STREAM(q) == STREAM(q->q_nfsrv)); 2511 0 stevel 2512 0 stevel /* Find next forward module that has a service procedure */ 2513 0 stevel q = q->q_nfsrv; 2514 0 stevel 2515 0 stevel if (!(q->q_flag & QFULL)) { 2516 0 stevel TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1); 2517 0 stevel return (1); 2518 0 stevel } 2519 0 stevel mutex_enter(QLOCK(q)); 2520 0 stevel if (q->q_flag & QFULL) { 2521 0 stevel q->q_flag |= QWANTW; 2522 0 stevel mutex_exit(QLOCK(q)); 2523 0 stevel TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 0); 2524 0 stevel return (0); 2525 0 stevel } 2526 0 stevel mutex_exit(QLOCK(q)); 2527 0 stevel TRACE_2(TR_FAC_STREAMS_FR, TR_CANPUT_OUT, "canput:%p %d", q, 1); 2528 0 stevel return (1); 2529 0 stevel } 2530 0 stevel 2531 0 stevel /* 2532 0 stevel * This is the new canput for use with priority bands. Return 1 if the 2533 0 stevel * band is not full. If the band is full, return 0 (may not put message) 2534 0 stevel * and set QWANTW(QB_WANTW) flag for zero(non-zero) band (caller wants to 2535 0 stevel * write to the queue). 
2536 0 stevel */ 2537 0 stevel int 2538 0 stevel bcanput(queue_t *q, unsigned char pri) 2539 0 stevel { 2540 0 stevel qband_t *qbp; 2541 0 stevel 2542 0 stevel TRACE_2(TR_FAC_STREAMS_FR, TR_BCANPUT_IN, "bcanput:%p %p", q, pri); 2543 0 stevel if (!q) 2544 0 stevel return (0); 2545 0 stevel 2546 0 stevel /* Find next forward module that has a service procedure */ 2547 0 stevel q = q->q_nfsrv; 2548 0 stevel 2549 0 stevel mutex_enter(QLOCK(q)); 2550 0 stevel if (pri == 0) { 2551 0 stevel if (q->q_flag & QFULL) { 2552 0 stevel q->q_flag |= QWANTW; 2553 0 stevel mutex_exit(QLOCK(q)); 2554 0 stevel TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, 2555 6707 brutus "bcanput:%p %X %d", q, pri, 0); 2556 0 stevel return (0); 2557 0 stevel } 2558 0 stevel } else { /* pri != 0 */ 2559 0 stevel if (pri > q->q_nband) { 2560 0 stevel /* 2561 0 stevel * No band exists yet, so return success. 2562 0 stevel */ 2563 0 stevel mutex_exit(QLOCK(q)); 2564 0 stevel TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, 2565 6707 brutus "bcanput:%p %X %d", q, pri, 1); 2566 0 stevel return (1); 2567 0 stevel } 2568 0 stevel qbp = q->q_bandp; 2569 0 stevel while (--pri) 2570 0 stevel qbp = qbp->qb_next; 2571 0 stevel if (qbp->qb_flag & QB_FULL) { 2572 0 stevel qbp->qb_flag |= QB_WANTW; 2573 0 stevel mutex_exit(QLOCK(q)); 2574 0 stevel TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, 2575 6707 brutus "bcanput:%p %X %d", q, pri, 0); 2576 0 stevel return (0); 2577 0 stevel } 2578 0 stevel } 2579 0 stevel mutex_exit(QLOCK(q)); 2580 0 stevel TRACE_3(TR_FAC_STREAMS_FR, TR_BCANPUT_OUT, 2581 6707 brutus "bcanput:%p %X %d", q, pri, 1); 2582 0 stevel return (1); 2583 0 stevel } 2584 0 stevel 2585 0 stevel /* 2586 0 stevel * Put a message on a queue. 2587 0 stevel * 2588 0 stevel * Messages are enqueued on a priority basis. The priority classes 2589 0 stevel * are HIGH PRIORITY (type >= QPCTL), PRIORITY (type < QPCTL && band > 0), 2590 0 stevel * and B_NORMAL (type < QPCTL && band == 0). 
 *
 * Add appropriate weighted data block sizes to queue count.
 * If queue hits high water mark then set QFULL flag.
 *
 * If QNOENAB is not set (putq is allowed to enable the queue),
 * enable the queue only if the message is PRIORITY,
 * or the QWANTR flag is set (indicating that the service procedure
 * is ready to read the queue).  A message whose class is above QNORM
 * enables the queue regardless.  This implies that a service
 * procedure must NEVER put a high priority message back on its own
 * queue, as this would result in an infinite loop (!).
 *
 * Locking: QLOCK(q) is acquired for the duration of the call unless the
 * current thread is the stream's freezer (sd_freezer == curthread); in
 * that case the lock is asserted to be held already and is left held.
 *
 * Returns 1 once bp has been linked onto q.  Returns 0, with bp not
 * linked onto the queue, if a qband structure needed for bp->b_band
 * could not be allocated; any bands allocated before the failure remain
 * on the queue and are counted in q_nband.
 */
int
putq(queue_t *q, mblk_t *bp)
{
	mblk_t *tmp;
	qband_t *qbp = NULL;
	int mcls = (int)queclass(bp);
	kthread_id_t freezer;
	int	bytecnt = 0, mblkcnt = 0;

	/*
	 * The freezer value sampled here also decides, on every exit
	 * path, whether this routine is the one that must drop QLOCK.
	 */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	/*
	 * Make sanity checks and if qband structure is not yet
	 * allocated, do so.
	 */
	if (mcls == QPCTL) {
		if (bp->b_band != 0)
			bp->b_band = 0;		/* force to be correct */
	} else if (bp->b_band != 0) {
		int i;
		qband_t **qbpp;

		if (bp->b_band > q->q_nband) {

			/*
			 * The qband structure for this priority band is
			 * not on the queue yet, so we have to allocate
			 * one on the fly.  It would be wasteful to
			 * associate the qband structures with every
			 * queue when the queues are allocated.  This is
			 * because most queues will only need the normal
			 * band of flow which can be described entirely
			 * by the queue itself.
			 */

			/* Find the tail link of the existing band list. */
			qbpp = &q->q_bandp;
			while (*qbpp)
				qbpp = &(*qbpp)->qb_next;

			/*
			 * Append one band at a time until band b_band
			 * exists; each new band inherits the queue's
			 * water marks.
			 */
			while (bp->b_band > q->q_nband) {
				if ((*qbpp = allocband()) == NULL) {
					if (freezer != curthread)
						mutex_exit(QLOCK(q));
					return (0);
				}
				(*qbpp)->qb_hiwat = q->q_hiwat;
				(*qbpp)->qb_lowat = q->q_lowat;
				q->q_nband++;
				qbpp = &(*qbpp)->qb_next;
			}
		}
		ASSERT(MUTEX_HELD(QLOCK(q)));

		/* Step to the b_band'th entry; the list is 1-based. */
		qbp = q->q_bandp;
		i = bp->b_band;
		while (--i)
			qbp = qbp->qb_next;
	}

	/*
	 * If queue is empty, add the message and initialize the pointers.
	 * Otherwise, adjust message pointers and queue pointers based on
	 * the type of the message and where it belongs on the queue.  Some
	 * code is duplicated to minimize the number of conditionals and
	 * hopefully minimize the amount of time this routine takes.
	 */
	if (!q->q_first) {
		bp->b_next = NULL;
		bp->b_prev = NULL;
		q->q_first = bp;
		q->q_last = bp;
		if (qbp) {
			qbp->qb_first = bp;
			qbp->qb_last = bp;
		}
	} else if (!qbp) {	/* bp->b_band == 0 */

		/*
		 * If queue class of message is less than or equal to
		 * that of the last one on the queue, tack on to the end.
		 */
		tmp = q->q_last;
		if (mcls <= (int)queclass(tmp)) {
			bp->b_next = NULL;
			bp->b_prev = tmp;
			tmp->b_next = bp;
			q->q_last = bp;
		} else {
			/*
			 * bp outranks the last message.  Skip the leading
			 * messages whose class is at least mcls; the walk
			 * must stop by q_last, whose class was just found
			 * to be lower.
			 */
			tmp = q->q_first;
			while ((int)queclass(tmp) >= mcls)
				tmp = tmp->b_next;

			/*
			 * Insert bp before tmp.
			 */
			bp->b_next = tmp;
			bp->b_prev = tmp->b_prev;
			if (tmp->b_prev)
				tmp->b_prev->b_next = bp;
			else
				q->q_first = bp;
			tmp->b_prev = bp;
		}
	} else {		/* bp->b_band != 0 */
		if (qbp->qb_first) {
			tmp = qbp->qb_last;

			/*
			 * Insert bp after the last message in this band.
			 */
			bp->b_next = tmp->b_next;
			if (tmp->b_next)
				tmp->b_next->b_prev = bp;
			else
				q->q_last = bp;
			bp->b_prev = tmp;
			tmp->b_next = bp;
		} else {
			/*
			 * The band has no messages yet, so its position
			 * has to be found from the queue itself.
			 */
			tmp = q->q_last;
			if ((mcls < (int)queclass(tmp)) ||
			    (bp->b_band <= tmp->b_band)) {

				/*
				 * Tack bp on end of queue.
				 */
				bp->b_next = NULL;
				bp->b_prev = tmp;
				tmp->b_next = bp;
				q->q_last = bp;
			} else {
				/*
				 * Skip the high priority messages, then
				 * every message in an equal or higher
				 * band.  The walk must stop by q_last,
				 * whose band was just found to be lower.
				 */
				tmp = q->q_first;
				while (tmp->b_datap->db_type >= QPCTL)
					tmp = tmp->b_next;
				while (tmp->b_band >= bp->b_band)
					tmp = tmp->b_next;

				/*
				 * Insert bp before tmp.
				 */
				bp->b_next = tmp;
				bp->b_prev = tmp->b_prev;
				if (tmp->b_prev)
					tmp->b_prev->b_next = bp;
				else
					q->q_first = bp;
				tmp->b_prev = bp;
			}
			qbp->qb_first = bp;
		}
		/* In either case bp is now the newest message of its band. */
		qbp->qb_last = bp;
	}

	/* Get message byte count for q_count accounting */
	bytecnt = mp_cont_len(bp, &mblkcnt);

	/*
	 * Charge the message to its band if it has one, otherwise to the
	 * queue.  The mblk count is compared against the same hiwat value
	 * as the byte count.
	 */
	if (qbp) {
		qbp->qb_count += bytecnt;
		qbp->qb_mblkcnt += mblkcnt;
		if ((qbp->qb_count >= qbp->qb_hiwat) ||
		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
			qbp->qb_flag |= QB_FULL;
		}
	} else {
		q->q_count += bytecnt;
		q->q_mblkcnt += mblkcnt;
		if ((q->q_count >= q->q_hiwat) ||
		    (q->q_mblkcnt >= q->q_hiwat)) {
			q->q_flag |= QFULL;
		}
	}

	STR_FTEVENT_MSG(bp, q, FTEV_PUTQ, NULL);

	/*
	 * A class above QNORM enables the queue unconditionally.  Any
	 * other message does so only when canenable(q) permits it and
	 * either QWANTR is set or the message is banded.
	 */
	if ((mcls > QNORM) ||
	    (canenable(q) && (q->q_flag & QWANTR || bp->b_band)))
		qenable_locked(q);
	ASSERT(MUTEX_HELD(QLOCK(q)));
	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	return (1);
}

/*
 * Put stuff back at beginning of Q according to priority order.
 * See comment on putq above for details.
 *
 * bp must be a single message (bp->b_next == NULL is asserted).  A high
 * priority message goes to the very front of the queue; any other message
 * goes in front of the messages already queued in its own band.
 *
 * Locking: QLOCK(q) is acquired for the duration of the call unless the
 * current thread is the stream's freezer (sd_freezer == curthread); in
 * that case the lock is asserted to be held already and is left held.
 *
 * Returns 1 once bp has been linked onto q.  Returns 0, with bp not
 * linked onto the queue, if a qband structure needed for bp->b_band
 * could not be allocated.
 */
int
putbq(queue_t *q, mblk_t *bp)
{
	mblk_t *tmp;
	qband_t *qbp = NULL;
	int mcls = (int)queclass(bp);
	kthread_id_t freezer;
	int	bytecnt = 0, mblkcnt = 0;

	ASSERT(q && bp);
	ASSERT(bp->b_next == NULL);

	/*
	 * The freezer value sampled here also decides, on every exit
	 * path, whether this routine is the one that must drop QLOCK.
	 */
	freezer = STREAM(q)->sd_freezer;
	if (freezer == curthread) {
		ASSERT(frozenstr(q));
		ASSERT(MUTEX_HELD(QLOCK(q)));
	} else
		mutex_enter(QLOCK(q));

	/*
	 * Make sanity checks and if qband structure is not yet
	 * allocated, do so.
	 */
	if (mcls == QPCTL) {
		if (bp->b_band != 0)
			bp->b_band = 0;		/* force to be correct */
	} else if (bp->b_band != 0) {
		int i;
		qband_t **qbpp;

		if (bp->b_band > q->q_nband) {
			/* Find the tail link of the existing band list. */
			qbpp = &q->q_bandp;
			while (*qbpp)
				qbpp = &(*qbpp)->qb_next;

			/*
			 * Append one band at a time until band b_band
			 * exists; each new band inherits the queue's
			 * water marks.
			 */
			while (bp->b_band > q->q_nband) {
				if ((*qbpp = allocband()) == NULL) {
					if (freezer != curthread)
						mutex_exit(QLOCK(q));
					return (0);
				}
				(*qbpp)->qb_hiwat = q->q_hiwat;
				(*qbpp)->qb_lowat = q->q_lowat;
				q->q_nband++;
				qbpp = &(*qbpp)->qb_next;
			}
		}

		/* Step to the b_band'th entry; the list is 1-based. */
		qbp = q->q_bandp;
		i = bp->b_band;
		while (--i)
			qbp = qbp->qb_next;
	}

	/*
	 * If queue is empty or if message is high priority,
	 * place on the front of the queue.
	 */
	tmp = q->q_first;
	if ((!tmp) || (mcls == QPCTL)) {
		bp->b_next = tmp;
		if (tmp)
			tmp->b_prev = bp;
		else
			q->q_last = bp;
		q->q_first = bp;
		bp->b_prev = NULL;

		/*
		 * qbp is never set on the QPCTL path above, so it can be
		 * non-NULL here only when the queue was empty; bp is then
		 * the band's sole message.
		 */
		if (qbp) {
			qbp->qb_first = bp;
			qbp->qb_last = bp;
		}
	} else if (qbp) {	/* bp->b_band != 0 */
		tmp = qbp->qb_first;
		if (tmp) {

			/*
			 * Insert bp before the first message in this band.
			 */
			bp->b_next = tmp;
			bp->b_prev = tmp->b_prev;
			if (tmp->b_prev)
				tmp->b_prev->b_next = bp;
			else
				q->q_first = bp;
			tmp->b_prev = bp;
		} else {
			/*
			 * The band has no messages yet, so its position
			 * has to be found from the queue itself.
			 */
			tmp = q->q_last;
			if ((mcls < (int)queclass(tmp)) ||
			    (bp->b_band < tmp->b_band)) {

				/*
				 * Tack bp on end of queue.
				 */
				bp->b_next = NULL;
				bp->b_prev = tmp;
				tmp->b_next = bp;
				q->q_last = bp;
			} else {
				/*
				 * Skip the high priority messages, then
				 * every message in a strictly higher band.
				 * The walk must stop by q_last, whose band
				 * was just found to be no higher than bp's.
				 */
				tmp = q->q_first;
				while (tmp->b_datap->db_type >= QPCTL)
					tmp = tmp->b_next;
				while (tmp->b_band > bp->b_band)
					tmp = tmp->b_next;

				/*
				 * Insert bp before tmp.
				 */
				bp->b_next = tmp;
				bp->b_prev = tmp->b_prev;
				if (tmp->b_prev)
					tmp->b_prev->b_next = bp;
				else
					q->q_first = bp;
				tmp->b_prev = bp;
			}
			qbp->qb_last = bp;
		}
		/* In either case bp is now the oldest message of its band. */
		qbp->qb_first = bp;
	} else {		/* bp->b_band == 0 && !QPCTL */

		/*
		 * If the queue class or band is less than that of the last
		 * message on the queue, tack bp on the end of the queue.
		 *
		 * NOTE(review): bp->b_band is 0 on this path, so the band
		 * half of the test can only fire if b_band is a signed
		 * type -- confirm against the mblk_t definition.
		 */
		tmp = q->q_last;
		if ((mcls < (int)queclass(tmp)) || (bp->b_band < tmp->b_band)) {
			bp->b_next = NULL;
			bp->b_prev = tmp;
			tmp->b_next = bp;
			q->q_last = bp;
		} else {
			/*
			 * Skip the high priority messages and every
			 * banded message, so that bp lands ahead of the
			 * band zero messages already queued.
			 */
			tmp = q->q_first;
			while (tmp->b_datap->db_type >= QPCTL)
				tmp = tmp->b_next;
			while (tmp->b_band > bp->b_band)
				tmp = tmp->b_next;

			/*
			 * Insert bp before tmp.
			 */
			bp->b_next = tmp;
			bp->b_prev = tmp->b_prev;
			if (tmp->b_prev)
				tmp->b_prev->b_next = bp;
			else
				q->q_first = bp;
			tmp->b_prev = bp;
		}
	}

	/* Get message byte count for q_count accounting */
	bytecnt = mp_cont_len(bp, &mblkcnt);

	/*
	 * Charge the message to its band if it has one, otherwise to the
	 * queue.  The mblk count is compared against the same hiwat value
	 * as the byte count.
	 */
	if (qbp) {
		qbp->qb_count += bytecnt;
		qbp->qb_mblkcnt += mblkcnt;
		if ((qbp->qb_count >= qbp->qb_hiwat) ||
		    (qbp->qb_mblkcnt >= qbp->qb_hiwat)) {
			qbp->qb_flag |= QB_FULL;
		}
	} else {
		q->q_count += bytecnt;
		q->q_mblkcnt += mblkcnt;
		if ((q->q_count >= q->q_hiwat) ||
		    (q->q_mblkcnt >= q->q_hiwat)) {
			q->q_flag |= QFULL;
		}
	}

	STR_FTEVENT_MSG(bp, q, FTEV_PUTBQ, NULL);

	/*
	 * A class above QNORM enables the queue unconditionally.  Any
	 * other message does so only when canenable(q) permits it and
	 * QWANTR is set; unlike putq, a non-zero band alone is not enough.
	 */
	if ((mcls > QNORM) || (canenable(q) && (q->q_flag & QWANTR)))
		qenable_locked(q);
	ASSERT(MUTEX_HELD(QLOCK(q)));
	if (freezer != curthread)
		mutex_exit(QLOCK(q));

	return (1);
}

/*
 * Insert a message before an existing message on the queue.  If the
 * existing message is NULL, the new message is placed on the end of
 * the queue.  The queue class of the new message is ignored.  However,
 * the priority band of the new message must adhere to the following
 * ordering:
 *
 *	emp->b_prev->b_band >= mp->b_band >= emp->b_band.
 *
 * All flow control parameters are updated.
2978 0 stevel * 2979 0 stevel * insq can be called with the stream frozen, but other utility functions 2980 0 stevel * holding QLOCK, and by streams modules without any locks/frozen. 2981 0 stevel */ 2982 0 stevel int 2983 0 stevel insq(queue_t *q, mblk_t *emp, mblk_t *mp) 2984 0 stevel { 2985 0 stevel mblk_t *tmp; 2986 0 stevel qband_t *qbp = NULL; 2987 0 stevel int mcls = (int)queclass(mp); 2988 0 stevel kthread_id_t freezer; 2989 0 stevel int bytecnt = 0, mblkcnt = 0; 2990 0 stevel 2991 0 stevel freezer = STREAM(q)->sd_freezer; 2992 0 stevel if (freezer == curthread) { 2993 0 stevel ASSERT(frozenstr(q)); 2994 0 stevel ASSERT(MUTEX_HELD(QLOCK(q))); 2995 0 stevel } else if (MUTEX_HELD(QLOCK(q))) { 2996 0 stevel /* Don't drop lock on exit */ 2997 0 stevel freezer = curthread; 2998 0 stevel } else 2999 0 stevel mutex_enter(QLOCK(q)); 3000 0 stevel 3001 0 stevel if (mcls == QPCTL) { 3002 0 stevel if (mp->b_band != 0) 3003 0 stevel mp->b_band = 0; /* force to be correct */ 3004 0 stevel if (emp && emp->b_prev && 3005 0 stevel (emp->b_prev->b_datap->db_type < QPCTL)) 3006 0 stevel goto badord; 3007 0 stevel } 3008 0 stevel if (emp) { 3009 0 stevel if (((mcls == QNORM) && (mp->b_band < emp->b_band)) || 3010 0 stevel (emp->b_prev && (emp->b_prev->b_datap->db_type < QPCTL) && 3011 0 stevel (emp->b_prev->b_band < mp->b_band))) { 3012 0 stevel goto badord; 3013 0 stevel } 3014 0 stevel } else { 3015 0 stevel tmp = q->q_last; 3016 0 stevel if (tmp && (mcls == QNORM) && (mp->b_band > tmp->b_band)) { 3017 0 stevel badord: 3018 0 stevel cmn_err(CE_WARN, 3019 0 stevel "insq: attempt to insert message out of order " 3020 0 stevel "on q %p", (void *)q); 3021 0 stevel if (freezer != curthread) 3022 0 stevel mutex_exit(QLOCK(q)); 3023 0 stevel return (0); 3024 0 stevel } 3025 0 stevel } 3026 0 stevel 3027 0 stevel if (mp->b_band != 0) { 3028 0 stevel int i; 3029 0 stevel qband_t **qbpp; 3030 0 stevel 3031 0 stevel if (mp->b_band > q->q_nband) { 3032 0 stevel qbpp = &q->q_bandp; 3033 
0 stevel while (*qbpp) 3034 0 stevel qbpp = &(*qbpp)->qb_next; 3035 0 stevel while (mp->b_band > q->q_nband) { 3036 0 stevel if ((*qbpp = allocband()) == NULL) { 3037 0 stevel if (freezer != curthread) 3038 0 stevel mutex_exit(QLOCK(q)); 3039 0 stevel return (0); 3040 0 stevel } 3041 0 stevel (*qbpp)->qb_hiwat = q->q_hiwat; 3042 0 stevel (*qbpp)->qb_lowat = q->q_lowat; 3043 0 stevel q->q_nband++; 3044 0 stevel qbpp = &(*qbpp)->qb_next; 3045 0 stevel } 3046 0 stevel } 3047 0 stevel qbp = q->q_bandp; 3048 0 stevel i = mp->b_band; 3049 0 stevel while (--i) 3050 0 stevel qbp = qbp->qb_next; 3051 0 stevel } 3052 0 stevel 3053 0 stevel if ((mp->b_next = emp) != NULL) { 3054 0 stevel if ((mp->b_prev = emp->b_prev) != NULL) 3055 0 stevel emp->b_prev->b_next = mp; 3056 0 stevel else 3057 0 stevel q->q_first = mp; 3058 0 stevel emp->b_prev = mp; 3059 0 stevel } else { 3060 0 stevel if ((mp->b_prev = q->q_last) != NULL) 3061 0 stevel q->q_last->b_next = mp; 3062 0 stevel else 3063 0 stevel q->q_first = mp; 3064 0 stevel q->q_last = mp; 3065 0 stevel } 3066 0 stevel 3067 0 stevel /* Get mblk and byte count for q_count accounting */ 3068 6769 ja97890 bytecnt = mp_cont_len(mp, &mblkcnt); 3069 0 stevel 3070 0 stevel if (qbp) { /* adjust qband pointers and count */ 3071 0 stevel if (!qbp->qb_first) { 3072 0 stevel qbp->qb_first = mp; 3073 0 stevel qbp->qb_last = mp; 3074 0 stevel } else { 3075 0 stevel if (mp->b_prev == NULL || (mp->b_prev != NULL && 3076 0 stevel (mp->b_prev->b_band != mp->b_band))) 3077 0 stevel qbp->qb_first = mp; 3078 0 stevel else if (mp->b_next == NULL || (mp->b_next != NULL && 3079 0 stevel (mp->b_next->b_band != mp->b_band))) 3080 0 stevel qbp->qb_last = mp; 3081 0 stevel } 3082 0 stevel qbp->qb_count += bytecnt; 3083 0 stevel qbp->qb_mblkcnt += mblkcnt; 3084 0 stevel if ((qbp->qb_count >= qbp->qb_hiwat) || 3085 0 stevel (qbp->qb_mblkcnt >= qbp-