Home | History | Annotate | Download | only in fs
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License, Version 1.0 only
      6  * (the "License").  You may not use this file except in compliance
      7  * with the License.
      8  *
      9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
     10  * or http://www.opensolaris.org/os/licensing.
     11  * See the License for the specific language governing permissions
     12  * and limitations under the License.
     13  *
     14  * When distributing Covered Code, include this CDDL HEADER in each
     15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     16  * If applicable, add the following below this CDDL HEADER, with the
     17  * fields enclosed by brackets "[]" replaced with your own identifying
     18  * information: Portions Copyright [yyyy] [name of copyright owner]
     19  *
     20  * CDDL HEADER END
     21  */
     22 /*
     23  * Copyright (c) 1998,2001 by Sun Microsystems, Inc.
     24  * All rights reserved.
     25  *
     26  */
     27 
     28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
     29 
     30 #include <sys/types.h>
     31 #include <sys/cmn_err.h>
     32 #include <sys/kmem.h>
     33 #include <sys/systm.h>
     34 #include <sys/debug.h>
     35 #include <sys/ddi.h>
     36 
     37 #include <sys/fdbuffer.h>
     38 
     39 #ifdef DEBUG
     40 static int fdb_debug;
     41 #define	FDB_D_CREATE	001
     42 #define	FDB_D_ALLOC	002
     43 #define	FDB_D_IO	004
     44 #define	FDB_D_ASYNC	010
     45 #define	DEBUGF(lvl, args)	{ if ((lvl) & fdb_debug) cmn_err args; }
     46 #else
     47 #define	DEBUGF(level, args)
     48 #endif
/* Object cache from which every fdbuffer_t is allocated; set by fdb_init(). */
static struct kmem_cache *fdb_cache;
/*
 * Forward declaration: fdb_free() and fdb_get_holes() call this routine
 * before its definition appears later in the file.
 */
static void fdb_zero_holes(fdbuffer_t *fdb);
     51 
     52 /* ARGSUSED */
     53 static int
     54 fdb_cache_constructor(void *buf, void *cdrarg, int kmflags)
     55 {
     56 	fdbuffer_t *fdb = buf;
     57 
     58 	mutex_init(&fdb->fd_mutex, NULL, MUTEX_DEFAULT, NULL);
     59 
     60 	return (0);
     61 }
     62 
     63 /* ARGSUSED */
     64 static void
     65 fdb_cache_destructor(void *buf, void *cdrarg)
     66 {
     67 	fdbuffer_t *fdb = buf;
     68 
     69 	mutex_destroy(&fdb->fd_mutex);
     70 }
     71 
     72 void
     73 fdb_init()
     74 {
     75 	fdb_cache = kmem_cache_create("fdb_cache", sizeof (fdbuffer_t),
     76 	    0, fdb_cache_constructor, fdb_cache_destructor,
     77 	    NULL, NULL, NULL, 0);
     78 }
     79 
     80 static void
     81 fdb_prepare(fdbuffer_t *fdb)
     82 {
     83 	fdb->fd_holes = NULL;
     84 	fdb->fd_iofunc = NULL;
     85 	fdb->fd_iargp = NULL;
     86 	fdb->fd_parentbp = NULL;
     87 	fdb->fd_resid = 0;
     88 	fdb->fd_iocount = 0;
     89 	fdb->fd_iodispatch = 0;
     90 	fdb->fd_err = 0;
     91 }
     92 
     93 fdbuffer_t *
     94 fdb_page_create(page_t *pp, size_t len, int flags)
     95 {
     96 	fdbuffer_t *fdb;
     97 
     98 	DEBUGF(FDB_D_CREATE, (CE_NOTE,
     99 	    "?fdb_page_create: pp: %p len: %lux flags: %x",
    100 	    (void *)pp, len, flags));
    101 
    102 	ASSERT(flags & (FDB_READ|FDB_WRITE));
    103 
    104 	fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP);
    105 
    106 	fdb_prepare(fdb);
    107 
    108 	fdb->fd_type = FDB_PAGEIO;
    109 	fdb->fd_len = len;
    110 	fdb->fd_state = flags;
    111 	fdb->fd_pages = pp;
    112 
    113 	return (fdb);
    114 }
    115 
    116 fdbuffer_t *
    117 fdb_addr_create(
    118 	caddr_t addr,
    119 	size_t len,
    120 	int flags,
    121 	page_t **pplist,
    122 	struct proc *procp)
    123 {
    124 	fdbuffer_t *fdb;
    125 
    126 	DEBUGF(FDB_D_CREATE, (CE_NOTE,
    127 	    "?fdb_addr_create: addr: %p len: %lux flags: %x",
    128 	    (void *)addr, len, flags));
    129 
    130 	ASSERT(flags & (FDB_READ|FDB_WRITE));
    131 
    132 	fdb = kmem_cache_alloc(fdb_cache, KM_SLEEP);
    133 
    134 	fdb_prepare(fdb);
    135 
    136 	fdb->fd_type = FDB_VADDR;
    137 	fdb->fd_len = len;
    138 	fdb->fd_state = flags;
    139 	fdb->fd_addr = addr;
    140 	fdb->fd_shadow = pplist;
    141 	fdb->fd_procp = procp;
    142 
    143 	return (fdb);
    144 }
    145 
    146 void
    147 fdb_set_iofunc(fdbuffer_t *fdb, fdb_iodone_t iofunc, void *ioargp, int flag)
    148 {
    149 	ASSERT(fdb);
    150 	ASSERT(iofunc);
    151 	ASSERT((flag & ~FDB_ICALLBACK) == 0);
    152 
    153 	fdb->fd_iofunc = iofunc;
    154 	fdb->fd_iargp = ioargp;
    155 
    156 	mutex_enter(&fdb->fd_mutex);
    157 
    158 	if (flag & FDB_ICALLBACK)
    159 		fdb->fd_state |= FDB_ICALLBACK;
    160 
    161 	fdb->fd_state |= FDB_ASYNC;
    162 
    163 	mutex_exit(&fdb->fd_mutex);
    164 }
    165 
    166 int
    167 fdb_get_error(fdbuffer_t *fdb)
    168 {
    169 	return (fdb->fd_err);
    170 }
    171 
    172 void
    173 fdb_free(fdbuffer_t *fdb)
    174 {
    175 	fdb_holes_t *fdh, *fdhp;
    176 
    177 	DEBUGF(FDB_D_CREATE, (CE_NOTE, "?fdb_free: addr: %p flags: %x",
    178 	    (void *)fdb, fdb->fd_state));
    179 
    180 	ASSERT(fdb);
    181 	ASSERT(fdb->fd_iodispatch == 0);
    182 
    183 	if (fdb->fd_state & FDB_ZEROHOLE) {
    184 		fdb_zero_holes(fdb);
    185 	}
    186 
    187 	for (fdh = fdb->fd_holes; fdh; ) {
    188 		fdhp = fdh;
    189 		fdh = fdh->next_hole;
    190 		kmem_free(fdhp, sizeof (fdb_holes_t));
    191 	}
    192 
    193 	if (fdb->fd_parentbp != NULL) {
    194 		switch (fdb->fd_type) {
    195 		case FDB_PAGEIO:
    196 			pageio_done(fdb->fd_parentbp);
    197 			break;
    198 		case FDB_VADDR:
    199 			kmem_free(fdb->fd_parentbp, sizeof (struct buf));
    200 			break;
    201 		default:
    202 			cmn_err(CE_CONT, "?fdb_free: Unknown fdb type.");
    203 			break;
    204 		}
    205 	}
    206 
    207 	kmem_cache_free(fdb_cache, fdb);
    208 
    209 }
    210 
    211 /*
    212  * The offset should be from the begining of the buffer
    213  * it has nothing to do with file offset. This fact should be
    214  * reflected in the caller of this routine.
    215  */
    216 
    217 void
    218 fdb_add_hole(fdbuffer_t *fdb, u_offset_t off, size_t len)
    219 {
    220 	fdb_holes_t *this_hole;
    221 
    222 	ASSERT(fdb);
    223 	ASSERT(off < fdb->fd_len);
    224 
    225 	DEBUGF(FDB_D_IO, (CE_NOTE, "?fdb_add_hole: off %llx len %lx",
    226 	    off, len));
    227 
    228 	this_hole = kmem_alloc(sizeof (fdb_holes_t), KM_SLEEP);
    229 	this_hole->off = off;
    230 	this_hole->len = len;
    231 
    232 	if (fdb->fd_holes == NULL || off < fdb->fd_holes->off) {
    233 		this_hole->next_hole = fdb->fd_holes;
    234 		fdb->fd_holes = this_hole;
    235 	} else {
    236 		fdb_holes_t *fdhp = fdb->fd_holes;
    237 
    238 		while (fdhp->next_hole && off > fdhp->next_hole->off)
    239 			fdhp = fdhp->next_hole;
    240 
    241 		this_hole->next_hole = fdhp->next_hole;
    242 		fdhp->next_hole = this_hole;
    243 	}
    244 
    245 	mutex_enter(&fdb->fd_mutex);
    246 
    247 	fdb->fd_iocount += len;
    248 
    249 	mutex_exit(&fdb->fd_mutex);
    250 }
    251 
    252 fdb_holes_t *
    253 fdb_get_holes(fdbuffer_t *fdb)
    254 {
    255 	ASSERT(fdb);
    256 
    257 	if (fdb->fd_state & FDB_ZEROHOLE) {
    258 		fdb_zero_holes(fdb);
    259 	}
    260 
    261 	return (fdb->fd_holes);
    262 }
    263 
    264 /*
    265  * Note that offsets refer to offsets from the begining of the buffer
    266  * and as such the memory should be cleared accordingly.
    267  */
    268 
    269 static void
    270 fdb_zero_holes(fdbuffer_t *fdb)
    271 {
    272 	fdb_holes_t *fdh = fdb->fd_holes;
    273 	page_t *pp;
    274 
    275 	ASSERT(fdb);
    276 
    277 	if (!fdh)
    278 		return;
    279 
    280 	switch (fdb->fd_type) {
    281 	case FDB_PAGEIO:
    282 		pp = fdb->fd_pages;
    283 		while (fdh) {
    284 			fdb_holes_t *pfdh = fdh;
    285 			size_t l = fdh->len;
    286 			u_offset_t o = fdh->off;
    287 			ASSERT(pp);
    288 
    289 			do {
    290 				int  zerolen;
    291 				ASSERT(o >= pp->p_offset);
    292 
    293 				/*
    294 				 * This offset is wrong since
    295 				 * the offset passed from the pages
    296 				 * perspective starts at some virtual
    297 				 * address but the hole is relative
    298 				 * to the beginning of the fdbuffer.
    299 				 */
    300 				if (o >= pp->p_offset + PAGESIZE)
    301 					continue;
    302 
    303 				zerolen = min(PAGESIZE, l);
    304 
    305 				ASSERT(zerolen > 0);
    306 				ASSERT(zerolen <= PAGESIZE);
    307 
    308 				pagezero(pp, ((uintptr_t)o & PAGEOFFSET),
    309 				    zerolen);
    310 
    311 				l -= zerolen;
    312 				o += zerolen;
    313 
    314 				if (l == 0)
    315 					break;
    316 
    317 			} while (pp = page_list_next(pp));
    318 
    319 			if (!pp)
    320 				break;
    321 
    322 			fdh = fdh->next_hole;
    323 			kmem_free(pfdh, sizeof (fdb_holes_t));
    324 		}
    325 		break;
    326 	case FDB_VADDR:
    327 		while (fdh) {
    328 			fdb_holes_t *pfdh = fdh;
    329 
    330 			bzero(fdb->fd_addr + fdh->off, fdh->len);
    331 
    332 			fdh = fdh->next_hole;
    333 			kmem_free(pfdh, sizeof (fdb_holes_t));
    334 		}
    335 	default:
    336 		panic("fdb_zero_holes: Unknown fdb type.");
    337 		break;
    338 	}
    339 }
    340 
    341 
    342 buf_t *
    343 fdb_iosetup(fdbuffer_t *fdb, u_offset_t off, size_t len, struct vnode *vp,
    344     int b_flags)
    345 {
    346 	buf_t *bp;
    347 
    348 	DEBUGF(FDB_D_IO, (CE_NOTE,
    349 	    "?fdb_iosetup: off: %llx len: %lux fdb: len: %lux flags: %x",
    350 	    off, len, fdb->fd_len, fdb->fd_state));
    351 
    352 	ASSERT(fdb);
    353 
    354 	mutex_enter(&fdb->fd_mutex);
    355 
    356 	ASSERT(((b_flags & B_READ) && (fdb->fd_state & FDB_READ)) ||
    357 	    ((b_flags & B_WRITE) && (fdb->fd_state & FDB_WRITE)));
    358 	/*
    359 	 * The fdb can be used either in sync or async mode, if the
    360 	 * buffer has not been used it may be used in either mode, but
    361 	 * once you have started to use the buf in either mode all
    362 	 * subsequent i/o requests must take place the same way.
    363 	 */
    364 
    365 	ASSERT(((b_flags & B_ASYNC) &&
    366 	    ((fdb->fd_state & FDB_ASYNC) || !(fdb->fd_state & FDB_SYNC))) ||
    367 	    (!(b_flags & B_ASYNC) &&
    368 	    ((fdb->fd_state & FDB_SYNC) || !(fdb->fd_state & FDB_ASYNC))));
    369 
    370 
    371 	fdb->fd_state |= b_flags & B_ASYNC ? FDB_ASYNC : FDB_SYNC;
    372 
    373 	fdb->fd_iodispatch++;
    374 
    375 	ASSERT((fdb->fd_state & FDB_ASYNC && fdb->fd_iofunc != NULL) ||
    376 	    fdb->fd_state & FDB_SYNC);
    377 
    378 	mutex_exit(&fdb->fd_mutex);
    379 
    380 	ASSERT((len & (DEV_BSIZE - 1)) == 0);
    381 	ASSERT(off+len <= fdb->fd_len);
    382 
    383 	switch (fdb->fd_type) {
    384 	case FDB_PAGEIO:
    385 		if (fdb->fd_parentbp == NULL) {
    386 			bp = pageio_setup(fdb->fd_pages, len, vp, b_flags);
    387 			fdb->fd_parentbp = bp;
    388 		}
    389 		break;
    390 	case FDB_VADDR:
    391 		if (fdb->fd_parentbp == NULL) {
    392 
    393 			bp = kmem_alloc(sizeof (buf_t), KM_SLEEP);
    394 			bioinit(bp);
    395 			bp->b_error = 0;
    396 			bp->b_proc = fdb->fd_procp;
    397 			bp->b_flags = b_flags | B_BUSY | B_PHYS;
    398 			bp->b_bcount = len;
    399 			bp->b_un.b_addr = fdb->fd_addr;
    400 			bp->b_shadow = fdb->fd_shadow;
    401 			if (fdb->fd_shadow != NULL)
    402 				bp->b_flags |= B_SHADOW;
    403 			fdb->fd_parentbp = bp;
    404 		}
    405 		break;
    406 	default:
    407 		panic("fdb_iosetup: Unsupported fdb type.");
    408 		break;
    409 	};
    410 
    411 	bp = bioclone(fdb->fd_parentbp, off, len, 0, 0,
    412 	    (b_flags & B_ASYNC) ? (int (*)())fdb_iodone : NULL,
    413 	    NULL, KM_SLEEP);
    414 
    415 	bp->b_forw = (struct buf *)fdb;
    416 
    417 	if (b_flags & B_ASYNC)
    418 		bp->b_flags |= B_ASYNC;
    419 
    420 	return (bp);
    421 }
    422 
    423 size_t
    424 fdb_get_iolen(fdbuffer_t *fdb)
    425 {
    426 	ASSERT(fdb);
    427 	ASSERT(fdb->fd_iodispatch == 0);
    428 
    429 	return (fdb->fd_iocount - fdb->fd_resid);
    430 }
    431 
    432 void
    433 fdb_ioerrdone(fdbuffer_t *fdb, int error)
    434 {
    435 	ASSERT(fdb);
    436 	ASSERT(fdb->fd_state & FDB_ASYNC);
    437 
    438 	DEBUGF(FDB_D_IO, (CE_NOTE,
    439 	    "?fdb_ioerrdone: fdb: len: %lux flags: %x error: %d",
    440 	    fdb->fd_len, fdb->fd_state, error));
    441 
    442 	mutex_enter(&fdb->fd_mutex);
    443 
    444 	fdb->fd_err = error;
    445 
    446 	if (error)
    447 		fdb->fd_state |= FDB_ERROR;
    448 	else
    449 		fdb->fd_state |= FDB_DONE;
    450 
    451 	/*
    452 	 * If there is outstanding i/o return wainting for i/o's to complete.
    453 	 */
    454 	if (fdb->fd_iodispatch > 0) {
    455 		mutex_exit(&fdb->fd_mutex);
    456 		return;
    457 	}
    458 
    459 	mutex_exit(&fdb->fd_mutex);
    460 	fdb->fd_iofunc(fdb, fdb->fd_iargp, NULL);
    461 }
    462 
    463 void
    464 fdb_iodone(buf_t *bp)
    465 {
    466 	fdbuffer_t *fdb = (fdbuffer_t *)bp->b_forw;
    467 	int	error, isasync;
    468 	int	icallback;
    469 
    470 	ASSERT(fdb);
    471 
    472 	DEBUGF(FDB_D_IO, (CE_NOTE,
    473 	    "?fdb_iodone: fdb: len: %lux flags: %x error: %d",
    474 	    fdb->fd_len, fdb->fd_state, geterror(bp)));
    475 
    476 	if (bp->b_flags & B_REMAPPED)
    477 		bp_mapout(bp);
    478 
    479 	mutex_enter(&fdb->fd_mutex);
    480 
    481 	icallback = fdb->fd_state & FDB_ICALLBACK;
    482 	isasync = fdb->fd_state & FDB_ASYNC;
    483 
    484 	ASSERT(fdb->fd_iodispatch > 0);
    485 	fdb->fd_iodispatch--;
    486 
    487 	if (error = geterror(bp)) {
    488 		fdb->fd_err = error;
    489 		if (bp->b_resid)
    490 			fdb->fd_resid += bp->b_resid;
    491 		else
    492 			fdb->fd_resid += bp->b_bcount;
    493 	}
    494 
    495 	fdb->fd_iocount += bp->b_bcount;
    496 
    497 	/*
    498 	 * ioack collects the total amount of i/o accounted for
    499 	 * this includes:
    500 	 *
    501 	 *	- i/o completed
    502 	 *	- i/o attempted but not completed,
    503 	 *	- i/o not done due to holes.
    504 	 *
    505 	 * Once the entire i/o ranges has been accounted for we'll
    506 	 * call the async function associated with the fdb.
    507 	 *
    508 	 */
    509 
    510 	if ((fdb->fd_iodispatch == 0) &&
    511 	    (fdb->fd_state & (FDB_ERROR|FDB_DONE))) {
    512 
    513 		mutex_exit(&fdb->fd_mutex);
    514 
    515 		if (isasync || icallback) {
    516 			fdb->fd_iofunc(fdb, fdb->fd_iargp, bp);
    517 		}
    518 
    519 	} else {
    520 
    521 		mutex_exit(&fdb->fd_mutex);
    522 
    523 		if (icallback) {
    524 			fdb->fd_iofunc(fdb, fdb->fd_iargp, bp);
    525 		}
    526 	}
    527 
    528 	freerbuf(bp);
    529 }
    530