Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  *
     25  * Portions Copyright 2008 Denis Cheng
     26  */
     27 
     28 #include <fcntl.h>
     29 #include <pthread.h>
     30 #include <errno.h>
     31 #include <math.h>
     32 #include <libgen.h>
     33 #include <sys/mman.h>
     34 #include <sys/shm.h>
     35 
     36 #include "filebench.h"
     37 #include "fileset.h"
     38 #include "gamma_dist.h"
     39 #include "utils.h"
     40 #include "fsplug.h"
     41 
     42 /*
     43  * File sets, of type fileset_t, are entities which contain
     44  * information about collections of files and subdirectories in Filebench.
     45  * The fileset, once populated, consists of a tree of fileset entries of
     46  * type filesetentry_t which specify files and directories.  The fileset
     47  * is rooted in a directory specified by fileset_path, and once the populated
     48  * fileset has been created, has a tree of directories and files
     49  * corresponding to the fileset's filesetentry tree.
     50  *
     51  * Fileset entities are allocated by fileset_define() which is called from
 * parser_gram.y: parser_fileset_define(). The filesetentry tree corresponding
 * to the eventual directory and file tree to be instantiated on the storage
 * medium is built by fileset_populate(), which is called from
 * fileset_createset().
     56  * After calling fileset_populate(), fileset_createset() will call
     57  * fileset_create() to pre-allocate designated files and directories.
     58  *
     59  * Fileset_createset() is called from parser_gram.y: parser_create_fileset()
     60  * when a "create fileset" or "run" command is encountered. When the
     61  * "create fileset" command is used, it is generally paired with
     62  * a "create processes" command, and must appear first, in order to
     63  * instantiate all the files in the fileset before trying to use them.
     64  */
     65 
/*
 * Forward declaration; the definition is not in this part of the file.
 */
static int fileset_checkraw(fileset_t *fileset);

/*
 * Maximum parallel allocation control. Presumably the upper bound on the
 * number of file allocation threads running at once -- confirm at the
 * point of use.
 */
#define	MAX_PARALLOC_THREADS 32
     70 
     71 /*
     72  * returns pointer to file or fileset
     73  * string, as appropriate
     74  */
     75 static char *
     76 fileset_entity_name(fileset_t *fileset)
     77 {
     78 	if (fileset->fs_attrs & FILESET_IS_FILE)
     79 		return ("file");
     80 	else
     81 		return ("fileset");
     82 }
     83 
     84 /*
     85  * Removes the last file or directory name from a pathname.
     86  * Basically removes characters from the end of the path by
     87  * setting them to \0 until a forward slash '/' is
     88  * encountered. It also removes the forward slash.
     89  */
     90 static char *
     91 trunc_dirname(char *dir)
     92 {
     93 	char *s = dir + strlen(dir);
     94 
     95 	while (s != dir) {
     96 		int c = *s;
     97 
     98 		*s = 0;
     99 		if (c == '/')
    100 			break;
    101 		s--;
    102 	}
    103 	return (dir);
    104 }
    105 
    106 /*
    107  * Prints a list of allowed options and how to specify them.
    108  */
    109 void
    110 fileset_usage(void)
    111 {
    112 	(void) fprintf(stderr,
    113 	    "define [file name=<name> | fileset name=<name>],path=<pathname>,"
    114 	    ",entries=<number>\n");
    115 	(void) fprintf(stderr,
    116 	    "		        [,filesize=[size]]\n");
    117 	(void) fprintf(stderr,
    118 	    "		        [,dirwidth=[width]]\n");
    119 	(void) fprintf(stderr,
    120 	    "		        [,dirdepthrv=$random_variable_name]\n");
    121 	(void) fprintf(stderr,
    122 	    "		        [,dirgamma=[100-10000]] "
    123 	    "(Gamma * 1000)\n");
    124 	(void) fprintf(stderr,
    125 	    "		        [,sizegamma=[100-10000]] (Gamma * 1000)\n");
    126 	(void) fprintf(stderr,
    127 	    "		        [,prealloc=[percent]]\n");
    128 	(void) fprintf(stderr, "		        [,paralloc]\n");
    129 	(void) fprintf(stderr, "		        [,reuse]\n");
    130 	(void) fprintf(stderr, "\n");
    131 }
    132 
    133 /*
    134  * Creates a path string from the filesetentry_t "*entry"
    135  * and all of its parent's path names. The resulting path
    136  * is a concatination of all the individual parent paths.
    137  * Allocates memory for the path string and returns a
    138  * pointer to it.
    139  */
    140 char *
    141 fileset_resolvepath(filesetentry_t *entry)
    142 {
    143 	filesetentry_t *fsep = entry;
    144 	char path[MAXPATHLEN];
    145 	char pathtmp[MAXPATHLEN];
    146 	char *s;
    147 
    148 	path[0] = '\0';
    149 	while (fsep->fse_parent) {
    150 		(void) strcpy(pathtmp, "/");
    151 		(void) fb_strlcat(pathtmp, fsep->fse_path, MAXPATHLEN);
    152 		(void) fb_strlcat(pathtmp, path, MAXPATHLEN);
    153 		(void) fb_strlcpy(path, pathtmp, MAXPATHLEN);
    154 		fsep = fsep->fse_parent;
    155 	}
    156 
    157 	s = malloc(strlen(path) + 1);
    158 	(void) fb_strlcpy(s, path, MAXPATHLEN);
    159 	return (s);
    160 }
    161 
    162 /*
    163  * Creates multiple nested directories as required by the
    164  * supplied path. Starts at the end of the path, creating
    165  * a list of directories to mkdir, up to the root of the
    166  * path, then mkdirs them one at a time from the root on down.
    167  */
    168 static int
    169 fileset_mkdir(char *path, int mode)
    170 {
    171 	char *p;
    172 	char *dirs[65536];
    173 	int i = 0;
    174 
    175 	if ((p = strdup(path)) == NULL)
    176 		goto null_str;
    177 
    178 	/*
    179 	 * Fill an array of subdirectory path names until either we
    180 	 * reach the root or encounter an already existing subdirectory
    181 	 */
    182 	/* CONSTCOND */
    183 	while (1) {
    184 		struct stat64 sb;
    185 
    186 		if (stat64(p, &sb) == 0)
    187 			break;
    188 		if (strlen(p) < 3)
    189 			break;
    190 		if ((dirs[i] = strdup(p)) == NULL) {
    191 			free(p);
    192 			goto null_str;
    193 		}
    194 
    195 		(void) trunc_dirname(p);
    196 		i++;
    197 	}
    198 
    199 	/* Make the directories, from closest to root downwards. */
    200 	for (--i; i >= 0; i--) {
    201 		(void) FB_MKDIR(dirs[i], mode);
    202 		free(dirs[i]);
    203 	}
    204 
    205 	free(p);
    206 	return (FILEBENCH_OK);
    207 
    208 null_str:
    209 	/* clean up */
    210 	for (--i; i >= 0; i--)
    211 		free(dirs[i]);
    212 
    213 	filebench_log(LOG_ERROR,
    214 	    "Failed to create directory path %s: Out of memory", path);
    215 	return (FILEBENCH_ERROR);
    216 }
    217 
    218 /*
    219  * creates the subdirectory tree for a fileset.
    220  */
    221 static int
    222 fileset_create_subdirs(fileset_t *fileset, char *filesetpath)
    223 {
    224 	filesetentry_t *direntry;
    225 	char full_path[MAXPATHLEN];
    226 	char *part_path;
    227 
    228 	/* walk the subdirectory list, enstanciating subdirs */
    229 	direntry = fileset->fs_dirlist;
    230 	while (direntry) {
    231 		(void) fb_strlcpy(full_path, filesetpath, MAXPATHLEN);
    232 		part_path = fileset_resolvepath(direntry);
    233 		(void) fb_strlcat(full_path, part_path, MAXPATHLEN);
    234 		free(part_path);
    235 
    236 		/* now create this portion of the subdirectory tree */
    237 		if (fileset_mkdir(full_path, 0755) == FILEBENCH_ERROR)
    238 			return (FILEBENCH_ERROR);
    239 
    240 		direntry = direntry->fse_nextoftype;
    241 	}
    242 	return (FILEBENCH_OK);
    243 }
    244 
    245 /*
    246  * move filesetentry between exist tree and non-exist tree, source_tree
    247  * to destination tree.
    248  */
    249 static void
    250 fileset_move_entry(avl_tree_t *src_tree, avl_tree_t *dst_tree,
    251     filesetentry_t *entry)
    252 {
    253 	avl_remove(src_tree, entry);
    254 	avl_add(dst_tree, entry);
    255 }
    256 
    257 /*
    258  * given a fileset entry, determines if the associated leaf directory
    259  * needs to be made or not, and if so does the mkdir.
    260  */
    261 static int
    262 fileset_alloc_leafdir(filesetentry_t *entry)
    263 {
    264 	fileset_t *fileset;
    265 	char path[MAXPATHLEN];
    266 	struct stat64 sb;
    267 	char *pathtmp;
    268 
    269 	fileset = entry->fse_fileset;
    270 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
    271 	(void) fb_strlcat(path, "/", MAXPATHLEN);
    272 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
    273 	pathtmp = fileset_resolvepath(entry);
    274 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
    275 	free(pathtmp);
    276 
    277 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
    278 
    279 	/* see if not reusing and this directory does not exist */
    280 	if (!((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0))) {
    281 
    282 		/* No file or not reusing, so create */
    283 		if (FB_MKDIR(path, 0755) < 0) {
    284 			filebench_log(LOG_ERROR,
    285 			    "Failed to pre-allocate leaf directory %s: %s",
    286 			    path, strerror(errno));
    287 			fileset_unbusy(entry, TRUE, FALSE, 0);
    288 			return (FILEBENCH_ERROR);
    289 		}
    290 	}
    291 
    292 	/* unbusy the allocated entry */
    293 	fileset_unbusy(entry, TRUE, TRUE, 0);
    294 	return (FILEBENCH_OK);
    295 }
    296 
    297 /*
    298  * given a fileset entry, determines if the associated file
    299  * needs to be allocated or not, and if so does the allocation.
    300  */
    301 static int
    302 fileset_alloc_file(filesetentry_t *entry)
    303 {
    304 	fileset_t *fileset;
    305 	char path[MAXPATHLEN];
    306 	char *buf;
    307 	struct stat64 sb;
    308 	char *pathtmp;
    309 	off64_t seek;
    310 	fb_fdesc_t fdesc;
    311 	int trust_tree;
    312 
    313 	fileset = entry->fse_fileset;
    314 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
    315 	(void) fb_strlcat(path, "/", MAXPATHLEN);
    316 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
    317 	pathtmp = fileset_resolvepath(entry);
    318 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
    319 	free(pathtmp);
    320 
    321 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
    322 
    323 	/* see if reusing and this file exists */
    324 	trust_tree = avd_get_bool(fileset->fs_trust_tree);
    325 	if ((entry->fse_flags & FSE_REUSING) && (trust_tree ||
    326 	    (FB_STAT(path, &sb) == 0))) {
    327 		if (FB_OPEN(&fdesc, path, O_RDWR, 0) == FILEBENCH_ERROR) {
    328 			filebench_log(LOG_INFO,
    329 			    "Attempted but failed to Re-use file %s",
    330 			    path);
    331 			fileset_unbusy(entry, TRUE, FALSE, 0);
    332 			return (FILEBENCH_ERROR);
    333 		}
    334 
    335 		if (trust_tree || (sb.st_size == (off64_t)entry->fse_size)) {
    336 			filebench_log(LOG_DEBUG_IMPL,
    337 			    "Re-using file %s", path);
    338 
    339 			if (!avd_get_bool(fileset->fs_cached))
    340 				(void) FB_FREEMEM(&fdesc, entry->fse_size);
    341 
    342 			(void) FB_CLOSE(&fdesc);
    343 
    344 			/* unbusy the allocated entry */
    345 			fileset_unbusy(entry, TRUE, TRUE, 0);
    346 			return (FILEBENCH_OK);
    347 
    348 		} else if (sb.st_size > (off64_t)entry->fse_size) {
    349 			/* reuse, but too large */
    350 			filebench_log(LOG_DEBUG_IMPL,
    351 			    "Truncating & re-using file %s", path);
    352 
    353 			(void) FB_FTRUNC(&fdesc, (off64_t)entry->fse_size);
    354 
    355 			if (!avd_get_bool(fileset->fs_cached))
    356 				(void) FB_FREEMEM(&fdesc, entry->fse_size);
    357 
    358 			(void) FB_CLOSE(&fdesc);
    359 
    360 			/* unbusy the allocated entry */
    361 			fileset_unbusy(entry, TRUE, TRUE, 0);
    362 			return (FILEBENCH_OK);
    363 		}
    364 	} else {
    365 
    366 		/* No file or not reusing, so create */
    367 		if (FB_OPEN(&fdesc, path, O_RDWR | O_CREAT, 0644) ==
    368 		    FILEBENCH_ERROR) {
    369 			filebench_log(LOG_ERROR,
    370 			    "Failed to pre-allocate file %s: %s",
    371 			    path, strerror(errno));
    372 
    373 			/* unbusy the unallocated entry */
    374 			fileset_unbusy(entry, TRUE, FALSE, 0);
    375 			return (FILEBENCH_ERROR);
    376 		}
    377 	}
    378 
    379 	if ((buf = (char *)malloc(FILE_ALLOC_BLOCK)) == NULL) {
    380 		/* unbusy the unallocated entry */
    381 		fileset_unbusy(entry, TRUE, FALSE, 0);
    382 		return (FILEBENCH_ERROR);
    383 	}
    384 
    385 	for (seek = 0; seek < entry->fse_size; ) {
    386 		off64_t wsize;
    387 		int ret = 0;
    388 
    389 		/*
    390 		 * Write FILE_ALLOC_BLOCK's worth,
    391 		 * except on last write
    392 		 */
    393 		wsize = MIN(entry->fse_size - seek, FILE_ALLOC_BLOCK);
    394 
    395 		ret = FB_WRITE(&fdesc, buf, wsize);
    396 		if (ret != wsize) {
    397 			filebench_log(LOG_ERROR,
    398 			    "Failed to pre-allocate file %s: %s",
    399 			    path, strerror(errno));
    400 			(void) FB_CLOSE(&fdesc);
    401 			free(buf);
    402 			fileset_unbusy(entry, TRUE, FALSE, 0);
    403 			return (FILEBENCH_ERROR);
    404 		}
    405 		seek += wsize;
    406 	}
    407 
    408 	if (!avd_get_bool(fileset->fs_cached))
    409 		(void) FB_FREEMEM(&fdesc, entry->fse_size);
    410 
    411 	(void) FB_CLOSE(&fdesc);
    412 
    413 	free(buf);
    414 
    415 	/* unbusy the allocated entry */
    416 	fileset_unbusy(entry, TRUE, TRUE, 0);
    417 
    418 	filebench_log(LOG_DEBUG_IMPL,
    419 	    "Pre-allocated file %s size %llu",
    420 	    path, (u_longlong_t)entry->fse_size);
    421 
    422 	return (FILEBENCH_OK);
    423 }
    424 
    425 /*
    426  * given a fileset entry, determines if the associated file
    427  * needs to be allocated or not, and if so does the allocation.
    428  * Sets shm_fsparalloc_count to -1 on error.
    429  */
    430 static void *
    431 fileset_alloc_thread(filesetentry_t *entry)
    432 {
    433 	if (fileset_alloc_file(entry) == FILEBENCH_ERROR) {
    434 		(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
    435 		filebench_shm->shm_fsparalloc_count = -1;
    436 	} else {
    437 		(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
    438 		filebench_shm->shm_fsparalloc_count--;
    439 	}
    440 
    441 	(void) pthread_cond_signal(&filebench_shm->shm_fsparalloc_cv);
    442 	(void) pthread_mutex_unlock(&filebench_shm->shm_fsparalloc_lock);
    443 
    444 	pthread_exit(NULL);
    445 	return (NULL);
    446 }
    447 
    448 
    449 /*
    450  * First creates the parent directories of the file using
    451  * fileset_mkdir(). Then Optionally sets the O_DSYNC flag
    452  * and opens the file with open64(). It unlocks the fileset
    453  * entry lock, sets the DIRECTIO_ON or DIRECTIO_OFF flags
    454  * as requested, and returns the file descriptor integer
    455  * for the opened file in the supplied filebench file descriptor.
    456  * Returns FILEBENCH_ERROR on error, and FILEBENCH_OK on success.
    457  */
    458 int
    459 fileset_openfile(fb_fdesc_t *fdesc, fileset_t *fileset,
    460     filesetentry_t *entry, int flag, int filemode, int attrs)
    461 {
    462 	char path[MAXPATHLEN];
    463 	char dir[MAXPATHLEN];
    464 	char *pathtmp;
    465 	struct stat64 sb;
    466 	int open_attrs = 0;
    467 
    468 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
    469 	(void) fb_strlcat(path, "/", MAXPATHLEN);
    470 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
    471 	pathtmp = fileset_resolvepath(entry);
    472 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
    473 	(void) fb_strlcpy(dir, path, MAXPATHLEN);
    474 	free(pathtmp);
    475 	(void) trunc_dirname(dir);
    476 
    477 	/* If we are going to create a file, create the parent dirs */
    478 	if ((flag & O_CREAT) && (stat64(dir, &sb) != 0)) {
    479 		if (fileset_mkdir(dir, 0755) == FILEBENCH_ERROR)
    480 			return (FILEBENCH_ERROR);
    481 	}
    482 
    483 	if (attrs & FLOW_ATTR_DSYNC) {
    484 #ifdef sun
    485 		open_attrs |= O_DSYNC;
    486 #else
    487 		open_attrs |= O_FSYNC;
    488 #endif
    489 	}
    490 
    491 	if (FB_OPEN(fdesc, path, flag | open_attrs, filemode)
    492 	    == FILEBENCH_ERROR) {
    493 		filebench_log(LOG_ERROR,
    494 		    "Failed to open file %d, %s, with status %x: %s",
    495 		    entry->fse_index, path, entry->fse_flags, strerror(errno));
    496 
    497 		fileset_unbusy(entry, FALSE, FALSE, 0);
    498 		return (FILEBENCH_ERROR);
    499 	}
    500 
    501 	if (flag & O_CREAT)
    502 		fileset_unbusy(entry, TRUE, TRUE, 1);
    503 	else
    504 		fileset_unbusy(entry, FALSE, FALSE, 1);
    505 
    506 #ifdef sun
    507 	if (attrs & FLOW_ATTR_DIRECTIO)
    508 		(void) directio(fdesc->fd_num, DIRECTIO_ON);
    509 	else
    510 		(void) directio(fdesc->fd_num, DIRECTIO_OFF);
    511 #endif
    512 
    513 	return (FILEBENCH_OK);
    514 }
    515 
    516 /*
    517  * removes all filesetentries from their respective btrees, and puts them
    518  * on the free list. The supplied argument indicates which free list to
    519  * use.
    520  */
    521 static void
    522 fileset_pickreset(fileset_t *fileset, int entry_type)
    523 {
    524 	filesetentry_t	*entry;
    525 
    526 	switch (entry_type & FILESET_PICKMASK) {
    527 	case FILESET_PICKFILE:
    528 		entry = (filesetentry_t *)avl_first(&fileset->fs_noex_files);
    529 
    530 		/* make sure non-existing files are marked free */
    531 		while (entry) {
    532 			entry->fse_flags |= FSE_FREE;
    533 			entry->fse_open_cnt = 0;
    534 			fileset_move_entry(&fileset->fs_noex_files,
    535 			    &fileset->fs_free_files, entry);
    536 			entry =  AVL_NEXT(&fileset->fs_noex_files, entry);
    537 		}
    538 
    539 		/* free up any existing files */
    540 		entry = (filesetentry_t *)avl_first(&fileset->fs_exist_files);
    541 
    542 		while (entry) {
    543 			entry->fse_flags |= FSE_FREE;
    544 			entry->fse_open_cnt = 0;
    545 			fileset_move_entry(&fileset->fs_exist_files,
    546 			    &fileset->fs_free_files, entry);
    547 
    548 			entry =  AVL_NEXT(&fileset->fs_exist_files, entry);
    549 		}
    550 
    551 		break;
    552 
    553 	case FILESET_PICKDIR:
    554 		/* nothing to reset, as all (sub)dirs always exist */
    555 		break;
    556 
    557 	case FILESET_PICKLEAFDIR:
    558 		entry = (filesetentry_t *)
    559 		    avl_first(&fileset->fs_noex_leaf_dirs);
    560 
    561 		/* make sure non-existing leaf dirs are marked free */
    562 		while (entry) {
    563 			entry->fse_flags |= FSE_FREE;
    564 			entry->fse_open_cnt = 0;
    565 			fileset_move_entry(&fileset->fs_noex_leaf_dirs,
    566 			    &fileset->fs_free_leaf_dirs, entry);
    567 			entry =  AVL_NEXT(&fileset->fs_noex_leaf_dirs, entry);
    568 		}
    569 
    570 		/* free up any existing leaf dirs */
    571 		entry = (filesetentry_t *)
    572 		    avl_first(&fileset->fs_exist_leaf_dirs);
    573 
    574 		while (entry) {
    575 			entry->fse_flags |= FSE_FREE;
    576 			entry->fse_open_cnt = 0;
    577 			fileset_move_entry(&fileset->fs_exist_leaf_dirs,
    578 			    &fileset->fs_free_leaf_dirs, entry);
    579 
    580 			entry =  AVL_NEXT(&fileset->fs_exist_leaf_dirs, entry);
    581 		}
    582 
    583 		break;
    584 	}
    585 }
    586 
    587 /*
    588  * find a filesetentry from the fileset using the supplied index
    589  */
    590 static filesetentry_t *
    591 fileset_find_entry(avl_tree_t *atp, uint_t index)
    592 {
    593 	avl_index_t	found_loc;
    594 	filesetentry_t	desired_fse, *found_fse;
    595 
    596 	/* find the file with the desired index, if it is in the tree */
    597 	desired_fse.fse_index = index;
    598 	found_fse = avl_find(atp, (void *)(&desired_fse), &found_loc);
    599 	if (found_fse != NULL)
    600 		return (found_fse);
    601 
    602 	/* if requested node not found, find next higher node */
    603 	found_fse = avl_nearest(atp, found_loc, AVL_AFTER);
    604 	if (found_fse != NULL)
    605 		return (found_fse);
    606 
    607 	/* might have hit the end, return lowest available index node */
    608 	found_fse = avl_first(atp);
    609 	return (found_fse);
    610 }
    611 
    612 /*
    613  * Selects a fileset entry from a fileset. If the
    614  * FILESET_PICKLEAFDIR flag is set it will pick a leaf directory entry,
    615  * if the FILESET_PICKDIR flag is set it will pick a non leaf directory
    616  * entry, otherwise a file entry. The FILESET_PICKUNIQUE
    617  * flag will take an entry off of one of the free (unused)
    618  * lists (file or directory), otherwise the entry will be
    619  * picked off of one of the rotor lists (file or directory).
    620  * The FILESET_PICKEXISTS will insure that only extant
    621  * (FSE_EXISTS) state files are selected, while
    622  * FILESET_PICKNOEXIST insures that only non extant
    623  * (not FSE_EXISTS) state files are selected.
    624  * Note that the selected fileset entry (file) is returned
    625  * with its FSE_BUSY flag (in fse_flags) set.
    626  */
    627 filesetentry_t *
    628 fileset_pick(fileset_t *fileset, int flags, int tid, int index)
    629 {
    630 	filesetentry_t *entry = NULL;
    631 	filesetentry_t *start_point;
    632 	avl_tree_t *atp;
    633 	fbint_t max_entries;
    634 
    635 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
    636 
    637 	/* see if we have to wait for available files or directories */
    638 	switch (flags & FILESET_PICKMASK) {
    639 	case FILESET_PICKFILE:
    640 		if (fileset->fs_filelist == NULL)
    641 			goto empty;
    642 
    643 		while (fileset->fs_idle_files == 0) {
    644 			(void) pthread_cond_wait(&fileset->fs_idle_files_cv,
    645 			    &fileset->fs_pick_lock);
    646 		}
    647 
    648 		max_entries = fileset->fs_constentries;
    649 		if (flags & FILESET_PICKUNIQUE) {
    650 			atp = &fileset->fs_free_files;
    651 		} else if (flags & FILESET_PICKNOEXIST) {
    652 			atp = &fileset->fs_noex_files;
    653 		} else {
    654 			atp = &fileset->fs_exist_files;
    655 		}
    656 		break;
    657 
    658 	case FILESET_PICKDIR:
    659 		if (fileset->fs_dirlist == NULL)
    660 			goto empty;
    661 
    662 		while (fileset->fs_idle_dirs == 0) {
    663 			(void) pthread_cond_wait(&fileset->fs_idle_dirs_cv,
    664 			    &fileset->fs_pick_lock);
    665 		}
    666 
    667 		max_entries = 1;
    668 		atp = &fileset->fs_dirs;
    669 		break;
    670 
    671 	case FILESET_PICKLEAFDIR:
    672 		if (fileset->fs_leafdirlist == NULL)
    673 			goto empty;
    674 
    675 		while (fileset->fs_idle_leafdirs == 0) {
    676 			(void) pthread_cond_wait(&fileset->fs_idle_leafdirs_cv,
    677 			    &fileset->fs_pick_lock);
    678 		}
    679 
    680 		max_entries = fileset->fs_constleafdirs;
    681 		if (flags & FILESET_PICKUNIQUE) {
    682 			atp = &fileset->fs_free_leaf_dirs;
    683 		} else if (flags & FILESET_PICKNOEXIST) {
    684 			atp = &fileset->fs_noex_leaf_dirs;
    685 		} else {
    686 			atp = &fileset->fs_exist_leaf_dirs;
    687 		}
    688 		break;
    689 	}
    690 
    691 	/* see if asking for impossible */
    692 	if (avl_is_empty(atp))
    693 		goto empty;
    694 
    695 	if (flags & FILESET_PICKUNIQUE) {
    696 		uint64_t  index64;
    697 
    698 		/*
    699 		 * pick at random from free list in order to
    700 		 * distribute initially allocated files more
    701 		 * randomly on storage media. Use uniform
    702 		 * random number generator to select index
    703 		 * if it is not supplied with pick call.
    704 		 */
    705 		if (index) {
    706 			index64 = index;
    707 		} else {
    708 			if (filebench_randomno64(&index64, max_entries, 1,
    709 			    NULL) == FILEBENCH_ERROR)
    710 				return (NULL);
    711 		}
    712 
    713 		entry = fileset_find_entry(atp, (int)index64);
    714 
    715 		if (entry == NULL)
    716 			goto empty;
    717 
    718 	} else if (flags & FILESET_PICKBYINDEX) {
    719 		/* pick by supplied index */
    720 		entry = fileset_find_entry(atp, index);
    721 
    722 	} else {
    723 		/* pick in rotation */
    724 		switch (flags & FILESET_PICKMASK) {
    725 		case FILESET_PICKFILE:
    726 			if (flags & FILESET_PICKNOEXIST) {
    727 				entry = fileset_find_entry(atp,
    728 				    fileset->fs_file_nerotor);
    729 				fileset->fs_file_nerotor =
    730 				    entry->fse_index + 1;
    731 			} else {
    732 				entry = fileset_find_entry(atp,
    733 				    fileset->fs_file_exrotor[tid]);
    734 				fileset->fs_file_exrotor[tid] =
    735 				    entry->fse_index + 1;
    736 			}
    737 			break;
    738 
    739 		case FILESET_PICKDIR:
    740 			entry = fileset_find_entry(atp, fileset->fs_dirrotor);
    741 			fileset->fs_dirrotor = entry->fse_index + 1;
    742 			break;
    743 
    744 		case FILESET_PICKLEAFDIR:
    745 			if (flags & FILESET_PICKNOEXIST) {
    746 				entry = fileset_find_entry(atp,
    747 				    fileset->fs_leafdir_nerotor);
    748 				fileset->fs_leafdir_nerotor =
    749 				    entry->fse_index + 1;
    750 			} else {
    751 				entry = fileset_find_entry(atp,
    752 				    fileset->fs_leafdir_exrotor);
    753 				fileset->fs_leafdir_exrotor =
    754 				    entry->fse_index + 1;
    755 			}
    756 			break;
    757 		}
    758 	}
    759 
    760 	if (entry == NULL)
    761 		goto empty;
    762 
    763 	/* see if entry in use */
    764 	start_point = entry;
    765 	while (entry->fse_flags & FSE_BUSY) {
    766 
    767 		/* it is, so try next */
    768 		entry = AVL_NEXT(atp, entry);
    769 		if (entry == NULL)
    770 			entry = avl_first(atp);
    771 
    772 		/* see if we have wrapped around */
    773 		if ((entry == NULL) || (entry == start_point)) {
    774 			filebench_log(LOG_DEBUG_SCRIPT,
    775 			    "All %d files are busy", avl_numnodes(atp));
    776 			goto empty;
    777 		}
    778 
    779 	}
    780 
    781 	/* update file or directory idle counts */
    782 	switch (flags & FILESET_PICKMASK) {
    783 	case FILESET_PICKFILE:
    784 		fileset->fs_idle_files--;
    785 		break;
    786 	case FILESET_PICKDIR:
    787 		fileset->fs_idle_dirs--;
    788 		break;
    789 	case FILESET_PICKLEAFDIR:
    790 		fileset->fs_idle_leafdirs--;
    791 		break;
    792 	}
    793 
    794 	/* Indicate that file or directory is now busy */
    795 	entry->fse_flags |= FSE_BUSY;
    796 
    797 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
    798 	filebench_log(LOG_DEBUG_SCRIPT, "Picked file %s", entry->fse_path);
    799 	return (entry);
    800 
    801 empty:
    802 	filebench_log(LOG_DEBUG_SCRIPT, "No file found");
    803 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
    804 	return (NULL);
    805 }
    806 
    807 /*
    808  * Removes a filesetentry from the "FSE_BUSY" state, signaling any threads
    809  * that are waiting for a NOT BUSY filesetentry. Also sets whether it is
    810  * existant or not, or leaves that designation alone.
    811  */
    812 void
    813 fileset_unbusy(filesetentry_t *entry, int update_exist,
    814     int new_exist_val, int open_cnt_incr)
    815 {
    816 	fileset_t *fileset = NULL;
    817 
    818 	if (entry)
    819 		fileset = entry->fse_fileset;
    820 
    821 	if (fileset == NULL) {
    822 		filebench_log(LOG_ERROR, "fileset_unbusy: NO FILESET!");
    823 		return;
    824 	}
    825 
    826 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
    827 
    828 	/* modify FSE_EXIST flag and actual dirs/files count, if requested */
    829 	if (update_exist) {
    830 		if (new_exist_val == TRUE) {
    831 			if (entry->fse_flags & FSE_FREE) {
    832 
    833 				/* asked to set and it was free */
    834 				entry->fse_flags |= FSE_EXISTS;
    835 				entry->fse_flags &= (~FSE_FREE);
    836 				switch (entry->fse_flags & FSE_TYPE_MASK) {
    837 				case FSE_TYPE_FILE:
    838 					fileset_move_entry(
    839 					    &fileset->fs_free_files,
    840 					    &fileset->fs_exist_files, entry);
    841 					break;
    842 
    843 				case FSE_TYPE_DIR:
    844 					break;
    845 
    846 				case FSE_TYPE_LEAFDIR:
    847 					fileset_move_entry(
    848 					    &fileset->fs_free_leaf_dirs,
    849 					    &fileset->fs_exist_leaf_dirs,
    850 					    entry);
    851 					break;
    852 				}
    853 
    854 			} else if (!(entry->fse_flags & FSE_EXISTS)) {
    855 
    856 				/* asked to set, and it was clear */
    857 				entry->fse_flags |= FSE_EXISTS;
    858 				switch (entry->fse_flags & FSE_TYPE_MASK) {
    859 				case FSE_TYPE_FILE:
    860 					fileset_move_entry(
    861 					    &fileset->fs_noex_files,
    862 					    &fileset->fs_exist_files, entry);
    863 					break;
    864 				case FSE_TYPE_DIR:
    865 					break;
    866 				case FSE_TYPE_LEAFDIR:
    867 					fileset_move_entry(
    868 					    &fileset->fs_noex_leaf_dirs,
    869 					    &fileset->fs_exist_leaf_dirs,
    870 					    entry);
    871 					break;
    872 				}
    873 			}
    874 		} else {
    875 			if (entry->fse_flags & FSE_FREE) {
    876 				/* asked to clear, and it was free */
    877 				entry->fse_flags &= (~(FSE_FREE | FSE_EXISTS));
    878 				switch (entry->fse_flags & FSE_TYPE_MASK) {
    879 				case FSE_TYPE_FILE:
    880 					fileset_move_entry(
    881 					    &fileset->fs_free_files,
    882 					    &fileset->fs_noex_files, entry);
    883 					break;
    884 
    885 				case FSE_TYPE_DIR:
    886 					break;
    887 
    888 				case FSE_TYPE_LEAFDIR:
    889 					fileset_move_entry(
    890 					    &fileset->fs_free_leaf_dirs,
    891 					    &fileset->fs_noex_leaf_dirs,
    892 					    entry);
    893 					break;
    894 				}
    895 			} else if (entry->fse_flags & FSE_EXISTS) {
    896 
    897 				/* asked to clear, and it was set */
    898 				entry->fse_flags &= (~FSE_EXISTS);
    899 				switch (entry->fse_flags & FSE_TYPE_MASK) {
    900 				case FSE_TYPE_FILE:
    901 					fileset_move_entry(
    902 					    &fileset->fs_exist_files,
    903 					    &fileset->fs_noex_files, entry);
    904 					break;
    905 				case FSE_TYPE_DIR:
    906 					break;
    907 				case FSE_TYPE_LEAFDIR:
    908 					fileset_move_entry(
    909 					    &fileset->fs_exist_leaf_dirs,
    910 					    &fileset->fs_noex_leaf_dirs,
    911 					    entry);
    912 					break;
    913 				}
    914 			}
    915 		}
    916 	}
    917 
    918 	/* update open count */
    919 	entry->fse_open_cnt += open_cnt_incr;
    920 
    921 	/* increment idle count, clear FSE_BUSY and signal IF it was busy */
    922 	if (entry->fse_flags & FSE_BUSY) {
    923 
    924 		/* unbusy it */
    925 		entry->fse_flags &= (~FSE_BUSY);
    926 
    927 		/* release any threads waiting for unbusy */
    928 		if (entry->fse_flags & FSE_THRD_WAITNG) {
    929 			entry->fse_flags &= (~FSE_THRD_WAITNG);
    930 			(void) pthread_cond_broadcast(
    931 			    &fileset->fs_thrd_wait_cv);
    932 		}
    933 
    934 		/* increment idle count and signal waiting threads */
    935 		switch (entry->fse_flags & FSE_TYPE_MASK) {
    936 		case FSE_TYPE_FILE:
    937 			fileset->fs_idle_files++;
    938 			if (fileset->fs_idle_files == 1) {
    939 				(void) pthread_cond_signal(
    940 				    &fileset->fs_idle_files_cv);
    941 			}
    942 			break;
    943 
    944 		case FSE_TYPE_DIR:
    945 			fileset->fs_idle_dirs++;
    946 			if (fileset->fs_idle_dirs == 1) {
    947 				(void) pthread_cond_signal(
    948 				    &fileset->fs_idle_dirs_cv);
    949 			}
    950 			break;
    951 
    952 		case FSE_TYPE_LEAFDIR:
    953 			fileset->fs_idle_leafdirs++;
    954 			if (fileset->fs_idle_leafdirs == 1) {
    955 				(void) pthread_cond_signal(
    956 				    &fileset->fs_idle_leafdirs_cv);
    957 			}
    958 			break;
    959 		}
    960 	}
    961 
    962 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
    963 }
    964 
    965 /*
    966  * Given a fileset "fileset", create the associated files as
    967  * specified in the attributes of the fileset. The fileset is
    968  * rooted in a directory whose pathname is in fileset_path. If the
    969  * directory exists, meaning that there is already a fileset,
    970  * and the fileset_reuse attribute is false, then remove it and all
    971  * its contained files and subdirectories. Next, the routine
    972  * creates a root directory for the fileset. All the file type
    973  * filesetentries are cycled through creating as needed
    974  * their containing subdirectory trees in the filesystem and
    975  * creating actual files for fileset_preallocpercent of them. The
    976  * created files are filled with fse_size bytes of unitialized
    977  * data. The routine returns FILEBENCH_ERROR on errors,
    978  * FILEBENCH_OK on success.
    979  */
    980 static int
    981 fileset_create(fileset_t *fileset)
    982 {
    983 	filesetentry_t *entry;
    984 	char path[MAXPATHLEN];
    985 	struct stat64 sb;
    986 	hrtime_t start = gethrtime();
    987 	char *fileset_path;
    988 	char *fileset_name;
    989 	int randno;
    990 	int preallocated = 0;
    991 	int reusing;
    992 
    993 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
    994 		filebench_log(LOG_ERROR, "%s path not set",
    995 		    fileset_entity_name(fileset));
    996 		return (FILEBENCH_ERROR);
    997 	}
    998 
    999 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
   1000 		filebench_log(LOG_ERROR, "%s name not set",
   1001 		    fileset_entity_name(fileset));
   1002 		return (FILEBENCH_ERROR);
   1003 	}
   1004 
   1005 #ifdef HAVE_RAW_SUPPORT
   1006 	/* treat raw device as special case */
   1007 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
   1008 		return (FILEBENCH_OK);
   1009 #endif /* HAVE_RAW_SUPPORT */
   1010 
   1011 	/* XXX Add check to see if there is enough space */
   1012 
   1013 	/* set up path to fileset */
   1014 	(void) fb_strlcpy(path, fileset_path, MAXPATHLEN);
   1015 	(void) fb_strlcat(path, "/", MAXPATHLEN);
   1016 	(void) fb_strlcat(path, fileset_name, MAXPATHLEN);
   1017 
   1018 	/* if reusing and trusting to exist, just blindly reuse */
   1019 	if (avd_get_bool(fileset->fs_trust_tree)) {
   1020 		reusing = 1;
   1021 
   1022 	/* if exists and resusing, then don't create new */
   1023 	} else if (((stat64(path, &sb) == 0)&& (strlen(path) > 3) &&
   1024 	    (strlen(avd_get_str(fileset->fs_path)) > 2)) &&
   1025 	    avd_get_bool(fileset->fs_reuse)) {
   1026 		reusing = 1;
   1027 	} else {
   1028 		reusing = 0;
   1029 	}
   1030 
   1031 	if (!reusing) {
   1032 		/* Remove existing */
   1033 		FB_RECUR_RM(path);
   1034 		filebench_log(LOG_VERBOSE,
   1035 		    "Removed any existing %s %s in %llu seconds",
   1036 		    fileset_entity_name(fileset), fileset_name,
   1037 		    (u_longlong_t)(((gethrtime() - start) /
   1038 		    1000000000) + 1));
   1039 	} else {
   1040 		/* we are re-using */
   1041 		filebench_log(LOG_VERBOSE, "Re-using %s %s.",
   1042 		    fileset_entity_name(fileset), fileset_name);
   1043 	}
   1044 
   1045 	/* make the filesets directory tree unless in reuse mode */
   1046 	if (!reusing && (avd_get_bool(fileset->fs_prealloc))) {
   1047 		filebench_log(LOG_VERBOSE,
   1048 		    "making tree for filset %s", path);
   1049 
   1050 		(void) FB_MKDIR(path, 0755);
   1051 
   1052 		if (fileset_create_subdirs(fileset, path) == FILEBENCH_ERROR)
   1053 			return (FILEBENCH_ERROR);
   1054 	}
   1055 
   1056 	start = gethrtime();
   1057 
   1058 	filebench_log(LOG_VERBOSE, "Creating %s %s...",
   1059 	    fileset_entity_name(fileset), fileset_name);
   1060 
   1061 	randno = ((RAND_MAX * (100
   1062 	    - avd_get_int(fileset->fs_preallocpercent))) / 100);
   1063 
   1064 	/* alloc any files, as required */
   1065 	fileset_pickreset(fileset, FILESET_PICKFILE);
   1066 	while (entry = fileset_pick(fileset,
   1067 	    FILESET_PICKFREE | FILESET_PICKFILE, 0, 0)) {
   1068 		pthread_t tid;
   1069 		int newrand;
   1070 
   1071 		newrand = rand();
   1072 
   1073 		if (newrand < randno) {
   1074 			/* unbusy the unallocated entry */
   1075 			fileset_unbusy(entry, TRUE, FALSE, 0);
   1076 			continue;
   1077 		}
   1078 
   1079 		preallocated++;
   1080 
   1081 		if (reusing)
   1082 			entry->fse_flags |= FSE_REUSING;
   1083 		else
   1084 			entry->fse_flags &= (~FSE_REUSING);
   1085 
   1086 		/* fire off allocation threads for each file if paralloc set */
   1087 		if (avd_get_bool(fileset->fs_paralloc)) {
   1088 
   1089 			/* limit total number of simultaneous allocations */
   1090 			(void) pthread_mutex_lock(
   1091 			    &filebench_shm->shm_fsparalloc_lock);
   1092 			while (filebench_shm->shm_fsparalloc_count
   1093 			    >= MAX_PARALLOC_THREADS) {
   1094 				(void) pthread_cond_wait(
   1095 				    &filebench_shm->shm_fsparalloc_cv,
   1096 				    &filebench_shm->shm_fsparalloc_lock);
   1097 			}
   1098 
   1099 			/* quit if any allocation thread reports an error */
   1100 			if (filebench_shm->shm_fsparalloc_count < 0) {
   1101 				(void) pthread_mutex_unlock(
   1102 				    &filebench_shm->shm_fsparalloc_lock);
   1103 				return (FILEBENCH_ERROR);
   1104 			}
   1105 
   1106 			filebench_shm->shm_fsparalloc_count++;
   1107 			(void) pthread_mutex_unlock(
   1108 			    &filebench_shm->shm_fsparalloc_lock);
   1109 
   1110 			/*
   1111 			 * Fire off a detached allocation thread per file.
   1112 			 * The thread will self destruct when it finishes
   1113 			 * writing pre-allocation data to the file.
   1114 			 */
   1115 			if (pthread_create(&tid, NULL,
   1116 			    (void *(*)(void*))fileset_alloc_thread,
   1117 			    entry) == 0) {
   1118 				/*
   1119 				 * A thread was created; detach it so it can
   1120 				 * fully quit when finished.
   1121 				 */
   1122 				(void) pthread_detach(tid);
   1123 			} else {
   1124 				filebench_log(LOG_ERROR,
   1125 				    "File prealloc thread create failed");
   1126 				filebench_shutdown(1);
   1127 			}
   1128 
   1129 		} else {
   1130 			if (fileset_alloc_file(entry) == FILEBENCH_ERROR)
   1131 				return (FILEBENCH_ERROR);
   1132 		}
   1133 	}
   1134 
   1135 	/* alloc any leaf directories, as required */
   1136 	fileset_pickreset(fileset, FILESET_PICKLEAFDIR);
   1137 	while (entry = fileset_pick(fileset,
   1138 	    FILESET_PICKFREE | FILESET_PICKLEAFDIR, 0, 0)) {
   1139 
   1140 		if (rand() < randno) {
   1141 			/* unbusy the unallocated entry */
   1142 			fileset_unbusy(entry, TRUE, FALSE, 0);
   1143 			continue;
   1144 		}
   1145 
   1146 		preallocated++;
   1147 
   1148 		if (reusing)
   1149 			entry->fse_flags |= FSE_REUSING;
   1150 		else
   1151 			entry->fse_flags &= (~FSE_REUSING);
   1152 
   1153 		if (fileset_alloc_leafdir(entry) == FILEBENCH_ERROR)
   1154 			return (FILEBENCH_ERROR);
   1155 	}
   1156 
   1157 exit:
   1158 	filebench_log(LOG_VERBOSE,
   1159 	    "Preallocated %d of %llu of %s %s in %llu seconds",
   1160 	    preallocated,
   1161 	    (u_longlong_t)fileset->fs_constentries,
   1162 	    fileset_entity_name(fileset), fileset_name,
   1163 	    (u_longlong_t)(((gethrtime() - start) / 1000000000) + 1));
   1164 
   1165 	return (FILEBENCH_OK);
   1166 }
   1167 
   1168 /*
   1169  * Removes all files and directories associated with a fileset
   1170  * from the storage subsystem.
   1171  */
   1172 static void
   1173 fileset_delete_storage(fileset_t *fileset)
   1174 {
   1175 	char path[MAXPATHLEN];
   1176 	char *fileset_path;
   1177 	char *fileset_name;
   1178 
   1179 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL)
   1180 		return;
   1181 
   1182 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL)
   1183 		return;
   1184 
   1185 #ifdef HAVE_RAW_SUPPORT
   1186 	/* treat raw device as special case */
   1187 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
   1188 		return;
   1189 #endif /* HAVE_RAW_SUPPORT */
   1190 
   1191 	/* set up path to file */
   1192 	(void) fb_strlcpy(path, fileset_path, MAXPATHLEN);
   1193 	(void) fb_strlcat(path, "/", MAXPATHLEN);
   1194 	(void) fb_strlcat(path, fileset_name, MAXPATHLEN);
   1195 
   1196 	/* now delete any files and directories on the disk */
   1197 	FB_RECUR_RM(path);
   1198 }
   1199 
   1200 /*
   1201  * Removes the fileset entity and all of its filesetentry entities.
   1202  */
   1203 static void
   1204 fileset_delete_fileset(fileset_t *fileset)
   1205 {
   1206 	filesetentry_t *entry, *next_entry;
   1207 
   1208 	/* run down the file list, removing and freeing each filesetentry */
   1209 	for (entry = fileset->fs_filelist; entry; entry = next_entry) {
   1210 
   1211 		/* free the entry */
   1212 		next_entry = entry->fse_next;
   1213 
   1214 		/* return it to the pool */
   1215 		switch (entry->fse_flags & FSE_TYPE_MASK) {
   1216 		case FSE_TYPE_FILE:
   1217 		case FSE_TYPE_LEAFDIR:
   1218 		case FSE_TYPE_DIR:
   1219 			ipc_free(FILEBENCH_FILESETENTRY, (void *)entry);
   1220 			break;
   1221 		default:
   1222 			filebench_log(LOG_ERROR,
   1223 			    "Unallocated filesetentry found on list");
   1224 			break;
   1225 		}
   1226 	}
   1227 
   1228 	ipc_free(FILEBENCH_FILESET, (void *)fileset);
   1229 }
   1230 
   1231 void
   1232 fileset_delete_all_filesets(void)
   1233 {
   1234 	fileset_t *fileset, *next_fileset;
   1235 
   1236 	for (fileset = filebench_shm->shm_filesetlist;
   1237 	    fileset; fileset = next_fileset) {
   1238 		next_fileset = fileset->fs_next;
   1239 		fileset_delete_storage(fileset);
   1240 		fileset_delete_fileset(fileset);
   1241 	}
   1242 
   1243 	filebench_shm->shm_filesetlist = NULL;
   1244 }
   1245 /*
   1246  * Adds an entry to the fileset's file list. Single threaded so
   1247  * no locking needed.
   1248  */
   1249 static void
   1250 fileset_insfilelist(fileset_t *fileset, filesetentry_t *entry)
   1251 {
   1252 	entry->fse_flags = FSE_TYPE_FILE | FSE_FREE;
   1253 	avl_add(&fileset->fs_free_files, entry);
   1254 
   1255 	if (fileset->fs_filelist == NULL) {
   1256 		fileset->fs_filelist = entry;
   1257 		entry->fse_nextoftype = NULL;
   1258 	} else {
   1259 		entry->fse_nextoftype = fileset->fs_filelist;
   1260 		fileset->fs_filelist = entry;
   1261 	}
   1262 }
   1263 
   1264 /*
   1265  * Adds an entry to the fileset's directory list. Single
   1266  * threaded so no locking needed.
   1267  */
   1268 static void
   1269 fileset_insdirlist(fileset_t *fileset, filesetentry_t *entry)
   1270 {
   1271 	entry->fse_flags = FSE_TYPE_DIR | FSE_EXISTS;
   1272 	avl_add(&fileset->fs_dirs, entry);
   1273 
   1274 	if (fileset->fs_dirlist == NULL) {
   1275 		fileset->fs_dirlist = entry;
   1276 		entry->fse_nextoftype = NULL;
   1277 	} else {
   1278 		entry->fse_nextoftype = fileset->fs_dirlist;
   1279 		fileset->fs_dirlist = entry;
   1280 	}
   1281 }
   1282 
   1283 /*
   1284  * Adds an entry to the fileset's leaf directory list. Single
   1285  * threaded so no locking needed.
   1286  */
   1287 static void
   1288 fileset_insleafdirlist(fileset_t *fileset, filesetentry_t *entry)
   1289 {
   1290 	entry->fse_flags = FSE_TYPE_LEAFDIR | FSE_FREE;
   1291 	avl_add(&fileset->fs_free_leaf_dirs, entry);
   1292 
   1293 	if (fileset->fs_leafdirlist == NULL) {
   1294 		fileset->fs_leafdirlist = entry;
   1295 		entry->fse_nextoftype = NULL;
   1296 	} else {
   1297 		entry->fse_nextoftype = fileset->fs_leafdirlist;
   1298 		fileset->fs_leafdirlist = entry;
   1299 	}
   1300 }
   1301 
   1302 /*
   1303  * Compares two fileset entries to determine their relative order
   1304  */
   1305 static int
   1306 fileset_entry_compare(const void *node_1, const void *node_2)
   1307 {
   1308 	if (((filesetentry_t *)node_1)->fse_index <
   1309 	    ((filesetentry_t *)node_2)->fse_index)
   1310 		return (-1);
   1311 
   1312 	if (((filesetentry_t *)node_1)->fse_index ==
   1313 	    ((filesetentry_t *)node_2)->fse_index)
   1314 		return (0);
   1315 
   1316 	return (1);
   1317 }
   1318 
   1319 /*
   1320  * Obtains a filesetentry entity for a file to be placed in a
   1321  * (sub)directory of a fileset. The size of the file may be
   1322  * specified by fileset_meansize, or calculated from a gamma
   1323  * distribution of parameter fileset_sizegamma and of mean size
   1324  * fileset_meansize. The filesetentry entity is placed on the file
   1325  * list in the specified parent filesetentry entity, which may
   1326  * be a directory filesetentry, or the root filesetentry in the
   1327  * fileset. It is also placed on the fileset's list of all
   1328  * contained files. Returns FILEBENCH_OK if successful or FILEBENCH_ERROR
   1329  * if ipc memory for the path string cannot be allocated.
   1330  */
   1331 static int
   1332 fileset_populate_file(fileset_t *fileset, filesetentry_t *parent, int serial)
   1333 {
   1334 	char tmpname[16];
   1335 	filesetentry_t *entry;
   1336 	double drand;
   1337 	uint_t index;
   1338 
   1339 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
   1340 	    == NULL) {
   1341 		filebench_log(LOG_ERROR,
   1342 		    "fileset_populate_file: Can't malloc filesetentry");
   1343 		return (FILEBENCH_ERROR);
   1344 	}
   1345 
   1346 	/* Another currently idle file */
   1347 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
   1348 	index = fileset->fs_idle_files++;
   1349 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
   1350 
   1351 	entry->fse_index = index;
   1352 	entry->fse_parent = parent;
   1353 	entry->fse_fileset = fileset;
   1354 	fileset_insfilelist(fileset, entry);
   1355 
   1356 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
   1357 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
   1358 		filebench_log(LOG_ERROR,
   1359 		    "fileset_populate_file: Can't alloc path string");
   1360 		return (FILEBENCH_ERROR);
   1361 	}
   1362 
   1363 	/* see if random variable was supplied for file size */
   1364 	if (fileset->fs_meansize == -1) {
   1365 		entry->fse_size = (off64_t)avd_get_int(fileset->fs_size);
   1366 	} else {
   1367 		double gamma;
   1368 
   1369 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
   1370 		if (gamma > 0) {
   1371 			drand = gamma_dist_knuth(gamma,
   1372 			    fileset->fs_meansize / gamma);
   1373 			entry->fse_size = (off64_t)drand;
   1374 		} else {
   1375 			entry->fse_size = (off64_t)fileset->fs_meansize;
   1376 		}
   1377 	}
   1378 
   1379 	fileset->fs_bytes += entry->fse_size;
   1380 
   1381 	fileset->fs_realfiles++;
   1382 	return (FILEBENCH_OK);
   1383 }
   1384 
   1385 /*
   1386  * Obtaines a filesetentry entity for a leaf directory to be placed in a
   1387  * (sub)directory of a fileset. The leaf directory will always be empty so
   1388  * it can be created and deleted (mkdir, rmdir) at will. The filesetentry
   1389  * entity is placed on the leaf directory list in the specified parent
   1390  * filesetentry entity, which may be a (sub) directory filesetentry, or
   1391  * the root filesetentry in the fileset. It is also placed on the fileset's
   1392  * list of all contained leaf directories. Returns FILEBENCH_OK if successful
   1393  * or FILEBENCH_ERROR if ipc memory cannot be allocated.
   1394  */
   1395 static int
   1396 fileset_populate_leafdir(fileset_t *fileset, filesetentry_t *parent, int serial)
   1397 {
   1398 	char tmpname[16];
   1399 	filesetentry_t *entry;
   1400 	uint_t index;
   1401 
   1402 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
   1403 	    == NULL) {
   1404 		filebench_log(LOG_ERROR,
   1405 		    "fileset_populate_file: Can't malloc filesetentry");
   1406 		return (FILEBENCH_ERROR);
   1407 	}
   1408 
   1409 	/* Another currently idle leaf directory */
   1410 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
   1411 	index = fileset->fs_idle_leafdirs++;
   1412 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
   1413 
   1414 	entry->fse_index = index;
   1415 	entry->fse_parent = parent;
   1416 	entry->fse_fileset = fileset;
   1417 	fileset_insleafdirlist(fileset, entry);
   1418 
   1419 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
   1420 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
   1421 		filebench_log(LOG_ERROR,
   1422 		    "fileset_populate_file: Can't alloc path string");
   1423 		return (FILEBENCH_ERROR);
   1424 	}
   1425 
   1426 	fileset->fs_realleafdirs++;
   1427 	return (FILEBENCH_OK);
   1428 }
   1429 
   1430 /*
   1431  * Creates a directory node in a fileset, by obtaining a
   1432  * filesetentry entity for the node and initializing it
   1433  * according to parameters of the fileset. It determines a
   1434  * directory tree depth and directory width, optionally using
   1435  * a gamma distribution. If its calculated depth is less then
   1436  * its actual depth in the directory tree, it becomes a leaf
   1437  * node and files itself with "width" number of file type
   1438  * filesetentries, otherwise it files itself with "width"
   1439  * number of directory type filesetentries, using recursive
   1440  * calls to fileset_populate_subdir. The end result of the
   1441  * initial call to this routine is a tree of directories of
   1442  * random width and varying depth with sufficient leaf
   1443  * directories to contain all required files.
   1444  * Returns FILEBENCH_OK on success. Returns FILEBENCH_ERROR if ipc path
   1445  * string memory cannot be allocated and returns the error code (currently
   1446  * also FILEBENCH_ERROR) from calls to fileset_populate_file or recursive
   1447  * calls to fileset_populate_subdir.
   1448  */
   1449 static int
   1450 fileset_populate_subdir(fileset_t *fileset, filesetentry_t *parent,
   1451     int serial, double depth)
   1452 {
   1453 	double randepth, drand, ranwidth;
   1454 	int isleaf = 0;
   1455 	char tmpname[16];
   1456 	filesetentry_t *entry;
   1457 	int i;
   1458 	uint_t index;
   1459 
   1460 	depth += 1;
   1461 
   1462 	/* Create dir node */
   1463 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
   1464 	    == NULL) {
   1465 		filebench_log(LOG_ERROR,
   1466 		    "fileset_populate_subdir: Can't malloc filesetentry");
   1467 		return (FILEBENCH_ERROR);
   1468 	}
   1469 
   1470 	/* another idle directory */
   1471 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
   1472 	index = fileset->fs_idle_dirs++;
   1473 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
   1474 
   1475 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
   1476 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
   1477 		filebench_log(LOG_ERROR,
   1478 		    "fileset_populate_subdir: Can't alloc path string");
   1479 		return (FILEBENCH_ERROR);
   1480 	}
   1481 
   1482 	entry->fse_index = index;
   1483 	entry->fse_parent = parent;
   1484 	entry->fse_fileset = fileset;
   1485 	fileset_insdirlist(fileset, entry);
   1486 
   1487 	if (fileset->fs_dirdepthrv) {
   1488 		randepth = (int)avd_get_int(fileset->fs_dirdepthrv);
   1489 	} else {
   1490 		double gamma;
   1491 
   1492 		gamma = avd_get_int(fileset->fs_dirgamma) / 1000.0;
   1493 		if (gamma > 0) {
   1494 			drand = gamma_dist_knuth(gamma,
   1495 			    fileset->fs_meandepth / gamma);
   1496 			randepth = (int)drand;
   1497 		} else {
   1498 			randepth = (int)fileset->fs_meandepth;
   1499 		}
   1500 	}
   1501 
   1502 	if (fileset->fs_meanwidth == -1) {
   1503 		ranwidth = avd_get_dbl(fileset->fs_dirwidth);
   1504 	} else {
   1505 		double gamma;
   1506 
   1507 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
   1508 		if (gamma > 0) {
   1509 			drand = gamma_dist_knuth(gamma,
   1510 			    fileset->fs_meanwidth / gamma);
   1511 			ranwidth = drand;
   1512 		} else {
   1513 			ranwidth = fileset->fs_meanwidth;
   1514 		}
   1515 	}
   1516 
   1517 	if (randepth == 0)
   1518 		randepth = 1;
   1519 	if (ranwidth == 0)
   1520 		ranwidth = 1;
   1521 	if (depth >= randepth)
   1522 		isleaf = 1;
   1523 
   1524 	/*
   1525 	 * Create directory of random width filled with files according
   1526 	 * to distribution, or if root directory, continue until #files required
   1527 	 */
   1528 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
   1529 	    (fileset->fs_realfiles < fileset->fs_constentries);
   1530 	    i++) {
   1531 		int ret = 0;
   1532 
   1533 		if (parent && isleaf)
   1534 			ret = fileset_populate_file(fileset, entry, i);
   1535 		else
   1536 			ret = fileset_populate_subdir(fileset, entry, i, depth);
   1537 
   1538 		if (ret != 0)
   1539 			return (ret);
   1540 	}
   1541 
   1542 	/*
   1543 	 * Create directory of random width filled with leaf directories
   1544 	 * according to distribution, or if root directory, continue until
   1545 	 * the number of leaf directories required has been generated.
   1546 	 */
   1547 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
   1548 	    (fileset->fs_realleafdirs < fileset->fs_constleafdirs);
   1549 	    i++) {
   1550 		int ret = 0;
   1551 
   1552 		if (parent && isleaf)
   1553 			ret = fileset_populate_leafdir(fileset, entry, i);
   1554 		else
   1555 			ret = fileset_populate_subdir(fileset, entry, i, depth);
   1556 
   1557 		if (ret != 0)
   1558 			return (ret);
   1559 	}
   1560 
   1561 	return (FILEBENCH_OK);
   1562 }
   1563 
   1564 /*
   1565  * Populates a fileset with files and subdirectory entries. Uses
   1566  * the supplied fileset_dirwidth and fileset_entries (number of files) to
   1567  * calculate the required fileset_meandepth (of subdirectories) and
   1568  * initialize the fileset_meanwidth and fileset_meansize variables. Then
   1569  * calls fileset_populate_subdir() to do the recursive
   1570  * subdirectory entry creation and leaf file entry creation. All
   1571  * of the above is skipped if the fileset has already been
   1572  * populated. Returns 0 on success, or an error code from the
   1573  * call to fileset_populate_subdir if that call fails.
   1574  */
   1575 static int
   1576 fileset_populate(fileset_t *fileset)
   1577 {
   1578 	fbint_t entries = avd_get_int(fileset->fs_entries);
   1579 	fbint_t leafdirs = avd_get_int(fileset->fs_leafdirs);
   1580 	int meandirwidth;
   1581 	int ret;
   1582 
   1583 	/* Skip if already populated */
   1584 	if (fileset->fs_bytes > 0)
   1585 		goto exists;
   1586 
   1587 #ifdef HAVE_RAW_SUPPORT
   1588 	/* check for raw device */
   1589 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
   1590 		return (FILEBENCH_OK);
   1591 #endif /* HAVE_RAW_SUPPORT */
   1592 
   1593 	/*
   1594 	 * save value of entries and leaf dirs obtained for later
   1595 	 * in case it was random
   1596 	 */
   1597 	fileset->fs_constentries = entries;
   1598 	fileset->fs_constleafdirs = leafdirs;
   1599 
   1600 	/* initialize idle files and directories condition variables */
   1601 	(void) pthread_cond_init(&fileset->fs_idle_files_cv, ipc_condattr());
   1602 	(void) pthread_cond_init(&fileset->fs_idle_dirs_cv, ipc_condattr());
   1603 	(void) pthread_cond_init(&fileset->fs_idle_leafdirs_cv, ipc_condattr());
   1604 
   1605 	/* no files or dirs idle (or busy) yet */
   1606 	fileset->fs_idle_files = 0;
   1607 	fileset->fs_idle_dirs = 0;
   1608 	fileset->fs_idle_leafdirs = 0;
   1609 
   1610 	/* initialize locks and other condition variables */
   1611 	(void) pthread_mutex_init(&fileset->fs_pick_lock,
   1612 	    ipc_mutexattr(IPC_MUTEX_NORMAL));
   1613 	(void) pthread_mutex_init(&fileset->fs_histo_lock,
   1614 	    ipc_mutexattr(IPC_MUTEX_NORMAL));
   1615 	(void) pthread_cond_init(&fileset->fs_thrd_wait_cv, ipc_condattr());
   1616 
   1617 	/* Initialize avl btrees */
   1618 	avl_create(&(fileset->fs_free_files), fileset_entry_compare,
   1619 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
   1620 	avl_create(&(fileset->fs_noex_files), fileset_entry_compare,
   1621 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
   1622 	avl_create(&(fileset->fs_exist_files), fileset_entry_compare,
   1623 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
   1624 	avl_create(&(fileset->fs_free_leaf_dirs), fileset_entry_compare,
   1625 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
   1626 	avl_create(&(fileset->fs_noex_leaf_dirs), fileset_entry_compare,
   1627 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
   1628 	avl_create(&(fileset->fs_exist_leaf_dirs), fileset_entry_compare,
   1629 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
   1630 	avl_create(&(fileset->fs_dirs), fileset_entry_compare,
   1631 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
   1632 
   1633 	/* is dirwidth a random variable? */
   1634 	if (AVD_IS_RANDOM(fileset->fs_dirwidth)) {
   1635 		meandirwidth =
   1636 		    (int)fileset->fs_dirwidth->avd_val.randptr->rnd_dbl_mean;
   1637 		fileset->fs_meanwidth = -1;
   1638 	} else {
   1639 		meandirwidth = (int)avd_get_int(fileset->fs_dirwidth);
   1640 		fileset->fs_meanwidth = (double)meandirwidth;
   1641 	}
   1642 
   1643 	/*
   1644 	 * Input params are:
   1645 	 *	# of files
   1646 	 *	ave # of files per dir
   1647 	 *	max size of dir
   1648 	 *	# ave size of file
   1649 	 *	max size of file
   1650 	 */
   1651 	fileset->fs_meandepth = log(entries+leafdirs) / log(meandirwidth);
   1652 
   1653 	/* Has a random variable been supplied for dirdepth? */
   1654 	if (fileset->fs_dirdepthrv) {
   1655 		/* yes, so set the random variable's mean value to meandepth */
   1656 		fileset->fs_dirdepthrv->avd_val.randptr->rnd_dbl_mean =
   1657 		    fileset->fs_meandepth;
   1658 	}
   1659 
   1660 	/* test for random size variable */
   1661 	if (AVD_IS_RANDOM(fileset->fs_size))
   1662 		fileset->fs_meansize = -1;
   1663 	else
   1664 		fileset->fs_meansize = avd_get_int(fileset->fs_size);
   1665 
   1666 	if ((ret = fileset_populate_subdir(fileset, NULL, 1, 0)) != 0)
   1667 		return (ret);
   1668 
   1669 
   1670 exists:
   1671 	if (fileset->fs_attrs & FILESET_IS_FILE) {
   1672 		filebench_log(LOG_VERBOSE, "File %s: mbytes=%llu",
   1673 		    avd_get_str(fileset->fs_name),
   1674 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
   1675 	} else {
   1676 		filebench_log(LOG_VERBOSE, "Fileset %s: %d files, %d leafdirs "
   1677 		    "avg dir = %d, avg depth = %.1lf, mbytes=%llu",
   1678 		    avd_get_str(fileset->fs_name), entries, leafdirs,
   1679 		    meandirwidth,
   1680 		    fileset->fs_meandepth,
   1681 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
   1682 	}
   1683 
   1684 	return (FILEBENCH_OK);
   1685 }
   1686 
   1687 /*
   1688  * Allocates a fileset instance, initializes fileset_dirgamma and
   1689  * fileset_sizegamma default values, and sets the fileset name to the
   1690  * supplied name string. Puts the allocated fileset on the
   1691  * master fileset list and returns a pointer to it.
   1692  *
   1693  * This routine implements the 'define fileset' calls found in a .f
   1694  * workload, such as in the following example:
   1695  * define fileset name=drew4ever, entries=$nfiles
   1696  */
   1697 fileset_t *
   1698 fileset_define(avd_t name)
   1699 {
   1700 	fileset_t *fileset;
   1701 
   1702 	if (name == NULL)
   1703 		return (NULL);
   1704 
   1705 	if ((fileset = (fileset_t *)ipc_malloc(FILEBENCH_FILESET)) == NULL) {
   1706 		filebench_log(LOG_ERROR,
   1707 		    "fileset_define: Can't malloc fileset");
   1708 		return (NULL);
   1709 	}
   1710 
   1711 	filebench_log(LOG_DEBUG_IMPL,
   1712 	    "Defining file %s", avd_get_str(name));
   1713 
   1714 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
   1715 
   1716 	fileset->fs_dirgamma = avd_int_alloc(1500);
   1717 	fileset->fs_sizegamma = avd_int_alloc(1500);
   1718 	fileset->fs_histo_id = -1;
   1719 
   1720 	/* Add fileset to global list */
   1721 	if (filebench_shm->shm_filesetlist == NULL) {
   1722 		filebench_shm->shm_filesetlist = fileset;
   1723 		fileset->fs_next = NULL;
   1724 	} else {
   1725 		fileset->fs_next = filebench_shm->shm_filesetlist;
   1726 		filebench_shm->shm_filesetlist = fileset;
   1727 	}
   1728 
   1729 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
   1730 
   1731 	fileset->fs_name = name;
   1732 
   1733 	return (fileset);
   1734 }
   1735 
   1736 /*
   1737  * If supplied with a pointer to a fileset and the fileset's
   1738  * fileset_prealloc flag is set, calls fileset_populate() to populate
   1739  * the fileset with filesetentries, then calls fileset_create()
   1740  * to make actual directories and files for the filesetentries.
   1741  * Otherwise, it applies fileset_populate() and fileset_create()
   1742  * to all the filesets on the master fileset list. It always
   1743  * returns zero (0) if one fileset is populated / created,
   1744  * otherwise it returns the sum of returned values from
   1745  * fileset_create() and fileset_populate(), which
   1746  * will be a negative one (-1) times the number of
   1747  * fileset_create() calls which failed.
   1748  */
   1749 int
   1750 fileset_createset(fileset_t *fileset)
   1751 {
   1752 	fileset_t *list;
   1753 	int ret = 0;
   1754 
   1755 	/* set up for possible parallel allocate */
   1756 	filebench_shm->shm_fsparalloc_count = 0;
   1757 	(void) pthread_cond_init(
   1758 	    &filebench_shm->shm_fsparalloc_cv,
   1759 	    ipc_condattr());
   1760 
   1761 	if (fileset && avd_get_bool(fileset->fs_prealloc)) {
   1762 
   1763 		/* check for raw files */
   1764 		if (fileset_checkraw(fileset)) {
   1765 			filebench_log(LOG_INFO,
   1766 			    "file %s/%s is a RAW device",
   1767 			    avd_get_str(fileset->fs_path),
   1768 			    avd_get_str(fileset->fs_name));
   1769 			return (FILEBENCH_OK);
   1770 		}
   1771 
   1772 		filebench_log(LOG_INFO,
   1773 		    "creating/pre-allocating %s %s",
   1774 		    fileset_entity_name(fileset),
   1775 		    avd_get_str(fileset->fs_name));
   1776 
   1777 		if ((ret = fileset_populate(fileset)) != FILEBENCH_OK)
   1778 			return (ret);
   1779 
   1780 		if ((ret = fileset_create(fileset)) != FILEBENCH_OK)
   1781 			return (ret);
   1782 	} else {
   1783 
   1784 		filebench_log(LOG_INFO,
   1785 		    "Creating/pre-allocating files and filesets");
   1786 
   1787 		list = filebench_shm->shm_filesetlist;
   1788 		while (list) {
   1789 			/* check for raw files */
   1790 			if (fileset_checkraw(list)) {
   1791 				filebench_log(LOG_INFO,
   1792 				    "file %s/%s is a RAW device",
   1793 				    avd_get_str(list->fs_path),
   1794 				    avd_get_str(list->fs_name));
   1795 				list = list->fs_next;
   1796 				continue;
   1797 			}
   1798 
   1799 			if ((ret = fileset_populate(list)) != FILEBENCH_OK)
   1800 				return (ret);
   1801 
   1802 			if ((ret = fileset_create(list)) != FILEBENCH_OK)
   1803 				return (ret);
   1804 
   1805 			list = list->fs_next;
   1806 		}
   1807 	}
   1808 
   1809 	/* wait for allocation threads to finish */
   1810 	filebench_log(LOG_INFO,
   1811 	    "waiting for fileset pre-allocation to finish");
   1812 
   1813 	(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
   1814 	while (filebench_shm->shm_fsparalloc_count > 0)
   1815 		(void) pthread_cond_wait(
   1816 		    &filebench_shm->shm_fsparalloc_cv,
   1817 		    &filebench_shm->shm_fsparalloc_lock);
   1818 	(void) pthread_mutex_unlock(&filebench_shm->shm_fsparalloc_lock);
   1819 
   1820 	if (filebench_shm->shm_fsparalloc_count < 0)
   1821 		return (FILEBENCH_ERROR);
   1822 
   1823 	return (FILEBENCH_OK);
   1824 }
   1825 
   1826 /*
   1827  * Searches through the master fileset list for the named fileset.
   1828  * If found, returns pointer to same, otherwise returns NULL.
   1829  */
   1830 fileset_t *
   1831 fileset_find(char *name)
   1832 {
   1833 	fileset_t *fileset = filebench_shm->shm_filesetlist;
   1834 
   1835 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
   1836 
   1837 	while (fileset) {
   1838 		if (strcmp(name, avd_get_str(fileset->fs_name)) == 0) {
   1839 			(void) ipc_mutex_unlock(
   1840 			    &filebench_shm->shm_fileset_lock);
   1841 			return (fileset);
   1842 		}
   1843 		fileset = fileset->fs_next;
   1844 	}
   1845 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
   1846 
   1847 	return (NULL);
   1848 }
   1849 
   1850 /*
   1851  * Iterates over all the file sets in the filesetlist,
   1852  * executing the supplied command "*cmd()" on them. Also
   1853  * indicates to the executed command if it is the first
   1854  * time the command has been executed since the current
   1855  * call to fileset_iter.
   1856  */
   1857 int
   1858 fileset_iter(int (*cmd)(fileset_t *fileset, int first))
   1859 {
   1860 	fileset_t *fileset = filebench_shm->shm_filesetlist;
   1861 	int count = 0;
   1862 
   1863 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
   1864 
   1865 	while (fileset) {
   1866 		if (cmd(fileset, count == 0) == FILEBENCH_ERROR) {
   1867 			(void) ipc_mutex_unlock(
   1868 			    &filebench_shm->shm_fileset_lock);
   1869 			return (FILEBENCH_ERROR);
   1870 		}
   1871 		fileset = fileset->fs_next;
   1872 		count++;
   1873 	}
   1874 
   1875 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
   1876 	return (FILEBENCH_OK);
   1877 }
   1878 
   1879 /*
   1880  * Prints information to the filebench log about the file
   1881  * object. Also prints a header on the first call.
   1882  */
   1883 int
   1884 fileset_print(fileset_t *fileset, int first)
   1885 {
   1886 	int pathlength;
   1887 	char *fileset_path;
   1888 	char *fileset_name;
   1889 	static char pad[] = "                              "; /* 30 spaces */
   1890 
   1891 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
   1892 		filebench_log(LOG_ERROR, "%s path not set",
   1893 		    fileset_entity_name(fileset));
   1894 		return (FILEBENCH_ERROR);
   1895 	}
   1896 
   1897 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
   1898 		filebench_log(LOG_ERROR, "%s name not set",
   1899 		    fileset_entity_name(fileset));
   1900 		return (FILEBENCH_ERROR);
   1901 	}
   1902 
   1903 	pathlength = strlen(fileset_path) + strlen(fileset_name);
   1904 
   1905 	if (pathlength > 29)
   1906 		pathlength = 29;
   1907 
   1908 	if (first) {
   1909 		filebench_log(LOG_INFO, "File or Fileset name%20s%12s%10s",
   1910 		    "file size",
   1911 		    "dir width",
   1912 		    "entries");
   1913 	}
   1914 
   1915 	if (fileset->fs_attrs & FILESET_IS_FILE) {
   1916 		if (fileset->fs_attrs & FILESET_IS_RAW_DEV) {
   1917 			filebench_log(LOG_INFO,
   1918 			    "%s/%s%s         (Raw Device)",
   1919 			    fileset_path, fileset_name, &pad[pathlength]);
   1920 		} else {
   1921 			filebench_log(LOG_INFO,
   1922 			    "%s/%s%s%9llu     (Single File)",
   1923 			    fileset_path, fileset_name, &pad[pathlength],
   1924 			    (u_longlong_t)avd_get_int(fileset->fs_size));
   1925 		}
   1926 	} else {
   1927 		filebench_log(LOG_INFO, "%s/%s%s%9llu%12llu%10llu",
   1928 		    fileset_path, fileset_name,
   1929 		    &pad[pathlength],
   1930 		    (u_longlong_t)avd_get_int(fileset->fs_size),
   1931 		    (u_longlong_t)avd_get_int(fileset->fs_dirwidth),
   1932 		    (u_longlong_t)fileset->fs_constentries);
   1933 	}
   1934 	return (FILEBENCH_OK);
   1935 }
   1936 
   1937 /*
   1938  * checks to see if the path/name pair points to a raw device. If
   1939  * so it sets the raw device flag (FILESET_IS_RAW_DEV) and returns 1.
   1940  * If RAW is not defined, or it is not a raw device, it clears the
   1941  * raw device flag and returns 0.
   1942  */
   1943 int
   1944 fileset_checkraw(fileset_t *fileset)
   1945 {
   1946 	char path[MAXPATHLEN];
   1947 	struct stat64 sb;
   1948 	char *pathname;
   1949 	char *setname;
   1950 
   1951 	fileset->fs_attrs &= (~FILESET_IS_RAW_DEV);
   1952 
   1953 #ifdef HAVE_RAW_SUPPORT
   1954 	/* check for raw device */
   1955 	if ((pathname = avd_get_str(fileset->fs_path)) == NULL)
   1956 		return (FILEBENCH_OK);
   1957 
   1958 	if ((setname = avd_get_str(fileset->fs_name)) == NULL)
   1959 		return (FILEBENCH_OK);
   1960 
   1961 	(void) fb_strlcpy(path, pathname, MAXPATHLEN);
   1962 	(void) fb_strlcat(path, "/", MAXPATHLEN);
   1963 	(void) fb_strlcat(path, setname, MAXPATHLEN);
   1964 	if ((stat64(path, &sb) == 0) &&
   1965 	    ((sb.st_mode & S_IFMT) == S_IFBLK) && sb.st_rdev) {
   1966 		fileset->fs_attrs |= FILESET_IS_RAW_DEV;
   1967 		if (!(fileset->fs_attrs & FILESET_IS_FILE)) {
   1968 			filebench_log(LOG_ERROR,
   1969 			    "WARNING Fileset %s/%s Cannot be RAW device",
   1970 			    avd_get_str(fileset->fs_path),
   1971 			    avd_get_str(fileset->fs_name));
   1972 			filebench_shutdown(1);
   1973 		}
   1974 
   1975 		return (1);
   1976 	}
   1977 #endif /* HAVE_RAW_SUPPORT */
   1978 
   1979 	return (FILEBENCH_OK);
   1980 }
   1981