Home | History | Annotate | Download | only in common
      1 /*
      2  * CDDL HEADER START
      3  *
      4  * The contents of this file are subject to the terms of the
      5  * Common Development and Distribution License (the "License").
      6  * You may not use this file except in compliance with the License.
      7  *
      8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
      9  * or http://www.opensolaris.org/os/licensing.
     10  * See the License for the specific language governing permissions
     11  * and limitations under the License.
     12  *
     13  * When distributing Covered Code, include this CDDL HEADER in each
     14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
     15  * If applicable, add the following below this CDDL HEADER, with the
     16  * fields enclosed by brackets "[]" replaced with your own identifying
     17  * information: Portions Copyright [yyyy] [name of copyright owner]
     18  *
     19  * CDDL HEADER END
     20  */
     21 /*
     22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
     23  * Use is subject to license terms.
     24  *
     25  * Portions Copyright 2008 Denis Cheng
     26  */
     27 
     28 #include <fcntl.h>
     29 #include <pthread.h>
     30 #include <errno.h>
     31 #include <math.h>
     32 #include <libgen.h>
     33 #include <sys/mman.h>
     34 
     35 #include "filebench.h"
     36 #include "fileset.h"
     37 #include "gamma_dist.h"
     38 
     39 /*
     40  * File sets, of type fileset_t, are entities which contain
     41  * information about collections of files and subdirectories in Filebench.
     42  * The fileset, once populated, consists of a tree of fileset entries of
     43  * type filesetentry_t which specify files and directories.  The fileset
     44  * is rooted in a directory specified by fileset_path, and once the populated
     45  * fileset has been created, has a tree of directories and files
     46  * corresponding to the fileset's filesetentry tree.
     47  *
     48  * Fileset entities are allocated by fileset_define() which is called from
     49  * parser_gram.y: parser_fileset_define(). The filesetentry tree corresponding
     50  * to the eventual directory and file tree to be instantiated on the storage
     51  * medium is built by fileset_populate(), which is called from
     52  * fileset_createset(). After calling fileset_populate(), fileset_createset()
     53  * will call fileset_create() to pre-allocate designated files and directories.
     54  *
     55  * Fileset_createset() is called from parser_gram.y: parser_create_fileset()
     56  * when a "create fileset" or "run" command is encountered. When the
     57  * "create fileset" command is used, it is generally paired with
     58  * a "create processes" command, and must appear first, in order to
     59  * instantiate all the files in the fileset before trying to use them.
     60  */
     61 
/*
 * Forward declaration of fileset_checkraw(); its definition is not part
 * of this section of the file.
 */
static int fileset_checkraw(fileset_t *fileset);

/*
 * Maximum parallel allocation control: fileset_create() waits before
 * starting another pre-allocation thread while shm_fsparalloc_count is
 * at or above this value.
 */
#define	MAX_PARALLOC_THREADS 32
     66 
     67 /*
     68  * returns pointer to file or fileset
     69  * string, as appropriate
     70  */
     71 static char *
     72 fileset_entity_name(fileset_t *fileset)
     73 {
     74 	if (fileset->fs_attrs & FILESET_IS_FILE)
     75 		return ("file");
     76 	else
     77 		return ("fileset");
     78 }
     79 
     80 /*
     81  * Removes the last file or directory name from a pathname.
     82  * Basically removes characters from the end of the path by
     83  * setting them to \0 until a forward slash '/' is
     84  * encountered. It also removes the forward slash.
     85  */
     86 static char *
     87 trunc_dirname(char *dir)
     88 {
     89 	char *s = dir + strlen(dir);
     90 
     91 	while (s != dir) {
     92 		int c = *s;
     93 
     94 		*s = 0;
     95 		if (c == '/')
     96 			break;
     97 		s--;
     98 	}
     99 	return (dir);
    100 }
    101 
    102 /*
    103  * Prints a list of allowed options and how to specify them.
    104  */
    105 void
    106 fileset_usage(void)
    107 {
    108 	(void) fprintf(stderr,
    109 	    "define [file name=<name> | fileset name=<name>],path=<pathname>,"
    110 	    ",entries=<number>\n");
    111 	(void) fprintf(stderr,
    112 	    "		        [,filesize=[size]]\n");
    113 	(void) fprintf(stderr,
    114 	    "		        [,dirwidth=[width]]\n");
    115 	(void) fprintf(stderr,
    116 	    "		        [,dirdepthrv=$random_variable_name]\n");
    117 	(void) fprintf(stderr,
    118 	    "		        [,dirgamma=[100-10000]] "
    119 	    "(Gamma * 1000)\n");
    120 	(void) fprintf(stderr,
    121 	    "		        [,sizegamma=[100-10000]] (Gamma * 1000)\n");
    122 	(void) fprintf(stderr,
    123 	    "		        [,prealloc=[percent]]\n");
    124 	(void) fprintf(stderr, "		        [,paralloc]\n");
    125 	(void) fprintf(stderr, "		        [,reuse]\n");
    126 	(void) fprintf(stderr, "\n");
    127 }
    128 
    129 /*
    130  * Frees up memory mapped file region of supplied size. The
    131  * file descriptor "fd" indicates which memory mapped file.
    132  * If successful, returns 0. Otherwise returns -1 times the number of
    133  * times msync() failed.
    134  */
    135 static int
    136 fileset_freemem(int fd, off64_t size)
    137 {
    138 	off64_t left;
    139 	int ret = 0;
    140 
    141 	for (left = size; left > 0; left -= MMAP_SIZE) {
    142 		off64_t thismapsize;
    143 		caddr_t addr;
    144 
    145 		thismapsize = MIN(MMAP_SIZE, left);
    146 		addr = mmap64(0, thismapsize, PROT_READ|PROT_WRITE,
    147 		    MAP_SHARED, fd, size - left);
    148 		ret += msync(addr, thismapsize, MS_INVALIDATE);
    149 		(void) munmap(addr, thismapsize);
    150 	}
    151 	return (ret);
    152 }
    153 
    154 /*
    155  * Creates a path string from the filesetentry_t "*entry"
    156  * and all of its parent's path names. The resulting path
    157  * is a concatination of all the individual parent paths.
    158  * Allocates memory for the path string and returns a
    159  * pointer to it.
    160  */
    161 char *
    162 fileset_resolvepath(filesetentry_t *entry)
    163 {
    164 	filesetentry_t *fsep = entry;
    165 	char path[MAXPATHLEN];
    166 	char pathtmp[MAXPATHLEN];
    167 	char *s;
    168 
    169 	*path = 0;
    170 	while (fsep->fse_parent) {
    171 		(void) strcpy(pathtmp, "/");
    172 		(void) strcat(pathtmp, fsep->fse_path);
    173 		(void) strcat(pathtmp, path);
    174 		(void) strcpy(path, pathtmp);
    175 		fsep = fsep->fse_parent;
    176 	}
    177 
    178 	s = malloc(strlen(path) + 1);
    179 	(void) strcpy(s, path);
    180 	return (s);
    181 }
    182 
    183 /*
    184  * Creates multiple nested directories as required by the
    185  * supplied path. Starts at the end of the path, creating
    186  * a list of directories to mkdir, up to the root of the
    187  * path, then mkdirs them one at a time from the root on down.
    188  */
    189 static int
    190 fileset_mkdir(char *path, int mode)
    191 {
    192 	char *p;
    193 	char *dirs[65536];
    194 	int i = 0;
    195 
    196 	if ((p = strdup(path)) == NULL)
    197 		goto null_str;
    198 
    199 	/*
    200 	 * Fill an array of subdirectory path names until either we
    201 	 * reach the root or encounter an already existing subdirectory
    202 	 */
    203 	/* CONSTCOND */
    204 	while (1) {
    205 		struct stat64 sb;
    206 
    207 		if (stat64(p, &sb) == 0)
    208 			break;
    209 		if (strlen(p) < 3)
    210 			break;
    211 		if ((dirs[i] = strdup(p)) == NULL) {
    212 			free(p);
    213 			goto null_str;
    214 		}
    215 
    216 		(void) trunc_dirname(p);
    217 		i++;
    218 	}
    219 
    220 	/* Make the directories, from closest to root downwards. */
    221 	for (--i; i >= 0; i--) {
    222 		(void) mkdir(dirs[i], mode);
    223 		free(dirs[i]);
    224 	}
    225 
    226 	free(p);
    227 	return (FILEBENCH_OK);
    228 
    229 null_str:
    230 	/* clean up */
    231 	for (--i; i >= 0; i--)
    232 		free(dirs[i]);
    233 
    234 	filebench_log(LOG_ERROR,
    235 	    "Failed to create directory path %s: Out of memory", path);
    236 
    237 	return (FILEBENCH_ERROR);
    238 }
    239 
    240 /*
    241  * creates the subdirectory tree for a fileset.
    242  */
    243 static int
    244 fileset_create_subdirs(fileset_t *fileset, char *filesetpath)
    245 {
    246 	filesetentry_t *direntry;
    247 	char full_path[MAXPATHLEN];
    248 	char *part_path;
    249 
    250 	/* walk the subdirectory list, enstanciating subdirs */
    251 	direntry = fileset->fs_dirlist;
    252 	while (direntry) {
    253 		(void) strcpy(full_path, filesetpath);
    254 		part_path = fileset_resolvepath(direntry);
    255 		(void) strcat(full_path, part_path);
    256 		free(part_path);
    257 
    258 		/* now create this portion of the subdirectory tree */
    259 		if (fileset_mkdir(full_path, 0755) == FILEBENCH_ERROR)
    260 			return (FILEBENCH_ERROR);
    261 
    262 		direntry = direntry->fse_dirnext;
    263 	}
    264 	return (FILEBENCH_OK);
    265 }
    266 
    267 /*
    268  * given a fileset entry, determines if the associated file
    269  * needs to be allocated or not, and if so does the allocation.
    270  */
    271 static int
    272 fileset_alloc_file(filesetentry_t *entry)
    273 {
    274 	char path[MAXPATHLEN];
    275 	char *buf;
    276 	struct stat64 sb;
    277 	char *pathtmp;
    278 	off64_t seek;
    279 	int fd;
    280 
    281 	*path = 0;
    282 	(void) strcpy(path, avd_get_str(entry->fse_fileset->fs_path));
    283 	(void) strcat(path, "/");
    284 	(void) strcat(path, avd_get_str(entry->fse_fileset->fs_name));
    285 	pathtmp = fileset_resolvepath(entry);
    286 	(void) strcat(path, pathtmp);
    287 
    288 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
    289 
    290 	/* see if reusing and this file exists */
    291 	if ((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0)) {
    292 		if ((fd = open64(path, O_RDWR)) < 0) {
    293 			filebench_log(LOG_INFO,
    294 			    "Attempted but failed to Re-use file %s",
    295 			    path);
    296 			return (FILEBENCH_ERROR);
    297 		}
    298 
    299 		if (sb.st_size == (off64_t)entry->fse_size) {
    300 			filebench_log(LOG_DEBUG_IMPL,
    301 			    "Re-using file %s", path);
    302 
    303 			if (!avd_get_bool(entry->fse_fileset->fs_cached))
    304 				(void) fileset_freemem(fd,
    305 				    entry->fse_size);
    306 
    307 			(void) ipc_mutex_lock(
    308 			    &entry->fse_fileset->fs_pick_lock);
    309 			entry->fse_flags |= FSE_EXISTS;
    310 			entry->fse_fileset->fs_num_act_files++;
    311 			(void) ipc_mutex_unlock(
    312 			    &entry->fse_fileset->fs_pick_lock);
    313 
    314 			(void) close(fd);
    315 			return (FILEBENCH_OK);
    316 
    317 		} else if (sb.st_size > (off64_t)entry->fse_size) {
    318 			/* reuse, but too large */
    319 			filebench_log(LOG_INFO,
    320 			    "Truncating & re-using file %s", path);
    321 
    322 #ifdef HAVE_FTRUNCATE64
    323 			(void) ftruncate64(fd, (off64_t)entry->fse_size);
    324 #else
    325 			(void) ftruncate(fd, (off_t)entry->fse_size);
    326 #endif
    327 
    328 			if (!avd_get_bool(entry->fse_fileset->fs_cached))
    329 				(void) fileset_freemem(fd,
    330 				    entry->fse_size);
    331 
    332 			(void) ipc_mutex_lock(
    333 			    &entry->fse_fileset->fs_pick_lock);
    334 			entry->fse_flags |= FSE_EXISTS;
    335 			entry->fse_fileset->fs_num_act_files++;
    336 			(void) ipc_mutex_unlock(
    337 			    &entry->fse_fileset->fs_pick_lock);
    338 
    339 			(void) close(fd);
    340 			return (FILEBENCH_OK);
    341 		}
    342 	} else {
    343 
    344 		/* No file or not reusing, so create */
    345 		if ((fd = open64(path, O_RDWR | O_CREAT, 0644)) < 0) {
    346 			filebench_log(LOG_ERROR,
    347 			    "Failed to pre-allocate file %s: %s",
    348 			    path, strerror(errno));
    349 
    350 			return (FILEBENCH_ERROR);
    351 		}
    352 	}
    353 
    354 	if ((buf = (char *)malloc(FILE_ALLOC_BLOCK)) == NULL)
    355 		return (FILEBENCH_ERROR);
    356 
    357 	(void) ipc_mutex_lock(&entry->fse_fileset->fs_pick_lock);
    358 	entry->fse_flags |= FSE_EXISTS;
    359 	entry->fse_fileset->fs_num_act_files++;
    360 	(void) ipc_mutex_unlock(&entry->fse_fileset->fs_pick_lock);
    361 
    362 	for (seek = 0; seek < entry->fse_size; ) {
    363 		off64_t wsize;
    364 		int ret = 0;
    365 
    366 		/*
    367 		 * Write FILE_ALLOC_BLOCK's worth,
    368 		 * except on last write
    369 		 */
    370 		wsize = MIN(entry->fse_size - seek, FILE_ALLOC_BLOCK);
    371 
    372 		ret = write(fd, buf, wsize);
    373 		if (ret != wsize) {
    374 			filebench_log(LOG_ERROR,
    375 			    "Failed to pre-allocate file %s: %s",
    376 			    path, strerror(errno));
    377 			(void) close(fd);
    378 			free(buf);
    379 			return (FILEBENCH_ERROR);
    380 		}
    381 		seek += wsize;
    382 	}
    383 
    384 	if (!avd_get_bool(entry->fse_fileset->fs_cached))
    385 		(void) fileset_freemem(fd, entry->fse_size);
    386 
    387 	(void) close(fd);
    388 
    389 	free(buf);
    390 
    391 	filebench_log(LOG_DEBUG_IMPL,
    392 	    "Pre-allocated file %s size %llu",
    393 	    path, (u_longlong_t)entry->fse_size);
    394 
    395 	return (FILEBENCH_OK);
    396 }
    397 
    398 /*
    399  * given a fileset entry, determines if the associated file
    400  * needs to be allocated or not, and if so does the allocation.
    401  * Sets shm_fsparalloc_count to -1 on error.
    402  */
    403 static void *
    404 fileset_alloc_thread(filesetentry_t *entry)
    405 {
    406 	if (fileset_alloc_file(entry) == FILEBENCH_ERROR) {
    407 		(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
    408 		filebench_shm->shm_fsparalloc_count = -1;
    409 	} else {
    410 		(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
    411 		filebench_shm->shm_fsparalloc_count--;
    412 	}
    413 
    414 	(void) pthread_cond_signal(&filebench_shm->shm_fsparalloc_cv);
    415 	(void) pthread_mutex_unlock(&filebench_shm->shm_fsparalloc_lock);
    416 
    417 	pthread_exit(NULL);
    418 	return (NULL);
    419 }
    420 
    421 
    422 /*
    423  * First creates the parent directories of the file using
    424  * fileset_mkdir(). Then Optionally sets the O_DSYNC flag
    425  * and opens the file with open64(). It unlocks the fileset
    426  * entry lock, sets the DIRECTIO_ON or DIRECTIO_OFF flags
    427  * as requested, and returns the file descriptor integer
    428  * for the opened file.
    429  */
    430 int
    431 fileset_openfile(fileset_t *fileset,
    432     filesetentry_t *entry, int flag, int filemode, int attrs)
    433 {
    434 	char path[MAXPATHLEN];
    435 	char dir[MAXPATHLEN];
    436 	char *pathtmp;
    437 	struct stat64 sb;
    438 	int fd;
    439 	int open_attrs = 0;
    440 
    441 	*path = 0;
    442 	(void) strcpy(path, avd_get_str(fileset->fs_path));
    443 	(void) strcat(path, "/");
    444 	(void) strcat(path, avd_get_str(fileset->fs_name));
    445 	pathtmp = fileset_resolvepath(entry);
    446 	(void) strcat(path, pathtmp);
    447 	(void) strcpy(dir, path);
    448 	free(pathtmp);
    449 	(void) trunc_dirname(dir);
    450 
    451 	/* If we are going to create a file, create the parent dirs */
    452 	if ((flag & O_CREAT) && (stat64(dir, &sb) != 0)) {
    453 		if (fileset_mkdir(dir, 0755) == FILEBENCH_ERROR)
    454 			return (FILEBENCH_ERROR);
    455 	}
    456 
    457 	if (attrs & FLOW_ATTR_DSYNC) {
    458 #ifdef sun
    459 		open_attrs |= O_DSYNC;
    460 #else
    461 		open_attrs |= O_FSYNC;
    462 #endif
    463 	}
    464 
    465 	if ((fd = open64(path, flag | open_attrs, filemode)) < 0) {
    466 		filebench_log(LOG_ERROR,
    467 		    "Failed to open file %s: %s",
    468 		    path, strerror(errno));
    469 
    470 		fileset_unbusy(entry, FALSE, FALSE);
    471 		return (FILEBENCH_ERROR);
    472 	}
    473 
    474 	if (flag & O_CREAT)
    475 		fileset_unbusy(entry, TRUE, TRUE);
    476 	else
    477 		fileset_unbusy(entry, FALSE, FALSE);
    478 
    479 #ifdef sun
    480 	if (attrs & FLOW_ATTR_DIRECTIO)
    481 		(void) directio(fd, DIRECTIO_ON);
    482 	else
    483 		(void) directio(fd, DIRECTIO_OFF);
    484 #endif
    485 
    486 	return (fd);
    487 }
    488 
    489 
    490 /*
    491  * Selects a fileset entry from a fileset. If the
    492  * FILESET_PICKDIR flag is set it will pick a directory
    493  * entry, otherwise a file entry. The FILESET_PICKRESET
    494  * flag will cause it to reset the free list to the
    495  * overall list (file or directory). The FILESET_PICKUNIQUE
    496  * flag will take an entry off of one of the free (unused)
    497  * lists (file or directory), otherwise the entry will be
    498  * picked off of one of the rotor lists (file or directory).
    499  * The FILESET_PICKEXISTS will insure that only extant
    500  * (FSE_EXISTS) state files are selected, while
    501  * FILESET_PICKNOEXIST insures that only non extant
    502  * (not FSE_EXISTS) state files are selected.
    503  * Note that the selected fileset entry (file) is returned
    504  * with its FSE_BUSY flag (in fse_flags) set.
    505  */
    506 filesetentry_t *
    507 fileset_pick(fileset_t *fileset, int flags, int tid)
    508 {
    509 	filesetentry_t *entry = NULL;
    510 	filesetentry_t *first = NULL;
    511 
    512 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
    513 
    514 	/* see if we have to wait for available files or directories */
    515 	if (flags & FILESET_PICKDIR) {
    516 		while (fileset->fs_idle_dirs == 0) {
    517 			(void) pthread_cond_wait(&fileset->fs_idle_dirs_cv,
    518 			    &fileset->fs_pick_lock);
    519 		}
    520 	} else {
    521 		while (fileset->fs_idle_files == 0) {
    522 			(void) pthread_cond_wait(&fileset->fs_idle_files_cv,
    523 			    &fileset->fs_pick_lock);
    524 		}
    525 	}
    526 
    527 	/* see if asking for impossible */
    528 	if (flags & FILESET_PICKEXISTS) {
    529 		if (fileset->fs_num_act_files == 0) {
    530 			(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
    531 			return (NULL);
    532 		}
    533 	} else if (flags & FILESET_PICKNOEXIST) {
    534 		if (fileset->fs_num_act_files == fileset->fs_realfiles) {
    535 			(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
    536 			return (NULL);
    537 		}
    538 	}
    539 
    540 	while (entry == NULL) {
    541 
    542 		if ((flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
    543 			entry = fileset->fs_dirlist;
    544 			while (entry) {
    545 				entry->fse_flags |= FSE_FREE;
    546 				entry = entry->fse_dirnext;
    547 			}
    548 			fileset->fs_dirfree = fileset->fs_dirlist;
    549 		}
    550 
    551 		if (!(flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
    552 			entry = fileset->fs_filelist;
    553 			while (entry) {
    554 				entry->fse_flags |= FSE_FREE;
    555 				entry = entry->fse_filenext;
    556 			}
    557 			fileset->fs_filefree = fileset->fs_filelist;
    558 		}
    559 
    560 		if (flags & FILESET_PICKUNIQUE) {
    561 			if (flags & FILESET_PICKDIR) {
    562 				entry = fileset->fs_dirfree;
    563 				if (entry == NULL)
    564 					goto empty;
    565 				fileset->fs_dirfree = entry->fse_dirnext;
    566 			} else {
    567 				entry = fileset->fs_filefree;
    568 				if (entry == NULL)
    569 					goto empty;
    570 				fileset->fs_filefree = entry->fse_filenext;
    571 			}
    572 			entry->fse_flags &= ~FSE_FREE;
    573 		} else {
    574 			if (flags & FILESET_PICKDIR) {
    575 				entry = fileset->fs_dirrotor;
    576 				if (entry == NULL)
    577 				fileset->fs_dirrotor =
    578 				    entry = fileset->fs_dirlist;
    579 				fileset->fs_dirrotor = entry->fse_dirnext;
    580 			} else {
    581 				if (flags & FILESET_PICKNOEXIST) {
    582 					entry = fileset->fs_file_ne_rotor;
    583 					if (entry == NULL)
    584 						fileset->fs_file_ne_rotor =
    585 						    entry =
    586 						    fileset->fs_filelist;
    587 					fileset->fs_file_ne_rotor =
    588 					    entry->fse_filenext;
    589 				} else {
    590 					entry = fileset->fs_filerotor[tid];
    591 					if (entry == NULL)
    592 						fileset->fs_filerotor[tid] =
    593 						    entry =
    594 						    fileset->fs_filelist;
    595 					fileset->fs_filerotor[tid] =
    596 					    entry->fse_filenext;
    597 				}
    598 			}
    599 		}
    600 
    601 		if (first == entry)
    602 			goto empty;
    603 
    604 		if (first == NULL)
    605 			first = entry;
    606 
    607 		/* see if entry in use */
    608 		if (entry->fse_flags & FSE_BUSY) {
    609 
    610 			/* it is, so try next */
    611 			entry = NULL;
    612 			continue;
    613 		}
    614 
    615 		/* If we ask for an existing file, go round again */
    616 		if ((flags & FILESET_PICKEXISTS) &&
    617 		    !(entry->fse_flags & FSE_EXISTS))
    618 			entry = NULL;
    619 
    620 		/* If we ask for not an existing file, go round again */
    621 		if ((flags & FILESET_PICKNOEXIST) &&
    622 		    (entry->fse_flags & FSE_EXISTS))
    623 			entry = NULL;
    624 	}
    625 
    626 	/* update file or directory idle counts */
    627 	if (flags & FILESET_PICKDIR)
    628 		fileset->fs_idle_dirs--;
    629 	else
    630 		fileset->fs_idle_files--;
    631 
    632 	/* Indicate that file or directory is now busy */
    633 	entry->fse_flags |= FSE_BUSY;
    634 
    635 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
    636 	filebench_log(LOG_DEBUG_SCRIPT, "Picked file %s", entry->fse_path);
    637 	return (entry);
    638 
    639 empty:
    640 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
    641 	return (NULL);
    642 }
    643 
    644 /*
    645  * Removes a filesetentry from the "FSE_BUSY" state, signaling any threads
    646  * that are waiting for a NOT BUSY filesetentry. Also sets whether it is
    647  * existant or not, or leaves that designation alone.
    648  */
    649 void
    650 fileset_unbusy(filesetentry_t *entry, int update_exist, int new_exist_val)
    651 {
    652 	fileset_t *fileset = NULL;
    653 	int fse_is_dir;
    654 
    655 	if (entry)
    656 		fileset = entry->fse_fileset;
    657 
    658 	if (fileset == NULL) {
    659 		filebench_log(LOG_ERROR, "fileset_unbusy: NO FILESET!");
    660 		return;
    661 	}
    662 
    663 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
    664 	fse_is_dir = entry->fse_flags & FSE_DIR;
    665 
    666 	/* increment idle count, clear FSE_BUSY and signal IF it was busy */
    667 	if (entry->fse_flags & FSE_BUSY) {
    668 
    669 		/* unbusy it */
    670 		entry->fse_flags &= (~FSE_BUSY);
    671 
    672 		/* release any threads waiting for unbusy */
    673 		if (entry->fse_flags & FSE_THRD_WAITNG) {
    674 			entry->fse_flags &= (~FSE_THRD_WAITNG);
    675 			(void) pthread_cond_broadcast(
    676 			    &fileset->fs_thrd_wait_cv);
    677 		}
    678 
    679 		/* increment idle count and signal waiting threads */
    680 		if (fse_is_dir) {
    681 			fileset->fs_idle_dirs++;
    682 			if (fileset->fs_idle_dirs == 1) {
    683 				(void) pthread_cond_signal(
    684 				    &fileset->fs_idle_dirs_cv);
    685 			}
    686 		} else {
    687 			fileset->fs_idle_files++;
    688 			if (fileset->fs_idle_files == 1) {
    689 				(void) pthread_cond_signal(
    690 				    &fileset->fs_idle_files_cv);
    691 			}
    692 		}
    693 	}
    694 
    695 	/* modify FSE_EXIST flag and actual dirs/files count, if requested */
    696 	if (update_exist) {
    697 		if (new_exist_val == TRUE) {
    698 			if (!(entry->fse_flags & FSE_EXISTS)) {
    699 
    700 				/* asked to set, and it was clear */
    701 				entry->fse_flags |= FSE_EXISTS;
    702 				if (fse_is_dir)
    703 					fileset->fs_num_act_dirs++;
    704 				else
    705 					fileset->fs_num_act_files++;
    706 			}
    707 		} else {
    708 			if (entry->fse_flags & FSE_EXISTS) {
    709 
    710 				/* asked to clear, and it was set */
    711 				entry->fse_flags &= (~FSE_EXISTS);
    712 				if (fse_is_dir)
    713 					fileset->fs_num_act_dirs--;
    714 				else
    715 					fileset->fs_num_act_files--;
    716 			}
    717 		}
    718 	}
    719 
    720 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
    721 }
    722 
    723 /*
    724  * Given a fileset "fileset", create the associated files as
    725  * specified in the attributes of the fileset. The fileset is
    726  * rooted in a directory whose pathname is in fileset_path. If the
    727  * directory exists, meaning that there is already a fileset,
    728  * and the fileset_reuse attribute is false, then remove it and all
    729  * its contained files and subdirectories. Next, the routine
    730  * creates a root directory for the fileset. All the file type
    731  * filesetentries are cycled through creating as needed
    732  * their containing subdirectory trees in the filesystem and
    733  * creating actual files for fileset_preallocpercent of them. The
    734  * created files are filled with fse_size bytes of unitialized
    735  * data. The routine returns FILEBENCH_ERROR on errors,
    736  * FILEBENCH_OK on success.
    737  */
    738 static int
    739 fileset_create(fileset_t *fileset)
    740 {
    741 	filesetentry_t *entry;
    742 	char path[MAXPATHLEN];
    743 	struct stat64 sb;
    744 	int pickflags = FILESET_PICKUNIQUE | FILESET_PICKRESET;
    745 	hrtime_t start = gethrtime();
    746 	char *fileset_path;
    747 	char *fileset_name;
    748 	int randno;
    749 	int preallocated = 0;
    750 	int reusing;
    751 
    752 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
    753 		filebench_log(LOG_ERROR, "%s path not set",
    754 		    fileset_entity_name(fileset));
    755 		return (FILEBENCH_ERROR);
    756 	}
    757 
    758 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
    759 		filebench_log(LOG_ERROR, "%s name not set",
    760 		    fileset_entity_name(fileset));
    761 		return (FILEBENCH_ERROR);
    762 	}
    763 
    764 #ifdef HAVE_RAW_SUPPORT
    765 	/* treat raw device as special case */
    766 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
    767 		return (FILEBENCH_OK);
    768 #endif /* HAVE_RAW_SUPPORT */
    769 
    770 	/* XXX Add check to see if there is enough space */
    771 
    772 	/* set up path to fileset */
    773 	(void) strcpy(path, fileset_path);
    774 	(void) strcat(path, "/");
    775 	(void) strcat(path, fileset_name);
    776 
    777 	/* if exists and resusing, then don't create new */
    778 	if (((stat64(path, &sb) == 0)&& (strlen(path) > 3) &&
    779 	    (strlen(avd_get_str(fileset->fs_path)) > 2)) &&
    780 	    avd_get_bool(fileset->fs_reuse)) {
    781 		reusing = 1;
    782 	} else {
    783 		reusing = 0;
    784 	}
    785 
    786 	if (!reusing) {
    787 		char cmd[MAXPATHLEN];
    788 
    789 		/* Remove existing */
    790 		(void) snprintf(cmd, sizeof (cmd), "rm -rf %s", path);
    791 		(void) system(cmd);
    792 		filebench_log(LOG_VERBOSE,
    793 		    "Removed any existing %s %s in %llu seconds",
    794 		    fileset_entity_name(fileset), fileset_name,
    795 		    (u_longlong_t)(((gethrtime() - start) /
    796 		    1000000000) + 1));
    797 	} else {
    798 		/* we are re-using */
    799 		filebench_log(LOG_VERBOSE, "Re-using %s %s.",
    800 		    fileset_entity_name(fileset), fileset_name);
    801 	}
    802 
    803 	/* make the filesets directory tree unless in reuse mode */
    804 	if (!reusing && (avd_get_bool(fileset->fs_prealloc))) {
    805 		filebench_log(LOG_INFO,
    806 		    "making tree for filset %s", path);
    807 
    808 		(void) mkdir(path, 0755);
    809 
    810 		if (fileset_create_subdirs(fileset, path) == FILEBENCH_ERROR)
    811 			return (FILEBENCH_ERROR);
    812 	}
    813 
    814 	start = gethrtime();
    815 
    816 	filebench_log(LOG_VERBOSE, "Creating %s %s...",
    817 	    fileset_entity_name(fileset), fileset_name);
    818 
    819 	if (!avd_get_bool(fileset->fs_prealloc))
    820 		goto exit;
    821 
    822 	randno = ((RAND_MAX * (100
    823 	    - avd_get_int(fileset->fs_preallocpercent))) / 100);
    824 
    825 	while (entry = fileset_pick(fileset, pickflags, 0)) {
    826 		pthread_t tid;
    827 		int newrand;
    828 
    829 		pickflags = FILESET_PICKUNIQUE;
    830 
    831 		/* entry doesn't need to be locked during initialization */
    832 		fileset_unbusy(entry, FALSE, FALSE);
    833 
    834 		newrand = rand();
    835 
    836 		if (newrand < randno)
    837 			continue;
    838 
    839 		preallocated++;
    840 
    841 		if (reusing)
    842 			entry->fse_flags |= FSE_REUSING;
    843 		else
    844 			entry->fse_flags &= (~FSE_REUSING);
    845 
    846 		/* fire off allocation threads for each file if paralloc set */
    847 		if (avd_get_bool(fileset->fs_paralloc)) {
    848 
    849 			/* limit total number of simultaneous allocations */
    850 			(void) pthread_mutex_lock(
    851 			    &filebench_shm->shm_fsparalloc_lock);
    852 			while (filebench_shm->shm_fsparalloc_count
    853 			    >= MAX_PARALLOC_THREADS) {
    854 				(void) pthread_cond_wait(
    855 				    &filebench_shm->shm_fsparalloc_cv,
    856 				    &filebench_shm->shm_fsparalloc_lock);
    857 			}
    858 
    859 			/* quit if any allocation thread reports and error */
    860 			if (filebench_shm->shm_fsparalloc_count < 0) {
    861 				(void) pthread_mutex_unlock(
    862 				    &filebench_shm->shm_fsparalloc_lock);
    863 				return (FILEBENCH_ERROR);
    864 			}
    865 
    866 			filebench_shm->shm_fsparalloc_count++;
    867 			(void) pthread_mutex_unlock(
    868 			    &filebench_shm->shm_fsparalloc_lock);
    869 
    870 			/*
    871 			 * Fire off a detached allocation thread per file.
    872 			 * The thread will self destruct when it finishes
    873 			 * writing pre-allocation data to the file.
    874 			 */
    875 			if (pthread_create(&tid, NULL,
    876 			    (void *(*)(void*))fileset_alloc_thread,
    877 			    entry) == 0) {
    878 				/*
    879 				 * A thread was created; detach it so it can
    880 				 * fully quit when finished.
    881 				 */
    882 				(void) pthread_detach(tid);
    883 			} else {
    884 				filebench_log(LOG_ERROR,
    885 				    "File prealloc thread create failed");
    886 				filebench_shutdown(1);
    887 			}
    888 
    889 		} else {
    890 			if (fileset_alloc_file(entry) == FILEBENCH_ERROR)
    891 				return (FILEBENCH_ERROR);
    892 		}
    893 	}
    894 
    895 exit:
    896 	filebench_log(LOG_VERBOSE,
    897 	    "Preallocated %d of %llu of %s %s in %llu seconds",
    898 	    preallocated,
    899 	    (u_longlong_t)fileset->fs_constentries,
    900 	    fileset_entity_name(fileset), fileset_name,
    901 	    (u_longlong_t)(((gethrtime() - start) / 1000000000) + 1));
    902 
    903 	return (FILEBENCH_OK);
    904 }
    905 
    906 /*
    907  * Adds an entry to the fileset's file list. Single threaded so
    908  * no locking needed.
    909  */
    910 static void
    911 fileset_insfilelist(fileset_t *fileset, filesetentry_t *entry)
    912 {
    913 	if (fileset->fs_filelist == NULL) {
    914 		fileset->fs_filelist = entry;
    915 		entry->fse_filenext = NULL;
    916 	} else {
    917 		entry->fse_filenext = fileset->fs_filelist;
    918 		fileset->fs_filelist = entry;
    919 	}
    920 }
    921 
    922 /*
    923  * Adds an entry to the fileset's directory list. Single
    924  * threaded so no locking needed.
    925  */
    926 static void
    927 fileset_insdirlist(fileset_t *fileset, filesetentry_t *entry)
    928 {
    929 	if (fileset->fs_dirlist == NULL) {
    930 		fileset->fs_dirlist = entry;
    931 		entry->fse_dirnext = NULL;
    932 	} else {
    933 		entry->fse_dirnext = fileset->fs_dirlist;
    934 		fileset->fs_dirlist = entry;
    935 	}
    936 }
    937 
    938 /*
    939  * Obtaines a filesetentry entity for a file to be placed in a
    940  * (sub)directory of a fileset. The size of the file may be
    941  * specified by fileset_meansize, or calculated from a gamma
    942  * distribution of parameter fileset_sizegamma and of mean size
    943  * fileset_meansize. The filesetentry entity is placed on the file
    944  * list in the specified parent filesetentry entity, which may
    945  * be a directory filesetentry, or the root filesetentry in the
    946  * fileset. It is also placed on the fileset's list of all
    947  * contained files. Returns FILEBENCH_OK if successful or FILEBENCH_ERROR
    948  * if ipc memory for the path string cannot be allocated.
    949  */
    950 static int
    951 fileset_populate_file(fileset_t *fileset, filesetentry_t *parent, int serial)
    952 {
    953 	char tmpname[16];
    954 	filesetentry_t *entry;
    955 	double drand;
    956 
    957 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
    958 	    == NULL) {
    959 		filebench_log(LOG_ERROR,
    960 		    "fileset_populate_file: Can't malloc filesetentry");
    961 		return (FILEBENCH_ERROR);
    962 	}
    963 
    964 	/* Another currently idle file */
    965 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
    966 	fileset->fs_idle_files++;
    967 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
    968 
    969 	entry->fse_parent = parent;
    970 	entry->fse_fileset = fileset;
    971 	entry->fse_flags = FSE_FREE;
    972 	fileset_insfilelist(fileset, entry);
    973 
    974 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
    975 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
    976 		filebench_log(LOG_ERROR,
    977 		    "fileset_populate_file: Can't alloc path string");
    978 		return (FILEBENCH_ERROR);
    979 	}
    980 
    981 	/* see if random variable was supplied for file size */
    982 	if (fileset->fs_meansize == -1) {
    983 		entry->fse_size = (off64_t)avd_get_int(fileset->fs_size);
    984 	} else {
    985 		double gamma;
    986 
    987 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
    988 		if (gamma > 0) {
    989 			drand = gamma_dist_knuth(gamma,
    990 			    fileset->fs_meansize / gamma);
    991 			entry->fse_size = (off64_t)drand;
    992 		} else {
    993 			entry->fse_size = (off64_t)fileset->fs_meansize;
    994 		}
    995 	}
    996 
    997 	fileset->fs_bytes += entry->fse_size;
    998 
    999 	fileset->fs_realfiles++;
   1000 	return (FILEBENCH_OK);
   1001 }
   1002 
   1003 /*
   1004  * Creates a directory node in a fileset, by obtaining a
   1005  * filesetentry entity for the node and initializing it
   1006  * according to parameters of the fileset. It determines a
   1007  * directory tree depth and directory width, optionally using
   1008  * a gamma distribution. If its calculated depth is less then
   1009  * its actual depth in the directory tree, it becomes a leaf
   1010  * node and files itself with "width" number of file type
   1011  * filesetentries, otherwise it files itself with "width"
   1012  * number of directory type filesetentries, using recursive
   1013  * calls to fileset_populate_subdir. The end result of the
   1014  * initial call to this routine is a tree of directories of
   1015  * random width and varying depth with sufficient leaf
   1016  * directories to contain all required files.
   1017  * Returns FILEBENCH_OK on success. Returns FILEBENCH_ERROR if ipc path
   1018  * string memory cannot be allocated and returns the error code (currently
   1019  * also FILEBENCH_ERROR) from calls to fileset_populate_file or recursive
   1020  * calls to fileset_populate_subdir.
   1021  */
   1022 static int
   1023 fileset_populate_subdir(fileset_t *fileset, filesetentry_t *parent,
   1024     int serial, double depth)
   1025 {
   1026 	double randepth, drand, ranwidth;
   1027 	int isleaf = 0;
   1028 	char tmpname[16];
   1029 	filesetentry_t *entry;
   1030 	int i;
   1031 
   1032 	depth += 1;
   1033 
   1034 	/* Create dir node */
   1035 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
   1036 	    == NULL) {
   1037 		filebench_log(LOG_ERROR,
   1038 		    "fileset_populate_subdir: Can't malloc filesetentry");
   1039 		return (FILEBENCH_ERROR);
   1040 	}
   1041 
   1042 	/* another idle directory */
   1043 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
   1044 	fileset->fs_idle_dirs++;
   1045 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
   1046 
   1047 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
   1048 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
   1049 		filebench_log(LOG_ERROR,
   1050 		    "fileset_populate_subdir: Can't alloc path string");
   1051 		return (FILEBENCH_ERROR);
   1052 	}
   1053 
   1054 	entry->fse_parent = parent;
   1055 	entry->fse_flags = FSE_DIR | FSE_FREE;
   1056 	fileset_insdirlist(fileset, entry);
   1057 
   1058 	if (fileset->fs_dirdepthrv) {
   1059 		randepth = (int)avd_get_int(fileset->fs_dirdepthrv);
   1060 	} else {
   1061 		double gamma;
   1062 
   1063 		gamma = avd_get_int(fileset->fs_dirgamma) / 1000.0;
   1064 		if (gamma > 0) {
   1065 			drand = gamma_dist_knuth(gamma,
   1066 			    fileset->fs_meandepth / gamma);
   1067 			randepth = (int)drand;
   1068 		} else {
   1069 			randepth = (int)fileset->fs_meandepth;
   1070 		}
   1071 	}
   1072 
   1073 	if (fileset->fs_meanwidth == -1) {
   1074 		ranwidth = avd_get_dbl(fileset->fs_dirwidth);
   1075 	} else {
   1076 		double gamma;
   1077 
   1078 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
   1079 		if (gamma > 0) {
   1080 			drand = gamma_dist_knuth(gamma,
   1081 			    fileset->fs_meanwidth / gamma);
   1082 			ranwidth = drand;
   1083 		} else {
   1084 			ranwidth = fileset->fs_meanwidth;
   1085 		}
   1086 	}
   1087 
   1088 	if (randepth == 0)
   1089 		randepth = 1;
   1090 	if (ranwidth == 0)
   1091 		ranwidth = 1;
   1092 	if (depth >= randepth)
   1093 		isleaf = 1;
   1094 
   1095 	/*
   1096 	 * Create directory of random width according to distribution, or
   1097 	 * if root directory, continue until #files required
   1098 	 */
   1099 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
   1100 	    (fileset->fs_realfiles < fileset->fs_constentries);
   1101 	    i++) {
   1102 		int ret = 0;
   1103 
   1104 		if (parent && isleaf)
   1105 			ret = fileset_populate_file(fileset, entry, i);
   1106 		else
   1107 			ret = fileset_populate_subdir(fileset, entry, i, depth);
   1108 
   1109 		if (ret != 0)
   1110 			return (ret);
   1111 	}
   1112 	return (FILEBENCH_OK);
   1113 }
   1114 
   1115 /*
   1116  * Populates a fileset with files and subdirectory entries. Uses
   1117  * the supplied fileset_dirwidth and fileset_entries (number of files) to
   1118  * calculate the required fileset_meandepth (of subdirectories) and
   1119  * initialize the fileset_meanwidth and fileset_meansize variables. Then
   1120  * calls fileset_populate_subdir() to do the recursive
   1121  * subdirectory entry creation and leaf file entry creation. All
   1122  * of the above is skipped if the fileset has already been
   1123  * populated. Returns 0 on success, or an error code from the
   1124  * call to fileset_populate_subdir if that call fails.
   1125  */
   1126 static int
   1127 fileset_populate(fileset_t *fileset)
   1128 {
   1129 	int entries = (int)avd_get_int(fileset->fs_entries);
   1130 	int