xref: /onnv-gate/usr/src/cmd/filebench/common/fileset.c (revision 7946:36d7e7afa9fa)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Portions Copyright 2008 Denis Cheng
26  */
27 
28 #include <fcntl.h>
29 #include <pthread.h>
30 #include <errno.h>
31 #include <math.h>
32 #include <libgen.h>
33 #include <sys/mman.h>
34 
35 #include "filebench.h"
36 #include "fileset.h"
37 #include "gamma_dist.h"
38 #include "utils.h"
39 
40 /*
41  * File sets, of type fileset_t, are entities which contain
42  * information about collections of files and subdirectories in Filebench.
43  * The fileset, once populated, consists of a tree of fileset entries of
44  * type filesetentry_t which specify files and directories.  The fileset
45  * is rooted in a directory specified by fileset_path, and once the populated
46  * fileset has been created, has a tree of directories and files
47  * corresponding to the fileset's filesetentry tree.
48  *
49  * Fileset entities are allocated by fileset_define() which is called from
50  * parser_gram.y: parser_fileset_define(). The filesetentry tree corresponding
51  * to the eventual directory and file tree to be instantiated on the storage
52  * medium is built by fileset_populate(), which is called from
53  * fileset_createset().
54  * After calling fileset_populate(), fileset_createset() will call
55  * fileset_create() to pre-allocate designated files and directories.
56  *
57  * Fileset_createset() is called from parser_gram.y: parser_create_fileset()
58  * when a "create fileset" or "run" command is encountered. When the
59  * "create fileset" command is used, it is generally paired with
60  * a "create processes" command, and must appear first, in order to
61  * instantiate all the files in the fileset before trying to use them.
62  */
63 
/*
 * Forward declaration of a file-local helper that is defined elsewhere
 * in this file.
 */
static int fileset_checkraw(fileset_t *fileset);

/*
 * Maximum parallel allocation control: fileset_create() waits before
 * starting another pre-allocation thread while shm_fsparalloc_count
 * is at or above this value.
 */
#define	MAX_PARALLOC_THREADS 32
68 
69 /*
70  * returns pointer to file or fileset
71  * string, as appropriate
72  */
73 static char *
74 fileset_entity_name(fileset_t *fileset)
75 {
76 	if (fileset->fs_attrs & FILESET_IS_FILE)
77 		return ("file");
78 	else
79 		return ("fileset");
80 }
81 
/*
 * Strips the last component from the pathname in "dir", in place.
 * Working backwards from the terminating NUL, every character is
 * overwritten with \0 up to and including the first forward slash
 * '/' that is found. The first character of the buffer is never
 * examined or overwritten, so a string without any other slash is
 * reduced to its first character. Returns "dir".
 */
static char *
trunc_dirname(char *dir)
{
	size_t pos;

	for (pos = strlen(dir); pos > 0; pos--) {
		char ch = dir[pos];

		dir[pos] = '\0';
		if (ch == '/')
			break;
	}
	return (dir);
}
103 
/*
 * Prints a list of allowed options and how to specify them.
 * All output is written to stderr.
 */
void
fileset_usage(void)
{
	/*
	 * The two literals below are concatenated by the compiler, so
	 * only one of them may carry the comma that separates "path"
	 * from "entries".
	 */
	(void) fprintf(stderr,
	    "define [file name=<name> | fileset name=<name>],path=<pathname>"
	    ",entries=<number>\n");
	(void) fprintf(stderr,
	    "		        [,filesize=[size]]\n");
	(void) fprintf(stderr,
	    "		        [,dirwidth=[width]]\n");
	(void) fprintf(stderr,
	    "		        [,dirdepthrv=$random_variable_name]\n");
	(void) fprintf(stderr,
	    "		        [,dirgamma=[100-10000]] "
	    "(Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,sizegamma=[100-10000]] (Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,prealloc=[percent]]\n");
	(void) fprintf(stderr, "		        [,paralloc]\n");
	(void) fprintf(stderr, "		        [,reuse]\n");
	(void) fprintf(stderr, "\n");
}
130 
131 /*
132  * Frees up memory mapped file region of supplied size. The
133  * file descriptor "fd" indicates which memory mapped file.
134  * If successful, returns 0. Otherwise returns -1 if "size"
135  * is zero, or -1 times the number of times msync() failed.
136  */
137 static int
138 fileset_freemem(int fd, off64_t size)
139 {
140 	off64_t left;
141 	int ret = 0;
142 
143 	for (left = size; left > 0; left -= MMAP_SIZE) {
144 		off64_t thismapsize;
145 		caddr_t addr;
146 
147 		thismapsize = MIN(MMAP_SIZE, left);
148 		addr = mmap64(0, thismapsize, PROT_READ|PROT_WRITE,
149 		    MAP_SHARED, fd, size - left);
150 		ret += msync(addr, thismapsize, MS_INVALIDATE);
151 		(void) munmap(addr, thismapsize);
152 	}
153 	return (ret);
154 }
155 
156 /*
157  * Creates a path string from the filesetentry_t "*entry"
158  * and all of its parent's path names. The resulting path
159  * is a concatination of all the individual parent paths.
160  * Allocates memory for the path string and returns a
161  * pointer to it.
162  */
163 char *
164 fileset_resolvepath(filesetentry_t *entry)
165 {
166 	filesetentry_t *fsep = entry;
167 	char path[MAXPATHLEN];
168 	char pathtmp[MAXPATHLEN];
169 	char *s;
170 
171 	path[0] = '\0';
172 	while (fsep->fse_parent) {
173 		(void) strcpy(pathtmp, "/");
174 		(void) fb_strlcat(pathtmp, fsep->fse_path, MAXPATHLEN);
175 		(void) fb_strlcat(pathtmp, path, MAXPATHLEN);
176 		(void) fb_strlcpy(path, pathtmp, MAXPATHLEN);
177 		fsep = fsep->fse_parent;
178 	}
179 
180 	s = malloc(strlen(path) + 1);
181 	(void) fb_strlcpy(s, path, MAXPATHLEN);
182 	return (s);
183 }
184 
185 /*
186  * Creates multiple nested directories as required by the
187  * supplied path. Starts at the end of the path, creating
188  * a list of directories to mkdir, up to the root of the
189  * path, then mkdirs them one at a time from the root on down.
190  */
191 static int
192 fileset_mkdir(char *path, int mode)
193 {
194 	char *p;
195 	char *dirs[65536];
196 	int i = 0;
197 
198 	if ((p = strdup(path)) == NULL)
199 		goto null_str;
200 
201 	/*
202 	 * Fill an array of subdirectory path names until either we
203 	 * reach the root or encounter an already existing subdirectory
204 	 */
205 	/* CONSTCOND */
206 	while (1) {
207 		struct stat64 sb;
208 
209 		if (stat64(p, &sb) == 0)
210 			break;
211 		if (strlen(p) < 3)
212 			break;
213 		if ((dirs[i] = strdup(p)) == NULL) {
214 			free(p);
215 			goto null_str;
216 		}
217 
218 		(void) trunc_dirname(p);
219 		i++;
220 	}
221 
222 	/* Make the directories, from closest to root downwards. */
223 	for (--i; i >= 0; i--) {
224 		(void) mkdir(dirs[i], mode);
225 		free(dirs[i]);
226 	}
227 
228 	free(p);
229 	return (FILEBENCH_OK);
230 
231 null_str:
232 	/* clean up */
233 	for (--i; i >= 0; i--)
234 		free(dirs[i]);
235 
236 	filebench_log(LOG_ERROR,
237 	    "Failed to create directory path %s: Out of memory", path);
238 	return (FILEBENCH_ERROR);
239 }
240 
241 /*
242  * creates the subdirectory tree for a fileset.
243  */
244 static int
245 fileset_create_subdirs(fileset_t *fileset, char *filesetpath)
246 {
247 	filesetentry_t *direntry;
248 	char full_path[MAXPATHLEN];
249 	char *part_path;
250 
251 	/* walk the subdirectory list, enstanciating subdirs */
252 	direntry = fileset->fs_dirlist;
253 	while (direntry) {
254 		(void) fb_strlcpy(full_path, filesetpath, MAXPATHLEN);
255 		part_path = fileset_resolvepath(direntry);
256 		(void) fb_strlcat(full_path, part_path, MAXPATHLEN);
257 		free(part_path);
258 
259 		/* now create this portion of the subdirectory tree */
260 		if (fileset_mkdir(full_path, 0755) == FILEBENCH_ERROR)
261 			return (FILEBENCH_ERROR);
262 
263 		direntry = direntry->fse_dirnext;
264 	}
265 	return (FILEBENCH_OK);
266 }
267 
268 /*
269  * given a fileset entry, determines if the associated leaf directory
270  * needs to be made or not, and if so does the mkdir.
271  */
272 static int
273 fileset_alloc_leafdir(filesetentry_t *entry)
274 {
275 	fileset_t *fileset;
276 	char path[MAXPATHLEN];
277 	struct stat64 sb;
278 	char *pathtmp;
279 
280 	fileset = entry->fse_fileset;
281 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
282 	(void) fb_strlcat(path, "/", MAXPATHLEN);
283 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
284 	pathtmp = fileset_resolvepath(entry);
285 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
286 	free(pathtmp);
287 
288 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
289 
290 	/* see if not reusing and this directory does not exist */
291 	if (!((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0))) {
292 
293 		/* No file or not reusing, so create */
294 		if (mkdir(path, 0755) < 0) {
295 			filebench_log(LOG_ERROR,
296 			    "Failed to pre-allocate leaf directory %s: %s",
297 			    path, strerror(errno));
298 
299 			return (FILEBENCH_ERROR);
300 		}
301 	}
302 
303 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
304 	entry->fse_flags |= FSE_EXISTS;
305 	fileset->fs_num_act_leafdirs++;
306 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
307 
308 	return (FILEBENCH_OK);
309 }
310 
311 /*
312  * given a fileset entry, determines if the associated file
313  * needs to be allocated or not, and if so does the allocation.
314  */
315 static int
316 fileset_alloc_file(filesetentry_t *entry)
317 {
318 	fileset_t *fileset;
319 	char path[MAXPATHLEN];
320 	char *buf;
321 	struct stat64 sb;
322 	char *pathtmp;
323 	off64_t seek;
324 	int fd;
325 
326 	fileset = entry->fse_fileset;
327 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
328 	(void) fb_strlcat(path, "/", MAXPATHLEN);
329 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
330 	pathtmp = fileset_resolvepath(entry);
331 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
332 	free(pathtmp);
333 
334 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
335 
336 	/* see if reusing and this file exists */
337 	if ((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0)) {
338 		if ((fd = open64(path, O_RDWR)) < 0) {
339 			filebench_log(LOG_INFO,
340 			    "Attempted but failed to Re-use file %s",
341 			    path);
342 			return (FILEBENCH_ERROR);
343 		}
344 
345 		if (sb.st_size == (off64_t)entry->fse_size) {
346 			filebench_log(LOG_DEBUG_IMPL,
347 			    "Re-using file %s", path);
348 
349 			if (!avd_get_bool(fileset->fs_cached))
350 				(void) fileset_freemem(fd,
351 				    entry->fse_size);
352 
353 			(void) ipc_mutex_lock(&fileset->fs_pick_lock);
354 			entry->fse_flags |= FSE_EXISTS;
355 			fileset->fs_num_act_files++;
356 			(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
357 
358 			(void) close(fd);
359 			return (FILEBENCH_OK);
360 
361 		} else if (sb.st_size > (off64_t)entry->fse_size) {
362 			/* reuse, but too large */
363 			filebench_log(LOG_INFO,
364 			    "Truncating & re-using file %s", path);
365 
366 #ifdef HAVE_FTRUNCATE64
367 			(void) ftruncate64(fd, (off64_t)entry->fse_size);
368 #else
369 			(void) ftruncate(fd, (off_t)entry->fse_size);
370 #endif
371 
372 			if (!avd_get_bool(fileset->fs_cached))
373 				(void) fileset_freemem(fd,
374 				    entry->fse_size);
375 
376 			(void) ipc_mutex_lock(&fileset->fs_pick_lock);
377 			entry->fse_flags |= FSE_EXISTS;
378 			fileset->fs_num_act_files++;
379 			(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
380 
381 			(void) close(fd);
382 			return (FILEBENCH_OK);
383 		}
384 	} else {
385 
386 		/* No file or not reusing, so create */
387 		if ((fd = open64(path, O_RDWR | O_CREAT, 0644)) < 0) {
388 			filebench_log(LOG_ERROR,
389 			    "Failed to pre-allocate file %s: %s",
390 			    path, strerror(errno));
391 
392 			return (FILEBENCH_ERROR);
393 		}
394 	}
395 
396 	if ((buf = (char *)malloc(FILE_ALLOC_BLOCK)) == NULL)
397 		return (FILEBENCH_ERROR);
398 
399 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
400 	entry->fse_flags |= FSE_EXISTS;
401 	fileset->fs_num_act_files++;
402 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
403 
404 	for (seek = 0; seek < entry->fse_size; ) {
405 		off64_t wsize;
406 		int ret = 0;
407 
408 		/*
409 		 * Write FILE_ALLOC_BLOCK's worth,
410 		 * except on last write
411 		 */
412 		wsize = MIN(entry->fse_size - seek, FILE_ALLOC_BLOCK);
413 
414 		ret = write(fd, buf, wsize);
415 		if (ret != wsize) {
416 			filebench_log(LOG_ERROR,
417 			    "Failed to pre-allocate file %s: %s",
418 			    path, strerror(errno));
419 			(void) close(fd);
420 			free(buf);
421 			return (FILEBENCH_ERROR);
422 		}
423 		seek += wsize;
424 	}
425 
426 	if (!avd_get_bool(fileset->fs_cached))
427 		(void) fileset_freemem(fd, entry->fse_size);
428 
429 	(void) close(fd);
430 
431 	free(buf);
432 
433 	filebench_log(LOG_DEBUG_IMPL,
434 	    "Pre-allocated file %s size %llu",
435 	    path, (u_longlong_t)entry->fse_size);
436 
437 	return (FILEBENCH_OK);
438 }
439 
440 /*
441  * given a fileset entry, determines if the associated file
442  * needs to be allocated or not, and if so does the allocation.
443  * Sets shm_fsparalloc_count to -1 on error.
444  */
445 static void *
446 fileset_alloc_thread(filesetentry_t *entry)
447 {
448 	if (fileset_alloc_file(entry) == FILEBENCH_ERROR) {
449 		(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
450 		filebench_shm->shm_fsparalloc_count = -1;
451 	} else {
452 		(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
453 		filebench_shm->shm_fsparalloc_count--;
454 	}
455 
456 	(void) pthread_cond_signal(&filebench_shm->shm_fsparalloc_cv);
457 	(void) pthread_mutex_unlock(&filebench_shm->shm_fsparalloc_lock);
458 
459 	pthread_exit(NULL);
460 	return (NULL);
461 }
462 
463 
464 /*
465  * First creates the parent directories of the file using
466  * fileset_mkdir(). Then Optionally sets the O_DSYNC flag
467  * and opens the file with open64(). It unlocks the fileset
468  * entry lock, sets the DIRECTIO_ON or DIRECTIO_OFF flags
469  * as requested, and returns the file descriptor integer
470  * for the opened file.
471  */
472 int
473 fileset_openfile(fileset_t *fileset,
474     filesetentry_t *entry, int flag, int filemode, int attrs)
475 {
476 	char path[MAXPATHLEN];
477 	char dir[MAXPATHLEN];
478 	char *pathtmp;
479 	struct stat64 sb;
480 	int fd;
481 	int open_attrs = 0;
482 
483 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
484 	(void) fb_strlcat(path, "/", MAXPATHLEN);
485 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
486 	pathtmp = fileset_resolvepath(entry);
487 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
488 	(void) fb_strlcpy(dir, path, MAXPATHLEN);
489 	free(pathtmp);
490 	(void) trunc_dirname(dir);
491 
492 	/* If we are going to create a file, create the parent dirs */
493 	if ((flag & O_CREAT) && (stat64(dir, &sb) != 0)) {
494 		if (fileset_mkdir(dir, 0755) == FILEBENCH_ERROR)
495 			return (FILEBENCH_ERROR);
496 	}
497 
498 	if (attrs & FLOW_ATTR_DSYNC) {
499 #ifdef sun
500 		open_attrs |= O_DSYNC;
501 #else
502 		open_attrs |= O_FSYNC;
503 #endif
504 	}
505 
506 	if ((fd = open64(path, flag | open_attrs, filemode)) < 0) {
507 		filebench_log(LOG_ERROR,
508 		    "Failed to open file %s: %s",
509 		    path, strerror(errno));
510 
511 		fileset_unbusy(entry, FALSE, FALSE);
512 		return (FILEBENCH_ERROR);
513 	}
514 
515 	if (flag & O_CREAT)
516 		fileset_unbusy(entry, TRUE, TRUE);
517 	else
518 		fileset_unbusy(entry, FALSE, FALSE);
519 
520 #ifdef sun
521 	if (attrs & FLOW_ATTR_DIRECTIO)
522 		(void) directio(fd, DIRECTIO_ON);
523 	else
524 		(void) directio(fd, DIRECTIO_OFF);
525 #endif
526 
527 	return (fd);
528 }
529 
530 
531 /*
532  * Selects a fileset entry from a fileset. If the
533  * FILESET_PICKLEAFDIR flag is set it will pick a leaf directory entry,
534  * if the FILESET_PICKDIR flag is set it will pick a non leaf directory
535  * entry, otherwise a file entry. The FILESET_PICKRESET
536  * flag will cause it to reset the free list to the
537  * overall list (file or directory). The FILESET_PICKUNIQUE
538  * flag will take an entry off of one of the free (unused)
539  * lists (file or directory), otherwise the entry will be
540  * picked off of one of the rotor lists (file or directory).
541  * The FILESET_PICKEXISTS will insure that only extant
542  * (FSE_EXISTS) state files are selected, while
543  * FILESET_PICKNOEXIST insures that only non extant
544  * (not FSE_EXISTS) state files are selected.
545  * Note that the selected fileset entry (file) is returned
546  * with its FSE_BUSY flag (in fse_flags) set.
547  */
548 filesetentry_t *
549 fileset_pick(fileset_t *fileset, int flags, int tid)
550 {
551 	filesetentry_t *entry = NULL;
552 	filesetentry_t *first = NULL;
553 
554 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
555 
556 	/* see if we have to wait for available files or directories */
557 	switch (flags & FILESET_PICKMASK) {
558 	case FILESET_PICKFILE:
559 		if (fileset->fs_filelist == NULL)
560 			goto empty;
561 		while (fileset->fs_idle_files == 0) {
562 			(void) pthread_cond_wait(&fileset->fs_idle_files_cv,
563 			    &fileset->fs_pick_lock);
564 		}
565 		break;
566 	case FILESET_PICKDIR:
567 		if (fileset->fs_dirlist == NULL)
568 			goto empty;
569 		while (fileset->fs_idle_dirs == 0) {
570 			(void) pthread_cond_wait(&fileset->fs_idle_dirs_cv,
571 			    &fileset->fs_pick_lock);
572 		}
573 		break;
574 	case FILESET_PICKLEAFDIR:
575 		if (fileset->fs_leafdirlist == NULL)
576 			goto empty;
577 		while (fileset->fs_idle_leafdirs == 0) {
578 			(void) pthread_cond_wait(&fileset->fs_idle_leafdirs_cv,
579 			    &fileset->fs_pick_lock);
580 		}
581 		break;
582 	}
583 
584 	/* see if asking for impossible */
585 	switch (flags & FILESET_PICKMASK) {
586 	case FILESET_PICKFILE:
587 		if (flags & FILESET_PICKEXISTS) {
588 			if (fileset->fs_num_act_files == 0) {
589 				(void) ipc_mutex_unlock(
590 				    &fileset->fs_pick_lock);
591 				return (NULL);
592 			}
593 		} else if (flags & FILESET_PICKNOEXIST) {
594 			if (fileset->fs_num_act_files ==
595 			    fileset->fs_realfiles) {
596 				(void) ipc_mutex_unlock(
597 				    &fileset->fs_pick_lock);
598 				return (NULL);
599 			}
600 		}
601 		break;
602 	case FILESET_PICKLEAFDIR:
603 		if (flags & FILESET_PICKEXISTS) {
604 			if (fileset->fs_num_act_leafdirs == 0) {
605 				(void) ipc_mutex_unlock(
606 				    &fileset->fs_pick_lock);
607 				return (NULL);
608 			}
609 		} else if (flags & FILESET_PICKNOEXIST) {
610 			if (fileset->fs_num_act_leafdirs ==
611 			    fileset->fs_realleafdirs) {
612 				(void) ipc_mutex_unlock(
613 				    &fileset->fs_pick_lock);
614 				return (NULL);
615 			}
616 		}
617 		break;
618 	case FILESET_PICKDIR:
619 	default:
620 		break;
621 	}
622 
623 	while (entry == NULL) {
624 
625 		if (flags & FILESET_PICKRESET) {
626 			switch (flags & FILESET_PICKMASK) {
627 			case FILESET_PICKFILE:
628 				entry = fileset->fs_filelist;
629 				while (entry) {
630 					entry->fse_flags |= FSE_FREE;
631 					entry = entry->fse_filenext;
632 				}
633 				fileset->fs_filefree = fileset->fs_filelist;
634 				break;
635 			case FILESET_PICKDIR:
636 				entry = fileset->fs_dirlist;
637 				while (entry) {
638 					entry->fse_flags |= FSE_FREE;
639 					entry = entry->fse_dirnext;
640 				}
641 				fileset->fs_dirfree = fileset->fs_dirlist;
642 				break;
643 			case FILESET_PICKLEAFDIR:
644 				entry = fileset->fs_leafdirlist;
645 				while (entry) {
646 					entry->fse_flags |= FSE_FREE;
647 					entry = entry->fse_leafdirnext;
648 				}
649 				fileset->fs_leafdirfree =
650 				    fileset->fs_leafdirlist;
651 				break;
652 			}
653 		}
654 
655 		if (flags & FILESET_PICKUNIQUE) {
656 			switch (flags & FILESET_PICKMASK) {
657 			case FILESET_PICKFILE:
658 				entry = fileset->fs_filefree;
659 				if (entry == NULL)
660 					goto empty;
661 				fileset->fs_filefree = entry->fse_filenext;
662 				break;
663 			case FILESET_PICKDIR:
664 				entry = fileset->fs_dirfree;
665 				if (entry == NULL)
666 					goto empty;
667 				fileset->fs_dirfree = entry->fse_dirnext;
668 				break;
669 			case FILESET_PICKLEAFDIR:
670 				entry = fileset->fs_leafdirfree;
671 				if (entry == NULL)
672 					goto empty;
673 				fileset->fs_leafdirfree =
674 				    entry->fse_leafdirnext;
675 				break;
676 			}
677 			entry->fse_flags &= ~FSE_FREE;
678 		} else {
679 			switch (flags & FILESET_PICKMASK) {
680 			case FILESET_PICKFILE:
681 				if (flags & FILESET_PICKNOEXIST) {
682 					entry = fileset->fs_file_ne_rotor;
683 					if (entry == NULL)
684 						fileset->fs_file_ne_rotor =
685 						    entry =
686 						    fileset->fs_filelist;
687 					fileset->fs_file_ne_rotor =
688 					    entry->fse_filenext;
689 				} else {
690 					entry = fileset->fs_filerotor[tid];
691 					if (entry == NULL)
692 						fileset->fs_filerotor[tid] =
693 						    entry =
694 						    fileset->fs_filelist;
695 					fileset->fs_filerotor[tid] =
696 					    entry->fse_filenext;
697 				}
698 				break;
699 			case FILESET_PICKDIR:
700 				entry = fileset->fs_dirrotor;
701 				if (entry == NULL)
702 					fileset->fs_dirrotor =
703 					    entry = fileset->fs_dirlist;
704 				fileset->fs_dirrotor = entry->fse_dirnext;
705 				break;
706 			case FILESET_PICKLEAFDIR:
707 				entry = fileset->fs_leafdirrotor;
708 				if (entry == NULL)
709 					fileset->fs_leafdirrotor =
710 					    entry = fileset->fs_leafdirlist;
711 				fileset->fs_leafdirrotor =
712 				    entry->fse_leafdirnext;
713 				break;
714 			}
715 		}
716 
717 		if (first == entry)
718 			goto empty;
719 
720 		if (first == NULL)
721 			first = entry;
722 
723 		/* see if entry in use */
724 		if (entry->fse_flags & FSE_BUSY) {
725 
726 			/* it is, so try next */
727 			entry = NULL;
728 			continue;
729 		}
730 
731 		/* If we ask for an existing file, go round again */
732 		if ((flags & FILESET_PICKEXISTS) &&
733 		    !(entry->fse_flags & FSE_EXISTS))
734 			entry = NULL;
735 
736 		/* If we ask for not an existing file, go round again */
737 		if ((flags & FILESET_PICKNOEXIST) &&
738 		    (entry->fse_flags & FSE_EXISTS))
739 			entry = NULL;
740 	}
741 
742 	/* update file or directory idle counts */
743 	switch (flags & FILESET_PICKMASK) {
744 	case FILESET_PICKFILE:
745 		fileset->fs_idle_files--;
746 		break;
747 	case FILESET_PICKDIR:
748 		fileset->fs_idle_dirs--;
749 		break;
750 	case FILESET_PICKLEAFDIR:
751 		fileset->fs_idle_leafdirs--;
752 		break;
753 	}
754 
755 	/* Indicate that file or directory is now busy */
756 	entry->fse_flags |= FSE_BUSY;
757 
758 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
759 	filebench_log(LOG_DEBUG_SCRIPT, "Picked file %s", entry->fse_path);
760 	return (entry);
761 
762 empty:
763 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
764 	return (NULL);
765 }
766 
767 /*
768  * Removes a filesetentry from the "FSE_BUSY" state, signaling any threads
769  * that are waiting for a NOT BUSY filesetentry. Also sets whether it is
770  * existant or not, or leaves that designation alone.
771  */
772 void
773 fileset_unbusy(filesetentry_t *entry, int update_exist, int new_exist_val)
774 {
775 	fileset_t *fileset = NULL;
776 
777 	if (entry)
778 		fileset = entry->fse_fileset;
779 
780 	if (fileset == NULL) {
781 		filebench_log(LOG_ERROR, "fileset_unbusy: NO FILESET!");
782 		return;
783 	}
784 
785 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
786 
787 	/* increment idle count, clear FSE_BUSY and signal IF it was busy */
788 	if (entry->fse_flags & FSE_BUSY) {
789 
790 		/* unbusy it */
791 		entry->fse_flags &= (~FSE_BUSY);
792 
793 		/* release any threads waiting for unbusy */
794 		if (entry->fse_flags & FSE_THRD_WAITNG) {
795 			entry->fse_flags &= (~FSE_THRD_WAITNG);
796 			(void) pthread_cond_broadcast(
797 			    &fileset->fs_thrd_wait_cv);
798 		}
799 
800 		/* increment idle count and signal waiting threads */
801 		switch (entry->fse_flags & FSE_TYPE_MASK) {
802 		case FSE_TYPE_FILE:
803 			fileset->fs_idle_files++;
804 			if (fileset->fs_idle_files == 1) {
805 				(void) pthread_cond_signal(
806 				    &fileset->fs_idle_files_cv);
807 			}
808 			break;
809 		case FSE_TYPE_DIR:
810 			fileset->fs_idle_dirs++;
811 			if (fileset->fs_idle_dirs == 1) {
812 				(void) pthread_cond_signal(
813 				    &fileset->fs_idle_dirs_cv);
814 			}
815 			break;
816 		case FSE_TYPE_LEAFDIR:
817 			fileset->fs_idle_leafdirs++;
818 			if (fileset->fs_idle_leafdirs == 1) {
819 				(void) pthread_cond_signal(
820 				    &fileset->fs_idle_leafdirs_cv);
821 			}
822 			break;
823 		}
824 	}
825 
826 	/* modify FSE_EXIST flag and actual dirs/files count, if requested */
827 	if (update_exist) {
828 		if (new_exist_val == TRUE) {
829 			if (!(entry->fse_flags & FSE_EXISTS)) {
830 
831 				/* asked to set, and it was clear */
832 				entry->fse_flags |= FSE_EXISTS;
833 				switch (entry->fse_flags & FSE_TYPE_MASK) {
834 				case FSE_TYPE_FILE:
835 					fileset->fs_num_act_files++;
836 					break;
837 				case FSE_TYPE_DIR:
838 					break;
839 				case FSE_TYPE_LEAFDIR:
840 					fileset->fs_num_act_leafdirs++;
841 					break;
842 				}
843 			}
844 		} else {
845 			if (entry->fse_flags & FSE_EXISTS) {
846 
847 				/* asked to clear, and it was set */
848 				entry->fse_flags &= (~FSE_EXISTS);
849 				switch (entry->fse_flags & FSE_TYPE_MASK) {
850 				case FSE_TYPE_FILE:
851 					fileset->fs_num_act_files--;
852 					break;
853 				case FSE_TYPE_DIR:
854 					break;
855 				case FSE_TYPE_LEAFDIR:
856 					fileset->fs_num_act_leafdirs--;
857 					break;
858 				}
859 			}
860 		}
861 	}
862 
863 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
864 }
865 
866 /*
867  * Given a fileset "fileset", create the associated files as
868  * specified in the attributes of the fileset. The fileset is
869  * rooted in a directory whose pathname is in fileset_path. If the
870  * directory exists, meaning that there is already a fileset,
871  * and the fileset_reuse attribute is false, then remove it and all
872  * its contained files and subdirectories. Next, the routine
873  * creates a root directory for the fileset. All the file type
874  * filesetentries are cycled through creating as needed
875  * their containing subdirectory trees in the filesystem and
876  * creating actual files for fileset_preallocpercent of them. The
877  * created files are filled with fse_size bytes of unitialized
878  * data. The routine returns FILEBENCH_ERROR on errors,
879  * FILEBENCH_OK on success.
880  */
881 static int
882 fileset_create(fileset_t *fileset)
883 {
884 	filesetentry_t *entry;
885 	char path[MAXPATHLEN];
886 	struct stat64 sb;
887 	int pickflags;
888 	hrtime_t start = gethrtime();
889 	char *fileset_path;
890 	char *fileset_name;
891 	int randno;
892 	int preallocated = 0;
893 	int reusing;
894 
895 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
896 		filebench_log(LOG_ERROR, "%s path not set",
897 		    fileset_entity_name(fileset));
898 		return (FILEBENCH_ERROR);
899 	}
900 
901 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
902 		filebench_log(LOG_ERROR, "%s name not set",
903 		    fileset_entity_name(fileset));
904 		return (FILEBENCH_ERROR);
905 	}
906 
907 	/* declare all files currently non existant (single threaded code) */
908 	fileset->fs_num_act_files = 0;
909 
910 #ifdef HAVE_RAW_SUPPORT
911 	/* treat raw device as special case */
912 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
913 		return (FILEBENCH_OK);
914 #endif /* HAVE_RAW_SUPPORT */
915 
916 	/* XXX Add check to see if there is enough space */
917 
918 	/* set up path to fileset */
919 	(void) fb_strlcpy(path, fileset_path, MAXPATHLEN);
920 	(void) fb_strlcat(path, "/", MAXPATHLEN);
921 	(void) fb_strlcat(path, fileset_name, MAXPATHLEN);
922 
923 	/* if exists and resusing, then don't create new */
924 	if (((stat64(path, &sb) == 0)&& (strlen(path) > 3) &&
925 	    (strlen(avd_get_str(fileset->fs_path)) > 2)) &&
926 	    avd_get_bool(fileset->fs_reuse)) {
927 		reusing = 1;
928 	} else {
929 		reusing = 0;
930 	}
931 
932 	if (!reusing) {
933 		char cmd[MAXPATHLEN];
934 
935 		/* Remove existing */
936 		(void) snprintf(cmd, sizeof (cmd), "rm -rf %s", path);
937 		(void) system(cmd);
938 		filebench_log(LOG_VERBOSE,
939 		    "Removed any existing %s %s in %llu seconds",
940 		    fileset_entity_name(fileset), fileset_name,
941 		    (u_longlong_t)(((gethrtime() - start) /
942 		    1000000000) + 1));
943 	} else {
944 		/* we are re-using */
945 		filebench_log(LOG_VERBOSE, "Re-using %s %s.",
946 		    fileset_entity_name(fileset), fileset_name);
947 	}
948 
949 	/* make the filesets directory tree unless in reuse mode */
950 	if (!reusing && (avd_get_bool(fileset->fs_prealloc))) {
951 		filebench_log(LOG_VERBOSE,
952 		    "making tree for filset %s", path);
953 
954 		(void) mkdir(path, 0755);
955 
956 		if (fileset_create_subdirs(fileset, path) == FILEBENCH_ERROR)
957 			return (FILEBENCH_ERROR);
958 	}
959 
960 	start = gethrtime();
961 
962 	filebench_log(LOG_VERBOSE, "Creating %s %s...",
963 	    fileset_entity_name(fileset), fileset_name);
964 
965 	if (!avd_get_bool(fileset->fs_prealloc))
966 		goto exit;
967 
968 	randno = ((RAND_MAX * (100
969 	    - avd_get_int(fileset->fs_preallocpercent))) / 100);
970 
971 	/* alloc any files, as required */
972 	pickflags = FILESET_PICKUNIQUE | FILESET_PICKRESET;
973 	while (entry = fileset_pick(fileset, pickflags, 0)) {
974 		pthread_t tid;
975 		int newrand;
976 
977 		pickflags = FILESET_PICKUNIQUE;
978 
979 		/* entry doesn't need to be locked during initialization */
980 		fileset_unbusy(entry, FALSE, FALSE);
981 
982 		newrand = rand();
983 
984 		if (newrand < randno)
985 			continue;
986 
987 		preallocated++;
988 
989 		if (reusing)
990 			entry->fse_flags |= FSE_REUSING;
991 		else
992 			entry->fse_flags &= (~FSE_REUSING);
993 
994 		/* fire off allocation threads for each file if paralloc set */
995 		if (avd_get_bool(fileset->fs_paralloc)) {
996 
997 			/* limit total number of simultaneous allocations */
998 			(void) pthread_mutex_lock(
999 			    &filebench_shm->shm_fsparalloc_lock);
1000 			while (filebench_shm->shm_fsparalloc_count
1001 			    >= MAX_PARALLOC_THREADS) {
1002 				(void) pthread_cond_wait(
1003 				    &filebench_shm->shm_fsparalloc_cv,
1004 				    &filebench_shm->shm_fsparalloc_lock);
1005 			}
1006 
1007 			/* quit if any allocation thread reports and error */
1008 			if (filebench_shm->shm_fsparalloc_count < 0) {
1009 				(void) pthread_mutex_unlock(
1010 				    &filebench_shm->shm_fsparalloc_lock);
1011 				return (FILEBENCH_ERROR);
1012 			}
1013 
1014 			filebench_shm->shm_fsparalloc_count++;
1015 			(void) pthread_mutex_unlock(
1016 			    &filebench_shm->shm_fsparalloc_lock);
1017 
1018 			/*
1019 			 * Fire off a detached allocation thread per file.
1020 			 * The thread will self destruct when it finishes
1021 			 * writing pre-allocation data to the file.
1022 			 */
1023 			if (pthread_create(&tid, NULL,
1024 			    (void *(*)(void*))fileset_alloc_thread,
1025 			    entry) == 0) {
1026 				/*
1027 				 * A thread was created; detach it so it can
1028 				 * fully quit when finished.
1029 				 */
1030 				(void) pthread_detach(tid);
1031 			} else {
1032 				filebench_log(LOG_ERROR,
1033 				    "File prealloc thread create failed");
1034 				filebench_shutdown(1);
1035 			}
1036 
1037 		} else {
1038 			if (fileset_alloc_file(entry) == FILEBENCH_ERROR)
1039 				return (FILEBENCH_ERROR);
1040 		}
1041 	}
1042 
1043 	/* alloc any leaf directories, as required */
1044 	pickflags =
1045 	    FILESET_PICKUNIQUE | FILESET_PICKRESET | FILESET_PICKLEAFDIR;
1046 	while (entry = fileset_pick(fileset, pickflags, 0)) {
1047 
1048 		pickflags = FILESET_PICKUNIQUE | FILESET_PICKLEAFDIR;
1049 
1050 		/* entry doesn't need to be locked during initialization */
1051 		fileset_unbusy(entry, FALSE, FALSE);
1052 
1053 		if (rand() < randno)
1054 			continue;
1055 
1056 		preallocated++;
1057 
1058 		if (reusing)
1059 			entry->fse_flags |= FSE_REUSING;
1060 		else
1061 			entry->fse_flags &= (~FSE_REUSING);
1062 
1063 		if (fileset_alloc_leafdir(entry) == FILEBENCH_ERROR)
1064 			return (FILEBENCH_ERROR);
1065 	}
1066 
1067 exit:
1068 	filebench_log(LOG_VERBOSE,
1069 	    "Preallocated %d of %llu of %s %s in %llu seconds",
1070 	    preallocated,
1071 	    (u_longlong_t)fileset->fs_constentries,
1072 	    fileset_entity_name(fileset), fileset_name,
1073 	    (u_longlong_t)(((gethrtime() - start) / 1000000000) + 1));
1074 
1075 	return (FILEBENCH_OK);
1076 }
1077 
1078 /*
1079  * Adds an entry to the fileset's file list. Single threaded so
1080  * no locking needed.
1081  */
1082 static void
1083 fileset_insfilelist(fileset_t *fileset, filesetentry_t *entry)
1084 {
1085 	if (fileset->fs_filelist == NULL) {
1086 		fileset->fs_filelist = entry;
1087 		entry->fse_filenext = NULL;
1088 	} else {
1089 		entry->fse_filenext = fileset->fs_filelist;
1090 		fileset->fs_filelist = entry;
1091 	}
1092 }
1093 
1094 /*
1095  * Adds an entry to the fileset's directory list. Single
1096  * threaded so no locking needed.
1097  */
1098 static void
1099 fileset_insdirlist(fileset_t *fileset, filesetentry_t *entry)
1100 {
1101 	if (fileset->fs_dirlist == NULL) {
1102 		fileset->fs_dirlist = entry;
1103 		entry->fse_dirnext = NULL;
1104 	} else {
1105 		entry->fse_dirnext = fileset->fs_dirlist;
1106 		fileset->fs_dirlist = entry;
1107 	}
1108 }
1109 
1110 /*
1111  * Adds an entry to the fileset's leaf directory list. Single
1112  * threaded so no locking needed.
1113  */
1114 static void
1115 fileset_insleafdirlist(fileset_t *fileset, filesetentry_t *entry)
1116 {
1117 	if (fileset->fs_leafdirlist == NULL) {
1118 		fileset->fs_leafdirlist = entry;
1119 		entry->fse_leafdirnext = NULL;
1120 	} else {
1121 		entry->fse_leafdirnext = fileset->fs_leafdirlist;
1122 		fileset->fs_leafdirlist = entry;
1123 	}
1124 }
1125 
1126 /*
1127  * Obtains a filesetentry entity for a file to be placed in a
1128  * (sub)directory of a fileset. The size of the file may be
1129  * specified by fileset_meansize, or calculated from a gamma
1130  * distribution of parameter fileset_sizegamma and of mean size
1131  * fileset_meansize. The filesetentry entity is placed on the file
1132  * list in the specified parent filesetentry entity, which may
1133  * be a directory filesetentry, or the root filesetentry in the
1134  * fileset. It is also placed on the fileset's list of all
1135  * contained files. Returns FILEBENCH_OK if successful or FILEBENCH_ERROR
1136  * if ipc memory for the path string cannot be allocated.
1137  */
1138 static int
1139 fileset_populate_file(fileset_t *fileset, filesetentry_t *parent, int serial)
1140 {
1141 	char tmpname[16];
1142 	filesetentry_t *entry;
1143 	double drand;
1144 
1145 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
1146 	    == NULL) {
1147 		filebench_log(LOG_ERROR,
1148 		    "fileset_populate_file: Can't malloc filesetentry");
1149 		return (FILEBENCH_ERROR);
1150 	}
1151 
1152 	/* Another currently idle file */
1153 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
1154 	fileset->fs_idle_files++;
1155 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
1156 
1157 	entry->fse_parent = parent;
1158 	entry->fse_fileset = fileset;
1159 	entry->fse_flags = FSE_TYPE_FILE | FSE_FREE;
1160 	fileset_insfilelist(fileset, entry);
1161 
1162 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
1163 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
1164 		filebench_log(LOG_ERROR,
1165 		    "fileset_populate_file: Can't alloc path string");
1166 		return (FILEBENCH_ERROR);
1167 	}
1168 
1169 	/* see if random variable was supplied for file size */
1170 	if (fileset->fs_meansize == -1) {
1171 		entry->fse_size = (off64_t)avd_get_int(fileset->fs_size);
1172 	} else {
1173 		double gamma;
1174 
1175 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
1176 		if (gamma > 0) {
1177 			drand = gamma_dist_knuth(gamma,
1178 			    fileset->fs_meansize / gamma);
1179 			entry->fse_size = (off64_t)drand;
1180 		} else {
1181 			entry->fse_size = (off64_t)fileset->fs_meansize;
1182 		}
1183 	}
1184 
1185 	fileset->fs_bytes += entry->fse_size;
1186 
1187 	fileset->fs_realfiles++;
1188 	return (FILEBENCH_OK);
1189 }
1190 
1191 /*
1192  * Obtaines a filesetentry entity for a leaf directory to be placed in a
1193  * (sub)directory of a fileset. The leaf directory will always be empty so
1194  * it can be created and deleted (mkdir, rmdir) at will. The filesetentry
1195  * entity is placed on the leaf directory list in the specified parent
1196  * filesetentry entity, which may be a (sub) directory filesetentry, or
1197  * the root filesetentry in the fileset. It is also placed on the fileset's
1198  * list of all contained leaf directories. Returns FILEBENCH_OK if successful
1199  * or FILEBENCH_ERROR if ipc memory cannot be allocated.
1200  */
1201 static int
1202 fileset_populate_leafdir(fileset_t *fileset, filesetentry_t *parent, int serial)
1203 {
1204 	char tmpname[16];
1205 	filesetentry_t *entry;
1206 
1207 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
1208 	    == NULL) {
1209 		filebench_log(LOG_ERROR,
1210 		    "fileset_populate_file: Can't malloc filesetentry");
1211 		return (FILEBENCH_ERROR);
1212 	}
1213 
1214 	/* Another currently idle leaf directory */
1215 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
1216 	fileset->fs_idle_leafdirs++;
1217 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
1218 
1219 	entry->fse_parent = parent;
1220 	entry->fse_fileset = fileset;
1221 	entry->fse_flags = FSE_TYPE_LEAFDIR | FSE_FREE;
1222 	fileset_insleafdirlist(fileset, entry);
1223 
1224 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
1225 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
1226 		filebench_log(LOG_ERROR,
1227 		    "fileset_populate_file: Can't alloc path string");
1228 		return (FILEBENCH_ERROR);
1229 	}
1230 
1231 	fileset->fs_realleafdirs++;
1232 	return (FILEBENCH_OK);
1233 }
1234 
1235 /*
1236  * Creates a directory node in a fileset, by obtaining a
1237  * filesetentry entity for the node and initializing it
1238  * according to parameters of the fileset. It determines a
1239  * directory tree depth and directory width, optionally using
1240  * a gamma distribution. If its calculated depth is less then
1241  * its actual depth in the directory tree, it becomes a leaf
1242  * node and files itself with "width" number of file type
1243  * filesetentries, otherwise it files itself with "width"
1244  * number of directory type filesetentries, using recursive
1245  * calls to fileset_populate_subdir. The end result of the
1246  * initial call to this routine is a tree of directories of
1247  * random width and varying depth with sufficient leaf
1248  * directories to contain all required files.
1249  * Returns FILEBENCH_OK on success. Returns FILEBENCH_ERROR if ipc path
1250  * string memory cannot be allocated and returns the error code (currently
1251  * also FILEBENCH_ERROR) from calls to fileset_populate_file or recursive
1252  * calls to fileset_populate_subdir.
1253  */
1254 static int
1255 fileset_populate_subdir(fileset_t *fileset, filesetentry_t *parent,
1256     int serial, double depth)
1257 {
1258 	double randepth, drand, ranwidth;
1259 	int isleaf = 0;
1260 	char tmpname[16];
1261 	filesetentry_t *entry;
1262 	int i;
1263 
1264 	depth += 1;
1265 
1266 	/* Create dir node */
1267 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
1268 	    == NULL) {
1269 		filebench_log(LOG_ERROR,
1270 		    "fileset_populate_subdir: Can't malloc filesetentry");
1271 		return (FILEBENCH_ERROR);
1272 	}
1273 
1274 	/* another idle directory */
1275 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
1276 	fileset->fs_idle_dirs++;
1277 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
1278 
1279 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
1280 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
1281 		filebench_log(LOG_ERROR,
1282 		    "fileset_populate_subdir: Can't alloc path string");
1283 		return (FILEBENCH_ERROR);
1284 	}
1285 
1286 	entry->fse_parent = parent;
1287 	entry->fse_fileset = fileset;
1288 	entry->fse_flags = FSE_TYPE_DIR | FSE_FREE;
1289 	fileset_insdirlist(fileset, entry);
1290 
1291 	if (fileset->fs_dirdepthrv) {
1292 		randepth = (int)avd_get_int(fileset->fs_dirdepthrv);
1293 	} else {
1294 		double gamma;
1295 
1296 		gamma = avd_get_int(fileset->fs_dirgamma) / 1000.0;
1297 		if (gamma > 0) {
1298 			drand = gamma_dist_knuth(gamma,
1299 			    fileset->fs_meandepth / gamma);
1300 			randepth = (int)drand;
1301 		} else {
1302 			randepth = (int)fileset->fs_meandepth;
1303 		}
1304 	}
1305 
1306 	if (fileset->fs_meanwidth == -1) {
1307 		ranwidth = avd_get_dbl(fileset->fs_dirwidth);
1308 	} else {
1309 		double gamma;
1310 
1311 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
1312 		if (gamma > 0) {
1313 			drand = gamma_dist_knuth(gamma,
1314 			    fileset->fs_meanwidth / gamma);
1315 			ranwidth = drand;
1316 		} else {
1317 			ranwidth = fileset->fs_meanwidth;
1318 		}
1319 	}
1320 
1321 	if (randepth == 0)
1322 		randepth = 1;
1323 	if (ranwidth == 0)
1324 		ranwidth = 1;
1325 	if (depth >= randepth)
1326 		isleaf = 1;
1327 
1328 	/*
1329 	 * Create directory of random width filled with files according
1330 	 * to distribution, or if root directory, continue until #files required
1331 	 */
1332 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
1333 	    (fileset->fs_realfiles < fileset->fs_constentries);
1334 	    i++) {
1335 		int ret = 0;
1336 
1337 		if (parent && isleaf)
1338 			ret = fileset_populate_file(fileset, entry, i);
1339 		else
1340 			ret = fileset_populate_subdir(fileset, entry, i, depth);
1341 
1342 		if (ret != 0)
1343 			return (ret);
1344 	}
1345 
1346 	/*
1347 	 * Create directory of random width filled with leaf directories
1348 	 * according to distribution, or if root directory, continue until
1349 	 * the number of leaf directories required has been generated.
1350 	 */
1351 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
1352 	    (fileset->fs_realleafdirs < fileset->fs_constleafdirs);
1353 	    i++) {
1354 		int ret = 0;
1355 
1356 		if (parent && isleaf)
1357 			ret = fileset_populate_leafdir(fileset, entry, i);
1358 		else
1359 			ret = fileset_populate_subdir(fileset, entry, i, depth);
1360 
1361 		if (ret != 0)
1362 			return (ret);
1363 	}
1364 
1365 	return (FILEBENCH_OK);
1366 }
1367 
1368 /*
1369  * Populates a fileset with files and subdirectory entries. Uses
1370  * the supplied fileset_dirwidth and fileset_entries (number of files) to
1371  * calculate the required fileset_meandepth (of subdirectories) and
1372  * initialize the fileset_meanwidth and fileset_meansize variables. Then
1373  * calls fileset_populate_subdir() to do the recursive
1374  * subdirectory entry creation and leaf file entry creation. All
1375  * of the above is skipped if the fileset has already been
1376  * populated. Returns 0 on success, or an error code from the
1377  * call to fileset_populate_subdir if that call fails.
1378  */
1379 static int
1380 fileset_populate(fileset_t *fileset)
1381 {
1382 	int entries = (int)avd_get_int(fileset->fs_entries);
1383 	int leafdirs = (int)avd_get_int(fileset->fs_leafdirs);
1384 	int meandirwidth;
1385 	int ret;
1386 
1387 	/* Skip if already populated */
1388 	if (fileset->fs_bytes > 0)
1389 		goto exists;
1390 
1391 #ifdef HAVE_RAW_SUPPORT
1392 	/* check for raw device */
1393 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
1394 		return (FILEBENCH_OK);
1395 #endif /* HAVE_RAW_SUPPORT */
1396 
1397 	/*
1398 	 * save value of entries and leaf dirs obtained for later
1399 	 * in case it was random
1400 	 */
1401 	fileset->fs_constentries = entries;
1402 	fileset->fs_constleafdirs = leafdirs;
1403 
1404 	/* declare all files and leafdirs currently non existant */
1405 	fileset->fs_num_act_files = 0;
1406 	fileset->fs_num_act_leafdirs = 0;
1407 
1408 	/* initialize idle files and directories condition variables */
1409 	(void) pthread_cond_init(&fileset->fs_idle_files_cv, ipc_condattr());
1410 	(void) pthread_cond_init(&fileset->fs_idle_dirs_cv, ipc_condattr());
1411 	(void) pthread_cond_init(&fileset->fs_idle_leafdirs_cv, ipc_condattr());
1412 
1413 	/* no files or dirs idle (or busy) yet */
1414 	fileset->fs_idle_files = 0;
1415 	fileset->fs_idle_dirs = 0;
1416 	fileset->fs_idle_leafdirs = 0;
1417 
1418 	/* initialize locks and other condition variables */
1419 	(void) pthread_mutex_init(&fileset->fs_pick_lock,
1420 	    ipc_mutexattr(IPC_MUTEX_NORMAL));
1421 	(void) pthread_cond_init(&fileset->fs_thrd_wait_cv, ipc_condattr());
1422 
1423 	/* is dirwidth a random variable? */
1424 	if (AVD_IS_RANDOM(fileset->fs_dirwidth)) {
1425 		meandirwidth =
1426 		    (int)fileset->fs_dirwidth->avd_val.randptr->rnd_dbl_mean;
1427 		fileset->fs_meanwidth = -1;
1428 	} else {
1429 		meandirwidth = (int)avd_get_int(fileset->fs_dirwidth);
1430 		fileset->fs_meanwidth = (double)meandirwidth;
1431 	}
1432 
1433 	/*
1434 	 * Input params are:
1435 	 *	# of files
1436 	 *	ave # of files per dir
1437 	 *	max size of dir
1438 	 *	# ave size of file
1439 	 *	max size of file
1440 	 */
1441 	fileset->fs_meandepth = log(entries+leafdirs) / log(meandirwidth);
1442 
1443 	/* Has a random variable been supplied for dirdepth? */
1444 	if (fileset->fs_dirdepthrv) {
1445 		/* yes, so set the random variable's mean value to meandepth */
1446 		fileset->fs_dirdepthrv->avd_val.randptr->rnd_dbl_mean =
1447 		    fileset->fs_meandepth;
1448 	}
1449 
1450 	/* test for random size variable */
1451 	if (AVD_IS_RANDOM(fileset->fs_size))
1452 		fileset->fs_meansize = -1;
1453 	else
1454 		fileset->fs_meansize = avd_get_int(fileset->fs_size);
1455 
1456 	if ((ret = fileset_populate_subdir(fileset, NULL, 1, 0)) != 0)
1457 		return (ret);
1458 
1459 
1460 exists:
1461 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1462 		filebench_log(LOG_VERBOSE, "File %s: mbytes=%llu",
1463 		    avd_get_str(fileset->fs_name),
1464 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
1465 	} else {
1466 		filebench_log(LOG_VERBOSE, "Fileset %s: %d files, %d leafdirs "
1467 		    "avg dir = %d, avg depth = %.1lf, mbytes=%llu",
1468 		    avd_get_str(fileset->fs_name), entries, leafdirs,
1469 		    meandirwidth,
1470 		    fileset->fs_meandepth,
1471 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
1472 	}
1473 
1474 	return (FILEBENCH_OK);
1475 }
1476 
1477 /*
1478  * Allocates a fileset instance, initializes fileset_dirgamma and
1479  * fileset_sizegamma default values, and sets the fileset name to the
1480  * supplied name string. Puts the allocated fileset on the
1481  * master fileset list and returns a pointer to it.
1482  *
1483  * This routine implements the 'define fileset' calls found in a .f
1484  * workload, such as in the following example:
1485  * define fileset name=drew4ever, entries=$nfiles
1486  */
1487 fileset_t *
1488 fileset_define(avd_t name)
1489 {
1490 	fileset_t *fileset;
1491 
1492 	if (name == NULL)
1493 		return (NULL);
1494 
1495 	if ((fileset = (fileset_t *)ipc_malloc(FILEBENCH_FILESET)) == NULL) {
1496 		filebench_log(LOG_ERROR,
1497 		    "fileset_define: Can't malloc fileset");
1498 		return (NULL);
1499 	}
1500 
1501 	filebench_log(LOG_DEBUG_IMPL,
1502 	    "Defining file %s", avd_get_str(name));
1503 
1504 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1505 
1506 	fileset->fs_dirgamma = avd_int_alloc(1500);
1507 	fileset->fs_sizegamma = avd_int_alloc(1500);
1508 
1509 	/* Add fileset to global list */
1510 	if (filebench_shm->shm_filesetlist == NULL) {
1511 		filebench_shm->shm_filesetlist = fileset;
1512 		fileset->fs_next = NULL;
1513 	} else {
1514 		fileset->fs_next = filebench_shm->shm_filesetlist;
1515 		filebench_shm->shm_filesetlist = fileset;
1516 	}
1517 
1518 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1519 
1520 	fileset->fs_name = name;
1521 
1522 	return (fileset);
1523 }
1524 
1525 /*
1526  * If supplied with a pointer to a fileset and the fileset's
1527  * fileset_prealloc flag is set, calls fileset_populate() to populate
1528  * the fileset with filesetentries, then calls fileset_create()
1529  * to make actual directories and files for the filesetentries.
1530  * Otherwise, it applies fileset_populate() and fileset_create()
1531  * to all the filesets on the master fileset list. It always
1532  * returns zero (0) if one fileset is populated / created,
1533  * otherwise it returns the sum of returned values from
1534  * fileset_create() and fileset_populate(), which
1535  * will be a negative one (-1) times the number of
1536  * fileset_create() calls which failed.
1537  */
1538 int
1539 fileset_createset(fileset_t *fileset)
1540 {
1541 	fileset_t *list;
1542 	int ret = 0;
1543 
1544 	/* set up for possible parallel allocate */
1545 	filebench_shm->shm_fsparalloc_count = 0;
1546 	(void) pthread_cond_init(
1547 	    &filebench_shm->shm_fsparalloc_cv,
1548 	    ipc_condattr());
1549 
1550 	if (fileset && avd_get_bool(fileset->fs_prealloc)) {
1551 
1552 		/* check for raw files */
1553 		if (fileset_checkraw(fileset)) {
1554 			filebench_log(LOG_INFO,
1555 			    "file %s/%s is a RAW device",
1556 			    avd_get_str(fileset->fs_path),
1557 			    avd_get_str(fileset->fs_name));
1558 			return (FILEBENCH_OK);
1559 		}
1560 
1561 		filebench_log(LOG_INFO,
1562 		    "creating/pre-allocating %s %s",
1563 		    fileset_entity_name(fileset),
1564 		    avd_get_str(fileset->fs_name));
1565 
1566 		if ((ret = fileset_populate(fileset)) != FILEBENCH_OK)
1567 			return (ret);
1568 
1569 		if ((ret = fileset_create(fileset)) != FILEBENCH_OK)
1570 			return (ret);
1571 	} else {
1572 
1573 		filebench_log(LOG_INFO,
1574 		    "Creating/pre-allocating files and filesets");
1575 
1576 		list = filebench_shm->shm_filesetlist;
1577 		while (list) {
1578 			/* check for raw files */
1579 			if (fileset_checkraw(list)) {
1580 				filebench_log(LOG_INFO,
1581 				    "file %s/%s is a RAW device",
1582 				    avd_get_str(list->fs_path),
1583 				    avd_get_str(list->fs_name));
1584 				list = list->fs_next;
1585 				continue;
1586 			}
1587 
1588 			if ((ret = fileset_populate(list)) != FILEBENCH_OK)
1589 				return (ret);
1590 
1591 			if ((ret = fileset_create(list)) != FILEBENCH_OK)
1592 				return (ret);
1593 
1594 			list = list->fs_next;
1595 		}
1596 	}
1597 
1598 	/* wait for allocation threads to finish */
1599 	filebench_log(LOG_INFO,
1600 	    "waiting for fileset pre-allocation to finish");
1601 
1602 	(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
1603 	while (filebench_shm->shm_fsparalloc_count > 0)
1604 		(void) pthread_cond_wait(
1605 		    &filebench_shm->shm_fsparalloc_cv,
1606 		    &filebench_shm->shm_fsparalloc_lock);
1607 	(void) pthread_mutex_unlock(&filebench_shm->shm_fsparalloc_lock);
1608 
1609 	if (filebench_shm->shm_fsparalloc_count < 0)
1610 		return (FILEBENCH_ERROR);
1611 
1612 	return (FILEBENCH_OK);
1613 }
1614 
1615 /*
1616  * Searches through the master fileset list for the named fileset.
1617  * If found, returns pointer to same, otherwise returns NULL.
1618  */
1619 fileset_t *
1620 fileset_find(char *name)
1621 {
1622 	fileset_t *fileset = filebench_shm->shm_filesetlist;
1623 
1624 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1625 
1626 	while (fileset) {
1627 		if (strcmp(name, avd_get_str(fileset->fs_name)) == 0) {
1628 			(void) ipc_mutex_unlock(
1629 			    &filebench_shm->shm_fileset_lock);
1630 			return (fileset);
1631 		}
1632 		fileset = fileset->fs_next;
1633 	}
1634 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1635 
1636 	return (NULL);
1637 }
1638 
1639 /*
1640  * Iterates over all the file sets in the filesetlist,
1641  * executing the supplied command "*cmd()" on them. Also
1642  * indicates to the executed command if it is the first
1643  * time the command has been executed since the current
1644  * call to fileset_iter.
1645  */
1646 void
1647 fileset_iter(int (*cmd)(fileset_t *fileset, int first))
1648 {
1649 	fileset_t *fileset = filebench_shm->shm_filesetlist;
1650 	int count = 0;
1651 
1652 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1653 
1654 	while (fileset) {
1655 		cmd(fileset, count == 0);
1656 		fileset = fileset->fs_next;
1657 		count++;
1658 	}
1659 
1660 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1661 }
1662 
1663 /*
1664  * Prints information to the filebench log about the file
1665  * object. Also prints a header on the first call.
1666  */
1667 int
1668 fileset_print(fileset_t *fileset, int first)
1669 {
1670 	int pathlength;
1671 	char *fileset_path;
1672 	char *fileset_name;
1673 	static char pad[] = "                              "; /* 30 spaces */
1674 
1675 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
1676 		filebench_log(LOG_ERROR, "%s path not set",
1677 		    fileset_entity_name(fileset));
1678 		return (FILEBENCH_ERROR);
1679 	}
1680 
1681 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
1682 		filebench_log(LOG_ERROR, "%s name not set",
1683 		    fileset_entity_name(fileset));
1684 		return (FILEBENCH_ERROR);
1685 	}
1686 
1687 	pathlength = strlen(fileset_path) + strlen(fileset_name);
1688 
1689 	if (pathlength > 29)
1690 		pathlength = 29;
1691 
1692 	if (first) {
1693 		filebench_log(LOG_INFO, "File or Fileset name%20s%12s%10s",
1694 		    "file size",
1695 		    "dir width",
1696 		    "entries");
1697 	}
1698 
1699 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1700 		if (fileset->fs_attrs & FILESET_IS_RAW_DEV) {
1701 			filebench_log(LOG_INFO,
1702 			    "%s/%s%s         (Raw Device)",
1703 			    fileset_path, fileset_name, &pad[pathlength]);
1704 		} else {
1705 			filebench_log(LOG_INFO,
1706 			    "%s/%s%s%9llu     (Single File)",
1707 			    fileset_path, fileset_name, &pad[pathlength],
1708 			    (u_longlong_t)avd_get_int(fileset->fs_size));
1709 		}
1710 	} else {
1711 		filebench_log(LOG_INFO, "%s/%s%s%9llu%12llu%10llu",
1712 		    fileset_path, fileset_name,
1713 		    &pad[pathlength],
1714 		    (u_longlong_t)avd_get_int(fileset->fs_size),
1715 		    (u_longlong_t)avd_get_int(fileset->fs_dirwidth),
1716 		    (u_longlong_t)fileset->fs_constentries);
1717 	}
1718 	return (FILEBENCH_OK);
1719 }
1720 
1721 /*
1722  * checks to see if the path/name pair points to a raw device. If
1723  * so it sets the raw device flag (FILESET_IS_RAW_DEV) and returns 1.
1724  * If RAW is not defined, or it is not a raw device, it clears the
1725  * raw device flag and returns 0.
1726  */
1727 int
1728 fileset_checkraw(fileset_t *fileset)
1729 {
1730 	char path[MAXPATHLEN];
1731 	struct stat64 sb;
1732 	char *pathname;
1733 	char *setname;
1734 
1735 	fileset->fs_attrs &= (~FILESET_IS_RAW_DEV);
1736 
1737 #ifdef HAVE_RAW_SUPPORT
1738 	/* check for raw device */
1739 	if ((pathname = avd_get_str(fileset->fs_path)) == NULL)
1740 		return (FILEBENCH_OK);
1741 
1742 	if ((setname = avd_get_str(fileset->fs_name)) == NULL)
1743 		return (FILEBENCH_OK);
1744 
1745 	(void) fb_strlcpy(path, pathname, MAXPATHLEN);
1746 	(void) fb_strlcat(path, "/", MAXPATHLEN);
1747 	(void) fb_strlcat(path, setname, MAXPATHLEN);
1748 	if ((stat64(path, &sb) == 0) &&
1749 	    ((sb.st_mode & S_IFMT) == S_IFBLK) && sb.st_rdev) {
1750 		fileset->fs_attrs |= FILESET_IS_RAW_DEV;
1751 		if (!(fileset->fs_attrs & FILESET_IS_FILE)) {
1752 			filebench_log(LOG_ERROR,
1753 			    "WARNING Fileset %s/%s Cannot be RAW device",
1754 			    avd_get_str(fileset->fs_path),
1755 			    avd_get_str(fileset->fs_name));
1756 			filebench_shutdown(1);
1757 		}
1758 
1759 		return (1);
1760 	}
1761 #endif /* HAVE_RAW_SUPPORT */
1762 
1763 	return (FILEBENCH_OK);
1764 }
1765