xref: /onnv-gate/usr/src/cmd/filebench/common/fileset.c (revision 7556:55f6926392fe)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Portions Copyright 2008 Denis Cheng
26  */
27 
28 #include <fcntl.h>
29 #include <pthread.h>
30 #include <errno.h>
31 #include <math.h>
32 #include <libgen.h>
33 #include <sys/mman.h>
34 
35 #include "filebench.h"
36 #include "fileset.h"
37 #include "gamma_dist.h"
38 
39 /*
40  * File sets, of type fileset_t, are entities which contain
41  * information about collections of files and subdirectories in Filebench.
42  * The fileset, once populated, consists of a tree of fileset entries of
43  * type filesetentry_t which specify files and directories.  The fileset
44  * is rooted in a directory specified by fileset_path, and once the populated
45  * fileset has been created, has a tree of directories and files
46  * corresponding to the fileset's filesetentry tree.
47  *
48  * Fileset entities are allocated by fileset_define() which is called from
49  * parser_gram.y: parser_fileset_define(). The filesetentry tree corresponding
50  * to the eventual directory and file tree to be instantiated on the storage
51  * medium is built by fileset_populate(), which is called from
52  * fileset_createset(). After calling fileset_populate(), fileset_createset()
53  * will call fileset_create() to pre-allocate designated files and directories.
54  *
55  * Fileset_createset() is called from parser_gram.y: parser_create_fileset()
56  * when a "create fileset" or "run" command is encountered. When the
57  * "create fileset" command is used, it is generally paired with
58  * a "create processes" command, and must appear first, in order to
59  * instantiate all the files in the fileset before trying to use them.
60  */
61 
/* Forward declaration of a file-local (static) helper. */
static int fileset_checkraw(fileset_t *fileset);

/*
 * Maximum parallel allocation control: fileset_create() waits before
 * starting another allocation thread while shm_fsparalloc_count is at
 * or above this value.
 */
#define	MAX_PARALLOC_THREADS 32
66 
67 /*
68  * returns pointer to file or fileset
69  * string, as appropriate
70  */
71 static char *
72 fileset_entity_name(fileset_t *fileset)
73 {
74 	if (fileset->fs_attrs & FILESET_IS_FILE)
75 		return ("file");
76 	else
77 		return ("fileset");
78 }
79 
/*
 * Strips the last component from a pathname, in place. Working from
 * the terminating NUL back towards the start, every character is
 * overwritten with NUL until a forward slash '/' has been overwritten.
 * The first character of the string is never examined or modified, so
 * a string with no slash after its first character shrinks to that
 * single character. Returns "dir".
 */
static char *
trunc_dirname(char *dir)
{
	size_t pos;

	for (pos = strlen(dir); pos > 0; pos--) {
		char ch = dir[pos];

		dir[pos] = '\0';
		if (ch == '/')
			break;
	}
	return (dir);
}
101 
/*
 * Prints a list of allowed options and how to specify them.
 * The text is written to stderr, one option per line, followed by a
 * blank line.
 */
void
fileset_usage(void)
{
	/*
	 * The first line used to print "path=<pathname>,,entries=" with a
	 * doubled comma; exactly one separator is emitted now.
	 */
	static const char *const usage_lines[] = {
		"define [file name=<name> | fileset name=<name>],path=<pathname>"
		",entries=<number>\n",
		"		        [,filesize=[size]]\n",
		"		        [,dirwidth=[width]]\n",
		"		        [,dirdepthrv=$random_variable_name]\n",
		"		        [,dirgamma=[100-10000]] "
		"(Gamma * 1000)\n",
		"		        [,sizegamma=[100-10000]] (Gamma * 1000)\n",
		"		        [,prealloc=[percent]]\n",
		"		        [,paralloc]\n",
		"		        [,reuse]\n",
		"\n"
	};
	size_t i;

	for (i = 0; i < sizeof (usage_lines) / sizeof (usage_lines[0]); i++)
		(void) fputs(usage_lines[i], stderr);
}
128 
129 /*
130  * Frees up memory mapped file region of supplied size. The
131  * file descriptor "fd" indicates which memory mapped file.
132  * If successful, returns 0. Otherwise returns -1 times the number of
133  * times msync() failed.
134  */
135 static int
136 fileset_freemem(int fd, off64_t size)
137 {
138 	off64_t left;
139 	int ret = 0;
140 
141 	for (left = size; left > 0; left -= MMAP_SIZE) {
142 		off64_t thismapsize;
143 		caddr_t addr;
144 
145 		thismapsize = MIN(MMAP_SIZE, left);
146 		addr = mmap64(0, thismapsize, PROT_READ|PROT_WRITE,
147 		    MAP_SHARED, fd, size - left);
148 		ret += msync(addr, thismapsize, MS_INVALIDATE);
149 		(void) munmap(addr, thismapsize);
150 	}
151 	return (ret);
152 }
153 
154 /*
155  * Creates a path string from the filesetentry_t "*entry"
156  * and all of its parent's path names. The resulting path
157  * is a concatination of all the individual parent paths.
158  * Allocates memory for the path string and returns a
159  * pointer to it.
160  */
161 char *
162 fileset_resolvepath(filesetentry_t *entry)
163 {
164 	filesetentry_t *fsep = entry;
165 	char path[MAXPATHLEN];
166 	char pathtmp[MAXPATHLEN];
167 	char *s;
168 
169 	*path = 0;
170 	while (fsep->fse_parent) {
171 		(void) strcpy(pathtmp, "/");
172 		(void) strcat(pathtmp, fsep->fse_path);
173 		(void) strcat(pathtmp, path);
174 		(void) strcpy(path, pathtmp);
175 		fsep = fsep->fse_parent;
176 	}
177 
178 	s = malloc(strlen(path) + 1);
179 	(void) strcpy(s, path);
180 	return (s);
181 }
182 
183 /*
184  * Creates multiple nested directories as required by the
185  * supplied path. Starts at the end of the path, creating
186  * a list of directories to mkdir, up to the root of the
187  * path, then mkdirs them one at a time from the root on down.
188  */
189 static int
190 fileset_mkdir(char *path, int mode)
191 {
192 	char *p;
193 	char *dirs[65536];
194 	int i = 0;
195 
196 	if ((p = strdup(path)) == NULL)
197 		goto null_str;
198 
199 	/*
200 	 * Fill an array of subdirectory path names until either we
201 	 * reach the root or encounter an already existing subdirectory
202 	 */
203 	/* CONSTCOND */
204 	while (1) {
205 		struct stat64 sb;
206 
207 		if (stat64(p, &sb) == 0)
208 			break;
209 		if (strlen(p) < 3)
210 			break;
211 		if ((dirs[i] = strdup(p)) == NULL) {
212 			free(p);
213 			goto null_str;
214 		}
215 
216 		(void) trunc_dirname(p);
217 		i++;
218 	}
219 
220 	/* Make the directories, from closest to root downwards. */
221 	for (--i; i >= 0; i--) {
222 		(void) mkdir(dirs[i], mode);
223 		free(dirs[i]);
224 	}
225 
226 	free(p);
227 	return (FILEBENCH_OK);
228 
229 null_str:
230 	/* clean up */
231 	for (--i; i >= 0; i--)
232 		free(dirs[i]);
233 
234 	filebench_log(LOG_ERROR,
235 	    "Failed to create directory path %s: Out of memory", path);
236 
237 	return (FILEBENCH_ERROR);
238 }
239 
240 /*
241  * creates the subdirectory tree for a fileset.
242  */
243 static int
244 fileset_create_subdirs(fileset_t *fileset, char *filesetpath)
245 {
246 	filesetentry_t *direntry;
247 	char full_path[MAXPATHLEN];
248 	char *part_path;
249 
250 	/* walk the subdirectory list, enstanciating subdirs */
251 	direntry = fileset->fs_dirlist;
252 	while (direntry) {
253 		(void) strcpy(full_path, filesetpath);
254 		part_path = fileset_resolvepath(direntry);
255 		(void) strcat(full_path, part_path);
256 		free(part_path);
257 
258 		/* now create this portion of the subdirectory tree */
259 		if (fileset_mkdir(full_path, 0755) == FILEBENCH_ERROR)
260 			return (FILEBENCH_ERROR);
261 
262 		direntry = direntry->fse_dirnext;
263 	}
264 	return (FILEBENCH_OK);
265 }
266 
267 /*
268  * given a fileset entry, determines if the associated file
269  * needs to be allocated or not, and if so does the allocation.
270  */
271 static int
272 fileset_alloc_file(filesetentry_t *entry)
273 {
274 	char path[MAXPATHLEN];
275 	char *buf;
276 	struct stat64 sb;
277 	char *pathtmp;
278 	off64_t seek;
279 	int fd;
280 
281 	*path = 0;
282 	(void) strcpy(path, avd_get_str(entry->fse_fileset->fs_path));
283 	(void) strcat(path, "/");
284 	(void) strcat(path, avd_get_str(entry->fse_fileset->fs_name));
285 	pathtmp = fileset_resolvepath(entry);
286 	(void) strcat(path, pathtmp);
287 
288 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
289 
290 	/* see if reusing and this file exists */
291 	if ((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0)) {
292 		if ((fd = open64(path, O_RDWR)) < 0) {
293 			filebench_log(LOG_INFO,
294 			    "Attempted but failed to Re-use file %s",
295 			    path);
296 			return (FILEBENCH_ERROR);
297 		}
298 
299 		if (sb.st_size == (off64_t)entry->fse_size) {
300 			filebench_log(LOG_DEBUG_IMPL,
301 			    "Re-using file %s", path);
302 
303 			if (!avd_get_bool(entry->fse_fileset->fs_cached))
304 				(void) fileset_freemem(fd,
305 				    entry->fse_size);
306 
307 			(void) ipc_mutex_lock(
308 			    &entry->fse_fileset->fs_pick_lock);
309 			entry->fse_flags |= FSE_EXISTS;
310 			entry->fse_fileset->fs_num_act_files++;
311 			(void) ipc_mutex_unlock(
312 			    &entry->fse_fileset->fs_pick_lock);
313 
314 			(void) close(fd);
315 			return (FILEBENCH_OK);
316 
317 		} else if (sb.st_size > (off64_t)entry->fse_size) {
318 			/* reuse, but too large */
319 			filebench_log(LOG_INFO,
320 			    "Truncating & re-using file %s", path);
321 
322 #ifdef HAVE_FTRUNCATE64
323 			(void) ftruncate64(fd, (off64_t)entry->fse_size);
324 #else
325 			(void) ftruncate(fd, (off_t)entry->fse_size);
326 #endif
327 
328 			if (!avd_get_bool(entry->fse_fileset->fs_cached))
329 				(void) fileset_freemem(fd,
330 				    entry->fse_size);
331 
332 			(void) ipc_mutex_lock(
333 			    &entry->fse_fileset->fs_pick_lock);
334 			entry->fse_flags |= FSE_EXISTS;
335 			entry->fse_fileset->fs_num_act_files++;
336 			(void) ipc_mutex_unlock(
337 			    &entry->fse_fileset->fs_pick_lock);
338 
339 			(void) close(fd);
340 			return (FILEBENCH_OK);
341 		}
342 	} else {
343 
344 		/* No file or not reusing, so create */
345 		if ((fd = open64(path, O_RDWR | O_CREAT, 0644)) < 0) {
346 			filebench_log(LOG_ERROR,
347 			    "Failed to pre-allocate file %s: %s",
348 			    path, strerror(errno));
349 
350 			return (FILEBENCH_ERROR);
351 		}
352 	}
353 
354 	if ((buf = (char *)malloc(FILE_ALLOC_BLOCK)) == NULL)
355 		return (FILEBENCH_ERROR);
356 
357 	(void) ipc_mutex_lock(&entry->fse_fileset->fs_pick_lock);
358 	entry->fse_flags |= FSE_EXISTS;
359 	entry->fse_fileset->fs_num_act_files++;
360 	(void) ipc_mutex_unlock(&entry->fse_fileset->fs_pick_lock);
361 
362 	for (seek = 0; seek < entry->fse_size; ) {
363 		off64_t wsize;
364 		int ret = 0;
365 
366 		/*
367 		 * Write FILE_ALLOC_BLOCK's worth,
368 		 * except on last write
369 		 */
370 		wsize = MIN(entry->fse_size - seek, FILE_ALLOC_BLOCK);
371 
372 		ret = write(fd, buf, wsize);
373 		if (ret != wsize) {
374 			filebench_log(LOG_ERROR,
375 			    "Failed to pre-allocate file %s: %s",
376 			    path, strerror(errno));
377 			(void) close(fd);
378 			free(buf);
379 			return (FILEBENCH_ERROR);
380 		}
381 		seek += wsize;
382 	}
383 
384 	if (!avd_get_bool(entry->fse_fileset->fs_cached))
385 		(void) fileset_freemem(fd, entry->fse_size);
386 
387 	(void) close(fd);
388 
389 	free(buf);
390 
391 	filebench_log(LOG_DEBUG_IMPL,
392 	    "Pre-allocated file %s size %llu",
393 	    path, (u_longlong_t)entry->fse_size);
394 
395 	return (FILEBENCH_OK);
396 }
397 
398 /*
399  * given a fileset entry, determines if the associated file
400  * needs to be allocated or not, and if so does the allocation.
401  * Sets shm_fsparalloc_count to -1 on error.
402  */
403 static void *
404 fileset_alloc_thread(filesetentry_t *entry)
405 {
406 	if (fileset_alloc_file(entry) == FILEBENCH_ERROR) {
407 		(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
408 		filebench_shm->shm_fsparalloc_count = -1;
409 	} else {
410 		(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
411 		filebench_shm->shm_fsparalloc_count--;
412 	}
413 
414 	(void) pthread_cond_signal(&filebench_shm->shm_fsparalloc_cv);
415 	(void) pthread_mutex_unlock(&filebench_shm->shm_fsparalloc_lock);
416 
417 	pthread_exit(NULL);
418 	return (NULL);
419 }
420 
421 
422 /*
423  * First creates the parent directories of the file using
424  * fileset_mkdir(). Then Optionally sets the O_DSYNC flag
425  * and opens the file with open64(). It unlocks the fileset
426  * entry lock, sets the DIRECTIO_ON or DIRECTIO_OFF flags
427  * as requested, and returns the file descriptor integer
428  * for the opened file.
429  */
430 int
431 fileset_openfile(fileset_t *fileset,
432     filesetentry_t *entry, int flag, int mode, int attrs)
433 {
434 	char path[MAXPATHLEN];
435 	char dir[MAXPATHLEN];
436 	char *pathtmp;
437 	struct stat64 sb;
438 	int fd;
439 	int open_attrs = 0;
440 
441 	*path = 0;
442 	(void) strcpy(path, avd_get_str(fileset->fs_path));
443 	(void) strcat(path, "/");
444 	(void) strcat(path, avd_get_str(fileset->fs_name));
445 	pathtmp = fileset_resolvepath(entry);
446 	(void) strcat(path, pathtmp);
447 	(void) strcpy(dir, path);
448 	free(pathtmp);
449 	(void) trunc_dirname(dir);
450 
451 	/* If we are going to create a file, create the parent dirs */
452 	if ((flag & O_CREAT) && (stat64(dir, &sb) != 0)) {
453 		if (fileset_mkdir(dir, 0755) == FILEBENCH_ERROR)
454 			return (FILEBENCH_ERROR);
455 	}
456 
457 	if (attrs & FLOW_ATTR_DSYNC) {
458 #ifdef sun
459 		open_attrs |= O_DSYNC;
460 #else
461 		open_attrs |= O_FSYNC;
462 #endif
463 	}
464 
465 	if ((fd = open64(path, flag | open_attrs, mode)) < 0) {
466 		filebench_log(LOG_ERROR,
467 		    "Failed to open file %s: %s",
468 		    path, strerror(errno));
469 
470 		fileset_unbusy(entry, FALSE, FALSE);
471 		return (FILEBENCH_ERROR);
472 	}
473 
474 	if (flag & O_CREAT)
475 		fileset_unbusy(entry, TRUE, TRUE);
476 	else
477 		fileset_unbusy(entry, FALSE, FALSE);
478 
479 #ifdef sun
480 	if (attrs & FLOW_ATTR_DIRECTIO)
481 		(void) directio(fd, DIRECTIO_ON);
482 	else
483 		(void) directio(fd, DIRECTIO_OFF);
484 #endif
485 
486 	return (fd);
487 }
488 
489 
490 /*
491  * Selects a fileset entry from a fileset. If the
492  * FILESET_PICKDIR flag is set it will pick a directory
493  * entry, otherwise a file entry. The FILESET_PICKRESET
494  * flag will cause it to reset the free list to the
495  * overall list (file or directory). The FILESET_PICKUNIQUE
496  * flag will take an entry off of one of the free (unused)
497  * lists (file or directory), otherwise the entry will be
498  * picked off of one of the rotor lists (file or directory).
499  * The FILESET_PICKEXISTS will insure that only extant
500  * (FSE_EXISTS) state files are selected, while
501  * FILESET_PICKNOEXIST insures that only non extant
502  * (not FSE_EXISTS) state files are selected.
503  * Note that the selected fileset entry (file) is returned
504  * with its FSE_BUSY flag (in fse_flags) set.
505  */
506 filesetentry_t *
507 fileset_pick(fileset_t *fileset, int flags, int tid)
508 {
509 	filesetentry_t *entry = NULL;
510 	filesetentry_t *first = NULL;
511 
512 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
513 
514 	/* see if we have to wait for available files or directories */
515 	if (flags & FILESET_PICKDIR) {
516 		while (fileset->fs_idle_dirs == 0) {
517 			(void) pthread_cond_wait(&fileset->fs_idle_dirs_cv,
518 			    &fileset->fs_pick_lock);
519 		}
520 	} else {
521 		while (fileset->fs_idle_files == 0) {
522 			(void) pthread_cond_wait(&fileset->fs_idle_files_cv,
523 			    &fileset->fs_pick_lock);
524 		}
525 	}
526 
527 	/* see if asking for impossible */
528 	if (flags & FILESET_PICKEXISTS) {
529 		if (fileset->fs_num_act_files == 0) {
530 			(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
531 			return (NULL);
532 		}
533 	} else if (flags & FILESET_PICKNOEXIST) {
534 		if (fileset->fs_num_act_files == fileset->fs_realfiles) {
535 			(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
536 			return (NULL);
537 		}
538 	}
539 
540 	while (entry == NULL) {
541 
542 		if ((flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
543 			entry = fileset->fs_dirlist;
544 			while (entry) {
545 				entry->fse_flags |= FSE_FREE;
546 				entry = entry->fse_dirnext;
547 			}
548 			fileset->fs_dirfree = fileset->fs_dirlist;
549 		}
550 
551 		if (!(flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
552 			entry = fileset->fs_filelist;
553 			while (entry) {
554 				entry->fse_flags |= FSE_FREE;
555 				entry = entry->fse_filenext;
556 			}
557 			fileset->fs_filefree = fileset->fs_filelist;
558 		}
559 
560 		if (flags & FILESET_PICKUNIQUE) {
561 			if (flags & FILESET_PICKDIR) {
562 				entry = fileset->fs_dirfree;
563 				if (entry == NULL)
564 					goto empty;
565 				fileset->fs_dirfree = entry->fse_dirnext;
566 			} else {
567 				entry = fileset->fs_filefree;
568 				if (entry == NULL)
569 					goto empty;
570 				fileset->fs_filefree = entry->fse_filenext;
571 			}
572 			entry->fse_flags &= ~FSE_FREE;
573 		} else {
574 			if (flags & FILESET_PICKDIR) {
575 				entry = fileset->fs_dirrotor;
576 				if (entry == NULL)
577 				fileset->fs_dirrotor =
578 				    entry = fileset->fs_dirlist;
579 				fileset->fs_dirrotor = entry->fse_dirnext;
580 			} else {
581 				if (flags & FILESET_PICKNOEXIST) {
582 					entry = fileset->fs_file_ne_rotor;
583 					if (entry == NULL)
584 						fileset->fs_file_ne_rotor =
585 						    entry =
586 						    fileset->fs_filelist;
587 					fileset->fs_file_ne_rotor =
588 					    entry->fse_filenext;
589 				} else {
590 					entry = fileset->fs_filerotor[tid];
591 					if (entry == NULL)
592 						fileset->fs_filerotor[tid] =
593 						    entry =
594 						    fileset->fs_filelist;
595 					fileset->fs_filerotor[tid] =
596 					    entry->fse_filenext;
597 				}
598 			}
599 		}
600 
601 		if (first == entry)
602 			goto empty;
603 
604 		if (first == NULL)
605 			first = entry;
606 
607 		/* see if entry in use */
608 		if (entry->fse_flags & FSE_BUSY) {
609 
610 			/* it is, so try next */
611 			entry = NULL;
612 			continue;
613 		}
614 
615 		/* If we ask for an existing file, go round again */
616 		if ((flags & FILESET_PICKEXISTS) &&
617 		    !(entry->fse_flags & FSE_EXISTS))
618 			entry = NULL;
619 
620 		/* If we ask for not an existing file, go round again */
621 		if ((flags & FILESET_PICKNOEXIST) &&
622 		    (entry->fse_flags & FSE_EXISTS))
623 			entry = NULL;
624 	}
625 
626 	/* update file or directory idle counts */
627 	if (flags & FILESET_PICKDIR)
628 		fileset->fs_idle_dirs--;
629 	else
630 		fileset->fs_idle_files--;
631 
632 	/* Indicate that file or directory is now busy */
633 	entry->fse_flags |= FSE_BUSY;
634 
635 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
636 	filebench_log(LOG_DEBUG_SCRIPT, "Picked file %s", entry->fse_path);
637 	return (entry);
638 
639 empty:
640 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
641 	return (NULL);
642 }
643 
644 /*
645  * Removes a filesetentry from the "FSE_BUSY" state, signaling any threads
646  * that are waiting for a NOT BUSY filesetentry. Also sets whether it is
647  * existant or not, or leaves that designation alone.
648  */
649 void
650 fileset_unbusy(filesetentry_t *entry, int update_exist, int new_exist_val)
651 {
652 	fileset_t *fileset = NULL;
653 	int fse_is_dir;
654 
655 	if (entry)
656 		fileset = entry->fse_fileset;
657 
658 	if (fileset == NULL) {
659 		filebench_log(LOG_ERROR, "fileset_unbusy: NO FILESET!");
660 		return;
661 	}
662 
663 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
664 	fse_is_dir = entry->fse_flags & FSE_DIR;
665 
666 	/* increment idle count, clear FSE_BUSY and signal IF it was busy */
667 	if (entry->fse_flags & FSE_BUSY) {
668 
669 		/* unbusy it */
670 		entry->fse_flags &= (~FSE_BUSY);
671 
672 		/* release any threads waiting for unbusy */
673 		if (entry->fse_flags & FSE_THRD_WAITNG) {
674 			entry->fse_flags &= (~FSE_THRD_WAITNG);
675 			(void) pthread_cond_broadcast(
676 			    &fileset->fs_thrd_wait_cv);
677 		}
678 
679 		/* increment idle count and signal waiting threads */
680 		if (fse_is_dir) {
681 			fileset->fs_idle_dirs++;
682 			if (fileset->fs_idle_dirs == 1) {
683 				(void) pthread_cond_signal(
684 				    &fileset->fs_idle_dirs_cv);
685 			}
686 		} else {
687 			fileset->fs_idle_files++;
688 			if (fileset->fs_idle_files == 1) {
689 				(void) pthread_cond_signal(
690 				    &fileset->fs_idle_files_cv);
691 			}
692 		}
693 	}
694 
695 	/* modify FSE_EXIST flag and actual dirs/files count, if requested */
696 	if (update_exist) {
697 		if (new_exist_val == TRUE) {
698 			if (!(entry->fse_flags & FSE_EXISTS)) {
699 
700 				/* asked to set, and it was clear */
701 				entry->fse_flags |= FSE_EXISTS;
702 				if (fse_is_dir)
703 					fileset->fs_num_act_dirs++;
704 				else
705 					fileset->fs_num_act_files++;
706 			}
707 		} else {
708 			if (entry->fse_flags & FSE_EXISTS) {
709 
710 				/* asked to clear, and it was set */
711 				entry->fse_flags &= (~FSE_EXISTS);
712 				if (fse_is_dir)
713 					fileset->fs_num_act_dirs--;
714 				else
715 					fileset->fs_num_act_files--;
716 			}
717 		}
718 	}
719 
720 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
721 }
722 
723 /*
724  * Given a fileset "fileset", create the associated files as
725  * specified in the attributes of the fileset. The fileset is
726  * rooted in a directory whose pathname is in fileset_path. If the
727  * directory exists, meaning that there is already a fileset,
728  * and the fileset_reuse attribute is false, then remove it and all
729  * its contained files and subdirectories. Next, the routine
730  * creates a root directory for the fileset. All the file type
731  * filesetentries are cycled through creating as needed
732  * their containing subdirectory trees in the filesystem and
733  * creating actual files for fileset_preallocpercent of them. The
734  * created files are filled with fse_size bytes of unitialized
735  * data. The routine returns FILEBENCH_ERROR on errors,
736  * FILEBENCH_OK on success.
737  */
738 static int
739 fileset_create(fileset_t *fileset)
740 {
741 	filesetentry_t *entry;
742 	char path[MAXPATHLEN];
743 	struct stat64 sb;
744 	int pickflags = FILESET_PICKUNIQUE | FILESET_PICKRESET;
745 	hrtime_t start = gethrtime();
746 	char *fileset_path;
747 	char *fileset_name;
748 	int randno;
749 	int preallocated = 0;
750 	int reusing = 0;
751 
752 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
753 		filebench_log(LOG_ERROR, "%s path not set",
754 		    fileset_entity_name(fileset));
755 		return (FILEBENCH_ERROR);
756 	}
757 
758 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
759 		filebench_log(LOG_ERROR, "%s name not set",
760 		    fileset_entity_name(fileset));
761 		return (FILEBENCH_ERROR);
762 	}
763 
764 #ifdef HAVE_RAW_SUPPORT
765 	/* treat raw device as special case */
766 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
767 		return (FILEBENCH_OK);
768 #endif /* HAVE_RAW_SUPPORT */
769 
770 	/* XXX Add check to see if there is enough space */
771 
772 	/* Remove existing */
773 	(void) strcpy(path, fileset_path);
774 	(void) strcat(path, "/");
775 	(void) strcat(path, fileset_name);
776 	if ((stat64(path, &sb) == 0) && (strlen(path) > 3) &&
777 	    (strlen(avd_get_str(fileset->fs_path)) > 2)) {
778 		if (!avd_get_bool(fileset->fs_reuse)) {
779 			char cmd[MAXPATHLEN];
780 
781 			(void) snprintf(cmd, sizeof (cmd), "rm -rf %s", path);
782 			(void) system(cmd);
783 			filebench_log(LOG_VERBOSE,
784 			    "Removed any existing %s %s in %llu seconds",
785 			    fileset_entity_name(fileset), fileset_name,
786 			    (u_longlong_t)(((gethrtime() - start) /
787 			    1000000000) + 1));
788 		} else {
789 			/* we are re-using */
790 			reusing = 1;
791 			filebench_log(LOG_VERBOSE, "Re-using %s %s.",
792 			    fileset_entity_name(fileset), fileset_name);
793 		}
794 	}
795 	(void) mkdir(path, 0755);
796 
797 	/* make the filesets directory tree */
798 	if (fileset_create_subdirs(fileset, path) == FILEBENCH_ERROR)
799 		return (FILEBENCH_ERROR);
800 
801 	start = gethrtime();
802 
803 	filebench_log(LOG_VERBOSE, "Creating %s %s...",
804 	    fileset_entity_name(fileset), fileset_name);
805 
806 	if (!avd_get_bool(fileset->fs_prealloc))
807 		goto exit;
808 
809 	randno = ((RAND_MAX * (100
810 	    - avd_get_int(fileset->fs_preallocpercent))) / 100);
811 
812 	while (entry = fileset_pick(fileset, pickflags, 0)) {
813 		pthread_t tid;
814 
815 		pickflags = FILESET_PICKUNIQUE;
816 
817 		/* entry doesn't need to be locked during initialization */
818 		fileset_unbusy(entry, FALSE, FALSE);
819 
820 		if (rand() < randno)
821 			continue;
822 
823 		preallocated++;
824 
825 		if (reusing)
826 			entry->fse_flags |= FSE_REUSING;
827 		else
828 			entry->fse_flags &= (~FSE_REUSING);
829 
830 		/* fire off allocation threads for each file if paralloc set */
831 		if (avd_get_bool(fileset->fs_paralloc)) {
832 
833 			/* limit total number of simultaneous allocations */
834 			(void) pthread_mutex_lock(
835 			    &filebench_shm->shm_fsparalloc_lock);
836 			while (filebench_shm->shm_fsparalloc_count
837 			    >= MAX_PARALLOC_THREADS) {
838 				(void) pthread_cond_wait(
839 				    &filebench_shm->shm_fsparalloc_cv,
840 				    &filebench_shm->shm_fsparalloc_lock);
841 			}
842 
843 			/* quit if any allocation thread reports and error */
844 			if (filebench_shm->shm_fsparalloc_count < 0) {
845 				(void) pthread_mutex_unlock(
846 				    &filebench_shm->shm_fsparalloc_lock);
847 				return (FILEBENCH_ERROR);
848 			}
849 
850 			filebench_shm->shm_fsparalloc_count++;
851 			(void) pthread_mutex_unlock(
852 			    &filebench_shm->shm_fsparalloc_lock);
853 
854 			/*
855 			 * Fire off a detached allocation thread per file.
856 			 * The thread will self destruct when it finishes
857 			 * writing pre-allocation data to the file.
858 			 */
859 			if (pthread_create(&tid, NULL,
860 			    (void *(*)(void*))fileset_alloc_thread,
861 			    entry) == 0) {
862 				/*
863 				 * A thread was created; detach it so it can
864 				 * fully quit when finished.
865 				 */
866 				(void) pthread_detach(tid);
867 			} else {
868 				filebench_log(LOG_ERROR,
869 				    "File prealloc thread create failed");
870 				filebench_shutdown(1);
871 			}
872 
873 		} else {
874 			if (fileset_alloc_file(entry) == FILEBENCH_ERROR)
875 				return (FILEBENCH_ERROR);
876 		}
877 	}
878 
879 exit:
880 	filebench_log(LOG_VERBOSE,
881 	    "Preallocated %d of %llu of %s %s in %llu seconds",
882 	    preallocated,
883 	    (u_longlong_t)fileset->fs_constentries,
884 	    fileset_entity_name(fileset), fileset_name,
885 	    (u_longlong_t)(((gethrtime() - start) / 1000000000) + 1));
886 
887 	return (FILEBENCH_OK);
888 }
889 
890 /*
891  * Adds an entry to the fileset's file list. Single threaded so
892  * no locking needed.
893  */
894 static void
895 fileset_insfilelist(fileset_t *fileset, filesetentry_t *entry)
896 {
897 	if (fileset->fs_filelist == NULL) {
898 		fileset->fs_filelist = entry;
899 		entry->fse_filenext = NULL;
900 	} else {
901 		entry->fse_filenext = fileset->fs_filelist;
902 		fileset->fs_filelist = entry;
903 	}
904 }
905 
906 /*
907  * Adds an entry to the fileset's directory list. Single
908  * threaded so no locking needed.
909  */
910 static void
911 fileset_insdirlist(fileset_t *fileset, filesetentry_t *entry)
912 {
913 	if (fileset->fs_dirlist == NULL) {
914 		fileset->fs_dirlist = entry;
915 		entry->fse_dirnext = NULL;
916 	} else {
917 		entry->fse_dirnext = fileset->fs_dirlist;
918 		fileset->fs_dirlist = entry;
919 	}
920 }
921 
922 /*
923  * Obtaines a filesetentry entity for a file to be placed in a
924  * (sub)directory of a fileset. The size of the file may be
925  * specified by fileset_meansize, or calculated from a gamma
926  * distribution of parameter fileset_sizegamma and of mean size
927  * fileset_meansize. The filesetentry entity is placed on the file
928  * list in the specified parent filesetentry entity, which may
929  * be a directory filesetentry, or the root filesetentry in the
930  * fileset. It is also placed on the fileset's list of all
931  * contained files. Returns FILEBENCH_OK if successful or FILEBENCH_ERROR
932  * if ipc memory for the path string cannot be allocated.
933  */
934 static int
935 fileset_populate_file(fileset_t *fileset, filesetentry_t *parent, int serial)
936 {
937 	char tmpname[16];
938 	filesetentry_t *entry;
939 	double drand;
940 
941 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
942 	    == NULL) {
943 		filebench_log(LOG_ERROR,
944 		    "fileset_populate_file: Can't malloc filesetentry");
945 		return (FILEBENCH_ERROR);
946 	}
947 
948 	/* Another currently idle file */
949 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
950 	fileset->fs_idle_files++;
951 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
952 
953 	entry->fse_parent = parent;
954 	entry->fse_fileset = fileset;
955 	entry->fse_flags = FSE_FREE;
956 	fileset_insfilelist(fileset, entry);
957 
958 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
959 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
960 		filebench_log(LOG_ERROR,
961 		    "fileset_populate_file: Can't alloc path string");
962 		return (FILEBENCH_ERROR);
963 	}
964 
965 	/* see if random variable was supplied for file size */
966 	if (fileset->fs_meansize == -1) {
967 		entry->fse_size = (off64_t)avd_get_int(fileset->fs_size);
968 	} else {
969 		double gamma;
970 
971 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
972 		if (gamma > 0) {
973 			drand = gamma_dist_knuth(gamma,
974 			    fileset->fs_meansize / gamma);
975 			entry->fse_size = (off64_t)drand;
976 		} else {
977 			entry->fse_size = (off64_t)fileset->fs_meansize;
978 		}
979 	}
980 
981 	fileset->fs_bytes += entry->fse_size;
982 
983 	fileset->fs_realfiles++;
984 	return (FILEBENCH_OK);
985 }
986 
987 /*
988  * Creates a directory node in a fileset, by obtaining a
989  * filesetentry entity for the node and initializing it
990  * according to parameters of the fileset. It determines a
991  * directory tree depth and directory width, optionally using
992  * a gamma distribution. If its calculated depth is less than or
993  * equal to its actual depth in the directory tree, it becomes a leaf
994  * node and fills itself with "width" number of file type
995  * filesetentries, otherwise it fills itself with "width"
996  * number of directory type filesetentries, using recursive
997  * calls to fileset_populate_subdir. The end result of the
998  * initial call to this routine is a tree of directories of
999  * random width and varying depth with sufficient leaf
1000  * directories to contain all required files.
1001  * Returns FILEBENCH_OK on success. Returns FILEBENCH_ERROR if ipc path
1002  * string memory cannot be allocated and returns the error code (currently
1003  * also FILEBENCH_ERROR) from calls to fileset_populate_file or recursive
1004  * calls to fileset_populate_subdir.
1005  */
1006 static int
1007 fileset_populate_subdir(fileset_t *fileset, filesetentry_t *parent,
1008     int serial, double depth)
1009 {
1010 	double randepth, drand, ranwidth;
1011 	int isleaf = 0;
1012 	char tmpname[16];
1013 	filesetentry_t *entry;
1014 	int i;
1015 
1016 	depth += 1;
1017 
1018 	/* Create dir node */
1019 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
1020 	    == NULL) {
1021 		filebench_log(LOG_ERROR,
1022 		    "fileset_populate_subdir: Can't malloc filesetentry");
1023 		return (FILEBENCH_ERROR);
1024 	}
1025 
1026 	/* another idle directory */
1027 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
1028 	fileset->fs_idle_dirs++;
1029 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
1030 
1031 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
1032 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
1033 		filebench_log(LOG_ERROR,
1034 		    "fileset_populate_subdir: Can't alloc path string");
1035 		return (FILEBENCH_ERROR);
1036 	}
1037 
1038 	entry->fse_parent = parent;
1039 	entry->fse_flags = FSE_DIR | FSE_FREE;
1040 	fileset_insdirlist(fileset, entry);
1041 
1042 	if (fileset->fs_dirdepthrv) {
1043 		randepth = (int)avd_get_int(fileset->fs_dirdepthrv);
1044 	} else {
1045 		double gamma;
1046 
1047 		gamma = avd_get_int(fileset->fs_dirgamma) / 1000.0;
1048 		if (gamma > 0) {
1049 			drand = gamma_dist_knuth(gamma,
1050 			    fileset->fs_meandepth / gamma);
1051 			randepth = (int)drand;
1052 		} else {
1053 			randepth = (int)fileset->fs_meandepth;
1054 		}
1055 	}
1056 
1057 	if (fileset->fs_meanwidth == -1) {
1058 		ranwidth = avd_get_dbl(fileset->fs_dirwidth);
1059 	} else {
1060 		double gamma;
1061 
1062 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
1063 		if (gamma > 0) {
1064 			drand = gamma_dist_knuth(gamma,
1065 			    fileset->fs_meanwidth / gamma);
1066 			ranwidth = drand;
1067 		} else {
1068 			ranwidth = fileset->fs_meanwidth;
1069 		}
1070 	}
1071 
1072 	if (randepth == 0)
1073 		randepth = 1;
1074 	if (ranwidth == 0)
1075 		ranwidth = 1;
1076 	if (depth >= randepth)
1077 		isleaf = 1;
1078 
1079 	/*
1080 	 * Create directory of random width according to distribution, or
1081 	 * if root directory, continue until #files required
1082 	 */
1083 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
1084 	    (fileset->fs_realfiles < fileset->fs_constentries);
1085 	    i++) {
1086 		int ret = 0;
1087 
1088 		if (parent && isleaf)
1089 			ret = fileset_populate_file(fileset, entry, i);
1090 		else
1091 			ret = fileset_populate_subdir(fileset, entry, i, depth);
1092 
1093 		if (ret != 0)
1094 			return (ret);
1095 	}
1096 	return (FILEBENCH_OK);
1097 }
1098 
1099 /*
1100  * Populates a fileset with files and subdirectory entries. Uses
1101  * the supplied fileset_dirwidth and fileset_entries (number of files) to
1102  * calculate the required fileset_meandepth (of subdirectories) and
1103  * initialize the fileset_meanwidth and fileset_meansize variables. Then
1104  * calls fileset_populate_subdir() to do the recursive
1105  * subdirectory entry creation and leaf file entry creation. All
1106  * of the above is skipped if the fileset has already been
1107  * populated. Returns 0 on success, or an error code from the
1108  * call to fileset_populate_subdir if that call fails.
1109  */
1110 static int
1111 fileset_populate(fileset_t *fileset)
1112 {
1113 	int entries = (int)avd_get_int(fileset->fs_entries);
1114 	int meandirwidth;
1115 	int ret;
1116 
1117 	/* Skip if already populated */
1118 	if (fileset->fs_bytes > 0)
1119 		goto exists;
1120 
1121 #ifdef HAVE_RAW_SUPPORT
1122 	/* check for raw device */
1123 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
1124 		return (FILEBENCH_OK);
1125 #endif /* HAVE_RAW_SUPPORT */
1126 
1127 	/* save value of entries obtained for later, in case it was random */
1128 	fileset->fs_constentries = entries;
1129 
1130 	/* declare all files currently non existant */
1131 	fileset->fs_num_act_files = 0;
1132 
1133 	/* initialize idle files and directories condition variables */
1134 	(void) pthread_cond_init(&fileset->fs_idle_dirs_cv, ipc_condattr());
1135 	(void) pthread_cond_init(&fileset->fs_idle_files_cv, ipc_condattr());
1136 
1137 	/* no files or dirs idle (or busy) yet */
1138 	fileset->fs_idle_files = 0;
1139 	fileset->fs_idle_dirs = 0;
1140 
1141 	/* initialize locks and other condition variables */
1142 	(void) pthread_mutex_init(&fileset->fs_pick_lock,
1143 	    ipc_mutexattr(IPC_MUTEX_NORMAL));
1144 	(void) pthread_cond_init(&fileset->fs_thrd_wait_cv, ipc_condattr());
1145 
1146 	/* is dirwidth a random variable? */
1147 	if (AVD_IS_RANDOM(fileset->fs_dirwidth)) {
1148 		meandirwidth =
1149 		    (int)fileset->fs_dirwidth->avd_val.randptr->rnd_dbl_mean;
1150 		fileset->fs_meanwidth = -1;
1151 	} else {
1152 		meandirwidth = (int)avd_get_int(fileset->fs_dirwidth);
1153 		fileset->fs_meanwidth = (double)meandirwidth;
1154 	}
1155 
1156 	/*
1157 	 * Input params are:
1158 	 *	# of files
1159 	 *	ave # of files per dir
1160 	 *	max size of dir
1161 	 *	# ave size of file
1162 	 *	max size of file
1163 	 */
1164 	fileset->fs_meandepth = log(entries) / log(meandirwidth);
1165 
1166 	/* Has a random variable been supplied for dirdepth? */
1167 	if (fileset->fs_dirdepthrv) {
1168 		/* yes, so set the random variable's mean value to meandepth */
1169 		fileset->fs_dirdepthrv->avd_val.randptr->rnd_dbl_mean =
1170 		    fileset->fs_meandepth;
1171 	}
1172 
1173 	/* test for random size variable */
1174 	if (AVD_IS_RANDOM(fileset->fs_size))
1175 		fileset->fs_meansize = -1;
1176 	else
1177 		fileset->fs_meansize = avd_get_int(fileset->fs_size);
1178 
1179 	if ((ret = fileset_populate_subdir(fileset, NULL, 1, 0)) != 0)
1180 		return (ret);
1181 
1182 
1183 exists:
1184 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1185 		filebench_log(LOG_VERBOSE, "File %s: mbytes=%llu",
1186 		    avd_get_str(fileset->fs_name),
1187 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
1188 	} else {
1189 		filebench_log(LOG_VERBOSE, "Fileset %s: %d files, "
1190 		    "avg dir = %d, avg depth = %.1lf, mbytes=%llu",
1191 		    avd_get_str(fileset->fs_name), entries,
1192 		    meandirwidth,
1193 		    fileset->fs_meandepth,
1194 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
1195 	}
1196 
1197 	return (FILEBENCH_OK);
1198 }
1199 
1200 /*
1201  * Allocates a fileset instance, initializes fileset_dirgamma and
1202  * fileset_sizegamma default values, and sets the fileset name to the
1203  * supplied name string. Puts the allocated fileset on the
1204  * master fileset list and returns a pointer to it.
1205  *
1206  * This routine implements the 'define fileset' calls found in a .f
1207  * workload, such as in the following example:
1208  * define fileset name=drew4ever, entries=$nfiles
1209  */
1210 fileset_t *
1211 fileset_define(avd_t name)
1212 {
1213 	fileset_t *fileset;
1214 
1215 	if (name == NULL)
1216 		return (NULL);
1217 
1218 	if ((fileset = (fileset_t *)ipc_malloc(FILEBENCH_FILESET)) == NULL) {
1219 		filebench_log(LOG_ERROR,
1220 		    "fileset_define: Can't malloc fileset");
1221 		return (NULL);
1222 	}
1223 
1224 	filebench_log(LOG_DEBUG_IMPL,
1225 	    "Defining file %s", avd_get_str(name));
1226 
1227 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1228 
1229 	fileset->fs_dirgamma = avd_int_alloc(1500);
1230 	fileset->fs_sizegamma = avd_int_alloc(1500);
1231 
1232 	/* Add fileset to global list */
1233 	if (filebench_shm->shm_filesetlist == NULL) {
1234 		filebench_shm->shm_filesetlist = fileset;
1235 		fileset->fs_next = NULL;
1236 	} else {
1237 		fileset->fs_next = filebench_shm->shm_filesetlist;
1238 		filebench_shm->shm_filesetlist = fileset;
1239 	}
1240 
1241 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1242 
1243 	fileset->fs_name = name;
1244 
1245 	return (fileset);
1246 }
1247 
1248 /*
1249  * If supplied with a pointer to a fileset and the fileset's
1250  * fileset_prealloc flag is set, calls fileset_populate() to populate
1251  * the fileset with filesetentries, then calls fileset_create()
1252  * to make actual directories and files for the filesetentries.
1253  * Otherwise, it applies fileset_populate() and fileset_create()
1254  * to all the filesets on the master fileset list. It always
1255  * returns zero (0) if one fileset is populated / created,
1256  * otherwise it returns the sum of returned values from
1257  * fileset_create() and fileset_populate(), which
1258  * will be a negative one (-1) times the number of
1259  * fileset_create() calls which failed.
1260  */
1261 int
1262 fileset_createset(fileset_t *fileset)
1263 {
1264 	fileset_t *list;
1265 	int ret = 0;
1266 
1267 	/* set up for possible parallel allocate */
1268 	filebench_shm->shm_fsparalloc_count = 0;
1269 	(void) pthread_cond_init(
1270 	    &filebench_shm->shm_fsparalloc_cv,
1271 	    ipc_condattr());
1272 
1273 	if (fileset && avd_get_bool(fileset->fs_prealloc)) {
1274 
1275 		/* check for raw files */
1276 		if (fileset_checkraw(fileset)) {
1277 			filebench_log(LOG_INFO,
1278 			    "file %s/%s is a RAW device",
1279 			    avd_get_str(fileset->fs_path),
1280 			    avd_get_str(fileset->fs_name));
1281 			return (FILEBENCH_OK);
1282 		}
1283 
1284 		filebench_log(LOG_INFO,
1285 		    "creating/pre-allocating %s %s",
1286 		    fileset_entity_name(fileset),
1287 		    avd_get_str(fileset->fs_name));
1288 
1289 		if ((ret = fileset_populate(fileset)) != FILEBENCH_OK)
1290 			return (ret);
1291 
1292 		if ((ret = fileset_create(fileset)) != FILEBENCH_OK)
1293 			return (ret);
1294 	} else {
1295 
1296 		filebench_log(LOG_INFO,
1297 		    "Creating/pre-allocating files and filesets");
1298 
1299 		list = filebench_shm->shm_filesetlist;
1300 		while (list) {
1301 			/* check for raw files */
1302 			if (fileset_checkraw(list)) {
1303 				filebench_log(LOG_INFO,
1304 				    "file %s/%s is a RAW device",
1305 				    avd_get_str(list->fs_path),
1306 				    avd_get_str(list->fs_name));
1307 				list = list->fs_next;
1308 				continue;
1309 			}
1310 
1311 			if ((ret = fileset_populate(list)) != FILEBENCH_OK)
1312 				return (ret);
1313 
1314 			if ((ret = fileset_create(list)) != FILEBENCH_OK)
1315 				return (ret);
1316 
1317 			list = list->fs_next;
1318 		}
1319 	}
1320 
1321 	/* wait for allocation threads to finish */
1322 	filebench_log(LOG_INFO,
1323 	    "waiting for fileset pre-allocation to finish");
1324 
1325 	(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
1326 	while (filebench_shm->shm_fsparalloc_count > 0)
1327 		(void) pthread_cond_wait(
1328 		    &filebench_shm->shm_fsparalloc_cv,
1329 		    &filebench_shm->shm_fsparalloc_lock);
1330 	(void) pthread_mutex_unlock(&filebench_shm->shm_fsparalloc_lock);
1331 
1332 	if (filebench_shm->shm_fsparalloc_count < 0)
1333 		return (FILEBENCH_ERROR);
1334 
1335 	return (FILEBENCH_OK);
1336 }
1337 
1338 /*
1339  * Searches through the master fileset list for the named fileset.
1340  * If found, returns pointer to same, otherwise returns NULL.
1341  */
1342 fileset_t *
1343 fileset_find(char *name)
1344 {
1345 	fileset_t *fileset = filebench_shm->shm_filesetlist;
1346 
1347 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1348 
1349 	while (fileset) {
1350 		if (strcmp(name, avd_get_str(fileset->fs_name)) == 0) {
1351 			(void) ipc_mutex_unlock(
1352 			    &filebench_shm->shm_fileset_lock);
1353 			return (fileset);
1354 		}
1355 		fileset = fileset->fs_next;
1356 	}
1357 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1358 
1359 	return (NULL);
1360 }
1361 
1362 /*
1363  * Iterates over all the file sets in the filesetlist,
1364  * executing the supplied command "*cmd()" on them. Also
1365  * indicates to the executed command if it is the first
1366  * time the command has been executed since the current
1367  * call to fileset_iter.
1368  */
1369 void
1370 fileset_iter(int (*cmd)(fileset_t *fileset, int first))
1371 {
1372 	fileset_t *fileset = filebench_shm->shm_filesetlist;
1373 	int count = 0;
1374 
1375 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1376 
1377 	while (fileset) {
1378 		cmd(fileset, count == 0);
1379 		fileset = fileset->fs_next;
1380 		count++;
1381 	}
1382 
1383 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1384 }
1385 
1386 /*
1387  * Prints information to the filebench log about the file
1388  * object. Also prints a header on the first call.
1389  */
1390 int
1391 fileset_print(fileset_t *fileset, int first)
1392 {
1393 	int pathlength;
1394 	char *fileset_path;
1395 	char *fileset_name;
1396 	static char pad[] = "                              "; /* 30 spaces */
1397 
1398 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
1399 		filebench_log(LOG_ERROR, "%s path not set",
1400 		    fileset_entity_name(fileset));
1401 		return (FILEBENCH_ERROR);
1402 	}
1403 
1404 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
1405 		filebench_log(LOG_ERROR, "%s name not set",
1406 		    fileset_entity_name(fileset));
1407 		return (FILEBENCH_ERROR);
1408 	}
1409 
1410 	pathlength = strlen(fileset_path) + strlen(fileset_name);
1411 
1412 	if (pathlength > 29)
1413 		pathlength = 29;
1414 
1415 	if (first) {
1416 		filebench_log(LOG_INFO, "File or Fileset name%20s%12s%10s",
1417 		    "file size",
1418 		    "dir width",
1419 		    "entries");
1420 	}
1421 
1422 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1423 		if (fileset->fs_attrs & FILESET_IS_RAW_DEV) {
1424 			filebench_log(LOG_INFO,
1425 			    "%s/%s%s         (Raw Device)",
1426 			    fileset_path, fileset_name, &pad[pathlength]);
1427 		} else {
1428 			filebench_log(LOG_INFO,
1429 			    "%s/%s%s%9llu     (Single File)",
1430 			    fileset_path, fileset_name, &pad[pathlength],
1431 			    (u_longlong_t)avd_get_int(fileset->fs_size));
1432 		}
1433 	} else {
1434 		filebench_log(LOG_INFO, "%s/%s%s%9llu%12llu%10llu",
1435 		    fileset_path, fileset_name,
1436 		    &pad[pathlength],
1437 		    (u_longlong_t)avd_get_int(fileset->fs_size),
1438 		    (u_longlong_t)avd_get_int(fileset->fs_dirwidth),
1439 		    (u_longlong_t)fileset->fs_constentries);
1440 	}
1441 	return (FILEBENCH_OK);
1442 }
1443 /*
1444  * checks to see if the path/name pair points to a raw device. If
1445  * so it sets the raw device flag (FILESET_IS_RAW_DEV) and returns 1.
1446  * If RAW is not defined, or it is not a raw device, it clears the
1447  * raw device flag and returns 0.
1448  */
1449 int
1450 fileset_checkraw(fileset_t *fileset)
1451 {
1452 	char path[MAXPATHLEN];
1453 	struct stat64 sb;
1454 	char *pathname;
1455 	char *setname;
1456 
1457 	fileset->fs_attrs &= (~FILESET_IS_RAW_DEV);
1458 
1459 #ifdef HAVE_RAW_SUPPORT
1460 	/* check for raw device */
1461 	if ((pathname = avd_get_str(fileset->fs_path)) == NULL)
1462 		return (FILEBENCH_OK);
1463 
1464 	if ((setname = avd_get_str(fileset->fs_name)) == NULL)
1465 		return (FILEBENCH_OK);
1466 
1467 	(void) strcpy(path, pathname);
1468 	(void) strcat(path, "/");
1469 	(void) strcat(path, setname);
1470 	if ((stat64(path, &sb) == 0) &&
1471 	    ((sb.st_mode & S_IFMT) == S_IFBLK) && sb.st_rdev) {
1472 		fileset->fs_attrs |= FILESET_IS_RAW_DEV;
1473 		if (!(fileset->fs_attrs & FILESET_IS_FILE)) {
1474 			filebench_log(LOG_ERROR,
1475 			    "WARNING Fileset %s/%s Cannot be RAW device",
1476 			    avd_get_str(fileset->fs_path),
1477 			    avd_get_str(fileset->fs_name));
1478 			filebench_shutdown(1);
1479 		}
1480 
1481 		return (1);
1482 	}
1483 #endif /* HAVE_RAW_SUPPORT */
1484 
1485 	return (FILEBENCH_OK);
1486 }
1487