xref: /onnv-gate/usr/src/cmd/filebench/common/fileset.c (revision 9326:475779da8c08)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Portions Copyright 2008 Denis Cheng
26  */
27 
28 #include <fcntl.h>
29 #include <pthread.h>
30 #include <errno.h>
31 #include <math.h>
32 #include <libgen.h>
33 #include <sys/mman.h>
34 #include <sys/shm.h>
35 
36 #include "filebench.h"
37 #include "fileset.h"
38 #include "gamma_dist.h"
39 #include "utils.h"
40 #include "fsplug.h"
41 
42 /*
43  * File sets, of type fileset_t, are entities which contain
44  * information about collections of files and subdirectories in Filebench.
45  * The fileset, once populated, consists of a tree of fileset entries of
46  * type filesetentry_t which specify files and directories.  The fileset
47  * is rooted in a directory specified by fileset_path, and once the populated
48  * fileset has been created, has a tree of directories and files
49  * corresponding to the fileset's filesetentry tree.
50  *
 * Fileset entities are allocated by fileset_define() which is called from
 * parser_gram.y: parser_fileset_define(). The filesetentry tree corresponding
 * to the eventual directory and file tree to be instantiated on the storage
 * medium is built by fileset_populate(), which is called from
 * fileset_createset(). After calling fileset_populate(), fileset_createset()
 * will call fileset_create() to pre-allocate designated files and directories.
58  *
59  * Fileset_createset() is called from parser_gram.y: parser_create_fileset()
60  * when a "create fileset" or "run" command is encountered. When the
61  * "create fileset" command is used, it is generally paired with
62  * a "create processes" command, and must appear first, in order to
63  * instantiate all the files in the fileset before trying to use them.
64  */
65 
/* Forward declaration; fileset_checkraw() is a file-local (static) helper. */
static int fileset_checkraw(fileset_t *fileset);

/*
 * Maximum parallel allocation control: fileset_create() waits on
 * shm_fsparalloc_cv while shm_fsparalloc_count is at or above this value.
 */
#define	MAX_PARALLOC_THREADS 32
70 
71 /*
72  * returns pointer to file or fileset
73  * string, as appropriate
74  */
75 static char *
76 fileset_entity_name(fileset_t *fileset)
77 {
78 	if (fileset->fs_attrs & FILESET_IS_FILE)
79 		return ("file");
80 	else
81 		return ("fileset");
82 }
83 
/*
 * Strips the last component from the pathname in "dir", in place.
 * Working backwards from the terminating NUL, every character is
 * overwritten with '\0' up to and including the first '/' met; the
 * walk also stops on reaching the first character of the string, which
 * is never overwritten. So "/a/b" becomes "/a", "/abc" becomes "/" and
 * a string with no '/' keeps only its first character.
 * Returns "dir".
 */
static char *
trunc_dirname(char *dir)
{
	char *cursor;

	for (cursor = dir + strlen(dir); cursor != dir; cursor--) {
		char erased = *cursor;

		*cursor = '\0';
		if (erased == '/')
			break;
	}
	return (dir);
}
105 
/*
 * Prints, on stderr, the syntax of the "define file" / "define fileset"
 * command followed by the optional attributes it accepts, one per line.
 */
void
fileset_usage(void)
{
	/*
	 * The two adjacent literals are concatenated by the compiler; only
	 * one of them may carry the comma that separates path= from
	 * entries=, otherwise the usage text shows ",,".
	 */
	(void) fprintf(stderr,
	    "define [file name=<name> | fileset name=<name>],path=<pathname>"
	    ",entries=<number>\n");
	(void) fprintf(stderr,
	    "		        [,filesize=[size]]\n");
	(void) fprintf(stderr,
	    "		        [,dirwidth=[width]]\n");
	(void) fprintf(stderr,
	    "		        [,dirdepthrv=$random_variable_name]\n");
	(void) fprintf(stderr,
	    "		        [,dirgamma=[100-10000]] "
	    "(Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,sizegamma=[100-10000]] (Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,prealloc=[percent]]\n");
	(void) fprintf(stderr, "		        [,paralloc]\n");
	(void) fprintf(stderr, "		        [,reuse]\n");
	(void) fprintf(stderr, "\n");
}
132 
133 /*
134  * Creates a path string from the filesetentry_t "*entry"
135  * and all of its parent's path names. The resulting path
136  * is a concatination of all the individual parent paths.
137  * Allocates memory for the path string and returns a
138  * pointer to it.
139  */
140 char *
141 fileset_resolvepath(filesetentry_t *entry)
142 {
143 	filesetentry_t *fsep = entry;
144 	char path[MAXPATHLEN];
145 	char pathtmp[MAXPATHLEN];
146 	char *s;
147 
148 	path[0] = '\0';
149 	while (fsep->fse_parent) {
150 		(void) strcpy(pathtmp, "/");
151 		(void) fb_strlcat(pathtmp, fsep->fse_path, MAXPATHLEN);
152 		(void) fb_strlcat(pathtmp, path, MAXPATHLEN);
153 		(void) fb_strlcpy(path, pathtmp, MAXPATHLEN);
154 		fsep = fsep->fse_parent;
155 	}
156 
157 	s = malloc(strlen(path) + 1);
158 	(void) fb_strlcpy(s, path, MAXPATHLEN);
159 	return (s);
160 }
161 
162 /*
163  * Creates multiple nested directories as required by the
164  * supplied path. Starts at the end of the path, creating
165  * a list of directories to mkdir, up to the root of the
166  * path, then mkdirs them one at a time from the root on down.
167  */
168 static int
169 fileset_mkdir(char *path, int mode)
170 {
171 	char *p;
172 	char *dirs[65536];
173 	int i = 0;
174 
175 	if ((p = strdup(path)) == NULL)
176 		goto null_str;
177 
178 	/*
179 	 * Fill an array of subdirectory path names until either we
180 	 * reach the root or encounter an already existing subdirectory
181 	 */
182 	/* CONSTCOND */
183 	while (1) {
184 		struct stat64 sb;
185 
186 		if (stat64(p, &sb) == 0)
187 			break;
188 		if (strlen(p) < 3)
189 			break;
190 		if ((dirs[i] = strdup(p)) == NULL) {
191 			free(p);
192 			goto null_str;
193 		}
194 
195 		(void) trunc_dirname(p);
196 		i++;
197 	}
198 
199 	/* Make the directories, from closest to root downwards. */
200 	for (--i; i >= 0; i--) {
201 		(void) FB_MKDIR(dirs[i], mode);
202 		free(dirs[i]);
203 	}
204 
205 	free(p);
206 	return (FILEBENCH_OK);
207 
208 null_str:
209 	/* clean up */
210 	for (--i; i >= 0; i--)
211 		free(dirs[i]);
212 
213 	filebench_log(LOG_ERROR,
214 	    "Failed to create directory path %s: Out of memory", path);
215 	return (FILEBENCH_ERROR);
216 }
217 
218 /*
219  * creates the subdirectory tree for a fileset.
220  */
221 static int
222 fileset_create_subdirs(fileset_t *fileset, char *filesetpath)
223 {
224 	filesetentry_t *direntry;
225 	char full_path[MAXPATHLEN];
226 	char *part_path;
227 
228 	/* walk the subdirectory list, enstanciating subdirs */
229 	direntry = fileset->fs_dirlist;
230 	while (direntry) {
231 		(void) fb_strlcpy(full_path, filesetpath, MAXPATHLEN);
232 		part_path = fileset_resolvepath(direntry);
233 		(void) fb_strlcat(full_path, part_path, MAXPATHLEN);
234 		free(part_path);
235 
236 		/* now create this portion of the subdirectory tree */
237 		if (fileset_mkdir(full_path, 0755) == FILEBENCH_ERROR)
238 			return (FILEBENCH_ERROR);
239 
240 		direntry = direntry->fse_nextoftype;
241 	}
242 	return (FILEBENCH_OK);
243 }
244 
245 /*
246  * move filesetentry between exist tree and non-exist tree, source_tree
247  * to destination tree.
248  */
249 static void
250 fileset_move_entry(avl_tree_t *src_tree, avl_tree_t *dst_tree,
251     filesetentry_t *entry)
252 {
253 	avl_remove(src_tree, entry);
254 	avl_add(dst_tree, entry);
255 }
256 
257 /*
258  * given a fileset entry, determines if the associated leaf directory
259  * needs to be made or not, and if so does the mkdir.
260  */
261 static int
262 fileset_alloc_leafdir(filesetentry_t *entry)
263 {
264 	fileset_t *fileset;
265 	char path[MAXPATHLEN];
266 	struct stat64 sb;
267 	char *pathtmp;
268 
269 	fileset = entry->fse_fileset;
270 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
271 	(void) fb_strlcat(path, "/", MAXPATHLEN);
272 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
273 	pathtmp = fileset_resolvepath(entry);
274 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
275 	free(pathtmp);
276 
277 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
278 
279 	/* see if not reusing and this directory does not exist */
280 	if (!((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0))) {
281 
282 		/* No file or not reusing, so create */
283 		if (FB_MKDIR(path, 0755) < 0) {
284 			filebench_log(LOG_ERROR,
285 			    "Failed to pre-allocate leaf directory %s: %s",
286 			    path, strerror(errno));
287 			fileset_unbusy(entry, TRUE, FALSE, 0);
288 			return (FILEBENCH_ERROR);
289 		}
290 	}
291 
292 	/* unbusy the allocated entry */
293 	fileset_unbusy(entry, TRUE, TRUE, 0);
294 	return (FILEBENCH_OK);
295 }
296 
297 /*
298  * given a fileset entry, determines if the associated file
299  * needs to be allocated or not, and if so does the allocation.
300  */
301 static int
302 fileset_alloc_file(filesetentry_t *entry)
303 {
304 	fileset_t *fileset;
305 	char path[MAXPATHLEN];
306 	char *buf;
307 	struct stat64 sb;
308 	char *pathtmp;
309 	off64_t seek;
310 	fb_fdesc_t fdesc;
311 	int trust_tree;
312 
313 	fileset = entry->fse_fileset;
314 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
315 	(void) fb_strlcat(path, "/", MAXPATHLEN);
316 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
317 	pathtmp = fileset_resolvepath(entry);
318 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
319 	free(pathtmp);
320 
321 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
322 
323 	/* see if reusing and this file exists */
324 	trust_tree = avd_get_bool(fileset->fs_trust_tree);
325 	if ((entry->fse_flags & FSE_REUSING) && (trust_tree ||
326 	    (FB_STAT(path, &sb) == 0))) {
327 		if (FB_OPEN(&fdesc, path, O_RDWR, 0) == FILEBENCH_ERROR) {
328 			filebench_log(LOG_INFO,
329 			    "Attempted but failed to Re-use file %s",
330 			    path);
331 			fileset_unbusy(entry, TRUE, FALSE, 0);
332 			return (FILEBENCH_ERROR);
333 		}
334 
335 		if (trust_tree || (sb.st_size == (off64_t)entry->fse_size)) {
336 			filebench_log(LOG_DEBUG_IMPL,
337 			    "Re-using file %s", path);
338 
339 			if (!avd_get_bool(fileset->fs_cached))
340 				(void) FB_FREEMEM(&fdesc, entry->fse_size);
341 
342 			(void) FB_CLOSE(&fdesc);
343 
344 			/* unbusy the allocated entry */
345 			fileset_unbusy(entry, TRUE, TRUE, 0);
346 			return (FILEBENCH_OK);
347 
348 		} else if (sb.st_size > (off64_t)entry->fse_size) {
349 			/* reuse, but too large */
350 			filebench_log(LOG_DEBUG_IMPL,
351 			    "Truncating & re-using file %s", path);
352 
353 			(void) FB_FTRUNC(&fdesc, (off64_t)entry->fse_size);
354 
355 			if (!avd_get_bool(fileset->fs_cached))
356 				(void) FB_FREEMEM(&fdesc, entry->fse_size);
357 
358 			(void) FB_CLOSE(&fdesc);
359 
360 			/* unbusy the allocated entry */
361 			fileset_unbusy(entry, TRUE, TRUE, 0);
362 			return (FILEBENCH_OK);
363 		}
364 	} else {
365 
366 		/* No file or not reusing, so create */
367 		if (FB_OPEN(&fdesc, path, O_RDWR | O_CREAT, 0644) ==
368 		    FILEBENCH_ERROR) {
369 			filebench_log(LOG_ERROR,
370 			    "Failed to pre-allocate file %s: %s",
371 			    path, strerror(errno));
372 
373 			/* unbusy the unallocated entry */
374 			fileset_unbusy(entry, TRUE, FALSE, 0);
375 			return (FILEBENCH_ERROR);
376 		}
377 	}
378 
379 	if ((buf = (char *)malloc(FILE_ALLOC_BLOCK)) == NULL) {
380 		/* unbusy the unallocated entry */
381 		fileset_unbusy(entry, TRUE, FALSE, 0);
382 		return (FILEBENCH_ERROR);
383 	}
384 
385 	for (seek = 0; seek < entry->fse_size; ) {
386 		off64_t wsize;
387 		int ret = 0;
388 
389 		/*
390 		 * Write FILE_ALLOC_BLOCK's worth,
391 		 * except on last write
392 		 */
393 		wsize = MIN(entry->fse_size - seek, FILE_ALLOC_BLOCK);
394 
395 		ret = FB_WRITE(&fdesc, buf, wsize);
396 		if (ret != wsize) {
397 			filebench_log(LOG_ERROR,
398 			    "Failed to pre-allocate file %s: %s",
399 			    path, strerror(errno));
400 			(void) FB_CLOSE(&fdesc);
401 			free(buf);
402 			fileset_unbusy(entry, TRUE, FALSE, 0);
403 			return (FILEBENCH_ERROR);
404 		}
405 		seek += wsize;
406 	}
407 
408 	if (!avd_get_bool(fileset->fs_cached))
409 		(void) FB_FREEMEM(&fdesc, entry->fse_size);
410 
411 	(void) FB_CLOSE(&fdesc);
412 
413 	free(buf);
414 
415 	/* unbusy the allocated entry */
416 	fileset_unbusy(entry, TRUE, TRUE, 0);
417 
418 	filebench_log(LOG_DEBUG_IMPL,
419 	    "Pre-allocated file %s size %llu",
420 	    path, (u_longlong_t)entry->fse_size);
421 
422 	return (FILEBENCH_OK);
423 }
424 
425 /*
426  * given a fileset entry, determines if the associated file
427  * needs to be allocated or not, and if so does the allocation.
428  * Sets shm_fsparalloc_count to -1 on error.
429  */
430 static void *
431 fileset_alloc_thread(filesetentry_t *entry)
432 {
433 	if (fileset_alloc_file(entry) == FILEBENCH_ERROR) {
434 		(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
435 		filebench_shm->shm_fsparalloc_count = -1;
436 	} else {
437 		(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
438 		filebench_shm->shm_fsparalloc_count--;
439 	}
440 
441 	(void) pthread_cond_signal(&filebench_shm->shm_fsparalloc_cv);
442 	(void) pthread_mutex_unlock(&filebench_shm->shm_fsparalloc_lock);
443 
444 	pthread_exit(NULL);
445 	return (NULL);
446 }
447 
448 
449 /*
450  * First creates the parent directories of the file using
451  * fileset_mkdir(). Then Optionally sets the O_DSYNC flag
452  * and opens the file with open64(). It unlocks the fileset
453  * entry lock, sets the DIRECTIO_ON or DIRECTIO_OFF flags
454  * as requested, and returns the file descriptor integer
455  * for the opened file in the supplied filebench file descriptor.
456  * Returns FILEBENCH_ERROR on error, and FILEBENCH_OK on success.
457  */
458 int
459 fileset_openfile(fb_fdesc_t *fdesc, fileset_t *fileset,
460     filesetentry_t *entry, int flag, int filemode, int attrs)
461 {
462 	char path[MAXPATHLEN];
463 	char dir[MAXPATHLEN];
464 	char *pathtmp;
465 	struct stat64 sb;
466 	int open_attrs = 0;
467 
468 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
469 	(void) fb_strlcat(path, "/", MAXPATHLEN);
470 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
471 	pathtmp = fileset_resolvepath(entry);
472 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
473 	(void) fb_strlcpy(dir, path, MAXPATHLEN);
474 	free(pathtmp);
475 	(void) trunc_dirname(dir);
476 
477 	/* If we are going to create a file, create the parent dirs */
478 	if ((flag & O_CREAT) && (stat64(dir, &sb) != 0)) {
479 		if (fileset_mkdir(dir, 0755) == FILEBENCH_ERROR)
480 			return (FILEBENCH_ERROR);
481 	}
482 
483 	if (attrs & FLOW_ATTR_DSYNC) {
484 #ifdef sun
485 		open_attrs |= O_DSYNC;
486 #else
487 		open_attrs |= O_FSYNC;
488 #endif
489 	}
490 
491 	if (FB_OPEN(fdesc, path, flag | open_attrs, filemode)
492 	    == FILEBENCH_ERROR) {
493 		filebench_log(LOG_ERROR,
494 		    "Failed to open file %d, %s, with status %x: %s",
495 		    entry->fse_index, path, entry->fse_flags, strerror(errno));
496 
497 		fileset_unbusy(entry, FALSE, FALSE, 0);
498 		return (FILEBENCH_ERROR);
499 	}
500 
501 	if (flag & O_CREAT)
502 		fileset_unbusy(entry, TRUE, TRUE, 1);
503 	else
504 		fileset_unbusy(entry, FALSE, FALSE, 1);
505 
506 #ifdef sun
507 	if (attrs & FLOW_ATTR_DIRECTIO)
508 		(void) directio(fdesc->fd_num, DIRECTIO_ON);
509 	else
510 		(void) directio(fdesc->fd_num, DIRECTIO_OFF);
511 #endif
512 
513 	return (FILEBENCH_OK);
514 }
515 
516 /*
517  * removes all filesetentries from their respective btrees, and puts them
518  * on the free list. The supplied argument indicates which free list to
519  * use.
520  */
521 static void
522 fileset_pickreset(fileset_t *fileset, int entry_type)
523 {
524 	filesetentry_t	*entry;
525 
526 	switch (entry_type & FILESET_PICKMASK) {
527 	case FILESET_PICKFILE:
528 		entry = (filesetentry_t *)avl_first(&fileset->fs_noex_files);
529 
530 		/* make sure non-existing files are marked free */
531 		while (entry) {
532 			entry->fse_flags |= FSE_FREE;
533 			entry->fse_open_cnt = 0;
534 			fileset_move_entry(&fileset->fs_noex_files,
535 			    &fileset->fs_free_files, entry);
536 			entry =  AVL_NEXT(&fileset->fs_noex_files, entry);
537 		}
538 
539 		/* free up any existing files */
540 		entry = (filesetentry_t *)avl_first(&fileset->fs_exist_files);
541 
542 		while (entry) {
543 			entry->fse_flags |= FSE_FREE;
544 			entry->fse_open_cnt = 0;
545 			fileset_move_entry(&fileset->fs_exist_files,
546 			    &fileset->fs_free_files, entry);
547 
548 			entry =  AVL_NEXT(&fileset->fs_exist_files, entry);
549 		}
550 
551 		break;
552 
553 	case FILESET_PICKDIR:
554 		/* nothing to reset, as all (sub)dirs always exist */
555 		break;
556 
557 	case FILESET_PICKLEAFDIR:
558 		entry = (filesetentry_t *)
559 		    avl_first(&fileset->fs_noex_leaf_dirs);
560 
561 		/* make sure non-existing leaf dirs are marked free */
562 		while (entry) {
563 			entry->fse_flags |= FSE_FREE;
564 			entry->fse_open_cnt = 0;
565 			fileset_move_entry(&fileset->fs_noex_leaf_dirs,
566 			    &fileset->fs_free_leaf_dirs, entry);
567 			entry =  AVL_NEXT(&fileset->fs_noex_leaf_dirs, entry);
568 		}
569 
570 		/* free up any existing leaf dirs */
571 		entry = (filesetentry_t *)
572 		    avl_first(&fileset->fs_exist_leaf_dirs);
573 
574 		while (entry) {
575 			entry->fse_flags |= FSE_FREE;
576 			entry->fse_open_cnt = 0;
577 			fileset_move_entry(&fileset->fs_exist_leaf_dirs,
578 			    &fileset->fs_free_leaf_dirs, entry);
579 
580 			entry =  AVL_NEXT(&fileset->fs_exist_leaf_dirs, entry);
581 		}
582 
583 		break;
584 	}
585 }
586 
587 /*
588  * find a filesetentry from the fileset using the supplied index
589  */
590 static filesetentry_t *
591 fileset_find_entry(avl_tree_t *atp, uint_t index)
592 {
593 	avl_index_t	found_loc;
594 	filesetentry_t	desired_fse, *found_fse;
595 
596 	/* find the file with the desired index, if it is in the tree */
597 	desired_fse.fse_index = index;
598 	found_fse = avl_find(atp, (void *)(&desired_fse), &found_loc);
599 	if (found_fse != NULL)
600 		return (found_fse);
601 
602 	/* if requested node not found, find next higher node */
603 	found_fse = avl_nearest(atp, found_loc, AVL_AFTER);
604 	if (found_fse != NULL)
605 		return (found_fse);
606 
607 	/* might have hit the end, return lowest available index node */
608 	found_fse = avl_first(atp);
609 	return (found_fse);
610 }
611 
612 /*
613  * Selects a fileset entry from a fileset. If the
614  * FILESET_PICKLEAFDIR flag is set it will pick a leaf directory entry,
615  * if the FILESET_PICKDIR flag is set it will pick a non leaf directory
616  * entry, otherwise a file entry. The FILESET_PICKUNIQUE
617  * flag will take an entry off of one of the free (unused)
618  * lists (file or directory), otherwise the entry will be
619  * picked off of one of the rotor lists (file or directory).
620  * The FILESET_PICKEXISTS will insure that only extant
621  * (FSE_EXISTS) state files are selected, while
622  * FILESET_PICKNOEXIST insures that only non extant
623  * (not FSE_EXISTS) state files are selected.
624  * Note that the selected fileset entry (file) is returned
625  * with its FSE_BUSY flag (in fse_flags) set.
626  */
627 filesetentry_t *
628 fileset_pick(fileset_t *fileset, int flags, int tid, int index)
629 {
630 	filesetentry_t *entry = NULL;
631 	filesetentry_t *start_point;
632 	avl_tree_t *atp;
633 	fbint_t max_entries;
634 
635 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
636 
637 	/* see if we have to wait for available files or directories */
638 	switch (flags & FILESET_PICKMASK) {
639 	case FILESET_PICKFILE:
640 		if (fileset->fs_filelist == NULL)
641 			goto empty;
642 
643 		while (fileset->fs_idle_files == 0) {
644 			(void) pthread_cond_wait(&fileset->fs_idle_files_cv,
645 			    &fileset->fs_pick_lock);
646 		}
647 
648 		max_entries = fileset->fs_constentries;
649 		if (flags & FILESET_PICKUNIQUE) {
650 			atp = &fileset->fs_free_files;
651 		} else if (flags & FILESET_PICKNOEXIST) {
652 			atp = &fileset->fs_noex_files;
653 		} else {
654 			atp = &fileset->fs_exist_files;
655 		}
656 		break;
657 
658 	case FILESET_PICKDIR:
659 		if (fileset->fs_dirlist == NULL)
660 			goto empty;
661 
662 		while (fileset->fs_idle_dirs == 0) {
663 			(void) pthread_cond_wait(&fileset->fs_idle_dirs_cv,
664 			    &fileset->fs_pick_lock);
665 		}
666 
667 		max_entries = 1;
668 		atp = &fileset->fs_dirs;
669 		break;
670 
671 	case FILESET_PICKLEAFDIR:
672 		if (fileset->fs_leafdirlist == NULL)
673 			goto empty;
674 
675 		while (fileset->fs_idle_leafdirs == 0) {
676 			(void) pthread_cond_wait(&fileset->fs_idle_leafdirs_cv,
677 			    &fileset->fs_pick_lock);
678 		}
679 
680 		max_entries = fileset->fs_constleafdirs;
681 		if (flags & FILESET_PICKUNIQUE) {
682 			atp = &fileset->fs_free_leaf_dirs;
683 		} else if (flags & FILESET_PICKNOEXIST) {
684 			atp = &fileset->fs_noex_leaf_dirs;
685 		} else {
686 			atp = &fileset->fs_exist_leaf_dirs;
687 		}
688 		break;
689 	}
690 
691 	/* see if asking for impossible */
692 	if (avl_is_empty(atp))
693 		goto empty;
694 
695 	if (flags & FILESET_PICKUNIQUE) {
696 		uint64_t  index64;
697 
698 		/*
699 		 * pick at random from free list in order to
700 		 * distribute initially allocated files more
701 		 * randomly on storage media. Use uniform
702 		 * random number generator to select index
703 		 * if it is not supplied with pick call.
704 		 */
705 		if (index) {
706 			index64 = index;
707 		} else {
708 			if (filebench_randomno64(&index64, max_entries, 1,
709 			    NULL) == FILEBENCH_ERROR)
710 				return (NULL);
711 		}
712 
713 		entry = fileset_find_entry(atp, (int)index64);
714 
715 		if (entry == NULL)
716 			goto empty;
717 
718 	} else if (flags & FILESET_PICKBYINDEX) {
719 		/* pick by supplied index */
720 		entry = fileset_find_entry(atp, index);
721 
722 	} else {
723 		/* pick in rotation */
724 		switch (flags & FILESET_PICKMASK) {
725 		case FILESET_PICKFILE:
726 			if (flags & FILESET_PICKNOEXIST) {
727 				entry = fileset_find_entry(atp,
728 				    fileset->fs_file_nerotor);
729 				fileset->fs_file_nerotor =
730 				    entry->fse_index + 1;
731 			} else {
732 				entry = fileset_find_entry(atp,
733 				    fileset->fs_file_exrotor[tid]);
734 				fileset->fs_file_exrotor[tid] =
735 				    entry->fse_index + 1;
736 			}
737 			break;
738 
739 		case FILESET_PICKDIR:
740 			entry = fileset_find_entry(atp, fileset->fs_dirrotor);
741 			fileset->fs_dirrotor = entry->fse_index + 1;
742 			break;
743 
744 		case FILESET_PICKLEAFDIR:
745 			if (flags & FILESET_PICKNOEXIST) {
746 				entry = fileset_find_entry(atp,
747 				    fileset->fs_leafdir_nerotor);
748 				fileset->fs_leafdir_nerotor =
749 				    entry->fse_index + 1;
750 			} else {
751 				entry = fileset_find_entry(atp,
752 				    fileset->fs_leafdir_exrotor);
753 				fileset->fs_leafdir_exrotor =
754 				    entry->fse_index + 1;
755 			}
756 			break;
757 		}
758 	}
759 
760 	if (entry == NULL)
761 		goto empty;
762 
763 	/* see if entry in use */
764 	start_point = entry;
765 	while (entry->fse_flags & FSE_BUSY) {
766 
767 		/* it is, so try next */
768 		entry = AVL_NEXT(atp, entry);
769 		if (entry == NULL)
770 			entry = avl_first(atp);
771 
772 		/* see if we have wrapped around */
773 		if ((entry == NULL) || (entry == start_point)) {
774 			filebench_log(LOG_DEBUG_SCRIPT,
775 			    "All %d files are busy", avl_numnodes(atp));
776 			goto empty;
777 		}
778 
779 	}
780 
781 	/* update file or directory idle counts */
782 	switch (flags & FILESET_PICKMASK) {
783 	case FILESET_PICKFILE:
784 		fileset->fs_idle_files--;
785 		break;
786 	case FILESET_PICKDIR:
787 		fileset->fs_idle_dirs--;
788 		break;
789 	case FILESET_PICKLEAFDIR:
790 		fileset->fs_idle_leafdirs--;
791 		break;
792 	}
793 
794 	/* Indicate that file or directory is now busy */
795 	entry->fse_flags |= FSE_BUSY;
796 
797 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
798 	filebench_log(LOG_DEBUG_SCRIPT, "Picked file %s", entry->fse_path);
799 	return (entry);
800 
801 empty:
802 	filebench_log(LOG_DEBUG_SCRIPT, "No file found");
803 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
804 	return (NULL);
805 }
806 
807 /*
808  * Removes a filesetentry from the "FSE_BUSY" state, signaling any threads
809  * that are waiting for a NOT BUSY filesetentry. Also sets whether it is
810  * existant or not, or leaves that designation alone.
811  */
812 void
813 fileset_unbusy(filesetentry_t *entry, int update_exist,
814     int new_exist_val, int open_cnt_incr)
815 {
816 	fileset_t *fileset = NULL;
817 
818 	if (entry)
819 		fileset = entry->fse_fileset;
820 
821 	if (fileset == NULL) {
822 		filebench_log(LOG_ERROR, "fileset_unbusy: NO FILESET!");
823 		return;
824 	}
825 
826 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
827 
828 	/* modify FSE_EXIST flag and actual dirs/files count, if requested */
829 	if (update_exist) {
830 		if (new_exist_val == TRUE) {
831 			if (entry->fse_flags & FSE_FREE) {
832 
833 				/* asked to set and it was free */
834 				entry->fse_flags |= FSE_EXISTS;
835 				entry->fse_flags &= (~FSE_FREE);
836 				switch (entry->fse_flags & FSE_TYPE_MASK) {
837 				case FSE_TYPE_FILE:
838 					fileset_move_entry(
839 					    &fileset->fs_free_files,
840 					    &fileset->fs_exist_files, entry);
841 					break;
842 
843 				case FSE_TYPE_DIR:
844 					break;
845 
846 				case FSE_TYPE_LEAFDIR:
847 					fileset_move_entry(
848 					    &fileset->fs_free_leaf_dirs,
849 					    &fileset->fs_exist_leaf_dirs,
850 					    entry);
851 					break;
852 				}
853 
854 			} else if (!(entry->fse_flags & FSE_EXISTS)) {
855 
856 				/* asked to set, and it was clear */
857 				entry->fse_flags |= FSE_EXISTS;
858 				switch (entry->fse_flags & FSE_TYPE_MASK) {
859 				case FSE_TYPE_FILE:
860 					fileset_move_entry(
861 					    &fileset->fs_noex_files,
862 					    &fileset->fs_exist_files, entry);
863 					break;
864 				case FSE_TYPE_DIR:
865 					break;
866 				case FSE_TYPE_LEAFDIR:
867 					fileset_move_entry(
868 					    &fileset->fs_noex_leaf_dirs,
869 					    &fileset->fs_exist_leaf_dirs,
870 					    entry);
871 					break;
872 				}
873 			}
874 		} else {
875 			if (entry->fse_flags & FSE_FREE) {
876 				/* asked to clear, and it was free */
877 				entry->fse_flags &= (~(FSE_FREE | FSE_EXISTS));
878 				switch (entry->fse_flags & FSE_TYPE_MASK) {
879 				case FSE_TYPE_FILE:
880 					fileset_move_entry(
881 					    &fileset->fs_free_files,
882 					    &fileset->fs_noex_files, entry);
883 					break;
884 
885 				case FSE_TYPE_DIR:
886 					break;
887 
888 				case FSE_TYPE_LEAFDIR:
889 					fileset_move_entry(
890 					    &fileset->fs_free_leaf_dirs,
891 					    &fileset->fs_noex_leaf_dirs,
892 					    entry);
893 					break;
894 				}
895 			} else if (entry->fse_flags & FSE_EXISTS) {
896 
897 				/* asked to clear, and it was set */
898 				entry->fse_flags &= (~FSE_EXISTS);
899 				switch (entry->fse_flags & FSE_TYPE_MASK) {
900 				case FSE_TYPE_FILE:
901 					fileset_move_entry(
902 					    &fileset->fs_exist_files,
903 					    &fileset->fs_noex_files, entry);
904 					break;
905 				case FSE_TYPE_DIR:
906 					break;
907 				case FSE_TYPE_LEAFDIR:
908 					fileset_move_entry(
909 					    &fileset->fs_exist_leaf_dirs,
910 					    &fileset->fs_noex_leaf_dirs,
911 					    entry);
912 					break;
913 				}
914 			}
915 		}
916 	}
917 
918 	/* update open count */
919 	entry->fse_open_cnt += open_cnt_incr;
920 
921 	/* increment idle count, clear FSE_BUSY and signal IF it was busy */
922 	if (entry->fse_flags & FSE_BUSY) {
923 
924 		/* unbusy it */
925 		entry->fse_flags &= (~FSE_BUSY);
926 
927 		/* release any threads waiting for unbusy */
928 		if (entry->fse_flags & FSE_THRD_WAITNG) {
929 			entry->fse_flags &= (~FSE_THRD_WAITNG);
930 			(void) pthread_cond_broadcast(
931 			    &fileset->fs_thrd_wait_cv);
932 		}
933 
934 		/* increment idle count and signal waiting threads */
935 		switch (entry->fse_flags & FSE_TYPE_MASK) {
936 		case FSE_TYPE_FILE:
937 			fileset->fs_idle_files++;
938 			if (fileset->fs_idle_files == 1) {
939 				(void) pthread_cond_signal(
940 				    &fileset->fs_idle_files_cv);
941 			}
942 			break;
943 
944 		case FSE_TYPE_DIR:
945 			fileset->fs_idle_dirs++;
946 			if (fileset->fs_idle_dirs == 1) {
947 				(void) pthread_cond_signal(
948 				    &fileset->fs_idle_dirs_cv);
949 			}
950 			break;
951 
952 		case FSE_TYPE_LEAFDIR:
953 			fileset->fs_idle_leafdirs++;
954 			if (fileset->fs_idle_leafdirs == 1) {
955 				(void) pthread_cond_signal(
956 				    &fileset->fs_idle_leafdirs_cv);
957 			}
958 			break;
959 		}
960 	}
961 
962 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
963 }
964 
965 /*
966  * Given a fileset "fileset", create the associated files as
967  * specified in the attributes of the fileset. The fileset is
968  * rooted in a directory whose pathname is in fileset_path. If the
969  * directory exists, meaning that there is already a fileset,
970  * and the fileset_reuse attribute is false, then remove it and all
971  * its contained files and subdirectories. Next, the routine
972  * creates a root directory for the fileset. All the file type
973  * filesetentries are cycled through creating as needed
974  * their containing subdirectory trees in the filesystem and
975  * creating actual files for fileset_preallocpercent of them. The
 * created files are filled with fse_size bytes of uninitialized
977  * data. The routine returns FILEBENCH_ERROR on errors,
978  * FILEBENCH_OK on success.
979  */
980 static int
981 fileset_create(fileset_t *fileset)
982 {
983 	filesetentry_t *entry;
984 	char path[MAXPATHLEN];
985 	struct stat64 sb;
986 	hrtime_t start = gethrtime();
987 	char *fileset_path;
988 	char *fileset_name;
989 	int randno;
990 	int preallocated = 0;
991 	int reusing;
992 
993 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
994 		filebench_log(LOG_ERROR, "%s path not set",
995 		    fileset_entity_name(fileset));
996 		return (FILEBENCH_ERROR);
997 	}
998 
999 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
1000 		filebench_log(LOG_ERROR, "%s name not set",
1001 		    fileset_entity_name(fileset));
1002 		return (FILEBENCH_ERROR);
1003 	}
1004 
1005 #ifdef HAVE_RAW_SUPPORT
1006 	/* treat raw device as special case */
1007 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
1008 		return (FILEBENCH_OK);
1009 #endif /* HAVE_RAW_SUPPORT */
1010 
1011 	/* XXX Add check to see if there is enough space */
1012 
1013 	/* set up path to fileset */
1014 	(void) fb_strlcpy(path, fileset_path, MAXPATHLEN);
1015 	(void) fb_strlcat(path, "/", MAXPATHLEN);
1016 	(void) fb_strlcat(path, fileset_name, MAXPATHLEN);
1017 
1018 	/* if reusing and trusting to exist, just blindly reuse */
1019 	if (avd_get_bool(fileset->fs_trust_tree)) {
1020 		reusing = 1;
1021 
1022 	/* if exists and resusing, then don't create new */
1023 	} else if (((stat64(path, &sb) == 0)&& (strlen(path) > 3) &&
1024 	    (strlen(avd_get_str(fileset->fs_path)) > 2)) &&
1025 	    avd_get_bool(fileset->fs_reuse)) {
1026 		reusing = 1;
1027 	} else {
1028 		reusing = 0;
1029 	}
1030 
1031 	if (!reusing) {
1032 		char cmd[MAXPATHLEN];
1033 
1034 		/* Remove existing */
1035 		(void) snprintf(cmd, sizeof (cmd), "rm -rf %s", path);
1036 		(void) system(cmd);
1037 		filebench_log(LOG_VERBOSE,
1038 		    "Removed any existing %s %s in %llu seconds",
1039 		    fileset_entity_name(fileset), fileset_name,
1040 		    (u_longlong_t)(((gethrtime() - start) /
1041 		    1000000000) + 1));
1042 	} else {
1043 		/* we are re-using */
1044 		filebench_log(LOG_VERBOSE, "Re-using %s %s.",
1045 		    fileset_entity_name(fileset), fileset_name);
1046 	}
1047 
1048 	/* make the filesets directory tree unless in reuse mode */
1049 	if (!reusing && (avd_get_bool(fileset->fs_prealloc))) {
1050 		filebench_log(LOG_VERBOSE,
1051 		    "making tree for filset %s", path);
1052 
1053 		(void) FB_MKDIR(path, 0755);
1054 
1055 		if (fileset_create_subdirs(fileset, path) == FILEBENCH_ERROR)
1056 			return (FILEBENCH_ERROR);
1057 	}
1058 
1059 	start = gethrtime();
1060 
1061 	filebench_log(LOG_VERBOSE, "Creating %s %s...",
1062 	    fileset_entity_name(fileset), fileset_name);
1063 
1064 	randno = ((RAND_MAX * (100
1065 	    - avd_get_int(fileset->fs_preallocpercent))) / 100);
1066 
1067 	/* alloc any files, as required */
1068 	fileset_pickreset(fileset, FILESET_PICKFILE);
1069 	while (entry = fileset_pick(fileset,
1070 	    FILESET_PICKFREE | FILESET_PICKFILE, 0, 0)) {
1071 		pthread_t tid;
1072 		int newrand;
1073 
1074 		newrand = rand();
1075 
1076 		if (newrand < randno) {
1077 			/* unbusy the unallocated entry */
1078 			fileset_unbusy(entry, TRUE, FALSE, 0);
1079 			continue;
1080 		}
1081 
1082 		preallocated++;
1083 
1084 		if (reusing)
1085 			entry->fse_flags |= FSE_REUSING;
1086 		else
1087 			entry->fse_flags &= (~FSE_REUSING);
1088 
1089 		/* fire off allocation threads for each file if paralloc set */
1090 		if (avd_get_bool(fileset->fs_paralloc)) {
1091 
1092 			/* limit total number of simultaneous allocations */
1093 			(void) pthread_mutex_lock(
1094 			    &filebench_shm->shm_fsparalloc_lock);
1095 			while (filebench_shm->shm_fsparalloc_count
1096 			    >= MAX_PARALLOC_THREADS) {
1097 				(void) pthread_cond_wait(
1098 				    &filebench_shm->shm_fsparalloc_cv,
1099 				    &filebench_shm->shm_fsparalloc_lock);
1100 			}
1101 
1102 			/* quit if any allocation thread reports an error */
1103 			if (filebench_shm->shm_fsparalloc_count < 0) {
1104 				(void) pthread_mutex_unlock(
1105 				    &filebench_shm->shm_fsparalloc_lock);
1106 				return (FILEBENCH_ERROR);
1107 			}
1108 
1109 			filebench_shm->shm_fsparalloc_count++;
1110 			(void) pthread_mutex_unlock(
1111 			    &filebench_shm->shm_fsparalloc_lock);
1112 
1113 			/*
1114 			 * Fire off a detached allocation thread per file.
1115 			 * The thread will self destruct when it finishes
1116 			 * writing pre-allocation data to the file.
1117 			 */
1118 			if (pthread_create(&tid, NULL,
1119 			    (void *(*)(void*))fileset_alloc_thread,
1120 			    entry) == 0) {
1121 				/*
1122 				 * A thread was created; detach it so it can
1123 				 * fully quit when finished.
1124 				 */
1125 				(void) pthread_detach(tid);
1126 			} else {
1127 				filebench_log(LOG_ERROR,
1128 				    "File prealloc thread create failed");
1129 				filebench_shutdown(1);
1130 			}
1131 
1132 		} else {
1133 			if (fileset_alloc_file(entry) == FILEBENCH_ERROR)
1134 				return (FILEBENCH_ERROR);
1135 		}
1136 	}
1137 
1138 	/* alloc any leaf directories, as required */
1139 	fileset_pickreset(fileset, FILESET_PICKLEAFDIR);
1140 	while (entry = fileset_pick(fileset,
1141 	    FILESET_PICKFREE | FILESET_PICKLEAFDIR, 0, 0)) {
1142 
1143 		if (rand() < randno) {
1144 			/* unbusy the unallocated entry */
1145 			fileset_unbusy(entry, TRUE, FALSE, 0);
1146 			continue;
1147 		}
1148 
1149 		preallocated++;
1150 
1151 		if (reusing)
1152 			entry->fse_flags |= FSE_REUSING;
1153 		else
1154 			entry->fse_flags &= (~FSE_REUSING);
1155 
1156 		if (fileset_alloc_leafdir(entry) == FILEBENCH_ERROR)
1157 			return (FILEBENCH_ERROR);
1158 	}
1159 
1160 exit:
1161 	filebench_log(LOG_VERBOSE,
1162 	    "Preallocated %d of %llu of %s %s in %llu seconds",
1163 	    preallocated,
1164 	    (u_longlong_t)fileset->fs_constentries,
1165 	    fileset_entity_name(fileset), fileset_name,
1166 	    (u_longlong_t)(((gethrtime() - start) / 1000000000) + 1));
1167 
1168 	return (FILEBENCH_OK);
1169 }
1170 
1171 /*
1172  * Adds an entry to the fileset's file list. Single threaded so
1173  * no locking needed.
1174  */
1175 static void
1176 fileset_insfilelist(fileset_t *fileset, filesetentry_t *entry)
1177 {
1178 	entry->fse_flags = FSE_TYPE_FILE | FSE_FREE;
1179 	avl_add(&fileset->fs_free_files, entry);
1180 
1181 	if (fileset->fs_filelist == NULL) {
1182 		fileset->fs_filelist = entry;
1183 		entry->fse_nextoftype = NULL;
1184 	} else {
1185 		entry->fse_nextoftype = fileset->fs_filelist;
1186 		fileset->fs_filelist = entry;
1187 	}
1188 }
1189 
1190 /*
1191  * Adds an entry to the fileset's directory list. Single
1192  * threaded so no locking needed.
1193  */
1194 static void
1195 fileset_insdirlist(fileset_t *fileset, filesetentry_t *entry)
1196 {
1197 	entry->fse_flags = FSE_TYPE_DIR | FSE_EXISTS;
1198 	avl_add(&fileset->fs_dirs, entry);
1199 
1200 	if (fileset->fs_dirlist == NULL) {
1201 		fileset->fs_dirlist = entry;
1202 		entry->fse_nextoftype = NULL;
1203 	} else {
1204 		entry->fse_nextoftype = fileset->fs_dirlist;
1205 		fileset->fs_dirlist = entry;
1206 	}
1207 }
1208 
1209 /*
1210  * Adds an entry to the fileset's leaf directory list. Single
1211  * threaded so no locking needed.
1212  */
1213 static void
1214 fileset_insleafdirlist(fileset_t *fileset, filesetentry_t *entry)
1215 {
1216 	entry->fse_flags = FSE_TYPE_LEAFDIR | FSE_FREE;
1217 	avl_add(&fileset->fs_free_leaf_dirs, entry);
1218 
1219 	if (fileset->fs_leafdirlist == NULL) {
1220 		fileset->fs_leafdirlist = entry;
1221 		entry->fse_nextoftype = NULL;
1222 	} else {
1223 		entry->fse_nextoftype = fileset->fs_leafdirlist;
1224 		fileset->fs_leafdirlist = entry;
1225 	}
1226 }
1227 
1228 /*
1229  * Compares two fileset entries to determine their relative order
1230  */
1231 static int
1232 fileset_entry_compare(const void *node_1, const void *node_2)
1233 {
1234 	if (((filesetentry_t *)node_1)->fse_index <
1235 	    ((filesetentry_t *)node_2)->fse_index)
1236 		return (-1);
1237 
1238 	if (((filesetentry_t *)node_1)->fse_index ==
1239 	    ((filesetentry_t *)node_2)->fse_index)
1240 		return (0);
1241 
1242 	return (1);
1243 }
1244 
1245 /*
1246  * Obtains a filesetentry entity for a file to be placed in a
1247  * (sub)directory of a fileset. The size of the file may be
1248  * specified by fileset_meansize, or calculated from a gamma
1249  * distribution of parameter fileset_sizegamma and of mean size
1250  * fileset_meansize. The filesetentry entity is placed on the file
1251  * list in the specified parent filesetentry entity, which may
1252  * be a directory filesetentry, or the root filesetentry in the
1253  * fileset. It is also placed on the fileset's list of all
1254  * contained files. Returns FILEBENCH_OK if successful or FILEBENCH_ERROR
1255  * if ipc memory for the path string cannot be allocated.
1256  */
1257 static int
1258 fileset_populate_file(fileset_t *fileset, filesetentry_t *parent, int serial)
1259 {
1260 	char tmpname[16];
1261 	filesetentry_t *entry;
1262 	double drand;
1263 	uint_t index;
1264 
1265 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
1266 	    == NULL) {
1267 		filebench_log(LOG_ERROR,
1268 		    "fileset_populate_file: Can't malloc filesetentry");
1269 		return (FILEBENCH_ERROR);
1270 	}
1271 
1272 	/* Another currently idle file */
1273 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
1274 	index = fileset->fs_idle_files++;
1275 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
1276 
1277 	entry->fse_index = index;
1278 	entry->fse_parent = parent;
1279 	entry->fse_fileset = fileset;
1280 	fileset_insfilelist(fileset, entry);
1281 
1282 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
1283 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
1284 		filebench_log(LOG_ERROR,
1285 		    "fileset_populate_file: Can't alloc path string");
1286 		return (FILEBENCH_ERROR);
1287 	}
1288 
1289 	/* see if random variable was supplied for file size */
1290 	if (fileset->fs_meansize == -1) {
1291 		entry->fse_size = (off64_t)avd_get_int(fileset->fs_size);
1292 	} else {
1293 		double gamma;
1294 
1295 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
1296 		if (gamma > 0) {
1297 			drand = gamma_dist_knuth(gamma,
1298 			    fileset->fs_meansize / gamma);
1299 			entry->fse_size = (off64_t)drand;
1300 		} else {
1301 			entry->fse_size = (off64_t)fileset->fs_meansize;
1302 		}
1303 	}
1304 
1305 	fileset->fs_bytes += entry->fse_size;
1306 
1307 	fileset->fs_realfiles++;
1308 	return (FILEBENCH_OK);
1309 }
1310 
1311 /*
1312  * Obtaines a filesetentry entity for a leaf directory to be placed in a
1313  * (sub)directory of a fileset. The leaf directory will always be empty so
1314  * it can be created and deleted (mkdir, rmdir) at will. The filesetentry
1315  * entity is placed on the leaf directory list in the specified parent
1316  * filesetentry entity, which may be a (sub) directory filesetentry, or
1317  * the root filesetentry in the fileset. It is also placed on the fileset's
1318  * list of all contained leaf directories. Returns FILEBENCH_OK if successful
1319  * or FILEBENCH_ERROR if ipc memory cannot be allocated.
1320  */
1321 static int
1322 fileset_populate_leafdir(fileset_t *fileset, filesetentry_t *parent, int serial)
1323 {
1324 	char tmpname[16];
1325 	filesetentry_t *entry;
1326 	uint_t index;
1327 
1328 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
1329 	    == NULL) {
1330 		filebench_log(LOG_ERROR,
1331 		    "fileset_populate_file: Can't malloc filesetentry");
1332 		return (FILEBENCH_ERROR);
1333 	}
1334 
1335 	/* Another currently idle leaf directory */
1336 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
1337 	index = fileset->fs_idle_leafdirs++;
1338 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
1339 
1340 	entry->fse_index = index;
1341 	entry->fse_parent = parent;
1342 	entry->fse_fileset = fileset;
1343 	fileset_insleafdirlist(fileset, entry);
1344 
1345 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
1346 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
1347 		filebench_log(LOG_ERROR,
1348 		    "fileset_populate_file: Can't alloc path string");
1349 		return (FILEBENCH_ERROR);
1350 	}
1351 
1352 	fileset->fs_realleafdirs++;
1353 	return (FILEBENCH_OK);
1354 }
1355 
1356 /*
1357  * Creates a directory node in a fileset, by obtaining a
1358  * filesetentry entity for the node and initializing it
1359  * according to parameters of the fileset. It determines a
1360  * directory tree depth and directory width, optionally using
1361  * a gamma distribution. If its calculated depth is less then
1362  * its actual depth in the directory tree, it becomes a leaf
1363  * node and files itself with "width" number of file type
1364  * filesetentries, otherwise it files itself with "width"
1365  * number of directory type filesetentries, using recursive
1366  * calls to fileset_populate_subdir. The end result of the
1367  * initial call to this routine is a tree of directories of
1368  * random width and varying depth with sufficient leaf
1369  * directories to contain all required files.
1370  * Returns FILEBENCH_OK on success. Returns FILEBENCH_ERROR if ipc path
1371  * string memory cannot be allocated and returns the error code (currently
1372  * also FILEBENCH_ERROR) from calls to fileset_populate_file or recursive
1373  * calls to fileset_populate_subdir.
1374  */
1375 static int
1376 fileset_populate_subdir(fileset_t *fileset, filesetentry_t *parent,
1377     int serial, double depth)
1378 {
1379 	double randepth, drand, ranwidth;
1380 	int isleaf = 0;
1381 	char tmpname[16];
1382 	filesetentry_t *entry;
1383 	int i;
1384 	uint_t index;
1385 
1386 	depth += 1;
1387 
1388 	/* Create dir node */
1389 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
1390 	    == NULL) {
1391 		filebench_log(LOG_ERROR,
1392 		    "fileset_populate_subdir: Can't malloc filesetentry");
1393 		return (FILEBENCH_ERROR);
1394 	}
1395 
1396 	/* another idle directory */
1397 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
1398 	index = fileset->fs_idle_dirs++;
1399 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
1400 
1401 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
1402 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
1403 		filebench_log(LOG_ERROR,
1404 		    "fileset_populate_subdir: Can't alloc path string");
1405 		return (FILEBENCH_ERROR);
1406 	}
1407 
1408 	entry->fse_index = index;
1409 	entry->fse_parent = parent;
1410 	entry->fse_fileset = fileset;
1411 	fileset_insdirlist(fileset, entry);
1412 
1413 	if (fileset->fs_dirdepthrv) {
1414 		randepth = (int)avd_get_int(fileset->fs_dirdepthrv);
1415 	} else {
1416 		double gamma;
1417 
1418 		gamma = avd_get_int(fileset->fs_dirgamma) / 1000.0;
1419 		if (gamma > 0) {
1420 			drand = gamma_dist_knuth(gamma,
1421 			    fileset->fs_meandepth / gamma);
1422 			randepth = (int)drand;
1423 		} else {
1424 			randepth = (int)fileset->fs_meandepth;
1425 		}
1426 	}
1427 
1428 	if (fileset->fs_meanwidth == -1) {
1429 		ranwidth = avd_get_dbl(fileset->fs_dirwidth);
1430 	} else {
1431 		double gamma;
1432 
1433 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
1434 		if (gamma > 0) {
1435 			drand = gamma_dist_knuth(gamma,
1436 			    fileset->fs_meanwidth / gamma);
1437 			ranwidth = drand;
1438 		} else {
1439 			ranwidth = fileset->fs_meanwidth;
1440 		}
1441 	}
1442 
1443 	if (randepth == 0)
1444 		randepth = 1;
1445 	if (ranwidth == 0)
1446 		ranwidth = 1;
1447 	if (depth >= randepth)
1448 		isleaf = 1;
1449 
1450 	/*
1451 	 * Create directory of random width filled with files according
1452 	 * to distribution, or if root directory, continue until #files required
1453 	 */
1454 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
1455 	    (fileset->fs_realfiles < fileset->fs_constentries);
1456 	    i++) {
1457 		int ret = 0;
1458 
1459 		if (parent && isleaf)
1460 			ret = fileset_populate_file(fileset, entry, i);
1461 		else
1462 			ret = fileset_populate_subdir(fileset, entry, i, depth);
1463 
1464 		if (ret != 0)
1465 			return (ret);
1466 	}
1467 
1468 	/*
1469 	 * Create directory of random width filled with leaf directories
1470 	 * according to distribution, or if root directory, continue until
1471 	 * the number of leaf directories required has been generated.
1472 	 */
1473 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
1474 	    (fileset->fs_realleafdirs < fileset->fs_constleafdirs);
1475 	    i++) {
1476 		int ret = 0;
1477 
1478 		if (parent && isleaf)
1479 			ret = fileset_populate_leafdir(fileset, entry, i);
1480 		else
1481 			ret = fileset_populate_subdir(fileset, entry, i, depth);
1482 
1483 		if (ret != 0)
1484 			return (ret);
1485 	}
1486 
1487 	return (FILEBENCH_OK);
1488 }
1489 
1490 /*
1491  * Populates a fileset with files and subdirectory entries. Uses
1492  * the supplied fileset_dirwidth and fileset_entries (number of files) to
1493  * calculate the required fileset_meandepth (of subdirectories) and
1494  * initialize the fileset_meanwidth and fileset_meansize variables. Then
1495  * calls fileset_populate_subdir() to do the recursive
1496  * subdirectory entry creation and leaf file entry creation. All
1497  * of the above is skipped if the fileset has already been
1498  * populated. Returns 0 on success, or an error code from the
1499  * call to fileset_populate_subdir if that call fails.
1500  */
1501 static int
1502 fileset_populate(fileset_t *fileset)
1503 {
1504 	fbint_t entries = avd_get_int(fileset->fs_entries);
1505 	fbint_t leafdirs = avd_get_int(fileset->fs_leafdirs);
1506 	int meandirwidth;
1507 	int ret;
1508 
1509 	/* Skip if already populated */
1510 	if (fileset->fs_bytes > 0)
1511 		goto exists;
1512 
1513 #ifdef HAVE_RAW_SUPPORT
1514 	/* check for raw device */
1515 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
1516 		return (FILEBENCH_OK);
1517 #endif /* HAVE_RAW_SUPPORT */
1518 
1519 	/*
1520 	 * save value of entries and leaf dirs obtained for later
1521 	 * in case it was random
1522 	 */
1523 	fileset->fs_constentries = entries;
1524 	fileset->fs_constleafdirs = leafdirs;
1525 
1526 	/* initialize idle files and directories condition variables */
1527 	(void) pthread_cond_init(&fileset->fs_idle_files_cv, ipc_condattr());
1528 	(void) pthread_cond_init(&fileset->fs_idle_dirs_cv, ipc_condattr());
1529 	(void) pthread_cond_init(&fileset->fs_idle_leafdirs_cv, ipc_condattr());
1530 
1531 	/* no files or dirs idle (or busy) yet */
1532 	fileset->fs_idle_files = 0;
1533 	fileset->fs_idle_dirs = 0;
1534 	fileset->fs_idle_leafdirs = 0;
1535 
1536 	/* initialize locks and other condition variables */
1537 	(void) pthread_mutex_init(&fileset->fs_pick_lock,
1538 	    ipc_mutexattr(IPC_MUTEX_NORMAL));
1539 	(void) pthread_mutex_init(&fileset->fs_histo_lock,
1540 	    ipc_mutexattr(IPC_MUTEX_NORMAL));
1541 	(void) pthread_cond_init(&fileset->fs_thrd_wait_cv, ipc_condattr());
1542 
1543 	/* Initialize avl btrees */
1544 	avl_create(&(fileset->fs_free_files), fileset_entry_compare,
1545 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1546 	avl_create(&(fileset->fs_noex_files), fileset_entry_compare,
1547 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1548 	avl_create(&(fileset->fs_exist_files), fileset_entry_compare,
1549 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1550 	avl_create(&(fileset->fs_free_leaf_dirs), fileset_entry_compare,
1551 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1552 	avl_create(&(fileset->fs_noex_leaf_dirs), fileset_entry_compare,
1553 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1554 	avl_create(&(fileset->fs_exist_leaf_dirs), fileset_entry_compare,
1555 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1556 	avl_create(&(fileset->fs_dirs), fileset_entry_compare,
1557 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1558 
1559 	/* is dirwidth a random variable? */
1560 	if (AVD_IS_RANDOM(fileset->fs_dirwidth)) {
1561 		meandirwidth =
1562 		    (int)fileset->fs_dirwidth->avd_val.randptr->rnd_dbl_mean;
1563 		fileset->fs_meanwidth = -1;
1564 	} else {
1565 		meandirwidth = (int)avd_get_int(fileset->fs_dirwidth);
1566 		fileset->fs_meanwidth = (double)meandirwidth;
1567 	}
1568 
1569 	/*
1570 	 * Input params are:
1571 	 *	# of files
1572 	 *	ave # of files per dir
1573 	 *	max size of dir
1574 	 *	# ave size of file
1575 	 *	max size of file
1576 	 */
1577 	fileset->fs_meandepth = log(entries+leafdirs) / log(meandirwidth);
1578 
1579 	/* Has a random variable been supplied for dirdepth? */
1580 	if (fileset->fs_dirdepthrv) {
1581 		/* yes, so set the random variable's mean value to meandepth */
1582 		fileset->fs_dirdepthrv->avd_val.randptr->rnd_dbl_mean =
1583 		    fileset->fs_meandepth;
1584 	}
1585 
1586 	/* test for random size variable */
1587 	if (AVD_IS_RANDOM(fileset->fs_size))
1588 		fileset->fs_meansize = -1;
1589 	else
1590 		fileset->fs_meansize = avd_get_int(fileset->fs_size);
1591 
1592 	if ((ret = fileset_populate_subdir(fileset, NULL, 1, 0)) != 0)
1593 		return (ret);
1594 
1595 
1596 exists:
1597 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1598 		filebench_log(LOG_VERBOSE, "File %s: mbytes=%llu",
1599 		    avd_get_str(fileset->fs_name),
1600 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
1601 	} else {
1602 		filebench_log(LOG_VERBOSE, "Fileset %s: %d files, %d leafdirs "
1603 		    "avg dir = %d, avg depth = %.1lf, mbytes=%llu",
1604 		    avd_get_str(fileset->fs_name), entries, leafdirs,
1605 		    meandirwidth,
1606 		    fileset->fs_meandepth,
1607 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
1608 	}
1609 
1610 	return (FILEBENCH_OK);
1611 }
1612 
1613 /*
1614  * Allocates a fileset instance, initializes fileset_dirgamma and
1615  * fileset_sizegamma default values, and sets the fileset name to the
1616  * supplied name string. Puts the allocated fileset on the
1617  * master fileset list and returns a pointer to it.
1618  *
1619  * This routine implements the 'define fileset' calls found in a .f
1620  * workload, such as in the following example:
1621  * define fileset name=drew4ever, entries=$nfiles
1622  */
1623 fileset_t *
1624 fileset_define(avd_t name)
1625 {
1626 	fileset_t *fileset;
1627 
1628 	if (name == NULL)
1629 		return (NULL);
1630 
1631 	if ((fileset = (fileset_t *)ipc_malloc(FILEBENCH_FILESET)) == NULL) {
1632 		filebench_log(LOG_ERROR,
1633 		    "fileset_define: Can't malloc fileset");
1634 		return (NULL);
1635 	}
1636 
1637 	filebench_log(LOG_DEBUG_IMPL,
1638 	    "Defining file %s", avd_get_str(name));
1639 
1640 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1641 
1642 	fileset->fs_dirgamma = avd_int_alloc(1500);
1643 	fileset->fs_sizegamma = avd_int_alloc(1500);
1644 	fileset->fs_histo_id = -1;
1645 
1646 	/* Add fileset to global list */
1647 	if (filebench_shm->shm_filesetlist == NULL) {
1648 		filebench_shm->shm_filesetlist = fileset;
1649 		fileset->fs_next = NULL;
1650 	} else {
1651 		fileset->fs_next = filebench_shm->shm_filesetlist;
1652 		filebench_shm->shm_filesetlist = fileset;
1653 	}
1654 
1655 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1656 
1657 	fileset->fs_name = name;
1658 
1659 	return (fileset);
1660 }
1661 
1662 /*
1663  * If supplied with a pointer to a fileset and the fileset's
1664  * fileset_prealloc flag is set, calls fileset_populate() to populate
1665  * the fileset with filesetentries, then calls fileset_create()
1666  * to make actual directories and files for the filesetentries.
1667  * Otherwise, it applies fileset_populate() and fileset_create()
1668  * to all the filesets on the master fileset list. It always
1669  * returns zero (0) if one fileset is populated / created,
1670  * otherwise it returns the sum of returned values from
1671  * fileset_create() and fileset_populate(), which
1672  * will be a negative one (-1) times the number of
1673  * fileset_create() calls which failed.
1674  */
1675 int
1676 fileset_createset(fileset_t *fileset)
1677 {
1678 	fileset_t *list;
1679 	int ret = 0;
1680 
1681 	/* set up for possible parallel allocate */
1682 	filebench_shm->shm_fsparalloc_count = 0;
1683 	(void) pthread_cond_init(
1684 	    &filebench_shm->shm_fsparalloc_cv,
1685 	    ipc_condattr());
1686 
1687 	if (fileset && avd_get_bool(fileset->fs_prealloc)) {
1688 
1689 		/* check for raw files */
1690 		if (fileset_checkraw(fileset)) {
1691 			filebench_log(LOG_INFO,
1692 			    "file %s/%s is a RAW device",
1693 			    avd_get_str(fileset->fs_path),
1694 			    avd_get_str(fileset->fs_name));
1695 			return (FILEBENCH_OK);
1696 		}
1697 
1698 		filebench_log(LOG_INFO,
1699 		    "creating/pre-allocating %s %s",
1700 		    fileset_entity_name(fileset),
1701 		    avd_get_str(fileset->fs_name));
1702 
1703 		if ((ret = fileset_populate(fileset)) != FILEBENCH_OK)
1704 			return (ret);
1705 
1706 		if ((ret = fileset_create(fileset)) != FILEBENCH_OK)
1707 			return (ret);
1708 	} else {
1709 
1710 		filebench_log(LOG_INFO,
1711 		    "Creating/pre-allocating files and filesets");
1712 
1713 		list = filebench_shm->shm_filesetlist;
1714 		while (list) {
1715 			/* check for raw files */
1716 			if (fileset_checkraw(list)) {
1717 				filebench_log(LOG_INFO,
1718 				    "file %s/%s is a RAW device",
1719 				    avd_get_str(list->fs_path),
1720 				    avd_get_str(list->fs_name));
1721 				list = list->fs_next;
1722 				continue;
1723 			}
1724 
1725 			if ((ret = fileset_populate(list)) != FILEBENCH_OK)
1726 				return (ret);
1727 
1728 			if ((ret = fileset_create(list)) != FILEBENCH_OK)
1729 				return (ret);
1730 
1731 			list = list->fs_next;
1732 		}
1733 	}
1734 
1735 	/* wait for allocation threads to finish */
1736 	filebench_log(LOG_INFO,
1737 	    "waiting for fileset pre-allocation to finish");
1738 
1739 	(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
1740 	while (filebench_shm->shm_fsparalloc_count > 0)
1741 		(void) pthread_cond_wait(
1742 		    &filebench_shm->shm_fsparalloc_cv,
1743 		    &filebench_shm->shm_fsparalloc_lock);
1744 	(void) pthread_mutex_unlock(&filebench_shm->shm_fsparalloc_lock);
1745 
1746 	if (filebench_shm->shm_fsparalloc_count < 0)
1747 		return (FILEBENCH_ERROR);
1748 
1749 	return (FILEBENCH_OK);
1750 }
1751 
1752 /*
1753  * Searches through the master fileset list for the named fileset.
1754  * If found, returns pointer to same, otherwise returns NULL.
1755  */
1756 fileset_t *
1757 fileset_find(char *name)
1758 {
1759 	fileset_t *fileset = filebench_shm->shm_filesetlist;
1760 
1761 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1762 
1763 	while (fileset) {
1764 		if (strcmp(name, avd_get_str(fileset->fs_name)) == 0) {
1765 			(void) ipc_mutex_unlock(
1766 			    &filebench_shm->shm_fileset_lock);
1767 			return (fileset);
1768 		}
1769 		fileset = fileset->fs_next;
1770 	}
1771 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1772 
1773 	return (NULL);
1774 }
1775 
1776 /*
1777  * Iterates over all the file sets in the filesetlist,
1778  * executing the supplied command "*cmd()" on them. Also
1779  * indicates to the executed command if it is the first
1780  * time the command has been executed since the current
1781  * call to fileset_iter.
1782  */
1783 int
1784 fileset_iter(int (*cmd)(fileset_t *fileset, int first))
1785 {
1786 	fileset_t *fileset = filebench_shm->shm_filesetlist;
1787 	int count = 0;
1788 
1789 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1790 
1791 	while (fileset) {
1792 		if (cmd(fileset, count == 0) == FILEBENCH_ERROR) {
1793 			(void) ipc_mutex_unlock(
1794 			    &filebench_shm->shm_fileset_lock);
1795 			return (FILEBENCH_ERROR);
1796 		}
1797 		fileset = fileset->fs_next;
1798 		count++;
1799 	}
1800 
1801 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1802 	return (FILEBENCH_OK);
1803 }
1804 
1805 /*
1806  * Prints information to the filebench log about the file
1807  * object. Also prints a header on the first call.
1808  */
1809 int
1810 fileset_print(fileset_t *fileset, int first)
1811 {
1812 	int pathlength;
1813 	char *fileset_path;
1814 	char *fileset_name;
1815 	static char pad[] = "                              "; /* 30 spaces */
1816 
1817 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
1818 		filebench_log(LOG_ERROR, "%s path not set",
1819 		    fileset_entity_name(fileset));
1820 		return (FILEBENCH_ERROR);
1821 	}
1822 
1823 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
1824 		filebench_log(LOG_ERROR, "%s name not set",
1825 		    fileset_entity_name(fileset));
1826 		return (FILEBENCH_ERROR);
1827 	}
1828 
1829 	pathlength = strlen(fileset_path) + strlen(fileset_name);
1830 
1831 	if (pathlength > 29)
1832 		pathlength = 29;
1833 
1834 	if (first) {
1835 		filebench_log(LOG_INFO, "File or Fileset name%20s%12s%10s",
1836 		    "file size",
1837 		    "dir width",
1838 		    "entries");
1839 	}
1840 
1841 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1842 		if (fileset->fs_attrs & FILESET_IS_RAW_DEV) {
1843 			filebench_log(LOG_INFO,
1844 			    "%s/%s%s         (Raw Device)",
1845 			    fileset_path, fileset_name, &pad[pathlength]);
1846 		} else {
1847 			filebench_log(LOG_INFO,
1848 			    "%s/%s%s%9llu     (Single File)",
1849 			    fileset_path, fileset_name, &pad[pathlength],
1850 			    (u_longlong_t)avd_get_int(fileset->fs_size));
1851 		}
1852 	} else {
1853 		filebench_log(LOG_INFO, "%s/%s%s%9llu%12llu%10llu",
1854 		    fileset_path, fileset_name,
1855 		    &pad[pathlength],
1856 		    (u_longlong_t)avd_get_int(fileset->fs_size),
1857 		    (u_longlong_t)avd_get_int(fileset->fs_dirwidth),
1858 		    (u_longlong_t)fileset->fs_constentries);
1859 	}
1860 	return (FILEBENCH_OK);
1861 }
1862 
1863 /*
1864  * checks to see if the path/name pair points to a raw device. If
1865  * so it sets the raw device flag (FILESET_IS_RAW_DEV) and returns 1.
1866  * If RAW is not defined, or it is not a raw device, it clears the
1867  * raw device flag and returns 0.
1868  */
1869 int
1870 fileset_checkraw(fileset_t *fileset)
1871 {
1872 	char path[MAXPATHLEN];
1873 	struct stat64 sb;
1874 	char *pathname;
1875 	char *setname;
1876 
1877 	fileset->fs_attrs &= (~FILESET_IS_RAW_DEV);
1878 
1879 #ifdef HAVE_RAW_SUPPORT
1880 	/* check for raw device */
1881 	if ((pathname = avd_get_str(fileset->fs_path)) == NULL)
1882 		return (FILEBENCH_OK);
1883 
1884 	if ((setname = avd_get_str(fileset->fs_name)) == NULL)
1885 		return (FILEBENCH_OK);
1886 
1887 	(void) fb_strlcpy(path, pathname, MAXPATHLEN);
1888 	(void) fb_strlcat(path, "/", MAXPATHLEN);
1889 	(void) fb_strlcat(path, setname, MAXPATHLEN);
1890 	if ((stat64(path, &sb) == 0) &&
1891 	    ((sb.st_mode & S_IFMT) == S_IFBLK) && sb.st_rdev) {
1892 		fileset->fs_attrs |= FILESET_IS_RAW_DEV;
1893 		if (!(fileset->fs_attrs & FILESET_IS_FILE)) {
1894 			filebench_log(LOG_ERROR,
1895 			    "WARNING Fileset %s/%s Cannot be RAW device",
1896 			    avd_get_str(fileset->fs_path),
1897 			    avd_get_str(fileset->fs_name));
1898 			filebench_shutdown(1);
1899 		}
1900 
1901 		return (1);
1902 	}
1903 #endif /* HAVE_RAW_SUPPORT */
1904 
1905 	return (FILEBENCH_OK);
1906 }
1907