xref: /onnv-gate/usr/src/cmd/filebench/common/fileset.c (revision 8404:b96b8ad1c3e9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Portions Copyright 2008 Denis Cheng
26  */
27 
28 #include <fcntl.h>
29 #include <pthread.h>
30 #include <errno.h>
31 #include <math.h>
32 #include <libgen.h>
33 #include <sys/mman.h>
34 #include <sys/shm.h>
35 
36 #include "filebench.h"
37 #include "fileset.h"
38 #include "gamma_dist.h"
39 #include "utils.h"
40 
41 /*
42  * File sets, of type fileset_t, are entities which contain
43  * information about collections of files and subdirectories in Filebench.
44  * The fileset, once populated, consists of a tree of fileset entries of
45  * type filesetentry_t which specify files and directories.  The fileset
46  * is rooted in a directory specified by fileset_path, and once the populated
47  * fileset has been created, has a tree of directories and files
48  * corresponding to the fileset's filesetentry tree.
49  *
 * Fileset entities are allocated by fileset_define() which is called from
 * parser_gram.y: parser_fileset_define(). The filesetentry tree corresponding
 * to the eventual directory and file tree to be instantiated on the storage
 * medium is built by fileset_populate(), which is called from
 * fileset_createset(). After calling fileset_populate(),
 * fileset_createset() will call fileset_create() to pre-allocate
 * designated files and directories.
57  *
58  * Fileset_createset() is called from parser_gram.y: parser_create_fileset()
59  * when a "create fileset" or "run" command is encountered. When the
60  * "create fileset" command is used, it is generally paired with
61  * a "create processes" command, and must appear first, in order to
62  * instantiate all the files in the fileset before trying to use them.
63  */
64 
/*
 * Forward declaration of a file-local routine; being static, its
 * definition has to appear in this same source file.
 */
static int fileset_checkraw(fileset_t *fileset);

/*
 * maximum parallel allocation control
 * NOTE(review): presumably the cap on concurrently running file
 * allocation threads (see fileset_alloc_thread()) -- confirm at the
 * point of use.
 */
#define	MAX_PARALLOC_THREADS 32
69 
70 /*
71  * returns pointer to file or fileset
72  * string, as appropriate
73  */
74 static char *
75 fileset_entity_name(fileset_t *fileset)
76 {
77 	if (fileset->fs_attrs & FILESET_IS_FILE)
78 		return ("file");
79 	else
80 		return ("fileset");
81 }
82 
/*
 * Strips the last component from a pathname, in place.
 * Working backwards from the terminating \0, every character is
 * overwritten with \0 until a forward slash '/' has been overwritten
 * too. The very first character of the string is never examined or
 * cleared, so "/abc" becomes "/" and "abc" becomes "a".
 * Returns the "dir" pointer that was passed in.
 */
static char *
trunc_dirname(char *dir)
{
	char *cursor;

	for (cursor = dir + strlen(dir); cursor != dir; cursor--) {
		char erased = *cursor;

		*cursor = '\0';
		if (erased == '/')
			break;
	}

	return (dir);
}
104 
/*
 * Prints a list of allowed options and how to specify them.
 * The text is written to stderr, one option per line, and is
 * followed by an empty line.
 */
void
fileset_usage(void)
{
	/* exactly one comma separates the path and entries attributes */
	(void) fprintf(stderr,
	    "define [file name=<name> | fileset name=<name>],path=<pathname>"
	    ",entries=<number>\n");
	(void) fprintf(stderr,
	    "		        [,filesize=[size]]\n");
	(void) fprintf(stderr,
	    "		        [,dirwidth=[width]]\n");
	(void) fprintf(stderr,
	    "		        [,dirdepthrv=$random_variable_name]\n");
	(void) fprintf(stderr,
	    "		        [,dirgamma=[100-10000]] "
	    "(Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,sizegamma=[100-10000]] (Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,prealloc=[percent]]\n");
	(void) fprintf(stderr, "		        [,paralloc]\n");
	(void) fprintf(stderr, "		        [,reuse]\n");
	(void) fprintf(stderr, "\n");
}
131 
132 /*
133  * Frees up memory mapped file region of supplied size. The
134  * file descriptor "fd" indicates which memory mapped file.
135  * If successful, returns 0. Otherwise returns -1 if "size"
136  * is zero, or -1 times the number of times msync() failed.
137  */
138 static int
139 fileset_freemem(int fd, off64_t size)
140 {
141 	off64_t left;
142 	int ret = 0;
143 
144 	for (left = size; left > 0; left -= MMAP_SIZE) {
145 		off64_t thismapsize;
146 		caddr_t addr;
147 
148 		thismapsize = MIN(MMAP_SIZE, left);
149 		addr = mmap64(0, thismapsize, PROT_READ|PROT_WRITE,
150 		    MAP_SHARED, fd, size - left);
151 		ret += msync(addr, thismapsize, MS_INVALIDATE);
152 		(void) munmap(addr, thismapsize);
153 	}
154 	return (ret);
155 }
156 
157 /*
158  * Creates a path string from the filesetentry_t "*entry"
159  * and all of its parent's path names. The resulting path
160  * is a concatination of all the individual parent paths.
161  * Allocates memory for the path string and returns a
162  * pointer to it.
163  */
164 char *
165 fileset_resolvepath(filesetentry_t *entry)
166 {
167 	filesetentry_t *fsep = entry;
168 	char path[MAXPATHLEN];
169 	char pathtmp[MAXPATHLEN];
170 	char *s;
171 
172 	path[0] = '\0';
173 	while (fsep->fse_parent) {
174 		(void) strcpy(pathtmp, "/");
175 		(void) fb_strlcat(pathtmp, fsep->fse_path, MAXPATHLEN);
176 		(void) fb_strlcat(pathtmp, path, MAXPATHLEN);
177 		(void) fb_strlcpy(path, pathtmp, MAXPATHLEN);
178 		fsep = fsep->fse_parent;
179 	}
180 
181 	s = malloc(strlen(path) + 1);
182 	(void) fb_strlcpy(s, path, MAXPATHLEN);
183 	return (s);
184 }
185 
186 /*
187  * Creates multiple nested directories as required by the
188  * supplied path. Starts at the end of the path, creating
189  * a list of directories to mkdir, up to the root of the
190  * path, then mkdirs them one at a time from the root on down.
191  */
192 static int
193 fileset_mkdir(char *path, int mode)
194 {
195 	char *p;
196 	char *dirs[65536];
197 	int i = 0;
198 
199 	if ((p = strdup(path)) == NULL)
200 		goto null_str;
201 
202 	/*
203 	 * Fill an array of subdirectory path names until either we
204 	 * reach the root or encounter an already existing subdirectory
205 	 */
206 	/* CONSTCOND */
207 	while (1) {
208 		struct stat64 sb;
209 
210 		if (stat64(p, &sb) == 0)
211 			break;
212 		if (strlen(p) < 3)
213 			break;
214 		if ((dirs[i] = strdup(p)) == NULL) {
215 			free(p);
216 			goto null_str;
217 		}
218 
219 		(void) trunc_dirname(p);
220 		i++;
221 	}
222 
223 	/* Make the directories, from closest to root downwards. */
224 	for (--i; i >= 0; i--) {
225 		(void) mkdir(dirs[i], mode);
226 		free(dirs[i]);
227 	}
228 
229 	free(p);
230 	return (FILEBENCH_OK);
231 
232 null_str:
233 	/* clean up */
234 	for (--i; i >= 0; i--)
235 		free(dirs[i]);
236 
237 	filebench_log(LOG_ERROR,
238 	    "Failed to create directory path %s: Out of memory", path);
239 	return (FILEBENCH_ERROR);
240 }
241 
242 /*
243  * creates the subdirectory tree for a fileset.
244  */
245 static int
246 fileset_create_subdirs(fileset_t *fileset, char *filesetpath)
247 {
248 	filesetentry_t *direntry;
249 	char full_path[MAXPATHLEN];
250 	char *part_path;
251 
252 	/* walk the subdirectory list, enstanciating subdirs */
253 	direntry = fileset->fs_dirlist;
254 	while (direntry) {
255 		(void) fb_strlcpy(full_path, filesetpath, MAXPATHLEN);
256 		part_path = fileset_resolvepath(direntry);
257 		(void) fb_strlcat(full_path, part_path, MAXPATHLEN);
258 		free(part_path);
259 
260 		/* now create this portion of the subdirectory tree */
261 		if (fileset_mkdir(full_path, 0755) == FILEBENCH_ERROR)
262 			return (FILEBENCH_ERROR);
263 
264 		direntry = direntry->fse_nextoftype;
265 	}
266 	return (FILEBENCH_OK);
267 }
268 
269 /*
270  * move filesetentry between exist tree and non-exist tree, source_tree
271  * to destination tree.
272  */
273 static void
274 fileset_move_entry(avl_tree_t *src_tree, avl_tree_t *dst_tree,
275     filesetentry_t *entry)
276 {
277 	avl_remove(src_tree, entry);
278 	avl_add(dst_tree, entry);
279 }
280 
281 /*
282  * given a fileset entry, determines if the associated leaf directory
283  * needs to be made or not, and if so does the mkdir.
284  */
285 static int
286 fileset_alloc_leafdir(filesetentry_t *entry)
287 {
288 	fileset_t *fileset;
289 	char path[MAXPATHLEN];
290 	struct stat64 sb;
291 	char *pathtmp;
292 
293 	fileset = entry->fse_fileset;
294 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
295 	(void) fb_strlcat(path, "/", MAXPATHLEN);
296 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
297 	pathtmp = fileset_resolvepath(entry);
298 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
299 	free(pathtmp);
300 
301 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
302 
303 	/* see if not reusing and this directory does not exist */
304 	if (!((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0))) {
305 
306 		/* No file or not reusing, so create */
307 		if (mkdir(path, 0755) < 0) {
308 			filebench_log(LOG_ERROR,
309 			    "Failed to pre-allocate leaf directory %s: %s",
310 			    path, strerror(errno));
311 			fileset_unbusy(entry, TRUE, FALSE, 0);
312 			return (FILEBENCH_ERROR);
313 		}
314 	}
315 
316 	/* unbusy the allocated entry */
317 	fileset_unbusy(entry, TRUE, TRUE, 0);
318 	return (FILEBENCH_OK);
319 }
320 
321 /*
322  * given a fileset entry, determines if the associated file
323  * needs to be allocated or not, and if so does the allocation.
324  */
325 static int
326 fileset_alloc_file(filesetentry_t *entry)
327 {
328 	fileset_t *fileset;
329 	char path[MAXPATHLEN];
330 	char *buf;
331 	struct stat64 sb;
332 	char *pathtmp;
333 	off64_t seek;
334 	int fd;
335 
336 	fileset = entry->fse_fileset;
337 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
338 	(void) fb_strlcat(path, "/", MAXPATHLEN);
339 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
340 	pathtmp = fileset_resolvepath(entry);
341 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
342 	free(pathtmp);
343 
344 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
345 
346 	/* see if reusing and this file exists */
347 	if ((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0)) {
348 		if ((fd = open64(path, O_RDWR)) < 0) {
349 			filebench_log(LOG_INFO,
350 			    "Attempted but failed to Re-use file %s",
351 			    path);
352 			fileset_unbusy(entry, TRUE, FALSE, 0);
353 			return (FILEBENCH_ERROR);
354 		}
355 
356 		if (sb.st_size == (off64_t)entry->fse_size) {
357 			filebench_log(LOG_DEBUG_IMPL,
358 			    "Re-using file %s", path);
359 
360 			if (!avd_get_bool(fileset->fs_cached))
361 				(void) fileset_freemem(fd,
362 				    entry->fse_size);
363 
364 			(void) close(fd);
365 
366 			/* unbusy the allocated entry */
367 			fileset_unbusy(entry, TRUE, TRUE, 0);
368 			return (FILEBENCH_OK);
369 
370 		} else if (sb.st_size > (off64_t)entry->fse_size) {
371 			/* reuse, but too large */
372 			filebench_log(LOG_DEBUG_IMPL,
373 			    "Truncating & re-using file %s", path);
374 
375 #ifdef HAVE_FTRUNCATE64
376 			(void) ftruncate64(fd, (off64_t)entry->fse_size);
377 #else
378 			(void) ftruncate(fd, (off_t)entry->fse_size);
379 #endif
380 
381 			if (!avd_get_bool(fileset->fs_cached))
382 				(void) fileset_freemem(fd,
383 				    entry->fse_size);
384 
385 			(void) close(fd);
386 
387 			/* unbusy the allocated entry */
388 			fileset_unbusy(entry, TRUE, TRUE, 0);
389 			return (FILEBENCH_OK);
390 		}
391 	} else {
392 
393 		/* No file or not reusing, so create */
394 		if ((fd = open64(path, O_RDWR | O_CREAT, 0644)) < 0) {
395 			filebench_log(LOG_ERROR,
396 			    "Failed to pre-allocate file %s: %s",
397 			    path, strerror(errno));
398 
399 			/* unbusy the unallocated entry */
400 			fileset_unbusy(entry, TRUE, FALSE, 0);
401 			return (FILEBENCH_ERROR);
402 		}
403 	}
404 
405 	if ((buf = (char *)malloc(FILE_ALLOC_BLOCK)) == NULL) {
406 		/* unbusy the unallocated entry */
407 		fileset_unbusy(entry, TRUE, FALSE, 0);
408 		return (FILEBENCH_ERROR);
409 	}
410 
411 	for (seek = 0; seek < entry->fse_size; ) {
412 		off64_t wsize;
413 		int ret = 0;
414 
415 		/*
416 		 * Write FILE_ALLOC_BLOCK's worth,
417 		 * except on last write
418 		 */
419 		wsize = MIN(entry->fse_size - seek, FILE_ALLOC_BLOCK);
420 
421 		ret = write(fd, buf, wsize);
422 		if (ret != wsize) {
423 			filebench_log(LOG_ERROR,
424 			    "Failed to pre-allocate file %s: %s",
425 			    path, strerror(errno));
426 			(void) close(fd);
427 			free(buf);
428 			fileset_unbusy(entry, TRUE, FALSE, 0);
429 			return (FILEBENCH_ERROR);
430 		}
431 		seek += wsize;
432 	}
433 
434 	if (!avd_get_bool(fileset->fs_cached))
435 		(void) fileset_freemem(fd, entry->fse_size);
436 
437 	(void) close(fd);
438 
439 	free(buf);
440 
441 	/* unbusy the allocated entry */
442 	fileset_unbusy(entry, TRUE, TRUE, 0);
443 
444 	filebench_log(LOG_DEBUG_IMPL,
445 	    "Pre-allocated file %s size %llu",
446 	    path, (u_longlong_t)entry->fse_size);
447 
448 	return (FILEBENCH_OK);
449 }
450 
451 /*
452  * given a fileset entry, determines if the associated file
453  * needs to be allocated or not, and if so does the allocation.
454  * Sets shm_fsparalloc_count to -1 on error.
455  */
456 static void *
457 fileset_alloc_thread(filesetentry_t *entry)
458 {
459 	if (fileset_alloc_file(entry) == FILEBENCH_ERROR) {
460 		(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
461 		filebench_shm->shm_fsparalloc_count = -1;
462 	} else {
463 		(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
464 		filebench_shm->shm_fsparalloc_count--;
465 	}
466 
467 	(void) pthread_cond_signal(&filebench_shm->shm_fsparalloc_cv);
468 	(void) pthread_mutex_unlock(&filebench_shm->shm_fsparalloc_lock);
469 
470 	pthread_exit(NULL);
471 	return (NULL);
472 }
473 
474 
475 /*
476  * First creates the parent directories of the file using
477  * fileset_mkdir(). Then Optionally sets the O_DSYNC flag
478  * and opens the file with open64(). It unlocks the fileset
479  * entry lock, sets the DIRECTIO_ON or DIRECTIO_OFF flags
480  * as requested, and returns the file descriptor integer
481  * for the opened file.
482  */
483 int
484 fileset_openfile(fileset_t *fileset,
485     filesetentry_t *entry, int flag, int filemode, int attrs)
486 {
487 	char path[MAXPATHLEN];
488 	char dir[MAXPATHLEN];
489 	char *pathtmp;
490 	struct stat64 sb;
491 	int fd;
492 	int open_attrs = 0;
493 
494 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
495 	(void) fb_strlcat(path, "/", MAXPATHLEN);
496 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
497 	pathtmp = fileset_resolvepath(entry);
498 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
499 	(void) fb_strlcpy(dir, path, MAXPATHLEN);
500 	free(pathtmp);
501 	(void) trunc_dirname(dir);
502 
503 	/* If we are going to create a file, create the parent dirs */
504 	if ((flag & O_CREAT) && (stat64(dir, &sb) != 0)) {
505 		if (fileset_mkdir(dir, 0755) == FILEBENCH_ERROR)
506 			return (FILEBENCH_ERROR);
507 	}
508 
509 	if (attrs & FLOW_ATTR_DSYNC) {
510 #ifdef sun
511 		open_attrs |= O_DSYNC;
512 #else
513 		open_attrs |= O_FSYNC;
514 #endif
515 	}
516 
517 	if ((fd = open64(path, flag | open_attrs, filemode)) < 0) {
518 		filebench_log(LOG_ERROR,
519 		    "Failed to open file %d, %s, with status %x: %s",
520 		    entry->fse_index, path, entry->fse_flags, strerror(errno));
521 
522 		fileset_unbusy(entry, FALSE, FALSE, 0);
523 		return (FILEBENCH_ERROR);
524 	}
525 
526 	if (flag & O_CREAT)
527 		fileset_unbusy(entry, TRUE, TRUE, 1);
528 	else
529 		fileset_unbusy(entry, FALSE, FALSE, 1);
530 
531 #ifdef sun
532 	if (attrs & FLOW_ATTR_DIRECTIO)
533 		(void) directio(fd, DIRECTIO_ON);
534 	else
535 		(void) directio(fd, DIRECTIO_OFF);
536 #endif
537 
538 	return (fd);
539 }
540 
541 /*
542  * removes all filesetentries from their respective btrees, and puts them
543  * on the free list. The supplied argument indicates which free list to
544  * use.
545  */
546 static void
547 fileset_pickreset(fileset_t *fileset, int entry_type)
548 {
549 	filesetentry_t	*entry;
550 
551 	switch (entry_type & FILESET_PICKMASK) {
552 	case FILESET_PICKFILE:
553 		entry = (filesetentry_t *)avl_first(&fileset->fs_noex_files);
554 
555 		/* make sure non-existing files are marked free */
556 		while (entry) {
557 			entry->fse_flags |= FSE_FREE;
558 			entry->fse_open_cnt = 0;
559 			fileset_move_entry(&fileset->fs_noex_files,
560 			    &fileset->fs_free_files, entry);
561 			entry =  AVL_NEXT(&fileset->fs_noex_files, entry);
562 		}
563 
564 		/* free up any existing files */
565 		entry = (filesetentry_t *)avl_first(&fileset->fs_exist_files);
566 
567 		while (entry) {
568 			entry->fse_flags |= FSE_FREE;
569 			entry->fse_open_cnt = 0;
570 			fileset_move_entry(&fileset->fs_exist_files,
571 			    &fileset->fs_free_files, entry);
572 
573 			entry =  AVL_NEXT(&fileset->fs_exist_files, entry);
574 		}
575 
576 		break;
577 
578 	case FILESET_PICKDIR:
579 		/* nothing to reset, as all (sub)dirs always exist */
580 		break;
581 
582 	case FILESET_PICKLEAFDIR:
583 		entry = (filesetentry_t *)
584 		    avl_first(&fileset->fs_noex_leaf_dirs);
585 
586 		/* make sure non-existing leaf dirs are marked free */
587 		while (entry) {
588 			entry->fse_flags |= FSE_FREE;
589 			entry->fse_open_cnt = 0;
590 			fileset_move_entry(&fileset->fs_noex_leaf_dirs,
591 			    &fileset->fs_free_leaf_dirs, entry);
592 			entry =  AVL_NEXT(&fileset->fs_noex_leaf_dirs, entry);
593 		}
594 
595 		/* free up any existing leaf dirs */
596 		entry = (filesetentry_t *)
597 		    avl_first(&fileset->fs_exist_leaf_dirs);
598 
599 		while (entry) {
600 			entry->fse_flags |= FSE_FREE;
601 			entry->fse_open_cnt = 0;
602 			fileset_move_entry(&fileset->fs_exist_leaf_dirs,
603 			    &fileset->fs_free_leaf_dirs, entry);
604 
605 			entry =  AVL_NEXT(&fileset->fs_exist_leaf_dirs, entry);
606 		}
607 
608 		break;
609 	}
610 }
611 
612 /*
613  * find a filesetentry from the fileset using the supplied index
614  */
615 static filesetentry_t *
616 fileset_find_entry(avl_tree_t *atp, uint_t index)
617 {
618 	avl_index_t	found_loc;
619 	filesetentry_t	desired_fse, *found_fse;
620 
621 	/* find the file with the desired index, if it is in the tree */
622 	desired_fse.fse_index = index;
623 	found_fse = avl_find(atp, (void *)(&desired_fse), &found_loc);
624 	if (found_fse != NULL)
625 		return (found_fse);
626 
627 	/* if requested node not found, find next higher node */
628 	found_fse = avl_nearest(atp, found_loc, AVL_AFTER);
629 	if (found_fse != NULL)
630 		return (found_fse);
631 
632 	/* might have hit the end, return lowest available index node */
633 	found_fse = avl_first(atp);
634 	return (found_fse);
635 }
636 
637 /*
638  * Selects a fileset entry from a fileset. If the
639  * FILESET_PICKLEAFDIR flag is set it will pick a leaf directory entry,
640  * if the FILESET_PICKDIR flag is set it will pick a non leaf directory
641  * entry, otherwise a file entry. The FILESET_PICKUNIQUE
642  * flag will take an entry off of one of the free (unused)
643  * lists (file or directory), otherwise the entry will be
644  * picked off of one of the rotor lists (file or directory).
645  * The FILESET_PICKEXISTS will insure that only extant
646  * (FSE_EXISTS) state files are selected, while
647  * FILESET_PICKNOEXIST insures that only non extant
648  * (not FSE_EXISTS) state files are selected.
649  * Note that the selected fileset entry (file) is returned
650  * with its FSE_BUSY flag (in fse_flags) set.
651  */
652 filesetentry_t *
653 fileset_pick(fileset_t *fileset, int flags, int tid, int index)
654 {
655 	filesetentry_t *entry = NULL;
656 	filesetentry_t *start_point;
657 	avl_tree_t *atp;
658 	fbint_t max_entries;
659 
660 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
661 
662 	/* see if we have to wait for available files or directories */
663 	switch (flags & FILESET_PICKMASK) {
664 	case FILESET_PICKFILE:
665 		if (fileset->fs_filelist == NULL)
666 			goto empty;
667 
668 		while (fileset->fs_idle_files == 0) {
669 			(void) pthread_cond_wait(&fileset->fs_idle_files_cv,
670 			    &fileset->fs_pick_lock);
671 		}
672 
673 		max_entries = fileset->fs_constentries;
674 		if (flags & FILESET_PICKUNIQUE) {
675 			atp = &fileset->fs_free_files;
676 		} else if (flags & FILESET_PICKNOEXIST) {
677 			atp = &fileset->fs_noex_files;
678 		} else {
679 			atp = &fileset->fs_exist_files;
680 		}
681 		break;
682 
683 	case FILESET_PICKDIR:
684 		if (fileset->fs_dirlist == NULL)
685 			goto empty;
686 
687 		while (fileset->fs_idle_dirs == 0) {
688 			(void) pthread_cond_wait(&fileset->fs_idle_dirs_cv,
689 			    &fileset->fs_pick_lock);
690 		}
691 
692 		max_entries = 1;
693 		atp = &fileset->fs_dirs;
694 		break;
695 
696 	case FILESET_PICKLEAFDIR:
697 		if (fileset->fs_leafdirlist == NULL)
698 			goto empty;
699 
700 		while (fileset->fs_idle_leafdirs == 0) {
701 			(void) pthread_cond_wait(&fileset->fs_idle_leafdirs_cv,
702 			    &fileset->fs_pick_lock);
703 		}
704 
705 		max_entries = fileset->fs_constleafdirs;
706 		if (flags & FILESET_PICKUNIQUE) {
707 			atp = &fileset->fs_free_leaf_dirs;
708 		} else if (flags & FILESET_PICKNOEXIST) {
709 			atp = &fileset->fs_noex_leaf_dirs;
710 		} else {
711 			atp = &fileset->fs_exist_leaf_dirs;
712 		}
713 		break;
714 	}
715 
716 	/* see if asking for impossible */
717 	if (avl_is_empty(atp))
718 		goto empty;
719 
720 	if (flags & FILESET_PICKUNIQUE) {
721 		uint64_t  index64;
722 
723 		/*
724 		 * pick at random from free list in order to
725 		 * distribute initially allocated files more
726 		 * randomly on storage media. Use uniform
727 		 * random number generator to select index
728 		 * if it is not supplied with pick call.
729 		 */
730 		if (index) {
731 			index64 = index;
732 		} else {
733 			if (filebench_randomno64(&index64, max_entries, 1,
734 			    NULL) == FILEBENCH_ERROR)
735 				return (NULL);
736 		}
737 
738 		entry = fileset_find_entry(atp, (int)index64);
739 
740 		if (entry == NULL)
741 			goto empty;
742 
743 	} else if (flags & FILESET_PICKBYINDEX) {
744 		/* pick by supplied index */
745 		entry = fileset_find_entry(atp, index);
746 
747 	} else {
748 		/* pick in rotation */
749 		switch (flags & FILESET_PICKMASK) {
750 		case FILESET_PICKFILE:
751 			if (flags & FILESET_PICKNOEXIST) {
752 				entry = fileset_find_entry(atp,
753 				    fileset->fs_file_nerotor);
754 				fileset->fs_file_nerotor =
755 				    entry->fse_index + 1;
756 			} else {
757 				entry = fileset_find_entry(atp,
758 				    fileset->fs_file_exrotor[tid]);
759 				fileset->fs_file_exrotor[tid] =
760 				    entry->fse_index + 1;
761 			}
762 			break;
763 
764 		case FILESET_PICKDIR:
765 			entry = fileset_find_entry(atp, fileset->fs_dirrotor);
766 			fileset->fs_dirrotor = entry->fse_index + 1;
767 			break;
768 
769 		case FILESET_PICKLEAFDIR:
770 			if (flags & FILESET_PICKNOEXIST) {
771 				entry = fileset_find_entry(atp,
772 				    fileset->fs_leafdir_nerotor);
773 				fileset->fs_leafdir_nerotor =
774 				    entry->fse_index + 1;
775 			} else {
776 				entry = fileset_find_entry(atp,
777 				    fileset->fs_leafdir_exrotor);
778 				fileset->fs_leafdir_exrotor =
779 				    entry->fse_index + 1;
780 			}
781 			break;
782 		}
783 	}
784 
785 	if (entry == NULL)
786 		goto empty;
787 
788 	/* see if entry in use */
789 	start_point = entry;
790 	while (entry->fse_flags & FSE_BUSY) {
791 
792 		/* it is, so try next */
793 		entry = AVL_NEXT(atp, entry);
794 		if (entry == NULL)
795 			entry = avl_first(atp);
796 
797 		/* see if we have wrapped around */
798 		if ((entry == NULL) || (entry == start_point)) {
799 			filebench_log(LOG_DEBUG_SCRIPT,
800 			    "All %d files are busy", avl_numnodes(atp));
801 			goto empty;
802 		}
803 
804 	}
805 
806 	/* update file or directory idle counts */
807 	switch (flags & FILESET_PICKMASK) {
808 	case FILESET_PICKFILE:
809 		fileset->fs_idle_files--;
810 		break;
811 	case FILESET_PICKDIR:
812 		fileset->fs_idle_dirs--;
813 		break;
814 	case FILESET_PICKLEAFDIR:
815 		fileset->fs_idle_leafdirs--;
816 		break;
817 	}
818 
819 	/* Indicate that file or directory is now busy */
820 	entry->fse_flags |= FSE_BUSY;
821 
822 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
823 	filebench_log(LOG_DEBUG_SCRIPT, "Picked file %s", entry->fse_path);
824 	return (entry);
825 
826 empty:
827 	filebench_log(LOG_DEBUG_SCRIPT, "No file found");
828 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
829 	return (NULL);
830 }
831 
832 /*
833  * Removes a filesetentry from the "FSE_BUSY" state, signaling any threads
834  * that are waiting for a NOT BUSY filesetentry. Also sets whether it is
835  * existant or not, or leaves that designation alone.
836  */
837 void
838 fileset_unbusy(filesetentry_t *entry, int update_exist,
839     int new_exist_val, int open_cnt_incr)
840 {
841 	fileset_t *fileset = NULL;
842 
843 	if (entry)
844 		fileset = entry->fse_fileset;
845 
846 	if (fileset == NULL) {
847 		filebench_log(LOG_ERROR, "fileset_unbusy: NO FILESET!");
848 		return;
849 	}
850 
851 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
852 
853 	/* modify FSE_EXIST flag and actual dirs/files count, if requested */
854 	if (update_exist) {
855 		if (new_exist_val == TRUE) {
856 			if (entry->fse_flags & FSE_FREE) {
857 
858 				/* asked to set and it was free */
859 				entry->fse_flags |= FSE_EXISTS;
860 				entry->fse_flags &= (~FSE_FREE);
861 				switch (entry->fse_flags & FSE_TYPE_MASK) {
862 				case FSE_TYPE_FILE:
863 					fileset_move_entry(
864 					    &fileset->fs_free_files,
865 					    &fileset->fs_exist_files, entry);
866 					break;
867 
868 				case FSE_TYPE_DIR:
869 					break;
870 
871 				case FSE_TYPE_LEAFDIR:
872 					fileset_move_entry(
873 					    &fileset->fs_free_leaf_dirs,
874 					    &fileset->fs_exist_leaf_dirs,
875 					    entry);
876 					break;
877 				}
878 
879 			} else if (!(entry->fse_flags & FSE_EXISTS)) {
880 
881 				/* asked to set, and it was clear */
882 				entry->fse_flags |= FSE_EXISTS;
883 				switch (entry->fse_flags & FSE_TYPE_MASK) {
884 				case FSE_TYPE_FILE:
885 					fileset_move_entry(
886 					    &fileset->fs_noex_files,
887 					    &fileset->fs_exist_files, entry);
888 					break;
889 				case FSE_TYPE_DIR:
890 					break;
891 				case FSE_TYPE_LEAFDIR:
892 					fileset_move_entry(
893 					    &fileset->fs_noex_leaf_dirs,
894 					    &fileset->fs_exist_leaf_dirs,
895 					    entry);
896 					break;
897 				}
898 			}
899 		} else {
900 			if (entry->fse_flags & FSE_FREE) {
901 				/* asked to clear, and it was free */
902 				entry->fse_flags &= (~(FSE_FREE | FSE_EXISTS));
903 				switch (entry->fse_flags & FSE_TYPE_MASK) {
904 				case FSE_TYPE_FILE:
905 					fileset_move_entry(
906 					    &fileset->fs_free_files,
907 					    &fileset->fs_noex_files, entry);
908 					break;
909 
910 				case FSE_TYPE_DIR:
911 					break;
912 
913 				case FSE_TYPE_LEAFDIR:
914 					fileset_move_entry(
915 					    &fileset->fs_free_leaf_dirs,
916 					    &fileset->fs_noex_leaf_dirs,
917 					    entry);
918 					break;
919 				}
920 			} else if (entry->fse_flags & FSE_EXISTS) {
921 
922 				/* asked to clear, and it was set */
923 				entry->fse_flags &= (~FSE_EXISTS);
924 				switch (entry->fse_flags & FSE_TYPE_MASK) {
925 				case FSE_TYPE_FILE:
926 					fileset_move_entry(
927 					    &fileset->fs_exist_files,
928 					    &fileset->fs_noex_files, entry);
929 					break;
930 				case FSE_TYPE_DIR:
931 					break;
932 				case FSE_TYPE_LEAFDIR:
933 					fileset_move_entry(
934 					    &fileset->fs_exist_leaf_dirs,
935 					    &fileset->fs_noex_leaf_dirs,
936 					    entry);
937 					break;
938 				}
939 			}
940 		}
941 	}
942 
943 	/* update open count */
944 	entry->fse_open_cnt += open_cnt_incr;
945 
946 	/* increment idle count, clear FSE_BUSY and signal IF it was busy */
947 	if (entry->fse_flags & FSE_BUSY) {
948 
949 		/* unbusy it */
950 		entry->fse_flags &= (~FSE_BUSY);
951 
952 		/* release any threads waiting for unbusy */
953 		if (entry->fse_flags & FSE_THRD_WAITNG) {
954 			entry->fse_flags &= (~FSE_THRD_WAITNG);
955 			(void) pthread_cond_broadcast(
956 			    &fileset->fs_thrd_wait_cv);
957 		}
958 
959 		/* increment idle count and signal waiting threads */
960 		switch (entry->fse_flags & FSE_TYPE_MASK) {
961 		case FSE_TYPE_FILE:
962 			fileset->fs_idle_files++;
963 			if (fileset->fs_idle_files == 1) {
964 				(void) pthread_cond_signal(
965 				    &fileset->fs_idle_files_cv);
966 			}
967 			break;
968 
969 		case FSE_TYPE_DIR:
970 			fileset->fs_idle_dirs++;
971 			if (fileset->fs_idle_dirs == 1) {
972 				(void) pthread_cond_signal(
973 				    &fileset->fs_idle_dirs_cv);
974 			}
975 			break;
976 
977 		case FSE_TYPE_LEAFDIR:
978 			fileset->fs_idle_leafdirs++;
979 			if (fileset->fs_idle_leafdirs == 1) {
980 				(void) pthread_cond_signal(
981 				    &fileset->fs_idle_leafdirs_cv);
982 			}
983 			break;
984 		}
985 	}
986 
987 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
988 }
989 
990 /*
991  * Given a fileset "fileset", create the associated files as
992  * specified in the attributes of the fileset. The fileset is
993  * rooted in a directory whose pathname is in fileset_path. If the
994  * directory exists, meaning that there is already a fileset,
995  * and the fileset_reuse attribute is false, then remove it and all
996  * its contained files and subdirectories. Next, the routine
997  * creates a root directory for the fileset. All the file type
998  * filesetentries are cycled through creating as needed
999  * their containing subdirectory trees in the filesystem and
1000  * creating actual files for fileset_preallocpercent of them. The
 * created files are filled with fse_size bytes of uninitialized
1002  * data. The routine returns FILEBENCH_ERROR on errors,
1003  * FILEBENCH_OK on success.
1004  */
1005 static int
1006 fileset_create(fileset_t *fileset)
1007 {
1008 	filesetentry_t *entry;
1009 	char path[MAXPATHLEN];
1010 	struct stat64 sb;
1011 	hrtime_t start = gethrtime();
1012 	char *fileset_path;
1013 	char *fileset_name;
1014 	int randno;
1015 	int preallocated = 0;
1016 	int reusing;
1017 
1018 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
1019 		filebench_log(LOG_ERROR, "%s path not set",
1020 		    fileset_entity_name(fileset));
1021 		return (FILEBENCH_ERROR);
1022 	}
1023 
1024 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
1025 		filebench_log(LOG_ERROR, "%s name not set",
1026 		    fileset_entity_name(fileset));
1027 		return (FILEBENCH_ERROR);
1028 	}
1029 
1030 #ifdef HAVE_RAW_SUPPORT
1031 	/* treat raw device as special case */
1032 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
1033 		return (FILEBENCH_OK);
1034 #endif /* HAVE_RAW_SUPPORT */
1035 
1036 	/* XXX Add check to see if there is enough space */
1037 
1038 	/* set up path to fileset */
1039 	(void) fb_strlcpy(path, fileset_path, MAXPATHLEN);
1040 	(void) fb_strlcat(path, "/", MAXPATHLEN);
1041 	(void) fb_strlcat(path, fileset_name, MAXPATHLEN);
1042 
1043 	/* if exists and resusing, then don't create new */
1044 	if (((stat64(path, &sb) == 0)&& (strlen(path) > 3) &&
1045 	    (strlen(avd_get_str(fileset->fs_path)) > 2)) &&
1046 	    avd_get_bool(fileset->fs_reuse)) {
1047 		reusing = 1;
1048 	} else {
1049 		reusing = 0;
1050 	}
1051 
1052 	if (!reusing) {
1053 		char cmd[MAXPATHLEN];
1054 
1055 		/* Remove existing */
1056 		(void) snprintf(cmd, sizeof (cmd), "rm -rf %s", path);
1057 		(void) system(cmd);
1058 		filebench_log(LOG_VERBOSE,
1059 		    "Removed any existing %s %s in %llu seconds",
1060 		    fileset_entity_name(fileset), fileset_name,
1061 		    (u_longlong_t)(((gethrtime() - start) /
1062 		    1000000000) + 1));
1063 	} else {
1064 		/* we are re-using */
1065 		filebench_log(LOG_VERBOSE, "Re-using %s %s.",
1066 		    fileset_entity_name(fileset), fileset_name);
1067 	}
1068 
1069 	/* make the filesets directory tree unless in reuse mode */
1070 	if (!reusing && (avd_get_bool(fileset->fs_prealloc))) {
1071 		filebench_log(LOG_VERBOSE,
1072 		    "making tree for filset %s", path);
1073 
1074 		(void) mkdir(path, 0755);
1075 
1076 		if (fileset_create_subdirs(fileset, path) == FILEBENCH_ERROR)
1077 			return (FILEBENCH_ERROR);
1078 	}
1079 
1080 	start = gethrtime();
1081 
1082 	filebench_log(LOG_VERBOSE, "Creating %s %s...",
1083 	    fileset_entity_name(fileset), fileset_name);
1084 
1085 	randno = ((RAND_MAX * (100
1086 	    - avd_get_int(fileset->fs_preallocpercent))) / 100);
1087 
1088 	/* alloc any files, as required */
1089 	fileset_pickreset(fileset, FILESET_PICKFILE);
1090 	while (entry = fileset_pick(fileset,
1091 	    FILESET_PICKFREE | FILESET_PICKFILE, 0, 0)) {
1092 		pthread_t tid;
1093 		int newrand;
1094 
1095 		newrand = rand();
1096 
1097 		if (newrand < randno) {
1098 			/* unbusy the unallocated entry */
1099 			fileset_unbusy(entry, TRUE, FALSE, 0);
1100 			continue;
1101 		}
1102 
1103 		preallocated++;
1104 
1105 		if (reusing)
1106 			entry->fse_flags |= FSE_REUSING;
1107 		else
1108 			entry->fse_flags &= (~FSE_REUSING);
1109 
1110 		/* fire off allocation threads for each file if paralloc set */
1111 		if (avd_get_bool(fileset->fs_paralloc)) {
1112 
1113 			/* limit total number of simultaneous allocations */
1114 			(void) pthread_mutex_lock(
1115 			    &filebench_shm->shm_fsparalloc_lock);
1116 			while (filebench_shm->shm_fsparalloc_count
1117 			    >= MAX_PARALLOC_THREADS) {
1118 				(void) pthread_cond_wait(
1119 				    &filebench_shm->shm_fsparalloc_cv,
1120 				    &filebench_shm->shm_fsparalloc_lock);
1121 			}
1122 
1123 			/* quit if any allocation thread reports and error */
1124 			if (filebench_shm->shm_fsparalloc_count < 0) {
1125 				(void) pthread_mutex_unlock(
1126 				    &filebench_shm->shm_fsparalloc_lock);
1127 				return (FILEBENCH_ERROR);
1128 			}
1129 
1130 			filebench_shm->shm_fsparalloc_count++;
1131 			(void) pthread_mutex_unlock(
1132 			    &filebench_shm->shm_fsparalloc_lock);
1133 
1134 			/*
1135 			 * Fire off a detached allocation thread per file.
1136 			 * The thread will self destruct when it finishes
1137 			 * writing pre-allocation data to the file.
1138 			 */
1139 			if (pthread_create(&tid, NULL,
1140 			    (void *(*)(void*))fileset_alloc_thread,
1141 			    entry) == 0) {
1142 				/*
1143 				 * A thread was created; detach it so it can
1144 				 * fully quit when finished.
1145 				 */
1146 				(void) pthread_detach(tid);
1147 			} else {
1148 				filebench_log(LOG_ERROR,
1149 				    "File prealloc thread create failed");
1150 				filebench_shutdown(1);
1151 			}
1152 
1153 		} else {
1154 			if (fileset_alloc_file(entry) == FILEBENCH_ERROR)
1155 				return (FILEBENCH_ERROR);
1156 		}
1157 	}
1158 
1159 	/* alloc any leaf directories, as required */
1160 	fileset_pickreset(fileset, FILESET_PICKLEAFDIR);
1161 	while (entry = fileset_pick(fileset,
1162 	    FILESET_PICKFREE | FILESET_PICKLEAFDIR, 0, 0)) {
1163 
1164 		if (rand() < randno) {
1165 			/* unbusy the unallocated entry */
1166 			fileset_unbusy(entry, TRUE, FALSE, 0);
1167 			continue;
1168 		}
1169 
1170 		preallocated++;
1171 
1172 		if (reusing)
1173 			entry->fse_flags |= FSE_REUSING;
1174 		else
1175 			entry->fse_flags &= (~FSE_REUSING);
1176 
1177 		if (fileset_alloc_leafdir(entry) == FILEBENCH_ERROR)
1178 			return (FILEBENCH_ERROR);
1179 	}
1180 
1181 exit:
1182 	filebench_log(LOG_VERBOSE,
1183 	    "Preallocated %d of %llu of %s %s in %llu seconds",
1184 	    preallocated,
1185 	    (u_longlong_t)fileset->fs_constentries,
1186 	    fileset_entity_name(fileset), fileset_name,
1187 	    (u_longlong_t)(((gethrtime() - start) / 1000000000) + 1));
1188 
1189 	return (FILEBENCH_OK);
1190 }
1191 
1192 /*
1193  * Adds an entry to the fileset's file list. Single threaded so
1194  * no locking needed.
1195  */
1196 static void
1197 fileset_insfilelist(fileset_t *fileset, filesetentry_t *entry)
1198 {
1199 	entry->fse_flags = FSE_TYPE_FILE | FSE_FREE;
1200 	avl_add(&fileset->fs_free_files, entry);
1201 
1202 	if (fileset->fs_filelist == NULL) {
1203 		fileset->fs_filelist = entry;
1204 		entry->fse_nextoftype = NULL;
1205 	} else {
1206 		entry->fse_nextoftype = fileset->fs_filelist;
1207 		fileset->fs_filelist = entry;
1208 	}
1209 }
1210 
1211 /*
1212  * Adds an entry to the fileset's directory list. Single
1213  * threaded so no locking needed.
1214  */
1215 static void
1216 fileset_insdirlist(fileset_t *fileset, filesetentry_t *entry)
1217 {
1218 	entry->fse_flags = FSE_TYPE_DIR | FSE_EXISTS;
1219 	avl_add(&fileset->fs_dirs, entry);
1220 
1221 	if (fileset->fs_dirlist == NULL) {
1222 		fileset->fs_dirlist = entry;
1223 		entry->fse_nextoftype = NULL;
1224 	} else {
1225 		entry->fse_nextoftype = fileset->fs_dirlist;
1226 		fileset->fs_dirlist = entry;
1227 	}
1228 }
1229 
1230 /*
1231  * Adds an entry to the fileset's leaf directory list. Single
1232  * threaded so no locking needed.
1233  */
1234 static void
1235 fileset_insleafdirlist(fileset_t *fileset, filesetentry_t *entry)
1236 {
1237 	entry->fse_flags = FSE_TYPE_LEAFDIR | FSE_FREE;
1238 	avl_add(&fileset->fs_free_leaf_dirs, entry);
1239 
1240 	if (fileset->fs_leafdirlist == NULL) {
1241 		fileset->fs_leafdirlist = entry;
1242 		entry->fse_nextoftype = NULL;
1243 	} else {
1244 		entry->fse_nextoftype = fileset->fs_leafdirlist;
1245 		fileset->fs_leafdirlist = entry;
1246 	}
1247 }
1248 
1249 /*
1250  * Compares two fileset entries to determine their relative order
1251  */
1252 static int
1253 fileset_entry_compare(const void *node_1, const void *node_2)
1254 {
1255 	if (((filesetentry_t *)node_1)->fse_index <
1256 	    ((filesetentry_t *)node_2)->fse_index)
1257 		return (-1);
1258 
1259 	if (((filesetentry_t *)node_1)->fse_index ==
1260 	    ((filesetentry_t *)node_2)->fse_index)
1261 		return (0);
1262 
1263 	return (1);
1264 }
1265 
1266 /*
1267  * Obtains a filesetentry entity for a file to be placed in a
1268  * (sub)directory of a fileset. The size of the file may be
1269  * specified by fileset_meansize, or calculated from a gamma
1270  * distribution of parameter fileset_sizegamma and of mean size
1271  * fileset_meansize. The filesetentry entity is placed on the file
1272  * list in the specified parent filesetentry entity, which may
1273  * be a directory filesetentry, or the root filesetentry in the
1274  * fileset. It is also placed on the fileset's list of all
1275  * contained files. Returns FILEBENCH_OK if successful or FILEBENCH_ERROR
1276  * if ipc memory for the path string cannot be allocated.
1277  */
1278 static int
1279 fileset_populate_file(fileset_t *fileset, filesetentry_t *parent, int serial)
1280 {
1281 	char tmpname[16];
1282 	filesetentry_t *entry;
1283 	double drand;
1284 	uint_t index;
1285 
1286 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
1287 	    == NULL) {
1288 		filebench_log(LOG_ERROR,
1289 		    "fileset_populate_file: Can't malloc filesetentry");
1290 		return (FILEBENCH_ERROR);
1291 	}
1292 
1293 	/* Another currently idle file */
1294 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
1295 	index = fileset->fs_idle_files++;
1296 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
1297 
1298 	entry->fse_index = index;
1299 	entry->fse_parent = parent;
1300 	entry->fse_fileset = fileset;
1301 	fileset_insfilelist(fileset, entry);
1302 
1303 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
1304 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
1305 		filebench_log(LOG_ERROR,
1306 		    "fileset_populate_file: Can't alloc path string");
1307 		return (FILEBENCH_ERROR);
1308 	}
1309 
1310 	/* see if random variable was supplied for file size */
1311 	if (fileset->fs_meansize == -1) {
1312 		entry->fse_size = (off64_t)avd_get_int(fileset->fs_size);
1313 	} else {
1314 		double gamma;
1315 
1316 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
1317 		if (gamma > 0) {
1318 			drand = gamma_dist_knuth(gamma,
1319 			    fileset->fs_meansize / gamma);
1320 			entry->fse_size = (off64_t)drand;
1321 		} else {
1322 			entry->fse_size = (off64_t)fileset->fs_meansize;
1323 		}
1324 	}
1325 
1326 	fileset->fs_bytes += entry->fse_size;
1327 
1328 	fileset->fs_realfiles++;
1329 	return (FILEBENCH_OK);
1330 }
1331 
1332 /*
1333  * Obtaines a filesetentry entity for a leaf directory to be placed in a
1334  * (sub)directory of a fileset. The leaf directory will always be empty so
1335  * it can be created and deleted (mkdir, rmdir) at will. The filesetentry
1336  * entity is placed on the leaf directory list in the specified parent
1337  * filesetentry entity, which may be a (sub) directory filesetentry, or
1338  * the root filesetentry in the fileset. It is also placed on the fileset's
1339  * list of all contained leaf directories. Returns FILEBENCH_OK if successful
1340  * or FILEBENCH_ERROR if ipc memory cannot be allocated.
1341  */
1342 static int
1343 fileset_populate_leafdir(fileset_t *fileset, filesetentry_t *parent, int serial)
1344 {
1345 	char tmpname[16];
1346 	filesetentry_t *entry;
1347 	uint_t index;
1348 
1349 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
1350 	    == NULL) {
1351 		filebench_log(LOG_ERROR,
1352 		    "fileset_populate_file: Can't malloc filesetentry");
1353 		return (FILEBENCH_ERROR);
1354 	}
1355 
1356 	/* Another currently idle leaf directory */
1357 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
1358 	index = fileset->fs_idle_leafdirs++;
1359 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
1360 
1361 	entry->fse_index = index;
1362 	entry->fse_parent = parent;
1363 	entry->fse_fileset = fileset;
1364 	fileset_insleafdirlist(fileset, entry);
1365 
1366 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
1367 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
1368 		filebench_log(LOG_ERROR,
1369 		    "fileset_populate_file: Can't alloc path string");
1370 		return (FILEBENCH_ERROR);
1371 	}
1372 
1373 	fileset->fs_realleafdirs++;
1374 	return (FILEBENCH_OK);
1375 }
1376 
1377 /*
1378  * Creates a directory node in a fileset, by obtaining a
1379  * filesetentry entity for the node and initializing it
1380  * according to parameters of the fileset. It determines a
1381  * directory tree depth and directory width, optionally using
1382  * a gamma distribution. If its calculated depth is less then
1383  * its actual depth in the directory tree, it becomes a leaf
1384  * node and files itself with "width" number of file type
1385  * filesetentries, otherwise it files itself with "width"
1386  * number of directory type filesetentries, using recursive
1387  * calls to fileset_populate_subdir. The end result of the
1388  * initial call to this routine is a tree of directories of
1389  * random width and varying depth with sufficient leaf
1390  * directories to contain all required files.
1391  * Returns FILEBENCH_OK on success. Returns FILEBENCH_ERROR if ipc path
1392  * string memory cannot be allocated and returns the error code (currently
1393  * also FILEBENCH_ERROR) from calls to fileset_populate_file or recursive
1394  * calls to fileset_populate_subdir.
1395  */
1396 static int
1397 fileset_populate_subdir(fileset_t *fileset, filesetentry_t *parent,
1398     int serial, double depth)
1399 {
1400 	double randepth, drand, ranwidth;
1401 	int isleaf = 0;
1402 	char tmpname[16];
1403 	filesetentry_t *entry;
1404 	int i;
1405 	uint_t index;
1406 
1407 	depth += 1;
1408 
1409 	/* Create dir node */
1410 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
1411 	    == NULL) {
1412 		filebench_log(LOG_ERROR,
1413 		    "fileset_populate_subdir: Can't malloc filesetentry");
1414 		return (FILEBENCH_ERROR);
1415 	}
1416 
1417 	/* another idle directory */
1418 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
1419 	index = fileset->fs_idle_dirs++;
1420 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
1421 
1422 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
1423 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
1424 		filebench_log(LOG_ERROR,
1425 		    "fileset_populate_subdir: Can't alloc path string");
1426 		return (FILEBENCH_ERROR);
1427 	}
1428 
1429 	entry->fse_index = index;
1430 	entry->fse_parent = parent;
1431 	entry->fse_fileset = fileset;
1432 	fileset_insdirlist(fileset, entry);
1433 
1434 	if (fileset->fs_dirdepthrv) {
1435 		randepth = (int)avd_get_int(fileset->fs_dirdepthrv);
1436 	} else {
1437 		double gamma;
1438 
1439 		gamma = avd_get_int(fileset->fs_dirgamma) / 1000.0;
1440 		if (gamma > 0) {
1441 			drand = gamma_dist_knuth(gamma,
1442 			    fileset->fs_meandepth / gamma);
1443 			randepth = (int)drand;
1444 		} else {
1445 			randepth = (int)fileset->fs_meandepth;
1446 		}
1447 	}
1448 
1449 	if (fileset->fs_meanwidth == -1) {
1450 		ranwidth = avd_get_dbl(fileset->fs_dirwidth);
1451 	} else {
1452 		double gamma;
1453 
1454 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
1455 		if (gamma > 0) {
1456 			drand = gamma_dist_knuth(gamma,
1457 			    fileset->fs_meanwidth / gamma);
1458 			ranwidth = drand;
1459 		} else {
1460 			ranwidth = fileset->fs_meanwidth;
1461 		}
1462 	}
1463 
1464 	if (randepth == 0)
1465 		randepth = 1;
1466 	if (ranwidth == 0)
1467 		ranwidth = 1;
1468 	if (depth >= randepth)
1469 		isleaf = 1;
1470 
1471 	/*
1472 	 * Create directory of random width filled with files according
1473 	 * to distribution, or if root directory, continue until #files required
1474 	 */
1475 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
1476 	    (fileset->fs_realfiles < fileset->fs_constentries);
1477 	    i++) {
1478 		int ret = 0;
1479 
1480 		if (parent && isleaf)
1481 			ret = fileset_populate_file(fileset, entry, i);
1482 		else
1483 			ret = fileset_populate_subdir(fileset, entry, i, depth);
1484 
1485 		if (ret != 0)
1486 			return (ret);
1487 	}
1488 
1489 	/*
1490 	 * Create directory of random width filled with leaf directories
1491 	 * according to distribution, or if root directory, continue until
1492 	 * the number of leaf directories required has been generated.
1493 	 */
1494 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
1495 	    (fileset->fs_realleafdirs < fileset->fs_constleafdirs);
1496 	    i++) {
1497 		int ret = 0;
1498 
1499 		if (parent && isleaf)
1500 			ret = fileset_populate_leafdir(fileset, entry, i);
1501 		else
1502 			ret = fileset_populate_subdir(fileset, entry, i, depth);
1503 
1504 		if (ret != 0)
1505 			return (ret);
1506 	}
1507 
1508 	return (FILEBENCH_OK);
1509 }
1510 
1511 /*
1512  * Populates a fileset with files and subdirectory entries. Uses
1513  * the supplied fileset_dirwidth and fileset_entries (number of files) to
1514  * calculate the required fileset_meandepth (of subdirectories) and
1515  * initialize the fileset_meanwidth and fileset_meansize variables. Then
1516  * calls fileset_populate_subdir() to do the recursive
1517  * subdirectory entry creation and leaf file entry creation. All
1518  * of the above is skipped if the fileset has already been
1519  * populated. Returns 0 on success, or an error code from the
1520  * call to fileset_populate_subdir if that call fails.
1521  */
1522 static int
1523 fileset_populate(fileset_t *fileset)
1524 {
1525 	fbint_t entries = avd_get_int(fileset->fs_entries);
1526 	fbint_t leafdirs = avd_get_int(fileset->fs_leafdirs);
1527 	int meandirwidth;
1528 	int ret;
1529 
1530 	/* Skip if already populated */
1531 	if (fileset->fs_bytes > 0)
1532 		goto exists;
1533 
1534 #ifdef HAVE_RAW_SUPPORT
1535 	/* check for raw device */
1536 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
1537 		return (FILEBENCH_OK);
1538 #endif /* HAVE_RAW_SUPPORT */
1539 
1540 	/*
1541 	 * save value of entries and leaf dirs obtained for later
1542 	 * in case it was random
1543 	 */
1544 	fileset->fs_constentries = entries;
1545 	fileset->fs_constleafdirs = leafdirs;
1546 
1547 	/* initialize idle files and directories condition variables */
1548 	(void) pthread_cond_init(&fileset->fs_idle_files_cv, ipc_condattr());
1549 	(void) pthread_cond_init(&fileset->fs_idle_dirs_cv, ipc_condattr());
1550 	(void) pthread_cond_init(&fileset->fs_idle_leafdirs_cv, ipc_condattr());
1551 
1552 	/* no files or dirs idle (or busy) yet */
1553 	fileset->fs_idle_files = 0;
1554 	fileset->fs_idle_dirs = 0;
1555 	fileset->fs_idle_leafdirs = 0;
1556 
1557 	/* initialize locks and other condition variables */
1558 	(void) pthread_mutex_init(&fileset->fs_pick_lock,
1559 	    ipc_mutexattr(IPC_MUTEX_NORMAL));
1560 	(void) pthread_mutex_init(&fileset->fs_histo_lock,
1561 	    ipc_mutexattr(IPC_MUTEX_NORMAL));
1562 	(void) pthread_cond_init(&fileset->fs_thrd_wait_cv, ipc_condattr());
1563 
1564 	/* Initialize avl btrees */
1565 	avl_create(&(fileset->fs_free_files), fileset_entry_compare,
1566 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1567 	avl_create(&(fileset->fs_noex_files), fileset_entry_compare,
1568 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1569 	avl_create(&(fileset->fs_exist_files), fileset_entry_compare,
1570 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1571 	avl_create(&(fileset->fs_free_leaf_dirs), fileset_entry_compare,
1572 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1573 	avl_create(&(fileset->fs_noex_leaf_dirs), fileset_entry_compare,
1574 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1575 	avl_create(&(fileset->fs_exist_leaf_dirs), fileset_entry_compare,
1576 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1577 	avl_create(&(fileset->fs_dirs), fileset_entry_compare,
1578 	    sizeof (filesetentry_t), FSE_OFFSETOF(fse_link));
1579 
1580 	/* is dirwidth a random variable? */
1581 	if (AVD_IS_RANDOM(fileset->fs_dirwidth)) {
1582 		meandirwidth =
1583 		    (int)fileset->fs_dirwidth->avd_val.randptr->rnd_dbl_mean;
1584 		fileset->fs_meanwidth = -1;
1585 	} else {
1586 		meandirwidth = (int)avd_get_int(fileset->fs_dirwidth);
1587 		fileset->fs_meanwidth = (double)meandirwidth;
1588 	}
1589 
1590 	/*
1591 	 * Input params are:
1592 	 *	# of files
1593 	 *	ave # of files per dir
1594 	 *	max size of dir
1595 	 *	# ave size of file
1596 	 *	max size of file
1597 	 */
1598 	fileset->fs_meandepth = log(entries+leafdirs) / log(meandirwidth);
1599 
1600 	/* Has a random variable been supplied for dirdepth? */
1601 	if (fileset->fs_dirdepthrv) {
1602 		/* yes, so set the random variable's mean value to meandepth */
1603 		fileset->fs_dirdepthrv->avd_val.randptr->rnd_dbl_mean =
1604 		    fileset->fs_meandepth;
1605 	}
1606 
1607 	/* test for random size variable */
1608 	if (AVD_IS_RANDOM(fileset->fs_size))
1609 		fileset->fs_meansize = -1;
1610 	else
1611 		fileset->fs_meansize = avd_get_int(fileset->fs_size);
1612 
1613 	if ((ret = fileset_populate_subdir(fileset, NULL, 1, 0)) != 0)
1614 		return (ret);
1615 
1616 
1617 exists:
1618 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1619 		filebench_log(LOG_VERBOSE, "File %s: mbytes=%llu",
1620 		    avd_get_str(fileset->fs_name),
1621 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
1622 	} else {
1623 		filebench_log(LOG_VERBOSE, "Fileset %s: %d files, %d leafdirs "
1624 		    "avg dir = %d, avg depth = %.1lf, mbytes=%llu",
1625 		    avd_get_str(fileset->fs_name), entries, leafdirs,
1626 		    meandirwidth,
1627 		    fileset->fs_meandepth,
1628 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
1629 	}
1630 
1631 	return (FILEBENCH_OK);
1632 }
1633 
1634 /*
1635  * Allocates a fileset instance, initializes fileset_dirgamma and
1636  * fileset_sizegamma default values, and sets the fileset name to the
1637  * supplied name string. Puts the allocated fileset on the
1638  * master fileset list and returns a pointer to it.
1639  *
1640  * This routine implements the 'define fileset' calls found in a .f
1641  * workload, such as in the following example:
1642  * define fileset name=drew4ever, entries=$nfiles
1643  */
1644 fileset_t *
1645 fileset_define(avd_t name)
1646 {
1647 	fileset_t *fileset;
1648 
1649 	if (name == NULL)
1650 		return (NULL);
1651 
1652 	if ((fileset = (fileset_t *)ipc_malloc(FILEBENCH_FILESET)) == NULL) {
1653 		filebench_log(LOG_ERROR,
1654 		    "fileset_define: Can't malloc fileset");
1655 		return (NULL);
1656 	}
1657 
1658 	filebench_log(LOG_DEBUG_IMPL,
1659 	    "Defining file %s", avd_get_str(name));
1660 
1661 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1662 
1663 	fileset->fs_dirgamma = avd_int_alloc(1500);
1664 	fileset->fs_sizegamma = avd_int_alloc(1500);
1665 	fileset->fs_histo_id = -1;
1666 
1667 	/* Add fileset to global list */
1668 	if (filebench_shm->shm_filesetlist == NULL) {
1669 		filebench_shm->shm_filesetlist = fileset;
1670 		fileset->fs_next = NULL;
1671 	} else {
1672 		fileset->fs_next = filebench_shm->shm_filesetlist;
1673 		filebench_shm->shm_filesetlist = fileset;
1674 	}
1675 
1676 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1677 
1678 	fileset->fs_name = name;
1679 
1680 	return (fileset);
1681 }
1682 
1683 /*
1684  * If supplied with a pointer to a fileset and the fileset's
1685  * fileset_prealloc flag is set, calls fileset_populate() to populate
1686  * the fileset with filesetentries, then calls fileset_create()
1687  * to make actual directories and files for the filesetentries.
1688  * Otherwise, it applies fileset_populate() and fileset_create()
1689  * to all the filesets on the master fileset list. It always
1690  * returns zero (0) if one fileset is populated / created,
1691  * otherwise it returns the sum of returned values from
1692  * fileset_create() and fileset_populate(), which
1693  * will be a negative one (-1) times the number of
1694  * fileset_create() calls which failed.
1695  */
1696 int
1697 fileset_createset(fileset_t *fileset)
1698 {
1699 	fileset_t *list;
1700 	int ret = 0;
1701 
1702 	/* set up for possible parallel allocate */
1703 	filebench_shm->shm_fsparalloc_count = 0;
1704 	(void) pthread_cond_init(
1705 	    &filebench_shm->shm_fsparalloc_cv,
1706 	    ipc_condattr());
1707 
1708 	if (fileset && avd_get_bool(fileset->fs_prealloc)) {
1709 
1710 		/* check for raw files */
1711 		if (fileset_checkraw(fileset)) {
1712 			filebench_log(LOG_INFO,
1713 			    "file %s/%s is a RAW device",
1714 			    avd_get_str(fileset->fs_path),
1715 			    avd_get_str(fileset->fs_name));
1716 			return (FILEBENCH_OK);
1717 		}
1718 
1719 		filebench_log(LOG_INFO,
1720 		    "creating/pre-allocating %s %s",
1721 		    fileset_entity_name(fileset),
1722 		    avd_get_str(fileset->fs_name));
1723 
1724 		if ((ret = fileset_populate(fileset)) != FILEBENCH_OK)
1725 			return (ret);
1726 
1727 		if ((ret = fileset_create(fileset)) != FILEBENCH_OK)
1728 			return (ret);
1729 	} else {
1730 
1731 		filebench_log(LOG_INFO,
1732 		    "Creating/pre-allocating files and filesets");
1733 
1734 		list = filebench_shm->shm_filesetlist;
1735 		while (list) {
1736 			/* check for raw files */
1737 			if (fileset_checkraw(list)) {
1738 				filebench_log(LOG_INFO,
1739 				    "file %s/%s is a RAW device",
1740 				    avd_get_str(list->fs_path),
1741 				    avd_get_str(list->fs_name));
1742 				list = list->fs_next;
1743 				continue;
1744 			}
1745 
1746 			if ((ret = fileset_populate(list)) != FILEBENCH_OK)
1747 				return (ret);
1748 
1749 			if ((ret = fileset_create(list)) != FILEBENCH_OK)
1750 				return (ret);
1751 
1752 			list = list->fs_next;
1753 		}
1754 	}
1755 
1756 	/* wait for allocation threads to finish */
1757 	filebench_log(LOG_INFO,
1758 	    "waiting for fileset pre-allocation to finish");
1759 
1760 	(void) pthread_mutex_lock(&filebench_shm->shm_fsparalloc_lock);
1761 	while (filebench_shm->shm_fsparalloc_count > 0)
1762 		(void) pthread_cond_wait(
1763 		    &filebench_shm->shm_fsparalloc_cv,
1764 		    &filebench_shm->shm_fsparalloc_lock);
1765 	(void) pthread_mutex_unlock(&filebench_shm->shm_fsparalloc_lock);
1766 
1767 	if (filebench_shm->shm_fsparalloc_count < 0)
1768 		return (FILEBENCH_ERROR);
1769 
1770 	return (FILEBENCH_OK);
1771 }
1772 
1773 /*
1774  * Searches through the master fileset list for the named fileset.
1775  * If found, returns pointer to same, otherwise returns NULL.
1776  */
1777 fileset_t *
1778 fileset_find(char *name)
1779 {
1780 	fileset_t *fileset = filebench_shm->shm_filesetlist;
1781 
1782 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1783 
1784 	while (fileset) {
1785 		if (strcmp(name, avd_get_str(fileset->fs_name)) == 0) {
1786 			(void) ipc_mutex_unlock(
1787 			    &filebench_shm->shm_fileset_lock);
1788 			return (fileset);
1789 		}
1790 		fileset = fileset->fs_next;
1791 	}
1792 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1793 
1794 	return (NULL);
1795 }
1796 
1797 /*
1798  * Iterates over all the file sets in the filesetlist,
1799  * executing the supplied command "*cmd()" on them. Also
1800  * indicates to the executed command if it is the first
1801  * time the command has been executed since the current
1802  * call to fileset_iter.
1803  */
1804 int
1805 fileset_iter(int (*cmd)(fileset_t *fileset, int first))
1806 {
1807 	fileset_t *fileset = filebench_shm->shm_filesetlist;
1808 	int count = 0;
1809 
1810 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1811 
1812 	while (fileset) {
1813 		if (cmd(fileset, count == 0) == FILEBENCH_ERROR) {
1814 			(void) ipc_mutex_unlock(
1815 			    &filebench_shm->shm_fileset_lock);
1816 			return (FILEBENCH_ERROR);
1817 		}
1818 		fileset = fileset->fs_next;
1819 		count++;
1820 	}
1821 
1822 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1823 	return (FILEBENCH_OK);
1824 }
1825 
1826 /*
1827  * Prints information to the filebench log about the file
1828  * object. Also prints a header on the first call.
1829  */
1830 int
1831 fileset_print(fileset_t *fileset, int first)
1832 {
1833 	int pathlength;
1834 	char *fileset_path;
1835 	char *fileset_name;
1836 	static char pad[] = "                              "; /* 30 spaces */
1837 
1838 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
1839 		filebench_log(LOG_ERROR, "%s path not set",
1840 		    fileset_entity_name(fileset));
1841 		return (FILEBENCH_ERROR);
1842 	}
1843 
1844 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
1845 		filebench_log(LOG_ERROR, "%s name not set",
1846 		    fileset_entity_name(fileset));
1847 		return (FILEBENCH_ERROR);
1848 	}
1849 
1850 	pathlength = strlen(fileset_path) + strlen(fileset_name);
1851 
1852 	if (pathlength > 29)
1853 		pathlength = 29;
1854 
1855 	if (first) {
1856 		filebench_log(LOG_INFO, "File or Fileset name%20s%12s%10s",
1857 		    "file size",
1858 		    "dir width",
1859 		    "entries");
1860 	}
1861 
1862 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1863 		if (fileset->fs_attrs & FILESET_IS_RAW_DEV) {
1864 			filebench_log(LOG_INFO,
1865 			    "%s/%s%s         (Raw Device)",
1866 			    fileset_path, fileset_name, &pad[pathlength]);
1867 		} else {
1868 			filebench_log(LOG_INFO,
1869 			    "%s/%s%s%9llu     (Single File)",
1870 			    fileset_path, fileset_name, &pad[pathlength],
1871 			    (u_longlong_t)avd_get_int(fileset->fs_size));
1872 		}
1873 	} else {
1874 		filebench_log(LOG_INFO, "%s/%s%s%9llu%12llu%10llu",
1875 		    fileset_path, fileset_name,
1876 		    &pad[pathlength],
1877 		    (u_longlong_t)avd_get_int(fileset->fs_size),
1878 		    (u_longlong_t)avd_get_int(fileset->fs_dirwidth),
1879 		    (u_longlong_t)fileset->fs_constentries);
1880 	}
1881 	return (FILEBENCH_OK);
1882 }
1883 
1884 /*
1885  * checks to see if the path/name pair points to a raw device. If
1886  * so it sets the raw device flag (FILESET_IS_RAW_DEV) and returns 1.
1887  * If RAW is not defined, or it is not a raw device, it clears the
1888  * raw device flag and returns 0.
1889  */
1890 int
1891 fileset_checkraw(fileset_t *fileset)
1892 {
1893 	char path[MAXPATHLEN];
1894 	struct stat64 sb;
1895 	char *pathname;
1896 	char *setname;
1897 
1898 	fileset->fs_attrs &= (~FILESET_IS_RAW_DEV);
1899 
1900 #ifdef HAVE_RAW_SUPPORT
1901 	/* check for raw device */
1902 	if ((pathname = avd_get_str(fileset->fs_path)) == NULL)
1903 		return (FILEBENCH_OK);
1904 
1905 	if ((setname = avd_get_str(fileset->fs_name)) == NULL)
1906 		return (FILEBENCH_OK);
1907 
1908 	(void) fb_strlcpy(path, pathname, MAXPATHLEN);
1909 	(void) fb_strlcat(path, "/", MAXPATHLEN);
1910 	(void) fb_strlcat(path, setname, MAXPATHLEN);
1911 	if ((stat64(path, &sb) == 0) &&
1912 	    ((sb.st_mode & S_IFMT) == S_IFBLK) && sb.st_rdev) {
1913 		fileset->fs_attrs |= FILESET_IS_RAW_DEV;
1914 		if (!(fileset->fs_attrs & FILESET_IS_FILE)) {
1915 			filebench_log(LOG_ERROR,
1916 			    "WARNING Fileset %s/%s Cannot be RAW device",
1917 			    avd_get_str(fileset->fs_path),
1918 			    avd_get_str(fileset->fs_name));
1919 			filebench_shutdown(1);
1920 		}
1921 
1922 		return (1);
1923 	}
1924 #endif /* HAVE_RAW_SUPPORT */
1925 
1926 	return (FILEBENCH_OK);
1927 }
1928