xref: /onnv-gate/usr/src/cmd/filebench/common/fileset.c (revision 6286:edb6e4556869)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 
29 #include <fcntl.h>
30 #include <pthread.h>
31 #include <errno.h>
32 #include <math.h>
33 #include <libgen.h>
34 #include <sys/mman.h>
35 #include "fileset.h"
36 #include "filebench.h"
37 #include "gamma_dist.h"
38 
39 /*
40  * File sets, of type fileset_t, are entities which contain
41  * information about collections of files and subdirectories in Filebench.
42  * The fileset, once populated, consists of a tree of fileset entries of
43  * type filesetentry_t which specify files and directories.  The fileset
44  * is rooted in a directory specified by fileset_path, and once the populated
45  * fileset has been created, has a tree of directories and files
46  * corresponding to the fileset's filesetentry tree.
47  */
48 
/*
 * Parallel allocation control.
 *
 * paralloc_count tracks the file allocation threads in flight: it is
 * incremented by fileset_create() before each thread is started and
 * decremented by fileset_alloc_thread() when an allocation succeeds.
 * A failed allocation sets it to -1, which fileset_create() and
 * fileset_createset() treat as an error indication. fileset_create()
 * waits while the count is at or above MAX_PARALLOC_THREADS.
 * paralloc_cv is signalled each time a thread updates the count;
 * paralloc_lock is held around those updates and the waits.
 */
#define	MAX_PARALLOC_THREADS 32
static pthread_mutex_t	paralloc_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	paralloc_cv = PTHREAD_COND_INITIALIZER;
static int		paralloc_count;
54 
55 /*
56  * returns pointer to file or fileset
57  * string, as appropriate
58  */
59 static char *
60 fileset_entity_name(fileset_t *fileset)
61 {
62 	if (fileset->fs_attrs & FILESET_IS_FILE)
63 		return ("file");
64 	else
65 		return ("fileset");
66 }
67 
/*
 * Strips the last component from a pathname, in place. Walking
 * backwards from the terminating NUL, every character is overwritten
 * with '\0' until a '/' has been overwritten or the first character
 * of the buffer is reached. The first character is never examined or
 * modified, so "/abc" becomes "/" and "abc" becomes "a".
 * Returns the "dir" pointer it was given.
 */
static char *
trunc_dirname(char *dir)
{
	char *cur;

	for (cur = dir + strlen(dir); cur > dir; cur--) {
		char was = *cur;

		*cur = '\0';
		if (was == '/')
			break;
	}
	return (dir);
}
89 
/*
 * Prints, on stderr, the syntax of the "define file" / "define fileset"
 * command followed by the optional attributes it accepts.
 */
void
fileset_usage(void)
{
	/*
	 * The two literals below are concatenated by the compiler, so
	 * only one of them may carry the separating comma.
	 */
	(void) fprintf(stderr,
	    "define [file name=<name> | fileset name=<name>],path=<pathname>"
	    ",entries=<number>\n");
	(void) fprintf(stderr,
	    "\t\t        [,filesize=[size]]\n");
	(void) fprintf(stderr,
	    "\t\t        [,dirwidth=[width]]\n");
	(void) fprintf(stderr,
	    "\t\t        [,dirdepthrv=$random_variable_name]\n");
	(void) fprintf(stderr,
	    "\t\t        [,dirgamma=[100-10000]] "
	    "(Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "\t\t        [,sizegamma=[100-10000]] (Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "\t\t        [,prealloc=[percent]]\n");
	(void) fprintf(stderr, "\t\t        [,paralloc]\n");
	(void) fprintf(stderr, "\t\t        [,reuse]\n");
	(void) fprintf(stderr, "\n");
}
116 
117 /*
118  * Frees up memory mapped file region of supplied size. The
119  * file descriptor "fd" indicates which memory mapped file.
120  * If successful, returns 0. Otherwise returns -1 if "size"
121  * is zero, or -1 times the number of times msync() failed.
122  */
123 static int
124 fileset_freemem(int fd, off64_t size)
125 {
126 	off64_t left;
127 	int ret = 0;
128 
129 	for (left = size; left > 0; left -= MMAP_SIZE) {
130 		off64_t thismapsize;
131 		caddr_t addr;
132 
133 		thismapsize = MIN(MMAP_SIZE, left);
134 		addr = mmap64(0, thismapsize, PROT_READ|PROT_WRITE,
135 		    MAP_SHARED, fd, size - left);
136 		ret += msync(addr, thismapsize, MS_INVALIDATE);
137 		(void) munmap(addr, thismapsize);
138 	}
139 	return (ret);
140 }
141 
142 /*
143  * Creates a path string from the filesetentry_t "*entry"
144  * and all of its parent's path names. The resulting path
145  * is a concatination of all the individual parent paths.
146  * Allocates memory for the path string and returns a
147  * pointer to it.
148  */
149 char *
150 fileset_resolvepath(filesetentry_t *entry)
151 {
152 	filesetentry_t *fsep = entry;
153 	char path[MAXPATHLEN];
154 	char pathtmp[MAXPATHLEN];
155 	char *s;
156 
157 	*path = 0;
158 	while (fsep->fse_parent) {
159 		(void) strcpy(pathtmp, "/");
160 		(void) strcat(pathtmp, fsep->fse_path);
161 		(void) strcat(pathtmp, path);
162 		(void) strcpy(path, pathtmp);
163 		fsep = fsep->fse_parent;
164 	}
165 
166 	s = malloc(strlen(path) + 1);
167 	(void) strcpy(s, path);
168 	return (s);
169 }
170 
171 /*
172  * Creates multiple nested directories as required by the
173  * supplied path. Starts at the end of the path, creating
174  * a list of directories to mkdir, up to the root of the
175  * path, then mkdirs them one at a time from the root on down.
176  */
177 static int
178 fileset_mkdir(char *path, int mode)
179 {
180 	char *p;
181 	char *dirs[65536];
182 	int i = 0;
183 
184 	if ((p = strdup(path)) == NULL)
185 		goto null_str;
186 
187 	/*
188 	 * Fill an array of subdirectory path names until either we
189 	 * reach the root or encounter an already existing subdirectory
190 	 */
191 	/* CONSTCOND */
192 	while (1) {
193 		struct stat64 sb;
194 
195 		if (stat64(p, &sb) == 0)
196 			break;
197 		if (strlen(p) < 3)
198 			break;
199 		if ((dirs[i] = strdup(p)) == NULL) {
200 			free(p);
201 			goto null_str;
202 		}
203 
204 		(void) trunc_dirname(p);
205 		i++;
206 	}
207 
208 	/* Make the directories, from closest to root downwards. */
209 	for (--i; i >= 0; i--) {
210 		(void) mkdir(dirs[i], mode);
211 		free(dirs[i]);
212 	}
213 
214 	free(p);
215 	return (0);
216 
217 null_str:
218 	/* clean up */
219 	for (--i; i >= 0; i--)
220 		free(dirs[i]);
221 
222 	filebench_log(LOG_ERROR,
223 	    "Failed to create directory path %s: Out of memory", path);
224 
225 	return (-1);
226 }
227 
228 /*
229  * creates the subdirectory tree for a fileset.
230  */
231 static int
232 fileset_create_subdirs(fileset_t *fileset, char *filesetpath)
233 {
234 	filesetentry_t *direntry;
235 	char full_path[MAXPATHLEN];
236 	char *part_path;
237 
238 	/* walk the subdirectory list, enstanciating subdirs */
239 	direntry = fileset->fs_dirlist;
240 	while (direntry) {
241 		(void) strcpy(full_path, filesetpath);
242 		part_path = fileset_resolvepath(direntry);
243 		(void) strcat(full_path, part_path);
244 		free(part_path);
245 
246 		/* now create this portion of the subdirectory tree */
247 		if (fileset_mkdir(full_path, 0755) == -1)
248 			return (-1);
249 
250 		direntry = direntry->fse_dirnext;
251 	}
252 	return (0);
253 }
254 
255 /*
256  * given a fileset entry, determines if the associated file
257  * needs to be allocated or not, and if so does the allocation.
258  */
259 static int
260 fileset_alloc_file(filesetentry_t *entry)
261 {
262 	char path[MAXPATHLEN];
263 	char *buf;
264 	struct stat64 sb;
265 	char *pathtmp;
266 	off64_t seek;
267 	int fd;
268 
269 	*path = 0;
270 	(void) strcpy(path, avd_get_str(entry->fse_fileset->fs_path));
271 	(void) strcat(path, "/");
272 	(void) strcat(path, avd_get_str(entry->fse_fileset->fs_name));
273 	pathtmp = fileset_resolvepath(entry);
274 	(void) strcat(path, pathtmp);
275 
276 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
277 
278 	/* see if reusing and this file exists */
279 	if ((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0)) {
280 		if ((fd = open64(path, O_RDWR)) < 0) {
281 			filebench_log(LOG_INFO,
282 			    "Attempted but failed to Re-use file %s",
283 			    path);
284 			return (-1);
285 		}
286 
287 		if (sb.st_size == (off64_t)entry->fse_size) {
288 			filebench_log(LOG_INFO,
289 			    "Re-using file %s", path);
290 
291 			if (!avd_get_bool(entry->fse_fileset->fs_cached))
292 				(void) fileset_freemem(fd,
293 				    entry->fse_size);
294 
295 			entry->fse_flags |= FSE_EXISTS;
296 			(void) close(fd);
297 			return (0);
298 
299 		} else if (sb.st_size > (off64_t)entry->fse_size) {
300 			/* reuse, but too large */
301 			filebench_log(LOG_INFO,
302 			    "Truncating & re-using file %s", path);
303 
304 			(void) ftruncate64(fd,
305 			    (off64_t)entry->fse_size);
306 
307 			if (!avd_get_bool(entry->fse_fileset->fs_cached))
308 				(void) fileset_freemem(fd,
309 				    entry->fse_size);
310 
311 			entry->fse_flags |= FSE_EXISTS;
312 			(void) close(fd);
313 			return (0);
314 		}
315 	} else {
316 
317 		/* No file or not reusing, so create */
318 		if ((fd = open64(path, O_RDWR | O_CREAT, 0644)) < 0) {
319 			filebench_log(LOG_ERROR,
320 			    "Failed to pre-allocate file %s: %s",
321 			    path, strerror(errno));
322 
323 			return (-1);
324 		}
325 	}
326 
327 	if ((buf = (char *)malloc(FILE_ALLOC_BLOCK)) == NULL)
328 		return (-1);
329 
330 	entry->fse_flags |= FSE_EXISTS;
331 
332 	for (seek = 0; seek < entry->fse_size; ) {
333 		off64_t wsize;
334 		int ret = 0;
335 
336 		/*
337 		 * Write FILE_ALLOC_BLOCK's worth,
338 		 * except on last write
339 		 */
340 		wsize = MIN(entry->fse_size - seek, FILE_ALLOC_BLOCK);
341 
342 		ret = write(fd, buf, wsize);
343 		if (ret != wsize) {
344 			filebench_log(LOG_ERROR,
345 			    "Failed to pre-allocate file %s: %s",
346 			    path, strerror(errno));
347 			(void) close(fd);
348 			free(buf);
349 			return (-1);
350 		}
351 		seek += wsize;
352 	}
353 
354 	if (!avd_get_bool(entry->fse_fileset->fs_cached))
355 		(void) fileset_freemem(fd, entry->fse_size);
356 
357 	(void) close(fd);
358 
359 	free(buf);
360 
361 	filebench_log(LOG_DEBUG_IMPL,
362 	    "Pre-allocated file %s size %llu",
363 	    path, (u_longlong_t)entry->fse_size);
364 
365 	return (0);
366 }
367 
368 /*
369  * given a fileset entry, determines if the associated file
370  * needs to be allocated or not, and if so does the allocation.
371  */
372 static void *
373 fileset_alloc_thread(filesetentry_t *entry)
374 {
375 	if (fileset_alloc_file(entry) == -1) {
376 		(void) pthread_mutex_lock(&paralloc_lock);
377 		paralloc_count = -1;
378 	} else {
379 		(void) pthread_mutex_lock(&paralloc_lock);
380 		paralloc_count--;
381 	}
382 
383 	(void) pthread_cond_signal(&paralloc_cv);
384 	(void) pthread_mutex_unlock(&paralloc_lock);
385 
386 	pthread_exit(NULL);
387 	return (NULL);
388 }
389 
390 
391 /*
392  * First creates the parent directories of the file using
393  * fileset_mkdir(). Then Optionally sets the O_DSYNC flag
394  * and opens the file with open64(). It unlocks the fileset
395  * entry lock, sets the DIRECTIO_ON or DIRECTIO_OFF flags
396  * as requested, and returns the file descriptor integer
397  * for the opened file.
398  */
399 int
400 fileset_openfile(fileset_t *fileset,
401     filesetentry_t *entry, int flag, int mode, int attrs)
402 {
403 	char path[MAXPATHLEN];
404 	char dir[MAXPATHLEN];
405 	char *pathtmp;
406 	struct stat64 sb;
407 	int fd;
408 	int open_attrs = 0;
409 
410 	*path = 0;
411 	(void) strcpy(path, avd_get_str(fileset->fs_path));
412 	(void) strcat(path, "/");
413 	(void) strcat(path, avd_get_str(fileset->fs_name));
414 	pathtmp = fileset_resolvepath(entry);
415 	(void) strcat(path, pathtmp);
416 	(void) strcpy(dir, path);
417 	free(pathtmp);
418 	(void) trunc_dirname(dir);
419 
420 	/* If we are going to create a file, create the parent dirs */
421 	if ((flag & O_CREAT) && (stat64(dir, &sb) != 0)) {
422 		if (fileset_mkdir(dir, 0755) == -1)
423 			return (-1);
424 	}
425 
426 	if (flag & O_CREAT)
427 		entry->fse_flags |= FSE_EXISTS;
428 
429 	if (attrs & FLOW_ATTR_DSYNC) {
430 #ifdef sun
431 		open_attrs |= O_DSYNC;
432 #else
433 		open_attrs |= O_FSYNC;
434 #endif
435 	}
436 
437 	if ((fd = open64(path, flag | open_attrs, mode)) < 0) {
438 		filebench_log(LOG_ERROR,
439 		    "Failed to open file %s: %s",
440 		    path, strerror(errno));
441 		(void) ipc_mutex_unlock(&entry->fse_lock);
442 		return (-1);
443 	}
444 	(void) ipc_mutex_unlock(&entry->fse_lock);
445 
446 #ifdef sun
447 	if (attrs & FLOW_ATTR_DIRECTIO)
448 		(void) directio(fd, DIRECTIO_ON);
449 	else
450 		(void) directio(fd, DIRECTIO_OFF);
451 #endif
452 
453 	return (fd);
454 }
455 
456 
457 /*
458  * Selects a fileset entry from a fileset. If the
459  * FILESET_PICKDIR flag is set it will pick a directory
460  * entry, otherwise a file entry. The FILESET_PICKRESET
461  * flag will cause it to reset the free list to the
462  * overall list (file or directory). The FILESET_PICKUNIQUE
463  * flag will take an entry off of one of the free (unused)
464  * lists (file or directory), otherwise the entry will be
465  * picked off of one of the rotor lists (file or directory).
466  * The FILESET_PICKEXISTS will insure that only extant
467  * (FSE_EXISTS) state files are selected, while
468  * FILESET_PICKNOEXIST insures that only non extant
469  * (not FSE_EXISTS) state files are selected.
470  */
471 filesetentry_t *
472 fileset_pick(fileset_t *fileset, int flags, int tid)
473 {
474 	filesetentry_t *entry = NULL;
475 	filesetentry_t *first = NULL;
476 
477 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
478 
479 	while (entry == NULL) {
480 
481 		if ((flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
482 			entry = fileset->fs_dirlist;
483 			while (entry) {
484 				entry->fse_flags |= FSE_FREE;
485 				entry = entry->fse_dirnext;
486 			}
487 			fileset->fs_dirfree = fileset->fs_dirlist;
488 		}
489 
490 		if (!(flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
491 			entry = fileset->fs_filelist;
492 			while (entry) {
493 				entry->fse_flags |= FSE_FREE;
494 				entry = entry->fse_filenext;
495 			}
496 			fileset->fs_filefree = fileset->fs_filelist;
497 		}
498 
499 		if (flags & FILESET_PICKUNIQUE) {
500 			if (flags & FILESET_PICKDIR) {
501 				entry = fileset->fs_dirfree;
502 				if (entry == NULL)
503 					goto empty;
504 				fileset->fs_dirfree = entry->fse_dirnext;
505 			} else {
506 				entry = fileset->fs_filefree;
507 				if (entry == NULL)
508 					goto empty;
509 				fileset->fs_filefree = entry->fse_filenext;
510 			}
511 			entry->fse_flags &= ~FSE_FREE;
512 		} else {
513 			if (flags & FILESET_PICKDIR) {
514 				entry = fileset->fs_dirrotor;
515 				if (entry == NULL)
516 				fileset->fs_dirrotor =
517 				    entry = fileset->fs_dirlist;
518 				fileset->fs_dirrotor = entry->fse_dirnext;
519 			} else {
520 				entry = fileset->fs_filerotor[tid];
521 				if (entry == NULL)
522 					fileset->fs_filerotor[tid] =
523 					    entry = fileset->fs_filelist;
524 				fileset->fs_filerotor[tid] =
525 				    entry->fse_filenext;
526 			}
527 		}
528 
529 		if (first == entry)
530 			goto empty;
531 
532 		if (first == NULL)
533 			first = entry;
534 
535 		/* Return locked entry */
536 		(void) ipc_mutex_lock(&entry->fse_lock);
537 
538 		/* If we ask for an existing file, go round again */
539 		if ((flags & FILESET_PICKEXISTS) &&
540 		    !(entry->fse_flags & FSE_EXISTS)) {
541 			(void) ipc_mutex_unlock(&entry->fse_lock);
542 			entry = NULL;
543 		}
544 
545 		/* If we ask for not an existing file, go round again */
546 		if ((flags & FILESET_PICKNOEXIST) &&
547 		    (entry->fse_flags & FSE_EXISTS)) {
548 			(void) ipc_mutex_unlock(&entry->fse_lock);
549 			entry = NULL;
550 		}
551 	}
552 
553 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
554 	filebench_log(LOG_DEBUG_SCRIPT, "Picked file %s", entry->fse_path);
555 	return (entry);
556 
557 empty:
558 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
559 	return (NULL);
560 }
561 
562 /*
563  * Given a fileset "fileset", create the associated files as
564  * specified in the attributes of the fileset. The fileset is
565  * rooted in a directory whose pathname is in fileset_path. If the
566  * directory exists, meaning that there is already a fileset,
567  * and the fileset_reuse attribute is false, then remove it and all
568  * its contained files and subdirectories. Next, the routine
569  * creates a root directory for the fileset. All the file type
570  * filesetentries are cycled through creating as needed
571  * their containing subdirectory trees in the filesystem and
572  * creating actual files for fileset_preallocpercent of them. The
573  * created files are filled with fse_size bytes of unitialized
574  * data. The routine returns -1 on errors, 0 on success.
575  */
576 static int
577 fileset_create(fileset_t *fileset)
578 {
579 	filesetentry_t *entry;
580 	char path[MAXPATHLEN];
581 	struct stat64 sb;
582 	int pickflags = FILESET_PICKUNIQUE | FILESET_PICKRESET;
583 	hrtime_t start = gethrtime();
584 	char *fileset_path;
585 	char *fileset_name;
586 	int randno;
587 	int preallocated = 0;
588 	int reusing = 0;
589 
590 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
591 		filebench_log(LOG_ERROR, "%s path not set",
592 		    fileset_entity_name(fileset));
593 		return (-1);
594 	}
595 
596 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
597 		filebench_log(LOG_ERROR, "%s name not set",
598 		    fileset_entity_name(fileset));
599 		return (-1);
600 	}
601 
602 #ifdef HAVE_RAW_SUPPORT
603 	/* treat raw device as special case */
604 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
605 		return (0);
606 #endif /* HAVE_RAW_SUPPORT */
607 
608 	/* XXX Add check to see if there is enough space */
609 
610 	/* Remove existing */
611 	(void) strcpy(path, fileset_path);
612 	(void) strcat(path, "/");
613 	(void) strcat(path, fileset_name);
614 	if ((stat64(path, &sb) == 0) && (strlen(path) > 3) &&
615 	    (strlen(avd_get_str(fileset->fs_path)) > 2)) {
616 		if (!avd_get_bool(fileset->fs_reuse)) {
617 			char cmd[MAXPATHLEN];
618 
619 			(void) snprintf(cmd, sizeof (cmd), "rm -rf %s", path);
620 			(void) system(cmd);
621 			filebench_log(LOG_VERBOSE,
622 			    "Removed any existing %s %s in %llu seconds",
623 			    fileset_entity_name(fileset), fileset_name,
624 			    (u_longlong_t)(((gethrtime() - start) /
625 			    1000000000) + 1));
626 		} else {
627 			/* we are re-using */
628 			reusing = 1;
629 			filebench_log(LOG_VERBOSE,
630 			    "Re-using %s %s on %s file system.",
631 			    fileset_entity_name(fileset),
632 			    fileset_name, sb.st_fstype);
633 		}
634 	}
635 	(void) mkdir(path, 0755);
636 
637 	/* make the filesets directory tree */
638 	if (fileset_create_subdirs(fileset, path) == -1)
639 		return (-1);
640 
641 	start = gethrtime();
642 
643 	filebench_log(LOG_VERBOSE, "Creating %s %s...",
644 	    fileset_entity_name(fileset), fileset_name);
645 
646 	if (!avd_get_bool(fileset->fs_prealloc))
647 		goto exit;
648 
649 	randno = ((RAND_MAX * (100
650 	    - avd_get_int(fileset->fs_preallocpercent))) / 100);
651 
652 	while (entry = fileset_pick(fileset, pickflags, 0)) {
653 		pthread_t tid;
654 
655 		pickflags = FILESET_PICKUNIQUE;
656 
657 		entry->fse_flags &= ~FSE_EXISTS;
658 
659 		/* entry doesn't need to be locked during initialization */
660 		(void) ipc_mutex_unlock(&entry->fse_lock);
661 
662 		if (rand() < randno)
663 			continue;
664 
665 		preallocated++;
666 
667 		if (reusing)
668 			entry->fse_flags |= FSE_REUSING;
669 		else
670 			entry->fse_flags &= (~FSE_REUSING);
671 
672 		if (avd_get_bool(fileset->fs_paralloc)) {
673 
674 			/* fire off a separate allocation thread */
675 			(void) pthread_mutex_lock(&paralloc_lock);
676 			while (paralloc_count >= MAX_PARALLOC_THREADS) {
677 				(void) pthread_cond_wait(
678 				    &paralloc_cv, &paralloc_lock);
679 			}
680 
681 			if (paralloc_count < 0) {
682 				(void) pthread_mutex_unlock(&paralloc_lock);
683 				return (-1);
684 			}
685 
686 			paralloc_count++;
687 			(void) pthread_mutex_unlock(&paralloc_lock);
688 
689 			if (pthread_create(&tid, NULL,
690 			    (void *(*)(void*))fileset_alloc_thread,
691 			    entry) != 0) {
692 				filebench_log(LOG_ERROR,
693 				    "File prealloc thread create failed");
694 				filebench_shutdown(1);
695 			}
696 
697 		} else {
698 			if (fileset_alloc_file(entry) == -1)
699 				return (-1);
700 		}
701 	}
702 
703 exit:
704 	filebench_log(LOG_VERBOSE,
705 	    "Preallocated %d of %llu of %s %s in %llu seconds",
706 	    preallocated,
707 	    (u_longlong_t)fileset->fs_constentries,
708 	    fileset_entity_name(fileset), fileset_name,
709 	    (u_longlong_t)(((gethrtime() - start) / 1000000000) + 1));
710 
711 	return (0);
712 }
713 
714 /*
715  * Adds an entry to the fileset's file list. Single threaded so
716  * no locking needed.
717  */
718 static void
719 fileset_insfilelist(fileset_t *fileset, filesetentry_t *entry)
720 {
721 	if (fileset->fs_filelist == NULL) {
722 		fileset->fs_filelist = entry;
723 		entry->fse_filenext = NULL;
724 	} else {
725 		entry->fse_filenext = fileset->fs_filelist;
726 		fileset->fs_filelist = entry;
727 	}
728 }
729 
730 /*
731  * Adds an entry to the fileset's directory list. Single
732  * threaded so no locking needed.
733  */
734 static void
735 fileset_insdirlist(fileset_t *fileset, filesetentry_t *entry)
736 {
737 	if (fileset->fs_dirlist == NULL) {
738 		fileset->fs_dirlist = entry;
739 		entry->fse_dirnext = NULL;
740 	} else {
741 		entry->fse_dirnext = fileset->fs_dirlist;
742 		fileset->fs_dirlist = entry;
743 	}
744 }
745 
746 /*
747  * Obtaines a filesetentry entity for a file to be placed in a
748  * (sub)directory of a fileset. The size of the file may be
749  * specified by fileset_meansize, or calculated from a gamma
750  * distribution of parameter fileset_sizegamma and of mean size
751  * fileset_meansize. The filesetentry entity is placed on the file
752  * list in the specified parent filesetentry entity, which may
753  * be a directory filesetentry, or the root filesetentry in the
754  * fileset. It is also placed on the fileset's list of all
755  * contained files. Returns 0 if successful or -1 if ipc memory
756  * for the path string cannot be allocated.
757  */
758 static int
759 fileset_populate_file(fileset_t *fileset, filesetentry_t *parent, int serial)
760 {
761 	char tmpname[16];
762 	filesetentry_t *entry;
763 	double drand;
764 
765 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
766 	    == NULL) {
767 		filebench_log(LOG_ERROR,
768 		    "fileset_populate_file: Can't malloc filesetentry");
769 		return (-1);
770 	}
771 
772 	(void) pthread_mutex_init(&entry->fse_lock, ipc_mutexattr());
773 	entry->fse_parent = parent;
774 	entry->fse_fileset = fileset;
775 	entry->fse_flags |= FSE_FREE;
776 	fileset_insfilelist(fileset, entry);
777 
778 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
779 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
780 		filebench_log(LOG_ERROR,
781 		    "fileset_populate_file: Can't alloc path string");
782 		return (-1);
783 	}
784 
785 	/* see if random variable was supplied for file size */
786 	if (fileset->fs_meansize == -1) {
787 		entry->fse_size = (off64_t)avd_get_int(fileset->fs_size);
788 	} else {
789 		double gamma;
790 
791 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
792 		if (gamma > 0) {
793 			drand = gamma_dist_knuth(gamma,
794 			    fileset->fs_meansize / gamma);
795 			entry->fse_size = (off64_t)drand;
796 		} else {
797 			entry->fse_size = (off64_t)fileset->fs_meansize;
798 		}
799 	}
800 
801 	fileset->fs_bytes += entry->fse_size;
802 
803 	fileset->fs_realfiles++;
804 	return (0);
805 }
806 
807 /*
808  * Creates a directory node in a fileset, by obtaining a
809  * filesetentry entity for the node and initializing it
810  * according to parameters of the fileset. It determines a
811  * directory tree depth and directory width, optionally using
812  * a gamma distribution. If its calculated depth is less then
813  * its actual depth in the directory tree, it becomes a leaf
814  * node and files itself with "width" number of file type
815  * filesetentries, otherwise it files itself with "width"
816  * number of directory type filesetentries, using recursive
817  * calls to fileset_populate_subdir. The end result of the
818  * initial call to this routine is a tree of directories of
819  * random width and varying depth with sufficient leaf
820  * directories to contain all required files.
821  * Returns 0 on success. Returns -1 if ipc path string memory
822  * cannot be allocated and returns an error code (currently
823  * also -1) from calls to fileset_populate_file or recursive
824  * calls to fileset_populate_subdir.
825  */
826 static int
827 fileset_populate_subdir(fileset_t *fileset, filesetentry_t *parent,
828     int serial, double depth)
829 {
830 	double randepth, drand, ranwidth;
831 	int isleaf = 0;
832 	char tmpname[16];
833 	filesetentry_t *entry;
834 	int i;
835 
836 	depth += 1;
837 
838 	/* Create dir node */
839 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
840 	    == NULL) {
841 		filebench_log(LOG_ERROR,
842 		    "fileset_populate_subdir: Can't malloc filesetentry");
843 		return (-1);
844 	}
845 
846 	(void) pthread_mutex_init(&entry->fse_lock, ipc_mutexattr());
847 
848 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
849 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
850 		filebench_log(LOG_ERROR,
851 		    "fileset_populate_subdir: Can't alloc path string");
852 		return (-1);
853 	}
854 
855 	entry->fse_parent = parent;
856 	entry->fse_flags |= FSE_DIR | FSE_FREE;
857 	fileset_insdirlist(fileset, entry);
858 
859 	if (fileset->fs_dirdepthrv) {
860 		randepth = (int)avd_get_int(fileset->fs_dirdepthrv);
861 	} else {
862 		double gamma;
863 
864 		gamma = avd_get_int(fileset->fs_dirgamma) / 1000.0;
865 		if (gamma > 0) {
866 			drand = gamma_dist_knuth(gamma,
867 			    fileset->fs_meandepth / gamma);
868 			randepth = (int)drand;
869 		} else {
870 			randepth = (int)fileset->fs_meandepth;
871 		}
872 	}
873 
874 	if (fileset->fs_meanwidth == -1) {
875 		ranwidth = avd_get_dbl(fileset->fs_dirwidth);
876 	} else {
877 		double gamma;
878 
879 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
880 		if (gamma > 0) {
881 			drand = gamma_dist_knuth(gamma,
882 			    fileset->fs_meanwidth / gamma);
883 			ranwidth = drand;
884 		} else {
885 			ranwidth = fileset->fs_meanwidth;
886 		}
887 	}
888 
889 	if (randepth == 0)
890 		randepth = 1;
891 	if (ranwidth == 0)
892 		ranwidth = 1;
893 	if (depth >= randepth)
894 		isleaf = 1;
895 
896 	/*
897 	 * Create directory of random width according to distribution, or
898 	 * if root directory, continue until #files required
899 	 */
900 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
901 	    (fileset->fs_realfiles < fileset->fs_constentries);
902 	    i++) {
903 		int ret = 0;
904 
905 		if (parent && isleaf)
906 			ret = fileset_populate_file(fileset, entry, i);
907 		else
908 			ret = fileset_populate_subdir(fileset, entry, i, depth);
909 
910 		if (ret != 0)
911 			return (ret);
912 	}
913 	return (0);
914 }
915 
916 /*
917  * Populates a fileset with files and subdirectory entries. Uses
918  * the supplied fileset_dirwidth and fileset_entries (number of files) to
919  * calculate the required fileset_meandepth (of subdirectories) and
920  * initialize the fileset_meanwidth and fileset_meansize variables. Then
921  * calls fileset_populate_subdir() to do the recursive
922  * subdirectory entry creation and leaf file entry creation. All
923  * of the above is skipped if the fileset has already been
924  * populated. Returns 0 on success, or an error code from the
925  * call to fileset_populate_subdir if that call fails.
926  */
927 static int
928 fileset_populate(fileset_t *fileset)
929 {
930 	int entries = (int)avd_get_int(fileset->fs_entries);
931 	int meandirwidth;
932 	int ret;
933 
934 	/* Skip if already populated */
935 	if (fileset->fs_bytes > 0)
936 		goto exists;
937 
938 #ifdef HAVE_RAW_SUPPORT
939 	/* check for raw device */
940 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
941 		return (0);
942 #endif /* HAVE_RAW_SUPPORT */
943 
944 	/* save value of entries obtained for later, in case it was random */
945 	fileset->fs_constentries = entries;
946 
947 	/* is dirwidth a random variable? */
948 	if (AVD_IS_RANDOM(fileset->fs_dirwidth)) {
949 		meandirwidth =
950 		    (int)fileset->fs_dirwidth->avd_val.randptr->rnd_dbl_mean;
951 		fileset->fs_meanwidth = -1;
952 	} else {
953 		meandirwidth = (int)avd_get_int(fileset->fs_dirwidth);
954 		fileset->fs_meanwidth = (double)meandirwidth;
955 	}
956 
957 	/*
958 	 * Input params are:
959 	 *	# of files
960 	 *	ave # of files per dir
961 	 *	max size of dir
962 	 *	# ave size of file
963 	 *	max size of file
964 	 */
965 	fileset->fs_meandepth = log(entries) / log(meandirwidth);
966 
967 	/* Has a random variable been supplied for dirdepth? */
968 	if (fileset->fs_dirdepthrv) {
969 		/* yes, so set the random variable's mean value to meandepth */
970 		fileset->fs_dirdepthrv->avd_val.randptr->rnd_dbl_mean =
971 		    fileset->fs_meandepth;
972 	}
973 
974 	/* test for random size variable */
975 	if (AVD_IS_RANDOM(fileset->fs_size))
976 		fileset->fs_meansize = -1;
977 	else
978 		fileset->fs_meansize = avd_get_int(fileset->fs_size);
979 
980 	if ((ret = fileset_populate_subdir(fileset, NULL, 1, 0)) != 0)
981 		return (ret);
982 
983 
984 exists:
985 	if (fileset->fs_attrs & FILESET_IS_FILE) {
986 		filebench_log(LOG_VERBOSE, "File %s: mbytes=%llu",
987 		    avd_get_str(fileset->fs_name),
988 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
989 	} else {
990 		filebench_log(LOG_VERBOSE, "Fileset %s: %d files, "
991 		    "avg dir = %d, avg depth = %.1lf, mbytes=%llu",
992 		    avd_get_str(fileset->fs_name), entries,
993 		    meandirwidth,
994 		    fileset->fs_meandepth,
995 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
996 	}
997 	return (0);
998 }
999 
1000 /*
1001  * Allocates a fileset instance, initializes fileset_dirgamma and
1002  * fileset_sizegamma default values, and sets the fileset name to the
1003  * supplied name string. Puts the allocated fileset on the
1004  * master fileset list and returns a pointer to it.
1005  */
1006 fileset_t *
1007 fileset_define(avd_t name)
1008 {
1009 	fileset_t *fileset;
1010 
1011 	if (name == NULL)
1012 		return (NULL);
1013 
1014 	if ((fileset = (fileset_t *)ipc_malloc(FILEBENCH_FILESET)) == NULL) {
1015 		filebench_log(LOG_ERROR,
1016 		    "fileset_define: Can't malloc fileset");
1017 		return (NULL);
1018 	}
1019 
1020 	filebench_log(LOG_DEBUG_IMPL,
1021 	    "Defining file %s", avd_get_str(name));
1022 
1023 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
1024 
1025 	fileset->fs_dirgamma = avd_int_alloc(1500);
1026 	fileset->fs_sizegamma = avd_int_alloc(1500);
1027 
1028 	/* Add fileset to global list */
1029 	if (filebench_shm->filesetlist == NULL) {
1030 		filebench_shm->filesetlist = fileset;
1031 		fileset->fs_next = NULL;
1032 	} else {
1033 		fileset->fs_next = filebench_shm->filesetlist;
1034 		filebench_shm->filesetlist = fileset;
1035 	}
1036 
1037 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
1038 
1039 	fileset->fs_name = name;
1040 
1041 	return (fileset);
1042 }
1043 
1044 /*
1045  * If supplied with a pointer to a fileset and the fileset's
1046  * fileset_prealloc flag is set, calls fileset_populate() to populate
1047  * the fileset with filesetentries, then calls fileset_create()
1048  * to make actual directories and files for the filesetentries.
1049  * Otherwise, it applies fileset_populate() and fileset_create()
1050  * to all the filesets on the master fileset list. It always
1051  * returns zero (0) if one fileset is populated / created,
1052  * otherwise it returns the sum of returned values from
1053  * fileset_create() and fileset_populate(), which
1054  * will be a negative one (-1) times the number of
1055  * fileset_create() calls which failed.
1056  */
1057 int
1058 fileset_createset(fileset_t *fileset)
1059 {
1060 	fileset_t *list;
1061 	int ret = 0;
1062 
1063 	/* set up for possible parallel allocate */
1064 	paralloc_count = 0;
1065 
1066 	if (fileset && avd_get_bool(fileset->fs_prealloc)) {
1067 
1068 		filebench_log(LOG_INFO,
1069 		    "creating/pre-allocating %s %s",
1070 		    fileset_entity_name(fileset),
1071 		    avd_get_str(fileset->fs_name));
1072 
1073 		if ((ret = fileset_populate(fileset)) != 0)
1074 			return (ret);
1075 
1076 		if ((ret = fileset_create(fileset)) != 0)
1077 			return (ret);
1078 	} else {
1079 
1080 		filebench_log(LOG_INFO,
1081 		    "Creating/pre-allocating files and filesets");
1082 
1083 		list = filebench_shm->filesetlist;
1084 		while (list) {
1085 			if ((ret = fileset_populate(list)) != 0)
1086 				return (ret);
1087 			if ((ret = fileset_create(list)) != 0)
1088 				return (ret);
1089 			list = list->fs_next;
1090 		}
1091 	}
1092 
1093 	/* wait for allocation threads to finish */
1094 	filebench_log(LOG_INFO,
1095 	    "waiting for fileset pre-allocation to finish");
1096 
1097 	(void) pthread_mutex_lock(&paralloc_lock);
1098 	while (paralloc_count > 0)
1099 		(void) pthread_cond_wait(&paralloc_cv, &paralloc_lock);
1100 	(void) pthread_mutex_unlock(&paralloc_lock);
1101 
1102 	if (paralloc_count < 0)
1103 		return (-1);
1104 
1105 	return (0);
1106 }
1107 
1108 /*
1109  * Searches through the master fileset list for the named fileset.
1110  * If found, returns pointer to same, otherwise returns NULL.
1111  */
1112 fileset_t *
1113 fileset_find(char *name)
1114 {
1115 	fileset_t *fileset = filebench_shm->filesetlist;
1116 
1117 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
1118 
1119 	while (fileset) {
1120 		if (strcmp(name, avd_get_str(fileset->fs_name)) == 0) {
1121 			(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
1122 			return (fileset);
1123 		}
1124 		fileset = fileset->fs_next;
1125 	}
1126 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
1127 
1128 	return (NULL);
1129 }
1130 
1131 /*
1132  * Iterates over all the file sets in the filesetlist,
1133  * executing the supplied command "*cmd()" on them. Also
1134  * indicates to the executed command if it is the first
1135  * time the command has been executed since the current
1136  * call to fileset_iter.
1137  */
1138 void
1139 fileset_iter(int (*cmd)(fileset_t *fileset, int first))
1140 {
1141 	fileset_t *fileset = filebench_shm->filesetlist;
1142 	int count = 0;
1143 
1144 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
1145 
1146 	while (fileset) {
1147 		cmd(fileset, count == 0);
1148 		fileset = fileset->fs_next;
1149 		count++;
1150 	}
1151 
1152 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
1153 }
1154 
1155 /*
1156  * Prints information to the filebench log about the file
1157  * object. Also prints a header on the first call.
1158  */
1159 int
1160 fileset_print(fileset_t *fileset, int first)
1161 {
1162 	int pathlength;
1163 	char *fileset_path;
1164 	char *fileset_name;
1165 	static char pad[] = "                              "; /* 30 spaces */
1166 
1167 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
1168 		filebench_log(LOG_ERROR, "%s path not set",
1169 		    fileset_entity_name(fileset));
1170 		return (-1);
1171 	}
1172 
1173 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
1174 		filebench_log(LOG_ERROR, "%s name not set",
1175 		    fileset_entity_name(fileset));
1176 		return (-1);
1177 	}
1178 
1179 	pathlength = strlen(fileset_path) + strlen(fileset_name);
1180 
1181 	if (pathlength > 29)
1182 		pathlength = 29;
1183 
1184 	if (first) {
1185 		filebench_log(LOG_INFO, "File or Fileset name%20s%12s%10s",
1186 		    "file size",
1187 		    "dir width",
1188 		    "entries");
1189 	}
1190 
1191 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1192 		if (fileset->fs_attrs & FILESET_IS_RAW_DEV) {
1193 			filebench_log(LOG_INFO,
1194 			    "%s/%s%s         (Raw Device)",
1195 			    fileset_path, fileset_name, &pad[pathlength]);
1196 		} else {
1197 			filebench_log(LOG_INFO,
1198 			    "%s/%s%s%9llu     (Single File)",
1199 			    fileset_path, fileset_name, &pad[pathlength],
1200 			    (u_longlong_t)avd_get_int(fileset->fs_size));
1201 		}
1202 	} else {
1203 		filebench_log(LOG_INFO, "%s/%s%s%9llu%12llu%10llu",
1204 		    fileset_path, fileset_name,
1205 		    &pad[pathlength],
1206 		    (u_longlong_t)avd_get_int(fileset->fs_size),
1207 		    (u_longlong_t)avd_get_int(fileset->fs_dirwidth),
1208 		    (u_longlong_t)fileset->fs_constentries);
1209 	}
1210 	return (0);
1211 }
1212 /*
1213  * checks to see if the path/name pair points to a raw device. If
1214  * so it sets the raw device flag (FILESET_IS_RAW_DEV) and returns 1.
1215  * If RAW is not defined, or it is not a raw device, it clears the
1216  * raw device flag and returns 0.
1217  */
1218 int
1219 fileset_checkraw(fileset_t *fileset)
1220 {
1221 	char path[MAXPATHLEN];
1222 	struct stat64 sb;
1223 
1224 	fileset->fs_attrs &= (~FILESET_IS_RAW_DEV);
1225 
1226 #ifdef HAVE_RAW_SUPPORT
1227 	/* check for raw device */
1228 	(void) strcpy(path, avd_get_str(fileset->fs_path));
1229 	(void) strcat(path, "/");
1230 	(void) strcat(path, avd_get_str(fileset->fs_name));
1231 	if ((stat64(path, &sb) == 0) &&
1232 	    ((sb.st_mode & S_IFMT) == S_IFBLK) && sb.st_rdev) {
1233 		fileset->fs_attrs |= FILESET_IS_RAW_DEV;
1234 		return (1);
1235 	}
1236 #endif /* HAVE_RAW_SUPPORT */
1237 
1238 	return (0);
1239 }
1240