xref: /onnv-gate/usr/src/cmd/filebench/common/fileset.c (revision 5673:043503f0cca3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 
29 #include <fcntl.h>
30 #include <pthread.h>
31 #include <errno.h>
32 #include <math.h>
33 #include <libgen.h>
34 #include <sys/mman.h>
35 #include "fileset.h"
36 #include "filebench.h"
37 #include "gamma_dist.h"
38 
39 /*
40  * File sets, of type fileset_t, are entities which contain
41  * information about collections of files and subdirectories in Filebench.
42  * The fileset, once populated, consists of a tree of fileset entries of
43  * type filesetentry_t which specify files and directories.  The fileset
44  * is rooted in a directory specified by fs_path, and once the populated
45  * fileset has been created, has a tree of directories and files
46  * corresponding to the fileset's filesetentry tree.
47  */
48 
/*
 * Parallel allocation control. paralloc_count is the number of file
 * allocation threads currently in flight; an allocation thread that
 * fails forces it negative. It is shared with those threads and is
 * guarded by paralloc_lock, while paralloc_cv is signalled each time
 * an allocation thread finishes. At most MAX_PARALLOC_THREADS
 * allocation threads are allowed to run at the same time.
 */
#define	MAX_PARALLOC_THREADS 32
static int		paralloc_count;
static pthread_cond_t	paralloc_cv = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t	paralloc_lock = PTHREAD_MUTEX_INITIALIZER;
54 
55 /*
56  * returns pointer to file or fileset
57  * string, as appropriate
58  */
59 static char *
60 fileset_entity_name(fileset_t *fileset)
61 {
62 	if (fileset->fs_attrs & FILESET_IS_FILE)
63 		return ("file");
64 	else
65 		return ("fileset");
66 }
67 
/*
 * Strips the last file or directory name from a pathname, in place.
 * Working backwards from the terminating NUL, each character is
 * overwritten with \0 until a forward slash '/' has been overwritten
 * or the first character of the string is reached. The first
 * character itself is never modified, so "/abc" becomes "/" and a
 * name without any slash is cut down to its first character.
 * Returns the supplied "dir" pointer.
 */
static char *
trunc_dirname(char *dir)
{
	char *cursor;

	for (cursor = dir + strlen(dir); cursor > dir; cursor--) {
		char removed = *cursor;

		*cursor = '\0';
		if (removed == '/')
			break;
	}
	return (dir);
}
89 
/*
 * Prints a list of allowed file / fileset definition options and
 * how to specify them to stderr.
 */
void
fileset_usage(void)
{
	/*
	 * The two string fragments below are concatenated; only one of
	 * them may carry the comma that separates path= from entries=.
	 */
	(void) fprintf(stderr,
	    "define [file name=<name> | fileset name=<name>],path=<pathname>"
	    ",entries=<number>\n");
	(void) fprintf(stderr,
	    "		        [,dirwidth=[width]]\n");
	(void) fprintf(stderr,
	    "		        [,dirgamma=[100-10000]] "
	    "(Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,sizegamma=[100-10000]] (Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,prealloc=[percent]]\n");
	(void) fprintf(stderr, "		        [,paralloc]\n");
	(void) fprintf(stderr, "		        [,reuse]\n");
	(void) fprintf(stderr, "\n");
}
112 
113 /*
114  * Frees up memory mapped file region of supplied size. The
115  * file descriptor "fd" indicates which memory mapped file.
116  * If successful, returns 0. Otherwise returns -1 if "size"
117  * is zero, or -1 times the number of times msync() failed.
118  */
119 static int
120 fileset_freemem(int fd, off64_t size)
121 {
122 	off64_t left;
123 	int ret = 0;
124 
125 	for (left = size; left > 0; left -= MMAP_SIZE) {
126 		off64_t thismapsize;
127 		caddr_t addr;
128 
129 		thismapsize = MIN(MMAP_SIZE, left);
130 		addr = mmap64(0, thismapsize, PROT_READ|PROT_WRITE,
131 		    MAP_SHARED, fd, size - left);
132 		ret += msync(addr, thismapsize, MS_INVALIDATE);
133 		(void) munmap(addr, thismapsize);
134 	}
135 	return (ret);
136 }
137 
138 /*
139  * Creates a path string from the filesetentry_t "*entry"
140  * and all of its parent's path names. The resulting path
141  * is a concatination of all the individual parent paths.
142  * Allocates memory for the path string and returns a
143  * pointer to it.
144  */
145 char *
146 fileset_resolvepath(filesetentry_t *entry)
147 {
148 	filesetentry_t *fsep = entry;
149 	char path[MAXPATHLEN];
150 	char pathtmp[MAXPATHLEN];
151 	char *s;
152 
153 	*path = 0;
154 	while (fsep->fse_parent) {
155 		(void) strcpy(pathtmp, "/");
156 		(void) strcat(pathtmp, fsep->fse_path);
157 		(void) strcat(pathtmp, path);
158 		(void) strcpy(path, pathtmp);
159 		fsep = fsep->fse_parent;
160 	}
161 
162 	s = malloc(strlen(path) + 1);
163 	(void) strcpy(s, path);
164 	return (s);
165 }
166 
167 /*
168  * Creates multiple nested directories as required by the
169  * supplied path. Starts at the end of the path, creating
170  * a list of directories to mkdir, up to the root of the
171  * path, then mkdirs them one at a time from the root on down.
172  */
173 static int
174 fileset_mkdir(char *path, int mode)
175 {
176 	char *p;
177 	char *dirs[65536];
178 	int i = 0;
179 
180 	if ((p = strdup(path)) == NULL)
181 		goto null_str;
182 
183 	/*
184 	 * Fill an array of subdirectory path names until either we
185 	 * reach the root or encounter an already existing subdirectory
186 	 */
187 	/* CONSTCOND */
188 	while (1) {
189 		struct stat64 sb;
190 
191 		if (stat64(p, &sb) == 0)
192 			break;
193 		if (strlen(p) < 3)
194 			break;
195 		if ((dirs[i] = strdup(p)) == NULL) {
196 			free(p);
197 			goto null_str;
198 		}
199 
200 		(void) trunc_dirname(p);
201 		i++;
202 	}
203 
204 	/* Make the directories, from closest to root downwards. */
205 	for (--i; i >= 0; i--) {
206 		(void) mkdir(dirs[i], mode);
207 		free(dirs[i]);
208 	}
209 
210 	free(p);
211 	return (0);
212 
213 null_str:
214 	/* clean up */
215 	for (--i; i >= 0; i--)
216 		free(dirs[i]);
217 
218 	filebench_log(LOG_ERROR,
219 	    "Failed to create directory path %s: Out of memory", path);
220 
221 	return (-1);
222 }
223 
224 /*
225  * creates the subdirectory tree for a fileset.
226  */
227 static int
228 fileset_create_subdirs(fileset_t *fileset, char *filesetpath)
229 {
230 	filesetentry_t *direntry;
231 	char full_path[MAXPATHLEN];
232 	char *part_path;
233 
234 	/* walk the subdirectory list, enstanciating subdirs */
235 	direntry = fileset->fs_dirlist;
236 	while (direntry) {
237 		(void) strcpy(full_path, filesetpath);
238 		part_path = fileset_resolvepath(direntry);
239 		(void) strcat(full_path, part_path);
240 		free(part_path);
241 
242 		/* now create this portion of the subdirectory tree */
243 		if (fileset_mkdir(full_path, 0755) == -1)
244 			return (-1);
245 
246 		direntry = direntry->fse_dirnext;
247 	}
248 	return (0);
249 }
250 
251 /*
252  * given a fileset entry, determines if the associated file
253  * needs to be allocated or not, and if so does the allocation.
254  */
255 static int
256 fileset_alloc_file(filesetentry_t *entry)
257 {
258 	char path[MAXPATHLEN];
259 	char *buf;
260 	struct stat64 sb;
261 	char *pathtmp;
262 	off64_t seek;
263 	int fd;
264 
265 	*path = 0;
266 	(void) strcpy(path, *entry->fse_fileset->fs_path);
267 	(void) strcat(path, "/");
268 	(void) strcat(path, entry->fse_fileset->fs_name);
269 	pathtmp = fileset_resolvepath(entry);
270 	(void) strcat(path, pathtmp);
271 
272 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
273 
274 	/* see if reusing and this file exists */
275 	if ((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0)) {
276 		if ((fd = open64(path, O_RDWR)) < 0) {
277 			filebench_log(LOG_INFO,
278 			    "Attempted but failed to Re-use file %s",
279 			    path);
280 			return (-1);
281 		}
282 
283 		if (sb.st_size == (off64_t)entry->fse_size) {
284 			filebench_log(LOG_INFO,
285 			    "Re-using file %s", path);
286 
287 			if (!integer_isset(entry->fse_fileset->fs_cached))
288 				(void) fileset_freemem(fd,
289 				    entry->fse_size);
290 
291 			entry->fse_flags |= FSE_EXISTS;
292 			(void) close(fd);
293 			return (0);
294 
295 		} else if (sb.st_size > (off64_t)entry->fse_size) {
296 			/* reuse, but too large */
297 			filebench_log(LOG_INFO,
298 			    "Truncating & re-using file %s", path);
299 
300 			(void) ftruncate64(fd,
301 			    (off64_t)entry->fse_size);
302 
303 			if (!integer_isset(entry->fse_fileset->fs_cached))
304 				(void) fileset_freemem(fd,
305 				    entry->fse_size);
306 
307 			entry->fse_flags |= FSE_EXISTS;
308 			(void) close(fd);
309 			return (0);
310 		}
311 	} else {
312 
313 		/* No file or not reusing, so create */
314 		if ((fd = open64(path, O_RDWR | O_CREAT, 0644)) < 0) {
315 			filebench_log(LOG_ERROR,
316 			    "Failed to pre-allocate file %s: %s",
317 			    path, strerror(errno));
318 
319 			return (-1);
320 		}
321 	}
322 
323 	if ((buf = (char *)malloc(FILE_ALLOC_BLOCK)) == NULL)
324 		return (-1);
325 
326 	entry->fse_flags |= FSE_EXISTS;
327 
328 	for (seek = 0; seek < entry->fse_size; ) {
329 		off64_t wsize;
330 		int ret = 0;
331 
332 		/*
333 		 * Write FILE_ALLOC_BLOCK's worth,
334 		 * except on last write
335 		 */
336 		wsize = MIN(entry->fse_size - seek, FILE_ALLOC_BLOCK);
337 
338 		ret = write(fd, buf, wsize);
339 		if (ret != wsize) {
340 			filebench_log(LOG_ERROR,
341 			    "Failed to pre-allocate file %s: %s",
342 			    path, strerror(errno));
343 			(void) close(fd);
344 			free(buf);
345 			return (-1);
346 		}
347 		seek += wsize;
348 	}
349 
350 	if (!integer_isset(entry->fse_fileset->fs_cached))
351 		(void) fileset_freemem(fd, entry->fse_size);
352 
353 	(void) close(fd);
354 
355 	free(buf);
356 
357 	filebench_log(LOG_DEBUG_IMPL,
358 	    "Pre-allocated file %s size %lld", path, entry->fse_size);
359 
360 	return (0);
361 }
362 
363 /*
364  * given a fileset entry, determines if the associated file
365  * needs to be allocated or not, and if so does the allocation.
366  */
367 static void *
368 fileset_alloc_thread(filesetentry_t *entry)
369 {
370 	if (fileset_alloc_file(entry) == -1) {
371 		(void) pthread_mutex_lock(&paralloc_lock);
372 		paralloc_count = -1;
373 	} else {
374 		(void) pthread_mutex_lock(&paralloc_lock);
375 		paralloc_count--;
376 	}
377 
378 	(void) pthread_cond_signal(&paralloc_cv);
379 	(void) pthread_mutex_unlock(&paralloc_lock);
380 
381 	pthread_exit(NULL);
382 	return (NULL);
383 }
384 
385 
386 /*
387  * First creates the parent directories of the file using
388  * fileset_mkdir(). Then Optionally sets the O_DSYNC flag
389  * and opens the file with open64(). It unlocks the fileset
390  * entry lock, sets the DIRECTIO_ON or DIRECTIO_OFF flags
391  * as requested, and returns the file descriptor integer
392  * for the opened file.
393  */
394 int
395 fileset_openfile(fileset_t *fileset,
396     filesetentry_t *entry, int flag, int mode, int attrs)
397 {
398 	char path[MAXPATHLEN];
399 	char dir[MAXPATHLEN];
400 	char *pathtmp;
401 	struct stat64 sb;
402 	int fd;
403 	int open_attrs = 0;
404 
405 	*path = 0;
406 	(void) strcpy(path, *fileset->fs_path);
407 	(void) strcat(path, "/");
408 	(void) strcat(path, fileset->fs_name);
409 	pathtmp = fileset_resolvepath(entry);
410 	(void) strcat(path, pathtmp);
411 	(void) strcpy(dir, path);
412 	free(pathtmp);
413 	(void) trunc_dirname(dir);
414 
415 	/* If we are going to create a file, create the parent dirs */
416 	if ((flag & O_CREAT) && (stat64(dir, &sb) != 0)) {
417 		if (fileset_mkdir(dir, 0755) == -1)
418 			return (-1);
419 	}
420 
421 	if (flag & O_CREAT)
422 		entry->fse_flags |= FSE_EXISTS;
423 
424 	if (attrs & FLOW_ATTR_DSYNC) {
425 #ifdef sun
426 		open_attrs |= O_DSYNC;
427 #else
428 		open_attrs |= O_FSYNC;
429 #endif
430 	}
431 
432 	if ((fd = open64(path, flag | open_attrs, mode)) < 0) {
433 		filebench_log(LOG_ERROR,
434 		    "Failed to open file %s: %s",
435 		    path, strerror(errno));
436 		(void) ipc_mutex_unlock(&entry->fse_lock);
437 		return (-1);
438 	}
439 	(void) ipc_mutex_unlock(&entry->fse_lock);
440 
441 #ifdef sun
442 	if (attrs & FLOW_ATTR_DIRECTIO)
443 		(void) directio(fd, DIRECTIO_ON);
444 	else
445 		(void) directio(fd, DIRECTIO_OFF);
446 #endif
447 
448 	return (fd);
449 }
450 
451 
452 /*
453  * Selects a fileset entry from a fileset. If the
454  * FILESET_PICKDIR flag is set it will pick a directory
455  * entry, otherwise a file entry. The FILESET_PICKRESET
456  * flag will cause it to reset the free list to the
457  * overall list (file or directory). The FILESET_PICKUNIQUE
458  * flag will take an entry off of one of the free (unused)
459  * lists (file or directory), otherwise the entry will be
460  * picked off of one of the rotor lists (file or directory).
461  * The FILESET_PICKEXISTS will insure that only extant
462  * (FSE_EXISTS) state files are selected, while
463  * FILESET_PICKNOEXIST insures that only non extant
464  * (not FSE_EXISTS) state files are selected.
465  */
466 filesetentry_t *
467 fileset_pick(fileset_t *fileset, int flags, int tid)
468 {
469 	filesetentry_t *entry = NULL;
470 	filesetentry_t *first = NULL;
471 
472 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
473 
474 	while (entry == NULL) {
475 
476 		if ((flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
477 			entry = fileset->fs_dirlist;
478 			while (entry) {
479 				entry->fse_flags |= FSE_FREE;
480 				entry = entry->fse_dirnext;
481 			}
482 			fileset->fs_dirfree = fileset->fs_dirlist;
483 		}
484 
485 		if (!(flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
486 			entry = fileset->fs_filelist;
487 			while (entry) {
488 				entry->fse_flags |= FSE_FREE;
489 				entry = entry->fse_filenext;
490 			}
491 			fileset->fs_filefree = fileset->fs_filelist;
492 		}
493 
494 		if (flags & FILESET_PICKUNIQUE) {
495 			if (flags & FILESET_PICKDIR) {
496 				entry = fileset->fs_dirfree;
497 				if (entry == NULL)
498 					goto empty;
499 				fileset->fs_dirfree = entry->fse_dirnext;
500 			} else {
501 				entry = fileset->fs_filefree;
502 				if (entry == NULL)
503 					goto empty;
504 				fileset->fs_filefree = entry->fse_filenext;
505 			}
506 			entry->fse_flags &= ~FSE_FREE;
507 		} else {
508 			if (flags & FILESET_PICKDIR) {
509 				entry = fileset->fs_dirrotor;
510 				if (entry == NULL)
511 				fileset->fs_dirrotor =
512 				    entry = fileset->fs_dirlist;
513 				fileset->fs_dirrotor = entry->fse_dirnext;
514 			} else {
515 				entry = fileset->fs_filerotor[tid];
516 				if (entry == NULL)
517 					fileset->fs_filerotor[tid] =
518 					    entry = fileset->fs_filelist;
519 				fileset->fs_filerotor[tid] =
520 				    entry->fse_filenext;
521 			}
522 		}
523 
524 		if (first == entry)
525 			goto empty;
526 
527 		if (first == NULL)
528 			first = entry;
529 
530 		/* Return locked entry */
531 		(void) ipc_mutex_lock(&entry->fse_lock);
532 
533 		/* If we ask for an existing file, go round again */
534 		if ((flags & FILESET_PICKEXISTS) &&
535 		    !(entry->fse_flags & FSE_EXISTS)) {
536 			(void) ipc_mutex_unlock(&entry->fse_lock);
537 			entry = NULL;
538 		}
539 
540 		/* If we ask for not an existing file, go round again */
541 		if ((flags & FILESET_PICKNOEXIST) &&
542 		    (entry->fse_flags & FSE_EXISTS)) {
543 			(void) ipc_mutex_unlock(&entry->fse_lock);
544 			entry = NULL;
545 		}
546 	}
547 
548 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
549 	filebench_log(LOG_DEBUG_SCRIPT, "Picked file %s", entry->fse_path);
550 	return (entry);
551 
552 empty:
553 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
554 	return (NULL);
555 }
556 
557 /*
558  * Given a fileset "fileset", create the associated files as
559  * specified in the attributes of the fileset. The fileset is
560  * rooted in a directory whose pathname is in fs_path. If the
561  * directory exists, meaning that there is already a fileset,
562  * and the fs_reuse attribute is false, then remove it and all
563  * its contained files and subdirectories. Next, the routine
564  * creates a root directory for the fileset. All the file type
565  * filesetentries are cycled through creating as needed
566  * their containing subdirectory trees in the filesystem and
567  * creating actual files for fs_preallocpercent of them. The
568  * created files are filled with fse_size bytes of unitialized
569  * data. The routine returns -1 on errors, 0 on success.
570  */
571 static int
572 fileset_create(fileset_t *fileset)
573 {
574 	filesetentry_t *entry;
575 	char path[MAXPATHLEN];
576 	struct stat64 sb;
577 	int pickflags = FILESET_PICKUNIQUE | FILESET_PICKRESET;
578 	hrtime_t start = gethrtime();
579 	int preallocated = 0;
580 	int reusing = 0;
581 
582 	if (*fileset->fs_path == NULL) {
583 		filebench_log(LOG_ERROR, "%s path not set",
584 		    fileset_entity_name(fileset));
585 		return (-1);
586 	}
587 
588 #ifdef HAVE_RAW_SUPPORT
589 	/* treat raw device as special case */
590 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
591 		return (0);
592 #endif /* HAVE_RAW_SUPPORT */
593 
594 	/* XXX Add check to see if there is enough space */
595 
596 	/* Remove existing */
597 	(void) strcpy(path, *fileset->fs_path);
598 	(void) strcat(path, "/");
599 	(void) strcat(path, fileset->fs_name);
600 	if ((stat64(path, &sb) == 0) && (strlen(path) > 3) &&
601 	    (strlen(*fileset->fs_path) > 2)) {
602 		if (!integer_isset(fileset->fs_reuse)) {
603 			char cmd[MAXPATHLEN];
604 
605 			(void) snprintf(cmd, sizeof (cmd), "rm -rf %s", path);
606 			(void) system(cmd);
607 			filebench_log(LOG_VERBOSE,
608 			    "Removed any existing %s %s in %lld seconds",
609 			    fileset_entity_name(fileset), fileset->fs_name,
610 			    ((gethrtime() - start) / 1000000000) + 1);
611 		} else {
612 			/* we are re-using */
613 			reusing = 1;
614 			filebench_log(LOG_VERBOSE,
615 			    "Re-using %s %s on %s file system.",
616 			    fileset_entity_name(fileset),
617 			    fileset->fs_name, sb.st_fstype);
618 		}
619 	}
620 	(void) mkdir(path, 0755);
621 
622 	/* make the filesets directory tree */
623 	if (fileset_create_subdirs(fileset, path) == -1)
624 		return (-1);
625 
626 	start = gethrtime();
627 
628 	filebench_log(LOG_VERBOSE, "Creating %s %s...",
629 	    fileset_entity_name(fileset), fileset->fs_name);
630 
631 	if (!integer_isset(fileset->fs_prealloc))
632 		goto exit;
633 
634 	while (entry = fileset_pick(fileset, pickflags, 0)) {
635 		int randno;
636 		pthread_t tid;
637 
638 		pickflags = FILESET_PICKUNIQUE;
639 
640 		entry->fse_flags &= ~FSE_EXISTS;
641 
642 		randno = ((RAND_MAX * (100 - *(fileset->fs_preallocpercent)))
643 		    / 100);
644 
645 		/* entry doesn't need to be locked during initialization */
646 		(void) ipc_mutex_unlock(&entry->fse_lock);
647 
648 		if (rand() < randno)
649 			continue;
650 
651 		preallocated++;
652 
653 		if (reusing)
654 			entry->fse_flags |= FSE_REUSING;
655 		else
656 			entry->fse_flags &= (~FSE_REUSING);
657 
658 		if (integer_isset(fileset->fs_paralloc)) {
659 
660 			/* fire off a separate allocation thread */
661 			(void) pthread_mutex_lock(&paralloc_lock);
662 			while (paralloc_count >= MAX_PARALLOC_THREADS) {
663 				(void) pthread_cond_wait(
664 				    &paralloc_cv, &paralloc_lock);
665 			}
666 
667 			if (paralloc_count < 0) {
668 				(void) pthread_mutex_unlock(&paralloc_lock);
669 				return (-1);
670 			}
671 
672 			paralloc_count++;
673 			(void) pthread_mutex_unlock(&paralloc_lock);
674 
675 			if (pthread_create(&tid, NULL,
676 			    (void *(*)(void*))fileset_alloc_thread,
677 			    entry) != 0) {
678 				filebench_log(LOG_ERROR,
679 				    "File prealloc thread create failed");
680 				filebench_shutdown(1);
681 			}
682 
683 		} else {
684 			if (fileset_alloc_file(entry) == -1)
685 				return (-1);
686 		}
687 	}
688 
689 exit:
690 	filebench_log(LOG_VERBOSE,
691 	    "Preallocated %d of %lld of %s %s in %lld seconds",
692 	    preallocated,
693 	    *(fileset->fs_entries),
694 	    fileset_entity_name(fileset),
695 	    fileset->fs_name,
696 	    ((gethrtime() - start) / 1000000000) + 1);
697 
698 	return (0);
699 }
700 
701 /*
702  * Adds an entry to the fileset's file list. Single threaded so
703  * no locking needed.
704  */
705 static void
706 fileset_insfilelist(fileset_t *fileset, filesetentry_t *entry)
707 {
708 	if (fileset->fs_filelist == NULL) {
709 		fileset->fs_filelist = entry;
710 		entry->fse_filenext = NULL;
711 	} else {
712 		entry->fse_filenext = fileset->fs_filelist;
713 		fileset->fs_filelist = entry;
714 	}
715 }
716 
717 /*
718  * Adds an entry to the fileset's directory list. Single
719  * threaded so no locking needed.
720  */
721 static void
722 fileset_insdirlist(fileset_t *fileset, filesetentry_t *entry)
723 {
724 	if (fileset->fs_dirlist == NULL) {
725 		fileset->fs_dirlist = entry;
726 		entry->fse_dirnext = NULL;
727 	} else {
728 		entry->fse_dirnext = fileset->fs_dirlist;
729 		fileset->fs_dirlist = entry;
730 	}
731 }
732 
733 /*
734  * Obtaines a filesetentry entity for a file to be placed in a
735  * (sub)directory of a fileset. The size of the file may be
736  * specified by fs_meansize, or calculated from a gamma
737  * distribution of parameter fs_sizegamma and of mean size
738  * fs_meansize. The filesetentry entity is placed on the file
739  * list in the specified parent filesetentry entity, which may
740  * be a directory filesetentry, or the root filesetentry in the
741  * fileset. It is also placed on the fileset's list of all
742  * contained files. Returns 0 if successful or -1 if ipc memory
743  * for the path string cannot be allocated.
744  */
745 static int
746 fileset_populate_file(fileset_t *fileset, filesetentry_t *parent, int serial)
747 {
748 	char tmpname[16];
749 	filesetentry_t *entry;
750 	double drand;
751 	double gamma;
752 
753 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
754 	    == NULL) {
755 		filebench_log(LOG_ERROR,
756 		    "fileset_populate_file: Can't malloc filesetentry");
757 		return (-1);
758 	}
759 
760 	(void) pthread_mutex_init(&entry->fse_lock, ipc_mutexattr());
761 	entry->fse_parent = parent;
762 	entry->fse_fileset = fileset;
763 	entry->fse_flags |= FSE_FREE;
764 	fileset_insfilelist(fileset, entry);
765 
766 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
767 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
768 		filebench_log(LOG_ERROR,
769 		    "fileset_populate_file: Can't alloc path string");
770 		return (-1);
771 	}
772 
773 	gamma = *(fileset->fs_sizegamma) / 1000.0;
774 
775 	if (gamma > 0) {
776 		drand = gamma_dist_knuth(gamma, fileset->fs_meansize / gamma);
777 		entry->fse_size = (off64_t)drand;
778 	} else {
779 		entry->fse_size = (off64_t)fileset->fs_meansize;
780 	}
781 
782 	fileset->fs_bytes += entry->fse_size;
783 
784 	fileset->fs_realfiles++;
785 	return (0);
786 }
787 
788 /*
789  * Creates a directory node in a fileset, by obtaining a
790  * filesetentry entity for the node and initializing it
791  * according to parameters of the fileset. It determines a
792  * directory tree depth and directory width, optionally using
793  * a gamma distribution. If its calculated depth is less then
794  * its actual depth in the directory tree, it becomes a leaf
795  * node and files itself with "width" number of file type
796  * filesetentries, otherwise it files itself with "width"
797  * number of directory type filesetentries, using recursive
798  * calls to fileset_populate_subdir. The end result of the
799  * initial call to this routine is a tree of directories of
800  * random width and varying depth with sufficient leaf
801  * directories to contain all required files.
802  * Returns 0 on success. Returns -1 if ipc path string memory
803  * cannot be allocated and returns an error code (currently
804  * also -1) from calls to fileset_populate_file or recursive
805  * calls to fileset_populate_subdir.
806  */
807 static int
808 fileset_populate_subdir(fileset_t *fileset, filesetentry_t *parent,
809     int serial, double depth)
810 {
811 	double randepth, drand, ranwidth, gamma;
812 	int isleaf = 0;
813 	char tmpname[16];
814 	filesetentry_t *entry;
815 	int i;
816 
817 	depth += 1;
818 
819 	/* Create dir node */
820 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
821 	    == NULL) {
822 		filebench_log(LOG_ERROR,
823 		    "fileset_populate_subdir: Can't malloc filesetentry");
824 		return (-1);
825 	}
826 
827 	(void) pthread_mutex_init(&entry->fse_lock, ipc_mutexattr());
828 
829 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
830 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
831 		filebench_log(LOG_ERROR,
832 		    "fileset_populate_subdir: Can't alloc path string");
833 		return (-1);
834 	}
835 
836 	entry->fse_parent = parent;
837 	entry->fse_flags |= FSE_DIR | FSE_FREE;
838 	fileset_insdirlist(fileset, entry);
839 
840 	gamma = *(fileset->fs_dirgamma) / 1000.0;
841 	if (gamma > 0) {
842 		drand = gamma_dist_knuth(gamma, fileset->fs_meandepth / gamma);
843 		randepth = (int)drand;
844 	} else {
845 		randepth = (int)fileset->fs_meandepth;
846 	}
847 
848 	gamma = *(fileset->fs_sizegamma) / 1000.0;
849 
850 	if (gamma > 0) {
851 		drand = gamma_dist_knuth(gamma, fileset->fs_meanwidth / gamma);
852 		ranwidth = drand;
853 	} else {
854 		ranwidth = fileset->fs_meanwidth;
855 	}
856 
857 	if (randepth == 0)
858 		randepth = 1;
859 	if (ranwidth == 0)
860 		ranwidth = 1;
861 	if (depth >= randepth)
862 		isleaf = 1;
863 
864 	/*
865 	 * Create directory of random width according to distribution, or
866 	 * if root directory, continue until #files required
867 	 */
868 	for (i = 1;
869 	    ((parent == NULL) || (i < ranwidth + 1)) &&
870 	    (fileset->fs_realfiles < *(fileset->fs_entries)); i++) {
871 		int ret = 0;
872 
873 		if (parent && isleaf)
874 			ret = fileset_populate_file(fileset, entry, i);
875 		else
876 			ret = fileset_populate_subdir(fileset, entry, i, depth);
877 
878 		if (ret != 0)
879 			return (ret);
880 	}
881 	return (0);
882 }
883 
884 /*
885  * Populates a fileset with files and subdirectory entries. Uses
886  * the supplied fs_dirwidth and fs_entries (number of files) to
887  * calculate the required fs_meandepth (of subdirectories) and
888  * initialize the fs_meanwidth and fs_meansize variables. Then
889  * calls fileset_populate_subdir() to do the recursive
890  * subdirectory entry creation and leaf file entry creation. All
891  * of the above is skipped if the fileset has already been
892  * populated. Returns 0 on success, or an error code from the
893  * call to fileset_populate_subdir if that call fails.
894  */
895 static int
896 fileset_populate(fileset_t *fileset)
897 {
898 	int nfiles;
899 	int meandirwidth = *(fileset->fs_dirwidth);
900 	int ret;
901 
902 	/* Skip if already populated */
903 	if (fileset->fs_bytes > 0)
904 		goto exists;
905 
906 #ifdef HAVE_RAW_SUPPORT
907 	/* check for raw device */
908 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
909 		return (0);
910 #endif /* HAVE_RAW_SUPPORT */
911 
912 	/*
913 	 * Input params are:
914 	 *	# of files
915 	 *	ave # of files per dir
916 	 *	max size of dir
917 	 *	# ave size of file
918 	 *	max size of file
919 	 */
920 	nfiles = *(fileset->fs_entries);
921 	fileset->fs_meandepth = log(nfiles) / log(meandirwidth);
922 	fileset->fs_meanwidth = meandirwidth;
923 	fileset->fs_meansize = *(fileset->fs_size);
924 
925 	if ((ret = fileset_populate_subdir(fileset, NULL, 1, 0)) != 0)
926 		return (ret);
927 
928 
929 exists:
930 	if (fileset->fs_attrs & FILESET_IS_FILE) {
931 		filebench_log(LOG_VERBOSE, "File %s: mbytes=%lld",
932 		    fileset->fs_name,
933 		    fileset->fs_bytes / 1024UL / 1024UL);
934 	} else {
935 		filebench_log(LOG_VERBOSE, "Fileset %s: %lld files, "
936 		    "avg dir = %.1lf, avg depth = %.1lf, mbytes=%lld",
937 		    fileset->fs_name,
938 		    *(fileset->fs_entries),
939 		    fileset->fs_meanwidth,
940 		    fileset->fs_meandepth,
941 		    fileset->fs_bytes / 1024UL / 1024UL);
942 	}
943 	return (0);
944 }
945 
946 /*
947  * Allocates a fileset instance, initializes fs_dirgamma and
948  * fs_sizegamma default values, and sets the fileset name to the
949  * supplied name string. Puts the allocated fileset on the
950  * master fileset list and returns a pointer to it.
951  */
952 fileset_t *
953 fileset_define(char *name)
954 {
955 	fileset_t *fileset;
956 
957 	if (name == NULL)
958 		return (NULL);
959 
960 	if ((fileset = (fileset_t *)ipc_malloc(FILEBENCH_FILESET)) == NULL) {
961 		filebench_log(LOG_ERROR,
962 		    "fileset_define: Can't malloc fileset");
963 		return (NULL);
964 	}
965 
966 	filebench_log(LOG_DEBUG_IMPL, "Defining file %s", name);
967 
968 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
969 
970 	fileset->fs_dirgamma = integer_alloc(1500);
971 	fileset->fs_sizegamma = integer_alloc(1500);
972 
973 	/* Add fileset to global list */
974 	if (filebench_shm->filesetlist == NULL) {
975 		filebench_shm->filesetlist = fileset;
976 		fileset->fs_next = NULL;
977 	} else {
978 		fileset->fs_next = filebench_shm->filesetlist;
979 		filebench_shm->filesetlist = fileset;
980 	}
981 
982 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
983 
984 	(void) strcpy(fileset->fs_name, name);
985 
986 	return (fileset);
987 }
988 
989 /*
990  * If supplied with a pointer to a fileset and the fileset's
991  * fs_prealloc flag is set, calls fileset_populate() to populate
992  * the fileset with filesetentries, then calls fileset_create()
993  * to make actual directories and files for the filesetentries.
994  * Otherwise, it applies fileset_populate() and fileset_create()
995  * to all the filesets on the master fileset list. It always
996  * returns zero (0) if one fileset is populated / created,
997  * otherwise it returns the sum of returned values from
998  * fileset_create() and fileset_populate(), which
999  * will be a negative one (-1) times the number of
1000  * fileset_create() calls which failed.
1001  */
1002 int
1003 fileset_createset(fileset_t *fileset)
1004 {
1005 	fileset_t *list;
1006 	int ret = 0;
1007 
1008 	/* set up for possible parallel allocate */
1009 	paralloc_count = 0;
1010 
1011 	if (fileset && integer_isset(fileset->fs_prealloc)) {
1012 
1013 		filebench_log(LOG_INFO,
1014 		    "creating/pre-allocating %s %s",
1015 		    fileset_entity_name(fileset), fileset->fs_name);
1016 
1017 		if ((ret = fileset_populate(fileset)) != 0)
1018 			return (ret);
1019 
1020 		if ((ret = fileset_create(fileset)) != 0)
1021 			return (ret);
1022 	} else {
1023 
1024 		filebench_log(LOG_INFO,
1025 		    "Creating/pre-allocating files and filesets");
1026 
1027 		list = filebench_shm->filesetlist;
1028 		while (list) {
1029 			if ((ret = fileset_populate(list)) != 0)
1030 				return (ret);
1031 			if ((ret = fileset_create(list)) != 0)
1032 				return (ret);
1033 			list = list->fs_next;
1034 		}
1035 	}
1036 
1037 	/* wait for allocation threads to finish */
1038 	filebench_log(LOG_INFO,
1039 	    "waiting for fileset pre-allocation to finish");
1040 
1041 	(void) pthread_mutex_lock(&paralloc_lock);
1042 	while (paralloc_count > 0)
1043 		(void) pthread_cond_wait(&paralloc_cv, &paralloc_lock);
1044 	(void) pthread_mutex_unlock(&paralloc_lock);
1045 
1046 	if (paralloc_count < 0)
1047 		return (-1);
1048 
1049 	return (0);
1050 }
1051 
1052 /*
1053  * Searches through the master fileset list for the named fileset.
1054  * If found, returns pointer to same, otherwise returns NULL.
1055  */
1056 fileset_t *
1057 fileset_find(char *name)
1058 {
1059 	fileset_t *fileset = filebench_shm->filesetlist;
1060 
1061 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
1062 
1063 	while (fileset) {
1064 		if (strcmp(name, fileset->fs_name) == 0) {
1065 			(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
1066 			return (fileset);
1067 		}
1068 		fileset = fileset->fs_next;
1069 	}
1070 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
1071 
1072 	return (NULL);
1073 }
1074 
1075 /*
1076  * Iterates over all the file sets in the filesetlist,
1077  * executing the supplied command "*cmd()" on them. Also
1078  * indicates to the executed command if it is the first
1079  * time the command has been executed since the current
1080  * call to fileset_iter.
1081  */
1082 void
1083 fileset_iter(int (*cmd)(fileset_t *fileset, int first))
1084 {
1085 	fileset_t *fileset = filebench_shm->filesetlist;
1086 	int count = 0;
1087 
1088 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
1089 
1090 	while (fileset) {
1091 		cmd(fileset, count == 0);
1092 		fileset = fileset->fs_next;
1093 		count++;
1094 	}
1095 
1096 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
1097 }
1098 
1099 /*
1100  * Prints information to the filebench log about the file
1101  * object. Also prints a header on the first call.
1102  */
1103 int
1104 fileset_print(fileset_t *fileset, int first)
1105 {
1106 	int pathlength = strlen(*fileset->fs_path) + strlen(fileset->fs_name);
1107 	/* 30 spaces */
1108 	char pad[] = "                              ";
1109 
1110 	if (pathlength > 29)
1111 		pathlength = 29;
1112 
1113 	if (first) {
1114 		filebench_log(LOG_INFO, "File or Fileset name%20s%12s%10s",
1115 		    "file size",
1116 		    "dir width",
1117 		    "entries");
1118 	}
1119 
1120 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1121 		if (fileset->fs_attrs & FILESET_IS_RAW_DEV) {
1122 			filebench_log(LOG_INFO,
1123 			    "%s/%s%s         (Raw Device)",
1124 			    *fileset->fs_path,
1125 			    fileset->fs_name,
1126 			    &pad[pathlength]);
1127 		} else {
1128 			filebench_log(LOG_INFO,
1129 			    "%s/%s%s%9lld     (Single File)",
1130 			    *fileset->fs_path,
1131 			    fileset->fs_name,
1132 			    &pad[pathlength],
1133 			    *fileset->fs_size);
1134 		}
1135 	} else {
1136 		filebench_log(LOG_INFO, "%s/%s%s%9lld%12lld%10lld",
1137 		    *fileset->fs_path,
1138 		    fileset->fs_name,
1139 		    &pad[pathlength],
1140 		    *fileset->fs_size,
1141 		    *fileset->fs_dirwidth,
1142 		    *fileset->fs_entries);
1143 	}
1144 	return (0);
1145 }
1146 /*
1147  * checks to see if the path/name pair points to a raw device. If
1148  * so it sets the raw device flag (FILESET_IS_RAW_DEV) and returns 1.
1149  * If RAW is not defined, or it is not a raw device, it clears the
1150  * raw device flag and returns 0.
1151  */
1152 int
1153 fileset_checkraw(fileset_t *fileset)
1154 {
1155 	char path[MAXPATHLEN];
1156 	struct stat64 sb;
1157 
1158 	fileset->fs_attrs &= (~FILESET_IS_RAW_DEV);
1159 
1160 #ifdef HAVE_RAW_SUPPORT
1161 	/* check for raw device */
1162 	(void) strcpy(path, *fileset->fs_path);
1163 	(void) strcat(path, "/");
1164 	(void) strcat(path, fileset->fs_name);
1165 	if ((stat64(path, &sb) == 0) &&
1166 	    ((sb.st_mode & S_IFMT) == S_IFBLK) && sb.st_rdev) {
1167 		fileset->fs_attrs |= FILESET_IS_RAW_DEV;
1168 		return (1);
1169 	}
1170 #endif /* HAVE_RAW_SUPPORT */
1171 
1172 	return (0);
1173 }
1174