xref: /onnv-gate/usr/src/cmd/filebench/common/fileset.c (revision 6305:ce48ff893c37)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 
29 #include <fcntl.h>
30 #include <pthread.h>
31 #include <errno.h>
32 #include <math.h>
33 #include <libgen.h>
34 #include <sys/mman.h>
35 #include "fileset.h"
36 #include "filebench.h"
37 #include "gamma_dist.h"
38 
39 /*
40  * File sets, of type fileset_t, are entities which contain
41  * information about collections of files and subdirectories in Filebench.
42  * The fileset, once populated, consists of a tree of fileset entries of
43  * type filesetentry_t which specify files and directories.  The fileset
44  * is rooted in a directory specified by fileset_path, and once the populated
45  * fileset has been created, has a tree of directories and files
46  * corresponding to the fileset's filesetentry tree.
47  */
48 
/* Forward declaration; the definition is not in this part of the file. */
static int fileset_checkraw(fileset_t *fileset);

/*
 * Parallel allocation control.
 *
 * fileset_create() blocks on paralloc_cv while paralloc_count is at or
 * above MAX_PARALLOC_THREADS, and bumps the count before starting each
 * allocation thread. fileset_alloc_thread() decrements the count when its
 * file was allocated, or sets it to -1 to report a failure, and then
 * signals paralloc_cv. All accesses by those two routines are made with
 * paralloc_lock held.
 */
#define	MAX_PARALLOC_THREADS 32
static pthread_mutex_t	paralloc_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	paralloc_cv = PTHREAD_COND_INITIALIZER;
static int		paralloc_count;
56 
57 /*
58  * returns pointer to file or fileset
59  * string, as appropriate
60  */
61 static char *
62 fileset_entity_name(fileset_t *fileset)
63 {
64 	if (fileset->fs_attrs & FILESET_IS_FILE)
65 		return ("file");
66 	else
67 		return ("fileset");
68 }
69 
/*
 * Strips the last component from a pathname, in place. Working
 * backwards from the terminating NUL, every character is overwritten
 * with '\0' up to and including the first '/' met. The very first
 * character of the string is never examined nor cleared, so "/abc"
 * becomes "/" and "abc" becomes "a". Returns its argument.
 */
static char *
trunc_dirname(char *dir)
{
	char *cursor;

	for (cursor = dir + strlen(dir); cursor != dir; cursor--) {
		char ch = *cursor;

		*cursor = '\0';
		if (ch == '/')
			break;
	}
	return (dir);
}
91 
/*
 * Prints, on stderr, the syntax of the "define file" / "define fileset"
 * command followed by the list of optional attributes.
 */
void
fileset_usage(void)
{
	/* exactly one comma between path= and entries= */
	(void) fprintf(stderr,
	    "define [file name=<name> | fileset name=<name>],path=<pathname>"
	    ",entries=<number>\n");
	(void) fprintf(stderr,
	    "		        [,filesize=[size]]\n");
	(void) fprintf(stderr,
	    "		        [,dirwidth=[width]]\n");
	(void) fprintf(stderr,
	    "		        [,dirdepthrv=$random_variable_name]\n");
	(void) fprintf(stderr,
	    "		        [,dirgamma=[100-10000]] "
	    "(Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,sizegamma=[100-10000]] (Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,prealloc=[percent]]\n");
	(void) fprintf(stderr, "		        [,paralloc]\n");
	(void) fprintf(stderr, "		        [,reuse]\n");
	(void) fprintf(stderr, "\n");
}
118 
119 /*
120  * Frees up memory mapped file region of supplied size. The
121  * file descriptor "fd" indicates which memory mapped file.
122  * If successful, returns 0. Otherwise returns -1 if "size"
123  * is zero, or -1 times the number of times msync() failed.
124  */
125 static int
126 fileset_freemem(int fd, off64_t size)
127 {
128 	off64_t left;
129 	int ret = 0;
130 
131 	for (left = size; left > 0; left -= MMAP_SIZE) {
132 		off64_t thismapsize;
133 		caddr_t addr;
134 
135 		thismapsize = MIN(MMAP_SIZE, left);
136 		addr = mmap64(0, thismapsize, PROT_READ|PROT_WRITE,
137 		    MAP_SHARED, fd, size - left);
138 		ret += msync(addr, thismapsize, MS_INVALIDATE);
139 		(void) munmap(addr, thismapsize);
140 	}
141 	return (ret);
142 }
143 
144 /*
145  * Creates a path string from the filesetentry_t "*entry"
146  * and all of its parent's path names. The resulting path
147  * is a concatination of all the individual parent paths.
148  * Allocates memory for the path string and returns a
149  * pointer to it.
150  */
151 char *
152 fileset_resolvepath(filesetentry_t *entry)
153 {
154 	filesetentry_t *fsep = entry;
155 	char path[MAXPATHLEN];
156 	char pathtmp[MAXPATHLEN];
157 	char *s;
158 
159 	*path = 0;
160 	while (fsep->fse_parent) {
161 		(void) strcpy(pathtmp, "/");
162 		(void) strcat(pathtmp, fsep->fse_path);
163 		(void) strcat(pathtmp, path);
164 		(void) strcpy(path, pathtmp);
165 		fsep = fsep->fse_parent;
166 	}
167 
168 	s = malloc(strlen(path) + 1);
169 	(void) strcpy(s, path);
170 	return (s);
171 }
172 
173 /*
174  * Creates multiple nested directories as required by the
175  * supplied path. Starts at the end of the path, creating
176  * a list of directories to mkdir, up to the root of the
177  * path, then mkdirs them one at a time from the root on down.
178  */
179 static int
180 fileset_mkdir(char *path, int mode)
181 {
182 	char *p;
183 	char *dirs[65536];
184 	int i = 0;
185 
186 	if ((p = strdup(path)) == NULL)
187 		goto null_str;
188 
189 	/*
190 	 * Fill an array of subdirectory path names until either we
191 	 * reach the root or encounter an already existing subdirectory
192 	 */
193 	/* CONSTCOND */
194 	while (1) {
195 		struct stat64 sb;
196 
197 		if (stat64(p, &sb) == 0)
198 			break;
199 		if (strlen(p) < 3)
200 			break;
201 		if ((dirs[i] = strdup(p)) == NULL) {
202 			free(p);
203 			goto null_str;
204 		}
205 
206 		(void) trunc_dirname(p);
207 		i++;
208 	}
209 
210 	/* Make the directories, from closest to root downwards. */
211 	for (--i; i >= 0; i--) {
212 		(void) mkdir(dirs[i], mode);
213 		free(dirs[i]);
214 	}
215 
216 	free(p);
217 	return (0);
218 
219 null_str:
220 	/* clean up */
221 	for (--i; i >= 0; i--)
222 		free(dirs[i]);
223 
224 	filebench_log(LOG_ERROR,
225 	    "Failed to create directory path %s: Out of memory", path);
226 
227 	return (-1);
228 }
229 
230 /*
231  * creates the subdirectory tree for a fileset.
232  */
233 static int
234 fileset_create_subdirs(fileset_t *fileset, char *filesetpath)
235 {
236 	filesetentry_t *direntry;
237 	char full_path[MAXPATHLEN];
238 	char *part_path;
239 
240 	/* walk the subdirectory list, enstanciating subdirs */
241 	direntry = fileset->fs_dirlist;
242 	while (direntry) {
243 		(void) strcpy(full_path, filesetpath);
244 		part_path = fileset_resolvepath(direntry);
245 		(void) strcat(full_path, part_path);
246 		free(part_path);
247 
248 		/* now create this portion of the subdirectory tree */
249 		if (fileset_mkdir(full_path, 0755) == -1)
250 			return (-1);
251 
252 		direntry = direntry->fse_dirnext;
253 	}
254 	return (0);
255 }
256 
257 /*
258  * given a fileset entry, determines if the associated file
259  * needs to be allocated or not, and if so does the allocation.
260  */
261 static int
262 fileset_alloc_file(filesetentry_t *entry)
263 {
264 	char path[MAXPATHLEN];
265 	char *buf;
266 	struct stat64 sb;
267 	char *pathtmp;
268 	off64_t seek;
269 	int fd;
270 
271 	*path = 0;
272 	(void) strcpy(path, avd_get_str(entry->fse_fileset->fs_path));
273 	(void) strcat(path, "/");
274 	(void) strcat(path, avd_get_str(entry->fse_fileset->fs_name));
275 	pathtmp = fileset_resolvepath(entry);
276 	(void) strcat(path, pathtmp);
277 
278 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
279 
280 	/* see if reusing and this file exists */
281 	if ((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0)) {
282 		if ((fd = open64(path, O_RDWR)) < 0) {
283 			filebench_log(LOG_INFO,
284 			    "Attempted but failed to Re-use file %s",
285 			    path);
286 			return (-1);
287 		}
288 
289 		if (sb.st_size == (off64_t)entry->fse_size) {
290 			filebench_log(LOG_INFO,
291 			    "Re-using file %s", path);
292 
293 			if (!avd_get_bool(entry->fse_fileset->fs_cached))
294 				(void) fileset_freemem(fd,
295 				    entry->fse_size);
296 
297 			entry->fse_flags |= FSE_EXISTS;
298 			(void) close(fd);
299 			return (0);
300 
301 		} else if (sb.st_size > (off64_t)entry->fse_size) {
302 			/* reuse, but too large */
303 			filebench_log(LOG_INFO,
304 			    "Truncating & re-using file %s", path);
305 
306 			(void) ftruncate64(fd,
307 			    (off64_t)entry->fse_size);
308 
309 			if (!avd_get_bool(entry->fse_fileset->fs_cached))
310 				(void) fileset_freemem(fd,
311 				    entry->fse_size);
312 
313 			entry->fse_flags |= FSE_EXISTS;
314 			(void) close(fd);
315 			return (0);
316 		}
317 	} else {
318 
319 		/* No file or not reusing, so create */
320 		if ((fd = open64(path, O_RDWR | O_CREAT, 0644)) < 0) {
321 			filebench_log(LOG_ERROR,
322 			    "Failed to pre-allocate file %s: %s",
323 			    path, strerror(errno));
324 
325 			return (-1);
326 		}
327 	}
328 
329 	if ((buf = (char *)malloc(FILE_ALLOC_BLOCK)) == NULL)
330 		return (-1);
331 
332 	entry->fse_flags |= FSE_EXISTS;
333 
334 	for (seek = 0; seek < entry->fse_size; ) {
335 		off64_t wsize;
336 		int ret = 0;
337 
338 		/*
339 		 * Write FILE_ALLOC_BLOCK's worth,
340 		 * except on last write
341 		 */
342 		wsize = MIN(entry->fse_size - seek, FILE_ALLOC_BLOCK);
343 
344 		ret = write(fd, buf, wsize);
345 		if (ret != wsize) {
346 			filebench_log(LOG_ERROR,
347 			    "Failed to pre-allocate file %s: %s",
348 			    path, strerror(errno));
349 			(void) close(fd);
350 			free(buf);
351 			return (-1);
352 		}
353 		seek += wsize;
354 	}
355 
356 	if (!avd_get_bool(entry->fse_fileset->fs_cached))
357 		(void) fileset_freemem(fd, entry->fse_size);
358 
359 	(void) close(fd);
360 
361 	free(buf);
362 
363 	filebench_log(LOG_DEBUG_IMPL,
364 	    "Pre-allocated file %s size %llu",
365 	    path, (u_longlong_t)entry->fse_size);
366 
367 	return (0);
368 }
369 
370 /*
371  * given a fileset entry, determines if the associated file
372  * needs to be allocated or not, and if so does the allocation.
373  */
374 static void *
375 fileset_alloc_thread(filesetentry_t *entry)
376 {
377 	if (fileset_alloc_file(entry) == -1) {
378 		(void) pthread_mutex_lock(&paralloc_lock);
379 		paralloc_count = -1;
380 	} else {
381 		(void) pthread_mutex_lock(&paralloc_lock);
382 		paralloc_count--;
383 	}
384 
385 	(void) pthread_cond_signal(&paralloc_cv);
386 	(void) pthread_mutex_unlock(&paralloc_lock);
387 
388 	pthread_exit(NULL);
389 	return (NULL);
390 }
391 
392 
393 /*
394  * First creates the parent directories of the file using
395  * fileset_mkdir(). Then Optionally sets the O_DSYNC flag
396  * and opens the file with open64(). It unlocks the fileset
397  * entry lock, sets the DIRECTIO_ON or DIRECTIO_OFF flags
398  * as requested, and returns the file descriptor integer
399  * for the opened file.
400  */
401 int
402 fileset_openfile(fileset_t *fileset,
403     filesetentry_t *entry, int flag, int mode, int attrs)
404 {
405 	char path[MAXPATHLEN];
406 	char dir[MAXPATHLEN];
407 	char *pathtmp;
408 	struct stat64 sb;
409 	int fd;
410 	int open_attrs = 0;
411 
412 	*path = 0;
413 	(void) strcpy(path, avd_get_str(fileset->fs_path));
414 	(void) strcat(path, "/");
415 	(void) strcat(path, avd_get_str(fileset->fs_name));
416 	pathtmp = fileset_resolvepath(entry);
417 	(void) strcat(path, pathtmp);
418 	(void) strcpy(dir, path);
419 	free(pathtmp);
420 	(void) trunc_dirname(dir);
421 
422 	/* If we are going to create a file, create the parent dirs */
423 	if ((flag & O_CREAT) && (stat64(dir, &sb) != 0)) {
424 		if (fileset_mkdir(dir, 0755) == -1)
425 			return (-1);
426 	}
427 
428 	if (flag & O_CREAT)
429 		entry->fse_flags |= FSE_EXISTS;
430 
431 	if (attrs & FLOW_ATTR_DSYNC) {
432 #ifdef sun
433 		open_attrs |= O_DSYNC;
434 #else
435 		open_attrs |= O_FSYNC;
436 #endif
437 	}
438 
439 	if ((fd = open64(path, flag | open_attrs, mode)) < 0) {
440 		filebench_log(LOG_ERROR,
441 		    "Failed to open file %s: %s",
442 		    path, strerror(errno));
443 		(void) ipc_mutex_unlock(&entry->fse_lock);
444 		return (-1);
445 	}
446 	(void) ipc_mutex_unlock(&entry->fse_lock);
447 
448 #ifdef sun
449 	if (attrs & FLOW_ATTR_DIRECTIO)
450 		(void) directio(fd, DIRECTIO_ON);
451 	else
452 		(void) directio(fd, DIRECTIO_OFF);
453 #endif
454 
455 	return (fd);
456 }
457 
458 
459 /*
460  * Selects a fileset entry from a fileset. If the
461  * FILESET_PICKDIR flag is set it will pick a directory
462  * entry, otherwise a file entry. The FILESET_PICKRESET
463  * flag will cause it to reset the free list to the
464  * overall list (file or directory). The FILESET_PICKUNIQUE
465  * flag will take an entry off of one of the free (unused)
466  * lists (file or directory), otherwise the entry will be
467  * picked off of one of the rotor lists (file or directory).
468  * The FILESET_PICKEXISTS will insure that only extant
469  * (FSE_EXISTS) state files are selected, while
470  * FILESET_PICKNOEXIST insures that only non extant
471  * (not FSE_EXISTS) state files are selected.
472  */
473 filesetentry_t *
474 fileset_pick(fileset_t *fileset, int flags, int tid)
475 {
476 	filesetentry_t *entry = NULL;
477 	filesetentry_t *first = NULL;
478 
479 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
480 
481 	while (entry == NULL) {
482 
483 		if ((flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
484 			entry = fileset->fs_dirlist;
485 			while (entry) {
486 				entry->fse_flags |= FSE_FREE;
487 				entry = entry->fse_dirnext;
488 			}
489 			fileset->fs_dirfree = fileset->fs_dirlist;
490 		}
491 
492 		if (!(flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
493 			entry = fileset->fs_filelist;
494 			while (entry) {
495 				entry->fse_flags |= FSE_FREE;
496 				entry = entry->fse_filenext;
497 			}
498 			fileset->fs_filefree = fileset->fs_filelist;
499 		}
500 
501 		if (flags & FILESET_PICKUNIQUE) {
502 			if (flags & FILESET_PICKDIR) {
503 				entry = fileset->fs_dirfree;
504 				if (entry == NULL)
505 					goto empty;
506 				fileset->fs_dirfree = entry->fse_dirnext;
507 			} else {
508 				entry = fileset->fs_filefree;
509 				if (entry == NULL)
510 					goto empty;
511 				fileset->fs_filefree = entry->fse_filenext;
512 			}
513 			entry->fse_flags &= ~FSE_FREE;
514 		} else {
515 			if (flags & FILESET_PICKDIR) {
516 				entry = fileset->fs_dirrotor;
517 				if (entry == NULL)
518 				fileset->fs_dirrotor =
519 				    entry = fileset->fs_dirlist;
520 				fileset->fs_dirrotor = entry->fse_dirnext;
521 			} else {
522 				entry = fileset->fs_filerotor[tid];
523 				if (entry == NULL)
524 					fileset->fs_filerotor[tid] =
525 					    entry = fileset->fs_filelist;
526 				fileset->fs_filerotor[tid] =
527 				    entry->fse_filenext;
528 			}
529 		}
530 
531 		if (first == entry)
532 			goto empty;
533 
534 		if (first == NULL)
535 			first = entry;
536 
537 		/* Return locked entry */
538 		(void) ipc_mutex_lock(&entry->fse_lock);
539 
540 		/* If we ask for an existing file, go round again */
541 		if ((flags & FILESET_PICKEXISTS) &&
542 		    !(entry->fse_flags & FSE_EXISTS)) {
543 			(void) ipc_mutex_unlock(&entry->fse_lock);
544 			entry = NULL;
545 		}
546 
547 		/* If we ask for not an existing file, go round again */
548 		if ((flags & FILESET_PICKNOEXIST) &&
549 		    (entry->fse_flags & FSE_EXISTS)) {
550 			(void) ipc_mutex_unlock(&entry->fse_lock);
551 			entry = NULL;
552 		}
553 	}
554 
555 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
556 	filebench_log(LOG_DEBUG_SCRIPT, "Picked file %s", entry->fse_path);
557 	return (entry);
558 
559 empty:
560 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
561 	return (NULL);
562 }
563 
564 /*
565  * Given a fileset "fileset", create the associated files as
566  * specified in the attributes of the fileset. The fileset is
567  * rooted in a directory whose pathname is in fileset_path. If the
568  * directory exists, meaning that there is already a fileset,
569  * and the fileset_reuse attribute is false, then remove it and all
570  * its contained files and subdirectories. Next, the routine
571  * creates a root directory for the fileset. All the file type
572  * filesetentries are cycled through creating as needed
573  * their containing subdirectory trees in the filesystem and
574  * creating actual files for fileset_preallocpercent of them. The
575  * created files are filled with fse_size bytes of unitialized
576  * data. The routine returns -1 on errors, 0 on success.
577  */
578 static int
579 fileset_create(fileset_t *fileset)
580 {
581 	filesetentry_t *entry;
582 	char path[MAXPATHLEN];
583 	struct stat64 sb;
584 	int pickflags = FILESET_PICKUNIQUE | FILESET_PICKRESET;
585 	hrtime_t start = gethrtime();
586 	char *fileset_path;
587 	char *fileset_name;
588 	int randno;
589 	int preallocated = 0;
590 	int reusing = 0;
591 
592 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
593 		filebench_log(LOG_ERROR, "%s path not set",
594 		    fileset_entity_name(fileset));
595 		return (-1);
596 	}
597 
598 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
599 		filebench_log(LOG_ERROR, "%s name not set",
600 		    fileset_entity_name(fileset));
601 		return (-1);
602 	}
603 
604 #ifdef HAVE_RAW_SUPPORT
605 	/* treat raw device as special case */
606 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
607 		return (0);
608 #endif /* HAVE_RAW_SUPPORT */
609 
610 	/* XXX Add check to see if there is enough space */
611 
612 	/* Remove existing */
613 	(void) strcpy(path, fileset_path);
614 	(void) strcat(path, "/");
615 	(void) strcat(path, fileset_name);
616 	if ((stat64(path, &sb) == 0) && (strlen(path) > 3) &&
617 	    (strlen(avd_get_str(fileset->fs_path)) > 2)) {
618 		if (!avd_get_bool(fileset->fs_reuse)) {
619 			char cmd[MAXPATHLEN];
620 
621 			(void) snprintf(cmd, sizeof (cmd), "rm -rf %s", path);
622 			(void) system(cmd);
623 			filebench_log(LOG_VERBOSE,
624 			    "Removed any existing %s %s in %llu seconds",
625 			    fileset_entity_name(fileset), fileset_name,
626 			    (u_longlong_t)(((gethrtime() - start) /
627 			    1000000000) + 1));
628 		} else {
629 			/* we are re-using */
630 			reusing = 1;
631 			filebench_log(LOG_VERBOSE,
632 			    "Re-using %s %s on %s file system.",
633 			    fileset_entity_name(fileset),
634 			    fileset_name, sb.st_fstype);
635 		}
636 	}
637 	(void) mkdir(path, 0755);
638 
639 	/* make the filesets directory tree */
640 	if (fileset_create_subdirs(fileset, path) == -1)
641 		return (-1);
642 
643 	start = gethrtime();
644 
645 	filebench_log(LOG_VERBOSE, "Creating %s %s...",
646 	    fileset_entity_name(fileset), fileset_name);
647 
648 	if (!avd_get_bool(fileset->fs_prealloc))
649 		goto exit;
650 
651 	randno = ((RAND_MAX * (100
652 	    - avd_get_int(fileset->fs_preallocpercent))) / 100);
653 
654 	while (entry = fileset_pick(fileset, pickflags, 0)) {
655 		pthread_t tid;
656 
657 		pickflags = FILESET_PICKUNIQUE;
658 
659 		entry->fse_flags &= ~FSE_EXISTS;
660 
661 		/* entry doesn't need to be locked during initialization */
662 		(void) ipc_mutex_unlock(&entry->fse_lock);
663 
664 		if (rand() < randno)
665 			continue;
666 
667 		preallocated++;
668 
669 		if (reusing)
670 			entry->fse_flags |= FSE_REUSING;
671 		else
672 			entry->fse_flags &= (~FSE_REUSING);
673 
674 		if (avd_get_bool(fileset->fs_paralloc)) {
675 
676 			/* fire off a separate allocation thread */
677 			(void) pthread_mutex_lock(&paralloc_lock);
678 			while (paralloc_count >= MAX_PARALLOC_THREADS) {
679 				(void) pthread_cond_wait(
680 				    &paralloc_cv, &paralloc_lock);
681 			}
682 
683 			if (paralloc_count < 0) {
684 				(void) pthread_mutex_unlock(&paralloc_lock);
685 				return (-1);
686 			}
687 
688 			paralloc_count++;
689 			(void) pthread_mutex_unlock(&paralloc_lock);
690 
691 			if (pthread_create(&tid, NULL,
692 			    (void *(*)(void*))fileset_alloc_thread,
693 			    entry) != 0) {
694 				filebench_log(LOG_ERROR,
695 				    "File prealloc thread create failed");
696 				filebench_shutdown(1);
697 			}
698 
699 		} else {
700 			if (fileset_alloc_file(entry) == -1)
701 				return (-1);
702 		}
703 	}
704 
705 exit:
706 	filebench_log(LOG_VERBOSE,
707 	    "Preallocated %d of %llu of %s %s in %llu seconds",
708 	    preallocated,
709 	    (u_longlong_t)fileset->fs_constentries,
710 	    fileset_entity_name(fileset), fileset_name,
711 	    (u_longlong_t)(((gethrtime() - start) / 1000000000) + 1));
712 
713 	return (0);
714 }
715 
716 /*
717  * Adds an entry to the fileset's file list. Single threaded so
718  * no locking needed.
719  */
720 static void
721 fileset_insfilelist(fileset_t *fileset, filesetentry_t *entry)
722 {
723 	if (fileset->fs_filelist == NULL) {
724 		fileset->fs_filelist = entry;
725 		entry->fse_filenext = NULL;
726 	} else {
727 		entry->fse_filenext = fileset->fs_filelist;
728 		fileset->fs_filelist = entry;
729 	}
730 }
731 
732 /*
733  * Adds an entry to the fileset's directory list. Single
734  * threaded so no locking needed.
735  */
736 static void
737 fileset_insdirlist(fileset_t *fileset, filesetentry_t *entry)
738 {
739 	if (fileset->fs_dirlist == NULL) {
740 		fileset->fs_dirlist = entry;
741 		entry->fse_dirnext = NULL;
742 	} else {
743 		entry->fse_dirnext = fileset->fs_dirlist;
744 		fileset->fs_dirlist = entry;
745 	}
746 }
747 
748 /*
749  * Obtaines a filesetentry entity for a file to be placed in a
750  * (sub)directory of a fileset. The size of the file may be
751  * specified by fileset_meansize, or calculated from a gamma
752  * distribution of parameter fileset_sizegamma and of mean size
753  * fileset_meansize. The filesetentry entity is placed on the file
754  * list in the specified parent filesetentry entity, which may
755  * be a directory filesetentry, or the root filesetentry in the
756  * fileset. It is also placed on the fileset's list of all
757  * contained files. Returns 0 if successful or -1 if ipc memory
758  * for the path string cannot be allocated.
759  */
760 static int
761 fileset_populate_file(fileset_t *fileset, filesetentry_t *parent, int serial)
762 {
763 	char tmpname[16];
764 	filesetentry_t *entry;
765 	double drand;
766 
767 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
768 	    == NULL) {
769 		filebench_log(LOG_ERROR,
770 		    "fileset_populate_file: Can't malloc filesetentry");
771 		return (-1);
772 	}
773 
774 	(void) pthread_mutex_init(&entry->fse_lock, ipc_mutexattr());
775 	entry->fse_parent = parent;
776 	entry->fse_fileset = fileset;
777 	entry->fse_flags |= FSE_FREE;
778 	fileset_insfilelist(fileset, entry);
779 
780 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
781 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
782 		filebench_log(LOG_ERROR,
783 		    "fileset_populate_file: Can't alloc path string");
784 		return (-1);
785 	}
786 
787 	/* see if random variable was supplied for file size */
788 	if (fileset->fs_meansize == -1) {
789 		entry->fse_size = (off64_t)avd_get_int(fileset->fs_size);
790 	} else {
791 		double gamma;
792 
793 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
794 		if (gamma > 0) {
795 			drand = gamma_dist_knuth(gamma,
796 			    fileset->fs_meansize / gamma);
797 			entry->fse_size = (off64_t)drand;
798 		} else {
799 			entry->fse_size = (off64_t)fileset->fs_meansize;
800 		}
801 	}
802 
803 	fileset->fs_bytes += entry->fse_size;
804 
805 	fileset->fs_realfiles++;
806 	return (0);
807 }
808 
809 /*
810  * Creates a directory node in a fileset, by obtaining a
811  * filesetentry entity for the node and initializing it
812  * according to parameters of the fileset. It determines a
813  * directory tree depth and directory width, optionally using
814  * a gamma distribution. If its calculated depth is less then
815  * its actual depth in the directory tree, it becomes a leaf
816  * node and files itself with "width" number of file type
817  * filesetentries, otherwise it files itself with "width"
818  * number of directory type filesetentries, using recursive
819  * calls to fileset_populate_subdir. The end result of the
820  * initial call to this routine is a tree of directories of
821  * random width and varying depth with sufficient leaf
822  * directories to contain all required files.
823  * Returns 0 on success. Returns -1 if ipc path string memory
824  * cannot be allocated and returns an error code (currently
825  * also -1) from calls to fileset_populate_file or recursive
826  * calls to fileset_populate_subdir.
827  */
828 static int
829 fileset_populate_subdir(fileset_t *fileset, filesetentry_t *parent,
830     int serial, double depth)
831 {
832 	double randepth, drand, ranwidth;
833 	int isleaf = 0;
834 	char tmpname[16];
835 	filesetentry_t *entry;
836 	int i;
837 
838 	depth += 1;
839 
840 	/* Create dir node */
841 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
842 	    == NULL) {
843 		filebench_log(LOG_ERROR,
844 		    "fileset_populate_subdir: Can't malloc filesetentry");
845 		return (-1);
846 	}
847 
848 	(void) pthread_mutex_init(&entry->fse_lock, ipc_mutexattr());
849 
850 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
851 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
852 		filebench_log(LOG_ERROR,
853 		    "fileset_populate_subdir: Can't alloc path string");
854 		return (-1);
855 	}
856 
857 	entry->fse_parent = parent;
858 	entry->fse_flags |= FSE_DIR | FSE_FREE;
859 	fileset_insdirlist(fileset, entry);
860 
861 	if (fileset->fs_dirdepthrv) {
862 		randepth = (int)avd_get_int(fileset->fs_dirdepthrv);
863 	} else {
864 		double gamma;
865 
866 		gamma = avd_get_int(fileset->fs_dirgamma) / 1000.0;
867 		if (gamma > 0) {
868 			drand = gamma_dist_knuth(gamma,
869 			    fileset->fs_meandepth / gamma);
870 			randepth = (int)drand;
871 		} else {
872 			randepth = (int)fileset->fs_meandepth;
873 		}
874 	}
875 
876 	if (fileset->fs_meanwidth == -1) {
877 		ranwidth = avd_get_dbl(fileset->fs_dirwidth);
878 	} else {
879 		double gamma;
880 
881 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
882 		if (gamma > 0) {
883 			drand = gamma_dist_knuth(gamma,
884 			    fileset->fs_meanwidth / gamma);
885 			ranwidth = drand;
886 		} else {
887 			ranwidth = fileset->fs_meanwidth;
888 		}
889 	}
890 
891 	if (randepth == 0)
892 		randepth = 1;
893 	if (ranwidth == 0)
894 		ranwidth = 1;
895 	if (depth >= randepth)
896 		isleaf = 1;
897 
898 	/*
899 	 * Create directory of random width according to distribution, or
900 	 * if root directory, continue until #files required
901 	 */
902 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
903 	    (fileset->fs_realfiles < fileset->fs_constentries);
904 	    i++) {
905 		int ret = 0;
906 
907 		if (parent && isleaf)
908 			ret = fileset_populate_file(fileset, entry, i);
909 		else
910 			ret = fileset_populate_subdir(fileset, entry, i, depth);
911 
912 		if (ret != 0)
913 			return (ret);
914 	}
915 	return (0);
916 }
917 
918 /*
919  * Populates a fileset with files and subdirectory entries. Uses
920  * the supplied fileset_dirwidth and fileset_entries (number of files) to
921  * calculate the required fileset_meandepth (of subdirectories) and
922  * initialize the fileset_meanwidth and fileset_meansize variables. Then
923  * calls fileset_populate_subdir() to do the recursive
924  * subdirectory entry creation and leaf file entry creation. All
925  * of the above is skipped if the fileset has already been
926  * populated. Returns 0 on success, or an error code from the
927  * call to fileset_populate_subdir if that call fails.
928  */
929 static int
930 fileset_populate(fileset_t *fileset)
931 {
932 	int entries = (int)avd_get_int(fileset->fs_entries);
933 	int meandirwidth;
934 	int ret;
935 
936 	/* Skip if already populated */
937 	if (fileset->fs_bytes > 0)
938 		goto exists;
939 
940 #ifdef HAVE_RAW_SUPPORT
941 	/* check for raw device */
942 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
943 		return (0);
944 #endif /* HAVE_RAW_SUPPORT */
945 
946 	/* save value of entries obtained for later, in case it was random */
947 	fileset->fs_constentries = entries;
948 
949 	/* is dirwidth a random variable? */
950 	if (AVD_IS_RANDOM(fileset->fs_dirwidth)) {
951 		meandirwidth =
952 		    (int)fileset->fs_dirwidth->avd_val.randptr->rnd_dbl_mean;
953 		fileset->fs_meanwidth = -1;
954 	} else {
955 		meandirwidth = (int)avd_get_int(fileset->fs_dirwidth);
956 		fileset->fs_meanwidth = (double)meandirwidth;
957 	}
958 
959 	/*
960 	 * Input params are:
961 	 *	# of files
962 	 *	ave # of files per dir
963 	 *	max size of dir
964 	 *	# ave size of file
965 	 *	max size of file
966 	 */
967 	fileset->fs_meandepth = log(entries) / log(meandirwidth);
968 
969 	/* Has a random variable been supplied for dirdepth? */
970 	if (fileset->fs_dirdepthrv) {
971 		/* yes, so set the random variable's mean value to meandepth */
972 		fileset->fs_dirdepthrv->avd_val.randptr->rnd_dbl_mean =
973 		    fileset->fs_meandepth;
974 	}
975 
976 	/* test for random size variable */
977 	if (AVD_IS_RANDOM(fileset->fs_size))
978 		fileset->fs_meansize = -1;
979 	else
980 		fileset->fs_meansize = avd_get_int(fileset->fs_size);
981 
982 	if ((ret = fileset_populate_subdir(fileset, NULL, 1, 0)) != 0)
983 		return (ret);
984 
985 
986 exists:
987 	if (fileset->fs_attrs & FILESET_IS_FILE) {
988 		filebench_log(LOG_VERBOSE, "File %s: mbytes=%llu",
989 		    avd_get_str(fileset->fs_name),
990 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
991 	} else {
992 		filebench_log(LOG_VERBOSE, "Fileset %s: %d files, "
993 		    "avg dir = %d, avg depth = %.1lf, mbytes=%llu",
994 		    avd_get_str(fileset->fs_name), entries,
995 		    meandirwidth,
996 		    fileset->fs_meandepth,
997 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
998 	}
999 	return (0);
1000 }
1001 
1002 /*
1003  * Allocates a fileset instance, initializes fileset_dirgamma and
1004  * fileset_sizegamma default values, and sets the fileset name to the
1005  * supplied name string. Puts the allocated fileset on the
1006  * master fileset list and returns a pointer to it.
1007  */
1008 fileset_t *
1009 fileset_define(avd_t name)
1010 {
1011 	fileset_t *fileset;
1012 
1013 	if (name == NULL)
1014 		return (NULL);
1015 
1016 	if ((fileset = (fileset_t *)ipc_malloc(FILEBENCH_FILESET)) == NULL) {
1017 		filebench_log(LOG_ERROR,
1018 		    "fileset_define: Can't malloc fileset");
1019 		return (NULL);
1020 	}
1021 
1022 	filebench_log(LOG_DEBUG_IMPL,
1023 	    "Defining file %s", avd_get_str(name));
1024 
1025 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
1026 
1027 	fileset->fs_dirgamma = avd_int_alloc(1500);
1028 	fileset->fs_sizegamma = avd_int_alloc(1500);
1029 
1030 	/* Add fileset to global list */
1031 	if (filebench_shm->filesetlist == NULL) {
1032 		filebench_shm->filesetlist = fileset;
1033 		fileset->fs_next = NULL;
1034 	} else {
1035 		fileset->fs_next = filebench_shm->filesetlist;
1036 		filebench_shm->filesetlist = fileset;
1037 	}
1038 
1039 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
1040 
1041 	fileset->fs_name = name;
1042 
1043 	return (fileset);
1044 }
1045 
1046 /*
1047  * If supplied with a pointer to a fileset and the fileset's
1048  * fileset_prealloc flag is set, calls fileset_populate() to populate
1049  * the fileset with filesetentries, then calls fileset_create()
1050  * to make actual directories and files for the filesetentries.
1051  * Otherwise, it applies fileset_populate() and fileset_create()
1052  * to all the filesets on the master fileset list. It always
1053  * returns zero (0) if one fileset is populated / created,
1054  * otherwise it returns the sum of returned values from
1055  * fileset_create() and fileset_populate(), which
1056  * will be a negative one (-1) times the number of
1057  * fileset_create() calls which failed.
1058  */
1059 int
1060 fileset_createset(fileset_t *fileset)
1061 {
1062 	fileset_t *list;
1063 	int ret = 0;
1064 
1065 	/* set up for possible parallel allocate */
1066 	paralloc_count = 0;
1067 
1068 	if (fileset && avd_get_bool(fileset->fs_prealloc)) {
1069 
1070 		/* check for raw files */
1071 		if (fileset_checkraw(fileset)) {
1072 			filebench_log(LOG_INFO,
1073 			    "file %s/%s is a RAW device",
1074 			    avd_get_str(fileset->fs_path),
1075 			    avd_get_str(fileset->fs_name));
1076 			return (0);
1077 		}
1078 
1079 		filebench_log(LOG_INFO,
1080 		    "creating/pre-allocating %s %s",
1081 		    fileset_entity_name(fileset),
1082 		    avd_get_str(fileset->fs_name));
1083 
1084 		if ((ret = fileset_populate(fileset)) != 0)
1085 			return (ret);
1086 
1087 		if ((ret = fileset_create(fileset)) != 0)
1088 			return (ret);
1089 	} else {
1090 
1091 		filebench_log(LOG_INFO,
1092 		    "Creating/pre-allocating files and filesets");
1093 
1094 		list = filebench_shm->filesetlist;
1095 		while (list) {
1096 			/* check for raw files */
1097 			if (fileset_checkraw(list)) {
1098 				filebench_log(LOG_INFO,
1099 				    "file %s/%s is a RAW device",
1100 				    avd_get_str(list->fs_path),
1101 				    avd_get_str(list->fs_name));
1102 				list = list->fs_next;
1103 				continue;
1104 			}
1105 
1106 			if ((ret = fileset_populate(list)) != 0)
1107 				return (ret);
1108 			if ((ret = fileset_create(list)) != 0)
1109 				return (ret);
1110 			list = list->fs_next;
1111 		}
1112 	}
1113 
1114 	/* wait for allocation threads to finish */
1115 	filebench_log(LOG_INFO,
1116 	    "waiting for fileset pre-allocation to finish");
1117 
1118 	(void) pthread_mutex_lock(&paralloc_lock);
1119 	while (paralloc_count > 0)
1120 		(void) pthread_cond_wait(&paralloc_cv, &paralloc_lock);
1121 	(void) pthread_mutex_unlock(&paralloc_lock);
1122 
1123 	if (paralloc_count < 0)
1124 		return (-1);
1125 
1126 	return (0);
1127 }
1128 
1129 /*
1130  * Searches through the master fileset list for the named fileset.
1131  * If found, returns pointer to same, otherwise returns NULL.
1132  */
1133 fileset_t *
1134 fileset_find(char *name)
1135 {
1136 	fileset_t *fileset = filebench_shm->filesetlist;
1137 
1138 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
1139 
1140 	while (fileset) {
1141 		if (strcmp(name, avd_get_str(fileset->fs_name)) == 0) {
1142 			(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
1143 			return (fileset);
1144 		}
1145 		fileset = fileset->fs_next;
1146 	}
1147 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
1148 
1149 	return (NULL);
1150 }
1151 
1152 /*
1153  * Iterates over all the file sets in the filesetlist,
1154  * executing the supplied command "*cmd()" on them. Also
1155  * indicates to the executed command if it is the first
1156  * time the command has been executed since the current
1157  * call to fileset_iter.
1158  */
1159 void
1160 fileset_iter(int (*cmd)(fileset_t *fileset, int first))
1161 {
1162 	fileset_t *fileset = filebench_shm->filesetlist;
1163 	int count = 0;
1164 
1165 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
1166 
1167 	while (fileset) {
1168 		cmd(fileset, count == 0);
1169 		fileset = fileset->fs_next;
1170 		count++;
1171 	}
1172 
1173 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
1174 }
1175 
1176 /*
1177  * Prints information to the filebench log about the file
1178  * object. Also prints a header on the first call.
1179  */
1180 int
1181 fileset_print(fileset_t *fileset, int first)
1182 {
1183 	int pathlength;
1184 	char *fileset_path;
1185 	char *fileset_name;
1186 	static char pad[] = "                              "; /* 30 spaces */
1187 
1188 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
1189 		filebench_log(LOG_ERROR, "%s path not set",
1190 		    fileset_entity_name(fileset));
1191 		return (-1);
1192 	}
1193 
1194 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
1195 		filebench_log(LOG_ERROR, "%s name not set",
1196 		    fileset_entity_name(fileset));
1197 		return (-1);
1198 	}
1199 
1200 	pathlength = strlen(fileset_path) + strlen(fileset_name);
1201 
1202 	if (pathlength > 29)
1203 		pathlength = 29;
1204 
1205 	if (first) {
1206 		filebench_log(LOG_INFO, "File or Fileset name%20s%12s%10s",
1207 		    "file size",
1208 		    "dir width",
1209 		    "entries");
1210 	}
1211 
1212 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1213 		if (fileset->fs_attrs & FILESET_IS_RAW_DEV) {
1214 			filebench_log(LOG_INFO,
1215 			    "%s/%s%s         (Raw Device)",
1216 			    fileset_path, fileset_name, &pad[pathlength]);
1217 		} else {
1218 			filebench_log(LOG_INFO,
1219 			    "%s/%s%s%9llu     (Single File)",
1220 			    fileset_path, fileset_name, &pad[pathlength],
1221 			    (u_longlong_t)avd_get_int(fileset->fs_size));
1222 		}
1223 	} else {
1224 		filebench_log(LOG_INFO, "%s/%s%s%9llu%12llu%10llu",
1225 		    fileset_path, fileset_name,
1226 		    &pad[pathlength],
1227 		    (u_longlong_t)avd_get_int(fileset->fs_size),
1228 		    (u_longlong_t)avd_get_int(fileset->fs_dirwidth),
1229 		    (u_longlong_t)fileset->fs_constentries);
1230 	}
1231 	return (0);
1232 }
1233 /*
1234  * checks to see if the path/name pair points to a raw device. If
1235  * so it sets the raw device flag (FILESET_IS_RAW_DEV) and returns 1.
1236  * If RAW is not defined, or it is not a raw device, it clears the
1237  * raw device flag and returns 0.
1238  */
1239 int
1240 fileset_checkraw(fileset_t *fileset)
1241 {
1242 	char path[MAXPATHLEN];
1243 	struct stat64 sb;
1244 	char *pathname;
1245 	char *setname;
1246 
1247 	fileset->fs_attrs &= (~FILESET_IS_RAW_DEV);
1248 
1249 #ifdef HAVE_RAW_SUPPORT
1250 	/* check for raw device */
1251 	if ((pathname = avd_get_str(fileset->fs_path)) == NULL)
1252 		return (0);
1253 
1254 	if ((setname = avd_get_str(fileset->fs_name)) == NULL)
1255 		return (0);
1256 
1257 	(void) strcpy(path, pathname);
1258 	(void) strcat(path, "/");
1259 	(void) strcat(path, setname);
1260 	if ((stat64(path, &sb) == 0) &&
1261 	    ((sb.st_mode & S_IFMT) == S_IFBLK) && sb.st_rdev) {
1262 		fileset->fs_attrs |= FILESET_IS_RAW_DEV;
1263 		if (!(fileset->fs_attrs & FILESET_IS_FILE)) {
1264 			filebench_log(LOG_ERROR,
1265 			    "WARNING Fileset %s/%s Cannot be RAW device",
1266 			    avd_get_str(fileset->fs_path),
1267 			    avd_get_str(fileset->fs_name));
1268 			filebench_shutdown(1);
1269 		}
1270 
1271 		return (1);
1272 	}
1273 #endif /* HAVE_RAW_SUPPORT */
1274 
1275 	return (0);
1276 }
1277