xref: /onnv-gate/usr/src/cmd/filebench/common/fileset.c (revision 6391:f317d2de8920)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 
29 #include <fcntl.h>
30 #include <pthread.h>
31 #include <errno.h>
32 #include <math.h>
33 #include <libgen.h>
34 #include <sys/mman.h>
35 #include "fileset.h"
36 #include "filebench.h"
37 #include "gamma_dist.h"
38 
39 /*
40  * File sets, of type fileset_t, are entities which contain
41  * information about collections of files and subdirectories in Filebench.
42  * The fileset, once populated, consists of a tree of fileset entries of
43  * type filesetentry_t which specify files and directories.  The fileset
44  * is rooted in a directory specified by fileset_path, and once the populated
45  * fileset has been created, has a tree of directories and files
46  * corresponding to the fileset's filesetentry tree.
47  */
48 
/*
 * Forward declaration; called by fileset_createset() before a
 * fileset is populated and created.
 */
static int fileset_checkraw(fileset_t *fileset);

/*
 * Parallel allocation control.
 *
 * paralloc_count is reset to 0 by fileset_createset(), incremented by
 * fileset_create() just before it starts an allocation thread, and
 * decremented by fileset_alloc_thread() when an allocation succeeds.
 * A failing allocation thread sets it to -1, which fileset_create()
 * treats as an error. All accesses from fileset_create() and
 * fileset_alloc_thread() are made while holding paralloc_lock.
 *
 * paralloc_cv is signalled by every finishing allocation thread and
 * waited on by fileset_create() while MAX_PARALLOC_THREADS or more
 * allocations are outstanding.
 */
#define	MAX_PARALLOC_THREADS 32
static pthread_mutex_t	paralloc_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	paralloc_cv = PTHREAD_COND_INITIALIZER;
static int		paralloc_count;
56 
57 /*
58  * returns pointer to file or fileset
59  * string, as appropriate
60  */
61 static char *
62 fileset_entity_name(fileset_t *fileset)
63 {
64 	if (fileset->fs_attrs & FILESET_IS_FILE)
65 		return ("file");
66 	else
67 		return ("fileset");
68 }
69 
/*
 * Strips the last component from a pathname, in place.
 *
 * Working backwards from the terminating NUL, every character is
 * overwritten with '\0' until a forward slash has been overwritten.
 * The first character of the string is never examined or cleared, so
 * "/a" becomes "/" and a string without any slash is reduced to its
 * first character. Returns the "dir" pointer it was given.
 */
static char *
trunc_dirname(char *dir)
{
	char *cursor;

	for (cursor = dir + strlen(dir); cursor != dir; cursor--) {
		char was = *cursor;

		*cursor = '\0';
		if (was == '/')
			break;
	}
	return (dir);
}
91 
/*
 * Prints, to stderr, the list of options accepted when defining a
 * file or fileset and how to specify them.
 */
void
fileset_usage(void)
{
	/*
	 * The two literals below are concatenated by the compiler; only
	 * one of them may carry the separating comma, otherwise the
	 * output reads "path=<pathname>,,entries=<number>".
	 */
	(void) fprintf(stderr,
	    "define [file name=<name> | fileset name=<name>],path=<pathname>"
	    ",entries=<number>\n");
	(void) fprintf(stderr,
	    "		        [,filesize=[size]]\n");
	(void) fprintf(stderr,
	    "		        [,dirwidth=[width]]\n");
	(void) fprintf(stderr,
	    "		        [,dirdepthrv=$random_variable_name]\n");
	(void) fprintf(stderr,
	    "		        [,dirgamma=[100-10000]] "
	    "(Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,sizegamma=[100-10000]] (Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,prealloc=[percent]]\n");
	(void) fprintf(stderr, "		        [,paralloc]\n");
	(void) fprintf(stderr, "		        [,reuse]\n");
	(void) fprintf(stderr, "\n");
}
118 
119 /*
120  * Frees up memory mapped file region of supplied size. The
121  * file descriptor "fd" indicates which memory mapped file.
122  * If successful, returns 0. Otherwise returns -1 if "size"
123  * is zero, or -1 times the number of times msync() failed.
124  */
125 static int
126 fileset_freemem(int fd, off64_t size)
127 {
128 	off64_t left;
129 	int ret = 0;
130 
131 	for (left = size; left > 0; left -= MMAP_SIZE) {
132 		off64_t thismapsize;
133 		caddr_t addr;
134 
135 		thismapsize = MIN(MMAP_SIZE, left);
136 		addr = mmap64(0, thismapsize, PROT_READ|PROT_WRITE,
137 		    MAP_SHARED, fd, size - left);
138 		ret += msync(addr, thismapsize, MS_INVALIDATE);
139 		(void) munmap(addr, thismapsize);
140 	}
141 	return (ret);
142 }
143 
144 /*
145  * Creates a path string from the filesetentry_t "*entry"
146  * and all of its parent's path names. The resulting path
147  * is a concatination of all the individual parent paths.
148  * Allocates memory for the path string and returns a
149  * pointer to it.
150  */
151 char *
152 fileset_resolvepath(filesetentry_t *entry)
153 {
154 	filesetentry_t *fsep = entry;
155 	char path[MAXPATHLEN];
156 	char pathtmp[MAXPATHLEN];
157 	char *s;
158 
159 	*path = 0;
160 	while (fsep->fse_parent) {
161 		(void) strcpy(pathtmp, "/");
162 		(void) strcat(pathtmp, fsep->fse_path);
163 		(void) strcat(pathtmp, path);
164 		(void) strcpy(path, pathtmp);
165 		fsep = fsep->fse_parent;
166 	}
167 
168 	s = malloc(strlen(path) + 1);
169 	(void) strcpy(s, path);
170 	return (s);
171 }
172 
173 /*
174  * Creates multiple nested directories as required by the
175  * supplied path. Starts at the end of the path, creating
176  * a list of directories to mkdir, up to the root of the
177  * path, then mkdirs them one at a time from the root on down.
178  */
179 static int
180 fileset_mkdir(char *path, int mode)
181 {
182 	char *p;
183 	char *dirs[65536];
184 	int i = 0;
185 
186 	if ((p = strdup(path)) == NULL)
187 		goto null_str;
188 
189 	/*
190 	 * Fill an array of subdirectory path names until either we
191 	 * reach the root or encounter an already existing subdirectory
192 	 */
193 	/* CONSTCOND */
194 	while (1) {
195 		struct stat64 sb;
196 
197 		if (stat64(p, &sb) == 0)
198 			break;
199 		if (strlen(p) < 3)
200 			break;
201 		if ((dirs[i] = strdup(p)) == NULL) {
202 			free(p);
203 			goto null_str;
204 		}
205 
206 		(void) trunc_dirname(p);
207 		i++;
208 	}
209 
210 	/* Make the directories, from closest to root downwards. */
211 	for (--i; i >= 0; i--) {
212 		(void) mkdir(dirs[i], mode);
213 		free(dirs[i]);
214 	}
215 
216 	free(p);
217 	return (0);
218 
219 null_str:
220 	/* clean up */
221 	for (--i; i >= 0; i--)
222 		free(dirs[i]);
223 
224 	filebench_log(LOG_ERROR,
225 	    "Failed to create directory path %s: Out of memory", path);
226 
227 	return (-1);
228 }
229 
230 /*
231  * creates the subdirectory tree for a fileset.
232  */
233 static int
234 fileset_create_subdirs(fileset_t *fileset, char *filesetpath)
235 {
236 	filesetentry_t *direntry;
237 	char full_path[MAXPATHLEN];
238 	char *part_path;
239 
240 	/* walk the subdirectory list, enstanciating subdirs */
241 	direntry = fileset->fs_dirlist;
242 	while (direntry) {
243 		(void) strcpy(full_path, filesetpath);
244 		part_path = fileset_resolvepath(direntry);
245 		(void) strcat(full_path, part_path);
246 		free(part_path);
247 
248 		/* now create this portion of the subdirectory tree */
249 		if (fileset_mkdir(full_path, 0755) == -1)
250 			return (-1);
251 
252 		direntry = direntry->fse_dirnext;
253 	}
254 	return (0);
255 }
256 
257 /*
258  * given a fileset entry, determines if the associated file
259  * needs to be allocated or not, and if so does the allocation.
260  */
261 static int
262 fileset_alloc_file(filesetentry_t *entry)
263 {
264 	char path[MAXPATHLEN];
265 	char *buf;
266 	struct stat64 sb;
267 	char *pathtmp;
268 	off64_t seek;
269 	int fd;
270 
271 	*path = 0;
272 	(void) strcpy(path, avd_get_str(entry->fse_fileset->fs_path));
273 	(void) strcat(path, "/");
274 	(void) strcat(path, avd_get_str(entry->fse_fileset->fs_name));
275 	pathtmp = fileset_resolvepath(entry);
276 	(void) strcat(path, pathtmp);
277 
278 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
279 
280 	/* see if reusing and this file exists */
281 	if ((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0)) {
282 		if ((fd = open64(path, O_RDWR)) < 0) {
283 			filebench_log(LOG_INFO,
284 			    "Attempted but failed to Re-use file %s",
285 			    path);
286 			return (-1);
287 		}
288 
289 		if (sb.st_size == (off64_t)entry->fse_size) {
290 			filebench_log(LOG_INFO,
291 			    "Re-using file %s", path);
292 
293 			if (!avd_get_bool(entry->fse_fileset->fs_cached))
294 				(void) fileset_freemem(fd,
295 				    entry->fse_size);
296 
297 			entry->fse_flags |= FSE_EXISTS;
298 			(void) close(fd);
299 			return (0);
300 
301 		} else if (sb.st_size > (off64_t)entry->fse_size) {
302 			/* reuse, but too large */
303 			filebench_log(LOG_INFO,
304 			    "Truncating & re-using file %s", path);
305 
306 			(void) ftruncate64(fd,
307 			    (off64_t)entry->fse_size);
308 
309 			if (!avd_get_bool(entry->fse_fileset->fs_cached))
310 				(void) fileset_freemem(fd,
311 				    entry->fse_size);
312 
313 			entry->fse_flags |= FSE_EXISTS;
314 			(void) close(fd);
315 			return (0);
316 		}
317 	} else {
318 
319 		/* No file or not reusing, so create */
320 		if ((fd = open64(path, O_RDWR | O_CREAT, 0644)) < 0) {
321 			filebench_log(LOG_ERROR,
322 			    "Failed to pre-allocate file %s: %s",
323 			    path, strerror(errno));
324 
325 			return (-1);
326 		}
327 	}
328 
329 	if ((buf = (char *)malloc(FILE_ALLOC_BLOCK)) == NULL)
330 		return (-1);
331 
332 	entry->fse_flags |= FSE_EXISTS;
333 
334 	for (seek = 0; seek < entry->fse_size; ) {
335 		off64_t wsize;
336 		int ret = 0;
337 
338 		/*
339 		 * Write FILE_ALLOC_BLOCK's worth,
340 		 * except on last write
341 		 */
342 		wsize = MIN(entry->fse_size - seek, FILE_ALLOC_BLOCK);
343 
344 		ret = write(fd, buf, wsize);
345 		if (ret != wsize) {
346 			filebench_log(LOG_ERROR,
347 			    "Failed to pre-allocate file %s: %s",
348 			    path, strerror(errno));
349 			(void) close(fd);
350 			free(buf);
351 			return (-1);
352 		}
353 		seek += wsize;
354 	}
355 
356 	if (!avd_get_bool(entry->fse_fileset->fs_cached))
357 		(void) fileset_freemem(fd, entry->fse_size);
358 
359 	(void) close(fd);
360 
361 	free(buf);
362 
363 	filebench_log(LOG_DEBUG_IMPL,
364 	    "Pre-allocated file %s size %llu",
365 	    path, (u_longlong_t)entry->fse_size);
366 
367 	return (0);
368 }
369 
370 /*
371  * given a fileset entry, determines if the associated file
372  * needs to be allocated or not, and if so does the allocation.
373  */
374 static void *
375 fileset_alloc_thread(filesetentry_t *entry)
376 {
377 	if (fileset_alloc_file(entry) == -1) {
378 		(void) pthread_mutex_lock(&paralloc_lock);
379 		paralloc_count = -1;
380 	} else {
381 		(void) pthread_mutex_lock(&paralloc_lock);
382 		paralloc_count--;
383 	}
384 
385 	(void) pthread_cond_signal(&paralloc_cv);
386 	(void) pthread_mutex_unlock(&paralloc_lock);
387 
388 	pthread_exit(NULL);
389 	return (NULL);
390 }
391 
392 
393 /*
394  * First creates the parent directories of the file using
395  * fileset_mkdir(). Then Optionally sets the O_DSYNC flag
396  * and opens the file with open64(). It unlocks the fileset
397  * entry lock, sets the DIRECTIO_ON or DIRECTIO_OFF flags
398  * as requested, and returns the file descriptor integer
399  * for the opened file.
400  */
401 int
402 fileset_openfile(fileset_t *fileset,
403     filesetentry_t *entry, int flag, int mode, int attrs)
404 {
405 	char path[MAXPATHLEN];
406 	char dir[MAXPATHLEN];
407 	char *pathtmp;
408 	struct stat64 sb;
409 	int fd;
410 	int open_attrs = 0;
411 
412 	*path = 0;
413 	(void) strcpy(path, avd_get_str(fileset->fs_path));
414 	(void) strcat(path, "/");
415 	(void) strcat(path, avd_get_str(fileset->fs_name));
416 	pathtmp = fileset_resolvepath(entry);
417 	(void) strcat(path, pathtmp);
418 	(void) strcpy(dir, path);
419 	free(pathtmp);
420 	(void) trunc_dirname(dir);
421 
422 	/* If we are going to create a file, create the parent dirs */
423 	if ((flag & O_CREAT) && (stat64(dir, &sb) != 0)) {
424 		if (fileset_mkdir(dir, 0755) == -1)
425 			return (-1);
426 	}
427 
428 	if (flag & O_CREAT)
429 		entry->fse_flags |= FSE_EXISTS;
430 
431 	if (attrs & FLOW_ATTR_DSYNC) {
432 #ifdef sun
433 		open_attrs |= O_DSYNC;
434 #else
435 		open_attrs |= O_FSYNC;
436 #endif
437 	}
438 
439 	if ((fd = open64(path, flag | open_attrs, mode)) < 0) {
440 		filebench_log(LOG_ERROR,
441 		    "Failed to open file %s: %s",
442 		    path, strerror(errno));
443 		(void) ipc_mutex_unlock(&entry->fse_lock);
444 		return (-1);
445 	}
446 	(void) ipc_mutex_unlock(&entry->fse_lock);
447 
448 #ifdef sun
449 	if (attrs & FLOW_ATTR_DIRECTIO)
450 		(void) directio(fd, DIRECTIO_ON);
451 	else
452 		(void) directio(fd, DIRECTIO_OFF);
453 #endif
454 
455 	return (fd);
456 }
457 
458 
459 /*
460  * Selects a fileset entry from a fileset. If the
461  * FILESET_PICKDIR flag is set it will pick a directory
462  * entry, otherwise a file entry. The FILESET_PICKRESET
463  * flag will cause it to reset the free list to the
464  * overall list (file or directory). The FILESET_PICKUNIQUE
465  * flag will take an entry off of one of the free (unused)
466  * lists (file or directory), otherwise the entry will be
467  * picked off of one of the rotor lists (file or directory).
468  * The FILESET_PICKEXISTS will insure that only extant
469  * (FSE_EXISTS) state files are selected, while
470  * FILESET_PICKNOEXIST insures that only non extant
471  * (not FSE_EXISTS) state files are selected.
472  * Note that the selected fileset entry (file) is returned
473  * with its fse_lock field locked.
474  */
475 filesetentry_t *
476 fileset_pick(fileset_t *fileset, int flags, int tid)
477 {
478 	filesetentry_t *entry = NULL;
479 	filesetentry_t *first = NULL;
480 
481 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
482 
483 	while (entry == NULL) {
484 
485 		if ((flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
486 			entry = fileset->fs_dirlist;
487 			while (entry) {
488 				entry->fse_flags |= FSE_FREE;
489 				entry = entry->fse_dirnext;
490 			}
491 			fileset->fs_dirfree = fileset->fs_dirlist;
492 		}
493 
494 		if (!(flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
495 			entry = fileset->fs_filelist;
496 			while (entry) {
497 				entry->fse_flags |= FSE_FREE;
498 				entry = entry->fse_filenext;
499 			}
500 			fileset->fs_filefree = fileset->fs_filelist;
501 		}
502 
503 		if (flags & FILESET_PICKUNIQUE) {
504 			if (flags & FILESET_PICKDIR) {
505 				entry = fileset->fs_dirfree;
506 				if (entry == NULL)
507 					goto empty;
508 				fileset->fs_dirfree = entry->fse_dirnext;
509 			} else {
510 				entry = fileset->fs_filefree;
511 				if (entry == NULL)
512 					goto empty;
513 				fileset->fs_filefree = entry->fse_filenext;
514 			}
515 			entry->fse_flags &= ~FSE_FREE;
516 		} else {
517 			if (flags & FILESET_PICKDIR) {
518 				entry = fileset->fs_dirrotor;
519 				if (entry == NULL)
520 				fileset->fs_dirrotor =
521 				    entry = fileset->fs_dirlist;
522 				fileset->fs_dirrotor = entry->fse_dirnext;
523 			} else {
524 				entry = fileset->fs_filerotor[tid];
525 				if (entry == NULL)
526 					fileset->fs_filerotor[tid] =
527 					    entry = fileset->fs_filelist;
528 				fileset->fs_filerotor[tid] =
529 				    entry->fse_filenext;
530 			}
531 		}
532 
533 		if (first == entry)
534 			goto empty;
535 
536 		if (first == NULL)
537 			first = entry;
538 
539 		/* Return locked entry */
540 		(void) ipc_mutex_lock(&entry->fse_lock);
541 
542 		/* If we ask for an existing file, go round again */
543 		if ((flags & FILESET_PICKEXISTS) &&
544 		    !(entry->fse_flags & FSE_EXISTS)) {
545 			(void) ipc_mutex_unlock(&entry->fse_lock);
546 			entry = NULL;
547 		}
548 
549 		/* If we ask for not an existing file, go round again */
550 		if ((flags & FILESET_PICKNOEXIST) &&
551 		    (entry->fse_flags & FSE_EXISTS)) {
552 			(void) ipc_mutex_unlock(&entry->fse_lock);
553 			entry = NULL;
554 		}
555 	}
556 
557 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
558 	filebench_log(LOG_DEBUG_SCRIPT, "Picked file %s", entry->fse_path);
559 	return (entry);
560 
561 empty:
562 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
563 	return (NULL);
564 }
565 
566 /*
567  * Given a fileset "fileset", create the associated files as
568  * specified in the attributes of the fileset. The fileset is
569  * rooted in a directory whose pathname is in fileset_path. If the
570  * directory exists, meaning that there is already a fileset,
571  * and the fileset_reuse attribute is false, then remove it and all
572  * its contained files and subdirectories. Next, the routine
573  * creates a root directory for the fileset. All the file type
574  * filesetentries are cycled through creating as needed
575  * their containing subdirectory trees in the filesystem and
576  * creating actual files for fileset_preallocpercent of them. The
577  * created files are filled with fse_size bytes of unitialized
578  * data. The routine returns -1 on errors, 0 on success.
579  */
580 static int
581 fileset_create(fileset_t *fileset)
582 {
583 	filesetentry_t *entry;
584 	char path[MAXPATHLEN];
585 	struct stat64 sb;
586 	int pickflags = FILESET_PICKUNIQUE | FILESET_PICKRESET;
587 	hrtime_t start = gethrtime();
588 	char *fileset_path;
589 	char *fileset_name;
590 	int randno;
591 	int preallocated = 0;
592 	int reusing = 0;
593 
594 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
595 		filebench_log(LOG_ERROR, "%s path not set",
596 		    fileset_entity_name(fileset));
597 		return (-1);
598 	}
599 
600 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
601 		filebench_log(LOG_ERROR, "%s name not set",
602 		    fileset_entity_name(fileset));
603 		return (-1);
604 	}
605 
606 #ifdef HAVE_RAW_SUPPORT
607 	/* treat raw device as special case */
608 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
609 		return (0);
610 #endif /* HAVE_RAW_SUPPORT */
611 
612 	/* XXX Add check to see if there is enough space */
613 
614 	/* Remove existing */
615 	(void) strcpy(path, fileset_path);
616 	(void) strcat(path, "/");
617 	(void) strcat(path, fileset_name);
618 	if ((stat64(path, &sb) == 0) && (strlen(path) > 3) &&
619 	    (strlen(avd_get_str(fileset->fs_path)) > 2)) {
620 		if (!avd_get_bool(fileset->fs_reuse)) {
621 			char cmd[MAXPATHLEN];
622 
623 			(void) snprintf(cmd, sizeof (cmd), "rm -rf %s", path);
624 			(void) system(cmd);
625 			filebench_log(LOG_VERBOSE,
626 			    "Removed any existing %s %s in %llu seconds",
627 			    fileset_entity_name(fileset), fileset_name,
628 			    (u_longlong_t)(((gethrtime() - start) /
629 			    1000000000) + 1));
630 		} else {
631 			/* we are re-using */
632 			reusing = 1;
633 			filebench_log(LOG_VERBOSE,
634 			    "Re-using %s %s on %s file system.",
635 			    fileset_entity_name(fileset),
636 			    fileset_name, sb.st_fstype);
637 		}
638 	}
639 	(void) mkdir(path, 0755);
640 
641 	/* make the filesets directory tree */
642 	if (fileset_create_subdirs(fileset, path) == -1)
643 		return (-1);
644 
645 	start = gethrtime();
646 
647 	filebench_log(LOG_VERBOSE, "Creating %s %s...",
648 	    fileset_entity_name(fileset), fileset_name);
649 
650 	if (!avd_get_bool(fileset->fs_prealloc))
651 		goto exit;
652 
653 	randno = ((RAND_MAX * (100
654 	    - avd_get_int(fileset->fs_preallocpercent))) / 100);
655 
656 	while (entry = fileset_pick(fileset, pickflags, 0)) {
657 		pthread_t tid;
658 
659 		pickflags = FILESET_PICKUNIQUE;
660 
661 		entry->fse_flags &= ~FSE_EXISTS;
662 
663 		/* entry doesn't need to be locked during initialization */
664 		(void) ipc_mutex_unlock(&entry->fse_lock);
665 
666 		if (rand() < randno)
667 			continue;
668 
669 		preallocated++;
670 
671 		if (reusing)
672 			entry->fse_flags |= FSE_REUSING;
673 		else
674 			entry->fse_flags &= (~FSE_REUSING);
675 
676 		if (avd_get_bool(fileset->fs_paralloc)) {
677 
678 			/* fire off a separate allocation thread */
679 			(void) pthread_mutex_lock(&paralloc_lock);
680 			while (paralloc_count >= MAX_PARALLOC_THREADS) {
681 				(void) pthread_cond_wait(
682 				    &paralloc_cv, &paralloc_lock);
683 			}
684 
685 			if (paralloc_count < 0) {
686 				(void) pthread_mutex_unlock(&paralloc_lock);
687 				return (-1);
688 			}
689 
690 			paralloc_count++;
691 			(void) pthread_mutex_unlock(&paralloc_lock);
692 
693 			if (pthread_create(&tid, NULL,
694 			    (void *(*)(void*))fileset_alloc_thread,
695 			    entry) != 0) {
696 				filebench_log(LOG_ERROR,
697 				    "File prealloc thread create failed");
698 				filebench_shutdown(1);
699 			}
700 
701 		} else {
702 			if (fileset_alloc_file(entry) == -1)
703 				return (-1);
704 		}
705 	}
706 
707 exit:
708 	filebench_log(LOG_VERBOSE,
709 	    "Preallocated %d of %llu of %s %s in %llu seconds",
710 	    preallocated,
711 	    (u_longlong_t)fileset->fs_constentries,
712 	    fileset_entity_name(fileset), fileset_name,
713 	    (u_longlong_t)(((gethrtime() - start) / 1000000000) + 1));
714 
715 	return (0);
716 }
717 
718 /*
719  * Adds an entry to the fileset's file list. Single threaded so
720  * no locking needed.
721  */
722 static void
723 fileset_insfilelist(fileset_t *fileset, filesetentry_t *entry)
724 {
725 	if (fileset->fs_filelist == NULL) {
726 		fileset->fs_filelist = entry;
727 		entry->fse_filenext = NULL;
728 	} else {
729 		entry->fse_filenext = fileset->fs_filelist;
730 		fileset->fs_filelist = entry;
731 	}
732 }
733 
734 /*
735  * Adds an entry to the fileset's directory list. Single
736  * threaded so no locking needed.
737  */
738 static void
739 fileset_insdirlist(fileset_t *fileset, filesetentry_t *entry)
740 {
741 	if (fileset->fs_dirlist == NULL) {
742 		fileset->fs_dirlist = entry;
743 		entry->fse_dirnext = NULL;
744 	} else {
745 		entry->fse_dirnext = fileset->fs_dirlist;
746 		fileset->fs_dirlist = entry;
747 	}
748 }
749 
750 /*
751  * Obtaines a filesetentry entity for a file to be placed in a
752  * (sub)directory of a fileset. The size of the file may be
753  * specified by fileset_meansize, or calculated from a gamma
754  * distribution of parameter fileset_sizegamma and of mean size
755  * fileset_meansize. The filesetentry entity is placed on the file
756  * list in the specified parent filesetentry entity, which may
757  * be a directory filesetentry, or the root filesetentry in the
758  * fileset. It is also placed on the fileset's list of all
759  * contained files. Returns 0 if successful or -1 if ipc memory
760  * for the path string cannot be allocated.
761  */
762 static int
763 fileset_populate_file(fileset_t *fileset, filesetentry_t *parent, int serial)
764 {
765 	char tmpname[16];
766 	filesetentry_t *entry;
767 	double drand;
768 
769 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
770 	    == NULL) {
771 		filebench_log(LOG_ERROR,
772 		    "fileset_populate_file: Can't malloc filesetentry");
773 		return (-1);
774 	}
775 
776 	(void) pthread_mutex_init(&entry->fse_lock, ipc_mutexattr());
777 	entry->fse_parent = parent;
778 	entry->fse_fileset = fileset;
779 	entry->fse_flags |= FSE_FREE;
780 	fileset_insfilelist(fileset, entry);
781 
782 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
783 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
784 		filebench_log(LOG_ERROR,
785 		    "fileset_populate_file: Can't alloc path string");
786 		return (-1);
787 	}
788 
789 	/* see if random variable was supplied for file size */
790 	if (fileset->fs_meansize == -1) {
791 		entry->fse_size = (off64_t)avd_get_int(fileset->fs_size);
792 	} else {
793 		double gamma;
794 
795 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
796 		if (gamma > 0) {
797 			drand = gamma_dist_knuth(gamma,
798 			    fileset->fs_meansize / gamma);
799 			entry->fse_size = (off64_t)drand;
800 		} else {
801 			entry->fse_size = (off64_t)fileset->fs_meansize;
802 		}
803 	}
804 
805 	fileset->fs_bytes += entry->fse_size;
806 
807 	fileset->fs_realfiles++;
808 	return (0);
809 }
810 
811 /*
812  * Creates a directory node in a fileset, by obtaining a
813  * filesetentry entity for the node and initializing it
814  * according to parameters of the fileset. It determines a
815  * directory tree depth and directory width, optionally using
816  * a gamma distribution. If its calculated depth is less then
817  * its actual depth in the directory tree, it becomes a leaf
818  * node and files itself with "width" number of file type
819  * filesetentries, otherwise it files itself with "width"
820  * number of directory type filesetentries, using recursive
821  * calls to fileset_populate_subdir. The end result of the
822  * initial call to this routine is a tree of directories of
823  * random width and varying depth with sufficient leaf
824  * directories to contain all required files.
825  * Returns 0 on success. Returns -1 if ipc path string memory
826  * cannot be allocated and returns an error code (currently
827  * also -1) from calls to fileset_populate_file or recursive
828  * calls to fileset_populate_subdir.
829  */
830 static int
831 fileset_populate_subdir(fileset_t *fileset, filesetentry_t *parent,
832     int serial, double depth)
833 {
834 	double randepth, drand, ranwidth;
835 	int isleaf = 0;
836 	char tmpname[16];
837 	filesetentry_t *entry;
838 	int i;
839 
840 	depth += 1;
841 
842 	/* Create dir node */
843 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
844 	    == NULL) {
845 		filebench_log(LOG_ERROR,
846 		    "fileset_populate_subdir: Can't malloc filesetentry");
847 		return (-1);
848 	}
849 
850 	(void) pthread_mutex_init(&entry->fse_lock, ipc_mutexattr());
851 
852 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
853 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
854 		filebench_log(LOG_ERROR,
855 		    "fileset_populate_subdir: Can't alloc path string");
856 		return (-1);
857 	}
858 
859 	entry->fse_parent = parent;
860 	entry->fse_flags |= FSE_DIR | FSE_FREE;
861 	fileset_insdirlist(fileset, entry);
862 
863 	if (fileset->fs_dirdepthrv) {
864 		randepth = (int)avd_get_int(fileset->fs_dirdepthrv);
865 	} else {
866 		double gamma;
867 
868 		gamma = avd_get_int(fileset->fs_dirgamma) / 1000.0;
869 		if (gamma > 0) {
870 			drand = gamma_dist_knuth(gamma,
871 			    fileset->fs_meandepth / gamma);
872 			randepth = (int)drand;
873 		} else {
874 			randepth = (int)fileset->fs_meandepth;
875 		}
876 	}
877 
878 	if (fileset->fs_meanwidth == -1) {
879 		ranwidth = avd_get_dbl(fileset->fs_dirwidth);
880 	} else {
881 		double gamma;
882 
883 		gamma = avd_get_int(fileset->fs_sizegamma) / 1000.0;
884 		if (gamma > 0) {
885 			drand = gamma_dist_knuth(gamma,
886 			    fileset->fs_meanwidth / gamma);
887 			ranwidth = drand;
888 		} else {
889 			ranwidth = fileset->fs_meanwidth;
890 		}
891 	}
892 
893 	if (randepth == 0)
894 		randepth = 1;
895 	if (ranwidth == 0)
896 		ranwidth = 1;
897 	if (depth >= randepth)
898 		isleaf = 1;
899 
900 	/*
901 	 * Create directory of random width according to distribution, or
902 	 * if root directory, continue until #files required
903 	 */
904 	for (i = 1; ((parent == NULL) || (i < ranwidth + 1)) &&
905 	    (fileset->fs_realfiles < fileset->fs_constentries);
906 	    i++) {
907 		int ret = 0;
908 
909 		if (parent && isleaf)
910 			ret = fileset_populate_file(fileset, entry, i);
911 		else
912 			ret = fileset_populate_subdir(fileset, entry, i, depth);
913 
914 		if (ret != 0)
915 			return (ret);
916 	}
917 	return (0);
918 }
919 
920 /*
921  * Populates a fileset with files and subdirectory entries. Uses
922  * the supplied fileset_dirwidth and fileset_entries (number of files) to
923  * calculate the required fileset_meandepth (of subdirectories) and
924  * initialize the fileset_meanwidth and fileset_meansize variables. Then
925  * calls fileset_populate_subdir() to do the recursive
926  * subdirectory entry creation and leaf file entry creation. All
927  * of the above is skipped if the fileset has already been
928  * populated. Returns 0 on success, or an error code from the
929  * call to fileset_populate_subdir if that call fails.
930  */
931 static int
932 fileset_populate(fileset_t *fileset)
933 {
934 	int entries = (int)avd_get_int(fileset->fs_entries);
935 	int meandirwidth;
936 	int ret;
937 
938 	/* Skip if already populated */
939 	if (fileset->fs_bytes > 0)
940 		goto exists;
941 
942 #ifdef HAVE_RAW_SUPPORT
943 	/* check for raw device */
944 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
945 		return (0);
946 #endif /* HAVE_RAW_SUPPORT */
947 
948 	/* save value of entries obtained for later, in case it was random */
949 	fileset->fs_constentries = entries;
950 
951 	/* is dirwidth a random variable? */
952 	if (AVD_IS_RANDOM(fileset->fs_dirwidth)) {
953 		meandirwidth =
954 		    (int)fileset->fs_dirwidth->avd_val.randptr->rnd_dbl_mean;
955 		fileset->fs_meanwidth = -1;
956 	} else {
957 		meandirwidth = (int)avd_get_int(fileset->fs_dirwidth);
958 		fileset->fs_meanwidth = (double)meandirwidth;
959 	}
960 
961 	/*
962 	 * Input params are:
963 	 *	# of files
964 	 *	ave # of files per dir
965 	 *	max size of dir
966 	 *	# ave size of file
967 	 *	max size of file
968 	 */
969 	fileset->fs_meandepth = log(entries) / log(meandirwidth);
970 
971 	/* Has a random variable been supplied for dirdepth? */
972 	if (fileset->fs_dirdepthrv) {
973 		/* yes, so set the random variable's mean value to meandepth */
974 		fileset->fs_dirdepthrv->avd_val.randptr->rnd_dbl_mean =
975 		    fileset->fs_meandepth;
976 	}
977 
978 	/* test for random size variable */
979 	if (AVD_IS_RANDOM(fileset->fs_size))
980 		fileset->fs_meansize = -1;
981 	else
982 		fileset->fs_meansize = avd_get_int(fileset->fs_size);
983 
984 	if ((ret = fileset_populate_subdir(fileset, NULL, 1, 0)) != 0)
985 		return (ret);
986 
987 
988 exists:
989 	if (fileset->fs_attrs & FILESET_IS_FILE) {
990 		filebench_log(LOG_VERBOSE, "File %s: mbytes=%llu",
991 		    avd_get_str(fileset->fs_name),
992 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
993 	} else {
994 		filebench_log(LOG_VERBOSE, "Fileset %s: %d files, "
995 		    "avg dir = %d, avg depth = %.1lf, mbytes=%llu",
996 		    avd_get_str(fileset->fs_name), entries,
997 		    meandirwidth,
998 		    fileset->fs_meandepth,
999 		    (u_longlong_t)(fileset->fs_bytes / 1024UL / 1024UL));
1000 	}
1001 	return (0);
1002 }
1003 
1004 /*
1005  * Allocates a fileset instance, initializes fileset_dirgamma and
1006  * fileset_sizegamma default values, and sets the fileset name to the
1007  * supplied name string. Puts the allocated fileset on the
1008  * master fileset list and returns a pointer to it.
1009  */
1010 fileset_t *
1011 fileset_define(avd_t name)
1012 {
1013 	fileset_t *fileset;
1014 
1015 	if (name == NULL)
1016 		return (NULL);
1017 
1018 	if ((fileset = (fileset_t *)ipc_malloc(FILEBENCH_FILESET)) == NULL) {
1019 		filebench_log(LOG_ERROR,
1020 		    "fileset_define: Can't malloc fileset");
1021 		return (NULL);
1022 	}
1023 
1024 	filebench_log(LOG_DEBUG_IMPL,
1025 	    "Defining file %s", avd_get_str(name));
1026 
1027 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1028 
1029 	fileset->fs_dirgamma = avd_int_alloc(1500);
1030 	fileset->fs_sizegamma = avd_int_alloc(1500);
1031 
1032 	/* Add fileset to global list */
1033 	if (filebench_shm->shm_filesetlist == NULL) {
1034 		filebench_shm->shm_filesetlist = fileset;
1035 		fileset->fs_next = NULL;
1036 	} else {
1037 		fileset->fs_next = filebench_shm->shm_filesetlist;
1038 		filebench_shm->shm_filesetlist = fileset;
1039 	}
1040 
1041 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1042 
1043 	fileset->fs_name = name;
1044 
1045 	return (fileset);
1046 }
1047 
1048 /*
1049  * If supplied with a pointer to a fileset and the fileset's
1050  * fileset_prealloc flag is set, calls fileset_populate() to populate
1051  * the fileset with filesetentries, then calls fileset_create()
1052  * to make actual directories and files for the filesetentries.
1053  * Otherwise, it applies fileset_populate() and fileset_create()
1054  * to all the filesets on the master fileset list. It always
1055  * returns zero (0) if one fileset is populated / created,
1056  * otherwise it returns the sum of returned values from
1057  * fileset_create() and fileset_populate(), which
1058  * will be a negative one (-1) times the number of
1059  * fileset_create() calls which failed.
1060  */
1061 int
1062 fileset_createset(fileset_t *fileset)
1063 {
1064 	fileset_t *list;
1065 	int ret = 0;
1066 
1067 	/* set up for possible parallel allocate */
1068 	paralloc_count = 0;
1069 
1070 	if (fileset && avd_get_bool(fileset->fs_prealloc)) {
1071 
1072 		/* check for raw files */
1073 		if (fileset_checkraw(fileset)) {
1074 			filebench_log(LOG_INFO,
1075 			    "file %s/%s is a RAW device",
1076 			    avd_get_str(fileset->fs_path),
1077 			    avd_get_str(fileset->fs_name));
1078 			return (0);
1079 		}
1080 
1081 		filebench_log(LOG_INFO,
1082 		    "creating/pre-allocating %s %s",
1083 		    fileset_entity_name(fileset),
1084 		    avd_get_str(fileset->fs_name));
1085 
1086 		if ((ret = fileset_populate(fileset)) != 0)
1087 			return (ret);
1088 
1089 		if ((ret = fileset_create(fileset)) != 0)
1090 			return (ret);
1091 	} else {
1092 
1093 		filebench_log(LOG_INFO,
1094 		    "Creating/pre-allocating files and filesets");
1095 
1096 		list = filebench_shm->shm_filesetlist;
1097 		while (list) {
1098 			/* check for raw files */
1099 			if (fileset_checkraw(list)) {
1100 				filebench_log(LOG_INFO,
1101 				    "file %s/%s is a RAW device",
1102 				    avd_get_str(list->fs_path),
1103 				    avd_get_str(list->fs_name));
1104 				list = list->fs_next;
1105 				continue;
1106 			}
1107 
1108 			if ((ret = fileset_populate(list)) != 0)
1109 				return (ret);
1110 			if ((ret = fileset_create(list)) != 0)
1111 				return (ret);
1112 			list = list->fs_next;
1113 		}
1114 	}
1115 
1116 	/* wait for allocation threads to finish */
1117 	filebench_log(LOG_INFO,
1118 	    "waiting for fileset pre-allocation to finish");
1119 
1120 	(void) pthread_mutex_lock(&paralloc_lock);
1121 	while (paralloc_count > 0)
1122 		(void) pthread_cond_wait(&paralloc_cv, &paralloc_lock);
1123 	(void) pthread_mutex_unlock(&paralloc_lock);
1124 
1125 	if (paralloc_count < 0)
1126 		return (-1);
1127 
1128 	return (0);
1129 }
1130 
1131 /*
1132  * Searches through the master fileset list for the named fileset.
1133  * If found, returns pointer to same, otherwise returns NULL.
1134  */
1135 fileset_t *
1136 fileset_find(char *name)
1137 {
1138 	fileset_t *fileset = filebench_shm->shm_filesetlist;
1139 
1140 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1141 
1142 	while (fileset) {
1143 		if (strcmp(name, avd_get_str(fileset->fs_name)) == 0) {
1144 			(void) ipc_mutex_unlock(
1145 			    &filebench_shm->shm_fileset_lock);
1146 			return (fileset);
1147 		}
1148 		fileset = fileset->fs_next;
1149 	}
1150 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1151 
1152 	return (NULL);
1153 }
1154 
1155 /*
1156  * Iterates over all the file sets in the filesetlist,
1157  * executing the supplied command "*cmd()" on them. Also
1158  * indicates to the executed command if it is the first
1159  * time the command has been executed since the current
1160  * call to fileset_iter.
1161  */
1162 void
1163 fileset_iter(int (*cmd)(fileset_t *fileset, int first))
1164 {
1165 	fileset_t *fileset = filebench_shm->shm_filesetlist;
1166 	int count = 0;
1167 
1168 	(void) ipc_mutex_lock(&filebench_shm->shm_fileset_lock);
1169 
1170 	while (fileset) {
1171 		cmd(fileset, count == 0);
1172 		fileset = fileset->fs_next;
1173 		count++;
1174 	}
1175 
1176 	(void) ipc_mutex_unlock(&filebench_shm->shm_fileset_lock);
1177 }
1178 
1179 /*
1180  * Prints information to the filebench log about the file
1181  * object. Also prints a header on the first call.
1182  */
1183 int
1184 fileset_print(fileset_t *fileset, int first)
1185 {
1186 	int pathlength;
1187 	char *fileset_path;
1188 	char *fileset_name;
1189 	static char pad[] = "                              "; /* 30 spaces */
1190 
1191 	if ((fileset_path = avd_get_str(fileset->fs_path)) == NULL) {
1192 		filebench_log(LOG_ERROR, "%s path not set",
1193 		    fileset_entity_name(fileset));
1194 		return (-1);
1195 	}
1196 
1197 	if ((fileset_name = avd_get_str(fileset->fs_name)) == NULL) {
1198 		filebench_log(LOG_ERROR, "%s name not set",
1199 		    fileset_entity_name(fileset));
1200 		return (-1);
1201 	}
1202 
1203 	pathlength = strlen(fileset_path) + strlen(fileset_name);
1204 
1205 	if (pathlength > 29)
1206 		pathlength = 29;
1207 
1208 	if (first) {
1209 		filebench_log(LOG_INFO, "File or Fileset name%20s%12s%10s",
1210 		    "file size",
1211 		    "dir width",
1212 		    "entries");
1213 	}
1214 
1215 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1216 		if (fileset->fs_attrs & FILESET_IS_RAW_DEV) {
1217 			filebench_log(LOG_INFO,
1218 			    "%s/%s%s         (Raw Device)",
1219 			    fileset_path, fileset_name, &pad[pathlength]);
1220 		} else {
1221 			filebench_log(LOG_INFO,
1222 			    "%s/%s%s%9llu     (Single File)",
1223 			    fileset_path, fileset_name, &pad[pathlength],
1224 			    (u_longlong_t)avd_get_int(fileset->fs_size));
1225 		}
1226 	} else {
1227 		filebench_log(LOG_INFO, "%s/%s%s%9llu%12llu%10llu",
1228 		    fileset_path, fileset_name,
1229 		    &pad[pathlength],
1230 		    (u_longlong_t)avd_get_int(fileset->fs_size),
1231 		    (u_longlong_t)avd_get_int(fileset->fs_dirwidth),
1232 		    (u_longlong_t)fileset->fs_constentries);
1233 	}
1234 	return (0);
1235 }
1236 /*
1237  * checks to see if the path/name pair points to a raw device. If
1238  * so it sets the raw device flag (FILESET_IS_RAW_DEV) and returns 1.
1239  * If RAW is not defined, or it is not a raw device, it clears the
1240  * raw device flag and returns 0.
1241  */
1242 int
1243 fileset_checkraw(fileset_t *fileset)
1244 {
1245 	char path[MAXPATHLEN];
1246 	struct stat64 sb;
1247 	char *pathname;
1248 	char *setname;
1249 
1250 	fileset->fs_attrs &= (~FILESET_IS_RAW_DEV);
1251 
1252 #ifdef HAVE_RAW_SUPPORT
1253 	/* check for raw device */
1254 	if ((pathname = avd_get_str(fileset->fs_path)) == NULL)
1255 		return (0);
1256 
1257 	if ((setname = avd_get_str(fileset->fs_name)) == NULL)
1258 		return (0);
1259 
1260 	(void) strcpy(path, pathname);
1261 	(void) strcat(path, "/");
1262 	(void) strcat(path, setname);
1263 	if ((stat64(path, &sb) == 0) &&
1264 	    ((sb.st_mode & S_IFMT) == S_IFBLK) && sb.st_rdev) {
1265 		fileset->fs_attrs |= FILESET_IS_RAW_DEV;
1266 		if (!(fileset->fs_attrs & FILESET_IS_FILE)) {
1267 			filebench_log(LOG_ERROR,
1268 			    "WARNING Fileset %s/%s Cannot be RAW device",
1269 			    avd_get_str(fileset->fs_path),
1270 			    avd_get_str(fileset->fs_name));
1271 			filebench_shutdown(1);
1272 		}
1273 
1274 		return (1);
1275 	}
1276 #endif /* HAVE_RAW_SUPPORT */
1277 
1278 	return (0);
1279 }
1280