15184Sek110237 /*
25184Sek110237  * CDDL HEADER START
35184Sek110237  *
45184Sek110237  * The contents of this file are subject to the terms of the
55184Sek110237  * Common Development and Distribution License (the "License").
65184Sek110237  * You may not use this file except in compliance with the License.
75184Sek110237  *
85184Sek110237  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
95184Sek110237  * or http://www.opensolaris.org/os/licensing.
105184Sek110237  * See the License for the specific language governing permissions
115184Sek110237  * and limitations under the License.
125184Sek110237  *
135184Sek110237  * When distributing Covered Code, include this CDDL HEADER in each
145184Sek110237  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
155184Sek110237  * If applicable, add the following below this CDDL HEADER, with the
165184Sek110237  * fields enclosed by brackets "[]" replaced with your own identifying
175184Sek110237  * information: Portions Copyright [yyyy] [name of copyright owner]
185184Sek110237  *
195184Sek110237  * CDDL HEADER END
205184Sek110237  */
215184Sek110237 /*
225184Sek110237  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
235184Sek110237  * Use is subject to license terms.
245184Sek110237  */
255184Sek110237 
265184Sek110237 #pragma ident	"%Z%%M%	%I%	%E% SMI"
275184Sek110237 
285184Sek110237 
295184Sek110237 #include <fcntl.h>
305184Sek110237 #include <pthread.h>
315184Sek110237 #include <errno.h>
325184Sek110237 #include <math.h>
335184Sek110237 #include <libgen.h>
345184Sek110237 #include <sys/mman.h>
355184Sek110237 #include "fileset.h"
365184Sek110237 #include "filebench.h"
375184Sek110237 #include "gamma_dist.h"
385184Sek110237 
395184Sek110237 /*
405184Sek110237  * File sets, of type fileset_t, are entities which contain
415184Sek110237  * information about collections of files and subdirectories in Filebench.
425184Sek110237  * The fileset, once populated, consists of a tree of fileset entries of
435184Sek110237  * type filesetentry_t which specify files and directories.  The fileset
445184Sek110237  * is rooted in a directory specified by fs_path, and once the populated
455184Sek110237  * fileset has been created, has a tree of directories and files
465184Sek110237  * corresponding to the fileset's filesetentry tree.
475184Sek110237  */
485184Sek110237 
/*
 * Parallel allocation control.
 *
 * MAX_PARALLOC_THREADS caps how many file allocation threads may be
 * outstanding at once. paralloc_count is the number of outstanding
 * allocation threads; an allocation thread that fails sets it to -1.
 * paralloc_cv is signalled each time an allocation thread finishes,
 * and paralloc_lock protects paralloc_count.
 */
#define	MAX_PARALLOC_THREADS 32
static int		paralloc_count = 0;
static pthread_cond_t	paralloc_cv = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t	paralloc_lock = PTHREAD_MUTEX_INITIALIZER;
54*5673Saw148015 
55*5673Saw148015 /*
56*5673Saw148015  * returns pointer to file or fileset
57*5673Saw148015  * string, as appropriate
58*5673Saw148015  */
59*5673Saw148015 static char *
60*5673Saw148015 fileset_entity_name(fileset_t *fileset)
61*5673Saw148015 {
62*5673Saw148015 	if (fileset->fs_attrs & FILESET_IS_FILE)
63*5673Saw148015 		return ("file");
64*5673Saw148015 	else
65*5673Saw148015 		return ("fileset");
66*5673Saw148015 }
67*5673Saw148015 
/*
 * Strips the last component from a pathname, in place. Walking
 * backwards from the terminating NUL, every character is overwritten
 * with \0 up to and including the first forward slash '/' met.
 * The first character of the buffer is never overwritten, so a
 * string without any slash collapses to its first character and a
 * string whose only slash is the leading one collapses to "/".
 * Returns the "dir" pointer it was given.
 */
static char *
trunc_dirname(char *dir)
{
	char *cursor;

	for (cursor = dir + strlen(dir); cursor != dir; cursor--) {
		char was = *cursor;

		*cursor = '\0';
		if (was == '/')
			break;
	}

	return (dir);
}
895184Sek110237 
/*
 * Prints, on stderr, the list of options accepted when defining a
 * file or fileset and how to specify them.
 */
void
fileset_usage(void)
{
	/*
	 * The two adjacent literals are concatenated by the compiler;
	 * only one of them may carry the comma that separates "path"
	 * from "entries".
	 */
	(void) fprintf(stderr,
	    "define [file name=<name> | fileset name=<name>],path=<pathname>"
	    ",entries=<number>\n");
	(void) fprintf(stderr,
	    "		        [,dirwidth=[width]]\n");
	(void) fprintf(stderr,
	    "		        [,dirgamma=[100-10000]] "
	    "(Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,sizegamma=[100-10000]] (Gamma * 1000)\n");
	(void) fprintf(stderr,
	    "		        [,prealloc=[percent]]\n");
	(void) fprintf(stderr, "		        [,paralloc]\n");
	(void) fprintf(stderr, "		        [,reuse]\n");
	(void) fprintf(stderr, "\n");
}
1125184Sek110237 
1135184Sek110237 /*
1145184Sek110237  * Frees up memory mapped file region of supplied size. The
1155184Sek110237  * file descriptor "fd" indicates which memory mapped file.
1165184Sek110237  * If successful, returns 0. Otherwise returns -1 if "size"
1175184Sek110237  * is zero, or -1 times the number of times msync() failed.
1185184Sek110237  */
1195184Sek110237 static int
1205184Sek110237 fileset_freemem(int fd, off64_t size)
1215184Sek110237 {
1225184Sek110237 	off64_t left;
1235184Sek110237 	int ret = 0;
1245184Sek110237 
1255184Sek110237 	for (left = size; left > 0; left -= MMAP_SIZE) {
1265184Sek110237 		off64_t thismapsize;
1275184Sek110237 		caddr_t addr;
1285184Sek110237 
1295184Sek110237 		thismapsize = MIN(MMAP_SIZE, left);
1305184Sek110237 		addr = mmap64(0, thismapsize, PROT_READ|PROT_WRITE,
1315184Sek110237 		    MAP_SHARED, fd, size - left);
1325184Sek110237 		ret += msync(addr, thismapsize, MS_INVALIDATE);
1335184Sek110237 		(void) munmap(addr, thismapsize);
1345184Sek110237 	}
1355184Sek110237 	return (ret);
1365184Sek110237 }
1375184Sek110237 
1385184Sek110237 /*
1395184Sek110237  * Creates a path string from the filesetentry_t "*entry"
1405184Sek110237  * and all of its parent's path names. The resulting path
1415184Sek110237  * is a concatination of all the individual parent paths.
1425184Sek110237  * Allocates memory for the path string and returns a
1435184Sek110237  * pointer to it.
1445184Sek110237  */
1455184Sek110237 char *
1465184Sek110237 fileset_resolvepath(filesetentry_t *entry)
1475184Sek110237 {
1485184Sek110237 	filesetentry_t *fsep = entry;
1495184Sek110237 	char path[MAXPATHLEN];
1505184Sek110237 	char pathtmp[MAXPATHLEN];
1515184Sek110237 	char *s;
1525184Sek110237 
1535184Sek110237 	*path = 0;
1545184Sek110237 	while (fsep->fse_parent) {
1555184Sek110237 		(void) strcpy(pathtmp, "/");
1565184Sek110237 		(void) strcat(pathtmp, fsep->fse_path);
1575184Sek110237 		(void) strcat(pathtmp, path);
1585184Sek110237 		(void) strcpy(path, pathtmp);
1595184Sek110237 		fsep = fsep->fse_parent;
1605184Sek110237 	}
1615184Sek110237 
1625184Sek110237 	s = malloc(strlen(path) + 1);
1635184Sek110237 	(void) strcpy(s, path);
1645184Sek110237 	return (s);
1655184Sek110237 }
1665184Sek110237 
1675184Sek110237 /*
1685184Sek110237  * Creates multiple nested directories as required by the
1695184Sek110237  * supplied path. Starts at the end of the path, creating
1705184Sek110237  * a list of directories to mkdir, up to the root of the
1715184Sek110237  * path, then mkdirs them one at a time from the root on down.
1725184Sek110237  */
1735184Sek110237 static int
1745184Sek110237 fileset_mkdir(char *path, int mode)
1755184Sek110237 {
1765184Sek110237 	char *p;
1775184Sek110237 	char *dirs[65536];
1785184Sek110237 	int i = 0;
1795184Sek110237 
1805184Sek110237 	if ((p = strdup(path)) == NULL)
1815184Sek110237 		goto null_str;
1825184Sek110237 
1835184Sek110237 	/*
1845184Sek110237 	 * Fill an array of subdirectory path names until either we
1855184Sek110237 	 * reach the root or encounter an already existing subdirectory
1865184Sek110237 	 */
1875184Sek110237 	/* CONSTCOND */
1885184Sek110237 	while (1) {
1895184Sek110237 		struct stat64 sb;
1905184Sek110237 
1915184Sek110237 		if (stat64(p, &sb) == 0)
1925184Sek110237 			break;
1935184Sek110237 		if (strlen(p) < 3)
1945184Sek110237 			break;
1955184Sek110237 		if ((dirs[i] = strdup(p)) == NULL) {
1965184Sek110237 			free(p);
1975184Sek110237 			goto null_str;
1985184Sek110237 		}
1995184Sek110237 
2005184Sek110237 		(void) trunc_dirname(p);
2015184Sek110237 		i++;
2025184Sek110237 	}
2035184Sek110237 
2045184Sek110237 	/* Make the directories, from closest to root downwards. */
2055184Sek110237 	for (--i; i >= 0; i--) {
2065184Sek110237 		(void) mkdir(dirs[i], mode);
2075184Sek110237 		free(dirs[i]);
2085184Sek110237 	}
2095184Sek110237 
2105184Sek110237 	free(p);
2115184Sek110237 	return (0);
2125184Sek110237 
2135184Sek110237 null_str:
2145184Sek110237 	/* clean up */
2155184Sek110237 	for (--i; i >= 0; i--)
2165184Sek110237 		free(dirs[i]);
2175184Sek110237 
2185184Sek110237 	filebench_log(LOG_ERROR,
2195184Sek110237 	    "Failed to create directory path %s: Out of memory", path);
2205184Sek110237 
2215184Sek110237 	return (-1);
2225184Sek110237 }
2235184Sek110237 
224*5673Saw148015 /*
225*5673Saw148015  * creates the subdirectory tree for a fileset.
226*5673Saw148015  */
227*5673Saw148015 static int
228*5673Saw148015 fileset_create_subdirs(fileset_t *fileset, char *filesetpath)
229*5673Saw148015 {
230*5673Saw148015 	filesetentry_t *direntry;
231*5673Saw148015 	char full_path[MAXPATHLEN];
232*5673Saw148015 	char *part_path;
233*5673Saw148015 
234*5673Saw148015 	/* walk the subdirectory list, enstanciating subdirs */
235*5673Saw148015 	direntry = fileset->fs_dirlist;
236*5673Saw148015 	while (direntry) {
237*5673Saw148015 		(void) strcpy(full_path, filesetpath);
238*5673Saw148015 		part_path = fileset_resolvepath(direntry);
239*5673Saw148015 		(void) strcat(full_path, part_path);
240*5673Saw148015 		free(part_path);
241*5673Saw148015 
242*5673Saw148015 		/* now create this portion of the subdirectory tree */
243*5673Saw148015 		if (fileset_mkdir(full_path, 0755) == -1)
244*5673Saw148015 			return (-1);
245*5673Saw148015 
246*5673Saw148015 		direntry = direntry->fse_dirnext;
247*5673Saw148015 	}
248*5673Saw148015 	return (0);
249*5673Saw148015 }
250*5673Saw148015 
251*5673Saw148015 /*
252*5673Saw148015  * given a fileset entry, determines if the associated file
253*5673Saw148015  * needs to be allocated or not, and if so does the allocation.
254*5673Saw148015  */
255*5673Saw148015 static int
256*5673Saw148015 fileset_alloc_file(filesetentry_t *entry)
257*5673Saw148015 {
258*5673Saw148015 	char path[MAXPATHLEN];
259*5673Saw148015 	char *buf;
260*5673Saw148015 	struct stat64 sb;
261*5673Saw148015 	char *pathtmp;
262*5673Saw148015 	off64_t seek;
263*5673Saw148015 	int fd;
264*5673Saw148015 
265*5673Saw148015 	*path = 0;
266*5673Saw148015 	(void) strcpy(path, *entry->fse_fileset->fs_path);
267*5673Saw148015 	(void) strcat(path, "/");
268*5673Saw148015 	(void) strcat(path, entry->fse_fileset->fs_name);
269*5673Saw148015 	pathtmp = fileset_resolvepath(entry);
270*5673Saw148015 	(void) strcat(path, pathtmp);
271*5673Saw148015 
272*5673Saw148015 	filebench_log(LOG_DEBUG_IMPL, "Populated %s", entry->fse_path);
273*5673Saw148015 
274*5673Saw148015 	/* see if reusing and this file exists */
275*5673Saw148015 	if ((entry->fse_flags & FSE_REUSING) && (stat64(path, &sb) == 0)) {
276*5673Saw148015 		if ((fd = open64(path, O_RDWR)) < 0) {
277*5673Saw148015 			filebench_log(LOG_INFO,
278*5673Saw148015 			    "Attempted but failed to Re-use file %s",
279*5673Saw148015 			    path);
280*5673Saw148015 			return (-1);
281*5673Saw148015 		}
282*5673Saw148015 
283*5673Saw148015 		if (sb.st_size == (off64_t)entry->fse_size) {
284*5673Saw148015 			filebench_log(LOG_INFO,
285*5673Saw148015 			    "Re-using file %s", path);
286*5673Saw148015 
287*5673Saw148015 			if (!integer_isset(entry->fse_fileset->fs_cached))
288*5673Saw148015 				(void) fileset_freemem(fd,
289*5673Saw148015 				    entry->fse_size);
290*5673Saw148015 
291*5673Saw148015 			entry->fse_flags |= FSE_EXISTS;
292*5673Saw148015 			(void) close(fd);
293*5673Saw148015 			return (0);
294*5673Saw148015 
295*5673Saw148015 		} else if (sb.st_size > (off64_t)entry->fse_size) {
296*5673Saw148015 			/* reuse, but too large */
297*5673Saw148015 			filebench_log(LOG_INFO,
298*5673Saw148015 			    "Truncating & re-using file %s", path);
299*5673Saw148015 
300*5673Saw148015 			(void) ftruncate64(fd,
301*5673Saw148015 			    (off64_t)entry->fse_size);
302*5673Saw148015 
303*5673Saw148015 			if (!integer_isset(entry->fse_fileset->fs_cached))
304*5673Saw148015 				(void) fileset_freemem(fd,
305*5673Saw148015 				    entry->fse_size);
306*5673Saw148015 
307*5673Saw148015 			entry->fse_flags |= FSE_EXISTS;
308*5673Saw148015 			(void) close(fd);
309*5673Saw148015 			return (0);
310*5673Saw148015 		}
311*5673Saw148015 	} else {
312*5673Saw148015 
313*5673Saw148015 		/* No file or not reusing, so create */
314*5673Saw148015 		if ((fd = open64(path, O_RDWR | O_CREAT, 0644)) < 0) {
315*5673Saw148015 			filebench_log(LOG_ERROR,
316*5673Saw148015 			    "Failed to pre-allocate file %s: %s",
317*5673Saw148015 			    path, strerror(errno));
318*5673Saw148015 
319*5673Saw148015 			return (-1);
320*5673Saw148015 		}
321*5673Saw148015 	}
322*5673Saw148015 
323*5673Saw148015 	if ((buf = (char *)malloc(FILE_ALLOC_BLOCK)) == NULL)
324*5673Saw148015 		return (-1);
325*5673Saw148015 
326*5673Saw148015 	entry->fse_flags |= FSE_EXISTS;
327*5673Saw148015 
328*5673Saw148015 	for (seek = 0; seek < entry->fse_size; ) {
329*5673Saw148015 		off64_t wsize;
330*5673Saw148015 		int ret = 0;
331*5673Saw148015 
332*5673Saw148015 		/*
333*5673Saw148015 		 * Write FILE_ALLOC_BLOCK's worth,
334*5673Saw148015 		 * except on last write
335*5673Saw148015 		 */
336*5673Saw148015 		wsize = MIN(entry->fse_size - seek, FILE_ALLOC_BLOCK);
337*5673Saw148015 
338*5673Saw148015 		ret = write(fd, buf, wsize);
339*5673Saw148015 		if (ret != wsize) {
340*5673Saw148015 			filebench_log(LOG_ERROR,
341*5673Saw148015 			    "Failed to pre-allocate file %s: %s",
342*5673Saw148015 			    path, strerror(errno));
343*5673Saw148015 			(void) close(fd);
344*5673Saw148015 			free(buf);
345*5673Saw148015 			return (-1);
346*5673Saw148015 		}
347*5673Saw148015 		seek += wsize;
348*5673Saw148015 	}
349*5673Saw148015 
350*5673Saw148015 	if (!integer_isset(entry->fse_fileset->fs_cached))
351*5673Saw148015 		(void) fileset_freemem(fd, entry->fse_size);
352*5673Saw148015 
353*5673Saw148015 	(void) close(fd);
354*5673Saw148015 
355*5673Saw148015 	free(buf);
356*5673Saw148015 
357*5673Saw148015 	filebench_log(LOG_DEBUG_IMPL,
358*5673Saw148015 	    "Pre-allocated file %s size %lld", path, entry->fse_size);
359*5673Saw148015 
360*5673Saw148015 	return (0);
361*5673Saw148015 }
362*5673Saw148015 
363*5673Saw148015 /*
364*5673Saw148015  * given a fileset entry, determines if the associated file
365*5673Saw148015  * needs to be allocated or not, and if so does the allocation.
366*5673Saw148015  */
367*5673Saw148015 static void *
368*5673Saw148015 fileset_alloc_thread(filesetentry_t *entry)
369*5673Saw148015 {
370*5673Saw148015 	if (fileset_alloc_file(entry) == -1) {
371*5673Saw148015 		(void) pthread_mutex_lock(&paralloc_lock);
372*5673Saw148015 		paralloc_count = -1;
373*5673Saw148015 	} else {
374*5673Saw148015 		(void) pthread_mutex_lock(&paralloc_lock);
375*5673Saw148015 		paralloc_count--;
376*5673Saw148015 	}
377*5673Saw148015 
378*5673Saw148015 	(void) pthread_cond_signal(&paralloc_cv);
379*5673Saw148015 	(void) pthread_mutex_unlock(&paralloc_lock);
380*5673Saw148015 
381*5673Saw148015 	pthread_exit(NULL);
382*5673Saw148015 	return (NULL);
383*5673Saw148015 }
384*5673Saw148015 
3855184Sek110237 
3865184Sek110237 /*
3875184Sek110237  * First creates the parent directories of the file using
3885184Sek110237  * fileset_mkdir(). Then Optionally sets the O_DSYNC flag
3895184Sek110237  * and opens the file with open64(). It unlocks the fileset
3905184Sek110237  * entry lock, sets the DIRECTIO_ON or DIRECTIO_OFF flags
3915184Sek110237  * as requested, and returns the file descriptor integer
3925184Sek110237  * for the opened file.
3935184Sek110237  */
3945184Sek110237 int
3955184Sek110237 fileset_openfile(fileset_t *fileset,
3965184Sek110237     filesetentry_t *entry, int flag, int mode, int attrs)
3975184Sek110237 {
3985184Sek110237 	char path[MAXPATHLEN];
3995184Sek110237 	char dir[MAXPATHLEN];
4005184Sek110237 	char *pathtmp;
4015184Sek110237 	struct stat64 sb;
4025184Sek110237 	int fd;
4035184Sek110237 	int open_attrs = 0;
4045184Sek110237 
4055184Sek110237 	*path = 0;
4065184Sek110237 	(void) strcpy(path, *fileset->fs_path);
4075184Sek110237 	(void) strcat(path, "/");
4085184Sek110237 	(void) strcat(path, fileset->fs_name);
4095184Sek110237 	pathtmp = fileset_resolvepath(entry);
4105184Sek110237 	(void) strcat(path, pathtmp);
4115184Sek110237 	(void) strcpy(dir, path);
4125184Sek110237 	free(pathtmp);
4135184Sek110237 	(void) trunc_dirname(dir);
4145184Sek110237 
4155184Sek110237 	/* If we are going to create a file, create the parent dirs */
4165184Sek110237 	if ((flag & O_CREAT) && (stat64(dir, &sb) != 0)) {
4175184Sek110237 		if (fileset_mkdir(dir, 0755) == -1)
4185184Sek110237 			return (-1);
4195184Sek110237 	}
4205184Sek110237 
4215184Sek110237 	if (flag & O_CREAT)
4225184Sek110237 		entry->fse_flags |= FSE_EXISTS;
4235184Sek110237 
4245184Sek110237 	if (attrs & FLOW_ATTR_DSYNC) {
4255184Sek110237 #ifdef sun
4265184Sek110237 		open_attrs |= O_DSYNC;
4275184Sek110237 #else
4285184Sek110237 		open_attrs |= O_FSYNC;
4295184Sek110237 #endif
4305184Sek110237 	}
4315184Sek110237 
4325184Sek110237 	if ((fd = open64(path, flag | open_attrs, mode)) < 0) {
4335184Sek110237 		filebench_log(LOG_ERROR,
4345184Sek110237 		    "Failed to open file %s: %s",
4355184Sek110237 		    path, strerror(errno));
4365184Sek110237 		(void) ipc_mutex_unlock(&entry->fse_lock);
4375184Sek110237 		return (-1);
4385184Sek110237 	}
4395184Sek110237 	(void) ipc_mutex_unlock(&entry->fse_lock);
4405184Sek110237 
4415184Sek110237 #ifdef sun
4425184Sek110237 	if (attrs & FLOW_ATTR_DIRECTIO)
4435184Sek110237 		(void) directio(fd, DIRECTIO_ON);
4445184Sek110237 	else
4455184Sek110237 		(void) directio(fd, DIRECTIO_OFF);
4465184Sek110237 #endif
4475184Sek110237 
4485184Sek110237 	return (fd);
4495184Sek110237 }
4505184Sek110237 
4515184Sek110237 
4525184Sek110237 /*
4535184Sek110237  * Selects a fileset entry from a fileset. If the
4545184Sek110237  * FILESET_PICKDIR flag is set it will pick a directory
4555184Sek110237  * entry, otherwise a file entry. The FILESET_PICKRESET
4565184Sek110237  * flag will cause it to reset the free list to the
4575184Sek110237  * overall list (file or directory). The FILESET_PICKUNIQUE
4585184Sek110237  * flag will take an entry off of one of the free (unused)
4595184Sek110237  * lists (file or directory), otherwise the entry will be
4605184Sek110237  * picked off of one of the rotor lists (file or directory).
4615184Sek110237  * The FILESET_PICKEXISTS will insure that only extant
4625184Sek110237  * (FSE_EXISTS) state files are selected, while
4635184Sek110237  * FILESET_PICKNOEXIST insures that only non extant
4645184Sek110237  * (not FSE_EXISTS) state files are selected.
4655184Sek110237  */
4665184Sek110237 filesetentry_t *
4675184Sek110237 fileset_pick(fileset_t *fileset, int flags, int tid)
4685184Sek110237 {
4695184Sek110237 	filesetentry_t *entry = NULL;
4705184Sek110237 	filesetentry_t *first = NULL;
4715184Sek110237 
4725184Sek110237 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
4735184Sek110237 
4745184Sek110237 	while (entry == NULL) {
4755184Sek110237 
4765184Sek110237 		if ((flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
4775184Sek110237 			entry = fileset->fs_dirlist;
4785184Sek110237 			while (entry) {
4795184Sek110237 				entry->fse_flags |= FSE_FREE;
4805184Sek110237 				entry = entry->fse_dirnext;
4815184Sek110237 			}
4825184Sek110237 			fileset->fs_dirfree = fileset->fs_dirlist;
4835184Sek110237 		}
4845184Sek110237 
4855184Sek110237 		if (!(flags & FILESET_PICKDIR) && (flags & FILESET_PICKRESET)) {
4865184Sek110237 			entry = fileset->fs_filelist;
4875184Sek110237 			while (entry) {
4885184Sek110237 				entry->fse_flags |= FSE_FREE;
4895184Sek110237 				entry = entry->fse_filenext;
4905184Sek110237 			}
4915184Sek110237 			fileset->fs_filefree = fileset->fs_filelist;
4925184Sek110237 		}
4935184Sek110237 
4945184Sek110237 		if (flags & FILESET_PICKUNIQUE) {
4955184Sek110237 			if (flags & FILESET_PICKDIR) {
4965184Sek110237 				entry = fileset->fs_dirfree;
4975184Sek110237 				if (entry == NULL)
4985184Sek110237 					goto empty;
4995184Sek110237 				fileset->fs_dirfree = entry->fse_dirnext;
5005184Sek110237 			} else {
5015184Sek110237 				entry = fileset->fs_filefree;
5025184Sek110237 				if (entry == NULL)
5035184Sek110237 					goto empty;
5045184Sek110237 				fileset->fs_filefree = entry->fse_filenext;
5055184Sek110237 			}
5065184Sek110237 			entry->fse_flags &= ~FSE_FREE;
5075184Sek110237 		} else {
5085184Sek110237 			if (flags & FILESET_PICKDIR) {
5095184Sek110237 				entry = fileset->fs_dirrotor;
5105184Sek110237 				if (entry == NULL)
5115184Sek110237 				fileset->fs_dirrotor =
5125184Sek110237 				    entry = fileset->fs_dirlist;
5135184Sek110237 				fileset->fs_dirrotor = entry->fse_dirnext;
5145184Sek110237 			} else {
5155184Sek110237 				entry = fileset->fs_filerotor[tid];
5165184Sek110237 				if (entry == NULL)
5175184Sek110237 					fileset->fs_filerotor[tid] =
5185184Sek110237 					    entry = fileset->fs_filelist;
5195184Sek110237 				fileset->fs_filerotor[tid] =
5205184Sek110237 				    entry->fse_filenext;
5215184Sek110237 			}
5225184Sek110237 		}
5235184Sek110237 
5245184Sek110237 		if (first == entry)
5255184Sek110237 			goto empty;
5265184Sek110237 
5275184Sek110237 		if (first == NULL)
5285184Sek110237 			first = entry;
5295184Sek110237 
5305184Sek110237 		/* Return locked entry */
5315184Sek110237 		(void) ipc_mutex_lock(&entry->fse_lock);
5325184Sek110237 
5335184Sek110237 		/* If we ask for an existing file, go round again */
5345184Sek110237 		if ((flags & FILESET_PICKEXISTS) &&
5355184Sek110237 		    !(entry->fse_flags & FSE_EXISTS)) {
5365184Sek110237 			(void) ipc_mutex_unlock(&entry->fse_lock);
5375184Sek110237 			entry = NULL;
5385184Sek110237 		}
5395184Sek110237 
5405184Sek110237 		/* If we ask for not an existing file, go round again */
5415184Sek110237 		if ((flags & FILESET_PICKNOEXIST) &&
5425184Sek110237 		    (entry->fse_flags & FSE_EXISTS)) {
5435184Sek110237 			(void) ipc_mutex_unlock(&entry->fse_lock);
5445184Sek110237 			entry = NULL;
5455184Sek110237 		}
5465184Sek110237 	}
5475184Sek110237 
5485184Sek110237 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
5495184Sek110237 	filebench_log(LOG_DEBUG_SCRIPT, "Picked file %s", entry->fse_path);
5505184Sek110237 	return (entry);
5515184Sek110237 
5525184Sek110237 empty:
5535184Sek110237 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
5545184Sek110237 	return (NULL);
5555184Sek110237 }
5565184Sek110237 
5575184Sek110237 /*
5585184Sek110237  * Given a fileset "fileset", create the associated files as
5595184Sek110237  * specified in the attributes of the fileset. The fileset is
5605184Sek110237  * rooted in a directory whose pathname is in fs_path. If the
5615184Sek110237  * directory exists, meaning that there is already a fileset,
5625184Sek110237  * and the fs_reuse attribute is false, then remove it and all
5635184Sek110237  * its contained files and subdirectories. Next, the routine
5645184Sek110237  * creates a root directory for the fileset. All the file type
5655184Sek110237  * filesetentries are cycled through creating as needed
5665184Sek110237  * their containing subdirectory trees in the filesystem and
5675184Sek110237  * creating actual files for fs_preallocpercent of them. The
5685184Sek110237  * created files are filled with fse_size bytes of unitialized
5695184Sek110237  * data. The routine returns -1 on errors, 0 on success.
5705184Sek110237  */
5715184Sek110237 static int
5725184Sek110237 fileset_create(fileset_t *fileset)
5735184Sek110237 {
5745184Sek110237 	filesetentry_t *entry;
5755184Sek110237 	char path[MAXPATHLEN];
5765184Sek110237 	struct stat64 sb;
5775184Sek110237 	int pickflags = FILESET_PICKUNIQUE | FILESET_PICKRESET;
5785184Sek110237 	hrtime_t start = gethrtime();
5795184Sek110237 	int preallocated = 0;
5805184Sek110237 	int reusing = 0;
5815184Sek110237 
5825184Sek110237 	if (*fileset->fs_path == NULL) {
583*5673Saw148015 		filebench_log(LOG_ERROR, "%s path not set",
584*5673Saw148015 		    fileset_entity_name(fileset));
5855184Sek110237 		return (-1);
5865184Sek110237 	}
5875184Sek110237 
588*5673Saw148015 #ifdef HAVE_RAW_SUPPORT
589*5673Saw148015 	/* treat raw device as special case */
590*5673Saw148015 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
591*5673Saw148015 		return (0);
592*5673Saw148015 #endif /* HAVE_RAW_SUPPORT */
593*5673Saw148015 
5945184Sek110237 	/* XXX Add check to see if there is enough space */
5955184Sek110237 
5965184Sek110237 	/* Remove existing */
5975184Sek110237 	(void) strcpy(path, *fileset->fs_path);
5985184Sek110237 	(void) strcat(path, "/");
5995184Sek110237 	(void) strcat(path, fileset->fs_name);
6005184Sek110237 	if ((stat64(path, &sb) == 0) && (strlen(path) > 3) &&
6015184Sek110237 	    (strlen(*fileset->fs_path) > 2)) {
6025184Sek110237 		if (!integer_isset(fileset->fs_reuse)) {
6035184Sek110237 			char cmd[MAXPATHLEN];
6045184Sek110237 
6055184Sek110237 			(void) snprintf(cmd, sizeof (cmd), "rm -rf %s", path);
6065184Sek110237 			(void) system(cmd);
6075184Sek110237 			filebench_log(LOG_VERBOSE,
608*5673Saw148015 			    "Removed any existing %s %s in %lld seconds",
609*5673Saw148015 			    fileset_entity_name(fileset), fileset->fs_name,
6105184Sek110237 			    ((gethrtime() - start) / 1000000000) + 1);
6115184Sek110237 		} else {
6125184Sek110237 			/* we are re-using */
6135184Sek110237 			reusing = 1;
6145184Sek110237 			filebench_log(LOG_VERBOSE,
615*5673Saw148015 			    "Re-using %s %s on %s file system.",
616*5673Saw148015 			    fileset_entity_name(fileset),
6175184Sek110237 			    fileset->fs_name, sb.st_fstype);
6185184Sek110237 		}
6195184Sek110237 	}
6205184Sek110237 	(void) mkdir(path, 0755);
6215184Sek110237 
622*5673Saw148015 	/* make the filesets directory tree */
623*5673Saw148015 	if (fileset_create_subdirs(fileset, path) == -1)
624*5673Saw148015 		return (-1);
625*5673Saw148015 
6265184Sek110237 	start = gethrtime();
6275184Sek110237 
628*5673Saw148015 	filebench_log(LOG_VERBOSE, "Creating %s %s...",
629*5673Saw148015 	    fileset_entity_name(fileset), fileset->fs_name);
630*5673Saw148015 
631*5673Saw148015 	if (!integer_isset(fileset->fs_prealloc))
632*5673Saw148015 		goto exit;
6335184Sek110237 
6345184Sek110237 	while (entry = fileset_pick(fileset, pickflags, 0)) {
6355184Sek110237 		int randno;
636*5673Saw148015 		pthread_t tid;
6375184Sek110237 
6385184Sek110237 		pickflags = FILESET_PICKUNIQUE;
6395184Sek110237 
6405184Sek110237 		entry->fse_flags &= ~FSE_EXISTS;
6415184Sek110237 
6425184Sek110237 		randno = ((RAND_MAX * (100 - *(fileset->fs_preallocpercent)))
6435184Sek110237 		    / 100);
6445184Sek110237 
645*5673Saw148015 		/* entry doesn't need to be locked during initialization */
646*5673Saw148015 		(void) ipc_mutex_unlock(&entry->fse_lock);
647*5673Saw148015 
648*5673Saw148015 		if (rand() < randno)
6495184Sek110237 			continue;
6505184Sek110237 
6515184Sek110237 		preallocated++;
6525184Sek110237 
653*5673Saw148015 		if (reusing)
654*5673Saw148015 			entry->fse_flags |= FSE_REUSING;
655*5673Saw148015 		else
656*5673Saw148015 			entry->fse_flags &= (~FSE_REUSING);
657*5673Saw148015 
658*5673Saw148015 		if (integer_isset(fileset->fs_paralloc)) {
6595184Sek110237 
660*5673Saw148015 			/* fire off a separate allocation thread */
661*5673Saw148015 			(void) pthread_mutex_lock(&paralloc_lock);
662*5673Saw148015 			while (paralloc_count >= MAX_PARALLOC_THREADS) {
663*5673Saw148015 				(void) pthread_cond_wait(
664*5673Saw148015 				    &paralloc_cv, &paralloc_lock);
665*5673Saw148015 			}
666*5673Saw148015 
667*5673Saw148015 			if (paralloc_count < 0) {
668*5673Saw148015 				(void) pthread_mutex_unlock(&paralloc_lock);
6695184Sek110237 				return (-1);
6705184Sek110237 			}
6715184Sek110237 
672*5673Saw148015 			paralloc_count++;
673*5673Saw148015 			(void) pthread_mutex_unlock(&paralloc_lock);
6745184Sek110237 
675*5673Saw148015 			if (pthread_create(&tid, NULL,
676*5673Saw148015 			    (void *(*)(void*))fileset_alloc_thread,
677*5673Saw148015 			    entry) != 0) {
6785184Sek110237 				filebench_log(LOG_ERROR,
679*5673Saw148015 				    "File prealloc thread create failed");
680*5673Saw148015 				filebench_shutdown(1);
6815184Sek110237 			}
6825184Sek110237 
683*5673Saw148015 		} else {
684*5673Saw148015 			if (fileset_alloc_file(entry) == -1)
685*5673Saw148015 				return (-1);
686*5673Saw148015 		}
687*5673Saw148015 	}
6885184Sek110237 
689*5673Saw148015 exit:
6905184Sek110237 	filebench_log(LOG_VERBOSE,
691*5673Saw148015 	    "Preallocated %d of %lld of %s %s in %lld seconds",
6925184Sek110237 	    preallocated,
6935184Sek110237 	    *(fileset->fs_entries),
694*5673Saw148015 	    fileset_entity_name(fileset),
6955184Sek110237 	    fileset->fs_name,
6965184Sek110237 	    ((gethrtime() - start) / 1000000000) + 1);
6975184Sek110237 
6985184Sek110237 	return (0);
6995184Sek110237 }
7005184Sek110237 
7015184Sek110237 /*
7025184Sek110237  * Adds an entry to the fileset's file list. Single threaded so
7035184Sek110237  * no locking needed.
7045184Sek110237  */
7055184Sek110237 static void
7065184Sek110237 fileset_insfilelist(fileset_t *fileset, filesetentry_t *entry)
7075184Sek110237 {
7085184Sek110237 	if (fileset->fs_filelist == NULL) {
7095184Sek110237 		fileset->fs_filelist = entry;
7105184Sek110237 		entry->fse_filenext = NULL;
7115184Sek110237 	} else {
7125184Sek110237 		entry->fse_filenext = fileset->fs_filelist;
7135184Sek110237 		fileset->fs_filelist = entry;
7145184Sek110237 	}
7155184Sek110237 }
7165184Sek110237 
7175184Sek110237 /*
7185184Sek110237  * Adds an entry to the fileset's directory list. Single
7195184Sek110237  * threaded so no locking needed.
7205184Sek110237  */
7215184Sek110237 static void
7225184Sek110237 fileset_insdirlist(fileset_t *fileset, filesetentry_t *entry)
7235184Sek110237 {
7245184Sek110237 	if (fileset->fs_dirlist == NULL) {
7255184Sek110237 		fileset->fs_dirlist = entry;
7265184Sek110237 		entry->fse_dirnext = NULL;
7275184Sek110237 	} else {
7285184Sek110237 		entry->fse_dirnext = fileset->fs_dirlist;
7295184Sek110237 		fileset->fs_dirlist = entry;
7305184Sek110237 	}
7315184Sek110237 }
7325184Sek110237 
7335184Sek110237 /*
7345184Sek110237  * Obtaines a filesetentry entity for a file to be placed in a
7355184Sek110237  * (sub)directory of a fileset. The size of the file may be
7365184Sek110237  * specified by fs_meansize, or calculated from a gamma
7375184Sek110237  * distribution of parameter fs_sizegamma and of mean size
7385184Sek110237  * fs_meansize. The filesetentry entity is placed on the file
7395184Sek110237  * list in the specified parent filesetentry entity, which may
7405184Sek110237  * be a directory filesetentry, or the root filesetentry in the
7415184Sek110237  * fileset. It is also placed on the fileset's list of all
7425184Sek110237  * contained files. Returns 0 if successful or -1 if ipc memory
7435184Sek110237  * for the path string cannot be allocated.
7445184Sek110237  */
7455184Sek110237 static int
7465184Sek110237 fileset_populate_file(fileset_t *fileset, filesetentry_t *parent, int serial)
7475184Sek110237 {
7485184Sek110237 	char tmpname[16];
7495184Sek110237 	filesetentry_t *entry;
7505184Sek110237 	double drand;
7515184Sek110237 	double gamma;
7525184Sek110237 
7535184Sek110237 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
7545184Sek110237 	    == NULL) {
7555184Sek110237 		filebench_log(LOG_ERROR,
7565184Sek110237 		    "fileset_populate_file: Can't malloc filesetentry");
7575184Sek110237 		return (-1);
7585184Sek110237 	}
7595184Sek110237 
7605184Sek110237 	(void) pthread_mutex_init(&entry->fse_lock, ipc_mutexattr());
7615184Sek110237 	entry->fse_parent = parent;
7625184Sek110237 	entry->fse_fileset = fileset;
7635184Sek110237 	entry->fse_flags |= FSE_FREE;
7645184Sek110237 	fileset_insfilelist(fileset, entry);
7655184Sek110237 
7665184Sek110237 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
7675184Sek110237 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
7685184Sek110237 		filebench_log(LOG_ERROR,
7695184Sek110237 		    "fileset_populate_file: Can't alloc path string");
7705184Sek110237 		return (-1);
7715184Sek110237 	}
7725184Sek110237 
7735184Sek110237 	gamma = *(fileset->fs_sizegamma) / 1000.0;
7745184Sek110237 
7755184Sek110237 	if (gamma > 0) {
7765184Sek110237 		drand = gamma_dist_knuth(gamma, fileset->fs_meansize / gamma);
7775184Sek110237 		entry->fse_size = (off64_t)drand;
7785184Sek110237 	} else {
7795184Sek110237 		entry->fse_size = (off64_t)fileset->fs_meansize;
7805184Sek110237 	}
7815184Sek110237 
7825184Sek110237 	fileset->fs_bytes += entry->fse_size;
7835184Sek110237 
7845184Sek110237 	fileset->fs_realfiles++;
7855184Sek110237 	return (0);
7865184Sek110237 }
7875184Sek110237 
7885184Sek110237 /*
7895184Sek110237  * Creates a directory node in a fileset, by obtaining a
7905184Sek110237  * filesetentry entity for the node and initializing it
7915184Sek110237  * according to parameters of the fileset. It determines a
7925184Sek110237  * directory tree depth and directory width, optionally using
7935184Sek110237  * a gamma distribution. If its calculated depth is less then
7945184Sek110237  * its actual depth in the directory tree, it becomes a leaf
7955184Sek110237  * node and files itself with "width" number of file type
7965184Sek110237  * filesetentries, otherwise it files itself with "width"
7975184Sek110237  * number of directory type filesetentries, using recursive
7985184Sek110237  * calls to fileset_populate_subdir. The end result of the
7995184Sek110237  * initial call to this routine is a tree of directories of
8005184Sek110237  * random width and varying depth with sufficient leaf
8015184Sek110237  * directories to contain all required files.
8025184Sek110237  * Returns 0 on success. Returns -1 if ipc path string memory
8035184Sek110237  * cannot be allocated and returns an error code (currently
8045184Sek110237  * also -1) from calls to fileset_populate_file or recursive
8055184Sek110237  * calls to fileset_populate_subdir.
8065184Sek110237  */
8075184Sek110237 static int
8085184Sek110237 fileset_populate_subdir(fileset_t *fileset, filesetentry_t *parent,
8095184Sek110237     int serial, double depth)
8105184Sek110237 {
8115184Sek110237 	double randepth, drand, ranwidth, gamma;
8125184Sek110237 	int isleaf = 0;
8135184Sek110237 	char tmpname[16];
8145184Sek110237 	filesetentry_t *entry;
8155184Sek110237 	int i;
8165184Sek110237 
8175184Sek110237 	depth += 1;
8185184Sek110237 
8195184Sek110237 	/* Create dir node */
8205184Sek110237 	if ((entry = (filesetentry_t *)ipc_malloc(FILEBENCH_FILESETENTRY))
8215184Sek110237 	    == NULL) {
8225184Sek110237 		filebench_log(LOG_ERROR,
8235184Sek110237 		    "fileset_populate_subdir: Can't malloc filesetentry");
8245184Sek110237 		return (-1);
8255184Sek110237 	}
8265184Sek110237 
8275184Sek110237 	(void) pthread_mutex_init(&entry->fse_lock, ipc_mutexattr());
8285184Sek110237 
8295184Sek110237 	(void) snprintf(tmpname, sizeof (tmpname), "%08d", serial);
8305184Sek110237 	if ((entry->fse_path = (char *)ipc_pathalloc(tmpname)) == NULL) {
8315184Sek110237 		filebench_log(LOG_ERROR,
8325184Sek110237 		    "fileset_populate_subdir: Can't alloc path string");
8335184Sek110237 		return (-1);
8345184Sek110237 	}
8355184Sek110237 
8365184Sek110237 	entry->fse_parent = parent;
8375184Sek110237 	entry->fse_flags |= FSE_DIR | FSE_FREE;
8385184Sek110237 	fileset_insdirlist(fileset, entry);
8395184Sek110237 
8405184Sek110237 	gamma = *(fileset->fs_dirgamma) / 1000.0;
8415184Sek110237 	if (gamma > 0) {
8425184Sek110237 		drand = gamma_dist_knuth(gamma, fileset->fs_meandepth / gamma);
8435184Sek110237 		randepth = (int)drand;
8445184Sek110237 	} else {
8455184Sek110237 		randepth = (int)fileset->fs_meandepth;
8465184Sek110237 	}
8475184Sek110237 
8485184Sek110237 	gamma = *(fileset->fs_sizegamma) / 1000.0;
8495184Sek110237 
8505184Sek110237 	if (gamma > 0) {
8515184Sek110237 		drand = gamma_dist_knuth(gamma, fileset->fs_meanwidth / gamma);
8525184Sek110237 		ranwidth = drand;
8535184Sek110237 	} else {
8545184Sek110237 		ranwidth = fileset->fs_meanwidth;
8555184Sek110237 	}
8565184Sek110237 
8575184Sek110237 	if (randepth == 0)
8585184Sek110237 		randepth = 1;
8595184Sek110237 	if (ranwidth == 0)
8605184Sek110237 		ranwidth = 1;
8615184Sek110237 	if (depth >= randepth)
8625184Sek110237 		isleaf = 1;
8635184Sek110237 
8645184Sek110237 	/*
8655184Sek110237 	 * Create directory of random width according to distribution, or
8665184Sek110237 	 * if root directory, continue until #files required
8675184Sek110237 	 */
8685184Sek110237 	for (i = 1;
8695184Sek110237 	    ((parent == NULL) || (i < ranwidth + 1)) &&
8705184Sek110237 	    (fileset->fs_realfiles < *(fileset->fs_entries)); i++) {
8715184Sek110237 		int ret = 0;
8725184Sek110237 
8735184Sek110237 		if (parent && isleaf)
8745184Sek110237 			ret = fileset_populate_file(fileset, entry, i);
8755184Sek110237 		else
8765184Sek110237 			ret = fileset_populate_subdir(fileset, entry, i, depth);
8775184Sek110237 
8785184Sek110237 		if (ret != 0)
8795184Sek110237 			return (ret);
8805184Sek110237 	}
8815184Sek110237 	return (0);
8825184Sek110237 }
8835184Sek110237 
8845184Sek110237 /*
8855184Sek110237  * Populates a fileset with files and subdirectory entries. Uses
8865184Sek110237  * the supplied fs_dirwidth and fs_entries (number of files) to
8875184Sek110237  * calculate the required fs_meandepth (of subdirectories) and
8885184Sek110237  * initialize the fs_meanwidth and fs_meansize variables. Then
8895184Sek110237  * calls fileset_populate_subdir() to do the recursive
8905184Sek110237  * subdirectory entry creation and leaf file entry creation. All
8915184Sek110237  * of the above is skipped if the fileset has already been
8925184Sek110237  * populated. Returns 0 on success, or an error code from the
8935184Sek110237  * call to fileset_populate_subdir if that call fails.
8945184Sek110237  */
8955184Sek110237 static int
8965184Sek110237 fileset_populate(fileset_t *fileset)
8975184Sek110237 {
8985184Sek110237 	int nfiles;
8995184Sek110237 	int meandirwidth = *(fileset->fs_dirwidth);
9005184Sek110237 	int ret;
9015184Sek110237 
9025184Sek110237 	/* Skip if already populated */
9035184Sek110237 	if (fileset->fs_bytes > 0)
9045184Sek110237 		goto exists;
9055184Sek110237 
906*5673Saw148015 #ifdef HAVE_RAW_SUPPORT
907*5673Saw148015 	/* check for raw device */
908*5673Saw148015 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV)
909*5673Saw148015 		return (0);
910*5673Saw148015 #endif /* HAVE_RAW_SUPPORT */
911*5673Saw148015 
9125184Sek110237 	/*
9135184Sek110237 	 * Input params are:
9145184Sek110237 	 *	# of files
9155184Sek110237 	 *	ave # of files per dir
9165184Sek110237 	 *	max size of dir
9175184Sek110237 	 *	# ave size of file
9185184Sek110237 	 *	max size of file
9195184Sek110237 	 */
9205184Sek110237 	nfiles = *(fileset->fs_entries);
9215184Sek110237 	fileset->fs_meandepth = log(nfiles) / log(meandirwidth);
9225184Sek110237 	fileset->fs_meanwidth = meandirwidth;
9235184Sek110237 	fileset->fs_meansize = *(fileset->fs_size);
9245184Sek110237 
9255184Sek110237 	if ((ret = fileset_populate_subdir(fileset, NULL, 1, 0)) != 0)
9265184Sek110237 		return (ret);
9275184Sek110237 
9285184Sek110237 
9295184Sek110237 exists:
930*5673Saw148015 	if (fileset->fs_attrs & FILESET_IS_FILE) {
931*5673Saw148015 		filebench_log(LOG_VERBOSE, "File %s: mbytes=%lld",
932*5673Saw148015 		    fileset->fs_name,
933*5673Saw148015 		    fileset->fs_bytes / 1024UL / 1024UL);
934*5673Saw148015 	} else {
935*5673Saw148015 		filebench_log(LOG_VERBOSE, "Fileset %s: %lld files, "
936*5673Saw148015 		    "avg dir = %.1lf, avg depth = %.1lf, mbytes=%lld",
937*5673Saw148015 		    fileset->fs_name,
938*5673Saw148015 		    *(fileset->fs_entries),
939*5673Saw148015 		    fileset->fs_meanwidth,
940*5673Saw148015 		    fileset->fs_meandepth,
941*5673Saw148015 		    fileset->fs_bytes / 1024UL / 1024UL);
942*5673Saw148015 	}
9435184Sek110237 	return (0);
9445184Sek110237 }
9455184Sek110237 
9465184Sek110237 /*
9475184Sek110237  * Allocates a fileset instance, initializes fs_dirgamma and
9485184Sek110237  * fs_sizegamma default values, and sets the fileset name to the
9495184Sek110237  * supplied name string. Puts the allocated fileset on the
9505184Sek110237  * master fileset list and returns a pointer to it.
9515184Sek110237  */
9525184Sek110237 fileset_t *
9535184Sek110237 fileset_define(char *name)
9545184Sek110237 {
9555184Sek110237 	fileset_t *fileset;
9565184Sek110237 
9575184Sek110237 	if (name == NULL)
9585184Sek110237 		return (NULL);
9595184Sek110237 
9605184Sek110237 	if ((fileset = (fileset_t *)ipc_malloc(FILEBENCH_FILESET)) == NULL) {
9615184Sek110237 		filebench_log(LOG_ERROR,
9625184Sek110237 		    "fileset_define: Can't malloc fileset");
9635184Sek110237 		return (NULL);
9645184Sek110237 	}
9655184Sek110237 
9665184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "Defining file %s", name);
9675184Sek110237 
9685184Sek110237 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
9695184Sek110237 
9705184Sek110237 	fileset->fs_dirgamma = integer_alloc(1500);
9715184Sek110237 	fileset->fs_sizegamma = integer_alloc(1500);
9725184Sek110237 
9735184Sek110237 	/* Add fileset to global list */
9745184Sek110237 	if (filebench_shm->filesetlist == NULL) {
9755184Sek110237 		filebench_shm->filesetlist = fileset;
9765184Sek110237 		fileset->fs_next = NULL;
9775184Sek110237 	} else {
9785184Sek110237 		fileset->fs_next = filebench_shm->filesetlist;
9795184Sek110237 		filebench_shm->filesetlist = fileset;
9805184Sek110237 	}
9815184Sek110237 
9825184Sek110237 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
9835184Sek110237 
9845184Sek110237 	(void) strcpy(fileset->fs_name, name);
9855184Sek110237 
9865184Sek110237 	return (fileset);
9875184Sek110237 }
9885184Sek110237 
9895184Sek110237 /*
9905184Sek110237  * If supplied with a pointer to a fileset and the fileset's
9915184Sek110237  * fs_prealloc flag is set, calls fileset_populate() to populate
9925184Sek110237  * the fileset with filesetentries, then calls fileset_create()
9935184Sek110237  * to make actual directories and files for the filesetentries.
9945184Sek110237  * Otherwise, it applies fileset_populate() and fileset_create()
9955184Sek110237  * to all the filesets on the master fileset list. It always
9965184Sek110237  * returns zero (0) if one fileset is populated / created,
9975184Sek110237  * otherwise it returns the sum of returned values from
9985184Sek110237  * fileset_create() and fileset_populate(), which
9995184Sek110237  * will be a negative one (-1) times the number of
10005184Sek110237  * fileset_create() calls which failed.
10015184Sek110237  */
10025184Sek110237 int
10035184Sek110237 fileset_createset(fileset_t *fileset)
10045184Sek110237 {
10055184Sek110237 	fileset_t *list;
10065184Sek110237 	int ret = 0;
10075184Sek110237 
1008*5673Saw148015 	/* set up for possible parallel allocate */
1009*5673Saw148015 	paralloc_count = 0;
1010*5673Saw148015 
10115184Sek110237 	if (fileset && integer_isset(fileset->fs_prealloc)) {
1012*5673Saw148015 
1013*5673Saw148015 		filebench_log(LOG_INFO,
1014*5673Saw148015 		    "creating/pre-allocating %s %s",
1015*5673Saw148015 		    fileset_entity_name(fileset), fileset->fs_name);
1016*5673Saw148015 
10175184Sek110237 		if ((ret = fileset_populate(fileset)) != 0)
10185184Sek110237 			return (ret);
1019*5673Saw148015 
1020*5673Saw148015 		if ((ret = fileset_create(fileset)) != 0)
1021*5673Saw148015 			return (ret);
1022*5673Saw148015 	} else {
1023*5673Saw148015 
1024*5673Saw148015 		filebench_log(LOG_INFO,
1025*5673Saw148015 		    "Creating/pre-allocating files and filesets");
1026*5673Saw148015 
1027*5673Saw148015 		list = filebench_shm->filesetlist;
1028*5673Saw148015 		while (list) {
1029*5673Saw148015 			if ((ret = fileset_populate(list)) != 0)
1030*5673Saw148015 				return (ret);
1031*5673Saw148015 			if ((ret = fileset_create(list)) != 0)
1032*5673Saw148015 				return (ret);
1033*5673Saw148015 			list = list->fs_next;
1034*5673Saw148015 		}
10355184Sek110237 	}
10365184Sek110237 
1037*5673Saw148015 	/* wait for allocation threads to finish */
1038*5673Saw148015 	filebench_log(LOG_INFO,
1039*5673Saw148015 	    "waiting for fileset pre-allocation to finish");
10405184Sek110237 
1041*5673Saw148015 	(void) pthread_mutex_lock(&paralloc_lock);
1042*5673Saw148015 	while (paralloc_count > 0)
1043*5673Saw148015 		(void) pthread_cond_wait(&paralloc_cv, &paralloc_lock);
1044*5673Saw148015 	(void) pthread_mutex_unlock(&paralloc_lock);
1045*5673Saw148015 
1046*5673Saw148015 	if (paralloc_count < 0)
1047*5673Saw148015 		return (-1);
1048*5673Saw148015 
1049*5673Saw148015 	return (0);
10505184Sek110237 }
10515184Sek110237 
10525184Sek110237 /*
10535184Sek110237  * Searches through the master fileset list for the named fileset.
10545184Sek110237  * If found, returns pointer to same, otherwise returns NULL.
10555184Sek110237  */
10565184Sek110237 fileset_t *
10575184Sek110237 fileset_find(char *name)
10585184Sek110237 {
10595184Sek110237 	fileset_t *fileset = filebench_shm->filesetlist;
10605184Sek110237 
10615184Sek110237 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
10625184Sek110237 
10635184Sek110237 	while (fileset) {
10645184Sek110237 		if (strcmp(name, fileset->fs_name) == 0) {
10655184Sek110237 			(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
10665184Sek110237 			return (fileset);
10675184Sek110237 		}
10685184Sek110237 		fileset = fileset->fs_next;
10695184Sek110237 	}
10705184Sek110237 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
10715184Sek110237 
10725184Sek110237 	return (NULL);
10735184Sek110237 }
1074*5673Saw148015 
1075*5673Saw148015 /*
1076*5673Saw148015  * Iterates over all the file sets in the filesetlist,
1077*5673Saw148015  * executing the supplied command "*cmd()" on them. Also
1078*5673Saw148015  * indicates to the executed command if it is the first
1079*5673Saw148015  * time the command has been executed since the current
1080*5673Saw148015  * call to fileset_iter.
1081*5673Saw148015  */
1082*5673Saw148015 void
1083*5673Saw148015 fileset_iter(int (*cmd)(fileset_t *fileset, int first))
1084*5673Saw148015 {
1085*5673Saw148015 	fileset_t *fileset = filebench_shm->filesetlist;
1086*5673Saw148015 	int count = 0;
1087*5673Saw148015 
1088*5673Saw148015 	(void) ipc_mutex_lock(&filebench_shm->fileset_lock);
1089*5673Saw148015 
1090*5673Saw148015 	while (fileset) {
1091*5673Saw148015 		cmd(fileset, count == 0);
1092*5673Saw148015 		fileset = fileset->fs_next;
1093*5673Saw148015 		count++;
1094*5673Saw148015 	}
1095*5673Saw148015 
1096*5673Saw148015 	(void) ipc_mutex_unlock(&filebench_shm->fileset_lock);
1097*5673Saw148015 }
1098*5673Saw148015 
1099*5673Saw148015 /*
1100*5673Saw148015  * Prints information to the filebench log about the file
1101*5673Saw148015  * object. Also prints a header on the first call.
1102*5673Saw148015  */
1103*5673Saw148015 int
1104*5673Saw148015 fileset_print(fileset_t *fileset, int first)
1105*5673Saw148015 {
1106*5673Saw148015 	int pathlength = strlen(*fileset->fs_path) + strlen(fileset->fs_name);
1107*5673Saw148015 	/* 30 spaces */
1108*5673Saw148015 	char pad[] = "                              ";
1109*5673Saw148015 
1110*5673Saw148015 	if (pathlength > 29)
1111*5673Saw148015 		pathlength = 29;
1112*5673Saw148015 
1113*5673Saw148015 	if (first) {
1114*5673Saw148015 		filebench_log(LOG_INFO, "File or Fileset name%20s%12s%10s",
1115*5673Saw148015 		    "file size",
1116*5673Saw148015 		    "dir width",
1117*5673Saw148015 		    "entries");
1118*5673Saw148015 	}
1119*5673Saw148015 
1120*5673Saw148015 	if (fileset->fs_attrs & FILESET_IS_FILE) {
1121*5673Saw148015 		if (fileset->fs_attrs & FILESET_IS_RAW_DEV) {
1122*5673Saw148015 			filebench_log(LOG_INFO,
1123*5673Saw148015 			    "%s/%s%s         (Raw Device)",
1124*5673Saw148015 			    *fileset->fs_path,
1125*5673Saw148015 			    fileset->fs_name,
1126*5673Saw148015 			    &pad[pathlength]);
1127*5673Saw148015 		} else {
1128*5673Saw148015 			filebench_log(LOG_INFO,
1129*5673Saw148015 			    "%s/%s%s%9lld     (Single File)",
1130*5673Saw148015 			    *fileset->fs_path,
1131*5673Saw148015 			    fileset->fs_name,
1132*5673Saw148015 			    &pad[pathlength],
1133*5673Saw148015 			    *fileset->fs_size);
1134*5673Saw148015 		}
1135*5673Saw148015 	} else {
1136*5673Saw148015 		filebench_log(LOG_INFO, "%s/%s%s%9lld%12lld%10lld",
1137*5673Saw148015 		    *fileset->fs_path,
1138*5673Saw148015 		    fileset->fs_name,
1139*5673Saw148015 		    &pad[pathlength],
1140*5673Saw148015 		    *fileset->fs_size,
1141*5673Saw148015 		    *fileset->fs_dirwidth,
1142*5673Saw148015 		    *fileset->fs_entries);
1143*5673Saw148015 	}
1144*5673Saw148015 	return (0);
1145*5673Saw148015 }
1146*5673Saw148015 /*
1147*5673Saw148015  * checks to see if the path/name pair points to a raw device. If
1148*5673Saw148015  * so it sets the raw device flag (FILESET_IS_RAW_DEV) and returns 1.
1149*5673Saw148015  * If RAW is not defined, or it is not a raw device, it clears the
1150*5673Saw148015  * raw device flag and returns 0.
1151*5673Saw148015  */
1152*5673Saw148015 int
1153*5673Saw148015 fileset_checkraw(fileset_t *fileset)
1154*5673Saw148015 {
1155*5673Saw148015 	char path[MAXPATHLEN];
1156*5673Saw148015 	struct stat64 sb;
1157*5673Saw148015 
1158*5673Saw148015 	fileset->fs_attrs &= (~FILESET_IS_RAW_DEV);
1159*5673Saw148015 
1160*5673Saw148015 #ifdef HAVE_RAW_SUPPORT
1161*5673Saw148015 	/* check for raw device */
1162*5673Saw148015 	(void) strcpy(path, *fileset->fs_path);
1163*5673Saw148015 	(void) strcat(path, "/");
1164*5673Saw148015 	(void) strcat(path, fileset->fs_name);
1165*5673Saw148015 	if ((stat64(path, &sb) == 0) &&
1166*5673Saw148015 	    ((sb.st_mode & S_IFMT) == S_IFBLK) && sb.st_rdev) {
1167*5673Saw148015 		fileset->fs_attrs |= FILESET_IS_RAW_DEV;
1168*5673Saw148015 		return (1);
1169*5673Saw148015 	}
1170*5673Saw148015 #endif /* HAVE_RAW_SUPPORT */
1171*5673Saw148015 
1172*5673Saw148015 	return (0);
1173*5673Saw148015 }
1174