xref: /onnv-gate/usr/src/cmd/filebench/common/flowop_library.c (revision 8404:b96b8ad1c3e9)
15184Sek110237 /*
25184Sek110237  * CDDL HEADER START
35184Sek110237  *
45184Sek110237  * The contents of this file are subject to the terms of the
55184Sek110237  * Common Development and Distribution License (the "License").
65184Sek110237  * You may not use this file except in compliance with the License.
75184Sek110237  *
85184Sek110237  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
95184Sek110237  * or http://www.opensolaris.org/os/licensing.
105184Sek110237  * See the License for the specific language governing permissions
115184Sek110237  * and limitations under the License.
125184Sek110237  *
135184Sek110237  * When distributing Covered Code, include this CDDL HEADER in each
145184Sek110237  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
155184Sek110237  * If applicable, add the following below this CDDL HEADER, with the
165184Sek110237  * fields enclosed by brackets "[]" replaced with your own identifying
175184Sek110237  * information: Portions Copyright [yyyy] [name of copyright owner]
185184Sek110237  *
195184Sek110237  * CDDL HEADER END
205184Sek110237  */
215184Sek110237 /*
226084Saw148015  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
235184Sek110237  * Use is subject to license terms.
246613Sek110237  *
256613Sek110237  * Portions Copyright 2008 Denis Cheng
265184Sek110237  */
275184Sek110237 
285184Sek110237 #include "config.h"
295184Sek110237 
305184Sek110237 #include <sys/types.h>
315184Sek110237 #ifdef HAVE_SYS_ASYNCH_H
325184Sek110237 #include <sys/asynch.h>
335184Sek110237 #endif
345184Sek110237 #include <sys/ipc.h>
355184Sek110237 #include <sys/sem.h>
365184Sek110237 #include <sys/errno.h>
375184Sek110237 #include <sys/time.h>
385184Sek110237 #include <inttypes.h>
395184Sek110237 #include <fcntl.h>
406212Saw148015 #include <math.h>
417946SAndrew.W.Wilson@sun.com #include <dirent.h>
425184Sek110237 
435184Sek110237 #ifdef HAVE_UTILITY_H
445184Sek110237 #include <utility.h>
455184Sek110237 #endif /* HAVE_UTILITY_H */
465184Sek110237 
475184Sek110237 #ifdef HAVE_AIO
485184Sek110237 #include <aio.h>
495184Sek110237 #endif /* HAVE_AIO */
505184Sek110237 
515184Sek110237 #ifdef HAVE_LIBAIO_H
525184Sek110237 #include <libaio.h>
535184Sek110237 #endif /* HAVE_LIBAIO_H */
545184Sek110237 
555184Sek110237 #ifdef HAVE_SYS_ASYNC_H
565184Sek110237 #include <sys/asynch.h>
575184Sek110237 #endif /* HAVE_SYS_ASYNC_H */
585184Sek110237 
595184Sek110237 #ifdef HAVE_AIO_H
605184Sek110237 #include <aio.h>
615184Sek110237 #endif /* HAVE_AIO_H */
625184Sek110237 
635184Sek110237 #ifndef HAVE_UINT_T
645184Sek110237 #define	uint_t unsigned int
655184Sek110237 #endif /* HAVE_UINT_T */
665184Sek110237 
675184Sek110237 #ifndef HAVE_AIOCB64_T
685184Sek110237 #define	aiocb64 aiocb
695184Sek110237 #endif /* HAVE_AIOCB64_T */
705184Sek110237 
715184Sek110237 #ifndef HAVE_SYSV_SEM
725184Sek110237 #include <semaphore.h>
735184Sek110237 #endif /* HAVE_SYSV_SEM */
745184Sek110237 
755184Sek110237 #include "filebench.h"
765184Sek110237 #include "flowop.h"
775184Sek110237 #include "fileset.h"
786212Saw148015 #include "fb_random.h"
797946SAndrew.W.Wilson@sun.com #include "utils.h"
/*
 * These routines implement the flowops from the f language. Each
 * flowop has a name such as "read", and a set of function pointers
 * to call for initialization, execution and destruction of the flowop.
 * The table flowoplib_funcs[] contains a flowoplib struct for each
 * implemented flowop. Most flowops use a generic initialization function
 * and most use a generic destruction function. All flowop
 * functions referenced from the table are in this file, though, of
 * course, they often call functions from other files.
 *
 * The flowop_init() routine uses the flowoplib_funcs[] table to
 * create an initial set of "instance 0" flowops, one for each type of
 * flowop, from which all other flowops are derived. These "instance 0"
 * flowops are initialized with information from the table including
 * pointers for their fo_init, fo_func and fo_destruct functions. When
 * a flowop definition is encountered in an f language script, the
 * "type" of flowop, such as "read", is used to search for the
 * "instance 0" flowop named "read", then a new flowop is allocated
 * which inherits its function pointers and other initial properties
 * from the instance 0 flowop, and is given a new name as specified
 * by the "name=" attribute.
 */
1025184Sek110237 
1035184Sek110237 static int flowoplib_init_generic(flowop_t *flowop);
1045184Sek110237 static void flowoplib_destruct_generic(flowop_t *flowop);
1056084Saw148015 static void flowoplib_destruct_noop(flowop_t *flowop);
1065184Sek110237 static int flowoplib_fdnum(threadflow_t *threadflow, flowop_t *flowop);
1077556SAndrew.W.Wilson@sun.com static int flowoplib_print(threadflow_t *threadflow, flowop_t *flowop);
1085184Sek110237 static int flowoplib_write(threadflow_t *threadflow, flowop_t *flowop);
1095184Sek110237 #ifdef HAVE_AIO
1105184Sek110237 static int flowoplib_aiowrite(threadflow_t *threadflow, flowop_t *flowop);
1115184Sek110237 static int flowoplib_aiowait(threadflow_t *threadflow, flowop_t *flowop);
1125184Sek110237 #endif
1135184Sek110237 static int flowoplib_read(threadflow_t *threadflow, flowop_t *flowop);
1145184Sek110237 static int flowoplib_block_init(flowop_t *flowop);
1155184Sek110237 static int flowoplib_block(threadflow_t *threadflow, flowop_t *flowop);
1165184Sek110237 static int flowoplib_wakeup(threadflow_t *threadflow, flowop_t *flowop);
1175184Sek110237 static int flowoplib_hog(threadflow_t *threadflow, flowop_t *flowop);
1185184Sek110237 static int flowoplib_delay(threadflow_t *threadflow, flowop_t *flowop);
1195184Sek110237 static int flowoplib_sempost(threadflow_t *threadflow, flowop_t *flowop);
1205184Sek110237 static int flowoplib_sempost_init(flowop_t *flowop);
1215184Sek110237 static int flowoplib_semblock(threadflow_t *threadflow, flowop_t *flowop);
1225184Sek110237 static int flowoplib_semblock_init(flowop_t *flowop);
1235184Sek110237 static void flowoplib_semblock_destruct(flowop_t *flowop);
1245184Sek110237 static int flowoplib_eventlimit(threadflow_t *, flowop_t *flowop);
1255184Sek110237 static int flowoplib_bwlimit(threadflow_t *, flowop_t *flowop);
1265184Sek110237 static int flowoplib_iopslimit(threadflow_t *, flowop_t *flowop);
1275184Sek110237 static int flowoplib_opslimit(threadflow_t *, flowop_t *flowop);
1285184Sek110237 static int flowoplib_openfile(threadflow_t *, flowop_t *flowop);
1295184Sek110237 static int flowoplib_openfile_common(threadflow_t *, flowop_t *flowop, int fd);
1305184Sek110237 static int flowoplib_createfile(threadflow_t *, flowop_t *flowop);
1315184Sek110237 static int flowoplib_closefile(threadflow_t *, flowop_t *flowop);
1327946SAndrew.W.Wilson@sun.com static int flowoplib_makedir(threadflow_t *, flowop_t *flowop);
1337946SAndrew.W.Wilson@sun.com static int flowoplib_removedir(threadflow_t *, flowop_t *flowop);
1347946SAndrew.W.Wilson@sun.com static int flowoplib_listdir(threadflow_t *, flowop_t *flowop);
1355184Sek110237 static int flowoplib_fsync(threadflow_t *, flowop_t *flowop);
1365184Sek110237 static int flowoplib_readwholefile(threadflow_t *, flowop_t *flowop);
1375184Sek110237 static int flowoplib_writewholefile(threadflow_t *, flowop_t *flowop);
1385184Sek110237 static int flowoplib_appendfile(threadflow_t *threadflow, flowop_t *flowop);
1395184Sek110237 static int flowoplib_appendfilerand(threadflow_t *threadflow, flowop_t *flowop);
1405184Sek110237 static int flowoplib_deletefile(threadflow_t *threadflow, flowop_t *flowop);
1415184Sek110237 static int flowoplib_statfile(threadflow_t *threadflow, flowop_t *flowop);
1425184Sek110237 static int flowoplib_finishoncount(threadflow_t *threadflow, flowop_t *flowop);
1435184Sek110237 static int flowoplib_finishonbytes(threadflow_t *threadflow, flowop_t *flowop);
1445184Sek110237 static int flowoplib_fsyncset(threadflow_t *threadflow, flowop_t *flowop);
1456212Saw148015 static int flowoplib_testrandvar(threadflow_t *threadflow, flowop_t *flowop);
1466212Saw148015 static int flowoplib_testrandvar_init(flowop_t *flowop);
1476212Saw148015 static void flowoplib_testrandvar_destruct(flowop_t *flowop);
1485184Sek110237 
1495184Sek110237 typedef struct flowoplib {
1505184Sek110237 	int	fl_type;
1515184Sek110237 	int	fl_attrs;
1525184Sek110237 	char	*fl_name;
1535184Sek110237 	int	(*fl_init)();
1545184Sek110237 	int	(*fl_func)();
1555184Sek110237 	void	(*fl_destruct)();
1565184Sek110237 } flowoplib_t;
1575184Sek110237 
1585184Sek110237 static flowoplib_t flowoplib_funcs[] = {
1595184Sek110237 	FLOW_TYPE_IO, FLOW_ATTR_WRITE, "write", flowoplib_init_generic,
1605184Sek110237 	flowoplib_write, flowoplib_destruct_generic,
1615184Sek110237 	FLOW_TYPE_IO, FLOW_ATTR_READ, "read", flowoplib_init_generic,
1625184Sek110237 	flowoplib_read, flowoplib_destruct_generic,
1635184Sek110237 #ifdef HAVE_AIO
1645184Sek110237 	FLOW_TYPE_AIO, FLOW_ATTR_WRITE, "aiowrite", flowoplib_init_generic,
1655184Sek110237 	flowoplib_aiowrite, flowoplib_destruct_generic,
1665184Sek110237 	FLOW_TYPE_AIO, 0, "aiowait", flowoplib_init_generic,
1675184Sek110237 	flowoplib_aiowait, flowoplib_destruct_generic,
1685184Sek110237 #endif
1695184Sek110237 	FLOW_TYPE_SYNC, 0, "block", flowoplib_block_init,
1705184Sek110237 	flowoplib_block, flowoplib_destruct_generic,
1715184Sek110237 	FLOW_TYPE_SYNC, 0, "wakeup", flowoplib_init_generic,
1725184Sek110237 	flowoplib_wakeup, flowoplib_destruct_generic,
1735184Sek110237 	FLOW_TYPE_SYNC, 0, "semblock", flowoplib_semblock_init,
1745184Sek110237 	flowoplib_semblock, flowoplib_semblock_destruct,
1755184Sek110237 	FLOW_TYPE_SYNC, 0, "sempost", flowoplib_sempost_init,
1766084Saw148015 	flowoplib_sempost, flowoplib_destruct_noop,
1775184Sek110237 	FLOW_TYPE_OTHER, 0, "hog", flowoplib_init_generic,
1785184Sek110237 	flowoplib_hog, flowoplib_destruct_generic,
1795184Sek110237 	FLOW_TYPE_OTHER, 0, "delay", flowoplib_init_generic,
1805184Sek110237 	flowoplib_delay, flowoplib_destruct_generic,
1815184Sek110237 	FLOW_TYPE_OTHER, 0, "eventlimit", flowoplib_init_generic,
1825184Sek110237 	flowoplib_eventlimit, flowoplib_destruct_generic,
1835184Sek110237 	FLOW_TYPE_OTHER, 0, "bwlimit", flowoplib_init_generic,
1845184Sek110237 	flowoplib_bwlimit, flowoplib_destruct_generic,
1855184Sek110237 	FLOW_TYPE_OTHER, 0, "iopslimit", flowoplib_init_generic,
1865184Sek110237 	flowoplib_iopslimit, flowoplib_destruct_generic,
1875184Sek110237 	FLOW_TYPE_OTHER, 0, "opslimit", flowoplib_init_generic,
1885184Sek110237 	flowoplib_opslimit, flowoplib_destruct_generic,
1895184Sek110237 	FLOW_TYPE_OTHER, 0, "finishoncount", flowoplib_init_generic,
1905184Sek110237 	flowoplib_finishoncount, flowoplib_destruct_generic,
1915184Sek110237 	FLOW_TYPE_OTHER, 0, "finishonbytes", flowoplib_init_generic,
1925184Sek110237 	flowoplib_finishonbytes, flowoplib_destruct_generic,
1935184Sek110237 	FLOW_TYPE_IO, 0, "openfile", flowoplib_init_generic,
1945184Sek110237 	flowoplib_openfile, flowoplib_destruct_generic,
1955184Sek110237 	FLOW_TYPE_IO, 0, "createfile", flowoplib_init_generic,
1965184Sek110237 	flowoplib_createfile, flowoplib_destruct_generic,
1975184Sek110237 	FLOW_TYPE_IO, 0, "closefile", flowoplib_init_generic,
1985184Sek110237 	flowoplib_closefile, flowoplib_destruct_generic,
1997946SAndrew.W.Wilson@sun.com 	FLOW_TYPE_IO, 0, "makedir", flowoplib_init_generic,
2007946SAndrew.W.Wilson@sun.com 	flowoplib_makedir, flowoplib_destruct_generic,
2017946SAndrew.W.Wilson@sun.com 	FLOW_TYPE_IO, 0, "removedir", flowoplib_init_generic,
2027946SAndrew.W.Wilson@sun.com 	flowoplib_removedir, flowoplib_destruct_generic,
2037946SAndrew.W.Wilson@sun.com 	FLOW_TYPE_IO, 0, "listdir", flowoplib_init_generic,
2047946SAndrew.W.Wilson@sun.com 	flowoplib_listdir, flowoplib_destruct_generic,
2055184Sek110237 	FLOW_TYPE_IO, 0, "fsync", flowoplib_init_generic,
2065184Sek110237 	flowoplib_fsync, flowoplib_destruct_generic,
2075184Sek110237 	FLOW_TYPE_IO, 0, "fsyncset", flowoplib_init_generic,
2085184Sek110237 	flowoplib_fsyncset, flowoplib_destruct_generic,
2095184Sek110237 	FLOW_TYPE_IO, 0, "statfile", flowoplib_init_generic,
2105184Sek110237 	flowoplib_statfile, flowoplib_destruct_generic,
2115184Sek110237 	FLOW_TYPE_IO, FLOW_ATTR_READ, "readwholefile", flowoplib_init_generic,
2125184Sek110237 	flowoplib_readwholefile, flowoplib_destruct_generic,
2135184Sek110237 	FLOW_TYPE_IO, FLOW_ATTR_WRITE, "appendfile", flowoplib_init_generic,
2145184Sek110237 	flowoplib_appendfile, flowoplib_destruct_generic,
2155184Sek110237 	FLOW_TYPE_IO, FLOW_ATTR_WRITE, "appendfilerand", flowoplib_init_generic,
2165184Sek110237 	flowoplib_appendfilerand, flowoplib_destruct_generic,
2175184Sek110237 	FLOW_TYPE_IO, 0, "deletefile", flowoplib_init_generic,
2185184Sek110237 	flowoplib_deletefile, flowoplib_destruct_generic,
2195184Sek110237 	FLOW_TYPE_IO, FLOW_ATTR_WRITE, "writewholefile", flowoplib_init_generic,
2206212Saw148015 	flowoplib_writewholefile, flowoplib_destruct_generic,
2217556SAndrew.W.Wilson@sun.com 	FLOW_TYPE_OTHER, 0, "print", flowoplib_init_generic,
2227556SAndrew.W.Wilson@sun.com 	flowoplib_print, flowoplib_destruct_generic,
2236212Saw148015 	/* routine to calculate mean and stddev for output from a randvar */
2246212Saw148015 	FLOW_TYPE_OTHER, 0, "testrandvar", flowoplib_testrandvar_init,
2256212Saw148015 	flowoplib_testrandvar, flowoplib_testrandvar_destruct
2265184Sek110237 };
2275184Sek110237 
2285184Sek110237 /*
2295184Sek110237  * Loops through the master list of flowops defined in this
2305184Sek110237  * module, and creates and initializes a flowop for each one
2315184Sek110237  * by calling flowop_define. As a side effect of calling
2325184Sek110237  * flowop define, the created flowops are placed on the
2335184Sek110237  * master flowop list. All created flowops are set to
2345184Sek110237  * instance "0".
2355184Sek110237  */
2365184Sek110237 void
2375184Sek110237 flowoplib_init()
2385184Sek110237 {
2395184Sek110237 	int nops = sizeof (flowoplib_funcs) / sizeof (flowoplib_t);
2405184Sek110237 	int i;
2415184Sek110237 
2425184Sek110237 	for (i = 0; i < nops; i++) {
2435184Sek110237 		flowop_t *flowop;
2445184Sek110237 		flowoplib_t *fl;
2455184Sek110237 
2465184Sek110237 		fl = &flowoplib_funcs[i];
2475184Sek110237 
2485184Sek110237 		if ((flowop = flowop_define(NULL,
2496550Saw148015 		    fl->fl_name, NULL, NULL, 0, fl->fl_type)) == 0) {
2505184Sek110237 			filebench_log(LOG_ERROR,
2515184Sek110237 			    "failed to create flowop %s\n",
2525184Sek110237 			    fl->fl_name);
2535184Sek110237 			filebench_shutdown(1);
2545184Sek110237 		}
2555184Sek110237 
2565184Sek110237 		flowop->fo_func = fl->fl_func;
2575184Sek110237 		flowop->fo_init = fl->fl_init;
2585184Sek110237 		flowop->fo_destruct = fl->fl_destruct;
2595184Sek110237 		flowop->fo_attrs = fl->fl_attrs;
2605184Sek110237 	}
2615184Sek110237 }
2625184Sek110237 
2635184Sek110237 static int
2645184Sek110237 flowoplib_init_generic(flowop_t *flowop)
2655184Sek110237 {
2665184Sek110237 	(void) ipc_mutex_unlock(&flowop->fo_lock);
2676084Saw148015 	return (FILEBENCH_OK);
2685184Sek110237 }
2695184Sek110237 
2705184Sek110237 static void
2715184Sek110237 flowoplib_destruct_generic(flowop_t *flowop)
2725184Sek110237 {
2736084Saw148015 	char *buf;
2746084Saw148015 
2756084Saw148015 	/* release any local resources held by the flowop */
2766084Saw148015 	(void) ipc_mutex_lock(&flowop->fo_lock);
2776084Saw148015 	buf = flowop->fo_buf;
2786084Saw148015 	flowop->fo_buf = NULL;
2796084Saw148015 	(void) ipc_mutex_unlock(&flowop->fo_lock);
2806084Saw148015 
2816084Saw148015 	if (buf)
2826084Saw148015 		free(buf);
2836084Saw148015 }
2846084Saw148015 
2856084Saw148015 /*
2866084Saw148015  * Special total noop destruct
2876084Saw148015  */
2886084Saw148015 /* ARGSUSED */
2896084Saw148015 static void
2906084Saw148015 flowoplib_destruct_noop(flowop_t *flowop)
2916084Saw148015 {
2925184Sek110237 }
2935184Sek110237 
2945184Sek110237 /*
2955184Sek110237  * Generates a file attribute from flags in the supplied flowop.
2965184Sek110237  * Sets FLOW_ATTR_DIRECTIO and/or FLOW_ATTR_DSYNC as needed.
2975184Sek110237  */
2985184Sek110237 static int
2995184Sek110237 flowoplib_fileattrs(flowop_t *flowop)
3005184Sek110237 {
3015184Sek110237 	int attrs = 0;
3025184Sek110237 
3036212Saw148015 	if (avd_get_bool(flowop->fo_directio))
3045184Sek110237 		attrs |= FLOW_ATTR_DIRECTIO;
3055184Sek110237 
3066212Saw148015 	if (avd_get_bool(flowop->fo_dsync))
3075184Sek110237 		attrs |= FLOW_ATTR_DSYNC;
3085184Sek110237 
3095184Sek110237 	return (attrs);
3105184Sek110237 }
3115184Sek110237 
3125184Sek110237 /*
313*8404SAndrew.W.Wilson@sun.com  * Obtain a filesetentry for a file. Result placed where filep points.
314*8404SAndrew.W.Wilson@sun.com  * Supply with a flowop and a flag to indicate whether an existent or
315*8404SAndrew.W.Wilson@sun.com  * non-existent file is required. Returns FILEBENCH_NORSC if all out
316*8404SAndrew.W.Wilson@sun.com  * of the appropriate type of directories, FILEBENCH_ERROR if the
317*8404SAndrew.W.Wilson@sun.com  * flowop does not point to a fileset, and FILEBENCH_OK otherwise.
318*8404SAndrew.W.Wilson@sun.com  */
319*8404SAndrew.W.Wilson@sun.com static int
320*8404SAndrew.W.Wilson@sun.com flowoplib_pickfile(filesetentry_t **filep, flowop_t *flowop, int flags, int tid)
321*8404SAndrew.W.Wilson@sun.com {
322*8404SAndrew.W.Wilson@sun.com 	fileset_t	*fileset;
323*8404SAndrew.W.Wilson@sun.com 	int		fileindex;
324*8404SAndrew.W.Wilson@sun.com 
325*8404SAndrew.W.Wilson@sun.com 	if ((fileset = flowop->fo_fileset) == NULL) {
326*8404SAndrew.W.Wilson@sun.com 		filebench_log(LOG_ERROR, "flowop NO fileset");
327*8404SAndrew.W.Wilson@sun.com 		return (FILEBENCH_ERROR);
328*8404SAndrew.W.Wilson@sun.com 	}
329*8404SAndrew.W.Wilson@sun.com 
330*8404SAndrew.W.Wilson@sun.com 	if (flowop->fo_fileindex) {
331*8404SAndrew.W.Wilson@sun.com 		fileindex = (int)(avd_get_dbl(flowop->fo_fileindex) *
332*8404SAndrew.W.Wilson@sun.com 		    ((double)(fileset->fs_constentries / 2)));
333*8404SAndrew.W.Wilson@sun.com 		fileindex = fileindex % fileset->fs_constentries;
334*8404SAndrew.W.Wilson@sun.com 		flags |= FILESET_PICKBYINDEX;
335*8404SAndrew.W.Wilson@sun.com 	} else {
336*8404SAndrew.W.Wilson@sun.com 		fileindex = 0;
337*8404SAndrew.W.Wilson@sun.com 	}
338*8404SAndrew.W.Wilson@sun.com 
339*8404SAndrew.W.Wilson@sun.com 	if ((*filep = fileset_pick(fileset, FILESET_PICKFILE | flags,
340*8404SAndrew.W.Wilson@sun.com 	    tid, fileindex)) == NULL) {
341*8404SAndrew.W.Wilson@sun.com 		filebench_log(LOG_DEBUG_SCRIPT,
342*8404SAndrew.W.Wilson@sun.com 		    "flowop %s failed to pick file from fileset %s",
343*8404SAndrew.W.Wilson@sun.com 		    flowop->fo_name,
344*8404SAndrew.W.Wilson@sun.com 		    avd_get_str(fileset->fs_name));
345*8404SAndrew.W.Wilson@sun.com 		return (FILEBENCH_NORSC);
346*8404SAndrew.W.Wilson@sun.com 	}
347*8404SAndrew.W.Wilson@sun.com 
348*8404SAndrew.W.Wilson@sun.com 	return (FILEBENCH_OK);
349*8404SAndrew.W.Wilson@sun.com }
350*8404SAndrew.W.Wilson@sun.com 
351*8404SAndrew.W.Wilson@sun.com /*
352*8404SAndrew.W.Wilson@sun.com  * Obtain a filesetentry for a leaf directory. Result placed where dirp
353*8404SAndrew.W.Wilson@sun.com  * points. Supply with flowop and a flag to indicate whether an existent
354*8404SAndrew.W.Wilson@sun.com  * or non-existent leaf directory is required. Returns FILEBENCH_NORSC
355*8404SAndrew.W.Wilson@sun.com  * if all out of the appropriate type of directories, FILEBENCH_ERROR
356*8404SAndrew.W.Wilson@sun.com  * if the flowop does not point to a fileset, and FILEBENCH_OK otherwise.
357*8404SAndrew.W.Wilson@sun.com  */
358*8404SAndrew.W.Wilson@sun.com static int
359*8404SAndrew.W.Wilson@sun.com flowoplib_pickleafdir(filesetentry_t **dirp, flowop_t *flowop, int flags)
360*8404SAndrew.W.Wilson@sun.com {
361*8404SAndrew.W.Wilson@sun.com 	fileset_t	*fileset;
362*8404SAndrew.W.Wilson@sun.com 	int		dirindex;
363*8404SAndrew.W.Wilson@sun.com 
364*8404SAndrew.W.Wilson@sun.com 	if ((fileset = flowop->fo_fileset) == NULL) {
365*8404SAndrew.W.Wilson@sun.com 		filebench_log(LOG_ERROR, "flowop NO fileset");
366*8404SAndrew.W.Wilson@sun.com 		return (FILEBENCH_ERROR);
367*8404SAndrew.W.Wilson@sun.com 	}
368*8404SAndrew.W.Wilson@sun.com 
369*8404SAndrew.W.Wilson@sun.com 	if (flowop->fo_fileindex) {
370*8404SAndrew.W.Wilson@sun.com 		dirindex = (int)(avd_get_dbl(flowop->fo_fileindex) *
371*8404SAndrew.W.Wilson@sun.com 		    ((double)(fileset->fs_constleafdirs / 2)));
372*8404SAndrew.W.Wilson@sun.com 		dirindex = dirindex % fileset->fs_constleafdirs;
373*8404SAndrew.W.Wilson@sun.com 		flags |= FILESET_PICKBYINDEX;
374*8404SAndrew.W.Wilson@sun.com 	} else {
375*8404SAndrew.W.Wilson@sun.com 		dirindex = 0;
376*8404SAndrew.W.Wilson@sun.com 	}
377*8404SAndrew.W.Wilson@sun.com 
378*8404SAndrew.W.Wilson@sun.com 	if ((*dirp = fileset_pick(fileset,
379*8404SAndrew.W.Wilson@sun.com 	    FILESET_PICKLEAFDIR | flags, 0, dirindex)) == NULL) {
380*8404SAndrew.W.Wilson@sun.com 		filebench_log(LOG_DEBUG_SCRIPT,
381*8404SAndrew.W.Wilson@sun.com 		    "flowop %s failed to pick directory from fileset %s",
382*8404SAndrew.W.Wilson@sun.com 		    flowop->fo_name,
383*8404SAndrew.W.Wilson@sun.com 		    avd_get_str(fileset->fs_name));
384*8404SAndrew.W.Wilson@sun.com 		return (FILEBENCH_NORSC);
385*8404SAndrew.W.Wilson@sun.com 	}
386*8404SAndrew.W.Wilson@sun.com 
387*8404SAndrew.W.Wilson@sun.com 	return (FILEBENCH_OK);
388*8404SAndrew.W.Wilson@sun.com }
389*8404SAndrew.W.Wilson@sun.com 
390*8404SAndrew.W.Wilson@sun.com /*
3915184Sek110237  * Searches for a file descriptor. Tries the flowop's
3925184Sek110237  * fo_fdnumber first and returns with it if it has been
3935184Sek110237  * explicitly set (greater than 0). It next checks to
3945184Sek110237  * see if a rotating file descriptor policy is in effect,
3955184Sek110237  * and if not returns the fdnumber regardless of what
3965184Sek110237  * it is. (note that if it is 0, it just selects to the
3975184Sek110237  * default file descriptor in the threadflow's tf_fd
3985184Sek110237  * array). If the rotating fd policy is in effect, it
3995184Sek110237  * cycles from the end of the tf_fd array to one location
4005184Sek110237  * beyond the maximum needed by the number of entries in
4015184Sek110237  * the associated fileset on each invocation, then starts
4025184Sek110237  * over from the end.
4035184Sek110237  *
4045184Sek110237  * The routine returns an index into the threadflow's
4055184Sek110237  * tf_fd table where the actual file descriptor will be
4065184Sek110237  * found. Note: the calling routine must not call this
4075184Sek110237  * routine if the flowop does not have a fileset, and the
4085184Sek110237  * flowop's fo_fdnumber is zero and fo_rotatefd is
4095184Sek110237  * asserted, or an addressing fault may occur.
4105184Sek110237  */
4115673Saw148015 static int
4125184Sek110237 flowoplib_fdnum(threadflow_t *threadflow, flowop_t *flowop)
4135184Sek110237 {
4146212Saw148015 	fbint_t	entries;
4156391Saw148015 	int fdnumber = flowop->fo_fdnumber;
4166212Saw148015 
4175184Sek110237 	/* If the script sets the fd explicitly */
4186391Saw148015 	if (fdnumber > 0)
4196391Saw148015 		return (fdnumber);
4205184Sek110237 
4215184Sek110237 	/* If the flowop defaults to persistent fd */
4226212Saw148015 	if (!avd_get_bool(flowop->fo_rotatefd))
4236391Saw148015 		return (fdnumber);
4246391Saw148015 
4256391Saw148015 	if (flowop->fo_fileset == NULL) {
4266391Saw148015 		filebench_log(LOG_ERROR, "flowop NULL file");
4276391Saw148015 		return (FILEBENCH_ERROR);
4286391Saw148015 	}
4295184Sek110237 
4306212Saw148015 	entries = flowop->fo_fileset->fs_constentries;
4316212Saw148015 
4325184Sek110237 	/* Rotate the fd on each flowop invocation */
4336212Saw148015 	if (entries > (THREADFLOW_MAXFD / 2)) {
4345184Sek110237 		filebench_log(LOG_ERROR, "Out of file descriptors in flowop %s"
4356286Saw148015 		    " (too many files : %llu",
4366286Saw148015 		    flowop->fo_name, (u_longlong_t)entries);
4376084Saw148015 		return (FILEBENCH_ERROR);
4385184Sek110237 	}
4395184Sek110237 
4405184Sek110237 	/* First time around */
4415184Sek110237 	if (threadflow->tf_fdrotor == 0)
4425184Sek110237 		threadflow->tf_fdrotor = THREADFLOW_MAXFD;
4435184Sek110237 
4445184Sek110237 	/* One fd for every file in the set */
4456212Saw148015 	if (entries == (THREADFLOW_MAXFD - threadflow->tf_fdrotor))
4465184Sek110237 		threadflow->tf_fdrotor = THREADFLOW_MAXFD;
4475184Sek110237 
4485184Sek110237 
4495184Sek110237 	threadflow->tf_fdrotor--;
4505184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "selected fd = %d",
4515184Sek110237 	    threadflow->tf_fdrotor);
4525184Sek110237 	return (threadflow->tf_fdrotor);
4535184Sek110237 }
4545184Sek110237 
4555184Sek110237 /*
4565673Saw148015  * Determines the file descriptor to use, and attempts to open
4575673Saw148015  * the file if it is not already open. Also determines the wss
4586084Saw148015  * value. Returns FILEBENCH_ERROR on errors, FILESET_NORSC if
4596084Saw148015  * if flowop_openfile_common couldn't obtain an appropriate file
4606084Saw148015  * from a the fileset, and FILEBENCH_OK otherwise.
4615673Saw148015  */
4625673Saw148015 static int
4635673Saw148015 flowoplib_filesetup(threadflow_t *threadflow, flowop_t *flowop,
4646212Saw148015     fbint_t *wssp, int *filedescp)
4655673Saw148015 {
4665673Saw148015 	int fd = flowoplib_fdnum(threadflow, flowop);
4675673Saw148015 
4685673Saw148015 	if (fd == -1)
4696084Saw148015 		return (FILEBENCH_ERROR);
4705673Saw148015 
4715673Saw148015 	if (threadflow->tf_fd[fd] == 0) {
4726084Saw148015 		int ret;
4736084Saw148015 
4746084Saw148015 		if ((ret = flowoplib_openfile_common(
4756084Saw148015 		    threadflow, flowop, fd)) != FILEBENCH_OK)
4766084Saw148015 			return (ret);
4775673Saw148015 
4785673Saw148015 		if (threadflow->tf_fse[fd]) {
4795673Saw148015 			filebench_log(LOG_DEBUG_IMPL, "opened file %s",
4805673Saw148015 			    threadflow->tf_fse[fd]->fse_path);
4815673Saw148015 		} else {
4825673Saw148015 			filebench_log(LOG_DEBUG_IMPL,
4835673Saw148015 			    "opened device %s/%s",
4846212Saw148015 			    avd_get_str(flowop->fo_fileset->fs_path),
4856212Saw148015 			    avd_get_str(flowop->fo_fileset->fs_name));
4865673Saw148015 		}
4875673Saw148015 	}
4885673Saw148015 
4895673Saw148015 	*filedescp = threadflow->tf_fd[fd];
4905673Saw148015 
4916212Saw148015 	if ((*wssp = flowop->fo_constwss) == 0) {
4925673Saw148015 		if (threadflow->tf_fse[fd])
4935673Saw148015 			*wssp = threadflow->tf_fse[fd]->fse_size;
4945673Saw148015 		else
4956212Saw148015 			*wssp = avd_get_int(flowop->fo_fileset->fs_size);
4965673Saw148015 	}
4975673Saw148015 
4986084Saw148015 	return (FILEBENCH_OK);
4995673Saw148015 }
5005673Saw148015 
5015673Saw148015 /*
5025673Saw148015  * Determines the io buffer or random offset into tf_mem for
5036084Saw148015  * the IO operation. Returns FILEBENCH_ERROR on errors, FILEBENCH_OK otherwise.
5045673Saw148015  */
5055673Saw148015 static int
5065673Saw148015 flowoplib_iobufsetup(threadflow_t *threadflow, flowop_t *flowop,
5076212Saw148015     caddr_t *iobufp, fbint_t iosize)
5085673Saw148015 {
5095673Saw148015 	long memsize;
5105673Saw148015 	size_t memoffset;
5115673Saw148015 
5125673Saw148015 	if (iosize == 0) {
5135673Saw148015 		filebench_log(LOG_ERROR, "zero iosize for thread %s",
5145673Saw148015 		    flowop->fo_name);
5156084Saw148015 		return (FILEBENCH_ERROR);
5165673Saw148015 	}
5175673Saw148015 
5186212Saw148015 	if ((memsize = threadflow->tf_constmemsize) != 0) {
5195673Saw148015 
5205673Saw148015 		/* use tf_mem for I/O with random offset */
5216212Saw148015 		if (filebench_randomno(&memoffset,
5226212Saw148015 		    memsize, iosize, NULL) == -1) {
5235673Saw148015 			filebench_log(LOG_ERROR,
5245673Saw148015 			    "tf_memsize smaller than IO size for thread %s",
5255673Saw148015 			    flowop->fo_name);
5266084Saw148015 			return (FILEBENCH_ERROR);
5275673Saw148015 		}
5285673Saw148015 		*iobufp = threadflow->tf_mem + memoffset;
5295673Saw148015 
5305673Saw148015 	} else {
5315673Saw148015 		/* use private I/O buffer */
5325673Saw148015 		if ((flowop->fo_buf != NULL) &&
5335673Saw148015 		    (flowop->fo_buf_size < iosize)) {
5346212Saw148015 			/* too small, so free up and re-allocate */
5355673Saw148015 			free(flowop->fo_buf);
5365673Saw148015 			flowop->fo_buf = NULL;
5375673Saw148015 		}
5386212Saw148015 
5396212Saw148015 		/*
5406212Saw148015 		 * Allocate memory for the  buffer. The memory is freed
5416212Saw148015 		 * by flowop_destruct_generic() or by this routine if more
5426212Saw148015 		 * memory is needed for the buffer.
5436212Saw148015 		 */
5445673Saw148015 		if ((flowop->fo_buf == NULL) && ((flowop->fo_buf
5455673Saw148015 		    = (char *)malloc(iosize)) == NULL))
5466084Saw148015 			return (FILEBENCH_ERROR);
5475673Saw148015 
5485673Saw148015 		flowop->fo_buf_size = iosize;
5495673Saw148015 		*iobufp = flowop->fo_buf;
5505673Saw148015 	}
5516084Saw148015 	return (FILEBENCH_OK);
5525673Saw148015 }
5535673Saw148015 
5545673Saw148015 /*
5555673Saw148015  * Determines the file descriptor to use, opens it if necessary, the
5565673Saw148015  * io buffer or random offset into tf_mem for IO operation and the wss
5576084Saw148015  * value. Returns FILEBENCH_ERROR on errors, FILEBENCH_OK otherwise.
5585673Saw148015  */
5595673Saw148015 static int
5605673Saw148015 flowoplib_iosetup(threadflow_t *threadflow, flowop_t *flowop,
5616212Saw148015     fbint_t *wssp, caddr_t *iobufp, int *filedescp, fbint_t iosize)
5625673Saw148015 {
5636084Saw148015 	int ret;
5646084Saw148015 
5656084Saw148015 	if ((ret = flowoplib_filesetup(threadflow, flowop, wssp, filedescp)) !=
5666084Saw148015 	    FILEBENCH_OK)
5676084Saw148015 		return (ret);
5685673Saw148015 
5696084Saw148015 	if ((ret = flowoplib_iobufsetup(threadflow, flowop, iobufp, iosize)) !=
5706084Saw148015 	    FILEBENCH_OK)
5716084Saw148015 		return (ret);
5725673Saw148015 
5736084Saw148015 	return (FILEBENCH_OK);
5745673Saw148015 }
5755673Saw148015 
5765673Saw148015 /*
5775184Sek110237  * Emulate posix read / pread. If the flowop has a fileset,
5785184Sek110237  * a file descriptor number index is fetched, otherwise a
5795184Sek110237  * supplied fileobj file is used. In either case the specified
5805184Sek110237  * file will be opened if not already open. If the flowop has
5816084Saw148015  * neither a fileset or fileobj, an error is logged and FILEBENCH_ERROR
5825184Sek110237  * returned.
5835184Sek110237  *
5845184Sek110237  * The actual read is done to a random offset in the
5855184Sek110237  * threadflow's thread memory (tf_mem), with a size set by
5865184Sek110237  * fo_iosize and at either a random disk offset within the
5875184Sek110237  * working set size, or at the next sequential location. If
5886084Saw148015  * any errors are encountered, FILEBENCH_ERROR is returned,
5896084Saw148015  * if no appropriate file can be obtained from the fileset then
 * FILEBENCH_NORSC is returned, otherwise FILEBENCH_OK is returned.
5915184Sek110237  */
5925184Sek110237 static int
5935184Sek110237 flowoplib_read(threadflow_t *threadflow, flowop_t *flowop)
5945184Sek110237 {
5955673Saw148015 	caddr_t iobuf;
5966212Saw148015 	fbint_t wss;
5976212Saw148015 	fbint_t iosize;
5985184Sek110237 	int filedesc;
5995184Sek110237 	int ret;
6005184Sek110237 
6016212Saw148015 
6026212Saw148015 	iosize = avd_get_int(flowop->fo_iosize);
6036084Saw148015 	if ((ret = flowoplib_iosetup(threadflow, flowop, &wss, &iobuf,
6046212Saw148015 	    &filedesc, iosize)) != FILEBENCH_OK)
6056084Saw148015 		return (ret);
6065184Sek110237 
6076212Saw148015 	if (avd_get_bool(flowop->fo_random)) {
6085184Sek110237 		uint64_t fileoffset;
6095184Sek110237 
6106212Saw148015 		if (filebench_randomno64(&fileoffset,
6116212Saw148015 		    wss, iosize, NULL) == -1) {
6125184Sek110237 			filebench_log(LOG_ERROR,
6135184Sek110237 			    "file size smaller than IO size for thread %s",
6145184Sek110237 			    flowop->fo_name);
6156084Saw148015 			return (FILEBENCH_ERROR);
6165184Sek110237 		}
6175184Sek110237 
6185184Sek110237 		(void) flowop_beginop(threadflow, flowop);
6195673Saw148015 		if ((ret = pread64(filedesc, iobuf,
6206212Saw148015 		    iosize, (off64_t)fileoffset)) == -1) {
6215673Saw148015 			(void) flowop_endop(threadflow, flowop, 0);
6225184Sek110237 			filebench_log(LOG_ERROR,
6236286Saw148015 			    "read file %s failed, offset %llu "
6245673Saw148015 			    "io buffer %zd: %s",
6256212Saw148015 			    avd_get_str(flowop->fo_fileset->fs_name),
6266286Saw148015 			    (u_longlong_t)fileoffset, iobuf, strerror(errno));
6275673Saw148015 			flowop_endop(threadflow, flowop, 0);
6286084Saw148015 			return (FILEBENCH_ERROR);
6295184Sek110237 		}
6305673Saw148015 		(void) flowop_endop(threadflow, flowop, ret);
6315184Sek110237 
6325184Sek110237 		if ((ret == 0))
6335184Sek110237 			(void) lseek64(filedesc, 0, SEEK_SET);
6345184Sek110237 
6355184Sek110237 	} else {
6365184Sek110237 		(void) flowop_beginop(threadflow, flowop);
6376212Saw148015 		if ((ret = read(filedesc, iobuf, iosize)) == -1) {
6386212Saw148015 			(void) flowop_endop(threadflow, flowop, 0);
6395184Sek110237 			filebench_log(LOG_ERROR,
6405673Saw148015 			    "read file %s failed, io buffer %zd: %s",
6416212Saw148015 			    avd_get_str(flowop->fo_fileset->fs_name),
6425673Saw148015 			    iobuf, strerror(errno));
6435673Saw148015 			(void) flowop_endop(threadflow, flowop, 0);
6446084Saw148015 			return (FILEBENCH_ERROR);
6455184Sek110237 		}
6465673Saw148015 		(void) flowop_endop(threadflow, flowop, ret);
6475184Sek110237 
6485184Sek110237 		if ((ret == 0))
6495184Sek110237 			(void) lseek64(filedesc, 0, SEEK_SET);
6505184Sek110237 	}
6515184Sek110237 
6526084Saw148015 	return (FILEBENCH_OK);
6535184Sek110237 }
6545184Sek110237 
6555184Sek110237 #ifdef HAVE_AIO
6565184Sek110237 
/*
 * Asynchronous write section. An Asynchronous IO element
 * (aiolist_t) is used to associate the asynchronous write request with
 * its subsequent completion. This element includes an aiocb64 struct
 * that is used by posix aio_xxx calls to track the asynchronous writes.
 * The flowops aiowrite and aiowait result in calls to these posix
 * aio_xxx system routines to do the actual asynchronous write IO
 * operations.
 */
6665184Sek110237 
6675184Sek110237 
6685184Sek110237 /*
6695184Sek110237  * Allocates an asynchronous I/O list (aio, of type
6705184Sek110237  * aiolist_t) element. Adds it to the flowop thread's
6715184Sek110237  * threadflow aio list. Returns a pointer to the element.
6725184Sek110237  */
6735184Sek110237 static aiolist_t *
6745184Sek110237 aio_allocate(flowop_t *flowop)
6755184Sek110237 {
6765184Sek110237 	aiolist_t *aiolist;
6775184Sek110237 
6785184Sek110237 	if ((aiolist = malloc(sizeof (aiolist_t))) == NULL) {
6795184Sek110237 		filebench_log(LOG_ERROR, "malloc aiolist failed");
6805184Sek110237 		filebench_shutdown(1);
6815184Sek110237 	}
6825184Sek110237 
6835184Sek110237 	/* Add to list */
6845184Sek110237 	if (flowop->fo_thread->tf_aiolist == NULL) {
6855184Sek110237 		flowop->fo_thread->tf_aiolist = aiolist;
6865184Sek110237 		aiolist->al_next = NULL;
6875184Sek110237 	} else {
6885184Sek110237 		aiolist->al_next = flowop->fo_thread->tf_aiolist;
6895184Sek110237 		flowop->fo_thread->tf_aiolist = aiolist;
6905184Sek110237 	}
6915184Sek110237 	return (aiolist);
6925184Sek110237 }
6935184Sek110237 
6945184Sek110237 /*
6955184Sek110237  * Searches for the aiolist element that has a matching
6966084Saw148015  * completion block, aiocb. If none found returns FILEBENCH_ERROR. If
6975184Sek110237  * found, removes the aiolist element from flowop thread's
6986084Saw148015  * list and returns FILEBENCH_OK.
6995184Sek110237  */
7005184Sek110237 static int
7015184Sek110237 aio_deallocate(flowop_t *flowop, struct aiocb64 *aiocb)
7025184Sek110237 {
7035184Sek110237 	aiolist_t *aiolist = flowop->fo_thread->tf_aiolist;
7045184Sek110237 	aiolist_t *previous = NULL;
7055184Sek110237 	aiolist_t *match = NULL;
7065184Sek110237 
7075184Sek110237 	if (aiocb == NULL) {
7085184Sek110237 		filebench_log(LOG_ERROR, "null aiocb deallocate");
7096084Saw148015 		return (FILEBENCH_OK);
7105184Sek110237 	}
7115184Sek110237 
7125184Sek110237 	while (aiolist) {
7135184Sek110237 		if (aiocb == &(aiolist->al_aiocb)) {
7145184Sek110237 			match = aiolist;
7155184Sek110237 			break;
7165184Sek110237 		}
7175184Sek110237 		previous = aiolist;
7185184Sek110237 		aiolist = aiolist->al_next;
7195184Sek110237 	}
7205184Sek110237 
7215184Sek110237 	if (match == NULL)
7226084Saw148015 		return (FILEBENCH_ERROR);
7235184Sek110237 
7245184Sek110237 	/* Remove from the list */
7255184Sek110237 	if (previous)
7265184Sek110237 		previous->al_next = match->al_next;
7275184Sek110237 	else
7285184Sek110237 		flowop->fo_thread->tf_aiolist = match->al_next;
7295184Sek110237 
7306084Saw148015 	return (FILEBENCH_OK);
7315184Sek110237 }
7325184Sek110237 
7335184Sek110237 /*
7345184Sek110237  * Emulate posix aiowrite(). Determines which file to use,
7355184Sek110237  * either one file of a fileset, or the file associated
7365184Sek110237  * with a fileobj, allocates and fills an aiolist_t element
7375184Sek110237  * for the write, and issues the asynchronous write. This
7385184Sek110237  * operation is only valid for random IO, and returns an
7396084Saw148015  * error if the flowop is set for sequential IO. Returns
7406084Saw148015  * FILEBENCH_OK on success, FILEBENCH_NORSC if iosetup can't
7416084Saw148015  * obtain a file to open, and FILEBENCH_ERROR on any
7426084Saw148015  * encountered error.
7435184Sek110237  */
7445184Sek110237 static int
7455184Sek110237 flowoplib_aiowrite(threadflow_t *threadflow, flowop_t *flowop)
7465184Sek110237 {
7475673Saw148015 	caddr_t iobuf;
7486212Saw148015 	fbint_t wss;
7496212Saw148015 	fbint_t iosize;
7505184Sek110237 	int filedesc;
7516084Saw148015 	int ret;
7525184Sek110237 
7536212Saw148015 	iosize = avd_get_int(flowop->fo_iosize);
7546212Saw148015 
7556084Saw148015 	if ((ret = flowoplib_iosetup(threadflow, flowop, &wss, &iobuf,
7566212Saw148015 	    &filedesc, iosize)) != FILEBENCH_OK)
7576084Saw148015 		return (ret);
7585184Sek110237 
7596212Saw148015 	if (avd_get_bool(flowop->fo_random)) {
7605184Sek110237 		uint64_t fileoffset;
7615184Sek110237 		struct aiocb64 *aiocb;
7625184Sek110237 		aiolist_t *aiolist;
7635184Sek110237 
7645184Sek110237 		if (filebench_randomno64(&fileoffset,
7656212Saw148015 		    wss, iosize, NULL) == -1) {
7665184Sek110237 			filebench_log(LOG_ERROR,
7675184Sek110237 			    "file size smaller than IO size for thread %s",
7685184Sek110237 			    flowop->fo_name);
7696084Saw148015 			return (FILEBENCH_ERROR);
7705184Sek110237 		}
7715184Sek110237 
7725184Sek110237 		aiolist = aio_allocate(flowop);
7735184Sek110237 		aiolist->al_type = AL_WRITE;
7745184Sek110237 		aiocb = &aiolist->al_aiocb;
7755184Sek110237 
7765184Sek110237 		aiocb->aio_fildes = filedesc;
7775673Saw148015 		aiocb->aio_buf = iobuf;
7786212Saw148015 		aiocb->aio_nbytes = (size_t)iosize;
7795184Sek110237 		aiocb->aio_offset = (off64_t)fileoffset;
7805184Sek110237 		aiocb->aio_reqprio = 0;
7815184Sek110237 
7825184Sek110237 		filebench_log(LOG_DEBUG_IMPL,
7836286Saw148015 		    "aio fd=%d, bytes=%llu, offset=%llu",
7846286Saw148015 		    filedesc, (u_longlong_t)iosize, (u_longlong_t)fileoffset);
7855184Sek110237 
7865184Sek110237 		flowop_beginop(threadflow, flowop);
7875184Sek110237 		if (aio_write64(aiocb) < 0) {
7885184Sek110237 			filebench_log(LOG_ERROR, "aiowrite failed: %s",
7895184Sek110237 			    strerror(errno));
7905184Sek110237 			filebench_shutdown(1);
7915184Sek110237 		}
7926212Saw148015 		flowop_endop(threadflow, flowop, iosize);
7935184Sek110237 	} else {
7946084Saw148015 		return (FILEBENCH_ERROR);
7955184Sek110237 	}
7965184Sek110237 
7976084Saw148015 	return (FILEBENCH_OK);
7985184Sek110237 }
7995184Sek110237 
8005184Sek110237 
8015184Sek110237 
8025184Sek110237 #define	MAXREAP 4096
8035184Sek110237 
8045184Sek110237 /*
8055184Sek110237  * Emulate posix aiowait(). Waits for the completion of half the
8065184Sek110237  * outstanding asynchronous IOs, or a single IO, which ever is
8075184Sek110237  * larger. The routine will return after a sufficient number of
8085184Sek110237  * completed calls issued by any thread in the procflow have
8095184Sek110237  * completed, or a 1 second timout elapses. All completed
8105184Sek110237  * IO operations are deleted from the thread's aiolist.
8115184Sek110237  */
8125184Sek110237 static int
8135184Sek110237 flowoplib_aiowait(threadflow_t *threadflow, flowop_t *flowop)
8145184Sek110237 {
8155184Sek110237 	struct aiocb64 **worklist;
8165184Sek110237 	aiolist_t *aio = flowop->fo_thread->tf_aiolist;
8175184Sek110237 	int uncompleted = 0;
8185184Sek110237 
8195184Sek110237 	worklist = calloc(MAXREAP, sizeof (struct aiocb64 *));
8205184Sek110237 
8215184Sek110237 	/* Count the list of pending aios */
8225184Sek110237 	while (aio) {
8235184Sek110237 		uncompleted++;
8245184Sek110237 		aio = aio->al_next;
8255184Sek110237 	}
8265184Sek110237 
8275184Sek110237 	do {
8285184Sek110237 		uint_t ncompleted = 0;
8295184Sek110237 		uint_t todo;
8305184Sek110237 		struct timespec timeout;
8315184Sek110237 		int inprogress;
8325184Sek110237 		int i;
8335184Sek110237 
8345184Sek110237 		/* Wait for half of the outstanding requests */
8355184Sek110237 		timeout.tv_sec = 1;
8365184Sek110237 		timeout.tv_nsec = 0;
8375184Sek110237 
8385184Sek110237 		if (uncompleted > MAXREAP)
8395184Sek110237 			todo = MAXREAP;
8405184Sek110237 		else
8415184Sek110237 			todo = uncompleted / 2;
8425184Sek110237 
8435184Sek110237 		if (todo == 0)
8445184Sek110237 			todo = 1;
8455184Sek110237 
8465184Sek110237 		flowop_beginop(threadflow, flowop);
8475184Sek110237 
8485184Sek110237 #ifdef HAVE_AIOWAITN
8495184Sek110237 		if ((aio_waitn64((struct aiocb64 **)worklist,
8505184Sek110237 		    MAXREAP, &todo, &timeout) == -1) &&
8515184Sek110237 		    errno && (errno != ETIME)) {
8525184Sek110237 			filebench_log(LOG_ERROR,
8535184Sek110237 			    "aiowait failed: %s, outstanding = %d, "
8545184Sek110237 			    "ncompleted = %d ",
8555184Sek110237 			    strerror(errno), uncompleted, todo);
8565184Sek110237 		}
8575184Sek110237 
8585184Sek110237 		ncompleted = todo;
8595184Sek110237 		/* Take the  completed I/Os from the list */
8605184Sek110237 		inprogress = 0;
8615184Sek110237 		for (i = 0; i < ncompleted; i++) {
8625184Sek110237 			if ((aio_return64(worklist[i]) == -1) &&
8635184Sek110237 			    (errno == EINPROGRESS)) {
8645184Sek110237 				inprogress++;
8655184Sek110237 				continue;
8665184Sek110237 			}
8675184Sek110237 			if (aio_deallocate(flowop, worklist[i]) < 0) {
8685184Sek110237 				filebench_log(LOG_ERROR, "Could not remove "
8695184Sek110237 				    "aio from list ");
8705673Saw148015 				flowop_endop(threadflow, flowop, 0);
8716084Saw148015 				return (FILEBENCH_ERROR);
8725184Sek110237 			}
8735184Sek110237 		}
8745184Sek110237 
8755184Sek110237 		uncompleted -= ncompleted;
8765184Sek110237 		uncompleted += inprogress;
8775184Sek110237 
8785184Sek110237 #else
8795184Sek110237 
8805184Sek110237 		for (ncompleted = 0, inprogress = 0,
8815184Sek110237 		    aio = flowop->fo_thread->tf_aiolist;
8825184Sek110237 		    ncompleted < todo, aio != NULL; aio = aio->al_next) {
8836613Sek110237 			int result = aio_error64(&aio->al_aiocb);
8845184Sek110237 
8855184Sek110237 			if (result == EINPROGRESS) {
8865184Sek110237 				inprogress++;
8875184Sek110237 				continue;
8885184Sek110237 			}
8895184Sek110237 
8905184Sek110237 			if ((aio_return64(&aio->al_aiocb) == -1) || result) {
8915184Sek110237 				filebench_log(LOG_ERROR, "aio failed: %s",
8925184Sek110237 				    strerror(result));
8935184Sek110237 				continue;
8945184Sek110237 			}
8955184Sek110237 
8965184Sek110237 			ncompleted++;
8975184Sek110237 
8985184Sek110237 			if (aio_deallocate(flowop, &aio->al_aiocb) < 0) {
8995184Sek110237 				filebench_log(LOG_ERROR, "Could not remove aio "
9005184Sek110237 				    "from list ");
9015673Saw148015 				flowop_endop(threadflow, flowop, 0);
9026084Saw148015 				return (FILEBENCH_ERROR);
9035184Sek110237 			}
9045184Sek110237 		}
9055184Sek110237 
9065184Sek110237 		uncompleted -= ncompleted;
9075184Sek110237 
9085184Sek110237 #endif
9095184Sek110237 		filebench_log(LOG_DEBUG_SCRIPT,
9105184Sek110237 		    "aio2 completed %d ios, uncompleted = %d, inprogress = %d",
9115184Sek110237 		    ncompleted, uncompleted, inprogress);
9125184Sek110237 
9135184Sek110237 	} while (uncompleted > MAXREAP);
9145184Sek110237 
9155673Saw148015 	flowop_endop(threadflow, flowop, 0);
9165184Sek110237 
9175184Sek110237 	free(worklist);
9185184Sek110237 
9196084Saw148015 	return (FILEBENCH_OK);
9205184Sek110237 }
9215184Sek110237 
9225184Sek110237 #endif /* HAVE_AIO */
9235184Sek110237 
9245184Sek110237 /*
9255184Sek110237  * Initializes a "flowop_block" flowop. Specifically, it
9265184Sek110237  * initializes the flowop's fo_cv and unlocks the fo_lock.
9275184Sek110237  */
9285184Sek110237 static int
9295184Sek110237 flowoplib_block_init(flowop_t *flowop)
9305184Sek110237 {
9315184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "flow %s-%d block init address %zx",
9325184Sek110237 	    flowop->fo_name, flowop->fo_instance, &flowop->fo_cv);
9335184Sek110237 	(void) pthread_cond_init(&flowop->fo_cv, ipc_condattr());
9345184Sek110237 	(void) ipc_mutex_unlock(&flowop->fo_lock);
9355184Sek110237 
9366084Saw148015 	return (FILEBENCH_OK);
9375184Sek110237 }
9385184Sek110237 
9395184Sek110237 /*
9405184Sek110237  * Blocks the threadflow until woken up by flowoplib_wakeup.
9415184Sek110237  * The routine blocks on the flowop's fo_cv condition variable.
9425184Sek110237  */
9435184Sek110237 static int
9445184Sek110237 flowoplib_block(threadflow_t *threadflow, flowop_t *flowop)
9455184Sek110237 {
9465184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "flow %s-%d blocking at address %zx",
9475184Sek110237 	    flowop->fo_name, flowop->fo_instance, &flowop->fo_cv);
9485184Sek110237 	(void) ipc_mutex_lock(&flowop->fo_lock);
9495184Sek110237 
9505184Sek110237 	flowop_beginop(threadflow, flowop);
9515184Sek110237 	(void) pthread_cond_wait(&flowop->fo_cv, &flowop->fo_lock);
9525673Saw148015 	flowop_endop(threadflow, flowop, 0);
9535184Sek110237 
9545184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "flow %s-%d unblocking",
9555184Sek110237 	    flowop->fo_name, flowop->fo_instance);
9565184Sek110237 
9575184Sek110237 	(void) ipc_mutex_unlock(&flowop->fo_lock);
9585184Sek110237 
9596084Saw148015 	return (FILEBENCH_OK);
9605184Sek110237 }
9615184Sek110237 
9625184Sek110237 /*
9635184Sek110237  * Wakes up one or more target blocking flowops.
9645184Sek110237  * Sends broadcasts on the fo_cv condition variables of all
9655184Sek110237  * flowops on the target list, except those that are
9665184Sek110237  * FLOW_MASTER flowops. The target list consists of all
9675184Sek110237  * flowops whose name matches this flowop's "fo_targetname"
9685184Sek110237  * attribute. The target list is generated on the first
9695184Sek110237  * invocation, and the run will be shutdown if no targets
9706084Saw148015  * are found. Otherwise the routine always returns FILEBENCH_OK.
9715184Sek110237  */
9725184Sek110237 static int
9735184Sek110237 flowoplib_wakeup(threadflow_t *threadflow, flowop_t *flowop)
9745184Sek110237 {
9755184Sek110237 	flowop_t *target;
9765184Sek110237 
9775184Sek110237 	/* if this is the first wakeup, create the wakeup list */
9785184Sek110237 	if (flowop->fo_targets == NULL) {
9795184Sek110237 		flowop_t *result = flowop_find(flowop->fo_targetname);
9805184Sek110237 
9815184Sek110237 		flowop->fo_targets = result;
9825184Sek110237 		if (result == NULL) {
9835184Sek110237 			filebench_log(LOG_ERROR,
9845184Sek110237 			    "wakeup: could not find op %s for thread %s",
9855184Sek110237 			    flowop->fo_targetname,
9865184Sek110237 			    threadflow->tf_name);
9875184Sek110237 			filebench_shutdown(1);
9885184Sek110237 		}
9895184Sek110237 		while (result) {
9905184Sek110237 			result->fo_targetnext =
9915184Sek110237 			    result->fo_resultnext;
9925184Sek110237 			result = result->fo_resultnext;
9935184Sek110237 		}
9945184Sek110237 	}
9955184Sek110237 
9965184Sek110237 	target = flowop->fo_targets;
9975184Sek110237 
9985184Sek110237 	/* wakeup the targets */
9995184Sek110237 	while (target) {
10005184Sek110237 		if (target->fo_instance == FLOW_MASTER) {
10015184Sek110237 			target = target->fo_targetnext;
10025184Sek110237 			continue;
10035184Sek110237 		}
10045184Sek110237 		filebench_log(LOG_DEBUG_IMPL,
10055184Sek110237 		    "wakeup flow %s-%d at address %zx",
10065184Sek110237 		    target->fo_name,
10075184Sek110237 		    target->fo_instance,
10085184Sek110237 		    &target->fo_cv);
10095184Sek110237 
10105184Sek110237 		flowop_beginop(threadflow, flowop);
10115184Sek110237 		(void) ipc_mutex_lock(&target->fo_lock);
10125184Sek110237 		(void) pthread_cond_broadcast(&target->fo_cv);
10135184Sek110237 		(void) ipc_mutex_unlock(&target->fo_lock);
10145673Saw148015 		flowop_endop(threadflow, flowop, 0);
10155184Sek110237 
10165184Sek110237 		target = target->fo_targetnext;
10175184Sek110237 	}
10185184Sek110237 
10196084Saw148015 	return (FILEBENCH_OK);
10205184Sek110237 }
10215184Sek110237 
10225184Sek110237 /*
10235184Sek110237  * "think time" routines. the "hog" routine consumes cpu cycles as
10245184Sek110237  * it "thinks", while the "delay" flowop simply calls sleep() to delay
10255184Sek110237  * for a given number of seconds without consuming cpu cycles.
10265184Sek110237  */
10275184Sek110237 
10285184Sek110237 
10295184Sek110237 /*
10305184Sek110237  * Consumes CPU cycles and memory bandwidth by looping for
10315184Sek110237  * flowop->fo_value times. With each loop sets memory location
10325184Sek110237  * threadflow->tf_mem to 1.
10335184Sek110237  */
10345184Sek110237 static int
10355184Sek110237 flowoplib_hog(threadflow_t *threadflow, flowop_t *flowop)
10365184Sek110237 {
10376212Saw148015 	uint64_t value = avd_get_int(flowop->fo_value);
10385184Sek110237 	int i;
10395184Sek110237 
10405673Saw148015 	filebench_log(LOG_DEBUG_IMPL, "hog enter");
10415184Sek110237 	flowop_beginop(threadflow, flowop);
10425673Saw148015 	if (threadflow->tf_mem != NULL) {
10435673Saw148015 		for (i = 0; i < value; i++)
10445673Saw148015 			*(threadflow->tf_mem) = 1;
10455673Saw148015 	}
10465673Saw148015 	flowop_endop(threadflow, flowop, 0);
10475184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "hog exit");
10486084Saw148015 	return (FILEBENCH_OK);
10495184Sek110237 }
10505184Sek110237 
10515184Sek110237 
10525184Sek110237 /*
10535184Sek110237  * Delays for fo_value seconds.
10545184Sek110237  */
10555184Sek110237 static int
10565184Sek110237 flowoplib_delay(threadflow_t *threadflow, flowop_t *flowop)
10575184Sek110237 {
10586212Saw148015 	int value = avd_get_int(flowop->fo_value);
10595184Sek110237 
10605184Sek110237 	flowop_beginop(threadflow, flowop);
10615184Sek110237 	(void) sleep(value);
10625673Saw148015 	flowop_endop(threadflow, flowop, 0);
10636084Saw148015 	return (FILEBENCH_OK);
10645184Sek110237 }
10655184Sek110237 
/*
 * Rate limiting routines. This is the event consuming half of the
 * event system. Each of the four following routines will limit the rate
 * to one unit of either calls, issued I/O operations, issued filebench
 * operations, or I/O bandwidth. Since there is only one event generator,
 * the events will be divided among multiple instances of an event
 * consumer, and further divided among different consumers if more than
 * one has been defined. There is no mechanism to enforce equal sharing
 * of events.
 */
10765184Sek110237 
10775184Sek110237 /*
10785184Sek110237  * Completes one invocation per posted event. If eventgen_q
10795184Sek110237  * has an event count greater than zero, one will be removed
10805184Sek110237  * (count decremented), otherwise the calling thread will
10815184Sek110237  * block until another event has been posted. Always returns 0
10825184Sek110237  */
10835184Sek110237 static int
10845184Sek110237 flowoplib_eventlimit(threadflow_t *threadflow, flowop_t *flowop)
10855184Sek110237 {
10865184Sek110237 	/* Immediately bail if not set/enabled */
10877946SAndrew.W.Wilson@sun.com 	if (filebench_shm->shm_eventgen_hz == NULL)
10886084Saw148015 		return (FILEBENCH_OK);
10895184Sek110237 
10905184Sek110237 	if (flowop->fo_initted == 0) {
10915184Sek110237 		filebench_log(LOG_DEBUG_IMPL, "rate %zx %s-%d locking",
10925184Sek110237 		    flowop, threadflow->tf_name, threadflow->tf_instance);
10935184Sek110237 		flowop->fo_initted = 1;
10945184Sek110237 	}
10955184Sek110237 
10965184Sek110237 	flowop_beginop(threadflow, flowop);
10977946SAndrew.W.Wilson@sun.com 	while (filebench_shm->shm_eventgen_hz != NULL) {
10986391Saw148015 		(void) ipc_mutex_lock(&filebench_shm->shm_eventgen_lock);
10996391Saw148015 		if (filebench_shm->shm_eventgen_q > 0) {
11006391Saw148015 			filebench_shm->shm_eventgen_q--;
11016391Saw148015 			(void) ipc_mutex_unlock(
11026391Saw148015 			    &filebench_shm->shm_eventgen_lock);
11035184Sek110237 			break;
11045184Sek110237 		}
11056391Saw148015 		(void) pthread_cond_wait(&filebench_shm->shm_eventgen_cv,
11066391Saw148015 		    &filebench_shm->shm_eventgen_lock);
11076391Saw148015 		(void) ipc_mutex_unlock(&filebench_shm->shm_eventgen_lock);
11085184Sek110237 	}
11095673Saw148015 	flowop_endop(threadflow, flowop, 0);
11106084Saw148015 	return (FILEBENCH_OK);
11115184Sek110237 }
11125184Sek110237 
11136701Saw148015 static int
11146701Saw148015 flowoplib_event_find_target(threadflow_t *threadflow, flowop_t *flowop)
11156701Saw148015 {
11166701Saw148015 	if (flowop->fo_targetname[0] != '\0') {
11176701Saw148015 
11186701Saw148015 		/* Try to use statistics from specific flowop */
11196701Saw148015 		flowop->fo_targets =
11206701Saw148015 		    flowop_find_from_list(flowop->fo_targetname,
11216701Saw148015 		    threadflow->tf_thrd_fops);
11226701Saw148015 		if (flowop->fo_targets == NULL) {
11236701Saw148015 			filebench_log(LOG_ERROR,
11246701Saw148015 			    "limit target: could not find flowop %s",
11256701Saw148015 			    flowop->fo_targetname);
11266701Saw148015 			filebench_shutdown(1);
11276701Saw148015 			return (FILEBENCH_ERROR);
11286701Saw148015 		}
11296701Saw148015 	} else {
11306701Saw148015 		/* use total workload statistics */
11316701Saw148015 		flowop->fo_targets = NULL;
11326701Saw148015 	}
11336701Saw148015 	return (FILEBENCH_OK);
11346701Saw148015 }
11356701Saw148015 
11365184Sek110237 /*
11375184Sek110237  * Blocks the calling thread if the number of issued I/O
11385184Sek110237  * operations exceeds the number of posted events, thus
11395184Sek110237  * limiting the average I/O operation rate to the rate
11406084Saw148015  * specified by eventgen_hz. Always returns FILEBENCH_OK.
11415184Sek110237  */
11425184Sek110237 static int
11435184Sek110237 flowoplib_iopslimit(threadflow_t *threadflow, flowop_t *flowop)
11445184Sek110237 {
11455184Sek110237 	uint64_t iops;
11465184Sek110237 	uint64_t delta;
11475673Saw148015 	uint64_t events;
11485184Sek110237 
11495184Sek110237 	/* Immediately bail if not set/enabled */
11507946SAndrew.W.Wilson@sun.com 	if (filebench_shm->shm_eventgen_hz == NULL)
11516084Saw148015 		return (FILEBENCH_OK);
11525184Sek110237 
11535184Sek110237 	if (flowop->fo_initted == 0) {
11545184Sek110237 		filebench_log(LOG_DEBUG_IMPL, "rate %zx %s-%d locking",
11555184Sek110237 		    flowop, threadflow->tf_name, threadflow->tf_instance);
11565184Sek110237 		flowop->fo_initted = 1;
11576701Saw148015 
11586701Saw148015 		if (flowoplib_event_find_target(threadflow, flowop)
11596701Saw148015 		    == FILEBENCH_ERROR)
11606701Saw148015 			return (FILEBENCH_ERROR);
11616701Saw148015 
11626701Saw148015 		if (flowop->fo_targets && ((flowop->fo_targets->fo_attrs &
11636701Saw148015 		    (FLOW_ATTR_READ | FLOW_ATTR_WRITE)) == 0)) {
11646701Saw148015 			filebench_log(LOG_ERROR,
11656701Saw148015 			    "WARNING: Flowop %s does no IO",
11666701Saw148015 			    flowop->fo_targets->fo_name);
11676701Saw148015 			filebench_shutdown(1);
11686701Saw148015 			return (FILEBENCH_ERROR);
11696701Saw148015 		}
11705184Sek110237 	}
11715184Sek110237 
11726701Saw148015 	if (flowop->fo_targets) {
11736701Saw148015 		/*
11746701Saw148015 		 * Note that fs_count is already the sum of fs_rcount
11756701Saw148015 		 * and fs_wcount if looking at a single flowop.
11766701Saw148015 		 */
11776701Saw148015 		iops = flowop->fo_targets->fo_stats.fs_count;
11786701Saw148015 	} else {
11796701Saw148015 		(void) ipc_mutex_lock(&controlstats_lock);
11806701Saw148015 		iops = (controlstats.fs_rcount +
11816701Saw148015 		    controlstats.fs_wcount);
11826701Saw148015 		(void) ipc_mutex_unlock(&controlstats_lock);
11836701Saw148015 	}
11845184Sek110237 
11855184Sek110237 	/* Is this the first time around */
11865184Sek110237 	if (flowop->fo_tputlast == 0) {
11875184Sek110237 		flowop->fo_tputlast = iops;
11886084Saw148015 		return (FILEBENCH_OK);
11895184Sek110237 	}
11905184Sek110237 
11915184Sek110237 	delta = iops - flowop->fo_tputlast;
11925184Sek110237 	flowop->fo_tputbucket -= delta;
11935184Sek110237 	flowop->fo_tputlast = iops;
11945184Sek110237 
11955184Sek110237 	/* No need to block if the q isn't empty */
11965184Sek110237 	if (flowop->fo_tputbucket >= 0LL) {
11975673Saw148015 		flowop_endop(threadflow, flowop, 0);
11986084Saw148015 		return (FILEBENCH_OK);
11995184Sek110237 	}
12005184Sek110237 
12015184Sek110237 	iops = flowop->fo_tputbucket * -1;
12025184Sek110237 	events = iops;
12035184Sek110237 
12045184Sek110237 	flowop_beginop(threadflow, flowop);
12057946SAndrew.W.Wilson@sun.com 	while (filebench_shm->shm_eventgen_hz != NULL) {
12065184Sek110237 
12076391Saw148015 		(void) ipc_mutex_lock(&filebench_shm->shm_eventgen_lock);
12086391Saw148015 		if (filebench_shm->shm_eventgen_q >= events) {
12096391Saw148015 			filebench_shm->shm_eventgen_q -= events;
12106391Saw148015 			(void) ipc_mutex_unlock(
12116391Saw148015 			    &filebench_shm->shm_eventgen_lock);
12125184Sek110237 			flowop->fo_tputbucket += events;
12135184Sek110237 			break;
12145184Sek110237 		}
12156391Saw148015 		(void) pthread_cond_wait(&filebench_shm->shm_eventgen_cv,
12166391Saw148015 		    &filebench_shm->shm_eventgen_lock);
12176391Saw148015 		(void) ipc_mutex_unlock(&filebench_shm->shm_eventgen_lock);
12185184Sek110237 	}
12195673Saw148015 	flowop_endop(threadflow, flowop, 0);
12205184Sek110237 
12216084Saw148015 	return (FILEBENCH_OK);
12225184Sek110237 }
12235184Sek110237 
12245184Sek110237 /*
12255184Sek110237  * Blocks the calling thread if the number of issued filebench
12265184Sek110237  * operations exceeds the number of posted events, thus limiting
12275184Sek110237  * the average filebench operation rate to the rate specified by
12286084Saw148015  * eventgen_hz. Always returns FILEBENCH_OK.
12295184Sek110237  */
12305184Sek110237 static int
12315184Sek110237 flowoplib_opslimit(threadflow_t *threadflow, flowop_t *flowop)
12325184Sek110237 {
12335184Sek110237 	uint64_t ops;
12345184Sek110237 	uint64_t delta;
12355673Saw148015 	uint64_t events;
12365184Sek110237 
12375184Sek110237 	/* Immediately bail if not set/enabled */
12387946SAndrew.W.Wilson@sun.com 	if (filebench_shm->shm_eventgen_hz == NULL)
12396084Saw148015 		return (FILEBENCH_OK);
12405184Sek110237 
12415184Sek110237 	if (flowop->fo_initted == 0) {
12425184Sek110237 		filebench_log(LOG_DEBUG_IMPL, "rate %zx %s-%d locking",
12435184Sek110237 		    flowop, threadflow->tf_name, threadflow->tf_instance);
12445184Sek110237 		flowop->fo_initted = 1;
12456701Saw148015 
12466701Saw148015 		if (flowoplib_event_find_target(threadflow, flowop)
12476701Saw148015 		    == FILEBENCH_ERROR)
12486701Saw148015 			return (FILEBENCH_ERROR);
12495184Sek110237 	}
12505184Sek110237 
12516701Saw148015 	if (flowop->fo_targets) {
12526701Saw148015 		ops = flowop->fo_targets->fo_stats.fs_count;
12536701Saw148015 	} else {
12546701Saw148015 		(void) ipc_mutex_lock(&controlstats_lock);
12556701Saw148015 		ops = controlstats.fs_count;
12566701Saw148015 		(void) ipc_mutex_unlock(&controlstats_lock);
12576701Saw148015 	}
12585184Sek110237 
12595184Sek110237 	/* Is this the first time around */
12605184Sek110237 	if (flowop->fo_tputlast == 0) {
12615184Sek110237 		flowop->fo_tputlast = ops;
12626084Saw148015 		return (FILEBENCH_OK);
12635184Sek110237 	}
12645184Sek110237 
12655184Sek110237 	delta = ops - flowop->fo_tputlast;
12665184Sek110237 	flowop->fo_tputbucket -= delta;
12675184Sek110237 	flowop->fo_tputlast = ops;
12685184Sek110237 
12695184Sek110237 	/* No need to block if the q isn't empty */
12705184Sek110237 	if (flowop->fo_tputbucket >= 0LL) {
12715673Saw148015 		flowop_endop(threadflow, flowop, 0);
12726084Saw148015 		return (FILEBENCH_OK);
12735184Sek110237 	}
12745184Sek110237 
12755184Sek110237 	ops = flowop->fo_tputbucket * -1;
12765184Sek110237 	events = ops;
12775184Sek110237 
12785184Sek110237 	flowop_beginop(threadflow, flowop);
12797946SAndrew.W.Wilson@sun.com 	while (filebench_shm->shm_eventgen_hz != NULL) {
12806391Saw148015 		(void) ipc_mutex_lock(&filebench_shm->shm_eventgen_lock);
12816391Saw148015 		if (filebench_shm->shm_eventgen_q >= events) {
12826391Saw148015 			filebench_shm->shm_eventgen_q -= events;
12836391Saw148015 			(void) ipc_mutex_unlock(
12846391Saw148015 			    &filebench_shm->shm_eventgen_lock);
12855184Sek110237 			flowop->fo_tputbucket += events;
12865184Sek110237 			break;
12875184Sek110237 		}
12886391Saw148015 		(void) pthread_cond_wait(&filebench_shm->shm_eventgen_cv,
12896391Saw148015 		    &filebench_shm->shm_eventgen_lock);
12906391Saw148015 		(void) ipc_mutex_unlock(&filebench_shm->shm_eventgen_lock);
12915184Sek110237 	}
12925673Saw148015 	flowop_endop(threadflow, flowop, 0);
12935184Sek110237 
12946084Saw148015 	return (FILEBENCH_OK);
12955184Sek110237 }
12965184Sek110237 
12975184Sek110237 
12985184Sek110237 /*
12995184Sek110237  * Blocks the calling thread if the number of bytes of I/O
13005184Sek110237  * issued exceeds one megabyte times the number of posted
13015184Sek110237  * events, thus limiting the average I/O byte rate to one
13025184Sek110237  * megabyte times the event rate as set by eventgen_hz.
13036084Saw148015  * Always retuns FILEBENCH_OK.
13045184Sek110237  */
13055184Sek110237 static int
13065184Sek110237 flowoplib_bwlimit(threadflow_t *threadflow, flowop_t *flowop)
13075184Sek110237 {
13085184Sek110237 	uint64_t bytes;
13095184Sek110237 	uint64_t delta;
13105673Saw148015 	uint64_t events;
13115184Sek110237 
13125184Sek110237 	/* Immediately bail if not set/enabled */
13137946SAndrew.W.Wilson@sun.com 	if (filebench_shm->shm_eventgen_hz == NULL)
13146084Saw148015 		return (FILEBENCH_OK);
13155184Sek110237 
13165184Sek110237 	if (flowop->fo_initted == 0) {
13175184Sek110237 		filebench_log(LOG_DEBUG_IMPL, "rate %zx %s-%d locking",
13185184Sek110237 		    flowop, threadflow->tf_name, threadflow->tf_instance);
13195184Sek110237 		flowop->fo_initted = 1;
13206701Saw148015 
13216701Saw148015 		if (flowoplib_event_find_target(threadflow, flowop)
13226701Saw148015 		    == FILEBENCH_ERROR)
13236701Saw148015 			return (FILEBENCH_ERROR);
13246701Saw148015 
13256701Saw148015 		if ((flowop->fo_targets) &&
13266701Saw148015 		    ((flowop->fo_targets->fo_attrs &
13276701Saw148015 		    (FLOW_ATTR_READ | FLOW_ATTR_WRITE)) == 0)) {
13286701Saw148015 			filebench_log(LOG_ERROR,
13296701Saw148015 			    "WARNING: Flowop %s does no Reads or Writes",
13306701Saw148015 			    flowop->fo_targets->fo_name);
13316701Saw148015 			filebench_shutdown(1);
13326701Saw148015 			return (FILEBENCH_ERROR);
13336701Saw148015 		}
13345184Sek110237 	}
13355184Sek110237 
13366701Saw148015 	if (flowop->fo_targets) {
13376701Saw148015 		/*
13386701Saw148015 		 * Note that fs_bytes is already the sum of fs_rbytes
13396701Saw148015 		 * and fs_wbytes if looking at a single flowop.
13406701Saw148015 		 */
13416701Saw148015 		bytes = flowop->fo_targets->fo_stats.fs_bytes;
13426701Saw148015 	} else {
13436701Saw148015 		(void) ipc_mutex_lock(&controlstats_lock);
13446701Saw148015 		bytes = (controlstats.fs_rbytes +
13456701Saw148015 		    controlstats.fs_wbytes);
13466701Saw148015 		(void) ipc_mutex_unlock(&controlstats_lock);
13476701Saw148015 	}
13486701Saw148015 
13496701Saw148015 	/* Is this the first time around? */
13505184Sek110237 	if (flowop->fo_tputlast == 0) {
13515184Sek110237 		flowop->fo_tputlast = bytes;
13526084Saw148015 		return (FILEBENCH_OK);
13535184Sek110237 	}
13545184Sek110237 
13555184Sek110237 	delta = bytes - flowop->fo_tputlast;
13565184Sek110237 	flowop->fo_tputbucket -= delta;
13575184Sek110237 	flowop->fo_tputlast = bytes;
13585184Sek110237 
13595184Sek110237 	/* No need to block if the q isn't empty */
13605184Sek110237 	if (flowop->fo_tputbucket >= 0LL) {
13615673Saw148015 		flowop_endop(threadflow, flowop, 0);
13626084Saw148015 		return (FILEBENCH_OK);
13635184Sek110237 	}
13645184Sek110237 
13655184Sek110237 	bytes = flowop->fo_tputbucket * -1;
13665184Sek110237 	events = (bytes / MB) + 1;
13675184Sek110237 
13686286Saw148015 	filebench_log(LOG_DEBUG_IMPL, "%llu bytes, %llu events",
13696286Saw148015 	    (u_longlong_t)bytes, (u_longlong_t)events);
13705184Sek110237 
13715184Sek110237 	flowop_beginop(threadflow, flowop);
13727946SAndrew.W.Wilson@sun.com 	while (filebench_shm->shm_eventgen_hz != NULL) {
13736391Saw148015 		(void) ipc_mutex_lock(&filebench_shm->shm_eventgen_lock);
13746391Saw148015 		if (filebench_shm->shm_eventgen_q >= events) {
13756391Saw148015 			filebench_shm->shm_eventgen_q -= events;
13766391Saw148015 			(void) ipc_mutex_unlock(
13776391Saw148015 			    &filebench_shm->shm_eventgen_lock);
13785184Sek110237 			flowop->fo_tputbucket += (events * MB);
13795184Sek110237 			break;
13805184Sek110237 		}
13816391Saw148015 		(void) pthread_cond_wait(&filebench_shm->shm_eventgen_cv,
13826391Saw148015 		    &filebench_shm->shm_eventgen_lock);
13836391Saw148015 		(void) ipc_mutex_unlock(&filebench_shm->shm_eventgen_lock);
13845184Sek110237 	}
13855673Saw148015 	flowop_endop(threadflow, flowop, 0);
13865184Sek110237 
13876084Saw148015 	return (FILEBENCH_OK);
13885184Sek110237 }
13895184Sek110237 
13905184Sek110237 /*
13915184Sek110237  * These flowops terminate a benchmark run when either the specified
13925184Sek110237  * number of bytes of I/O (flowoplib_finishonbytes) or the specified
13935184Sek110237  * number of I/O operations (flowoplib_finishoncount) have been generated.
13945184Sek110237  */
13955184Sek110237 
13965184Sek110237 
13975184Sek110237 /*
13985184Sek110237  * Stop filebench run when specified number of I/O bytes have been
13996212Saw148015  * transferred. Compares controlstats.fs_bytes with flowop->value,
14005184Sek110237  * and if greater returns 1, stopping the run, if not, returns 0
14015184Sek110237  * to continue running.
14025184Sek110237  */
14035184Sek110237 static int
14045184Sek110237 flowoplib_finishonbytes(threadflow_t *threadflow, flowop_t *flowop)
14055184Sek110237 {
14066701Saw148015 	uint64_t bytes_io;		/* Bytes of I/O delivered so far */
14076701Saw148015 	uint64_t byte_lim = flowop->fo_constvalue;  /* Total Bytes desired */
14086701Saw148015 						    /* Uses constant value */
14096701Saw148015 
14106701Saw148015 	if (flowop->fo_initted == 0) {
14116701Saw148015 		filebench_log(LOG_DEBUG_IMPL, "rate %zx %s-%d locking",
14126701Saw148015 		    flowop, threadflow->tf_name, threadflow->tf_instance);
14136701Saw148015 		flowop->fo_initted = 1;
14146701Saw148015 
14156701Saw148015 		if (flowoplib_event_find_target(threadflow, flowop)
14166701Saw148015 		    == FILEBENCH_ERROR)
14176701Saw148015 			return (FILEBENCH_ERROR);
14186701Saw148015 
14196701Saw148015 		if ((flowop->fo_targets) &&
14206701Saw148015 		    ((flowop->fo_targets->fo_attrs &
14216701Saw148015 		    (FLOW_ATTR_READ | FLOW_ATTR_WRITE)) == 0)) {
14226701Saw148015 			filebench_log(LOG_ERROR,
14236701Saw148015 			    "WARNING: Flowop %s does no Reads or Writes",
14246701Saw148015 			    flowop->fo_targets->fo_name);
14256701Saw148015 			filebench_shutdown(1);
14266701Saw148015 			return (FILEBENCH_ERROR);
14276701Saw148015 		}
14286701Saw148015 	}
14296701Saw148015 
14306701Saw148015 	if (flowop->fo_targets) {
14316701Saw148015 		bytes_io = flowop->fo_targets->fo_stats.fs_bytes;
14326701Saw148015 	} else {
14336701Saw148015 		(void) ipc_mutex_lock(&controlstats_lock);
14346701Saw148015 		bytes_io = controlstats.fs_bytes;
14356701Saw148015 		(void) ipc_mutex_unlock(&controlstats_lock);
14366701Saw148015 	}
14375184Sek110237 
14385184Sek110237 	flowop_beginop(threadflow, flowop);
14396701Saw148015 	if (bytes_io > byte_lim) {
14405673Saw148015 		flowop_endop(threadflow, flowop, 0);
14416084Saw148015 		return (FILEBENCH_DONE);
14425184Sek110237 	}
14435673Saw148015 	flowop_endop(threadflow, flowop, 0);
14445184Sek110237 
14456084Saw148015 	return (FILEBENCH_OK);
14465184Sek110237 }
14475184Sek110237 
14485184Sek110237 /*
14495184Sek110237  * Stop filebench run when specified number of I/O operations have
14505184Sek110237  * been performed. Compares controlstats.fs_count with *flowop->value,
14516084Saw148015  * and if greater returns 1, stopping the run, if not, returns FILEBENCH_OK
14526084Saw148015  * to continue running.
14535184Sek110237  */
14545184Sek110237 static int
14555184Sek110237 flowoplib_finishoncount(threadflow_t *threadflow, flowop_t *flowop)
14565184Sek110237 {
14575184Sek110237 	uint64_t ops;
14586212Saw148015 	uint64_t count = flowop->fo_constvalue; /* use constant value */
14595184Sek110237 
14606701Saw148015 	if (flowop->fo_initted == 0) {
14616701Saw148015 		filebench_log(LOG_DEBUG_IMPL, "rate %zx %s-%d locking",
14626701Saw148015 		    flowop, threadflow->tf_name, threadflow->tf_instance);
14636701Saw148015 		flowop->fo_initted = 1;
14646701Saw148015 
14656701Saw148015 		if (flowoplib_event_find_target(threadflow, flowop)
14666701Saw148015 		    == FILEBENCH_ERROR)
14676701Saw148015 			return (FILEBENCH_ERROR);
14686701Saw148015 	}
14696701Saw148015 
14706701Saw148015 	if (flowop->fo_targets) {
14716701Saw148015 		ops = flowop->fo_targets->fo_stats.fs_count;
14726701Saw148015 	} else {
14736701Saw148015 		(void) ipc_mutex_lock(&controlstats_lock);
14746701Saw148015 		ops = controlstats.fs_count;
14756701Saw148015 		(void) ipc_mutex_unlock(&controlstats_lock);
14766701Saw148015 	}
14775184Sek110237 
14785184Sek110237 	flowop_beginop(threadflow, flowop);
14796084Saw148015 	if (ops >= count) {
14805673Saw148015 		flowop_endop(threadflow, flowop, 0);
14816084Saw148015 		return (FILEBENCH_DONE);
14825184Sek110237 	}
14835673Saw148015 	flowop_endop(threadflow, flowop, 0);
14845184Sek110237 
14856084Saw148015 	return (FILEBENCH_OK);
14865184Sek110237 }
14875184Sek110237 
14885184Sek110237 /*
14895184Sek110237  * Semaphore synchronization using either System V semaphores or
14905184Sek110237  * posix semaphores. If System V semaphores are available, they will be
14915184Sek110237  * used, otherwise posix semaphores will be used.
14925184Sek110237  */
14935184Sek110237 
14945184Sek110237 
14955184Sek110237 /*
14965184Sek110237  * Initializes the filebench "block on semaphore" flowop.
14975184Sek110237  * If System V semaphores are implemented, the routine
14985184Sek110237  * initializes the System V semaphore subsystem if it hasn't
14995184Sek110237  * already been initialized, also allocates a pair of semids
15005184Sek110237  * and initializes the highwater System V semaphore.
15015184Sek110237  * If no System V semaphores, then does nothing special.
15026084Saw148015  * Returns FILEBENCH_ERROR if it cannot acquire a set of System V semphores
15036084Saw148015  * or if the initial post to the semaphore set fails. Returns FILEBENCH_OK
15045184Sek110237  * on success.
15055184Sek110237  */
15065184Sek110237 static int
15075184Sek110237 flowoplib_semblock_init(flowop_t *flowop)
15085184Sek110237 {
15095184Sek110237 
15105184Sek110237 #ifdef HAVE_SYSV_SEM
15116391Saw148015 	int sys_semid;
15125184Sek110237 	struct sembuf sbuf[2];
15135184Sek110237 	int highwater;
15145184Sek110237 
15155184Sek110237 	ipc_seminit();
15165184Sek110237 
15175184Sek110237 	flowop->fo_semid_lw = ipc_semidalloc();
15185184Sek110237 	flowop->fo_semid_hw = ipc_semidalloc();
15195184Sek110237 
15205184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "flow %s-%d semblock init semid=%x",
15215184Sek110237 	    flowop->fo_name, flowop->fo_instance, flowop->fo_semid_lw);
15225184Sek110237 
15236391Saw148015 	sys_semid = filebench_shm->shm_sys_semid;
15245184Sek110237 
15255184Sek110237 	if ((highwater = flowop->fo_semid_hw) == 0)
15266212Saw148015 		highwater = flowop->fo_constvalue; /* use constant value */
15275184Sek110237 
15285184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "setting highwater to : %d", highwater);
15295184Sek110237 
15305673Saw148015 	sbuf[0].sem_num = (short)highwater;
15316212Saw148015 	sbuf[0].sem_op = avd_get_int(flowop->fo_highwater);
15325184Sek110237 	sbuf[0].sem_flg = 0;
15336391Saw148015 	if ((semop(sys_semid, &sbuf[0], 1) == -1) && errno) {
15345184Sek110237 		filebench_log(LOG_ERROR, "semblock init post failed: %s (%d,"
15355184Sek110237 		    "%d)", strerror(errno), sbuf[0].sem_num, sbuf[0].sem_op);
15366084Saw148015 		return (FILEBENCH_ERROR);
15375184Sek110237 	}
15385184Sek110237 #else
15395184Sek110237 	filebench_log(LOG_DEBUG_IMPL,
15405184Sek110237 	    "flow %s-%d semblock init with posix semaphore",
15415184Sek110237 	    flowop->fo_name, flowop->fo_instance);
15425184Sek110237 
15435184Sek110237 	sem_init(&flowop->fo_sem, 1, 0);
15445184Sek110237 #endif	/* HAVE_SYSV_SEM */
15455184Sek110237 
15466212Saw148015 	if (!(avd_get_bool(flowop->fo_blocking)))
15475184Sek110237 		(void) ipc_mutex_unlock(&flowop->fo_lock);
15485184Sek110237 
15496084Saw148015 	return (FILEBENCH_OK);
15505184Sek110237 }
15515184Sek110237 
15525184Sek110237 /*
15535184Sek110237  * Releases the semids for the System V semaphore allocated
15545184Sek110237  * to this flowop. If not using System V semaphores, then
15556084Saw148015  * it is effectively just a no-op.
15565184Sek110237  */
15575184Sek110237 static void
15585184Sek110237 flowoplib_semblock_destruct(flowop_t *flowop)
15595184Sek110237 {
15605184Sek110237 #ifdef HAVE_SYSV_SEM
15615184Sek110237 	ipc_semidfree(flowop->fo_semid_lw);
15625184Sek110237 	ipc_semidfree(flowop->fo_semid_hw);
15635184Sek110237 #else
15645184Sek110237 	sem_destroy(&flowop->fo_sem);
15655184Sek110237 #endif /* HAVE_SYSV_SEM */
15665184Sek110237 }
15675184Sek110237 
15685184Sek110237 /*
15695184Sek110237  * Attempts to pass a System V or posix semaphore as appropriate,
15706084Saw148015  * and blocks if necessary. Returns FILEBENCH_ERROR if a set of System V
15715184Sek110237  * semphores is not available or cannot be acquired, or if the initial
15726084Saw148015  * post to the semaphore set fails. Returns FILEBENCH_OK on success.
15735184Sek110237  */
15745184Sek110237 static int
15755184Sek110237 flowoplib_semblock(threadflow_t *threadflow, flowop_t *flowop)
15765184Sek110237 {
15775184Sek110237 
15785184Sek110237 #ifdef HAVE_SYSV_SEM
15795184Sek110237 	struct sembuf sbuf[2];
15806212Saw148015 	int value = avd_get_int(flowop->fo_value);
15816391Saw148015 	int sys_semid;
15825184Sek110237 	struct timespec timeout;
15835184Sek110237 
15846391Saw148015 	sys_semid = filebench_shm->shm_sys_semid;
15855184Sek110237 
15865184Sek110237 	filebench_log(LOG_DEBUG_IMPL,
15875184Sek110237 	    "flow %s-%d sem blocking on id %x num %x value %d",
15886391Saw148015 	    flowop->fo_name, flowop->fo_instance, sys_semid,
15895184Sek110237 	    flowop->fo_semid_hw, value);
15905184Sek110237 
15915184Sek110237 	/* Post, decrement the increment the hw queue */
15925184Sek110237 	sbuf[0].sem_num = flowop->fo_semid_hw;
15935673Saw148015 	sbuf[0].sem_op = (short)value;
15945184Sek110237 	sbuf[0].sem_flg = 0;
15955184Sek110237 	sbuf[1].sem_num = flowop->fo_semid_lw;
15965184Sek110237 	sbuf[1].sem_op = value * -1;
15975184Sek110237 	sbuf[1].sem_flg = 0;
15985184Sek110237 	timeout.tv_sec = 600;
15995184Sek110237 	timeout.tv_nsec = 0;
16005184Sek110237 
16016212Saw148015 	if (avd_get_bool(flowop->fo_blocking))
16025184Sek110237 		(void) ipc_mutex_unlock(&flowop->fo_lock);
16035184Sek110237 
16045184Sek110237 	flowop_beginop(threadflow, flowop);
16055184Sek110237 
16065184Sek110237 #ifdef HAVE_SEMTIMEDOP
16076391Saw148015 	(void) semtimedop(sys_semid, &sbuf[0], 1, &timeout);
16086391Saw148015 	(void) semtimedop(sys_semid, &sbuf[1], 1, &timeout);
16095184Sek110237 #else
16106391Saw148015 	(void) semop(sys_semid, &sbuf[0], 1);
16116391Saw148015 	(void) semop(sys_semid, &sbuf[1], 1);
16125184Sek110237 #endif /* HAVE_SEMTIMEDOP */
16135184Sek110237 
16146212Saw148015 	if (avd_get_bool(flowop->fo_blocking))
16155184Sek110237 		(void) ipc_mutex_lock(&flowop->fo_lock);
16165184Sek110237 
16175673Saw148015 	flowop_endop(threadflow, flowop, 0);
16185184Sek110237 
16195184Sek110237 #else
16206212Saw148015 	int value = avd_get_int(flowop->fo_value);
16215184Sek110237 	int i;
16225184Sek110237 
16235184Sek110237 	filebench_log(LOG_DEBUG_IMPL,
16245184Sek110237 	    "flow %s-%d sem blocking on posix semaphore",
16255184Sek110237 	    flowop->fo_name, flowop->fo_instance);
16265184Sek110237 
16275184Sek110237 	/* Decrement sem by value */
16285184Sek110237 	for (i = 0; i < value; i++) {
16295184Sek110237 		if (sem_wait(&flowop->fo_sem) == -1) {
16305184Sek110237 			filebench_log(LOG_ERROR, "semop wait failed");
16316084Saw148015 			return (FILEBENCH_ERROR);
16325184Sek110237 		}
16335184Sek110237 	}
16345184Sek110237 
16355184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "flow %s-%d sem unblocking",
16365184Sek110237 	    flowop->fo_name, flowop->fo_instance);
16375184Sek110237 #endif /* HAVE_SYSV_SEM */
16385184Sek110237 
16396084Saw148015 	return (FILEBENCH_OK);
16405184Sek110237 }
16415184Sek110237 
16425184Sek110237 /*
16436084Saw148015  * Calls ipc_seminit(). Always returns FILEBENCH_OK.
16445184Sek110237  */
16455184Sek110237 /* ARGSUSED */
16465184Sek110237 static int
16475184Sek110237 flowoplib_sempost_init(flowop_t *flowop)
16485184Sek110237 {
16495184Sek110237 #ifdef HAVE_SYSV_SEM
16505184Sek110237 	ipc_seminit();
16515184Sek110237 #endif /* HAVE_SYSV_SEM */
16526084Saw148015 	return (FILEBENCH_OK);
16535184Sek110237 }
16545184Sek110237 
16555184Sek110237 /*
16565184Sek110237  * Post to a System V or posix semaphore as appropriate.
16575184Sek110237  * On the first call for a given flowop instance, this routine
16585184Sek110237  * will use the fo_targetname attribute to locate all semblock
16595184Sek110237  * flowops that are expecting posts from this flowop. All
16605184Sek110237  * target flowops on this list will have a post operation done
16615184Sek110237  * to their semaphores on each call.
16625184Sek110237  */
16635184Sek110237 static int
16645184Sek110237 flowoplib_sempost(threadflow_t *threadflow, flowop_t *flowop)
16655184Sek110237 {
16665184Sek110237 	flowop_t *target;
16675184Sek110237 
16685184Sek110237 	filebench_log(LOG_DEBUG_IMPL,
16695184Sek110237 	    "sempost flow %s-%d",
16705184Sek110237 	    flowop->fo_name,
16715184Sek110237 	    flowop->fo_instance);
16725184Sek110237 
16735184Sek110237 	/* if this is the first post, create the post list */
16745184Sek110237 	if (flowop->fo_targets == NULL) {
16755184Sek110237 		flowop_t *result = flowop_find(flowop->fo_targetname);
16765184Sek110237 
16775184Sek110237 		flowop->fo_targets = result;
16785184Sek110237 
16795184Sek110237 		if (result == NULL) {
16805184Sek110237 			filebench_log(LOG_ERROR,
16815184Sek110237 			    "sempost: could not find op %s for thread %s",
16825184Sek110237 			    flowop->fo_targetname,
16835184Sek110237 			    threadflow->tf_name);
16845184Sek110237 			filebench_shutdown(1);
16855184Sek110237 		}
16865184Sek110237 
16875184Sek110237 		while (result) {
16885184Sek110237 			result->fo_targetnext =
16895184Sek110237 			    result->fo_resultnext;
16905184Sek110237 			result = result->fo_resultnext;
16915184Sek110237 		}
16925184Sek110237 	}
16935184Sek110237 
16945184Sek110237 	target = flowop->fo_targets;
16955184Sek110237 
16965184Sek110237 	flowop_beginop(threadflow, flowop);
16975184Sek110237 	/* post to the targets */
16985184Sek110237 	while (target) {
16995184Sek110237 #ifdef HAVE_SYSV_SEM
17005184Sek110237 		struct sembuf sbuf[2];
17016391Saw148015 		int sys_semid;
17025184Sek110237 		int blocking;
17035184Sek110237 #else
17045184Sek110237 		int i;
17055184Sek110237 #endif /* HAVE_SYSV_SEM */
17065184Sek110237 		struct timespec timeout;
17076550Saw148015 		int value = (int)avd_get_int(flowop->fo_value);
17085184Sek110237 
17095184Sek110237 		if (target->fo_instance == FLOW_MASTER) {
17105184Sek110237 			target = target->fo_targetnext;
17115184Sek110237 			continue;
17125184Sek110237 		}
17135184Sek110237 
17145184Sek110237 #ifdef HAVE_SYSV_SEM
17155184Sek110237 
17165184Sek110237 		filebench_log(LOG_DEBUG_IMPL,
17175184Sek110237 		    "sempost flow %s-%d num %x",
17185184Sek110237 		    target->fo_name,
17195184Sek110237 		    target->fo_instance,
17205184Sek110237 		    target->fo_semid_lw);
17215184Sek110237 
17226391Saw148015 		sys_semid = filebench_shm->shm_sys_semid;
17235184Sek110237 		sbuf[0].sem_num = target->fo_semid_lw;
17245673Saw148015 		sbuf[0].sem_op = (short)value;
17255184Sek110237 		sbuf[0].sem_flg = 0;
17265184Sek110237 		sbuf[1].sem_num = target->fo_semid_hw;
17275184Sek110237 		sbuf[1].sem_op = value * -1;
17285184Sek110237 		sbuf[1].sem_flg = 0;
17295184Sek110237 		timeout.tv_sec = 600;
17305184Sek110237 		timeout.tv_nsec = 0;
17315184Sek110237 
17326212Saw148015 		if (avd_get_bool(flowop->fo_blocking))
17335184Sek110237 			blocking = 1;
17345184Sek110237 		else
17355184Sek110237 			blocking = 0;
17365184Sek110237 
17375184Sek110237 #ifdef HAVE_SEMTIMEDOP
17386391Saw148015 		if ((semtimedop(sys_semid, &sbuf[0], blocking + 1,
17395184Sek110237 		    &timeout) == -1) && (errno && (errno != EAGAIN))) {
17405184Sek110237 #else
17416391Saw148015 		if ((semop(sys_semid, &sbuf[0], blocking + 1) == -1) &&
17425184Sek110237 		    (errno && (errno != EAGAIN))) {
17435184Sek110237 #endif /* HAVE_SEMTIMEDOP */
17445184Sek110237 			filebench_log(LOG_ERROR, "semop post failed: %s",
17455184Sek110237 			    strerror(errno));
17466084Saw148015 			return (FILEBENCH_ERROR);
17475184Sek110237 		}
17485184Sek110237 
17495184Sek110237 		filebench_log(LOG_DEBUG_IMPL,
17505184Sek110237 		    "flow %s-%d finished posting",
17515184Sek110237 		    target->fo_name, target->fo_instance);
17525184Sek110237 #else
17535184Sek110237 		filebench_log(LOG_DEBUG_IMPL,
17545184Sek110237 		    "sempost flow %s-%d to posix semaphore",
17555184Sek110237 		    target->fo_name,
17565184Sek110237 		    target->fo_instance);
17575184Sek110237 
17585184Sek110237 		/* Increment sem by value */
17595184Sek110237 		for (i = 0; i < value; i++) {
17605184Sek110237 			if (sem_post(&target->fo_sem) == -1) {
17615184Sek110237 				filebench_log(LOG_ERROR, "semop post failed");
17626084Saw148015 				return (FILEBENCH_ERROR);
17635184Sek110237 			}
17645184Sek110237 		}
17655184Sek110237 
17665184Sek110237 		filebench_log(LOG_DEBUG_IMPL, "flow %s-%d unblocking",
17675184Sek110237 		    target->fo_name, target->fo_instance);
17685184Sek110237 #endif /* HAVE_SYSV_SEM */
17695184Sek110237 
17705184Sek110237 		target = target->fo_targetnext;
17715184Sek110237 	}
17725673Saw148015 	flowop_endop(threadflow, flowop, 0);
17735184Sek110237 
17746084Saw148015 	return (FILEBENCH_OK);
17755184Sek110237 }
17765184Sek110237 
17775184Sek110237 
17785184Sek110237 /*
17795184Sek110237  * Section for exercising create / open / close / delete operations
17805184Sek110237  * on files within a fileset. For proper operation, the flowop attribute
17815184Sek110237  * "fd", which sets the fo_fdnumber field in the flowop, must be used
17825184Sek110237  * so that the same file is opened and later closed. "fd" is an index
17835184Sek110237  * into a pair of arrays maintained by threadflows, one of which
17845184Sek110237  * contains the operating system assigned file descriptors and the other
17855184Sek110237  * a pointer to the filesetentry whose file the file descriptor
17865184Sek110237  * references. An openfile flowop defined without fd being set will use
17875184Sek110237  * the default (0) fd or, if specified, rotate through fd indices, but
17885184Sek110237  * createfile and closefile must use the default or a specified fd.
17895184Sek110237  * Meanwhile deletefile picks an arbitrary file to delete, regardless
17905184Sek110237  * of fd attribute.
17915184Sek110237  */
17925184Sek110237 
17935184Sek110237 /*
17945184Sek110237  * XXX Making file selection more consistent among the flowops might be good
17955184Sek110237  */
17965184Sek110237 
17975184Sek110237 
17985184Sek110237 /*
17995184Sek110237  * Emulates (and actually does) file open. Obtains a file descriptor
18006084Saw148015  * index, then calls flowoplib_openfile_common() to open. Returns
18016084Saw148015  * FILEBENCH_ERROR if no file descriptor is found, and returns the
18026084Saw148015  * status from flowoplib_openfile_common otherwise (FILEBENCH_ERROR,
18036084Saw148015  * FILEBENCH_NORSC, FILEBENCH_OK).
18045184Sek110237  */
18055184Sek110237 static int
18065184Sek110237 flowoplib_openfile(threadflow_t *threadflow, flowop_t *flowop)
18075184Sek110237 {
18085184Sek110237 	int fd = flowoplib_fdnum(threadflow, flowop);
18095184Sek110237 
18105184Sek110237 	if (fd == -1)
18116084Saw148015 		return (FILEBENCH_ERROR);
18125184Sek110237 
18135184Sek110237 	return (flowoplib_openfile_common(threadflow, flowop, fd));
18145184Sek110237 }
18155184Sek110237 
18165184Sek110237 /*
18175184Sek110237  * Common file opening code for filesets. Uses the supplied
18185184Sek110237  * file descriptor index to determine the tf_fd entry to use.
18195184Sek110237  * If the entry is empty (0) and the fileset exists, fileset
18205184Sek110237  * pick is called to select a fileset entry to use. The file
18215184Sek110237  * specified in the filesetentry is opened, and the returned
18225184Sek110237  * operating system file descriptor and a pointer to the
18235184Sek110237  * filesetentry are stored in tf_fd[fd] and tf_fse[fd],
18246084Saw148015  * respectively. Returns FILEBENCH_ERROR on error,
18256084Saw148015  * FILEBENCH_NORSC if no suitable filesetentry can be found,
18266084Saw148015  * and FILEBENCH_OK on success.
18275184Sek110237  */
18285184Sek110237 static int
18295184Sek110237 flowoplib_openfile_common(threadflow_t *threadflow, flowop_t *flowop, int fd)
18305184Sek110237 {
18315184Sek110237 	filesetentry_t *file;
18326212Saw148015 	char *fileset_name;
18335184Sek110237 	int tid = 0;
1834*8404SAndrew.W.Wilson@sun.com 	int err;
18355184Sek110237 
18366391Saw148015 	if (flowop->fo_fileset == NULL) {
18376391Saw148015 		filebench_log(LOG_ERROR, "flowop NULL file");
18386391Saw148015 		return (FILEBENCH_ERROR);
18396391Saw148015 	}
18406391Saw148015 
18416212Saw148015 	if ((fileset_name =
18426212Saw148015 	    avd_get_str(flowop->fo_fileset->fs_name)) == NULL) {
18436212Saw148015 		filebench_log(LOG_ERROR,
18446212Saw148015 		    "flowop %s: fileset has no name", flowop->fo_name);
18456212Saw148015 		return (FILEBENCH_ERROR);
18466212Saw148015 	}
18476212Saw148015 
18485184Sek110237 	/*
18495184Sek110237 	 * If the flowop doesn't default to persistent fd
18505184Sek110237 	 * then get unique thread ID for use by fileset_pick
18515184Sek110237 	 */
18526212Saw148015 	if (avd_get_bool(flowop->fo_rotatefd))
18535184Sek110237 		tid = threadflow->tf_utid;
18545184Sek110237 
18555184Sek110237 	if (threadflow->tf_fd[fd] != 0) {
18565184Sek110237 		filebench_log(LOG_ERROR,
18575184Sek110237 		    "flowop %s attempted to open without closing on fd %d",
18585184Sek110237 		    flowop->fo_name, fd);
18596084Saw148015 		return (FILEBENCH_ERROR);
18605184Sek110237 	}
18615184Sek110237 
18625673Saw148015 #ifdef HAVE_RAW_SUPPORT
18635673Saw148015 	if (flowop->fo_fileset->fs_attrs & FILESET_IS_RAW_DEV) {
18645673Saw148015 		int open_attrs = 0;
18655673Saw148015 		char name[MAXPATHLEN];
18665673Saw148015 
18677946SAndrew.W.Wilson@sun.com 		(void) fb_strlcpy(name,
18687946SAndrew.W.Wilson@sun.com 		    avd_get_str(flowop->fo_fileset->fs_path), MAXPATHLEN);
18697946SAndrew.W.Wilson@sun.com 		(void) fb_strlcat(name, "/", MAXPATHLEN);
18707946SAndrew.W.Wilson@sun.com 		(void) fb_strlcat(name, fileset_name, MAXPATHLEN);
18715673Saw148015 
18726212Saw148015 		if (avd_get_bool(flowop->fo_dsync)) {
18735673Saw148015 #ifdef sun
18745673Saw148015 			open_attrs |= O_DSYNC;
18755673Saw148015 #else
18765673Saw148015 			open_attrs |= O_FSYNC;
18775673Saw148015 #endif
18785673Saw148015 		}
18795673Saw148015 
18805673Saw148015 		filebench_log(LOG_DEBUG_SCRIPT,
18815673Saw148015 		    "open raw device %s flags %d = %d", name, open_attrs, fd);
18825673Saw148015 
18835673Saw148015 		threadflow->tf_fd[fd] = open64(name,
18845673Saw148015 		    O_RDWR | open_attrs, 0666);
18855673Saw148015 
18865673Saw148015 		if (threadflow->tf_fd[fd] < 0) {
18875673Saw148015 			filebench_log(LOG_ERROR,
18885673Saw148015 			    "Failed to open raw device %s: %s",
18895673Saw148015 			    name, strerror(errno));
18906084Saw148015 			return (FILEBENCH_ERROR);
18915673Saw148015 		}
18925673Saw148015 
18935673Saw148015 		/* if running on Solaris, use un-buffered io */
18945673Saw148015 #ifdef sun
18955673Saw148015 		(void) directio(threadflow->tf_fd[fd], DIRECTIO_ON);
18965673Saw148015 #endif
18975673Saw148015 
18985673Saw148015 		threadflow->tf_fse[fd] = NULL;
18995673Saw148015 
19006084Saw148015 		return (FILEBENCH_OK);
19015673Saw148015 	}
19025673Saw148015 #endif /* HAVE_RAW_SUPPORT */
19035673Saw148015 
1904*8404SAndrew.W.Wilson@sun.com 	if ((err = flowoplib_pickfile(&file, flowop,
1905*8404SAndrew.W.Wilson@sun.com 	    FILESET_PICKEXISTS, tid)) != FILEBENCH_OK) {
19066084Saw148015 		filebench_log(LOG_DEBUG_SCRIPT,
19075184Sek110237 		    "flowop %s failed to pick file from %s on fd %d",
19086212Saw148015 		    flowop->fo_name, fileset_name, fd);
1909*8404SAndrew.W.Wilson@sun.com 		return (err);
19105184Sek110237 	}
19115184Sek110237 
19125184Sek110237 	threadflow->tf_fse[fd] = file;
19135184Sek110237 
19145184Sek110237 	flowop_beginop(threadflow, flowop);
19155184Sek110237 	threadflow->tf_fd[fd] = fileset_openfile(flowop->fo_fileset,
19165184Sek110237 	    file, O_RDWR, 0666, flowoplib_fileattrs(flowop));
19175673Saw148015 	flowop_endop(threadflow, flowop, 0);
19185184Sek110237 
19195184Sek110237 	if (threadflow->tf_fd[fd] < 0) {
19206212Saw148015 		filebench_log(LOG_ERROR, "flowop %s failed to open file %s",
19216212Saw148015 		    flowop->fo_name, file->fse_path);
19226084Saw148015 		return (FILEBENCH_ERROR);
19235184Sek110237 	}
19245184Sek110237 
19255184Sek110237 	filebench_log(LOG_DEBUG_SCRIPT,
19265184Sek110237 	    "flowop %s: opened %s fd[%d] = %d",
19275184Sek110237 	    flowop->fo_name, file->fse_path, fd, threadflow->tf_fd[fd]);
19285184Sek110237 
19296084Saw148015 	return (FILEBENCH_OK);
19305184Sek110237 }
19315184Sek110237 
19325184Sek110237 /*
19335184Sek110237  * Emulate create of a file. Uses the flowop's fdnumber to select
19345184Sek110237  * tf_fd and tf_fse array locations to put the created file's file
1935*8404SAndrew.W.Wilson@sun.com  * descriptor and filesetentry respectively. Uses flowoplib_pickfile()
19365184Sek110237  * to select a specific filesetentry whose file does not currently
19375184Sek110237  * exist for the file create operation. Then calls
19385184Sek110237  * fileset_openfile() with the O_CREATE flag set to create the
19396084Saw148015  * file. Returns FILEBENCH_ERROR if the array index specified by fdnumber is
19405184Sek110237  * already in use, the flowop has no associated fileset, or
19415184Sek110237  * the create call fails. Returns 1 if a filesetentry with a
19426084Saw148015  * nonexistent file cannot be found. Returns FILEBENCH_OK on success.
19435184Sek110237  */
19445184Sek110237 static int
19455184Sek110237 flowoplib_createfile(threadflow_t *threadflow, flowop_t *flowop)
19465184Sek110237 {
19475184Sek110237 	filesetentry_t *file;
19485184Sek110237 	int fd = flowop->fo_fdnumber;
1949*8404SAndrew.W.Wilson@sun.com 	int err;
19505184Sek110237 
19515184Sek110237 	if (threadflow->tf_fd[fd] != 0) {
19525184Sek110237 		filebench_log(LOG_ERROR,
19535184Sek110237 		    "flowop %s attempted to create without closing on fd %d",
19545184Sek110237 		    flowop->fo_name, fd);
19556084Saw148015 		return (FILEBENCH_ERROR);
19565184Sek110237 	}
19575184Sek110237 
19585184Sek110237 	if (flowop->fo_fileset == NULL) {
19595184Sek110237 		filebench_log(LOG_ERROR, "flowop NULL file");
19606084Saw148015 		return (FILEBENCH_ERROR);
19615184Sek110237 	}
19625184Sek110237 
19635673Saw148015 #ifdef HAVE_RAW_SUPPORT
19645673Saw148015 	/* can't be used with raw devices */
19655673Saw148015 	if (flowop->fo_fileset->fs_attrs & FILESET_IS_RAW_DEV) {
19665673Saw148015 		filebench_log(LOG_ERROR,
19675673Saw148015 		    "flowop %s attempted to a createfile on RAW device",
19685673Saw148015 		    flowop->fo_name);
19696084Saw148015 		return (FILEBENCH_ERROR);
19705673Saw148015 	}
19715673Saw148015 #endif /* HAVE_RAW_SUPPORT */
19725673Saw148015 
1973*8404SAndrew.W.Wilson@sun.com 	if ((err = flowoplib_pickfile(&file, flowop,
1974*8404SAndrew.W.Wilson@sun.com 	    FILESET_PICKNOEXIST, 0)) != FILEBENCH_OK) {
19756084Saw148015 		filebench_log(LOG_DEBUG_SCRIPT,
19766084Saw148015 		    "flowop %s failed to pick file from fileset %s",
19776212Saw148015 		    flowop->fo_name,
19786212Saw148015 		    avd_get_str(flowop->fo_fileset->fs_name));
1979*8404SAndrew.W.Wilson@sun.com 		return (err);
19805184Sek110237 	}
19815184Sek110237 
19825184Sek110237 	threadflow->tf_fse[fd] = file;
19835184Sek110237 
19845184Sek110237 	flowop_beginop(threadflow, flowop);
19855184Sek110237 	threadflow->tf_fd[fd] = fileset_openfile(flowop->fo_fileset,
19865184Sek110237 	    file, O_RDWR | O_CREAT, 0666, flowoplib_fileattrs(flowop));
19875673Saw148015 	flowop_endop(threadflow, flowop, 0);
19885184Sek110237 
19895184Sek110237 	if (threadflow->tf_fd[fd] < 0) {
19905184Sek110237 		filebench_log(LOG_ERROR, "failed to create file %s",
19915184Sek110237 		    flowop->fo_name);
19926084Saw148015 		return (FILEBENCH_ERROR);
19935184Sek110237 	}
19945184Sek110237 
19955184Sek110237 	filebench_log(LOG_DEBUG_SCRIPT,
19965184Sek110237 	    "flowop %s: created %s fd[%d] = %d",
19975184Sek110237 	    flowop->fo_name, file->fse_path, fd, threadflow->tf_fd[fd]);
19985184Sek110237 
19996084Saw148015 	return (FILEBENCH_OK);
20005184Sek110237 }
20015184Sek110237 
20025184Sek110237 /*
20036391Saw148015  * Emulates delete of a file. If a valid fd is provided, it uses the
20046391Saw148015  * filesetentry stored at that fd location to select the file to be
20056391Saw148015  * deleted, otherwise it picks an arbitrary filesetentry
20066391Saw148015  * whose file exists. It then uses unlink() to delete it and Clears
20076084Saw148015  * the FSE_EXISTS flag for the filesetentry. Returns FILEBENCH_ERROR if the
20086084Saw148015  * flowop has no associated fileset. Returns FILEBENCH_NORSC if an appropriate
20096084Saw148015  * filesetentry cannot be found, and FILEBENCH_OK on success.
20105184Sek110237  */
20115184Sek110237 static int
20125184Sek110237 flowoplib_deletefile(threadflow_t *threadflow, flowop_t *flowop)
20135184Sek110237 {
20145184Sek110237 	filesetentry_t *file;
20155184Sek110237 	fileset_t *fileset;
20165184Sek110237 	char path[MAXPATHLEN];
20175184Sek110237 	char *pathtmp;
20186391Saw148015 	int fd = flowop->fo_fdnumber;
20195184Sek110237 
20206391Saw148015 	/* if fd specified, use it to access file */
20216391Saw148015 	if ((fd > 0) && ((file = threadflow->tf_fse[fd]) != NULL)) {
20226391Saw148015 
20236391Saw148015 		/* indicate that the file will be deleted */
20246391Saw148015 		threadflow->tf_fse[fd] = NULL;
20256391Saw148015 
20266391Saw148015 		/* if here, we still have a valid file pointer */
20276391Saw148015 		fileset = file->fse_fileset;
20286391Saw148015 	} else {
2029*8404SAndrew.W.Wilson@sun.com 
20306391Saw148015 		/* Otherwise, pick arbitrary file */
20316391Saw148015 		file = NULL;
20326391Saw148015 		fileset = flowop->fo_fileset;
20336391Saw148015 	}
20346391Saw148015 
20356391Saw148015 
20366391Saw148015 	if (fileset == NULL) {
20375184Sek110237 		filebench_log(LOG_ERROR, "flowop NULL file");
20386084Saw148015 		return (FILEBENCH_ERROR);
20395184Sek110237 	}
20405184Sek110237 
20415673Saw148015 #ifdef HAVE_RAW_SUPPORT
20425673Saw148015 	/* can't be used with raw devices */
20436391Saw148015 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV) {
20445673Saw148015 		filebench_log(LOG_ERROR,
20455673Saw148015 		    "flowop %s attempted a deletefile on RAW device",
20465673Saw148015 		    flowop->fo_name);
20476084Saw148015 		return (FILEBENCH_ERROR);
20485673Saw148015 	}
20495673Saw148015 #endif /* HAVE_RAW_SUPPORT */
20505673Saw148015 
20516391Saw148015 	if (file == NULL) {
2052*8404SAndrew.W.Wilson@sun.com 		int err;
2053*8404SAndrew.W.Wilson@sun.com 
20547556SAndrew.W.Wilson@sun.com 		/* pick arbitrary, existing (allocated) file */
2055*8404SAndrew.W.Wilson@sun.com 		if ((err = flowoplib_pickfile(&file, flowop,
2056*8404SAndrew.W.Wilson@sun.com 		    FILESET_PICKEXISTS, 0)) != FILEBENCH_OK) {
20576391Saw148015 			filebench_log(LOG_DEBUG_SCRIPT,
20586391Saw148015 			    "flowop %s failed to pick file", flowop->fo_name);
2059*8404SAndrew.W.Wilson@sun.com 			return (err);
20606391Saw148015 		}
20616391Saw148015 	} else {
20627556SAndrew.W.Wilson@sun.com 		/* delete specific file. wait for it to be non-busy */
20637556SAndrew.W.Wilson@sun.com 		(void) ipc_mutex_lock(&fileset->fs_pick_lock);
20647556SAndrew.W.Wilson@sun.com 		while (file->fse_flags & FSE_BUSY) {
20657556SAndrew.W.Wilson@sun.com 			file->fse_flags |= FSE_THRD_WAITNG;
20667556SAndrew.W.Wilson@sun.com 			(void) pthread_cond_wait(&fileset->fs_thrd_wait_cv,
20677556SAndrew.W.Wilson@sun.com 			    &fileset->fs_pick_lock);
20687556SAndrew.W.Wilson@sun.com 		}
20697556SAndrew.W.Wilson@sun.com 
20707556SAndrew.W.Wilson@sun.com 		/* File now available, grab it for deletion */
20717556SAndrew.W.Wilson@sun.com 		file->fse_flags |= FSE_BUSY;
20727556SAndrew.W.Wilson@sun.com 		fileset->fs_idle_files--;
20737556SAndrew.W.Wilson@sun.com 		(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
20745184Sek110237 	}
20755184Sek110237 
2076*8404SAndrew.W.Wilson@sun.com 	/* don't delete if anyone (other than me) has file open */
2077*8404SAndrew.W.Wilson@sun.com 	if ((fd > 0) && (threadflow->tf_fd[fd] > 0)) {
2078*8404SAndrew.W.Wilson@sun.com 		if (file->fse_open_cnt > 1) {
2079*8404SAndrew.W.Wilson@sun.com 			filebench_log(LOG_DEBUG_SCRIPT,
2080*8404SAndrew.W.Wilson@sun.com 			    "flowop %s can't delete file opened by other"
2081*8404SAndrew.W.Wilson@sun.com 			    " threads at fd = %d", flowop->fo_name, fd);
2082*8404SAndrew.W.Wilson@sun.com 			fileset_unbusy(file, FALSE, FALSE, 0);
2083*8404SAndrew.W.Wilson@sun.com 			return (FILEBENCH_OK);
2084*8404SAndrew.W.Wilson@sun.com 		} else {
2085*8404SAndrew.W.Wilson@sun.com 			filebench_log(LOG_DEBUG_SCRIPT,
2086*8404SAndrew.W.Wilson@sun.com 			    "flowop %s deleting still open file at fd = %d",
2087*8404SAndrew.W.Wilson@sun.com 			    flowop->fo_name, fd);
2088*8404SAndrew.W.Wilson@sun.com 		}
2089*8404SAndrew.W.Wilson@sun.com 	} else if (file->fse_open_cnt > 0) {
2090*8404SAndrew.W.Wilson@sun.com 		filebench_log(LOG_DEBUG_SCRIPT,
2091*8404SAndrew.W.Wilson@sun.com 		    "flowop %s can't delete file opened by other"
2092*8404SAndrew.W.Wilson@sun.com 		    " threads at fd = %d, open count = %d",
2093*8404SAndrew.W.Wilson@sun.com 		    flowop->fo_name, fd, file->fse_open_cnt);
2094*8404SAndrew.W.Wilson@sun.com 		fileset_unbusy(file, FALSE, FALSE, 0);
2095*8404SAndrew.W.Wilson@sun.com 		return (FILEBENCH_OK);
2096*8404SAndrew.W.Wilson@sun.com 	}
2097*8404SAndrew.W.Wilson@sun.com 
20987946SAndrew.W.Wilson@sun.com 	(void) fb_strlcpy(path, avd_get_str(fileset->fs_path), MAXPATHLEN);
20997946SAndrew.W.Wilson@sun.com 	(void) fb_strlcat(path, "/", MAXPATHLEN);
21007946SAndrew.W.Wilson@sun.com 	(void) fb_strlcat(path, avd_get_str(fileset->fs_name), MAXPATHLEN);
21015184Sek110237 	pathtmp = fileset_resolvepath(file);
21027946SAndrew.W.Wilson@sun.com 	(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
21035184Sek110237 	free(pathtmp);
21045184Sek110237 
21057556SAndrew.W.Wilson@sun.com 	/* delete the selected file */
21065184Sek110237 	flowop_beginop(threadflow, flowop);
21075184Sek110237 	(void) unlink(path);
21085673Saw148015 	flowop_endop(threadflow, flowop, 0);
21097556SAndrew.W.Wilson@sun.com 
21107556SAndrew.W.Wilson@sun.com 	/* indicate that it is no longer busy and no longer exists */
2111*8404SAndrew.W.Wilson@sun.com 	fileset_unbusy(file, TRUE, FALSE, -file->fse_open_cnt);
21125184Sek110237 
21135184Sek110237 	filebench_log(LOG_DEBUG_SCRIPT, "deleted file %s", file->fse_path);
21145184Sek110237 
21156084Saw148015 	return (FILEBENCH_OK);
21165184Sek110237 }
21175184Sek110237 
21185184Sek110237 /*
21195184Sek110237  * Emulates fsync of a file. Obtains the file descriptor index
21205184Sek110237  * from the flowop, obtains the actual file descriptor from
21215184Sek110237  * the threadflow's table, checks to be sure it is still an
21226084Saw148015  * open file, then does an fsync operation on it. Returns FILEBENCH_ERROR
21236084Saw148015  * if the file no longer is open, FILEBENCH_OK otherwise.
21245184Sek110237  */
21255184Sek110237 static int
21265184Sek110237 flowoplib_fsync(threadflow_t *threadflow, flowop_t *flowop)
21275184Sek110237 {
21285184Sek110237 	filesetentry_t *file;
21295184Sek110237 	int fd = flowop->fo_fdnumber;
21305184Sek110237 
21315184Sek110237 	if (threadflow->tf_fd[fd] == 0) {
21325184Sek110237 		filebench_log(LOG_ERROR,
21335184Sek110237 		    "flowop %s attempted to fsync a closed fd %d",
21345184Sek110237 		    flowop->fo_name, fd);
21356084Saw148015 		return (FILEBENCH_ERROR);
21365184Sek110237 	}
21375184Sek110237 
21385673Saw148015 	file = threadflow->tf_fse[fd];
21395673Saw148015 
21405673Saw148015 	if ((file == NULL) ||
21415673Saw148015 	    (file->fse_fileset->fs_attrs & FILESET_IS_RAW_DEV)) {
21425673Saw148015 		filebench_log(LOG_ERROR,
21435673Saw148015 		    "flowop %s attempted to a fsync a RAW device",
21445673Saw148015 		    flowop->fo_name);
21456084Saw148015 		return (FILEBENCH_ERROR);
21465673Saw148015 	}
21475673Saw148015 
21485184Sek110237 	/* Measure time to fsync */
21495184Sek110237 	flowop_beginop(threadflow, flowop);
21505184Sek110237 	(void) fsync(threadflow->tf_fd[fd]);
21515673Saw148015 	flowop_endop(threadflow, flowop, 0);
21525184Sek110237 
21535184Sek110237 	filebench_log(LOG_DEBUG_SCRIPT, "fsync file %s", file->fse_path);
21545184Sek110237 
21556084Saw148015 	return (FILEBENCH_OK);
21565184Sek110237 }
21575184Sek110237 
21585184Sek110237 /*
21595184Sek110237  * Emulate fsync of an entire fileset. Search through the
21605184Sek110237  * threadflow's file descriptor array, doing fsync() on each
21615184Sek110237  * open file that belongs to the flowop's fileset. Always
21626084Saw148015  * returns FILEBENCH_OK.
21635184Sek110237  */
21645184Sek110237 static int
21655184Sek110237 flowoplib_fsyncset(threadflow_t *threadflow, flowop_t *flowop)
21665184Sek110237 {
21675184Sek110237 	int fd;
21685184Sek110237 
21695184Sek110237 	for (fd = 0; fd < THREADFLOW_MAXFD; fd++) {
21705184Sek110237 		filesetentry_t *file;
21715184Sek110237 
21725184Sek110237 		/* Match the file set to fsync */
21735184Sek110237 		if ((threadflow->tf_fse[fd] == NULL) ||
21745184Sek110237 		    (flowop->fo_fileset != threadflow->tf_fse[fd]->fse_fileset))
21755184Sek110237 			continue;
21765184Sek110237 
21775184Sek110237 		/* Measure time to fsync */
21785184Sek110237 		flowop_beginop(threadflow, flowop);
21795184Sek110237 		(void) fsync(threadflow->tf_fd[fd]);
21805673Saw148015 		flowop_endop(threadflow, flowop, 0);
21815184Sek110237 
21825184Sek110237 		file = threadflow->tf_fse[fd];
21835184Sek110237 
21845184Sek110237 		filebench_log(LOG_DEBUG_SCRIPT, "fsync file %s",
21855184Sek110237 		    file->fse_path);
21865184Sek110237 	}
21875184Sek110237 
21886084Saw148015 	return (FILEBENCH_OK);
21895184Sek110237 }
21905184Sek110237 
21915184Sek110237 /*
21925184Sek110237  * Emulate close of a file.  Obtains the file descriptor index
21935184Sek110237  * from the flowop, obtains the actual file descriptor from the
21945184Sek110237  * threadflow's table, checks to be sure it is still an open
21955184Sek110237  * file, then does a close operation on it. Then sets the
21965184Sek110237  * threadflow file descriptor table entry to 0, and the file set
21976084Saw148015  * entry pointer to NULL. Returns FILEBENCH_ERROR if the file was not open,
21986084Saw148015  * FILEBENCH_OK otherwise.
21995184Sek110237  */
22005184Sek110237 static int
22015184Sek110237 flowoplib_closefile(threadflow_t *threadflow, flowop_t *flowop)
22025184Sek110237 {
22035184Sek110237 	filesetentry_t *file;
2204*8404SAndrew.W.Wilson@sun.com 	fileset_t *fileset;
22055184Sek110237 	int fd = flowop->fo_fdnumber;
22065184Sek110237 
22075184Sek110237 	if (threadflow->tf_fd[fd] == 0) {
22085184Sek110237 		filebench_log(LOG_ERROR,
22095184Sek110237 		    "flowop %s attempted to close an already closed fd %d",
22105184Sek110237 		    flowop->fo_name, fd);
22116084Saw148015 		return (FILEBENCH_ERROR);
22125184Sek110237 	}
22135184Sek110237 
2214*8404SAndrew.W.Wilson@sun.com 	file = threadflow->tf_fse[fd];
2215*8404SAndrew.W.Wilson@sun.com 	fileset = file->fse_fileset;
2216*8404SAndrew.W.Wilson@sun.com 
2217*8404SAndrew.W.Wilson@sun.com 	/* Wait for it to be non-busy */
2218*8404SAndrew.W.Wilson@sun.com 	(void) ipc_mutex_lock(&fileset->fs_pick_lock);
2219*8404SAndrew.W.Wilson@sun.com 	while (file->fse_flags & FSE_BUSY) {
2220*8404SAndrew.W.Wilson@sun.com 		file->fse_flags |= FSE_THRD_WAITNG;
2221*8404SAndrew.W.Wilson@sun.com 		(void) pthread_cond_wait(&fileset->fs_thrd_wait_cv,
2222*8404SAndrew.W.Wilson@sun.com 		    &fileset->fs_pick_lock);
2223*8404SAndrew.W.Wilson@sun.com 	}
2224*8404SAndrew.W.Wilson@sun.com 
2225*8404SAndrew.W.Wilson@sun.com 	/* File now available, grab it for closing */
2226*8404SAndrew.W.Wilson@sun.com 	file->fse_flags |= FSE_BUSY;
2227*8404SAndrew.W.Wilson@sun.com 
2228*8404SAndrew.W.Wilson@sun.com 	/* if last open, set declare idle */
2229*8404SAndrew.W.Wilson@sun.com 	if (file->fse_open_cnt == 1)
2230*8404SAndrew.W.Wilson@sun.com 		fileset->fs_idle_files--;
2231*8404SAndrew.W.Wilson@sun.com 
2232*8404SAndrew.W.Wilson@sun.com 	(void) ipc_mutex_unlock(&fileset->fs_pick_lock);
2233*8404SAndrew.W.Wilson@sun.com 
22345184Sek110237 	/* Measure time to close */
22355184Sek110237 	flowop_beginop(threadflow, flowop);
22365184Sek110237 	(void) close(threadflow->tf_fd[fd]);
22375673Saw148015 	flowop_endop(threadflow, flowop, 0);
22385184Sek110237 
2239*8404SAndrew.W.Wilson@sun.com 	fileset_unbusy(file, FALSE, FALSE, -1);
22405184Sek110237 
22415184Sek110237 	threadflow->tf_fd[fd] = 0;
22425184Sek110237 
22435184Sek110237 	filebench_log(LOG_DEBUG_SCRIPT, "closed file %s", file->fse_path);
22445184Sek110237 
22456084Saw148015 	return (FILEBENCH_OK);
22465184Sek110237 }
22475184Sek110237 
22485184Sek110237 /*
22497946SAndrew.W.Wilson@sun.com  * Obtain the full pathname of the directory described by the filesetentry
22507946SAndrew.W.Wilson@sun.com  * indicated by "dir", and copy it into the character array pointed to by
22517946SAndrew.W.Wilson@sun.com  * path. Returns FILEBENCH_ERROR on errors, FILEBENCH_OK otherwise.
22527946SAndrew.W.Wilson@sun.com  */
22537946SAndrew.W.Wilson@sun.com static int
22547946SAndrew.W.Wilson@sun.com flowoplib_getdirpath(filesetentry_t *dir, char *path)
22557946SAndrew.W.Wilson@sun.com {
22567946SAndrew.W.Wilson@sun.com 	char		*fileset_path;
22577946SAndrew.W.Wilson@sun.com 	char		*fileset_name;
22587946SAndrew.W.Wilson@sun.com 	char		*part_path;
22597946SAndrew.W.Wilson@sun.com 
22607946SAndrew.W.Wilson@sun.com 	if ((fileset_path = avd_get_str(dir->fse_fileset->fs_path)) == NULL) {
22617946SAndrew.W.Wilson@sun.com 		filebench_log(LOG_ERROR, "Fileset path not set");
22627946SAndrew.W.Wilson@sun.com 		return (FILEBENCH_ERROR);
22637946SAndrew.W.Wilson@sun.com 	}
22647946SAndrew.W.Wilson@sun.com 
22657946SAndrew.W.Wilson@sun.com 	if ((fileset_name = avd_get_str(dir->fse_fileset->fs_name)) == NULL) {
22667946SAndrew.W.Wilson@sun.com 		filebench_log(LOG_ERROR, "Fileset name not set");
22677946SAndrew.W.Wilson@sun.com 		return (FILEBENCH_ERROR);
22687946SAndrew.W.Wilson@sun.com 	}
22697946SAndrew.W.Wilson@sun.com 
22707946SAndrew.W.Wilson@sun.com 	(void) fb_strlcpy(path, fileset_path, MAXPATHLEN);
22717946SAndrew.W.Wilson@sun.com 	(void) fb_strlcat(path, "/", MAXPATHLEN);
22727946SAndrew.W.Wilson@sun.com 	(void) fb_strlcat(path, fileset_name, MAXPATHLEN);
22737946SAndrew.W.Wilson@sun.com 
22747946SAndrew.W.Wilson@sun.com 	if ((part_path = fileset_resolvepath(dir)) == NULL)
22757946SAndrew.W.Wilson@sun.com 		return (FILEBENCH_ERROR);
22767946SAndrew.W.Wilson@sun.com 
22777946SAndrew.W.Wilson@sun.com 	(void) fb_strlcat(path, part_path, MAXPATHLEN);
22787946SAndrew.W.Wilson@sun.com 	free(part_path);
22797946SAndrew.W.Wilson@sun.com 
22807946SAndrew.W.Wilson@sun.com 	return (FILEBENCH_OK);
22817946SAndrew.W.Wilson@sun.com }
22827946SAndrew.W.Wilson@sun.com 
22837946SAndrew.W.Wilson@sun.com /*
22847946SAndrew.W.Wilson@sun.com  * Use mkdir to create a directory.  Obtains the fileset name from the
22857946SAndrew.W.Wilson@sun.com  * flowop, selects a non-existent leaf directory and obtains its full
22867946SAndrew.W.Wilson@sun.com  * path, then uses mkdir to create it on the storage subsystem (make it
22877946SAndrew.W.Wilson@sun.com  * existent). Returns FILEBENCH_NORSC is there are no more non-existent
22887946SAndrew.W.Wilson@sun.com  * directories in the fileset, FILEBENCH_ERROR on other errors, and
22897946SAndrew.W.Wilson@sun.com  * FILEBENCH_OK on success.
22907946SAndrew.W.Wilson@sun.com  */
22917946SAndrew.W.Wilson@sun.com static int
22927946SAndrew.W.Wilson@sun.com flowoplib_makedir(threadflow_t *threadflow, flowop_t *flowop)
22937946SAndrew.W.Wilson@sun.com {
22947946SAndrew.W.Wilson@sun.com 	filesetentry_t	*dir;
22957946SAndrew.W.Wilson@sun.com 	int		ret;
22967946SAndrew.W.Wilson@sun.com 	char		full_path[MAXPATHLEN];
22977946SAndrew.W.Wilson@sun.com 
22987946SAndrew.W.Wilson@sun.com 	if ((ret = flowoplib_pickleafdir(&dir, flowop,
22997946SAndrew.W.Wilson@sun.com 	    FILESET_PICKNOEXIST)) != FILEBENCH_OK)
23007946SAndrew.W.Wilson@sun.com 		return (ret);
23017946SAndrew.W.Wilson@sun.com 
23027946SAndrew.W.Wilson@sun.com 	if ((ret = flowoplib_getdirpath(dir, full_path)) != FILEBENCH_OK)
23037946SAndrew.W.Wilson@sun.com 		return (ret);
23047946SAndrew.W.Wilson@sun.com 
23057946SAndrew.W.Wilson@sun.com 	flowop_beginop(threadflow, flowop);
23067946SAndrew.W.Wilson@sun.com 	(void) mkdir(full_path, 0755);
23077946SAndrew.W.Wilson@sun.com 	flowop_endop(threadflow, flowop, 0);
23087946SAndrew.W.Wilson@sun.com 
23097946SAndrew.W.Wilson@sun.com 	/* indicate that it is no longer busy and now exists */
2310*8404SAndrew.W.Wilson@sun.com 	fileset_unbusy(dir, TRUE, TRUE, 0);
23117946SAndrew.W.Wilson@sun.com 
23127946SAndrew.W.Wilson@sun.com 	return (FILEBENCH_OK);
23137946SAndrew.W.Wilson@sun.com }
23147946SAndrew.W.Wilson@sun.com 
23157946SAndrew.W.Wilson@sun.com /*
23167946SAndrew.W.Wilson@sun.com  * Use rmdir to delete a directory.  Obtains the fileset name from the
23177946SAndrew.W.Wilson@sun.com  * flowop, selects an existent leaf directory and obtains its full path,
23187946SAndrew.W.Wilson@sun.com  * then uses rmdir to remove it from the storage subsystem (make it
23197946SAndrew.W.Wilson@sun.com  * non-existent). Returns FILEBENCH_NORSC is there are no more existent
23207946SAndrew.W.Wilson@sun.com  * directories in the fileset, FILEBENCH_ERROR on other errors, and
23217946SAndrew.W.Wilson@sun.com  * FILEBENCH_OK on success.
23227946SAndrew.W.Wilson@sun.com  */
23237946SAndrew.W.Wilson@sun.com static int
23247946SAndrew.W.Wilson@sun.com flowoplib_removedir(threadflow_t *threadflow, flowop_t *flowop)
23257946SAndrew.W.Wilson@sun.com {
23267946SAndrew.W.Wilson@sun.com 	filesetentry_t *dir;
23277946SAndrew.W.Wilson@sun.com 	int		ret;
23287946SAndrew.W.Wilson@sun.com 	char		full_path[MAXPATHLEN];
23297946SAndrew.W.Wilson@sun.com 
23307946SAndrew.W.Wilson@sun.com 	if ((ret = flowoplib_pickleafdir(&dir, flowop,
23317946SAndrew.W.Wilson@sun.com 	    FILESET_PICKEXISTS)) != FILEBENCH_OK)
23327946SAndrew.W.Wilson@sun.com 		return (ret);
23337946SAndrew.W.Wilson@sun.com 
23347946SAndrew.W.Wilson@sun.com 	if ((ret = flowoplib_getdirpath(dir, full_path)) != FILEBENCH_OK)
23357946SAndrew.W.Wilson@sun.com 		return (ret);
23367946SAndrew.W.Wilson@sun.com 
23377946SAndrew.W.Wilson@sun.com 	flowop_beginop(threadflow, flowop);
23387946SAndrew.W.Wilson@sun.com 	(void) rmdir(full_path);
23397946SAndrew.W.Wilson@sun.com 	flowop_endop(threadflow, flowop, 0);
23407946SAndrew.W.Wilson@sun.com 
23417946SAndrew.W.Wilson@sun.com 	/* indicate that it is no longer busy and no longer exists */
2342*8404SAndrew.W.Wilson@sun.com 	fileset_unbusy(dir, TRUE, FALSE, 0);
23437946SAndrew.W.Wilson@sun.com 
23447946SAndrew.W.Wilson@sun.com 	return (FILEBENCH_OK);
23457946SAndrew.W.Wilson@sun.com }
23467946SAndrew.W.Wilson@sun.com 
23477946SAndrew.W.Wilson@sun.com /*
23487946SAndrew.W.Wilson@sun.com  * Use opendir(), multiple readdir() calls, and closedir() to list the
23497946SAndrew.W.Wilson@sun.com  * contents of a directory.  Obtains the fileset name from the
23507946SAndrew.W.Wilson@sun.com  * flowop, selects a normal subdirectory (which always exist) and obtains
23517946SAndrew.W.Wilson@sun.com  * its full path, then uses opendir() to get a DIR handle to it from the
23527946SAndrew.W.Wilson@sun.com  * file system, a readdir() loop to access each directory entry, and
23537946SAndrew.W.Wilson@sun.com  * finally cleans up with a closedir(). The latency reported is the total
23547946SAndrew.W.Wilson@sun.com  * for all this activity, and it also reports the total number of bytes
23557946SAndrew.W.Wilson@sun.com  * in the entries as the amount "read". Returns FILEBENCH_ERROR on errors,
23567946SAndrew.W.Wilson@sun.com  * and FILEBENCH_OK on success.
23577946SAndrew.W.Wilson@sun.com  */
23587946SAndrew.W.Wilson@sun.com static int
23597946SAndrew.W.Wilson@sun.com flowoplib_listdir(threadflow_t *threadflow, flowop_t *flowop)
23607946SAndrew.W.Wilson@sun.com {
23617946SAndrew.W.Wilson@sun.com 	fileset_t	*fileset;
23627946SAndrew.W.Wilson@sun.com 	filesetentry_t	*dir;
23637946SAndrew.W.Wilson@sun.com 	DIR		*dir_handlep;
23647946SAndrew.W.Wilson@sun.com 	struct dirent	*direntp;
23657946SAndrew.W.Wilson@sun.com 	int		dir_bytes = 0;
23667946SAndrew.W.Wilson@sun.com 	int		ret;
23677946SAndrew.W.Wilson@sun.com 	char		full_path[MAXPATHLEN];
23687946SAndrew.W.Wilson@sun.com 
23697946SAndrew.W.Wilson@sun.com 	if ((fileset = flowop->fo_fileset) == NULL) {
23707946SAndrew.W.Wilson@sun.com 		filebench_log(LOG_ERROR, "flowop NO fileset");
23717946SAndrew.W.Wilson@sun.com 		return (FILEBENCH_ERROR);
23727946SAndrew.W.Wilson@sun.com 	}
23737946SAndrew.W.Wilson@sun.com 
2374*8404SAndrew.W.Wilson@sun.com 	if ((dir = fileset_pick(fileset, FILESET_PICKDIR, 0, 0)) == NULL) {
23757946SAndrew.W.Wilson@sun.com 		filebench_log(LOG_DEBUG_SCRIPT,
23767946SAndrew.W.Wilson@sun.com 		    "flowop %s failed to pick directory from fileset %s",
23777946SAndrew.W.Wilson@sun.com 		    flowop->fo_name,
23787946SAndrew.W.Wilson@sun.com 		    avd_get_str(fileset->fs_name));
2379*8404SAndrew.W.Wilson@sun.com 		return (FILEBENCH_ERROR);
23807946SAndrew.W.Wilson@sun.com 	}
23817946SAndrew.W.Wilson@sun.com 
23827946SAndrew.W.Wilson@sun.com 	if ((ret = flowoplib_getdirpath(dir, full_path)) != FILEBENCH_OK)
23837946SAndrew.W.Wilson@sun.com 		return (ret);
23847946SAndrew.W.Wilson@sun.com 
23857946SAndrew.W.Wilson@sun.com 	flowop_beginop(threadflow, flowop);
23867946SAndrew.W.Wilson@sun.com 
23877946SAndrew.W.Wilson@sun.com 	/* open the directory */
23887946SAndrew.W.Wilson@sun.com 	if ((dir_handlep = opendir(full_path)) == NULL) {
23897946SAndrew.W.Wilson@sun.com 		filebench_log(LOG_ERROR,
23907946SAndrew.W.Wilson@sun.com 		    "flowop %s failed to open directory in fileset %s\n",
23917946SAndrew.W.Wilson@sun.com 		    flowop->fo_name, avd_get_str(fileset->fs_name));
23927946SAndrew.W.Wilson@sun.com 		return (FILEBENCH_ERROR);
23937946SAndrew.W.Wilson@sun.com 	}
23947946SAndrew.W.Wilson@sun.com 
23957946SAndrew.W.Wilson@sun.com 	/* read through the directory entries */
23967946SAndrew.W.Wilson@sun.com 	while ((direntp = readdir(dir_handlep)) != NULL) {
23977946SAndrew.W.Wilson@sun.com 		dir_bytes += (strlen(direntp->d_name) +
23987946SAndrew.W.Wilson@sun.com 		    sizeof (struct dirent) - 1);
23997946SAndrew.W.Wilson@sun.com 	}
24007946SAndrew.W.Wilson@sun.com 
24017946SAndrew.W.Wilson@sun.com 	/* close the directory */
24027946SAndrew.W.Wilson@sun.com 	(void) closedir(dir_handlep);
24037946SAndrew.W.Wilson@sun.com 
24047946SAndrew.W.Wilson@sun.com 	flowop_endop(threadflow, flowop, dir_bytes);
24057946SAndrew.W.Wilson@sun.com 
24067946SAndrew.W.Wilson@sun.com 	/* indicate that it is no longer busy */
2407*8404SAndrew.W.Wilson@sun.com 	fileset_unbusy(dir, FALSE, FALSE, 0);
24087946SAndrew.W.Wilson@sun.com 
24097946SAndrew.W.Wilson@sun.com 	return (FILEBENCH_OK);
24107946SAndrew.W.Wilson@sun.com }
24117946SAndrew.W.Wilson@sun.com 
24127946SAndrew.W.Wilson@sun.com /*
24135184Sek110237  * Emulate stat of a file. Picks an arbitrary filesetentry with
24145184Sek110237  * an existing file from the flowop's fileset, then performs a
24156084Saw148015  * stat() operation on it. Returns FILEBENCH_ERROR if the flowop has no
24166084Saw148015  * associated fileset. Returns FILEBENCH_NORSC if an appropriate filesetentry
24176084Saw148015  * cannot be found, and FILEBENCH_OK on success.
24185184Sek110237  */
24195184Sek110237 static int
24205184Sek110237 flowoplib_statfile(threadflow_t *threadflow, flowop_t *flowop)
24215184Sek110237 {
24225184Sek110237 	filesetentry_t *file;
24235184Sek110237 	fileset_t *fileset;
24247556SAndrew.W.Wilson@sun.com 	struct stat statbuf;
24257556SAndrew.W.Wilson@sun.com 	int fd = flowop->fo_fdnumber;
24267556SAndrew.W.Wilson@sun.com 
24277556SAndrew.W.Wilson@sun.com 	/* if fd specified and the file is open, use it to access file */
24287556SAndrew.W.Wilson@sun.com 	if ((fd > 0) && ((threadflow->tf_fd[fd]) > 0)) {
24297556SAndrew.W.Wilson@sun.com 
24307556SAndrew.W.Wilson@sun.com 		/* check whether file handle still valid */
24317556SAndrew.W.Wilson@sun.com 		if ((file = threadflow->tf_fse[fd]) == NULL) {
24327556SAndrew.W.Wilson@sun.com 			filebench_log(LOG_DEBUG_SCRIPT,
24337556SAndrew.W.Wilson@sun.com 			    "flowop %s trying to stat NULL file at fd = %d",
24347556SAndrew.W.Wilson@sun.com 			    flowop->fo_name, fd);
24357556SAndrew.W.Wilson@sun.com 			return (FILEBENCH_ERROR);
24367556SAndrew.W.Wilson@sun.com 		}
24377556SAndrew.W.Wilson@sun.com 
24387556SAndrew.W.Wilson@sun.com 		/* if here, we still have a valid file pointer */
24397556SAndrew.W.Wilson@sun.com 		fileset = file->fse_fileset;
24407556SAndrew.W.Wilson@sun.com 	} else {
24417556SAndrew.W.Wilson@sun.com 		/* Otherwise, pick arbitrary file */
24427556SAndrew.W.Wilson@sun.com 		file = NULL;
24437556SAndrew.W.Wilson@sun.com 		fileset = flowop->fo_fileset;
24447556SAndrew.W.Wilson@sun.com 	}
24457556SAndrew.W.Wilson@sun.com 
24467556SAndrew.W.Wilson@sun.com 	if (fileset == NULL) {
24477556SAndrew.W.Wilson@sun.com 		filebench_log(LOG_ERROR,
24487556SAndrew.W.Wilson@sun.com 		    "statfile with no fileset specified");
24497556SAndrew.W.Wilson@sun.com 		return (FILEBENCH_ERROR);
24507556SAndrew.W.Wilson@sun.com 	}
24517556SAndrew.W.Wilson@sun.com 
24527556SAndrew.W.Wilson@sun.com #ifdef HAVE_RAW_SUPPORT
24537556SAndrew.W.Wilson@sun.com 	/* can't be used with raw devices */
24547556SAndrew.W.Wilson@sun.com 	if (fileset->fs_attrs & FILESET_IS_RAW_DEV) {
24557556SAndrew.W.Wilson@sun.com 		filebench_log(LOG_ERROR,
24567556SAndrew.W.Wilson@sun.com 		    "flowop %s attempted do a statfile on a RAW device",
24577556SAndrew.W.Wilson@sun.com 		    flowop->fo_name);
24586084Saw148015 		return (FILEBENCH_ERROR);
24595184Sek110237 	}
24607556SAndrew.W.Wilson@sun.com #endif /* HAVE_RAW_SUPPORT */
24617556SAndrew.W.Wilson@sun.com 
24627556SAndrew.W.Wilson@sun.com 	if (file == NULL) {
24637556SAndrew.W.Wilson@sun.com 		char path[MAXPATHLEN];
24647556SAndrew.W.Wilson@sun.com 		char *pathtmp;
2465*8404SAndrew.W.Wilson@sun.com 		int err;
24667556SAndrew.W.Wilson@sun.com 
24677556SAndrew.W.Wilson@sun.com 		/* pick arbitrary, existing (allocated) file */
2468*8404SAndrew.W.Wilson@sun.com 		if ((err = flowoplib_pickfile(&file, flowop,
2469*8404SAndrew.W.Wilson@sun.com 		    FILESET_PICKEXISTS, 0)) != FILEBENCH_OK) {
24707556SAndrew.W.Wilson@sun.com 			filebench_log(LOG_DEBUG_SCRIPT,
24717556SAndrew.W.Wilson@sun.com 			    "Statfile flowop %s failed to pick file",
24727556SAndrew.W.Wilson@sun.com 			    flowop->fo_name);
2473*8404SAndrew.W.Wilson@sun.com 			return (err);
24747556SAndrew.W.Wilson@sun.com 		}
24757556SAndrew.W.Wilson@sun.com 
24767556SAndrew.W.Wilson@sun.com 		/* resolve path and do a stat on file */
24777946SAndrew.W.Wilson@sun.com 		(void) fb_strlcpy(path, avd_get_str(fileset->fs_path),
24787946SAndrew.W.Wilson@sun.com 		    MAXPATHLEN);
24797946SAndrew.W.Wilson@sun.com 		(void) fb_strlcat(path, "/", MAXPATHLEN);
24807946SAndrew.W.Wilson@sun.com 		(void) fb_strlcat(path, avd_get_str(fileset->fs_name),
24817946SAndrew.W.Wilson@sun.com 		    MAXPATHLEN);
24827556SAndrew.W.Wilson@sun.com 		pathtmp = fileset_resolvepath(file);
24837946SAndrew.W.Wilson@sun.com 		(void) fb_strlcat(path, pathtmp, MAXPATHLEN);
24847556SAndrew.W.Wilson@sun.com 		free(pathtmp);
24857556SAndrew.W.Wilson@sun.com 
24867556SAndrew.W.Wilson@sun.com 		/* stat the file */
24877556SAndrew.W.Wilson@sun.com 		flowop_beginop(threadflow, flowop);
24887556SAndrew.W.Wilson@sun.com 		if (stat(path, &statbuf) == -1)
24897556SAndrew.W.Wilson@sun.com 			filebench_log(LOG_ERROR,
24907556SAndrew.W.Wilson@sun.com 			    "statfile flowop %s failed", flowop->fo_name);
24917556SAndrew.W.Wilson@sun.com 		flowop_endop(threadflow, flowop, 0);
24927556SAndrew.W.Wilson@sun.com 
2493*8404SAndrew.W.Wilson@sun.com 		fileset_unbusy(file, FALSE, FALSE, 0);
24947556SAndrew.W.Wilson@sun.com 	} else {
24957556SAndrew.W.Wilson@sun.com 		/* stat specific file */
24967556SAndrew.W.Wilson@sun.com 		flowop_beginop(threadflow, flowop);
24977556SAndrew.W.Wilson@sun.com 		if (fstat(threadflow->tf_fd[fd], &statbuf) == -1)
24987556SAndrew.W.Wilson@sun.com 			filebench_log(LOG_ERROR,
24997556SAndrew.W.Wilson@sun.com 			    "statfile flowop %s failed", flowop->fo_name);
25007556SAndrew.W.Wilson@sun.com 		flowop_endop(threadflow, flowop, 0);
25017556SAndrew.W.Wilson@sun.com 
25025184Sek110237 	}
25035184Sek110237 
25046084Saw148015 	return (FILEBENCH_OK);
25055184Sek110237 }
25065184Sek110237 
25075184Sek110237 
25085184Sek110237 /*
25095184Sek110237  * Additional reads and writes. Read and write whole files, write
25105184Sek110237  * and append to files. Some of these work with both fileobjs and
25115184Sek110237  * filesets, others only with filesets. The flowoplib_write routine
25125184Sek110237  * writes from thread memory, while the others read or write using
25135184Sek110237  * fo_buf memory. Note that both flowoplib_read() and
25145184Sek110237  * flowoplib_aiowrite() use thread memory as well.
25155184Sek110237  */
25165184Sek110237 
25175184Sek110237 
25185184Sek110237 /*
25195673Saw148015  * Emulate a read of a whole file. The file must be open with
25205673Saw148015  * file descriptor and filesetentry stored at the locations indexed
25215673Saw148015  * by the flowop's fdnumber. It then seeks to the beginning of the
25225673Saw148015  * associated file, and reads fs_iosize bytes at a time until the end
25236084Saw148015  * of the file. Returns FILEBENCH_ERROR on error, FILEBENCH_NORSC if
25246084Saw148015  * out of files, and FILEBENCH_OK on success.
25255184Sek110237  */
25265184Sek110237 static int
25275184Sek110237 flowoplib_readwholefile(threadflow_t *threadflow, flowop_t *flowop)
25285184Sek110237 {
25295673Saw148015 	caddr_t iobuf;
25305184Sek110237 	off64_t bytes = 0;
25315673Saw148015 	int filedesc;
25326212Saw148015 	uint64_t wss;
25336212Saw148015 	fbint_t iosize;
25345184Sek110237 	int ret;
25356212Saw148015 	char zerordbuf;
25365184Sek110237 
25375673Saw148015 	/* get the file to use */
25386084Saw148015 	if ((ret = flowoplib_filesetup(threadflow, flowop, &wss,
25396084Saw148015 	    &filedesc)) != FILEBENCH_OK)
25406084Saw148015 		return (ret);
25415184Sek110237 
25425673Saw148015 	/* an I/O size of zero means read entire working set with one I/O */
25436212Saw148015 	if ((iosize = avd_get_int(flowop->fo_iosize)) == 0)
25445673Saw148015 		iosize = wss;
25455184Sek110237 
25466212Saw148015 	/*
25476212Saw148015 	 * The file may actually be 0 bytes long, in which case skip
25486212Saw148015 	 * the buffer set up call (which would fail) and substitute
25496212Saw148015 	 * a small buffer, which won't really be used.
25506212Saw148015 	 */
25516212Saw148015 	if (iosize == 0) {
25526212Saw148015 		iobuf = (caddr_t)&zerordbuf;
25536212Saw148015 		filebench_log(LOG_DEBUG_SCRIPT,
25546212Saw148015 		    "flowop %s read zero length file", flowop->fo_name);
25556212Saw148015 	} else {
25566212Saw148015 		if (flowoplib_iobufsetup(threadflow, flowop, &iobuf,
25576212Saw148015 		    iosize) != 0)
25586212Saw148015 			return (FILEBENCH_ERROR);
25596212Saw148015 	}
25605184Sek110237 
25615184Sek110237 	/* Measure time to read bytes */
25625184Sek110237 	flowop_beginop(threadflow, flowop);
25635673Saw148015 	(void) lseek64(filedesc, 0, SEEK_SET);
25645673Saw148015 	while ((ret = read(filedesc, iobuf, iosize)) > 0)
25655184Sek110237 		bytes += ret;
25665184Sek110237 
25675673Saw148015 	flowop_endop(threadflow, flowop, bytes);
25685184Sek110237 
25695184Sek110237 	if (ret < 0) {
25705184Sek110237 		filebench_log(LOG_ERROR,
25716391Saw148015 		    "readwhole fail Failed to read whole file: %s",
25726391Saw148015 		    strerror(errno));
25736084Saw148015 		return (FILEBENCH_ERROR);
25745184Sek110237 	}
25755184Sek110237 
25766084Saw148015 	return (FILEBENCH_OK);
25775184Sek110237 }
25785184Sek110237 
25795184Sek110237 /*
25805184Sek110237  * Emulate a write to a file of size fo_iosize.  Will write
25815184Sek110237  * to a file from a fileset if the flowop's fo_fileset field
25825184Sek110237  * specifies one or its fdnumber is non zero. Otherwise it
25835184Sek110237  * will write to a fileobj file, if one exists. If the file
25845184Sek110237  * is not currently open, the routine will attempt to open
25855184Sek110237  * it. The flowop's fo_wss parameter will be used to set the
25865184Sek110237  * maximum file size if it is non-zero, otherwise the
25875184Sek110237  * filesetentry's  fse_size will be used. A random memory
25885184Sek110237  * buffer offset is calculated, and, if fo_random is TRUE,
25895184Sek110237  * a random file offset is used for the write. Otherwise the
25906084Saw148015  * write is to the next sequential location. Returns
25916084Saw148015  * FILEBENCH_ERROR on errors, FILEBENCH_NORSC if iosetup can't
25926084Saw148015  * obtain a file, or FILEBENCH_OK on success.
25935184Sek110237  */
25945184Sek110237 static int
25955184Sek110237 flowoplib_write(threadflow_t *threadflow, flowop_t *flowop)
25965184Sek110237 {
25975673Saw148015 	caddr_t iobuf;
25986212Saw148015 	fbint_t wss;
25996212Saw148015 	fbint_t iosize;
26005184Sek110237 	int filedesc;
26016084Saw148015 	int ret;
26025184Sek110237 
26036212Saw148015 	iosize = avd_get_int(flowop->fo_iosize);
26046084Saw148015 	if ((ret = flowoplib_iosetup(threadflow, flowop, &wss, &iobuf,
26056212Saw148015 	    &filedesc, iosize)) != FILEBENCH_OK)
26066084Saw148015 		return (ret);
26075184Sek110237 
26086212Saw148015 	if (avd_get_bool(flowop->fo_random)) {
26095184Sek110237 		uint64_t fileoffset;
26105184Sek110237 
26115184Sek110237 		if (filebench_randomno64(&fileoffset,
26126212Saw148015 		    wss, iosize, NULL) == -1) {
26135184Sek110237 			filebench_log(LOG_ERROR,
26145184Sek110237 			    "file size smaller than IO size for thread %s",
26155184Sek110237 			    flowop->fo_name);
26166084Saw148015 			return (FILEBENCH_ERROR);
26175184Sek110237 		}
26185184Sek110237 		flowop_beginop(threadflow, flowop);
26195673Saw148015 		if (pwrite64(filedesc, iobuf,
26206212Saw148015 		    iosize, (off64_t)fileoffset) == -1) {
26215184Sek110237 			filebench_log(LOG_ERROR, "write failed, "
26226286Saw148015 			    "offset %llu io buffer %zd: %s",
26236286Saw148015 			    (u_longlong_t)fileoffset, iobuf, strerror(errno));
26245673Saw148015 			flowop_endop(threadflow, flowop, 0);
26256084Saw148015 			return (FILEBENCH_ERROR);
26265184Sek110237 		}
26276212Saw148015 		flowop_endop(threadflow, flowop, iosize);
26285184Sek110237 	} else {
26295184Sek110237 		flowop_beginop(threadflow, flowop);
26306391Saw148015 		if (write(filedesc, iobuf, iosize) == -1) {
26315184Sek110237 			filebench_log(LOG_ERROR,
26325673Saw148015 			    "write failed, io buffer %zd: %s",
26335673Saw148015 			    iobuf, strerror(errno));
26345673Saw148015 			flowop_endop(threadflow, flowop, 0);
26356084Saw148015 			return (FILEBENCH_ERROR);
26365184Sek110237 		}
26376212Saw148015 		flowop_endop(threadflow, flowop, iosize);
26385184Sek110237 	}
26395184Sek110237 
26406084Saw148015 	return (FILEBENCH_OK);
26415184Sek110237 }
26425184Sek110237 
26435184Sek110237 /*
26445184Sek110237  * Emulate a write of a whole file.  The size of the file
26455673Saw148015  * is taken from a filesetentry identified by fo_srcfdnumber or
26465673Saw148015  * from the working set size, while the file descriptor used is
26475673Saw148015  * identified by fo_fdnumber. Does multiple writes of fo_iosize
26486084Saw148015  * length length until full file has been written. Returns FILEBENCH_ERROR on
26496084Saw148015  * error, FILEBENCH_NORSC if out of files, FILEBENCH_OK on success.
26505184Sek110237  */
26515184Sek110237 static int
26525184Sek110237 flowoplib_writewholefile(threadflow_t *threadflow, flowop_t *flowop)
26535184Sek110237 {
26545673Saw148015 	caddr_t iobuf;
26555184Sek110237 	filesetentry_t *file;
26565184Sek110237 	int wsize;
26575184Sek110237 	off64_t seek;
26585184Sek110237 	off64_t bytes = 0;
26595673Saw148015 	uint64_t wss;
26606212Saw148015 	fbint_t iosize;
26615673Saw148015 	int filedesc;
26625184Sek110237 	int srcfd = flowop->fo_srcfdnumber;
26635184Sek110237 	int ret;
26646212Saw148015 	char zerowrtbuf;
26655184Sek110237 
26665673Saw148015 	/* get the file to use */
26676084Saw148015 	if ((ret = flowoplib_filesetup(threadflow, flowop, &wss,
26686084Saw148015 	    &filedesc)) != FILEBENCH_OK)
26696084Saw148015 		return (ret);
26705184Sek110237 
26716212Saw148015 	/* an I/O size of zero means write entire working set with one I/O */
26726212Saw148015 	if ((iosize = avd_get_int(flowop->fo_iosize)) == 0)
26735673Saw148015 		iosize = wss;
26745184Sek110237 
26756212Saw148015 	/*
26766212Saw148015 	 * The file may actually be 0 bytes long, in which case skip
26776212Saw148015 	 * the buffer set up call (which would fail) and substitute
26786212Saw148015 	 * a small buffer, which won't really be used.
26796212Saw148015 	 */
26806212Saw148015 	if (iosize == 0) {
26816212Saw148015 		iobuf = (caddr_t)&zerowrtbuf;
26826212Saw148015 		filebench_log(LOG_DEBUG_SCRIPT,
26836212Saw148015 		    "flowop %s wrote zero length file", flowop->fo_name);
26846212Saw148015 	} else {
26856212Saw148015 		if (flowoplib_iobufsetup(threadflow, flowop, &iobuf,
26866212Saw148015 		    iosize) != 0)
26876212Saw148015 			return (FILEBENCH_ERROR);
26886212Saw148015 	}
26895184Sek110237 
26905184Sek110237 	file = threadflow->tf_fse[srcfd];
26915673Saw148015 	if ((srcfd != 0) && (file == NULL)) {
26925673Saw148015 		filebench_log(LOG_ERROR, "flowop %s: NULL src file",
26935184Sek110237 		    flowop->fo_name);
26946084Saw148015 		return (FILEBENCH_ERROR);
26955184Sek110237 	}
26965184Sek110237 
26975673Saw148015 	if (file)
26985673Saw148015 		wss = file->fse_size;
26995673Saw148015 
27005673Saw148015 	wsize = (int)MIN(wss, iosize);
27015184Sek110237 
27025184Sek110237 	/* Measure time to write bytes */
27035184Sek110237 	flowop_beginop(threadflow, flowop);
27045673Saw148015 	for (seek = 0; seek < wss; seek += wsize) {
27055673Saw148015 		ret = write(filedesc, iobuf, wsize);
27065184Sek110237 		if (ret != wsize) {
27075184Sek110237 			filebench_log(LOG_ERROR,
27085184Sek110237 			    "Failed to write %d bytes on fd %d: %s",
27095673Saw148015 			    wsize, filedesc, strerror(errno));
27105673Saw148015 			flowop_endop(threadflow, flowop, 0);
27116084Saw148015 			return (FILEBENCH_ERROR);
27125184Sek110237 		}
27135673Saw148015 		wsize = (int)MIN(wss - seek, iosize);
27145184Sek110237 		bytes += ret;
27155184Sek110237 	}
27165673Saw148015 	flowop_endop(threadflow, flowop, bytes);
27175184Sek110237 
27186084Saw148015 	return (FILEBENCH_OK);
27195184Sek110237 }
27205184Sek110237 
27215184Sek110237 
27225184Sek110237 /*
27235184Sek110237  * Emulate a fixed size append to a file. Will append data to
27245184Sek110237  * a file chosen from a fileset if the flowop's fo_fileset
27255184Sek110237  * field specifies one or if its fdnumber is non zero.
27265184Sek110237  * Otherwise it will write to a fileobj file, if one exists.
27275184Sek110237  * The flowop's fo_wss parameter will be used to set the
27285184Sek110237  * maximum file size if it is non-zero, otherwise the
27295184Sek110237  * filesetentry's fse_size will be used. A random memory
27305184Sek110237  * buffer offset is calculated, then a logical seek to the
27315184Sek110237  * end of file is done followed by a write of fo_iosize
27325184Sek110237  * bytes. Writes are actually done from fo_buf, rather than
27335184Sek110237  * tf_mem as is done with flowoplib_write(), and no check
27345184Sek110237  * is made to see if fo_iosize exceeds the size of fo_buf.
27356084Saw148015  * Returns FILEBENCH_ERROR on error, FILEBENCH_NORSC if out of
27366084Saw148015  * files in the fileset, FILEBENCH_OK on success.
27375184Sek110237  */
27385184Sek110237 static int
27395184Sek110237 flowoplib_appendfile(threadflow_t *threadflow, flowop_t *flowop)
27405184Sek110237 {
27415673Saw148015 	caddr_t iobuf;
27425673Saw148015 	int filedesc;
27436212Saw148015 	fbint_t wss;
27446212Saw148015 	fbint_t iosize;
27455184Sek110237 	int ret;
27465184Sek110237 
27476212Saw148015 	iosize = avd_get_int(flowop->fo_iosize);
27486084Saw148015 	if ((ret = flowoplib_iosetup(threadflow, flowop, &wss, &iobuf,
27496084Saw148015 	    &filedesc, iosize)) != FILEBENCH_OK)
27506084Saw148015 		return (ret);
27515184Sek110237 
27525184Sek110237 	/* XXX wss is not being used */
27535184Sek110237 
27545184Sek110237 	/* Measure time to write bytes */
27555184Sek110237 	flowop_beginop(threadflow, flowop);
27565184Sek110237 	(void) lseek64(filedesc, 0, SEEK_END);
27575673Saw148015 	ret = write(filedesc, iobuf, iosize);
27585673Saw148015 	if (ret != iosize) {
27595184Sek110237 		filebench_log(LOG_ERROR,
27606286Saw148015 		    "Failed to write %llu bytes on fd %d: %s",
27616286Saw148015 		    (u_longlong_t)iosize, filedesc, strerror(errno));
27626212Saw148015 		flowop_endop(threadflow, flowop, ret);
27636084Saw148015 		return (FILEBENCH_ERROR);
27645184Sek110237 	}
27656212Saw148015 	flowop_endop(threadflow, flowop, ret);
27665184Sek110237 
27676084Saw148015 	return (FILEBENCH_OK);
27685184Sek110237 }
27695184Sek110237 
27705184Sek110237 /*
27715184Sek110237  * Emulate a random size append to a file. Will append data
27725184Sek110237  * to a file chosen from a fileset if the flowop's fo_fileset
27735184Sek110237  * field specifies one or if its fdnumber is non zero. Otherwise
27745184Sek110237  * it will write to a fileobj file, if one exists. The flowop's
27755184Sek110237  * fo_wss parameter will be used to set the maximum file size
27765184Sek110237  * if it is non-zero, otherwise the filesetentry's fse_size
27775184Sek110237  * will be used.  A random transfer size (but at most fo_iosize
27785184Sek110237  * bytes) and a random memory offset are calculated. A logical
27795184Sek110237  * seek to the end of file is done, then writes of up to
27805184Sek110237  * FILE_ALLOC_BLOCK in size are done until the full transfer
27815184Sek110237  * size has been written. Writes are actually done from fo_buf,
27825184Sek110237  * rather than tf_mem as is done with flowoplib_write().
27836084Saw148015  * Returns FILEBENCH_ERROR on error, FILEBENCH_NORSC if out of
27846084Saw148015  * files in the fileset, FILEBENCH_OK on success.
27855184Sek110237  */
27865184Sek110237 static int
27875184Sek110237 flowoplib_appendfilerand(threadflow_t *threadflow, flowop_t *flowop)
27885184Sek110237 {
27895673Saw148015 	caddr_t iobuf;
27905184Sek110237 	uint64_t appendsize;
27915673Saw148015 	int filedesc;
27926212Saw148015 	fbint_t wss;
27936212Saw148015 	fbint_t iosize;
27946212Saw148015 	int ret = 0;
27955184Sek110237 
27966212Saw148015 	if ((iosize = avd_get_int(flowop->fo_iosize)) == 0) {
27976212Saw148015 		filebench_log(LOG_ERROR, "zero iosize for flowop %s",
27986212Saw148015 		    flowop->fo_name);
27996212Saw148015 		return (FILEBENCH_ERROR);
28006212Saw148015 	}
28016212Saw148015 
28026212Saw148015 	if (filebench_randomno64(&appendsize, iosize, 1LL, NULL) != 0)
28036084Saw148015 		return (FILEBENCH_ERROR);
28045184Sek110237 
28055673Saw148015 	/* skip if attempting zero length append */
28065673Saw148015 	if (appendsize == 0) {
28075673Saw148015 		flowop_beginop(threadflow, flowop);
28085673Saw148015 		flowop_endop(threadflow, flowop, 0LL);
28096084Saw148015 		return (FILEBENCH_OK);
28105673Saw148015 	}
28115184Sek110237 
28126084Saw148015 	if ((ret = flowoplib_iosetup(threadflow, flowop, &wss, &iobuf,
28136084Saw148015 	    &filedesc, appendsize)) != FILEBENCH_OK)
28146084Saw148015 		return (ret);
28155673Saw148015 
28165184Sek110237 	/* XXX wss is not being used */
28175184Sek110237 
28185673Saw148015 	/* Measure time to write bytes */
28195673Saw148015 	flowop_beginop(threadflow, flowop);
28205673Saw148015 
28215673Saw148015 	(void) lseek64(filedesc, 0, SEEK_END);
28225673Saw148015 	ret = write(filedesc, iobuf, appendsize);
28235673Saw148015 	if (ret != appendsize) {
28245673Saw148015 		filebench_log(LOG_ERROR,
28256286Saw148015 		    "Failed to write %llu bytes on fd %d: %s",
28266286Saw148015 		    (u_longlong_t)appendsize, filedesc, strerror(errno));
28275673Saw148015 		flowop_endop(threadflow, flowop, 0);
28286084Saw148015 		return (FILEBENCH_ERROR);
28295184Sek110237 	}
28305184Sek110237 
28315673Saw148015 	flowop_endop(threadflow, flowop, appendsize);
28325184Sek110237 
28336084Saw148015 	return (FILEBENCH_OK);
28345184Sek110237 }
28355184Sek110237 
/*
 * Running totals kept in a testrandvar flowop's fo_private area.
 * Allocated by flowoplib_testrandvar_init(), updated on every call to
 * flowoplib_testrandvar(), and reported then freed by
 * flowoplib_testrandvar_destruct().
 */
typedef struct testrandvar_priv {
	uint64_t sample_count;	/* number of values sampled so far */
	double val_sum;		/* sum of the sampled values */
	double sqr_sum;		/* sum of the squares of the sampled values */
} testrandvar_priv_t;
28416212Saw148015 
28426212Saw148015 /*
28436212Saw148015  * flowop to calculate various statistics from the number stream
28446212Saw148015  * produced by a random variable. This allows verification that the
28456212Saw148015  * random distribution used to define the random variable is producing
28466212Saw148015  * the expected distribution of random numbers.
28476212Saw148015  */
28486212Saw148015 /* ARGSUSED */
28496212Saw148015 static int
28506212Saw148015 flowoplib_testrandvar(threadflow_t *threadflow, flowop_t *flowop)
28516212Saw148015 {
28526212Saw148015 	testrandvar_priv_t	*mystats;
28536212Saw148015 	double			value;
28546212Saw148015 
28556212Saw148015 	if ((mystats = (testrandvar_priv_t *)flowop->fo_private) == NULL) {
28566212Saw148015 		filebench_log(LOG_ERROR, "testrandvar not initialized\n");
28576212Saw148015 		filebench_shutdown(1);
28586212Saw148015 		return (-1);
28596212Saw148015 	}
28606212Saw148015 
28616212Saw148015 	value = avd_get_dbl(flowop->fo_value);
28626212Saw148015 
28636212Saw148015 	mystats->sample_count++;
28646212Saw148015 	mystats->val_sum += value;
28656212Saw148015 	mystats->sqr_sum += (value * value);
28666212Saw148015 
28676212Saw148015 	return (0);
28686212Saw148015 }
28696212Saw148015 
28706212Saw148015 /*
28716212Saw148015  * Initialize the private data area used to accumulate the statistics
28726212Saw148015  */
28736212Saw148015 static int
28746212Saw148015 flowoplib_testrandvar_init(flowop_t *flowop)
28756212Saw148015 {
28766212Saw148015 	testrandvar_priv_t	*mystats;
28776212Saw148015 
28786212Saw148015 	if ((mystats = (testrandvar_priv_t *)
28796212Saw148015 	    malloc(sizeof (testrandvar_priv_t))) == NULL) {
28806212Saw148015 		filebench_log(LOG_ERROR, "could not initialize testrandvar");
28816212Saw148015 		filebench_shutdown(1);
28826212Saw148015 		return (-1);
28836212Saw148015 	}
28846212Saw148015 
28856212Saw148015 	mystats->sample_count = 0;
28866212Saw148015 	mystats->val_sum = 0;
28876212Saw148015 	mystats->sqr_sum = 0;
28886212Saw148015 	flowop->fo_private = (void *)mystats;
28896212Saw148015 
28906212Saw148015 	(void) ipc_mutex_unlock(&flowop->fo_lock);
28916212Saw148015 	return (0);
28926212Saw148015 }
28936212Saw148015 
28946212Saw148015 /*
28956212Saw148015  * Print out the accumulated statistics, and free the private storage
28966212Saw148015  */
28976212Saw148015 static void
28986212Saw148015 flowoplib_testrandvar_destruct(flowop_t *flowop)
28996212Saw148015 {
29006212Saw148015 	testrandvar_priv_t	*mystats;
29016212Saw148015 	double mean, std_dev, dbl_count;
29026212Saw148015 
29036212Saw148015 	(void) ipc_mutex_lock(&flowop->fo_lock);
29046212Saw148015 	if ((mystats = (testrandvar_priv_t *)
29056212Saw148015 	    flowop->fo_private) == NULL) {
29066212Saw148015 		(void) ipc_mutex_unlock(&flowop->fo_lock);
29076212Saw148015 		return;
29086212Saw148015 	}
29096212Saw148015 
29106212Saw148015 	flowop->fo_private = NULL;
29116212Saw148015 	(void) ipc_mutex_unlock(&flowop->fo_lock);
29126212Saw148015 
29136212Saw148015 	dbl_count = (double)mystats->sample_count;
29146212Saw148015 	mean = mystats->val_sum / dbl_count;
29156212Saw148015 	std_dev = sqrt((mystats->sqr_sum / dbl_count) - (mean * mean)) / mean;
29166212Saw148015 
29176212Saw148015 	filebench_log(LOG_VERBOSE,
29186286Saw148015 	    "testrandvar: ops = %llu, mean = %8.2lf, stddev = %8.2lf",
29196286Saw148015 	    (u_longlong_t)mystats->sample_count, mean, std_dev);
29206212Saw148015 	free(mystats);
29216212Saw148015 }
29225184Sek110237 
29235184Sek110237 /*
29247556SAndrew.W.Wilson@sun.com  * prints message to the console from within a thread
29257556SAndrew.W.Wilson@sun.com  */
29267556SAndrew.W.Wilson@sun.com static int
29277556SAndrew.W.Wilson@sun.com flowoplib_print(threadflow_t *threadflow, flowop_t *flowop)
29287556SAndrew.W.Wilson@sun.com {
29297556SAndrew.W.Wilson@sun.com 	procflow_t *procflow;
29307556SAndrew.W.Wilson@sun.com 
29317556SAndrew.W.Wilson@sun.com 	procflow = threadflow->tf_process;
29327556SAndrew.W.Wilson@sun.com 	filebench_log(LOG_INFO,
29337556SAndrew.W.Wilson@sun.com 	    "Message from process (%s,%d), thread (%s,%d): %s",
29347556SAndrew.W.Wilson@sun.com 	    procflow->pf_name, procflow->pf_instance,
29357556SAndrew.W.Wilson@sun.com 	    threadflow->tf_name, threadflow->tf_instance,
29367556SAndrew.W.Wilson@sun.com 	    avd_get_str(flowop->fo_value));
29377556SAndrew.W.Wilson@sun.com 
29387556SAndrew.W.Wilson@sun.com 	return (FILEBENCH_OK);
29397556SAndrew.W.Wilson@sun.com }
29407556SAndrew.W.Wilson@sun.com 
/*
 * Prints usage information for flowop operations to stderr. The text
 * is kept as a table with one entry per output line, and the lines are
 * written out in order.
 */
void
flowoplib_usage()
{
	static const char *const usage_lines[] = {
		"flowop [openfile|createfile] name=<name>,fileset=<fname>\n",
		"                       [,fd=<file desc num>]\n",
		"\n",
		"flowop closefile name=<name>,fd=<file desc num>]\n",
		"\n",
		"flowop deletefile name=<name>\n",
		"                       [,fileset=<fname>]\n",
		"                       [,fd=<file desc num>]\n",
		"\n",
		"flowop statfile name=<name>\n",
		"                       [,fileset=<fname>]\n",
		"                       [,fd=<file desc num>]\n",
		"\n",
		"flowop fsync name=<name>,fd=<file desc num>]\n",
		"\n",
		"flowop fsyncset name=<name>,fileset=<fname>]\n",
		"\n",
		"flowop [write|read|aiowrite] name=<name>, \n",
		"                       filename|fileset=<fname>,\n",
		"                       iosize=<size>\n",
		"                       [,directio]\n",
		"                       [,dsync]\n",
		"                       [,iters=<count>]\n",
		"                       [,random]\n",
		"                       [,opennext]\n",
		"                       [,workingset=<size>]\n",
		"flowop [appendfile|appendfilerand] name=<name>, \n",
		"                       filename|fileset=<fname>,\n",
		"                       iosize=<size>\n",
		"                       [,dsync]\n",
		"                       [,iters=<count>]\n",
		"                       [,workingset=<size>]\n",
		"flowop [readwholefile|writewholefile] name=<name>, \n",
		"                       filename|fileset=<fname>,\n",
		"                       iosize=<size>\n",
		"                       [,dsync]\n",
		"                       [,iters=<count>]\n",
		"\n",
		"flowop aiowait name=<name>,target=<aiowrite-flowop>\n",
		"\n",
		"flowop sempost name=<name>,target=<semblock-flowop>,\n",
		"                       value=<increment-to-post>\n",
		"\n",
		"flowop semblock name=<name>,value=<decrement-to-receive>,\n",
		"                       highwater=<inbound-queue-max>\n",
		"\n",
		"flowop block name=<name>\n",
		"\n",
		"flowop wakeup name=<name>,target=<block-flowop>,\n",
		"\n",
		"flowop hog name=<name>,value=<number-of-mem-ops>\n",
		"flowop delay name=<name>,value=<number-of-seconds>\n",
		"\n",
		"flowop eventlimit name=<name>\n",
		"flowop bwlimit name=<name>,value=<mb/s>\n",
		"flowop iopslimit name=<name>,value=<iop/s>\n",
		"flowop finishoncount name=<name>,value=<ops/s>\n",
		"flowop finishonbytes name=<name>,value=<bytes>\n",
		"\n",
		"\n"
	};
	size_t idx;

	for (idx = 0; idx < sizeof (usage_lines) / sizeof (usage_lines[0]);
	    idx++)
		(void) fputs(usage_lines[idx], stderr);
}
3030