xref: /onnv-gate/usr/src/cmd/filebench/common/flowop_library.c (revision 5673:043503f0cca3)
15184Sek110237 /*
25184Sek110237  * CDDL HEADER START
35184Sek110237  *
45184Sek110237  * The contents of this file are subject to the terms of the
55184Sek110237  * Common Development and Distribution License (the "License").
65184Sek110237  * You may not use this file except in compliance with the License.
75184Sek110237  *
85184Sek110237  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
95184Sek110237  * or http://www.opensolaris.org/os/licensing.
105184Sek110237  * See the License for the specific language governing permissions
115184Sek110237  * and limitations under the License.
125184Sek110237  *
135184Sek110237  * When distributing Covered Code, include this CDDL HEADER in each
145184Sek110237  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
155184Sek110237  * If applicable, add the following below this CDDL HEADER, with the
165184Sek110237  * fields enclosed by brackets "[]" replaced with your own identifying
175184Sek110237  * information: Portions Copyright [yyyy] [name of copyright owner]
185184Sek110237  *
195184Sek110237  * CDDL HEADER END
205184Sek110237  */
215184Sek110237 /*
225184Sek110237  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
235184Sek110237  * Use is subject to license terms.
245184Sek110237  */
255184Sek110237 
265184Sek110237 #pragma ident	"%Z%%M%	%I%	%E% SMI"
275184Sek110237 
285184Sek110237 #include "config.h"
295184Sek110237 
305184Sek110237 #include <sys/types.h>
315184Sek110237 #ifdef HAVE_SYS_ASYNCH_H
325184Sek110237 #include <sys/asynch.h>
335184Sek110237 #endif
345184Sek110237 #include <sys/ipc.h>
355184Sek110237 #include <sys/sem.h>
365184Sek110237 #include <sys/errno.h>
375184Sek110237 #include <sys/time.h>
385184Sek110237 #include <inttypes.h>
395184Sek110237 #include <fcntl.h>
405184Sek110237 
415184Sek110237 #ifdef HAVE_UTILITY_H
425184Sek110237 #include <utility.h>
435184Sek110237 #endif /* HAVE_UTILITY_H */
445184Sek110237 
455184Sek110237 #ifdef HAVE_AIO
465184Sek110237 #include <aio.h>
475184Sek110237 #endif /* HAVE_AIO */
485184Sek110237 
495184Sek110237 #ifdef HAVE_LIBAIO_H
505184Sek110237 #include <libaio.h>
515184Sek110237 #endif /* HAVE_LIBAIO_H */
525184Sek110237 
535184Sek110237 #ifdef HAVE_SYS_ASYNC_H
545184Sek110237 #include <sys/asynch.h>
555184Sek110237 #endif /* HAVE_SYS_ASYNC_H */
565184Sek110237 
575184Sek110237 #ifdef HAVE_AIO_H
585184Sek110237 #include <aio.h>
595184Sek110237 #endif /* HAVE_AIO_H */
605184Sek110237 
615184Sek110237 #ifndef HAVE_UINT_T
625184Sek110237 #define	uint_t unsigned int
635184Sek110237 #endif /* HAVE_UINT_T */
645184Sek110237 
655184Sek110237 #ifndef HAVE_AIOCB64_T
665184Sek110237 #define	aiocb64 aiocb
675184Sek110237 #endif /* HAVE_AIOCB64_T */
685184Sek110237 
695184Sek110237 #ifndef HAVE_SYSV_SEM
705184Sek110237 #include <semaphore.h>
715184Sek110237 #endif /* HAVE_SYSV_SEM */
725184Sek110237 
735184Sek110237 #include "filebench.h"
745184Sek110237 #include "flowop.h"
755184Sek110237 #include "fileset.h"
765184Sek110237 
775184Sek110237 /*
785184Sek110237  * These routines implement the flowops from the f language. Each
795184Sek110237  * flowop has a name such as "read", and a set of function pointers
805184Sek110237  * to call for initialization, execution and destruction of the flowop.
815184Sek110237  * The table flowoplib_funcs[] contains a flowoplib struct for each
825184Sek110237  * implemented flowop. Most flowops use a generic initialization function
835184Sek110237  * and all currently use a generic destruction function. All flowop
845184Sek110237  * functions referenced from the table are in this file, though, of
855184Sek110237  * course, they often call functions from other files.
865184Sek110237  *
875184Sek110237  * The flowop_init() routine uses the flowoplib_funcs[] table to
885184Sek110237  * create an initial set of "instance 0" flowops, one for each type of
895184Sek110237  * flowop, from which all other flowops are derived. These "instance 0"
905184Sek110237  * flowops are initialized with information from the table including
915184Sek110237  * pointers for their fo_init, fo_func and fo_destroy functions. When
925184Sek110237  * a flowop definition is encountered in an f language script, the
935184Sek110237  * "type" of flowop, such as "read" is used to search for the
945184Sek110237  * "instance 0" flowop named "read", then a new flowop is allocated
955184Sek110237  * which inherits its function pointers and other initial properties
965184Sek110237  * from the instance 0 flowop, and is given a new name as specified
975184Sek110237  * by the "name=" attribute.
985184Sek110237  */
995184Sek110237 
1005184Sek110237 static int flowoplib_init_generic(flowop_t *flowop);
1015184Sek110237 static void flowoplib_destruct_generic(flowop_t *flowop);
1025184Sek110237 static int flowoplib_fdnum(threadflow_t *threadflow, flowop_t *flowop);
1035184Sek110237 static int flowoplib_write(threadflow_t *threadflow, flowop_t *flowop);
1045184Sek110237 #ifdef HAVE_AIO
1055184Sek110237 static int flowoplib_aiowrite(threadflow_t *threadflow, flowop_t *flowop);
1065184Sek110237 static int flowoplib_aiowait(threadflow_t *threadflow, flowop_t *flowop);
1075184Sek110237 #endif
1085184Sek110237 static int flowoplib_read(threadflow_t *threadflow, flowop_t *flowop);
1095184Sek110237 static int flowoplib_block_init(flowop_t *flowop);
1105184Sek110237 static int flowoplib_block(threadflow_t *threadflow, flowop_t *flowop);
1115184Sek110237 static int flowoplib_wakeup(threadflow_t *threadflow, flowop_t *flowop);
1125184Sek110237 static int flowoplib_hog(threadflow_t *threadflow, flowop_t *flowop);
1135184Sek110237 static int flowoplib_delay(threadflow_t *threadflow, flowop_t *flowop);
1145184Sek110237 static int flowoplib_sempost(threadflow_t *threadflow, flowop_t *flowop);
1155184Sek110237 static int flowoplib_sempost_init(flowop_t *flowop);
1165184Sek110237 static int flowoplib_semblock(threadflow_t *threadflow, flowop_t *flowop);
1175184Sek110237 static int flowoplib_semblock_init(flowop_t *flowop);
1185184Sek110237 static void flowoplib_semblock_destruct(flowop_t *flowop);
1195184Sek110237 static int flowoplib_eventlimit(threadflow_t *, flowop_t *flowop);
1205184Sek110237 static int flowoplib_bwlimit(threadflow_t *, flowop_t *flowop);
1215184Sek110237 static int flowoplib_iopslimit(threadflow_t *, flowop_t *flowop);
1225184Sek110237 static int flowoplib_opslimit(threadflow_t *, flowop_t *flowop);
1235184Sek110237 static int flowoplib_openfile(threadflow_t *, flowop_t *flowop);
1245184Sek110237 static int flowoplib_openfile_common(threadflow_t *, flowop_t *flowop, int fd);
1255184Sek110237 static int flowoplib_createfile(threadflow_t *, flowop_t *flowop);
1265184Sek110237 static int flowoplib_closefile(threadflow_t *, flowop_t *flowop);
1275184Sek110237 static int flowoplib_fsync(threadflow_t *, flowop_t *flowop);
1285184Sek110237 static int flowoplib_readwholefile(threadflow_t *, flowop_t *flowop);
1295184Sek110237 static int flowoplib_writewholefile(threadflow_t *, flowop_t *flowop);
1305184Sek110237 static int flowoplib_appendfile(threadflow_t *threadflow, flowop_t *flowop);
1315184Sek110237 static int flowoplib_appendfilerand(threadflow_t *threadflow, flowop_t *flowop);
1325184Sek110237 static int flowoplib_deletefile(threadflow_t *threadflow, flowop_t *flowop);
1335184Sek110237 static int flowoplib_statfile(threadflow_t *threadflow, flowop_t *flowop);
1345184Sek110237 static int flowoplib_finishoncount(threadflow_t *threadflow, flowop_t *flowop);
1355184Sek110237 static int flowoplib_finishonbytes(threadflow_t *threadflow, flowop_t *flowop);
1365184Sek110237 static int flowoplib_fsyncset(threadflow_t *threadflow, flowop_t *flowop);
1375184Sek110237 
1385184Sek110237 typedef struct flowoplib {
1395184Sek110237 	int	fl_type;
1405184Sek110237 	int	fl_attrs;
1415184Sek110237 	char	*fl_name;
1425184Sek110237 	int	(*fl_init)();
1435184Sek110237 	int	(*fl_func)();
1445184Sek110237 	void	(*fl_destruct)();
1455184Sek110237 } flowoplib_t;
1465184Sek110237 
1475184Sek110237 static flowoplib_t flowoplib_funcs[] = {
1485184Sek110237 	FLOW_TYPE_IO, FLOW_ATTR_WRITE, "write", flowoplib_init_generic,
1495184Sek110237 	flowoplib_write, flowoplib_destruct_generic,
1505184Sek110237 	FLOW_TYPE_IO, FLOW_ATTR_READ, "read", flowoplib_init_generic,
1515184Sek110237 	flowoplib_read, flowoplib_destruct_generic,
1525184Sek110237 #ifdef HAVE_AIO
1535184Sek110237 	FLOW_TYPE_AIO, FLOW_ATTR_WRITE, "aiowrite", flowoplib_init_generic,
1545184Sek110237 	flowoplib_aiowrite, flowoplib_destruct_generic,
1555184Sek110237 	FLOW_TYPE_AIO, 0, "aiowait", flowoplib_init_generic,
1565184Sek110237 	flowoplib_aiowait, flowoplib_destruct_generic,
1575184Sek110237 #endif
1585184Sek110237 	FLOW_TYPE_SYNC, 0, "block", flowoplib_block_init,
1595184Sek110237 	flowoplib_block, flowoplib_destruct_generic,
1605184Sek110237 	FLOW_TYPE_SYNC, 0, "wakeup", flowoplib_init_generic,
1615184Sek110237 	flowoplib_wakeup, flowoplib_destruct_generic,
1625184Sek110237 	FLOW_TYPE_SYNC, 0, "semblock", flowoplib_semblock_init,
1635184Sek110237 	flowoplib_semblock, flowoplib_semblock_destruct,
1645184Sek110237 	FLOW_TYPE_SYNC, 0, "sempost", flowoplib_sempost_init,
1655184Sek110237 	flowoplib_sempost, flowoplib_destruct_generic,
1665184Sek110237 	FLOW_TYPE_OTHER, 0, "hog", flowoplib_init_generic,
1675184Sek110237 	flowoplib_hog, flowoplib_destruct_generic,
1685184Sek110237 	FLOW_TYPE_OTHER, 0, "delay", flowoplib_init_generic,
1695184Sek110237 	flowoplib_delay, flowoplib_destruct_generic,
1705184Sek110237 	FLOW_TYPE_OTHER, 0, "eventlimit", flowoplib_init_generic,
1715184Sek110237 	flowoplib_eventlimit, flowoplib_destruct_generic,
1725184Sek110237 	FLOW_TYPE_OTHER, 0, "bwlimit", flowoplib_init_generic,
1735184Sek110237 	flowoplib_bwlimit, flowoplib_destruct_generic,
1745184Sek110237 	FLOW_TYPE_OTHER, 0, "iopslimit", flowoplib_init_generic,
1755184Sek110237 	flowoplib_iopslimit, flowoplib_destruct_generic,
1765184Sek110237 	FLOW_TYPE_OTHER, 0, "opslimit", flowoplib_init_generic,
1775184Sek110237 	flowoplib_opslimit, flowoplib_destruct_generic,
1785184Sek110237 	FLOW_TYPE_OTHER, 0, "finishoncount", flowoplib_init_generic,
1795184Sek110237 	flowoplib_finishoncount, flowoplib_destruct_generic,
1805184Sek110237 	FLOW_TYPE_OTHER, 0, "finishonbytes", flowoplib_init_generic,
1815184Sek110237 	flowoplib_finishonbytes, flowoplib_destruct_generic,
1825184Sek110237 	FLOW_TYPE_IO, 0, "openfile", flowoplib_init_generic,
1835184Sek110237 	flowoplib_openfile, flowoplib_destruct_generic,
1845184Sek110237 	FLOW_TYPE_IO, 0, "createfile", flowoplib_init_generic,
1855184Sek110237 	flowoplib_createfile, flowoplib_destruct_generic,
1865184Sek110237 	FLOW_TYPE_IO, 0, "closefile", flowoplib_init_generic,
1875184Sek110237 	flowoplib_closefile, flowoplib_destruct_generic,
1885184Sek110237 	FLOW_TYPE_IO, 0, "fsync", flowoplib_init_generic,
1895184Sek110237 	flowoplib_fsync, flowoplib_destruct_generic,
1905184Sek110237 	FLOW_TYPE_IO, 0, "fsyncset", flowoplib_init_generic,
1915184Sek110237 	flowoplib_fsyncset, flowoplib_destruct_generic,
1925184Sek110237 	FLOW_TYPE_IO, 0, "statfile", flowoplib_init_generic,
1935184Sek110237 	flowoplib_statfile, flowoplib_destruct_generic,
1945184Sek110237 	FLOW_TYPE_IO, FLOW_ATTR_READ, "readwholefile", flowoplib_init_generic,
1955184Sek110237 	flowoplib_readwholefile, flowoplib_destruct_generic,
1965184Sek110237 	FLOW_TYPE_IO, FLOW_ATTR_WRITE, "appendfile", flowoplib_init_generic,
1975184Sek110237 	flowoplib_appendfile, flowoplib_destruct_generic,
1985184Sek110237 	FLOW_TYPE_IO, FLOW_ATTR_WRITE, "appendfilerand", flowoplib_init_generic,
1995184Sek110237 	flowoplib_appendfilerand, flowoplib_destruct_generic,
2005184Sek110237 	FLOW_TYPE_IO, 0, "deletefile", flowoplib_init_generic,
2015184Sek110237 	flowoplib_deletefile, flowoplib_destruct_generic,
2025184Sek110237 	FLOW_TYPE_IO, FLOW_ATTR_WRITE, "writewholefile", flowoplib_init_generic,
2035184Sek110237 	flowoplib_writewholefile, flowoplib_destruct_generic
2045184Sek110237 };
2055184Sek110237 
2065184Sek110237 /*
2075184Sek110237  * Loops through the master list of flowops defined in this
2085184Sek110237  * module, and creates and initializes a flowop for each one
2095184Sek110237  * by calling flowop_define. As a side effect of calling
2105184Sek110237  * flowop define, the created flowops are placed on the
2115184Sek110237  * master flowop list. All created flowops are set to
2125184Sek110237  * instance "0".
2135184Sek110237  */
2145184Sek110237 void
2155184Sek110237 flowoplib_init()
2165184Sek110237 {
2175184Sek110237 	int nops = sizeof (flowoplib_funcs) / sizeof (flowoplib_t);
2185184Sek110237 	int i;
2195184Sek110237 
2205184Sek110237 	for (i = 0; i < nops; i++) {
2215184Sek110237 		flowop_t *flowop;
2225184Sek110237 		flowoplib_t *fl;
2235184Sek110237 
2245184Sek110237 		fl = &flowoplib_funcs[i];
2255184Sek110237 
2265184Sek110237 		if ((flowop = flowop_define(NULL,
2275184Sek110237 		    fl->fl_name, NULL, 0, fl->fl_type)) == 0) {
2285184Sek110237 			filebench_log(LOG_ERROR,
2295184Sek110237 			    "failed to create flowop %s\n",
2305184Sek110237 			    fl->fl_name);
2315184Sek110237 			filebench_shutdown(1);
2325184Sek110237 		}
2335184Sek110237 
2345184Sek110237 		flowop->fo_func = fl->fl_func;
2355184Sek110237 		flowop->fo_init = fl->fl_init;
2365184Sek110237 		flowop->fo_destruct = fl->fl_destruct;
2375184Sek110237 		flowop->fo_attrs = fl->fl_attrs;
2385184Sek110237 	}
2395184Sek110237 }
2405184Sek110237 
2415184Sek110237 static int
2425184Sek110237 flowoplib_init_generic(flowop_t *flowop)
2435184Sek110237 {
2445184Sek110237 	(void) ipc_mutex_unlock(&flowop->fo_lock);
2455184Sek110237 	return (0);
2465184Sek110237 }
2475184Sek110237 
2485184Sek110237 /* ARGSUSED */
2495184Sek110237 static void
2505184Sek110237 flowoplib_destruct_generic(flowop_t *flowop)
2515184Sek110237 {
252*5673Saw148015 	/* release any resources held by the flowop */
253*5673Saw148015 	if (flowop->fo_buf)
254*5673Saw148015 		free(flowop->fo_buf);
2555184Sek110237 }
2565184Sek110237 
2575184Sek110237 /*
2585184Sek110237  * Generates a file attribute from flags in the supplied flowop.
2595184Sek110237  * Sets FLOW_ATTR_DIRECTIO and/or FLOW_ATTR_DSYNC as needed.
2605184Sek110237  */
2615184Sek110237 static int
2625184Sek110237 flowoplib_fileattrs(flowop_t *flowop)
2635184Sek110237 {
2645184Sek110237 	int attrs = 0;
2655184Sek110237 
2665184Sek110237 	if (*flowop->fo_directio)
2675184Sek110237 		attrs |= FLOW_ATTR_DIRECTIO;
2685184Sek110237 
2695184Sek110237 	if (*flowop->fo_dsync)
2705184Sek110237 		attrs |= FLOW_ATTR_DSYNC;
2715184Sek110237 
2725184Sek110237 	return (attrs);
2735184Sek110237 }
2745184Sek110237 
2755184Sek110237 /*
2765184Sek110237  * Searches for a file descriptor. Tries the flowop's
2775184Sek110237  * fo_fdnumber first and returns with it if it has been
2785184Sek110237  * explicitly set (greater than 0). It next checks to
2795184Sek110237  * see if a rotating file descriptor policy is in effect,
2805184Sek110237  * and if not returns the fdnumber regardless of what
2815184Sek110237  * it is. (note that if it is 0, it just selects to the
2825184Sek110237  * default file descriptor in the threadflow's tf_fd
2835184Sek110237  * array). If the rotating fd policy is in effect, it
2845184Sek110237  * cycles from the end of the tf_fd array to one location
2855184Sek110237  * beyond the maximum needed by the number of entries in
2865184Sek110237  * the associated fileset on each invocation, then starts
2875184Sek110237  * over from the end.
2885184Sek110237  *
2895184Sek110237  * The routine returns an index into the threadflow's
2905184Sek110237  * tf_fd table where the actual file descriptor will be
2915184Sek110237  * found. Note: the calling routine must not call this
2925184Sek110237  * routine if the flowop does not have a fileset, and the
2935184Sek110237  * flowop's fo_fdnumber is zero and fo_rotatefd is
2945184Sek110237  * asserted, or an addressing fault may occur.
2955184Sek110237  */
296*5673Saw148015 static int
2975184Sek110237 flowoplib_fdnum(threadflow_t *threadflow, flowop_t *flowop)
2985184Sek110237 {
2995184Sek110237 	/* If the script sets the fd explicitly */
3005184Sek110237 	if (flowop->fo_fdnumber > 0)
3015184Sek110237 		return (flowop->fo_fdnumber);
3025184Sek110237 
3035184Sek110237 	/* If the flowop defaults to persistent fd */
3045184Sek110237 	if (!integer_isset(flowop->fo_rotatefd))
3055184Sek110237 		return (flowop->fo_fdnumber);
3065184Sek110237 
3075184Sek110237 	/* Rotate the fd on each flowop invocation */
3085184Sek110237 	if (*(flowop->fo_fileset->fs_entries) > (THREADFLOW_MAXFD / 2)) {
3095184Sek110237 		filebench_log(LOG_ERROR, "Out of file descriptors in flowop %s"
3105184Sek110237 		    " (too many files : %d", flowop->fo_name,
3115184Sek110237 		    *(flowop->fo_fileset->fs_entries));
3125184Sek110237 		return (-1);
3135184Sek110237 	}
3145184Sek110237 
3155184Sek110237 	/* First time around */
3165184Sek110237 	if (threadflow->tf_fdrotor == 0)
3175184Sek110237 		threadflow->tf_fdrotor = THREADFLOW_MAXFD;
3185184Sek110237 
3195184Sek110237 	/* One fd for every file in the set */
3205184Sek110237 	if (*(flowop->fo_fileset->fs_entries) ==
3215184Sek110237 	    (THREADFLOW_MAXFD - threadflow->tf_fdrotor))
3225184Sek110237 		threadflow->tf_fdrotor = THREADFLOW_MAXFD;
3235184Sek110237 
3245184Sek110237 
3255184Sek110237 	threadflow->tf_fdrotor--;
3265184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "selected fd = %d",
3275184Sek110237 	    threadflow->tf_fdrotor);
3285184Sek110237 	return (threadflow->tf_fdrotor);
3295184Sek110237 }
3305184Sek110237 
3315184Sek110237 /*
332*5673Saw148015  * Determines the file descriptor to use, and attempts to open
333*5673Saw148015  * the file if it is not already open. Also determines the wss
334*5673Saw148015  * value. Returns -1 on errors, 0 otherwise.
335*5673Saw148015  */
336*5673Saw148015 static int
337*5673Saw148015 flowoplib_filesetup(threadflow_t *threadflow, flowop_t *flowop,
338*5673Saw148015     vinteger_t *wssp, int *filedescp)
339*5673Saw148015 {
340*5673Saw148015 	int fd = flowoplib_fdnum(threadflow, flowop);
341*5673Saw148015 
342*5673Saw148015 	if (fd == -1)
343*5673Saw148015 		return (-1);
344*5673Saw148015 
345*5673Saw148015 	if (threadflow->tf_fd[fd] == 0) {
346*5673Saw148015 		if (flowoplib_openfile_common(
347*5673Saw148015 		    threadflow, flowop, fd) == -1)
348*5673Saw148015 			return (-1);
349*5673Saw148015 
350*5673Saw148015 		if (threadflow->tf_fse[fd]) {
351*5673Saw148015 			filebench_log(LOG_DEBUG_IMPL, "opened file %s",
352*5673Saw148015 			    threadflow->tf_fse[fd]->fse_path);
353*5673Saw148015 		} else {
354*5673Saw148015 			filebench_log(LOG_DEBUG_IMPL,
355*5673Saw148015 			    "opened device %s/%s",
356*5673Saw148015 			    flowop->fo_fileset->fs_path,
357*5673Saw148015 			    flowop->fo_fileset->fs_name);
358*5673Saw148015 		}
359*5673Saw148015 	}
360*5673Saw148015 
361*5673Saw148015 	*filedescp = threadflow->tf_fd[fd];
362*5673Saw148015 
363*5673Saw148015 	if (*flowop->fo_wss == 0) {
364*5673Saw148015 		if (threadflow->tf_fse[fd])
365*5673Saw148015 			*wssp = threadflow->tf_fse[fd]->fse_size;
366*5673Saw148015 		else
367*5673Saw148015 			*wssp = *flowop->fo_fileset->fs_size;
368*5673Saw148015 	} else {
369*5673Saw148015 		*wssp = *flowop->fo_wss;
370*5673Saw148015 	}
371*5673Saw148015 
372*5673Saw148015 	return (0);
373*5673Saw148015 }
374*5673Saw148015 
375*5673Saw148015 /*
376*5673Saw148015  * Determines the io buffer or random offset into tf_mem for
377*5673Saw148015  * the IO operation. Returns -1 on errors, 0 otherwise.
378*5673Saw148015  */
379*5673Saw148015 static int
380*5673Saw148015 flowoplib_iobufsetup(threadflow_t *threadflow, flowop_t *flowop,
381*5673Saw148015     caddr_t *iobufp, vinteger_t iosize)
382*5673Saw148015 {
383*5673Saw148015 	long memsize;
384*5673Saw148015 	size_t memoffset;
385*5673Saw148015 
386*5673Saw148015 	if (iosize == 0) {
387*5673Saw148015 		filebench_log(LOG_ERROR, "zero iosize for thread %s",
388*5673Saw148015 		    flowop->fo_name);
389*5673Saw148015 		return (-1);
390*5673Saw148015 	}
391*5673Saw148015 
392*5673Saw148015 	if ((memsize = *threadflow->tf_memsize) != 0) {
393*5673Saw148015 
394*5673Saw148015 		/* use tf_mem for I/O with random offset */
395*5673Saw148015 		if (filebench_randomno(&memoffset, memsize, iosize) == -1) {
396*5673Saw148015 			filebench_log(LOG_ERROR,
397*5673Saw148015 			    "tf_memsize smaller than IO size for thread %s",
398*5673Saw148015 			    flowop->fo_name);
399*5673Saw148015 			return (-1);
400*5673Saw148015 		}
401*5673Saw148015 		*iobufp = threadflow->tf_mem + memoffset;
402*5673Saw148015 
403*5673Saw148015 	} else {
404*5673Saw148015 		/* use private I/O buffer */
405*5673Saw148015 		if ((flowop->fo_buf != NULL) &&
406*5673Saw148015 		    (flowop->fo_buf_size < iosize)) {
407*5673Saw148015 			free(flowop->fo_buf);
408*5673Saw148015 			flowop->fo_buf = NULL;
409*5673Saw148015 		}
410*5673Saw148015 		if ((flowop->fo_buf == NULL) && ((flowop->fo_buf
411*5673Saw148015 		    = (char *)malloc(iosize)) == NULL))
412*5673Saw148015 				return (-1);
413*5673Saw148015 
414*5673Saw148015 		flowop->fo_buf_size = iosize;
415*5673Saw148015 		*iobufp = flowop->fo_buf;
416*5673Saw148015 	}
417*5673Saw148015 	return (0);
418*5673Saw148015 }
419*5673Saw148015 
420*5673Saw148015 /*
421*5673Saw148015  * Determines the file descriptor to use, opens it if necessary, the
422*5673Saw148015  * io buffer or random offset into tf_mem for IO operation and the wss
423*5673Saw148015  * value. Returns -1 on errors, 0 otherwise.
424*5673Saw148015  */
425*5673Saw148015 static int
426*5673Saw148015 flowoplib_iosetup(threadflow_t *threadflow, flowop_t *flowop,
427*5673Saw148015     vinteger_t *wssp, caddr_t *iobufp, int *filedescp, vinteger_t iosize)
428*5673Saw148015 {
429*5673Saw148015 	if (flowoplib_filesetup(threadflow, flowop, wssp, filedescp) == -1)
430*5673Saw148015 		return (-1);
431*5673Saw148015 
432*5673Saw148015 	if (flowoplib_iobufsetup(threadflow, flowop, iobufp, iosize) == -1)
433*5673Saw148015 		return (-1);
434*5673Saw148015 
435*5673Saw148015 	return (0);
436*5673Saw148015 }
437*5673Saw148015 
438*5673Saw148015 /*
4395184Sek110237  * Emulate posix read / pread. If the flowop has a fileset,
4405184Sek110237  * a file descriptor number index is fetched, otherwise a
4415184Sek110237  * supplied fileobj file is used. In either case the specified
4425184Sek110237  * file will be opened if not already open. If the flowop has
4435184Sek110237  * neither a fileset or fileobj, an error is logged and -1
4445184Sek110237  * returned.
4455184Sek110237  *
4465184Sek110237  * The actual read is done to a random offset in the
4475184Sek110237  * threadflow's thread memory (tf_mem), with a size set by
4485184Sek110237  * fo_iosize and at either a random disk offset within the
4495184Sek110237  * working set size, or at the next sequential location. If
4505184Sek110237  * any errors are encountered, -1 is returned, if successful,
4515184Sek110237  * 0 is returned.
4525184Sek110237  */
4535184Sek110237 static int
4545184Sek110237 flowoplib_read(threadflow_t *threadflow, flowop_t *flowop)
4555184Sek110237 {
456*5673Saw148015 	caddr_t iobuf;
4575184Sek110237 	vinteger_t wss;
4585184Sek110237 	int filedesc;
4595184Sek110237 	int ret;
4605184Sek110237 
461*5673Saw148015 	if (flowoplib_iosetup(threadflow, flowop, &wss, &iobuf,
462*5673Saw148015 	    &filedesc, *flowop->fo_iosize) != 0)
4635184Sek110237 		return (-1);
4645184Sek110237 
4655184Sek110237 	if (*flowop->fo_random) {
4665184Sek110237 		uint64_t fileoffset;
4675184Sek110237 
4685184Sek110237 		if (filebench_randomno64(&fileoffset, wss,
4695184Sek110237 		    *flowop->fo_iosize) == -1) {
4705184Sek110237 			filebench_log(LOG_ERROR,
4715184Sek110237 			    "file size smaller than IO size for thread %s",
4725184Sek110237 			    flowop->fo_name);
4735184Sek110237 			return (-1);
4745184Sek110237 		}
4755184Sek110237 
4765184Sek110237 		(void) flowop_beginop(threadflow, flowop);
477*5673Saw148015 		if ((ret = pread64(filedesc, iobuf,
4785184Sek110237 		    *flowop->fo_iosize, (off64_t)fileoffset)) == -1) {
479*5673Saw148015 			(void) flowop_endop(threadflow, flowop, 0);
4805184Sek110237 			filebench_log(LOG_ERROR,
4815184Sek110237 			    "read file %s failed, offset %lld "
482*5673Saw148015 			    "io buffer %zd: %s",
483*5673Saw148015 			    flowop->fo_fileset->fs_name,
484*5673Saw148015 			    fileoffset, iobuf, strerror(errno));
485*5673Saw148015 			flowop_endop(threadflow, flowop, 0);
4865184Sek110237 			return (-1);
4875184Sek110237 		}
488*5673Saw148015 		(void) flowop_endop(threadflow, flowop, ret);
4895184Sek110237 
4905184Sek110237 		if ((ret == 0))
4915184Sek110237 			(void) lseek64(filedesc, 0, SEEK_SET);
4925184Sek110237 
4935184Sek110237 	} else {
4945184Sek110237 		(void) flowop_beginop(threadflow, flowop);
495*5673Saw148015 		if ((ret = read(filedesc, iobuf,
4965184Sek110237 		    *flowop->fo_iosize)) == -1) {
4975184Sek110237 			filebench_log(LOG_ERROR,
498*5673Saw148015 			    "read file %s failed, io buffer %zd: %s",
499*5673Saw148015 			    flowop->fo_fileset->fs_name,
500*5673Saw148015 			    iobuf, strerror(errno));
501*5673Saw148015 			(void) flowop_endop(threadflow, flowop, 0);
5025184Sek110237 			return (-1);
5035184Sek110237 		}
504*5673Saw148015 		(void) flowop_endop(threadflow, flowop, ret);
5055184Sek110237 
5065184Sek110237 		if ((ret == 0))
5075184Sek110237 			(void) lseek64(filedesc, 0, SEEK_SET);
5085184Sek110237 	}
5095184Sek110237 
5105184Sek110237 	return (0);
5115184Sek110237 }
5125184Sek110237 
5135184Sek110237 #ifdef HAVE_AIO
5145184Sek110237 
5155184Sek110237 /*
5165184Sek110237  * Asynchronous write section. An Asynchronous IO element
5175184Sek110237  * (aiolist_t) is used to associate the asynchronous write request with
5185184Sek110237  * its subsequent completion. This element includes a aiocb64 struct
5195184Sek110237  * that is used by posix aio_xxx calls to track the asynchronous writes.
5205184Sek110237  * The flowops aiowrite and aiowait result in calls to these posix
5215184Sek110237  * aio_xxx system routines to do the actual asynchronous write IO
5225184Sek110237  * operations.
5235184Sek110237  */
5245184Sek110237 
5255184Sek110237 
5265184Sek110237 /*
5275184Sek110237  * Allocates an asynchronous I/O list (aio, of type
5285184Sek110237  * aiolist_t) element. Adds it to the flowop thread's
5295184Sek110237  * threadflow aio list. Returns a pointer to the element.
5305184Sek110237  */
5315184Sek110237 static aiolist_t *
5325184Sek110237 aio_allocate(flowop_t *flowop)
5335184Sek110237 {
5345184Sek110237 	aiolist_t *aiolist;
5355184Sek110237 
5365184Sek110237 	if ((aiolist = malloc(sizeof (aiolist_t))) == NULL) {
5375184Sek110237 		filebench_log(LOG_ERROR, "malloc aiolist failed");
5385184Sek110237 		filebench_shutdown(1);
5395184Sek110237 	}
5405184Sek110237 
5415184Sek110237 	/* Add to list */
5425184Sek110237 	if (flowop->fo_thread->tf_aiolist == NULL) {
5435184Sek110237 		flowop->fo_thread->tf_aiolist = aiolist;
5445184Sek110237 		aiolist->al_next = NULL;
5455184Sek110237 	} else {
5465184Sek110237 		aiolist->al_next = flowop->fo_thread->tf_aiolist;
5475184Sek110237 		flowop->fo_thread->tf_aiolist = aiolist;
5485184Sek110237 	}
5495184Sek110237 	return (aiolist);
5505184Sek110237 }
5515184Sek110237 
5525184Sek110237 /*
5535184Sek110237  * Searches for the aiolist element that has a matching
5545184Sek110237  * completion block, aiocb. If none found returns -1. If
5555184Sek110237  * found, removes the aiolist element from flowop thread's
5565184Sek110237  * list and returns 0.
5575184Sek110237  */
5585184Sek110237 static int
5595184Sek110237 aio_deallocate(flowop_t *flowop, struct aiocb64 *aiocb)
5605184Sek110237 {
5615184Sek110237 	aiolist_t *aiolist = flowop->fo_thread->tf_aiolist;
5625184Sek110237 	aiolist_t *previous = NULL;
5635184Sek110237 	aiolist_t *match = NULL;
5645184Sek110237 
5655184Sek110237 	if (aiocb == NULL) {
5665184Sek110237 		filebench_log(LOG_ERROR, "null aiocb deallocate");
5675184Sek110237 		return (0);
5685184Sek110237 	}
5695184Sek110237 
5705184Sek110237 	while (aiolist) {
5715184Sek110237 		if (aiocb == &(aiolist->al_aiocb)) {
5725184Sek110237 			match = aiolist;
5735184Sek110237 			break;
5745184Sek110237 		}
5755184Sek110237 		previous = aiolist;
5765184Sek110237 		aiolist = aiolist->al_next;
5775184Sek110237 	}
5785184Sek110237 
5795184Sek110237 	if (match == NULL)
5805184Sek110237 		return (-1);
5815184Sek110237 
5825184Sek110237 	/* Remove from the list */
5835184Sek110237 	if (previous)
5845184Sek110237 		previous->al_next = match->al_next;
5855184Sek110237 	else
5865184Sek110237 		flowop->fo_thread->tf_aiolist = match->al_next;
5875184Sek110237 
5885184Sek110237 	return (0);
5895184Sek110237 }
5905184Sek110237 
5915184Sek110237 /*
5925184Sek110237  * Emulate posix aiowrite(). Determines which file to use,
5935184Sek110237  * either one file of a fileset, or the file associated
5945184Sek110237  * with a fileobj, allocates and fills an aiolist_t element
5955184Sek110237  * for the write, and issues the asynchronous write. This
5965184Sek110237  * operation is only valid for random IO, and returns an
5975184Sek110237  * error if the flowop is set for sequential IO. Returns 0
5985184Sek110237  * on success, -1 on any encountered error.
5995184Sek110237  */
6005184Sek110237 static int
6015184Sek110237 flowoplib_aiowrite(threadflow_t *threadflow, flowop_t *flowop)
6025184Sek110237 {
603*5673Saw148015 	caddr_t iobuf;
6045184Sek110237 	vinteger_t wss;
6055184Sek110237 	int filedesc;
6065184Sek110237 
607*5673Saw148015 	if (flowoplib_iosetup(threadflow, flowop, &wss, &iobuf,
608*5673Saw148015 	    &filedesc, *flowop->fo_iosize) != 0)
6095184Sek110237 		return (-1);
6105184Sek110237 
6115184Sek110237 	if (*flowop->fo_random) {
6125184Sek110237 		uint64_t fileoffset;
6135184Sek110237 		struct aiocb64 *aiocb;
6145184Sek110237 		aiolist_t *aiolist;
6155184Sek110237 
6165184Sek110237 		if (filebench_randomno64(&fileoffset,
6175184Sek110237 		    wss, *flowop->fo_iosize) == -1) {
6185184Sek110237 			filebench_log(LOG_ERROR,
6195184Sek110237 			    "file size smaller than IO size for thread %s",
6205184Sek110237 			    flowop->fo_name);
6215184Sek110237 			return (-1);
6225184Sek110237 		}
6235184Sek110237 
6245184Sek110237 		aiolist = aio_allocate(flowop);
6255184Sek110237 		aiolist->al_type = AL_WRITE;
6265184Sek110237 		aiocb = &aiolist->al_aiocb;
6275184Sek110237 
6285184Sek110237 		aiocb->aio_fildes = filedesc;
629*5673Saw148015 		aiocb->aio_buf = iobuf;
6305184Sek110237 		aiocb->aio_nbytes = *flowop->fo_iosize;
6315184Sek110237 		aiocb->aio_offset = (off64_t)fileoffset;
6325184Sek110237 		aiocb->aio_reqprio = 0;
6335184Sek110237 
6345184Sek110237 		filebench_log(LOG_DEBUG_IMPL,
6355184Sek110237 		    "aio fd=%d, bytes=%lld, offset=%lld",
6365184Sek110237 		    filedesc, *flowop->fo_iosize, fileoffset);
6375184Sek110237 
6385184Sek110237 		flowop_beginop(threadflow, flowop);
6395184Sek110237 		if (aio_write64(aiocb) < 0) {
6405184Sek110237 			filebench_log(LOG_ERROR, "aiowrite failed: %s",
6415184Sek110237 			    strerror(errno));
6425184Sek110237 			filebench_shutdown(1);
6435184Sek110237 		}
644*5673Saw148015 		flowop_endop(threadflow, flowop, *flowop->fo_iosize);
6455184Sek110237 	} else {
6465184Sek110237 		return (-1);
6475184Sek110237 	}
6485184Sek110237 
6495184Sek110237 	return (0);
6505184Sek110237 }
6515184Sek110237 
6525184Sek110237 
6535184Sek110237 
6545184Sek110237 #define	MAXREAP 4096
6555184Sek110237 
6565184Sek110237 /*
6575184Sek110237  * Emulate posix aiowait(). Waits for the completion of half the
6585184Sek110237  * outstanding asynchronous IOs, or a single IO, whichever is
6595184Sek110237  * larger. The routine will return after a sufficient number of
6605184Sek110237  * completed calls issued by any thread in the procflow have
6615184Sek110237  * completed, or a 1 second timeout elapses. All completed
6625184Sek110237  * IO operations are deleted from the thread's aiolist.
6635184Sek110237  */
6645184Sek110237 static int
6655184Sek110237 flowoplib_aiowait(threadflow_t *threadflow, flowop_t *flowop)
6665184Sek110237 {
6675184Sek110237 	struct aiocb64 **worklist;
6685184Sek110237 	aiolist_t *aio = flowop->fo_thread->tf_aiolist;
6695184Sek110237 	int uncompleted = 0;
6705184Sek110237 
6715184Sek110237 	worklist = calloc(MAXREAP, sizeof (struct aiocb64 *));
6725184Sek110237 
6735184Sek110237 	/* Count the list of pending aios */
6745184Sek110237 	while (aio) {
6755184Sek110237 		uncompleted++;
6765184Sek110237 		aio = aio->al_next;
6775184Sek110237 	}
6785184Sek110237 
6795184Sek110237 	do {
6805184Sek110237 		uint_t ncompleted = 0;
6815184Sek110237 		uint_t todo;
6825184Sek110237 		struct timespec timeout;
6835184Sek110237 		int inprogress;
6845184Sek110237 		int i;
6855184Sek110237 
6865184Sek110237 		/* Wait for half of the outstanding requests */
6875184Sek110237 		timeout.tv_sec = 1;
6885184Sek110237 		timeout.tv_nsec = 0;
6895184Sek110237 
6905184Sek110237 		if (uncompleted > MAXREAP)
6915184Sek110237 			todo = MAXREAP;
6925184Sek110237 		else
6935184Sek110237 			todo = uncompleted / 2;
6945184Sek110237 
6955184Sek110237 		if (todo == 0)
6965184Sek110237 			todo = 1;
6975184Sek110237 
6985184Sek110237 		flowop_beginop(threadflow, flowop);
6995184Sek110237 
7005184Sek110237 #ifdef HAVE_AIOWAITN
7015184Sek110237 		if ((aio_waitn64((struct aiocb64 **)worklist,
7025184Sek110237 		    MAXREAP, &todo, &timeout) == -1) &&
7035184Sek110237 		    errno && (errno != ETIME)) {
7045184Sek110237 			filebench_log(LOG_ERROR,
7055184Sek110237 			    "aiowait failed: %s, outstanding = %d, "
7065184Sek110237 			    "ncompleted = %d ",
7075184Sek110237 			    strerror(errno), uncompleted, todo);
7085184Sek110237 		}
7095184Sek110237 
7105184Sek110237 		ncompleted = todo;
7115184Sek110237 		/* Take the  completed I/Os from the list */
7125184Sek110237 		inprogress = 0;
7135184Sek110237 		for (i = 0; i < ncompleted; i++) {
7145184Sek110237 			if ((aio_return64(worklist[i]) == -1) &&
7155184Sek110237 			    (errno == EINPROGRESS)) {
7165184Sek110237 				inprogress++;
7175184Sek110237 				continue;
7185184Sek110237 			}
7195184Sek110237 			if (aio_deallocate(flowop, worklist[i]) < 0) {
7205184Sek110237 				filebench_log(LOG_ERROR, "Could not remove "
7215184Sek110237 				    "aio from list ");
722*5673Saw148015 				flowop_endop(threadflow, flowop, 0);
7235184Sek110237 				return (-1);
7245184Sek110237 			}
7255184Sek110237 		}
7265184Sek110237 
7275184Sek110237 		uncompleted -= ncompleted;
7285184Sek110237 		uncompleted += inprogress;
7295184Sek110237 
7305184Sek110237 #else
7315184Sek110237 
7325184Sek110237 		for (ncompleted = 0, inprogress = 0,
7335184Sek110237 		    aio = flowop->fo_thread->tf_aiolist;
7345184Sek110237 		    ncompleted < todo, aio != NULL; aio = aio->al_next) {
7355184Sek110237 
7365184Sek110237 			result = aio_error64(&aio->al_aiocb);
7375184Sek110237 
7385184Sek110237 			if (result == EINPROGRESS) {
7395184Sek110237 				inprogress++;
7405184Sek110237 				continue;
7415184Sek110237 			}
7425184Sek110237 
7435184Sek110237 			if ((aio_return64(&aio->al_aiocb) == -1) || result) {
7445184Sek110237 				filebench_log(LOG_ERROR, "aio failed: %s",
7455184Sek110237 				    strerror(result));
7465184Sek110237 				continue;
7475184Sek110237 			}
7485184Sek110237 
7495184Sek110237 			ncompleted++;
7505184Sek110237 
7515184Sek110237 			if (aio_deallocate(flowop, &aio->al_aiocb) < 0) {
7525184Sek110237 				filebench_log(LOG_ERROR, "Could not remove aio "
7535184Sek110237 				    "from list ");
754*5673Saw148015 				flowop_endop(threadflow, flowop, 0);
7555184Sek110237 				return (-1);
7565184Sek110237 			}
7575184Sek110237 		}
7585184Sek110237 
7595184Sek110237 		uncompleted -= ncompleted;
7605184Sek110237 
7615184Sek110237 #endif
7625184Sek110237 		filebench_log(LOG_DEBUG_SCRIPT,
7635184Sek110237 		    "aio2 completed %d ios, uncompleted = %d, inprogress = %d",
7645184Sek110237 		    ncompleted, uncompleted, inprogress);
7655184Sek110237 
7665184Sek110237 	} while (uncompleted > MAXREAP);
7675184Sek110237 
768*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
7695184Sek110237 
7705184Sek110237 	free(worklist);
7715184Sek110237 
7725184Sek110237 	return (0);
7735184Sek110237 }
7745184Sek110237 
7755184Sek110237 #endif /* HAVE_AIO */
7765184Sek110237 
7775184Sek110237 /*
7785184Sek110237  * Initializes a "flowop_block" flowop. Specifically, it
7795184Sek110237  * initializes the flowop's fo_cv and unlocks the fo_lock.
7805184Sek110237  */
7815184Sek110237 static int
7825184Sek110237 flowoplib_block_init(flowop_t *flowop)
7835184Sek110237 {
7845184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "flow %s-%d block init address %zx",
7855184Sek110237 	    flowop->fo_name, flowop->fo_instance, &flowop->fo_cv);
7865184Sek110237 	(void) pthread_cond_init(&flowop->fo_cv, ipc_condattr());
7875184Sek110237 	(void) ipc_mutex_unlock(&flowop->fo_lock);
7885184Sek110237 
7895184Sek110237 	return (0);
7905184Sek110237 }
7915184Sek110237 
7925184Sek110237 /*
7935184Sek110237  * Blocks the threadflow until woken up by flowoplib_wakeup.
7945184Sek110237  * The routine blocks on the flowop's fo_cv condition variable.
7955184Sek110237  */
7965184Sek110237 static int
7975184Sek110237 flowoplib_block(threadflow_t *threadflow, flowop_t *flowop)
7985184Sek110237 {
7995184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "flow %s-%d blocking at address %zx",
8005184Sek110237 	    flowop->fo_name, flowop->fo_instance, &flowop->fo_cv);
8015184Sek110237 	(void) ipc_mutex_lock(&flowop->fo_lock);
8025184Sek110237 
8035184Sek110237 	flowop_beginop(threadflow, flowop);
8045184Sek110237 	(void) pthread_cond_wait(&flowop->fo_cv, &flowop->fo_lock);
805*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
8065184Sek110237 
8075184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "flow %s-%d unblocking",
8085184Sek110237 	    flowop->fo_name, flowop->fo_instance);
8095184Sek110237 
8105184Sek110237 	(void) ipc_mutex_unlock(&flowop->fo_lock);
8115184Sek110237 
8125184Sek110237 	return (0);
8135184Sek110237 }
8145184Sek110237 
8155184Sek110237 /*
8165184Sek110237  * Wakes up one or more target blocking flowops.
8175184Sek110237  * Sends broadcasts on the fo_cv condition variables of all
8185184Sek110237  * flowops on the target list, except those that are
8195184Sek110237  * FLOW_MASTER flowops. The target list consists of all
8205184Sek110237  * flowops whose name matches this flowop's "fo_targetname"
8215184Sek110237  * attribute. The target list is generated on the first
8225184Sek110237  * invocation, and the run will be shutdown if no targets
8235184Sek110237  * are found. Otherwise the routine always returns 0.
8245184Sek110237  */
8255184Sek110237 static int
8265184Sek110237 flowoplib_wakeup(threadflow_t *threadflow, flowop_t *flowop)
8275184Sek110237 {
8285184Sek110237 	flowop_t *target;
8295184Sek110237 
8305184Sek110237 	/* if this is the first wakeup, create the wakeup list */
8315184Sek110237 	if (flowop->fo_targets == NULL) {
8325184Sek110237 		flowop_t *result = flowop_find(flowop->fo_targetname);
8335184Sek110237 
8345184Sek110237 		flowop->fo_targets = result;
8355184Sek110237 		if (result == NULL) {
8365184Sek110237 			filebench_log(LOG_ERROR,
8375184Sek110237 			    "wakeup: could not find op %s for thread %s",
8385184Sek110237 			    flowop->fo_targetname,
8395184Sek110237 			    threadflow->tf_name);
8405184Sek110237 			filebench_shutdown(1);
8415184Sek110237 		}
8425184Sek110237 		while (result) {
8435184Sek110237 			result->fo_targetnext =
8445184Sek110237 			    result->fo_resultnext;
8455184Sek110237 			result = result->fo_resultnext;
8465184Sek110237 		}
8475184Sek110237 	}
8485184Sek110237 
8495184Sek110237 	target = flowop->fo_targets;
8505184Sek110237 
8515184Sek110237 	/* wakeup the targets */
8525184Sek110237 	while (target) {
8535184Sek110237 		if (target->fo_instance == FLOW_MASTER) {
8545184Sek110237 			target = target->fo_targetnext;
8555184Sek110237 			continue;
8565184Sek110237 		}
8575184Sek110237 		filebench_log(LOG_DEBUG_IMPL,
8585184Sek110237 		    "wakeup flow %s-%d at address %zx",
8595184Sek110237 		    target->fo_name,
8605184Sek110237 		    target->fo_instance,
8615184Sek110237 		    &target->fo_cv);
8625184Sek110237 
8635184Sek110237 		flowop_beginop(threadflow, flowop);
8645184Sek110237 		(void) ipc_mutex_lock(&target->fo_lock);
8655184Sek110237 		(void) pthread_cond_broadcast(&target->fo_cv);
8665184Sek110237 		(void) ipc_mutex_unlock(&target->fo_lock);
867*5673Saw148015 		flowop_endop(threadflow, flowop, 0);
8685184Sek110237 
8695184Sek110237 		target = target->fo_targetnext;
8705184Sek110237 	}
8715184Sek110237 
8725184Sek110237 	return (0);
8735184Sek110237 }
8745184Sek110237 
8755184Sek110237 /*
8765184Sek110237  * "think time" routines. the "hog" routine consumes cpu cycles as
8775184Sek110237  * it "thinks", while the "delay" flowop simply calls sleep() to delay
8785184Sek110237  * for a given number of seconds without consuming cpu cycles.
8795184Sek110237  */
8805184Sek110237 
8815184Sek110237 
8825184Sek110237 /*
8835184Sek110237  * Consumes CPU cycles and memory bandwidth by looping for
8845184Sek110237  * flowop->fo_value times. With each loop sets memory location
8855184Sek110237  * threadflow->tf_mem to 1.
8865184Sek110237  */
8875184Sek110237 static int
8885184Sek110237 flowoplib_hog(threadflow_t *threadflow, flowop_t *flowop)
8895184Sek110237 {
8905184Sek110237 	uint64_t value = *flowop->fo_value;
8915184Sek110237 	int i;
8925184Sek110237 
893*5673Saw148015 	filebench_log(LOG_DEBUG_IMPL, "hog enter");
8945184Sek110237 	flowop_beginop(threadflow, flowop);
895*5673Saw148015 	if (threadflow->tf_mem != NULL) {
896*5673Saw148015 		for (i = 0; i < value; i++)
897*5673Saw148015 			*(threadflow->tf_mem) = 1;
898*5673Saw148015 	}
899*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
9005184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "hog exit");
9015184Sek110237 	return (0);
9025184Sek110237 }
9035184Sek110237 
9045184Sek110237 
9055184Sek110237 /*
9065184Sek110237  * Delays for fo_value seconds.
9075184Sek110237  */
9085184Sek110237 static int
9095184Sek110237 flowoplib_delay(threadflow_t *threadflow, flowop_t *flowop)
9105184Sek110237 {
9115184Sek110237 	int value = *flowop->fo_value;
9125184Sek110237 
9135184Sek110237 	flowop_beginop(threadflow, flowop);
9145184Sek110237 	(void) sleep(value);
915*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
9165184Sek110237 	return (0);
9175184Sek110237 }
9185184Sek110237 
/*
 * Rate limiting routines. This is the event consuming half of the
 * event system. Each of the four following routines will limit the rate
 * to one unit of either calls, issued I/O operations, issued filebench
 * operations, or I/O bandwidth. Since there is only one event generator,
 * the events will be divided among multiple instances of an event
 * consumer, and further divided among different consumers if more than
 * one has been defined. There is no mechanism to enforce equal sharing
 * of events.
 */
9295184Sek110237 
9305184Sek110237 /*
9315184Sek110237  * Completes one invocation per posted event. If eventgen_q
9325184Sek110237  * has an event count greater than zero, one will be removed
9335184Sek110237  * (count decremented), otherwise the calling thread will
9345184Sek110237  * block until another event has been posted. Always returns 0
9355184Sek110237  */
9365184Sek110237 static int
9375184Sek110237 flowoplib_eventlimit(threadflow_t *threadflow, flowop_t *flowop)
9385184Sek110237 {
9395184Sek110237 	/* Immediately bail if not set/enabled */
9405184Sek110237 	if (filebench_shm->eventgen_hz == 0)
9415184Sek110237 		return (0);
9425184Sek110237 
9435184Sek110237 	if (flowop->fo_initted == 0) {
9445184Sek110237 		filebench_log(LOG_DEBUG_IMPL, "rate %zx %s-%d locking",
9455184Sek110237 		    flowop, threadflow->tf_name, threadflow->tf_instance);
9465184Sek110237 		flowop->fo_initted = 1;
9475184Sek110237 	}
9485184Sek110237 
9495184Sek110237 	flowop_beginop(threadflow, flowop);
9505184Sek110237 	while (filebench_shm->eventgen_hz) {
9515184Sek110237 		(void) ipc_mutex_lock(&filebench_shm->eventgen_lock);
9525184Sek110237 		if (filebench_shm->eventgen_q > 0) {
9535184Sek110237 			filebench_shm->eventgen_q--;
9545184Sek110237 			(void) ipc_mutex_unlock(&filebench_shm->eventgen_lock);
9555184Sek110237 			break;
9565184Sek110237 		}
9575184Sek110237 		(void) pthread_cond_wait(&filebench_shm->eventgen_cv,
9585184Sek110237 		    &filebench_shm->eventgen_lock);
9595184Sek110237 		(void) ipc_mutex_unlock(&filebench_shm->eventgen_lock);
9605184Sek110237 	}
961*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
9625184Sek110237 	return (0);
9635184Sek110237 }
9645184Sek110237 
9655184Sek110237 /*
9665184Sek110237  * Blocks the calling thread if the number of issued I/O
9675184Sek110237  * operations exceeds the number of posted events, thus
9685184Sek110237  * limiting the average I/O operation rate to the rate
9695184Sek110237  * specified by eventgen_hz. Always returns 0.
9705184Sek110237  */
9715184Sek110237 static int
9725184Sek110237 flowoplib_iopslimit(threadflow_t *threadflow, flowop_t *flowop)
9735184Sek110237 {
9745184Sek110237 	uint64_t iops;
9755184Sek110237 	uint64_t delta;
976*5673Saw148015 	uint64_t events;
9775184Sek110237 
9785184Sek110237 	/* Immediately bail if not set/enabled */
9795184Sek110237 	if (filebench_shm->eventgen_hz == 0)
9805184Sek110237 		return (0);
9815184Sek110237 
9825184Sek110237 	if (flowop->fo_initted == 0) {
9835184Sek110237 		filebench_log(LOG_DEBUG_IMPL, "rate %zx %s-%d locking",
9845184Sek110237 		    flowop, threadflow->tf_name, threadflow->tf_instance);
9855184Sek110237 		flowop->fo_initted = 1;
9865184Sek110237 	}
9875184Sek110237 
9885184Sek110237 	iops = (controlstats.fs_rcount +
9895184Sek110237 	    controlstats.fs_wcount);
9905184Sek110237 
9915184Sek110237 	/* Is this the first time around */
9925184Sek110237 	if (flowop->fo_tputlast == 0) {
9935184Sek110237 		flowop->fo_tputlast = iops;
9945184Sek110237 		return (0);
9955184Sek110237 	}
9965184Sek110237 
9975184Sek110237 	delta = iops - flowop->fo_tputlast;
9985184Sek110237 	flowop->fo_tputbucket -= delta;
9995184Sek110237 	flowop->fo_tputlast = iops;
10005184Sek110237 
10015184Sek110237 	/* No need to block if the q isn't empty */
10025184Sek110237 	if (flowop->fo_tputbucket >= 0LL) {
1003*5673Saw148015 		flowop_endop(threadflow, flowop, 0);
10045184Sek110237 		return (0);
10055184Sek110237 	}
10065184Sek110237 
10075184Sek110237 	iops = flowop->fo_tputbucket * -1;
10085184Sek110237 	events = iops;
10095184Sek110237 
10105184Sek110237 	flowop_beginop(threadflow, flowop);
10115184Sek110237 	while (filebench_shm->eventgen_hz) {
10125184Sek110237 
10135184Sek110237 		(void) ipc_mutex_lock(&filebench_shm->eventgen_lock);
10145184Sek110237 		if (filebench_shm->eventgen_q >= events) {
10155184Sek110237 			filebench_shm->eventgen_q -= events;
10165184Sek110237 			(void) ipc_mutex_unlock(&filebench_shm->eventgen_lock);
10175184Sek110237 			flowop->fo_tputbucket += events;
10185184Sek110237 			break;
10195184Sek110237 		}
10205184Sek110237 		(void) pthread_cond_wait(&filebench_shm->eventgen_cv,
10215184Sek110237 		    &filebench_shm->eventgen_lock);
10225184Sek110237 		(void) ipc_mutex_unlock(&filebench_shm->eventgen_lock);
10235184Sek110237 	}
1024*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
10255184Sek110237 
10265184Sek110237 	return (0);
10275184Sek110237 }
10285184Sek110237 
10295184Sek110237 /*
10305184Sek110237  * Blocks the calling thread if the number of issued filebench
10315184Sek110237  * operations exceeds the number of posted events, thus limiting
10325184Sek110237  * the average filebench operation rate to the rate specified by
10335184Sek110237  * eventgen_hz. Always returns 0.
10345184Sek110237  */
10355184Sek110237 static int
10365184Sek110237 flowoplib_opslimit(threadflow_t *threadflow, flowop_t *flowop)
10375184Sek110237 {
10385184Sek110237 	uint64_t ops;
10395184Sek110237 	uint64_t delta;
1040*5673Saw148015 	uint64_t events;
10415184Sek110237 
10425184Sek110237 	/* Immediately bail if not set/enabled */
10435184Sek110237 	if (filebench_shm->eventgen_hz == 0)
10445184Sek110237 		return (0);
10455184Sek110237 
10465184Sek110237 	if (flowop->fo_initted == 0) {
10475184Sek110237 		filebench_log(LOG_DEBUG_IMPL, "rate %zx %s-%d locking",
10485184Sek110237 		    flowop, threadflow->tf_name, threadflow->tf_instance);
10495184Sek110237 		flowop->fo_initted = 1;
10505184Sek110237 	}
10515184Sek110237 
10525184Sek110237 	ops = controlstats.fs_count;
10535184Sek110237 
10545184Sek110237 	/* Is this the first time around */
10555184Sek110237 	if (flowop->fo_tputlast == 0) {
10565184Sek110237 		flowop->fo_tputlast = ops;
10575184Sek110237 		return (0);
10585184Sek110237 	}
10595184Sek110237 
10605184Sek110237 	delta = ops - flowop->fo_tputlast;
10615184Sek110237 	flowop->fo_tputbucket -= delta;
10625184Sek110237 	flowop->fo_tputlast = ops;
10635184Sek110237 
10645184Sek110237 	/* No need to block if the q isn't empty */
10655184Sek110237 	if (flowop->fo_tputbucket >= 0LL) {
1066*5673Saw148015 		flowop_endop(threadflow, flowop, 0);
10675184Sek110237 		return (0);
10685184Sek110237 	}
10695184Sek110237 
10705184Sek110237 	ops = flowop->fo_tputbucket * -1;
10715184Sek110237 	events = ops;
10725184Sek110237 
10735184Sek110237 	flowop_beginop(threadflow, flowop);
10745184Sek110237 	while (filebench_shm->eventgen_hz) {
10755184Sek110237 		(void) ipc_mutex_lock(&filebench_shm->eventgen_lock);
10765184Sek110237 		if (filebench_shm->eventgen_q >= events) {
10775184Sek110237 			filebench_shm->eventgen_q -= events;
10785184Sek110237 			(void) ipc_mutex_unlock(&filebench_shm->eventgen_lock);
10795184Sek110237 			flowop->fo_tputbucket += events;
10805184Sek110237 			break;
10815184Sek110237 		}
10825184Sek110237 		(void) pthread_cond_wait(&filebench_shm->eventgen_cv,
10835184Sek110237 		    &filebench_shm->eventgen_lock);
10845184Sek110237 		(void) ipc_mutex_unlock(&filebench_shm->eventgen_lock);
10855184Sek110237 	}
1086*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
10875184Sek110237 
10885184Sek110237 	return (0);
10895184Sek110237 }
10905184Sek110237 
10915184Sek110237 
10925184Sek110237 /*
10935184Sek110237  * Blocks the calling thread if the number of bytes of I/O
10945184Sek110237  * issued exceeds one megabyte times the number of posted
10955184Sek110237  * events, thus limiting the average I/O byte rate to one
10965184Sek110237  * megabyte times the event rate as set by eventgen_hz.
10975184Sek110237  * Always retuns 0.
10985184Sek110237  */
10995184Sek110237 static int
11005184Sek110237 flowoplib_bwlimit(threadflow_t *threadflow, flowop_t *flowop)
11015184Sek110237 {
11025184Sek110237 	uint64_t bytes;
11035184Sek110237 	uint64_t delta;
1104*5673Saw148015 	uint64_t events;
11055184Sek110237 
11065184Sek110237 	/* Immediately bail if not set/enabled */
11075184Sek110237 	if (filebench_shm->eventgen_hz == 0)
11085184Sek110237 		return (0);
11095184Sek110237 
11105184Sek110237 	if (flowop->fo_initted == 0) {
11115184Sek110237 		filebench_log(LOG_DEBUG_IMPL, "rate %zx %s-%d locking",
11125184Sek110237 		    flowop, threadflow->tf_name, threadflow->tf_instance);
11135184Sek110237 		flowop->fo_initted = 1;
11145184Sek110237 	}
11155184Sek110237 
11165184Sek110237 	bytes = (controlstats.fs_rbytes +
11175184Sek110237 	    controlstats.fs_wbytes);
11185184Sek110237 
11195184Sek110237 	/* Is this the first time around */
11205184Sek110237 	if (flowop->fo_tputlast == 0) {
11215184Sek110237 		flowop->fo_tputlast = bytes;
11225184Sek110237 		return (0);
11235184Sek110237 	}
11245184Sek110237 
11255184Sek110237 	delta = bytes - flowop->fo_tputlast;
11265184Sek110237 	flowop->fo_tputbucket -= delta;
11275184Sek110237 	flowop->fo_tputlast = bytes;
11285184Sek110237 
11295184Sek110237 	/* No need to block if the q isn't empty */
11305184Sek110237 	if (flowop->fo_tputbucket >= 0LL) {
1131*5673Saw148015 		flowop_endop(threadflow, flowop, 0);
11325184Sek110237 		return (0);
11335184Sek110237 	}
11345184Sek110237 
11355184Sek110237 	bytes = flowop->fo_tputbucket * -1;
11365184Sek110237 	events = (bytes / MB) + 1;
11375184Sek110237 
11385184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "%lld bytes, %lld events",
11395184Sek110237 	    bytes, events);
11405184Sek110237 
11415184Sek110237 	flowop_beginop(threadflow, flowop);
11425184Sek110237 	while (filebench_shm->eventgen_hz) {
11435184Sek110237 		(void) ipc_mutex_lock(&filebench_shm->eventgen_lock);
11445184Sek110237 		if (filebench_shm->eventgen_q >= events) {
11455184Sek110237 			filebench_shm->eventgen_q -= events;
11465184Sek110237 			(void) ipc_mutex_unlock(&filebench_shm->eventgen_lock);
11475184Sek110237 			flowop->fo_tputbucket += (events * MB);
11485184Sek110237 			break;
11495184Sek110237 		}
11505184Sek110237 		(void) pthread_cond_wait(&filebench_shm->eventgen_cv,
11515184Sek110237 		    &filebench_shm->eventgen_lock);
11525184Sek110237 		(void) ipc_mutex_unlock(&filebench_shm->eventgen_lock);
11535184Sek110237 	}
1154*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
11555184Sek110237 
11565184Sek110237 	return (0);
11575184Sek110237 }
11585184Sek110237 
11595184Sek110237 /*
11605184Sek110237  * These flowops terminate a benchmark run when either the specified
11615184Sek110237  * number of bytes of I/O (flowoplib_finishonbytes) or the specified
11625184Sek110237  * number of I/O operations (flowoplib_finishoncount) have been generated.
11635184Sek110237  */
11645184Sek110237 
11655184Sek110237 
11665184Sek110237 /*
11675184Sek110237  * Stop filebench run when specified number of I/O bytes have been
11685184Sek110237  * transferred. Compares controlstats.fs_bytes with *flowop->value,
11695184Sek110237  * and if greater returns 1, stopping the run, if not, returns 0
11705184Sek110237  * to continue running.
11715184Sek110237  */
11725184Sek110237 static int
11735184Sek110237 flowoplib_finishonbytes(threadflow_t *threadflow, flowop_t *flowop)
11745184Sek110237 {
11755184Sek110237 	uint64_t b;
11765184Sek110237 	uint64_t bytes = *flowop->fo_value;
11775184Sek110237 
11785184Sek110237 	b = controlstats.fs_bytes;
11795184Sek110237 
11805184Sek110237 	flowop_beginop(threadflow, flowop);
11815184Sek110237 	if (b > bytes) {
1182*5673Saw148015 		flowop_endop(threadflow, flowop, 0);
11835184Sek110237 		return (1);
11845184Sek110237 	}
1185*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
11865184Sek110237 
11875184Sek110237 	return (0);
11885184Sek110237 }
11895184Sek110237 
11905184Sek110237 /*
11915184Sek110237  * Stop filebench run when specified number of I/O operations have
11925184Sek110237  * been performed. Compares controlstats.fs_count with *flowop->value,
11935184Sek110237  * and if greater returns 1, stopping the run, if not, returns 0 to
11945184Sek110237  * continue running.
11955184Sek110237  */
11965184Sek110237 static int
11975184Sek110237 flowoplib_finishoncount(threadflow_t *threadflow, flowop_t *flowop)
11985184Sek110237 {
11995184Sek110237 	uint64_t ops;
12005184Sek110237 	uint64_t count = *flowop->fo_value;
12015184Sek110237 
12025184Sek110237 	ops = controlstats.fs_count;
12035184Sek110237 
12045184Sek110237 	flowop_beginop(threadflow, flowop);
12055184Sek110237 	if (ops > count) {
1206*5673Saw148015 		flowop_endop(threadflow, flowop, 0);
12075184Sek110237 		return (1);
12085184Sek110237 	}
1209*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
12105184Sek110237 
12115184Sek110237 	return (0);
12125184Sek110237 }
12135184Sek110237 
12145184Sek110237 /*
12155184Sek110237  * Semaphore synchronization using either System V semaphores or
12165184Sek110237  * posix semaphores. If System V semaphores are available, they will be
12175184Sek110237  * used, otherwise posix semaphores will be used.
12185184Sek110237  */
12195184Sek110237 
12205184Sek110237 
12215184Sek110237 /*
12225184Sek110237  * Initializes the filebench "block on semaphore" flowop.
12235184Sek110237  * If System V semaphores are implemented, the routine
12245184Sek110237  * initializes the System V semaphore subsystem if it hasn't
12255184Sek110237  * already been initialized, also allocates a pair of semids
12265184Sek110237  * and initializes the highwater System V semaphore.
12275184Sek110237  * If no System V semaphores, then does nothing special.
12285184Sek110237  * Returns -1 if it cannot acquire a set of System V semphores
12295184Sek110237  * or if the initial post to the semaphore set fails. Returns 0
12305184Sek110237  * on success.
12315184Sek110237  */
12325184Sek110237 static int
12335184Sek110237 flowoplib_semblock_init(flowop_t *flowop)
12345184Sek110237 {
12355184Sek110237 
12365184Sek110237 #ifdef HAVE_SYSV_SEM
12375184Sek110237 	int semid;
12385184Sek110237 	struct sembuf sbuf[2];
12395184Sek110237 	int highwater;
12405184Sek110237 
12415184Sek110237 	ipc_seminit();
12425184Sek110237 
12435184Sek110237 	flowop->fo_semid_lw = ipc_semidalloc();
12445184Sek110237 	flowop->fo_semid_hw = ipc_semidalloc();
12455184Sek110237 
12465184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "flow %s-%d semblock init semid=%x",
12475184Sek110237 	    flowop->fo_name, flowop->fo_instance, flowop->fo_semid_lw);
12485184Sek110237 
12495184Sek110237 	/*
12505184Sek110237 	 * Raise the number of the hw queue, causing the posting side to
12515184Sek110237 	 * block if queue is > 2 x blocking value
12525184Sek110237 	 */
12535184Sek110237 	if ((semid = semget(filebench_shm->semkey, FILEBENCH_NSEMS, 0)) == -1) {
12545184Sek110237 		filebench_log(LOG_ERROR, "semblock init lookup %x failed: %s",
12555184Sek110237 		    filebench_shm->semkey,
12565184Sek110237 		    strerror(errno));
12575184Sek110237 		return (-1);
12585184Sek110237 	}
12595184Sek110237 
12605184Sek110237 	if ((highwater = flowop->fo_semid_hw) == 0)
12615184Sek110237 		highwater = *flowop->fo_value;
12625184Sek110237 
12635184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "setting highwater to : %d", highwater);
12645184Sek110237 
1265*5673Saw148015 	sbuf[0].sem_num = (short)highwater;
12665184Sek110237 	sbuf[0].sem_op = *flowop->fo_highwater;
12675184Sek110237 	sbuf[0].sem_flg = 0;
12685184Sek110237 	if ((semop(semid, &sbuf[0], 1) == -1) && errno) {
12695184Sek110237 		filebench_log(LOG_ERROR, "semblock init post failed: %s (%d,"
12705184Sek110237 		    "%d)", strerror(errno), sbuf[0].sem_num, sbuf[0].sem_op);
12715184Sek110237 		return (-1);
12725184Sek110237 	}
12735184Sek110237 #else
12745184Sek110237 	filebench_log(LOG_DEBUG_IMPL,
12755184Sek110237 	    "flow %s-%d semblock init with posix semaphore",
12765184Sek110237 	    flowop->fo_name, flowop->fo_instance);
12775184Sek110237 
12785184Sek110237 	sem_init(&flowop->fo_sem, 1, 0);
12795184Sek110237 #endif	/* HAVE_SYSV_SEM */
12805184Sek110237 
12815184Sek110237 	if (!(*flowop->fo_blocking))
12825184Sek110237 		(void) ipc_mutex_unlock(&flowop->fo_lock);
12835184Sek110237 
12845184Sek110237 	return (0);
12855184Sek110237 }
12865184Sek110237 
12875184Sek110237 /*
12885184Sek110237  * Releases the semids for the System V semaphore allocated
12895184Sek110237  * to this flowop. If not using System V semaphores, then
12905184Sek110237  * it is effectively just a no-op. Always returns 0.
12915184Sek110237  */
12925184Sek110237 static void
12935184Sek110237 flowoplib_semblock_destruct(flowop_t *flowop)
12945184Sek110237 {
12955184Sek110237 #ifdef HAVE_SYSV_SEM
12965184Sek110237 	ipc_semidfree(flowop->fo_semid_lw);
12975184Sek110237 	ipc_semidfree(flowop->fo_semid_hw);
12985184Sek110237 #else
12995184Sek110237 	sem_destroy(&flowop->fo_sem);
13005184Sek110237 #endif /* HAVE_SYSV_SEM */
13015184Sek110237 }
13025184Sek110237 
13035184Sek110237 /*
13045184Sek110237  * Attempts to pass a System V or posix semaphore as appropriate,
13055184Sek110237  * and blocks if necessary. Returns -1 if a set of System V
13065184Sek110237  * semphores is not available or cannot be acquired, or if the initial
13075184Sek110237  * post to the semaphore set fails. Returns 0 on success.
13085184Sek110237  */
13095184Sek110237 static int
13105184Sek110237 flowoplib_semblock(threadflow_t *threadflow, flowop_t *flowop)
13115184Sek110237 {
13125184Sek110237 
13135184Sek110237 #ifdef HAVE_SYSV_SEM
13145184Sek110237 	struct sembuf sbuf[2];
13155184Sek110237 	int value = *flowop->fo_value;
13165184Sek110237 	int semid;
13175184Sek110237 	struct timespec timeout;
13185184Sek110237 
13195184Sek110237 	if ((semid = semget(filebench_shm->semkey, FILEBENCH_NSEMS, 0)) == -1) {
13205184Sek110237 		filebench_log(LOG_ERROR, "lookup semop %x failed: %s",
13215184Sek110237 		    filebench_shm->semkey,
13225184Sek110237 		    strerror(errno));
13235184Sek110237 		return (-1);
13245184Sek110237 	}
13255184Sek110237 
13265184Sek110237 	filebench_log(LOG_DEBUG_IMPL,
13275184Sek110237 	    "flow %s-%d sem blocking on id %x num %x value %d",
13285184Sek110237 	    flowop->fo_name, flowop->fo_instance, semid,
13295184Sek110237 	    flowop->fo_semid_hw, value);
13305184Sek110237 
13315184Sek110237 	/* Post, decrement the increment the hw queue */
13325184Sek110237 	sbuf[0].sem_num = flowop->fo_semid_hw;
1333*5673Saw148015 	sbuf[0].sem_op = (short)value;
13345184Sek110237 	sbuf[0].sem_flg = 0;
13355184Sek110237 	sbuf[1].sem_num = flowop->fo_semid_lw;
13365184Sek110237 	sbuf[1].sem_op = value * -1;
13375184Sek110237 	sbuf[1].sem_flg = 0;
13385184Sek110237 	timeout.tv_sec = 600;
13395184Sek110237 	timeout.tv_nsec = 0;
13405184Sek110237 
13415184Sek110237 	if (*flowop->fo_blocking)
13425184Sek110237 		(void) ipc_mutex_unlock(&flowop->fo_lock);
13435184Sek110237 
13445184Sek110237 	flowop_beginop(threadflow, flowop);
13455184Sek110237 
13465184Sek110237 #ifdef HAVE_SEMTIMEDOP
13475184Sek110237 	(void) semtimedop(semid, &sbuf[0], 1, &timeout);
13485184Sek110237 	(void) semtimedop(semid, &sbuf[1], 1, &timeout);
13495184Sek110237 #else
13505184Sek110237 	(void) semop(semid, &sbuf[0], 1);
13515184Sek110237 	(void) semop(semid, &sbuf[1], 1);
13525184Sek110237 #endif /* HAVE_SEMTIMEDOP */
13535184Sek110237 
13545184Sek110237 	if (*flowop->fo_blocking)
13555184Sek110237 		(void) ipc_mutex_lock(&flowop->fo_lock);
13565184Sek110237 
1357*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
13585184Sek110237 
13595184Sek110237 #else
13605184Sek110237 	int value = *flowop->fo_value;
13615184Sek110237 	int i;
13625184Sek110237 
13635184Sek110237 	filebench_log(LOG_DEBUG_IMPL,
13645184Sek110237 	    "flow %s-%d sem blocking on posix semaphore",
13655184Sek110237 	    flowop->fo_name, flowop->fo_instance);
13665184Sek110237 
13675184Sek110237 	/* Decrement sem by value */
13685184Sek110237 	for (i = 0; i < value; i++) {
13695184Sek110237 		if (sem_wait(&flowop->fo_sem) == -1) {
13705184Sek110237 			filebench_log(LOG_ERROR, "semop wait failed");
13715184Sek110237 			return (-1);
13725184Sek110237 		}
13735184Sek110237 	}
13745184Sek110237 
13755184Sek110237 	filebench_log(LOG_DEBUG_IMPL, "flow %s-%d sem unblocking",
13765184Sek110237 	    flowop->fo_name, flowop->fo_instance);
13775184Sek110237 #endif /* HAVE_SYSV_SEM */
13785184Sek110237 
13795184Sek110237 	return (0);
13805184Sek110237 }
13815184Sek110237 
13825184Sek110237 /*
13835184Sek110237  * Calls ipc_seminit(), and does so whether System V semaphores
13845184Sek110237  * are available or not. Hence it will cause ipc_seminit to log errors
13855184Sek110237  * if they are not. Always returns 0.
13865184Sek110237  */
13875184Sek110237 /* ARGSUSED */
13885184Sek110237 static int
13895184Sek110237 flowoplib_sempost_init(flowop_t *flowop)
13905184Sek110237 {
13915184Sek110237 #ifdef HAVE_SYSV_SEM
13925184Sek110237 	ipc_seminit();
13935184Sek110237 #endif /* HAVE_SYSV_SEM */
13945184Sek110237 	return (0);
13955184Sek110237 }
13965184Sek110237 
13975184Sek110237 /*
13985184Sek110237  * Post to a System V or posix semaphore as appropriate.
13995184Sek110237  * On the first call for a given flowop instance, this routine
14005184Sek110237  * will use the fo_targetname attribute to locate all semblock
14015184Sek110237  * flowops that are expecting posts from this flowop. All
14025184Sek110237  * target flowops on this list will have a post operation done
14035184Sek110237  * to their semaphores on each call.
14045184Sek110237  */
14055184Sek110237 static int
14065184Sek110237 flowoplib_sempost(threadflow_t *threadflow, flowop_t *flowop)
14075184Sek110237 {
14085184Sek110237 	flowop_t *target;
14095184Sek110237 
14105184Sek110237 	filebench_log(LOG_DEBUG_IMPL,
14115184Sek110237 	    "sempost flow %s-%d",
14125184Sek110237 	    flowop->fo_name,
14135184Sek110237 	    flowop->fo_instance);
14145184Sek110237 
14155184Sek110237 	/* if this is the first post, create the post list */
14165184Sek110237 	if (flowop->fo_targets == NULL) {
14175184Sek110237 		flowop_t *result = flowop_find(flowop->fo_targetname);
14185184Sek110237 
14195184Sek110237 		flowop->fo_targets = result;
14205184Sek110237 
14215184Sek110237 		if (result == NULL) {
14225184Sek110237 			filebench_log(LOG_ERROR,
14235184Sek110237 			    "sempost: could not find op %s for thread %s",
14245184Sek110237 			    flowop->fo_targetname,
14255184Sek110237 			    threadflow->tf_name);
14265184Sek110237 			filebench_shutdown(1);
14275184Sek110237 		}
14285184Sek110237 
14295184Sek110237 		while (result) {
14305184Sek110237 			result->fo_targetnext =
14315184Sek110237 			    result->fo_resultnext;
14325184Sek110237 			result = result->fo_resultnext;
14335184Sek110237 		}
14345184Sek110237 	}
14355184Sek110237 
14365184Sek110237 	target = flowop->fo_targets;
14375184Sek110237 
14385184Sek110237 	flowop_beginop(threadflow, flowop);
14395184Sek110237 	/* post to the targets */
14405184Sek110237 	while (target) {
14415184Sek110237 #ifdef HAVE_SYSV_SEM
14425184Sek110237 		struct sembuf sbuf[2];
14435184Sek110237 		int semid;
14445184Sek110237 		int blocking;
14455184Sek110237 #else
14465184Sek110237 		int i;
14475184Sek110237 #endif /* HAVE_SYSV_SEM */
14485184Sek110237 		int value = *flowop->fo_value;
14495184Sek110237 		struct timespec timeout;
14505184Sek110237 
14515184Sek110237 		if (target->fo_instance == FLOW_MASTER) {
14525184Sek110237 			target = target->fo_targetnext;
14535184Sek110237 			continue;
14545184Sek110237 		}
14555184Sek110237 
14565184Sek110237 #ifdef HAVE_SYSV_SEM
14575184Sek110237 
14585184Sek110237 		filebench_log(LOG_DEBUG_IMPL,
14595184Sek110237 		    "sempost flow %s-%d num %x",
14605184Sek110237 		    target->fo_name,
14615184Sek110237 		    target->fo_instance,
14625184Sek110237 		    target->fo_semid_lw);
14635184Sek110237 
14645184Sek110237 		if ((semid = semget(filebench_shm->semkey,
14655184Sek110237 		    FILEBENCH_NSEMS, 0)) == -1) {
14665184Sek110237 			filebench_log(LOG_ERROR,
14675184Sek110237 			    "lookup semop %x failed: %s",
14685184Sek110237 			    filebench_shm->semkey,
14695184Sek110237 			    strerror(errno));
14705184Sek110237 			return (-1);
14715184Sek110237 		}
14725184Sek110237 
14735184Sek110237 		sbuf[0].sem_num = target->fo_semid_lw;
1474*5673Saw148015 		sbuf[0].sem_op = (short)value;
14755184Sek110237 		sbuf[0].sem_flg = 0;
14765184Sek110237 		sbuf[1].sem_num = target->fo_semid_hw;
14775184Sek110237 		sbuf[1].sem_op = value * -1;
14785184Sek110237 		sbuf[1].sem_flg = 0;
14795184Sek110237 		timeout.tv_sec = 600;
14805184Sek110237 		timeout.tv_nsec = 0;
14815184Sek110237 
14825184Sek110237 		if (*flowop->fo_blocking)
14835184Sek110237 			blocking = 1;
14845184Sek110237 		else
14855184Sek110237 			blocking = 0;
14865184Sek110237 
14875184Sek110237 #ifdef HAVE_SEMTIMEDOP
14885184Sek110237 		if ((semtimedop(semid, &sbuf[0], blocking + 1,
14895184Sek110237 		    &timeout) == -1) && (errno && (errno != EAGAIN))) {
14905184Sek110237 #else
14915184Sek110237 		if ((semop(semid, &sbuf[0], blocking + 1) == -1) &&
14925184Sek110237 		    (errno && (errno != EAGAIN))) {
14935184Sek110237 #endif /* HAVE_SEMTIMEDOP */
14945184Sek110237 			filebench_log(LOG_ERROR, "semop post failed: %s",
14955184Sek110237 			    strerror(errno));
14965184Sek110237 			return (-1);
14975184Sek110237 		}
14985184Sek110237 
14995184Sek110237 		filebench_log(LOG_DEBUG_IMPL,
15005184Sek110237 		    "flow %s-%d finished posting",
15015184Sek110237 		    target->fo_name, target->fo_instance);
15025184Sek110237 #else
15035184Sek110237 		filebench_log(LOG_DEBUG_IMPL,
15045184Sek110237 		    "sempost flow %s-%d to posix semaphore",
15055184Sek110237 		    target->fo_name,
15065184Sek110237 		    target->fo_instance);
15075184Sek110237 
15085184Sek110237 		/* Increment sem by value */
15095184Sek110237 		for (i = 0; i < value; i++) {
15105184Sek110237 			if (sem_post(&target->fo_sem) == -1) {
15115184Sek110237 				filebench_log(LOG_ERROR, "semop post failed");
15125184Sek110237 				return (-1);
15135184Sek110237 			}
15145184Sek110237 		}
15155184Sek110237 
15165184Sek110237 		filebench_log(LOG_DEBUG_IMPL, "flow %s-%d unblocking",
15175184Sek110237 		    target->fo_name, target->fo_instance);
15185184Sek110237 #endif /* HAVE_SYSV_SEM */
15195184Sek110237 
15205184Sek110237 		target = target->fo_targetnext;
15215184Sek110237 	}
1522*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
15235184Sek110237 
15245184Sek110237 	return (0);
15255184Sek110237 }
15265184Sek110237 
15275184Sek110237 
15285184Sek110237 /*
15295184Sek110237  * Section for exercising create / open / close / delete operations
15305184Sek110237  * on files within a fileset. For proper operation, the flowop attribute
15315184Sek110237  * "fd", which sets the fo_fdnumber field in the flowop, must be used
15325184Sek110237  * so that the same file is opened and later closed. "fd" is an index
15335184Sek110237  * into a pair of arrays maintained by threadflows, one of which
15345184Sek110237  * contains the operating system assigned file descriptors and the other
15355184Sek110237  * a pointer to the filesetentry whose file the file descriptor
15365184Sek110237  * references. An openfile flowop defined without fd being set will use
15375184Sek110237  * the default (0) fd or, if specified, rotate through fd indices, but
15385184Sek110237  * createfile and closefile must use the default or a specified fd.
 * Meanwhile deletefile picks an arbitrary file to delete, regardless
15405184Sek110237  * of fd attribute.
15415184Sek110237  */
15425184Sek110237 
15435184Sek110237 /*
 * XXX Making file selection more consistent among the flowops might be good
15455184Sek110237  */
15465184Sek110237 
15475184Sek110237 
15485184Sek110237 /*
15495184Sek110237  * Emulates (and actually does) file open. Obtains a file descriptor
15505184Sek110237  * index, then calls flowoplib_openfile_common() to open. Returns -1
15515184Sek110237  * if not file descriptor is found or flowoplib_openfile_common
15525184Sek110237  * encounters an error, otherwise 0.
15535184Sek110237  */
15545184Sek110237 static int
15555184Sek110237 flowoplib_openfile(threadflow_t *threadflow, flowop_t *flowop)
15565184Sek110237 {
15575184Sek110237 	int fd = flowoplib_fdnum(threadflow, flowop);
15585184Sek110237 
15595184Sek110237 	if (fd == -1)
15605184Sek110237 		return (-1);
15615184Sek110237 
15625184Sek110237 	return (flowoplib_openfile_common(threadflow, flowop, fd));
15635184Sek110237 }
15645184Sek110237 
15655184Sek110237 /*
15665184Sek110237  * Common file opening code for filesets. Uses the supplied
15675184Sek110237  * file descriptor index to determine the tf_fd entry to use.
15685184Sek110237  * If the entry is empty (0) and the fileset exists, fileset
15695184Sek110237  * pick is called to select a fileset entry to use. The file
15705184Sek110237  * specified in the filesetentry is opened, and the returned
15715184Sek110237  * operating system file descriptor and a pointer to the
15725184Sek110237  * filesetentry are stored in tf_fd[fd] and tf_fse[fd],
15735184Sek110237  * respectively. Returns -1 on error, 0 on success.
15745184Sek110237  */
15755184Sek110237 static int
15765184Sek110237 flowoplib_openfile_common(threadflow_t *threadflow, flowop_t *flowop, int fd)
15775184Sek110237 {
15785184Sek110237 	filesetentry_t *file;
15795184Sek110237 	int tid = 0;
15805184Sek110237 
15815184Sek110237 	/*
15825184Sek110237 	 * If the flowop doesn't default to persistent fd
15835184Sek110237 	 * then get unique thread ID for use by fileset_pick
15845184Sek110237 	 */
15855184Sek110237 	if (integer_isset(flowop->fo_rotatefd))
15865184Sek110237 		tid = threadflow->tf_utid;
15875184Sek110237 
15885184Sek110237 	if (threadflow->tf_fd[fd] != 0) {
15895184Sek110237 		filebench_log(LOG_ERROR,
15905184Sek110237 		    "flowop %s attempted to open without closing on fd %d",
15915184Sek110237 		    flowop->fo_name, fd);
15925184Sek110237 		return (-1);
15935184Sek110237 	}
15945184Sek110237 
15955184Sek110237 	if (flowop->fo_fileset == NULL) {
15965184Sek110237 		filebench_log(LOG_ERROR, "flowop NULL file");
15975184Sek110237 		return (-1);
15985184Sek110237 	}
15995184Sek110237 
1600*5673Saw148015 #ifdef HAVE_RAW_SUPPORT
1601*5673Saw148015 	if (flowop->fo_fileset->fs_attrs & FILESET_IS_RAW_DEV) {
1602*5673Saw148015 		int open_attrs = 0;
1603*5673Saw148015 		char name[MAXPATHLEN];
1604*5673Saw148015 
1605*5673Saw148015 		(void) strcpy(name, *flowop->fo_fileset->fs_path);
1606*5673Saw148015 		(void) strcat(name, "/");
1607*5673Saw148015 		(void) strcat(name, flowop->fo_fileset->fs_name);
1608*5673Saw148015 
1609*5673Saw148015 		if (*flowop->fo_dsync) {
1610*5673Saw148015 #ifdef sun
1611*5673Saw148015 			open_attrs |= O_DSYNC;
1612*5673Saw148015 #else
1613*5673Saw148015 			open_attrs |= O_FSYNC;
1614*5673Saw148015 #endif
1615*5673Saw148015 		}
1616*5673Saw148015 
1617*5673Saw148015 		filebench_log(LOG_DEBUG_SCRIPT,
1618*5673Saw148015 		    "open raw device %s flags %d = %d", name, open_attrs, fd);
1619*5673Saw148015 
1620*5673Saw148015 		threadflow->tf_fd[fd] = open64(name,
1621*5673Saw148015 		    O_RDWR | open_attrs, 0666);
1622*5673Saw148015 
1623*5673Saw148015 		if (threadflow->tf_fd[fd] < 0) {
1624*5673Saw148015 			filebench_log(LOG_ERROR,
1625*5673Saw148015 			    "Failed to open raw device %s: %s",
1626*5673Saw148015 			    name, strerror(errno));
1627*5673Saw148015 			return (-1);
1628*5673Saw148015 		}
1629*5673Saw148015 
1630*5673Saw148015 		/* if running on Solaris, use un-buffered io */
1631*5673Saw148015 #ifdef sun
1632*5673Saw148015 		(void) directio(threadflow->tf_fd[fd], DIRECTIO_ON);
1633*5673Saw148015 #endif
1634*5673Saw148015 
1635*5673Saw148015 		threadflow->tf_fse[fd] = NULL;
1636*5673Saw148015 
1637*5673Saw148015 		return (0);
1638*5673Saw148015 	}
1639*5673Saw148015 #endif /* HAVE_RAW_SUPPORT */
1640*5673Saw148015 
16415184Sek110237 	if ((file = fileset_pick(flowop->fo_fileset,
16425184Sek110237 	    FILESET_PICKEXISTS, tid)) == NULL) {
16435184Sek110237 		filebench_log(LOG_ERROR,
16445184Sek110237 		    "flowop %s failed to pick file from %s on fd %d",
16455184Sek110237 		    flowop->fo_name,
16465184Sek110237 		    flowop->fo_fileset->fs_name, fd);
16475184Sek110237 		return (-1);
16485184Sek110237 	}
16495184Sek110237 
16505184Sek110237 	threadflow->tf_fse[fd] = file;
16515184Sek110237 
16525184Sek110237 	flowop_beginop(threadflow, flowop);
16535184Sek110237 	threadflow->tf_fd[fd] = fileset_openfile(flowop->fo_fileset,
16545184Sek110237 	    file, O_RDWR, 0666, flowoplib_fileattrs(flowop));
1655*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
16565184Sek110237 
16575184Sek110237 	if (threadflow->tf_fd[fd] < 0) {
16585184Sek110237 		filebench_log(LOG_ERROR, "failed to open file %s",
16595184Sek110237 		    flowop->fo_name);
16605184Sek110237 		return (-1);
16615184Sek110237 	}
16625184Sek110237 
16635184Sek110237 	filebench_log(LOG_DEBUG_SCRIPT,
16645184Sek110237 	    "flowop %s: opened %s fd[%d] = %d",
16655184Sek110237 	    flowop->fo_name, file->fse_path, fd, threadflow->tf_fd[fd]);
16665184Sek110237 
16675184Sek110237 	return (0);
16685184Sek110237 }
16695184Sek110237 
16705184Sek110237 /*
16715184Sek110237  * Emulate create of a file. Uses the flowop's fdnumber to select
16725184Sek110237  * tf_fd and tf_fse array locations to put the created file's file
16735184Sek110237  * descriptor and filesetentry respectively. Uses fileset_pick()
16745184Sek110237  * to select a specific filesetentry whose file does not currently
16755184Sek110237  * exist for the file create operation. Then calls
16765184Sek110237  * fileset_openfile() with the O_CREATE flag set to create the
16775184Sek110237  * file. Returns -1 if the array index specified by fdnumber is
16785184Sek110237  * already in use, the flowop has no associated fileset, or
16795184Sek110237  * the create call fails. Returns 1 if a filesetentry with a
16805184Sek110237  * nonexistent file cannot be found. Returns 0 on success.
16815184Sek110237  */
16825184Sek110237 static int
16835184Sek110237 flowoplib_createfile(threadflow_t *threadflow, flowop_t *flowop)
16845184Sek110237 {
16855184Sek110237 	filesetentry_t *file;
16865184Sek110237 	int fd = flowop->fo_fdnumber;
16875184Sek110237 
16885184Sek110237 	if (threadflow->tf_fd[fd] != 0) {
16895184Sek110237 		filebench_log(LOG_ERROR,
16905184Sek110237 		    "flowop %s attempted to create without closing on fd %d",
16915184Sek110237 		    flowop->fo_name, fd);
16925184Sek110237 		return (-1);
16935184Sek110237 	}
16945184Sek110237 
16955184Sek110237 	if (flowop->fo_fileset == NULL) {
16965184Sek110237 		filebench_log(LOG_ERROR, "flowop NULL file");
16975184Sek110237 		return (-1);
16985184Sek110237 	}
16995184Sek110237 
1700*5673Saw148015 #ifdef HAVE_RAW_SUPPORT
1701*5673Saw148015 	/* can't be used with raw devices */
1702*5673Saw148015 	if (flowop->fo_fileset->fs_attrs & FILESET_IS_RAW_DEV) {
1703*5673Saw148015 		filebench_log(LOG_ERROR,
1704*5673Saw148015 		    "flowop %s attempted to a createfile on RAW device",
1705*5673Saw148015 		    flowop->fo_name);
1706*5673Saw148015 		return (-1);
1707*5673Saw148015 	}
1708*5673Saw148015 #endif /* HAVE_RAW_SUPPORT */
1709*5673Saw148015 
17105184Sek110237 	if ((file = fileset_pick(flowop->fo_fileset,
17115184Sek110237 	    FILESET_PICKNOEXIST, 0)) == NULL) {
17125184Sek110237 		filebench_log(LOG_DEBUG_SCRIPT, "flowop %s failed to pick file",
17135184Sek110237 		    flowop->fo_name);
17145184Sek110237 		return (1);
17155184Sek110237 	}
17165184Sek110237 
17175184Sek110237 	threadflow->tf_fse[fd] = file;
17185184Sek110237 
17195184Sek110237 	flowop_beginop(threadflow, flowop);
17205184Sek110237 	threadflow->tf_fd[fd] = fileset_openfile(flowop->fo_fileset,
17215184Sek110237 	    file, O_RDWR | O_CREAT, 0666, flowoplib_fileattrs(flowop));
1722*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
17235184Sek110237 
17245184Sek110237 	if (threadflow->tf_fd[fd] < 0) {
17255184Sek110237 		filebench_log(LOG_ERROR, "failed to create file %s",
17265184Sek110237 		    flowop->fo_name);
17275184Sek110237 		return (-1);
17285184Sek110237 	}
17295184Sek110237 
17305184Sek110237 	filebench_log(LOG_DEBUG_SCRIPT,
17315184Sek110237 	    "flowop %s: created %s fd[%d] = %d",
17325184Sek110237 	    flowop->fo_name, file->fse_path, fd, threadflow->tf_fd[fd]);
17335184Sek110237 
17345184Sek110237 	return (0);
17355184Sek110237 }
17365184Sek110237 
17375184Sek110237 /*
17385184Sek110237  * Emulates delete of a file. Picks an arbitrary filesetentry
17395184Sek110237  * whose file exists and uses unlink() to delete it. Clears
17405184Sek110237  * the FSE_EXISTS flag for the filesetentry. Returns -1 if the
17415184Sek110237  * flowop has no associated fileset. Returns 1 if an appropriate
17425184Sek110237  * filesetentry cannot be found, and 0 on success.
17435184Sek110237  */
17445184Sek110237 static int
17455184Sek110237 flowoplib_deletefile(threadflow_t *threadflow, flowop_t *flowop)
17465184Sek110237 {
17475184Sek110237 	filesetentry_t *file;
17485184Sek110237 	fileset_t *fileset;
17495184Sek110237 	char path[MAXPATHLEN];
17505184Sek110237 	char *pathtmp;
17515184Sek110237 
17525184Sek110237 	if (flowop->fo_fileset == NULL) {
17535184Sek110237 		filebench_log(LOG_ERROR, "flowop NULL file");
17545184Sek110237 		return (-1);
17555184Sek110237 	}
17565184Sek110237 
17575184Sek110237 	fileset = flowop->fo_fileset;
17585184Sek110237 
1759*5673Saw148015 #ifdef HAVE_RAW_SUPPORT
1760*5673Saw148015 	/* can't be used with raw devices */
1761*5673Saw148015 	if (flowop->fo_fileset->fs_attrs & FILESET_IS_RAW_DEV) {
1762*5673Saw148015 		filebench_log(LOG_ERROR,
1763*5673Saw148015 		    "flowop %s attempted a deletefile on RAW device",
1764*5673Saw148015 		    flowop->fo_name);
1765*5673Saw148015 		return (-1);
1766*5673Saw148015 	}
1767*5673Saw148015 #endif /* HAVE_RAW_SUPPORT */
1768*5673Saw148015 
17695184Sek110237 	if ((file = fileset_pick(flowop->fo_fileset,
17705184Sek110237 	    FILESET_PICKEXISTS, 0)) == NULL) {
17715184Sek110237 		filebench_log(LOG_DEBUG_SCRIPT, "flowop %s failed to pick file",
17725184Sek110237 		    flowop->fo_name);
17735184Sek110237 		return (1);
17745184Sek110237 	}
17755184Sek110237 
17765184Sek110237 	*path = 0;
17775184Sek110237 	(void) strcpy(path, *fileset->fs_path);
17785184Sek110237 	(void) strcat(path, "/");
17795184Sek110237 	(void) strcat(path, fileset->fs_name);
17805184Sek110237 	pathtmp = fileset_resolvepath(file);
17815184Sek110237 	(void) strcat(path, pathtmp);
17825184Sek110237 	free(pathtmp);
17835184Sek110237 
17845184Sek110237 	flowop_beginop(threadflow, flowop);
17855184Sek110237 	(void) unlink(path);
1786*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
17875184Sek110237 	file->fse_flags &= ~FSE_EXISTS;
17885184Sek110237 	(void) ipc_mutex_unlock(&file->fse_lock);
17895184Sek110237 
17905184Sek110237 	filebench_log(LOG_DEBUG_SCRIPT, "deleted file %s", file->fse_path);
17915184Sek110237 
17925184Sek110237 	return (0);
17935184Sek110237 }
17945184Sek110237 
17955184Sek110237 /*
17965184Sek110237  * Emulates fsync of a file. Obtains the file descriptor index
17975184Sek110237  * from the flowop, obtains the actual file descriptor from
17985184Sek110237  * the threadflow's table, checks to be sure it is still an
17995184Sek110237  * open file, then does an fsync operation on it. Returns -1
18005184Sek110237  * if the file no longer is open, 0 otherwise.
18015184Sek110237  */
18025184Sek110237 static int
18035184Sek110237 flowoplib_fsync(threadflow_t *threadflow, flowop_t *flowop)
18045184Sek110237 {
18055184Sek110237 	filesetentry_t *file;
18065184Sek110237 	int fd = flowop->fo_fdnumber;
18075184Sek110237 
18085184Sek110237 	if (threadflow->tf_fd[fd] == 0) {
18095184Sek110237 		filebench_log(LOG_ERROR,
18105184Sek110237 		    "flowop %s attempted to fsync a closed fd %d",
18115184Sek110237 		    flowop->fo_name, fd);
18125184Sek110237 		return (-1);
18135184Sek110237 	}
18145184Sek110237 
1815*5673Saw148015 	file = threadflow->tf_fse[fd];
1816*5673Saw148015 
1817*5673Saw148015 	if ((file == NULL) ||
1818*5673Saw148015 	    (file->fse_fileset->fs_attrs & FILESET_IS_RAW_DEV)) {
1819*5673Saw148015 		filebench_log(LOG_ERROR,
1820*5673Saw148015 		    "flowop %s attempted to a fsync a RAW device",
1821*5673Saw148015 		    flowop->fo_name);
1822*5673Saw148015 		return (-1);
1823*5673Saw148015 	}
1824*5673Saw148015 
18255184Sek110237 	/* Measure time to fsync */
18265184Sek110237 	flowop_beginop(threadflow, flowop);
18275184Sek110237 	(void) fsync(threadflow->tf_fd[fd]);
1828*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
18295184Sek110237 
18305184Sek110237 	filebench_log(LOG_DEBUG_SCRIPT, "fsync file %s", file->fse_path);
18315184Sek110237 
18325184Sek110237 	return (0);
18335184Sek110237 }
18345184Sek110237 
18355184Sek110237 /*
18365184Sek110237  * Emulate fsync of an entire fileset. Search through the
18375184Sek110237  * threadflow's file descriptor array, doing fsync() on each
18385184Sek110237  * open file that belongs to the flowop's fileset. Always
18395184Sek110237  * returns 0.
18405184Sek110237  */
18415184Sek110237 static int
18425184Sek110237 flowoplib_fsyncset(threadflow_t *threadflow, flowop_t *flowop)
18435184Sek110237 {
18445184Sek110237 	int fd;
18455184Sek110237 
18465184Sek110237 	for (fd = 0; fd < THREADFLOW_MAXFD; fd++) {
18475184Sek110237 		filesetentry_t *file;
18485184Sek110237 
18495184Sek110237 		/* Match the file set to fsync */
18505184Sek110237 		if ((threadflow->tf_fse[fd] == NULL) ||
18515184Sek110237 		    (flowop->fo_fileset != threadflow->tf_fse[fd]->fse_fileset))
18525184Sek110237 			continue;
18535184Sek110237 
18545184Sek110237 		/* Measure time to fsync */
18555184Sek110237 		flowop_beginop(threadflow, flowop);
18565184Sek110237 		(void) fsync(threadflow->tf_fd[fd]);
1857*5673Saw148015 		flowop_endop(threadflow, flowop, 0);
18585184Sek110237 
18595184Sek110237 		file = threadflow->tf_fse[fd];
18605184Sek110237 
18615184Sek110237 		filebench_log(LOG_DEBUG_SCRIPT, "fsync file %s",
18625184Sek110237 		    file->fse_path);
18635184Sek110237 	}
18645184Sek110237 
18655184Sek110237 	return (0);
18665184Sek110237 }
18675184Sek110237 
18685184Sek110237 /*
18695184Sek110237  * Emulate close of a file.  Obtains the file descriptor index
18705184Sek110237  * from the flowop, obtains the actual file descriptor from the
18715184Sek110237  * threadflow's table, checks to be sure it is still an open
18725184Sek110237  * file, then does a close operation on it. Then sets the
18735184Sek110237  * threadflow file descriptor table entry to 0, and the file set
18745184Sek110237  * entry pointer to NULL. Returns -1 if the file was not open,
18755184Sek110237  * 0 otherwise.
18765184Sek110237  */
18775184Sek110237 static int
18785184Sek110237 flowoplib_closefile(threadflow_t *threadflow, flowop_t *flowop)
18795184Sek110237 {
18805184Sek110237 	filesetentry_t *file;
18815184Sek110237 	int fd = flowop->fo_fdnumber;
18825184Sek110237 
18835184Sek110237 	if (threadflow->tf_fd[fd] == 0) {
18845184Sek110237 		filebench_log(LOG_ERROR,
18855184Sek110237 		    "flowop %s attempted to close an already closed fd %d",
18865184Sek110237 		    flowop->fo_name, fd);
18875184Sek110237 		return (-1);
18885184Sek110237 	}
18895184Sek110237 
18905184Sek110237 	/* Measure time to close */
18915184Sek110237 	flowop_beginop(threadflow, flowop);
18925184Sek110237 	(void) close(threadflow->tf_fd[fd]);
1893*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
18945184Sek110237 
18955184Sek110237 	file = threadflow->tf_fse[fd];
18965184Sek110237 
18975184Sek110237 	threadflow->tf_fd[fd] = 0;
18985184Sek110237 	threadflow->tf_fse[fd] = NULL;
18995184Sek110237 
19005184Sek110237 	filebench_log(LOG_DEBUG_SCRIPT, "closed file %s", file->fse_path);
19015184Sek110237 
19025184Sek110237 	return (0);
19035184Sek110237 }
19045184Sek110237 
19055184Sek110237 /*
19065184Sek110237  * Emulate stat of a file. Picks an arbitrary filesetentry with
19075184Sek110237  * an existing file from the flowop's fileset, then performs a
19085184Sek110237  * stat() operation on it. Returns -1 if the flowop has no
19095184Sek110237  * associated fileset. Returns 1 if an appropriate filesetentry
19105184Sek110237  * cannot be found, and 0 on success.
19115184Sek110237  */
19125184Sek110237 static int
19135184Sek110237 flowoplib_statfile(threadflow_t *threadflow, flowop_t *flowop)
19145184Sek110237 {
19155184Sek110237 	filesetentry_t *file;
19165184Sek110237 	fileset_t *fileset;
19175184Sek110237 	char path[MAXPATHLEN];
19185184Sek110237 	char *pathtmp;
19195184Sek110237 
19205184Sek110237 	if (flowop->fo_fileset == NULL) {
19215184Sek110237 		filebench_log(LOG_ERROR, "flowop NULL file");
19225184Sek110237 		return (-1);
19235184Sek110237 	}
19245184Sek110237 
19255184Sek110237 	fileset = flowop->fo_fileset;
19265184Sek110237 
19275184Sek110237 	if ((file = fileset_pick(flowop->fo_fileset,
19285184Sek110237 	    FILESET_PICKEXISTS, 0)) == NULL) {
19295184Sek110237 		filebench_log(LOG_DEBUG_SCRIPT, "flowop %s failed to pick file",
19305184Sek110237 		    flowop->fo_name);
19315184Sek110237 		return (1);
19325184Sek110237 	}
19335184Sek110237 
19345184Sek110237 	*path = 0;
19355184Sek110237 	(void) strcpy(path, *fileset->fs_path);
19365184Sek110237 	(void) strcat(path, "/");
19375184Sek110237 	(void) strcat(path, fileset->fs_name);
19385184Sek110237 	pathtmp = fileset_resolvepath(file);
19395184Sek110237 	(void) strcat(path, pathtmp);
19405184Sek110237 	free(pathtmp);
19415184Sek110237 
19425184Sek110237 	flowop_beginop(threadflow, flowop);
1943*5673Saw148015 	flowop_endop(threadflow, flowop, 0);
19445184Sek110237 
19455184Sek110237 	(void) ipc_mutex_unlock(&file->fse_lock);
19465184Sek110237 
19475184Sek110237 	return (0);
19485184Sek110237 }
19495184Sek110237 
19505184Sek110237 
19515184Sek110237 /*
19525184Sek110237  * Additional reads and writes. Read and write whole files, write
19535184Sek110237  * and append to files. Some of these work with both fileobjs and
19545184Sek110237  * filesets, others only with filesets. The flowoplib_write routine
19555184Sek110237  * writes from thread memory, while the others read or write using
19565184Sek110237  * fo_buf memory. Note that both flowoplib_read() and
19575184Sek110237  * flowoplib_aiowrite() use thread memory as well.
19585184Sek110237  */
19595184Sek110237 
19605184Sek110237 
19615184Sek110237 /*
1962*5673Saw148015  * Emulate a read of a whole file. The file must be open with
1963*5673Saw148015  * file descriptor and filesetentry stored at the locations indexed
1964*5673Saw148015  * by the flowop's fdnumber. It then seeks to the beginning of the
1965*5673Saw148015  * associated file, and reads fs_iosize bytes at a time until the end
1966*5673Saw148015  * of the file. Returns -1 on error, 0 on success.
19675184Sek110237  */
19685184Sek110237 static int
19695184Sek110237 flowoplib_readwholefile(threadflow_t *threadflow, flowop_t *flowop)
19705184Sek110237 {
1971*5673Saw148015 	caddr_t iobuf;
19725184Sek110237 	off64_t bytes = 0;
19735184Sek110237 	int fd = flowop->fo_fdnumber;
1974*5673Saw148015 	int filedesc;
19755184Sek110237 	int ret;
1976*5673Saw148015 	uint64_t wss;
1977*5673Saw148015 	vinteger_t iosize = *flowop->fo_iosize;
19785184Sek110237 
1979*5673Saw148015 	/* get the file to use */
1980*5673Saw148015 	if (flowoplib_filesetup(threadflow, flowop, &wss, &filedesc) != 0)
19815184Sek110237 		return (-1);
19825184Sek110237 
1983*5673Saw148015 	/* an I/O size of zero means read entire working set with one I/O */
1984*5673Saw148015 	if (iosize == 0)
1985*5673Saw148015 		iosize = wss;
19865184Sek110237 
1987*5673Saw148015 	if (flowoplib_iobufsetup(threadflow, flowop, &iobuf, iosize) != 0)
19885184Sek110237 		return (-1);
19895184Sek110237 
19905184Sek110237 	/* Measure time to read bytes */
19915184Sek110237 	flowop_beginop(threadflow, flowop);
1992*5673Saw148015 	(void) lseek64(filedesc, 0, SEEK_SET);
1993*5673Saw148015 	while ((ret = read(filedesc, iobuf, iosize)) > 0)
19945184Sek110237 		bytes += ret;
19955184Sek110237 
1996*5673Saw148015 	flowop_endop(threadflow, flowop, bytes);
19975184Sek110237 
19985184Sek110237 	if (ret < 0) {
19995184Sek110237 		filebench_log(LOG_ERROR,
20005184Sek110237 		    "Failed to read fd %d: %s",
20015184Sek110237 		    fd, strerror(errno));
20025184Sek110237 		return (-1);
20035184Sek110237 	}
20045184Sek110237 
20055184Sek110237 	return (0);
20065184Sek110237 }
20075184Sek110237 
20085184Sek110237 /*
20095184Sek110237  * Emulate a write to a file of size fo_iosize.  Will write
20105184Sek110237  * to a file from a fileset if the flowop's fo_fileset field
20115184Sek110237  * specifies one or its fdnumber is non zero. Otherwise it
20125184Sek110237  * will write to a fileobj file, if one exists. If the file
20135184Sek110237  * is not currently open, the routine will attempt to open
20145184Sek110237  * it. The flowop's fo_wss parameter will be used to set the
20155184Sek110237  * maximum file size if it is non-zero, otherwise the
20165184Sek110237  * filesetentry's  fse_size will be used. A random memory
20175184Sek110237  * buffer offset is calculated, and, if fo_random is TRUE,
20185184Sek110237  * a random file offset is used for the write. Otherwise the
20195184Sek110237  * write is to the next sequential location. Returns 1 on
20205184Sek110237  * errors, 0 on success.
20215184Sek110237  */
20225184Sek110237 static int
20235184Sek110237 flowoplib_write(threadflow_t *threadflow, flowop_t *flowop)
20245184Sek110237 {
2025*5673Saw148015 	caddr_t iobuf;
20265184Sek110237 	vinteger_t wss;
20275184Sek110237 	int filedesc;
20285184Sek110237 
2029*5673Saw148015 	if (flowoplib_iosetup(threadflow, flowop, &wss, &iobuf,
2030*5673Saw148015 	    &filedesc, *flowop->fo_iosize) != 0)
20315184Sek110237 		return (-1);
20325184Sek110237 
20335184Sek110237 	if (*flowop->fo_random) {
20345184Sek110237 		uint64_t fileoffset;
20355184Sek110237 
20365184Sek110237 		if (filebench_randomno64(&fileoffset,
20375184Sek110237 		    wss, *flowop->fo_iosize) == -1) {
20385184Sek110237 			filebench_log(LOG_ERROR,
20395184Sek110237 			    "file size smaller than IO size for thread %s",
20405184Sek110237 			    flowop->fo_name);
20415184Sek110237 			return (-1);
20425184Sek110237 		}
20435184Sek110237 		flowop_beginop(threadflow, flowop);
2044*5673Saw148015 		if (pwrite64(filedesc, iobuf,
20455184Sek110237 		    *flowop->fo_iosize, (off64_t)fileoffset) == -1) {
20465184Sek110237 			filebench_log(LOG_ERROR, "write failed, "
2047*5673Saw148015 			    "offset %lld io buffer %zd: %s",
2048*5673Saw148015 			    fileoffset, iobuf, strerror(errno));
2049*5673Saw148015 			flowop_endop(threadflow, flowop, 0);
20505184Sek110237 			return (-1);
20515184Sek110237 		}
2052*5673Saw148015 		flowop_endop(threadflow, flowop, *flowop->fo_iosize);
20535184Sek110237 	} else {
20545184Sek110237 		flowop_beginop(threadflow, flowop);
2055*5673Saw148015 		if (write(filedesc, iobuf,
20565184Sek110237 		    *flowop->fo_iosize) == -1) {
20575184Sek110237 			filebench_log(LOG_ERROR,
2058*5673Saw148015 			    "write failed, io buffer %zd: %s",
2059*5673Saw148015 			    iobuf, strerror(errno));
2060*5673Saw148015 			flowop_endop(threadflow, flowop, 0);
20615184Sek110237 			return (-1);
20625184Sek110237 		}
2063*5673Saw148015 		flowop_endop(threadflow, flowop, *flowop->fo_iosize);
20645184Sek110237 	}
20655184Sek110237 
20665184Sek110237 	return (0);
20675184Sek110237 }
20685184Sek110237 
20695184Sek110237 /*
20705184Sek110237  * Emulate a write of a whole file.  The size of the file
2071*5673Saw148015  * is taken from a filesetentry identified by fo_srcfdnumber or
2072*5673Saw148015  * from the working set size, while the file descriptor used is
2073*5673Saw148015  * identified by fo_fdnumber. Does multiple writes of fo_iosize
2074*5673Saw148015  * length length until full file has been written. Returns -1 on
2075*5673Saw148015  * error, 0 on success.
20765184Sek110237  */
20775184Sek110237 static int
20785184Sek110237 flowoplib_writewholefile(threadflow_t *threadflow, flowop_t *flowop)
20795184Sek110237 {
2080*5673Saw148015 	caddr_t iobuf;
20815184Sek110237 	filesetentry_t *file;
20825184Sek110237 	int wsize;
20835184Sek110237 	off64_t seek;
20845184Sek110237 	off64_t bytes = 0;
2085*5673Saw148015 	uint64_t wss;
2086*5673Saw148015 	int filedesc;
20875184Sek110237 	int srcfd = flowop->fo_srcfdnumber;
20885184Sek110237 	int ret;
2089*5673Saw148015 	vinteger_t iosize = *flowop->fo_iosize;
20905184Sek110237 
2091*5673Saw148015 	/* get the file to use */
2092*5673Saw148015 	if (flowoplib_filesetup(threadflow, flowop, &wss, &filedesc) != 0)
20935184Sek110237 		return (-1);
20945184Sek110237 
2095*5673Saw148015 	/* an I/O size of zero means read entire working set with one I/O */
2096*5673Saw148015 	if (iosize == 0)
2097*5673Saw148015 		iosize = wss;
20985184Sek110237 
2099*5673Saw148015 	if (flowoplib_iobufsetup(threadflow, flowop, &iobuf, iosize) != 0)
2100*5673Saw148015 		return (-1);
21015184Sek110237 
21025184Sek110237 	file = threadflow->tf_fse[srcfd];
2103*5673Saw148015 	if ((srcfd != 0) && (file == NULL)) {
2104*5673Saw148015 		filebench_log(LOG_ERROR, "flowop %s: NULL src file",
21055184Sek110237 		    flowop->fo_name);
21065184Sek110237 		return (-1);
21075184Sek110237 	}
21085184Sek110237 
2109*5673Saw148015 	if (file)
2110*5673Saw148015 		wss = file->fse_size;
2111*5673Saw148015 
2112*5673Saw148015 	wsize = (int)MIN(wss, iosize);
21135184Sek110237 
21145184Sek110237 	/* Measure time to write bytes */
21155184Sek110237 	flowop_beginop(threadflow, flowop);
2116*5673Saw148015 	for (seek = 0; seek < wss; seek += wsize) {
2117*5673Saw148015 		ret = write(filedesc, iobuf, wsize);
21185184Sek110237 		if (ret != wsize) {
21195184Sek110237 			filebench_log(LOG_ERROR,
21205184Sek110237 			    "Failed to write %d bytes on fd %d: %s",
2121*5673Saw148015 			    wsize, filedesc, strerror(errno));
2122*5673Saw148015 			flowop_endop(threadflow, flowop, 0);
21235184Sek110237 			return (-1);
21245184Sek110237 		}
2125*5673Saw148015 		wsize = (int)MIN(wss - seek, iosize);
21265184Sek110237 		bytes += ret;
21275184Sek110237 	}
2128*5673Saw148015 	flowop_endop(threadflow, flowop, bytes);
21295184Sek110237 
21305184Sek110237 	return (0);
21315184Sek110237 }
21325184Sek110237 
21335184Sek110237 
21345184Sek110237 /*
21355184Sek110237  * Emulate a fixed size append to a file. Will append data to
21365184Sek110237  * a file chosen from a fileset if the flowop's fo_fileset
21375184Sek110237  * field specifies one or if its fdnumber is non zero.
21385184Sek110237  * Otherwise it will write to a fileobj file, if one exists.
21395184Sek110237  * The flowop's fo_wss parameter will be used to set the
21405184Sek110237  * maximum file size if it is non-zero, otherwise the
21415184Sek110237  * filesetentry's fse_size will be used. A random memory
21425184Sek110237  * buffer offset is calculated, then a logical seek to the
21435184Sek110237  * end of file is done followed by a write of fo_iosize
21445184Sek110237  * bytes. Writes are actually done from fo_buf, rather than
21455184Sek110237  * tf_mem as is done with flowoplib_write(), and no check
21465184Sek110237  * is made to see if fo_iosize exceeds the size of fo_buf.
21475184Sek110237  * Returns -1 on error, 0 on success.
21485184Sek110237  */
21495184Sek110237 static int
21505184Sek110237 flowoplib_appendfile(threadflow_t *threadflow, flowop_t *flowop)
21515184Sek110237 {
2152*5673Saw148015 	caddr_t iobuf;
2153*5673Saw148015 	int filedesc;
21545184Sek110237 	vinteger_t wss;
2155*5673Saw148015 	vinteger_t iosize = *flowop->fo_iosize;
21565184Sek110237 	int ret;
21575184Sek110237 
2158*5673Saw148015 	if (flowoplib_iosetup(threadflow, flowop, &wss, &iobuf,
2159*5673Saw148015 	    &filedesc, iosize) != 0)
2160*5673Saw148015 		return (-1);
21615184Sek110237 
21625184Sek110237 	/* XXX wss is not being used */
21635184Sek110237 
21645184Sek110237 	/* Measure time to write bytes */
21655184Sek110237 	flowop_beginop(threadflow, flowop);
21665184Sek110237 	(void) lseek64(filedesc, 0, SEEK_END);
2167*5673Saw148015 	ret = write(filedesc, iobuf, iosize);
2168*5673Saw148015 	if (ret != iosize) {
21695184Sek110237 		filebench_log(LOG_ERROR,
21705184Sek110237 		    "Failed to write %d bytes on fd %d: %s",
2171*5673Saw148015 		    iosize, filedesc, strerror(errno));
2172*5673Saw148015 		flowop_endop(threadflow, flowop, 0);
21735184Sek110237 		return (-1);
21745184Sek110237 	}
2175*5673Saw148015 	flowop_endop(threadflow, flowop, iosize);
21765184Sek110237 
21775184Sek110237 	return (0);
21785184Sek110237 }
21795184Sek110237 
21805184Sek110237 /*
21815184Sek110237  * Emulate a random size append to a file. Will append data
21825184Sek110237  * to a file chosen from a fileset if the flowop's fo_fileset
21835184Sek110237  * field specifies one or if its fdnumber is non zero. Otherwise
21845184Sek110237  * it will write to a fileobj file, if one exists. The flowop's
21855184Sek110237  * fo_wss parameter will be used to set the maximum file size
21865184Sek110237  * if it is non-zero, otherwise the filesetentry's fse_size
21875184Sek110237  * will be used.  A random transfer size (but at most fo_iosize
21885184Sek110237  * bytes) and a random memory offset are calculated. A logical
21895184Sek110237  * seek to the end of file is done, then writes of up to
21905184Sek110237  * FILE_ALLOC_BLOCK in size are done until the full transfer
21915184Sek110237  * size has been written. Writes are actually done from fo_buf,
21925184Sek110237  * rather than tf_mem as is done with flowoplib_write().
21935184Sek110237  * Returns -1 on error, 0 on success.
21945184Sek110237  */
21955184Sek110237 static int
21965184Sek110237 flowoplib_appendfilerand(threadflow_t *threadflow, flowop_t *flowop)
21975184Sek110237 {
2198*5673Saw148015 	caddr_t iobuf;
21995184Sek110237 	uint64_t appendsize;
2200*5673Saw148015 	int filedesc;
22015184Sek110237 	vinteger_t wss;
2202*5673Saw148015 	int ret = 0;
22035184Sek110237 
2204*5673Saw148015 	if (filebench_randomno64(&appendsize, *flowop->fo_iosize, 1LL) != 0)
2205*5673Saw148015 		return (-1);
22065184Sek110237 
2207*5673Saw148015 	/* skip if attempting zero length append */
2208*5673Saw148015 	if (appendsize == 0) {
2209*5673Saw148015 		flowop_beginop(threadflow, flowop);
2210*5673Saw148015 		flowop_endop(threadflow, flowop, 0LL);
2211*5673Saw148015 		return (0);
2212*5673Saw148015 	}
22135184Sek110237 
2214*5673Saw148015 	if (flowoplib_iosetup(threadflow, flowop, &wss, &iobuf,
2215*5673Saw148015 	    &filedesc, appendsize) != 0)
2216*5673Saw148015 		return (-1);
2217*5673Saw148015 
22185184Sek110237 	/* XXX wss is not being used */
22195184Sek110237 
2220*5673Saw148015 	/* Measure time to write bytes */
2221*5673Saw148015 	flowop_beginop(threadflow, flowop);
2222*5673Saw148015 
2223*5673Saw148015 	(void) lseek64(filedesc, 0, SEEK_END);
2224*5673Saw148015 	ret = write(filedesc, iobuf, appendsize);
2225*5673Saw148015 	if (ret != appendsize) {
2226*5673Saw148015 		filebench_log(LOG_ERROR,
2227*5673Saw148015 		    "Failed to write %d bytes on fd %d: %s",
2228*5673Saw148015 		    appendsize, filedesc, strerror(errno));
2229*5673Saw148015 		flowop_endop(threadflow, flowop, 0);
22305184Sek110237 		return (-1);
22315184Sek110237 	}
22325184Sek110237 
2233*5673Saw148015 	flowop_endop(threadflow, flowop, appendsize);
22345184Sek110237 
22355184Sek110237 	return (0);
22365184Sek110237 }
22375184Sek110237 
22385184Sek110237 
/*
 * Prints usage information for flowop operations. The text is kept
 * in a table and written to stderr one entry at a time, in order.
 */
void
flowoplib_usage()
{
	static const char *const usage_text[] = {
		"flowop [openfile|createfile] name=<name>,fileset=<fname>\n",
		"                       [,fd=<file desc num>]\n",
		"\n",
		"flowop closefile name=<name>,fd=<file desc num>]\n",
		"\n",
		"flowop deletefile name=<name>\n",
		"                       [,fileset=<fname>]\n",
		"                       [,fd=<file desc num>]\n",
		"\n",
		"flowop statfile name=<name>\n",
		"                       [,fileset=<fname>]\n",
		"                       [,fd=<file desc num>]\n",
		"\n",
		"flowop fsync name=<name>,fd=<file desc num>]\n",
		"\n",
		"flowop fsyncset name=<name>,fileset=<fname>]\n",
		"\n",
		"flowop [write|read|aiowrite] name=<name>, \n",
		"                       filename|fileset=<fname>,\n",
		"                       iosize=<size>\n",
		"                       [,directio]\n",
		"                       [,dsync]\n",
		"                       [,iters=<count>]\n",
		"                       [,random]\n",
		"                       [,opennext]\n",
		"                       [,workingset=<size>]\n",
		"flowop [appendfile|appendfilerand] name=<name>, \n",
		"                       filename|fileset=<fname>,\n",
		"                       iosize=<size>\n",
		"                       [,dsync]\n",
		"                       [,iters=<count>]\n",
		"                       [,workingset=<size>]\n",
		"flowop [readwholefile|writewholefile] name=<name>, \n",
		"                       filename|fileset=<fname>,\n",
		"                       iosize=<size>\n",
		"                       [,dsync]\n",
		"                       [,iters=<count>]\n",
		"\n",
		"flowop aiowait name=<name>,target=<aiowrite-flowop>\n",
		"\n",
		"flowop sempost name=<name>,target=<semblock-flowop>,\n",
		"                       value=<increment-to-post>\n",
		"\n",
		"flowop semblock name=<name>,value=<decrement-to-receive>,\n",
		"                       highwater=<inbound-queue-max>\n",
		"\n",
		"flowop block name=<name>\n",
		"\n",
		"flowop wakeup name=<name>,target=<block-flowop>,\n",
		"\n",
		"flowop hog name=<name>,value=<number-of-mem-ops>\n",
		"flowop delay name=<name>,value=<number-of-seconds>\n",
		"\n",
		"flowop eventlimit name=<name>\n",
		"flowop bwlimit name=<name>,value=<mb/s>\n",
		"flowop iopslimit name=<name>,value=<iop/s>\n",
		"flowop finishoncount name=<name>,value=<ops/s>\n",
		"flowop finishonbytes name=<name>,value=<bytes>\n",
		"\n",
		"\n"
	};
	size_t entries = sizeof (usage_text) / sizeof (usage_text[0]);
	size_t idx;

	for (idx = 0; idx < entries; idx++)
		(void) fprintf(stderr, "%s", usage_text[idx]);
}
2328