xref: /onnv-gate/usr/src/cmd/filebench/common/fb_localfs.c (revision 8762:6e3d4153d2d5)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * Portions Copyright 2008 Denis Cheng
26  */
27 
28 #include "config.h"
29 #include "filebench.h"
30 #include "flowop.h"
31 #include "threadflow.h" /* For aiolist definition */
32 
#ifndef HAVE_OFF64_T
/*
 * The build configuration did not find an off64_t type; we are
 * probably on linux. According to
 * http://www.suse.de/~aj/linux_lfs.html, enabling large file support
 * there changes off_t itself to the 64-bit type, so off64_t is simply
 * spelled as off_t for the rest of this file.
 */
#define	off64_t off_t
#endif /* HAVE_OFF64_T */
42 
43 #include <fcntl.h>
44 #include <stdio.h>
45 #include <stdlib.h>
46 #include <unistd.h>
47 #include <libgen.h>
48 #include <sys/mman.h>
49 #include <sys/stat.h>
50 #include <sys/types.h>
51 #include <sys/param.h>
52 #include <sys/resource.h>
53 
54 #include "filebench.h"
55 #include "fsplug.h"
56 
57 #ifdef HAVE_AIO
58 #include <aio.h>
59 #endif /* HAVE_AIO */
60 
61 #ifdef HAVE_LIBAIO_H
62 #include <libaio.h>
63 #endif /* HAVE_LIBAIO_H */
64 
#if !defined(HAVE_AIOCB64_T)
/*
 * When the build configuration does not define HAVE_AIOCB64_T, every
 * use of "aiocb64" below is rewritten to plain "aiocb".
 */
#define	aiocb64 aiocb
#endif /* !HAVE_AIOCB64_T */
68 
69 /*
70  * These routines implement local file access. They are placed into a
71  * vector of functions that are called by all I/O operations in fileset.c
72  * and flowop_library.c. This represents the default file system plug-in,
73  * and may be replaced by vectors for other file system plug-ins.
74  */
75 
76 static int fb_lfs_freemem(fb_fdesc_t *fd, off64_t size);
77 static int fb_lfs_open(fb_fdesc_t *, char *, int, int);
78 static int fb_lfs_pread(fb_fdesc_t *, caddr_t, fbint_t, off64_t);
79 static int fb_lfs_read(fb_fdesc_t *, caddr_t, fbint_t);
80 static int fb_lfs_pwrite(fb_fdesc_t *, caddr_t, fbint_t, off64_t);
81 static int fb_lfs_write(fb_fdesc_t *, caddr_t, fbint_t);
82 static int fb_lfs_lseek(fb_fdesc_t *, off64_t, int);
83 static int fb_lfs_truncate(fb_fdesc_t *, off64_t);
84 static int fb_lfs_rename(const char *, const char *);
85 static int fb_lfs_close(fb_fdesc_t *);
86 static int fb_lfs_link(const char *, const char *);
87 static int fb_lfs_symlink(const char *, const char *);
88 static int fb_lfs_unlink(char *);
89 static ssize_t fb_lfs_readlink(const char *, char *, size_t);
90 static int fb_lfs_mkdir(char *, int);
91 static int fb_lfs_rmdir(char *);
92 static DIR *fb_lfs_opendir(char *);
93 static struct dirent *fb_lfs_readdir(DIR *);
94 static int fb_lfs_closedir(DIR *);
95 static int fb_lfs_fsync(fb_fdesc_t *);
96 static int fb_lfs_stat(char *, struct stat64 *);
97 static int fb_lfs_fstat(fb_fdesc_t *, struct stat64 *);
98 static int fb_lfs_access(const char *, int);
99 
100 static fsplug_func_t fb_lfs_funcs =
101 {
102 	"locfs",
103 	fb_lfs_freemem,		/* flush page cache */
104 	fb_lfs_open,		/* open */
105 	fb_lfs_pread,		/* pread */
106 	fb_lfs_read,		/* read */
107 	fb_lfs_pwrite,		/* pwrite */
108 	fb_lfs_write,		/* write */
109 	fb_lfs_lseek,		/* lseek */
110 	fb_lfs_truncate,	/* ftruncate */
111 	fb_lfs_rename,		/* rename */
112 	fb_lfs_close,		/* close */
113 	fb_lfs_link,		/* link */
114 	fb_lfs_symlink,		/* symlink */
115 	fb_lfs_unlink,		/* unlink */
116 	fb_lfs_readlink,	/* readlink */
117 	fb_lfs_mkdir,		/* mkdir */
118 	fb_lfs_rmdir,		/* rmdir */
119 	fb_lfs_opendir,		/* opendir */
120 	fb_lfs_readdir,		/* readdir */
121 	fb_lfs_closedir,	/* closedir */
122 	fb_lfs_fsync,		/* fsync */
123 	fb_lfs_stat,		/* stat */
124 	fb_lfs_fstat,		/* fstat */
125 	fb_lfs_access		/* access */
126 };
127 
#ifdef HAVE_AIO
/*
 * The asynchronous IO flowops are implemented in this module because
 * they depend on a number of local file system specific features.
 */
static int fb_lfsflow_aiowrite(threadflow_t *, flowop_t *);
static int fb_lfsflow_aiowait(threadflow_t *, flowop_t *);

/*
 * Flowop prototype entries handed to flowop_flow_init(). The
 * initializers are positional and listed entry after entry.
 */
static flowop_proto_t fb_lfsflow_funcs[] = {
	/* "aiowrite" entry */
	FLOW_TYPE_AIO, FLOW_ATTR_WRITE, "aiowrite",
	flowop_init_generic, fb_lfsflow_aiowrite, flowop_destruct_generic,

	/* "aiowait" entry */
	FLOW_TYPE_AIO, 0, "aiowait",
	flowop_init_generic, fb_lfsflow_aiowait, flowop_destruct_generic
};

#endif /* HAVE_AIO */
144 
145 /*
146  * Initialize this processes I/O functions vector to point to
147  * the vector of local file system I/O functions
148  */
149 void
150 fb_lfs_funcvecinit(void)
151 {
152 	fs_functions_vec = &fb_lfs_funcs;
153 }
154 
/*
 * Initialize those flowops whose implementation is file system
 * specific. When HAVE_AIO is defined this registers the entries of
 * fb_lfsflow_funcs through flowop_flow_init(); otherwise only the
 * I/O function vector is (re)set.
 */
void
fb_lfs_flowinit(void)
{
	/*
	 * Re-initialize the I/O functions vector while we are at
	 * it, as it may have been redefined since the process was
	 * created, at least if this is the master process.
	 */
	fb_lfs_funcvecinit();

#ifdef HAVE_AIO
	{
		/* only needed, and only declared, when AIO is built in */
		int nops = sizeof (fb_lfsflow_funcs) /
		    sizeof (fb_lfsflow_funcs[0]);

		flowop_flow_init(fb_lfsflow_funcs, nops);
	}
#endif /* HAVE_AIO */
}
176 
177 /*
178  * Frees up memory mapped file region of supplied size. The
179  * file descriptor "fd" indicates which memory mapped file.
180  * If successful, returns 0. Otherwise returns -1 if "size"
181  * is zero, or -1 times the number of times msync() failed.
182  */
183 static int
184 fb_lfs_freemem(fb_fdesc_t *fd, off64_t size)
185 {
186 	off64_t left;
187 	int ret = 0;
188 
189 	for (left = size; left > 0; left -= MMAP_SIZE) {
190 		off64_t thismapsize;
191 		caddr_t addr;
192 
193 		thismapsize = MIN(MMAP_SIZE, left);
194 		addr = mmap64(0, thismapsize, PROT_READ|PROT_WRITE,
195 		    MAP_SHARED, fd->fd_num, size - left);
196 		ret += msync(addr, thismapsize, MS_INVALIDATE);
197 		(void) munmap(addr, thismapsize);
198 	}
199 	return (ret);
200 }
201 
202 /*
203  * Does a posix pread. Returns what the pread() returns.
204  */
205 static int
206 fb_lfs_pread(fb_fdesc_t *fd, caddr_t iobuf, fbint_t iosize, off64_t fileoffset)
207 {
208 	return (pread64(fd->fd_num, iobuf, iosize, fileoffset));
209 }
210 
211 /*
212  * Does a posix read. Returns what the read() returns.
213  */
214 static int
215 fb_lfs_read(fb_fdesc_t *fd, caddr_t iobuf, fbint_t iosize)
216 {
217 	return (read(fd->fd_num, iobuf, iosize));
218 }
219 
220 #ifdef HAVE_AIO
221 
222 /*
223  * Asynchronous write section. An Asynchronous IO element
224  * (aiolist_t) is used to associate the asynchronous write request with
225  * its subsequent completion. This element includes a aiocb64 struct
226  * that is used by posix aio_xxx calls to track the asynchronous writes.
227  * The flowops aiowrite and aiowait result in calls to these posix
228  * aio_xxx system routines to do the actual asynchronous write IO
229  * operations.
230  */
231 
232 
233 /*
234  * Allocates an asynchronous I/O list (aio, of type
235  * aiolist_t) element. Adds it to the flowop thread's
236  * threadflow aio list. Returns a pointer to the element.
237  */
238 static aiolist_t *
239 aio_allocate(flowop_t *flowop)
240 {
241 	aiolist_t *aiolist;
242 
243 	if ((aiolist = malloc(sizeof (aiolist_t))) == NULL) {
244 		filebench_log(LOG_ERROR, "malloc aiolist failed");
245 		filebench_shutdown(1);
246 	}
247 
248 	/* Add to list */
249 	if (flowop->fo_thread->tf_aiolist == NULL) {
250 		flowop->fo_thread->tf_aiolist = aiolist;
251 		aiolist->al_next = NULL;
252 	} else {
253 		aiolist->al_next = flowop->fo_thread->tf_aiolist;
254 		flowop->fo_thread->tf_aiolist = aiolist;
255 	}
256 	return (aiolist);
257 }
258 
259 /*
260  * Searches for the aiolist element that has a matching
261  * completion block, aiocb. If none found returns FILEBENCH_ERROR. If
262  * found, removes the aiolist element from flowop thread's
263  * list and returns FILEBENCH_OK.
264  */
265 static int
266 aio_deallocate(flowop_t *flowop, struct aiocb64 *aiocb)
267 {
268 	aiolist_t *aiolist = flowop->fo_thread->tf_aiolist;
269 	aiolist_t *previous = NULL;
270 	aiolist_t *match = NULL;
271 
272 	if (aiocb == NULL) {
273 		filebench_log(LOG_ERROR, "null aiocb deallocate");
274 		return (FILEBENCH_OK);
275 	}
276 
277 	while (aiolist) {
278 		if (aiocb == &(aiolist->al_aiocb)) {
279 			match = aiolist;
280 			break;
281 		}
282 		previous = aiolist;
283 		aiolist = aiolist->al_next;
284 	}
285 
286 	if (match == NULL)
287 		return (FILEBENCH_ERROR);
288 
289 	/* Remove from the list */
290 	if (previous)
291 		previous->al_next = match->al_next;
292 	else
293 		flowop->fo_thread->tf_aiolist = match->al_next;
294 
295 	return (FILEBENCH_OK);
296 }
297 
298 /*
299  * Emulate posix aiowrite(). Determines which file to use,
300  * either one file of a fileset, or the file associated
301  * with a fileobj, allocates and fills an aiolist_t element
302  * for the write, and issues the asynchronous write. This
303  * operation is only valid for random IO, and returns an
304  * error if the flowop is set for sequential IO. Returns
305  * FILEBENCH_OK on success, FILEBENCH_NORSC if iosetup can't
306  * obtain a file to open, and FILEBENCH_ERROR on any
307  * encountered error.
308  */
309 static int
310 fb_lfsflow_aiowrite(threadflow_t *threadflow, flowop_t *flowop)
311 {
312 	caddr_t iobuf;
313 	fbint_t wss;
314 	fbint_t iosize;
315 	fb_fdesc_t *fdesc;
316 	int ret;
317 
318 	iosize = avd_get_int(flowop->fo_iosize);
319 
320 	if ((ret = flowoplib_iosetup(threadflow, flowop, &wss, &iobuf,
321 	    &fdesc, iosize)) != FILEBENCH_OK)
322 		return (ret);
323 
324 	if (avd_get_bool(flowop->fo_random)) {
325 		uint64_t fileoffset;
326 		struct aiocb64 *aiocb;
327 		aiolist_t *aiolist;
328 
329 		if (filebench_randomno64(&fileoffset,
330 		    wss, iosize, NULL) == -1) {
331 			filebench_log(LOG_ERROR,
332 			    "file size smaller than IO size for thread %s",
333 			    flowop->fo_name);
334 			return (FILEBENCH_ERROR);
335 		}
336 
337 		aiolist = aio_allocate(flowop);
338 		aiolist->al_type = AL_WRITE;
339 		aiocb = &aiolist->al_aiocb;
340 
341 		aiocb->aio_fildes = fdesc->fd_num;
342 		aiocb->aio_buf = iobuf;
343 		aiocb->aio_nbytes = (size_t)iosize;
344 		aiocb->aio_offset = (off64_t)fileoffset;
345 		aiocb->aio_reqprio = 0;
346 
347 		filebench_log(LOG_DEBUG_IMPL,
348 		    "aio fd=%d, bytes=%llu, offset=%llu",
349 		    fdesc->fd_num, (u_longlong_t)iosize,
350 		    (u_longlong_t)fileoffset);
351 
352 		flowop_beginop(threadflow, flowop);
353 		if (aio_write64(aiocb) < 0) {
354 			filebench_log(LOG_ERROR, "aiowrite failed: %s",
355 			    strerror(errno));
356 			filebench_shutdown(1);
357 		}
358 		flowop_endop(threadflow, flowop, iosize);
359 	} else {
360 		return (FILEBENCH_ERROR);
361 	}
362 
363 	return (FILEBENCH_OK);
364 }
365 
366 
367 
368 #define	MAXREAP 4096
369 
370 /*
371  * Emulate posix aiowait(). Waits for the completion of half the
372  * outstanding asynchronous IOs, or a single IO, which ever is
373  * larger. The routine will return after a sufficient number of
374  * completed calls issued by any thread in the procflow have
375  * completed, or a 1 second timout elapses. All completed
376  * IO operations are deleted from the thread's aiolist.
377  */
378 static int
379 fb_lfsflow_aiowait(threadflow_t *threadflow, flowop_t *flowop)
380 {
381 	struct aiocb64 **worklist;
382 	aiolist_t *aio = flowop->fo_thread->tf_aiolist;
383 	int uncompleted = 0;
384 
385 	worklist = calloc(MAXREAP, sizeof (struct aiocb64 *));
386 
387 	/* Count the list of pending aios */
388 	while (aio) {
389 		uncompleted++;
390 		aio = aio->al_next;
391 	}
392 
393 	do {
394 		uint_t ncompleted = 0;
395 		uint_t todo;
396 		struct timespec timeout;
397 		int inprogress;
398 		int i;
399 
400 		/* Wait for half of the outstanding requests */
401 		timeout.tv_sec = 1;
402 		timeout.tv_nsec = 0;
403 
404 		if (uncompleted > MAXREAP)
405 			todo = MAXREAP;
406 		else
407 			todo = uncompleted / 2;
408 
409 		if (todo == 0)
410 			todo = 1;
411 
412 		flowop_beginop(threadflow, flowop);
413 
414 #if (defined(HAVE_AIOWAITN) && defined(USE_PROCESS_MODEL))
415 		if (((aio_waitn64((struct aiocb64 **)worklist,
416 		    MAXREAP, &todo, &timeout)) == -1) &&
417 		    errno && (errno != ETIME)) {
418 			filebench_log(LOG_ERROR,
419 			    "aiowait failed: %s, outstanding = %d, "
420 			    "ncompleted = %d ",
421 			    strerror(errno), uncompleted, todo);
422 		}
423 
424 		ncompleted = todo;
425 		/* Take the  completed I/Os from the list */
426 		inprogress = 0;
427 		for (i = 0; i < ncompleted; i++) {
428 			if ((aio_return64(worklist[i]) == -1) &&
429 			    (errno == EINPROGRESS)) {
430 				inprogress++;
431 				continue;
432 			}
433 			if (aio_deallocate(flowop, worklist[i])
434 			    == FILEBENCH_ERROR) {
435 				filebench_log(LOG_ERROR, "Could not remove "
436 				    "aio from list ");
437 				flowop_endop(threadflow, flowop, 0);
438 				return (FILEBENCH_ERROR);
439 			}
440 		}
441 
442 		uncompleted -= ncompleted;
443 		uncompleted += inprogress;
444 
445 #else
446 
447 		for (ncompleted = 0, inprogress = 0,
448 		    aio = flowop->fo_thread->tf_aiolist;
449 		    ncompleted < todo, aio != NULL; aio = aio->al_next) {
450 			int result = aio_error64(&aio->al_aiocb);
451 
452 			if (result == EINPROGRESS) {
453 				inprogress++;
454 				continue;
455 			}
456 
457 			if ((aio_return64(&aio->al_aiocb) == -1) || result) {
458 				filebench_log(LOG_ERROR, "aio failed: %s",
459 				    strerror(result));
460 				continue;
461 			}
462 
463 			ncompleted++;
464 
465 			if (aio_deallocate(flowop, &aio->al_aiocb) < 0) {
466 				filebench_log(LOG_ERROR, "Could not remove "
467 				    "aio from list ");
468 				flowop_endop(threadflow, flowop, 0);
469 				return (FILEBENCH_ERROR);
470 			}
471 		}
472 
473 		uncompleted -= ncompleted;
474 
475 #endif
476 		filebench_log(LOG_DEBUG_SCRIPT,
477 		    "aio2 completed %d ios, uncompleted = %d, inprogress = %d",
478 		    ncompleted, uncompleted, inprogress);
479 
480 	} while (uncompleted > MAXREAP);
481 
482 	flowop_endop(threadflow, flowop, 0);
483 
484 	free(worklist);
485 
486 	return (FILEBENCH_OK);
487 }
488 
489 #endif /* HAVE_AIO */
490 
491 /*
492  * Does an open64 of a file. Inserts the file descriptor number returned
493  * by open() into the supplied filebench fd. Returns FILEBENCH_OK on
494  * successs, and FILEBENCH_ERROR on failure.
495  */
496 
497 static int
498 fb_lfs_open(fb_fdesc_t *fd, char *path, int flags, int perms)
499 {
500 	if ((fd->fd_num = open64(path, flags, perms)) < 0)
501 		return (FILEBENCH_ERROR);
502 	else
503 		return (FILEBENCH_OK);
504 }
505 
/*
 * Removes the file named by "path" with unlink() and returns the
 * unlink() result unchanged.
 */
static int
fb_lfs_unlink(char *path)
{
	int rc;

	rc = unlink(path);
	return (rc);
}
514 
/*
 * Reads the contents of the symbolic link "path" into "buf", which
 * holds "buf_size" bytes, using readlink(). Returns the readlink()
 * result unchanged.
 */
static ssize_t
fb_lfs_readlink(const char *path, char *buf, size_t buf_size)
{
	ssize_t len;

	len = readlink(path, buf, buf_size);
	return (len);
}
523 
524 /*
525  * Does fsync of a file. Returns with fsync return info.
526  */
527 static int
528 fb_lfs_fsync(fb_fdesc_t *fd)
529 {
530 	return (fsync(fd->fd_num));
531 }
532 
533 /*
534  * Do a posix lseek of a file. Return what lseek() returns.
535  */
536 static int
537 fb_lfs_lseek(fb_fdesc_t *fd, off64_t offset, int whence)
538 {
539 	return (lseek64(fd->fd_num, offset, whence));
540 }
541 
/*
 * Renames "old" to "new" with rename() and returns the rename()
 * result unchanged.
 */
static int
fb_lfs_rename(const char *old, const char *new)
{
	int rc;

	rc = rename(old, new);
	return (rc);
}
550 
551 
552 /*
553  * Do a posix close of a file. Return what close() returns.
554  */
555 static int
556 fb_lfs_close(fb_fdesc_t *fd)
557 {
558 	return (close(fd->fd_num));
559 }
560 
/*
 * Creates the directory "path" with mode "perm" using mkdir() and
 * returns the mkdir() result unchanged.
 */
static int
fb_lfs_mkdir(char *path, int perm)
{
	int rc;

	rc = mkdir(path, perm);
	return (rc);
}
569 
/*
 * Deletes the directory "path" with rmdir() and returns the rmdir()
 * result unchanged.
 */
static int
fb_lfs_rmdir(char *path)
{
	int rc;

	rc = rmdir(path);
	return (rc);
}
578 
/*
 * Opens the directory "path" with opendir(). Returns the directory
 * handle produced by opendir(), which is NULL on failure.
 */
static DIR *
fb_lfs_opendir(char *path)
{
	DIR *dirp;

	dirp = opendir(path);
	return (dirp);
}
588 
/*
 * Fetches the next entry of the open directory "dirp" with
 * readdir(). Returns the pointer produced by readdir(), which is
 * NULL when no entry is returned.
 */
static struct dirent *
fb_lfs_readdir(DIR *dirp)
{
	struct dirent *entry;

	entry = readdir(dirp);
	return (entry);
}
598 
/*
 * Closes the directory handle "dirp" with closedir() and returns
 * the closedir() result unchanged.
 */
static int
fb_lfs_closedir(DIR *dirp)
{
	int rc;

	rc = closedir(dirp);
	return (rc);
}
607 
608 /*
609  * Does an fstat of a file.
610  */
611 static int
612 fb_lfs_fstat(fb_fdesc_t *fd, struct stat64 *statbufp)
613 {
614 	return (fstat64(fd->fd_num, statbufp));
615 }
616 
/*
 * Fills "statbufp" with the status of the file named by "path"
 * using stat64() and returns the stat64() result unchanged.
 */
static int
fb_lfs_stat(char *path, struct stat64 *statbufp)
{
	int rc;

	rc = stat64(path, statbufp);
	return (rc);
}
625 
626 /*
627  * Do a pwrite64 to a file.
628  */
629 static int
630 fb_lfs_pwrite(fb_fdesc_t *fd, caddr_t iobuf, fbint_t iosize, off64_t offset)
631 {
632 	return (pwrite64(fd->fd_num, iobuf, iosize, offset));
633 }
634 
635 /*
636  * Do a write to a file.
637  */
638 static int
639 fb_lfs_write(fb_fdesc_t *fd, caddr_t iobuf, fbint_t iosize)
640 {
641 	return (write(fd->fd_num, iobuf, iosize));
642 }
643 
644 /*
645  * Does a truncate operation and returns the result
646  */
647 static int
648 fb_lfs_truncate(fb_fdesc_t *fd, off64_t fse_size)
649 {
650 #ifdef HAVE_FTRUNCATE64
651 	return (ftruncate64(fd->fd_num, fse_size));
652 #else
653 	return (ftruncate(fd->fd_num, (off_t)fse_size));
654 #endif
655 }
656 
/*
 * Creates the hard link "new" for the file "existing" with link()
 * and returns the link() result unchanged.
 */
static int
fb_lfs_link(const char *existing, const char *new)
{
	int rc;

	rc = link(existing, new);
	return (rc);
}
665 
/*
 * Creates the symbolic link "new" referring to "existing" with
 * symlink() and returns the symlink() result unchanged.
 */
static int
fb_lfs_symlink(const char *existing, const char *new)
{
	int rc;

	rc = symlink(existing, new);
	return (rc);
}
674 
/*
 * Checks the accessibility of "path" for the mode bits in "amode"
 * with access() and returns the access() result unchanged.
 */
static int
fb_lfs_access(const char *path, int amode)
{
	int rc;

	rc = access(path, amode);
	return (rc);
}
683