10Sstevel@tonic-gate /*
20Sstevel@tonic-gate  * CDDL HEADER START
30Sstevel@tonic-gate  *
40Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
50Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
60Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
70Sstevel@tonic-gate  * with the License.
80Sstevel@tonic-gate  *
90Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
100Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
110Sstevel@tonic-gate  * See the License for the specific language governing permissions
120Sstevel@tonic-gate  * and limitations under the License.
130Sstevel@tonic-gate  *
140Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
150Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
160Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
170Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
180Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
190Sstevel@tonic-gate  *
200Sstevel@tonic-gate  * CDDL HEADER END
210Sstevel@tonic-gate  */
220Sstevel@tonic-gate /*
23*249Sjwahlig  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
240Sstevel@tonic-gate  * Use is subject to license terms.
250Sstevel@tonic-gate  */
260Sstevel@tonic-gate 
270Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
280Sstevel@tonic-gate 
290Sstevel@tonic-gate /*
300Sstevel@tonic-gate  * nfs log - read buffer file and return structs in usable form
310Sstevel@tonic-gate  */
320Sstevel@tonic-gate 
330Sstevel@tonic-gate #include <ctype.h>
340Sstevel@tonic-gate #include <stdio.h>
350Sstevel@tonic-gate #include <stdlib.h>
360Sstevel@tonic-gate #include <stddef.h>
370Sstevel@tonic-gate #include <string.h>
380Sstevel@tonic-gate #include <fcntl.h>
390Sstevel@tonic-gate #include <unistd.h>
400Sstevel@tonic-gate #include <signal.h>
410Sstevel@tonic-gate #include <sys/types.h>
420Sstevel@tonic-gate #include <sys/param.h>
430Sstevel@tonic-gate #include <sys/stat.h>
440Sstevel@tonic-gate #include <sys/utsname.h>
450Sstevel@tonic-gate #include <sys/mman.h>
460Sstevel@tonic-gate #include <strings.h>
470Sstevel@tonic-gate #include <errno.h>
480Sstevel@tonic-gate #include <syslog.h>
490Sstevel@tonic-gate #include <time.h>
500Sstevel@tonic-gate #include <limits.h>
510Sstevel@tonic-gate #include <libintl.h>
520Sstevel@tonic-gate #include <values.h>
530Sstevel@tonic-gate #include <search.h>
540Sstevel@tonic-gate #include <pwd.h>
550Sstevel@tonic-gate #include <netdb.h>
560Sstevel@tonic-gate #include <rpc/rpc.h>
570Sstevel@tonic-gate #include <netconfig.h>
580Sstevel@tonic-gate #include <netdir.h>
590Sstevel@tonic-gate #include <nfs/nfs_sec.h>
600Sstevel@tonic-gate #include <nfs/export.h>
610Sstevel@tonic-gate #include <rpc/auth.h>
620Sstevel@tonic-gate #include <rpc/svc.h>
630Sstevel@tonic-gate #include <rpc/xdr.h>
640Sstevel@tonic-gate #include <rpc/clnt.h>
650Sstevel@tonic-gate #include <nfs/nfs.h>
660Sstevel@tonic-gate #include <nfs/nfs_log.h>
670Sstevel@tonic-gate #include "nfslogd.h"
680Sstevel@tonic-gate 
/*
 * Tunable limits.
 * NOTE(review): presumably bounds on how far ahead records are read and on
 * how many out-of-order records may be held back; their users are not
 * visible in this part of the file -- confirm against the callers.
 */
#define	MAX_LRS_READ_AHEAD 2048
#define	MAX_RECS_TO_DELAY 32768

/* Forward declarations of the file-local helpers */
static int nfslog_init_buf(char *bufpath, struct nfslog_buf *lbp,
    int *error);
static void nfslog_free_buf(struct nfslog_buf *lbp, int close_quick);
static struct nfslog_lr *nfslog_read_buffer(struct nfslog_buf *lbp);
static void free_lrp(struct nfslog_lr *lrp);
static struct nfslog_lr *remove_lrp_from_lb(struct nfslog_buf *lbp,
    struct nfslog_lr *lrp);
static void insert_lrp_to_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp);
static void nfslog_rewrite_bufheader(struct nfslog_buf *lbp);
810Sstevel@tonic-gate 
820Sstevel@tonic-gate /*
830Sstevel@tonic-gate  * Treat the provided path name as an NFS log buffer file.
840Sstevel@tonic-gate  * Allocate a data structure for its handling and initialize it.
850Sstevel@tonic-gate  * *error contains the previous error condition encountered for
860Sstevel@tonic-gate  * this object. This value can be used to avoid printing the last
870Sstevel@tonic-gate  * error endlessly.
880Sstevel@tonic-gate  * It will set *error appropriately after processing.
890Sstevel@tonic-gate  */
900Sstevel@tonic-gate struct nfslog_buf *
910Sstevel@tonic-gate nfslog_open_buf(char *bufpath, int *error)
920Sstevel@tonic-gate {
930Sstevel@tonic-gate 	struct nfslog_buf	*lbp = NULL;
940Sstevel@tonic-gate 
950Sstevel@tonic-gate 	if (bufpath == NULL) {
960Sstevel@tonic-gate 		*error = EINVAL;
970Sstevel@tonic-gate 		return (NULL);
980Sstevel@tonic-gate 	}
990Sstevel@tonic-gate 
1000Sstevel@tonic-gate 	if ((lbp = malloc(sizeof (struct nfslog_buf))) == NULL) {
1010Sstevel@tonic-gate 		*error = ENOMEM;
1020Sstevel@tonic-gate 		return (NULL);
1030Sstevel@tonic-gate 	}
1040Sstevel@tonic-gate 	bzero(lbp, sizeof (struct nfslog_buf));
1050Sstevel@tonic-gate 
1060Sstevel@tonic-gate 	if (nfslog_init_buf(bufpath, lbp, error)) {
1070Sstevel@tonic-gate 		free(lbp);
1080Sstevel@tonic-gate 		return (NULL);
1090Sstevel@tonic-gate 	}
1100Sstevel@tonic-gate 	return (lbp);
1110Sstevel@tonic-gate }
1120Sstevel@tonic-gate 
/*
 * Release a log buffer handle obtained from nfslog_open_buf().
 *
 * nfslog_free_buf() is run on the handle first (with 'close_quick' passed
 * straight through) and then the handle itself is freed.  A NULL handle is
 * accepted and ignored, in the same spirit as free(NULL).
 */
void
nfslog_close_buf(struct nfslog_buf *lbp, int close_quick)
{
	if (lbp == NULL)
		return;

	nfslog_free_buf(lbp, close_quick);
	free(lbp);
}
1220Sstevel@tonic-gate 
1230Sstevel@tonic-gate /*
1240Sstevel@tonic-gate  * Set up the log buffer struct; simple things are opening and locking
1250Sstevel@tonic-gate  * the buffer file and then on to mmap()ing it for later use by the
1260Sstevel@tonic-gate  * XDR decode path.  Make sure to read the buffer header before
1270Sstevel@tonic-gate  * returning so that we will be at the first true log record.
1280Sstevel@tonic-gate  *
1290Sstevel@tonic-gate  * *error contains the last error encountered on this object. It can
1300Sstevel@tonic-gate  * be used to avoid reporting the same error endlessly. It is reset
1310Sstevel@tonic-gate  * to the current error code on return.
1320Sstevel@tonic-gate  */
1330Sstevel@tonic-gate static int
1340Sstevel@tonic-gate nfslog_init_buf(char *bufpath, struct nfslog_buf *lbp, int *error)
1350Sstevel@tonic-gate {
1360Sstevel@tonic-gate 	struct stat sb;
1370Sstevel@tonic-gate 	int preverror = *error;
1380Sstevel@tonic-gate 
1390Sstevel@tonic-gate 	lbp->next = lbp;
1400Sstevel@tonic-gate 	lbp->prev = lbp;
1410Sstevel@tonic-gate 	/*
1420Sstevel@tonic-gate 	 * set these values so that the free routine will know what to do
1430Sstevel@tonic-gate 	 */
1440Sstevel@tonic-gate 	lbp->mmap_addr = (intptr_t)MAP_FAILED;
1450Sstevel@tonic-gate 	lbp->last_rec_id = MAXINT - 1;
1460Sstevel@tonic-gate 	lbp->bh.bh_length = 0;
1470Sstevel@tonic-gate 	lbp->bh_lrp = NULL;
1480Sstevel@tonic-gate 	lbp->num_lrps = 0;
1490Sstevel@tonic-gate 	lbp->lrps = NULL;
1500Sstevel@tonic-gate 	lbp->last_record_offset = 0;
1510Sstevel@tonic-gate 	lbp->prp = NULL;
1520Sstevel@tonic-gate 	lbp->num_pr_queued = 0;
1530Sstevel@tonic-gate 
1540Sstevel@tonic-gate 	lbp->bufpath = strdup(bufpath);
1550Sstevel@tonic-gate 	if (lbp->bufpath == NULL) {
1560Sstevel@tonic-gate 		*error = ENOMEM;
1570Sstevel@tonic-gate 		if (preverror != *error) {
1580Sstevel@tonic-gate 			syslog(LOG_ERR, gettext("Cannot strdup '%s': %s"),
1590Sstevel@tonic-gate 				bufpath, strerror(*error));
1600Sstevel@tonic-gate 		}
1610Sstevel@tonic-gate 		nfslog_free_buf(lbp, FALSE);
1620Sstevel@tonic-gate 		return (*error);
1630Sstevel@tonic-gate 	}
1640Sstevel@tonic-gate 
1650Sstevel@tonic-gate 	if ((lbp->fd = open(bufpath, O_RDWR)) < 0) {
1660Sstevel@tonic-gate 		*error = errno;
1670Sstevel@tonic-gate 		if (preverror != *error) {
1680Sstevel@tonic-gate 			syslog(LOG_ERR, gettext("Cannot open '%s': %s"),
1690Sstevel@tonic-gate 				bufpath, strerror(*error));
1700Sstevel@tonic-gate 		}
1710Sstevel@tonic-gate 		nfslog_free_buf(lbp, FALSE);
1720Sstevel@tonic-gate 		return (*error);
1730Sstevel@tonic-gate 	}
1740Sstevel@tonic-gate 
1750Sstevel@tonic-gate 	/*
1760Sstevel@tonic-gate 	 * Lock the entire buffer file to prevent conflicting access.
1770Sstevel@tonic-gate 	 * We get a write lock because we want only 1 process to be
1780Sstevel@tonic-gate 	 * generating records from it.
1790Sstevel@tonic-gate 	 */
1800Sstevel@tonic-gate 	lbp->fl.l_type = F_WRLCK;
1810Sstevel@tonic-gate 	lbp->fl.l_whence = SEEK_SET;		/* beginning of file */
1820Sstevel@tonic-gate 	lbp->fl.l_start = (offset_t)0;
1830Sstevel@tonic-gate 	lbp->fl.l_len = 0;			/* entire file */
1840Sstevel@tonic-gate 	lbp->fl.l_sysid = 0;
1850Sstevel@tonic-gate 	lbp->fl.l_pid = 0;
1860Sstevel@tonic-gate 	if (fcntl(lbp->fd, F_SETLKW, &lbp->fl) == -1) {
1870Sstevel@tonic-gate 		*error = errno;
1880Sstevel@tonic-gate 		if (preverror != *error) {
1890Sstevel@tonic-gate 			syslog(LOG_ERR, gettext("Cannot lock (%s): %s"),
1900Sstevel@tonic-gate 				bufpath, strerror(*error));
1910Sstevel@tonic-gate 		}
1920Sstevel@tonic-gate 		nfslog_free_buf(lbp, FALSE);
1930Sstevel@tonic-gate 		return (*error);
1940Sstevel@tonic-gate 	}
1950Sstevel@tonic-gate 
1960Sstevel@tonic-gate 	if (fstat(lbp->fd, &sb)) {
1970Sstevel@tonic-gate 		*error = errno;
1980Sstevel@tonic-gate 		if (preverror != *error) {
1990Sstevel@tonic-gate 			syslog(LOG_ERR, gettext("Cannot stat (%s): %s"),
2000Sstevel@tonic-gate 				bufpath, strerror(*error));
2010Sstevel@tonic-gate 		}
2020Sstevel@tonic-gate 		nfslog_free_buf(lbp, FALSE);
2030Sstevel@tonic-gate 		return (*error);
2040Sstevel@tonic-gate 	}
2050Sstevel@tonic-gate 	lbp->filesize = sb.st_size;
2060Sstevel@tonic-gate 
2070Sstevel@tonic-gate 	lbp->mmap_addr = (intptr_t)mmap(0, lbp->filesize, PROT_READ|PROT_WRITE,
2080Sstevel@tonic-gate 		MAP_SHARED|MAP_NORESERVE, lbp->fd, 0);
2090Sstevel@tonic-gate 
2100Sstevel@tonic-gate 	/* This is part of the duality of the use of either mmap()|read() */
2110Sstevel@tonic-gate 	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
2120Sstevel@tonic-gate 		lbp->next_rec = 0;
2130Sstevel@tonic-gate 	} else {
2140Sstevel@tonic-gate 		lbp->next_rec = lbp->mmap_addr;
2150Sstevel@tonic-gate 	}
2160Sstevel@tonic-gate 
2170Sstevel@tonic-gate 	/* Read the header */
2180Sstevel@tonic-gate 	if ((lbp->bh_lrp = nfslog_read_buffer(lbp)) == NULL) {
2190Sstevel@tonic-gate 		*error = EIO;
2200Sstevel@tonic-gate 		if (preverror != *error) {
2210Sstevel@tonic-gate 			syslog(LOG_ERR, gettext(
2220Sstevel@tonic-gate 				"error in reading file '%s': %s"),
2230Sstevel@tonic-gate 				bufpath, strerror(EIO));
2240Sstevel@tonic-gate 		}
2250Sstevel@tonic-gate 		nfslog_free_buf(lbp, FALSE);
2260Sstevel@tonic-gate 		return (*error);
2270Sstevel@tonic-gate 	}
2280Sstevel@tonic-gate 
2290Sstevel@tonic-gate 	if (!xdr_nfslog_buffer_header(&lbp->bh_lrp->xdrs, &lbp->bh)) {
2300Sstevel@tonic-gate 		*error = EIO;
2310Sstevel@tonic-gate 		if (preverror != *error) {
2320Sstevel@tonic-gate 			syslog(LOG_ERR, gettext(
2330Sstevel@tonic-gate 				"error in reading file '%s': %s"),
2340Sstevel@tonic-gate 				bufpath, strerror(*error));
2350Sstevel@tonic-gate 		}
2360Sstevel@tonic-gate 		nfslog_free_buf(lbp, FALSE);
2370Sstevel@tonic-gate 		return (*error);
2380Sstevel@tonic-gate 	}
2390Sstevel@tonic-gate 
2400Sstevel@tonic-gate 	/*
2410Sstevel@tonic-gate 	 * Set the pointer to the next record based on the buffer header.
2420Sstevel@tonic-gate 	 * 'lbp->bh.bh_offset' contains the offset of where to begin
2430Sstevel@tonic-gate 	 * processing relative to the buffer header.
2440Sstevel@tonic-gate 	 */
2450Sstevel@tonic-gate 	lbp->next_rec += lbp->bh.bh_offset;
2460Sstevel@tonic-gate 
2470Sstevel@tonic-gate 	/*
2480Sstevel@tonic-gate 	 * If we are going to be using read() for file data, then we may
2490Sstevel@tonic-gate 	 * have to adjust the current file pointer to take into account
2500Sstevel@tonic-gate 	 * a starting point other than the beginning of the file.
2510Sstevel@tonic-gate 	 * If mmap is being used, this is taken care of as a side effect of
2520Sstevel@tonic-gate 	 * setting up the value of next_rec.
2530Sstevel@tonic-gate 	 */
2540Sstevel@tonic-gate 	if (lbp->mmap_addr == (intptr_t)MAP_FAILED && lbp->next_rec != 0) {
2550Sstevel@tonic-gate 		(void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
2560Sstevel@tonic-gate 		/* This is a special case of setting the last_record_offset */
2570Sstevel@tonic-gate 		lbp->last_record_offset = lbp->next_rec;
2580Sstevel@tonic-gate 	} else {
2590Sstevel@tonic-gate 		lbp->last_record_offset = lbp->next_rec - lbp->mmap_addr;
2600Sstevel@tonic-gate 	}
2610Sstevel@tonic-gate 
2620Sstevel@tonic-gate 	return (*error = 0);
2630Sstevel@tonic-gate }
2640Sstevel@tonic-gate 
2650Sstevel@tonic-gate /*
2660Sstevel@tonic-gate  * Free the nfslog buffer and its associated allocations
2670Sstevel@tonic-gate  */
2680Sstevel@tonic-gate static void
2690Sstevel@tonic-gate nfslog_free_buf(struct nfslog_buf *lbp, int close_quick)
2700Sstevel@tonic-gate {
2710Sstevel@tonic-gate 	XDR	xdrs;
2720Sstevel@tonic-gate 	int	error;
2730Sstevel@tonic-gate 	caddr_t buffer;
2740Sstevel@tonic-gate 	struct nfslog_lr *lrp, *lrp_next;
2750Sstevel@tonic-gate 	struct processed_records *prp, *tprp;
2760Sstevel@tonic-gate 
2770Sstevel@tonic-gate 	/* work to free the offset records and rewrite header */
2780Sstevel@tonic-gate 	if (lbp->prp) {
2790Sstevel@tonic-gate 		if (lbp->last_record_offset == lbp->prp->start_offset) {
2800Sstevel@tonic-gate 
2810Sstevel@tonic-gate 			/* adjust the offset for the entire buffer */
2820Sstevel@tonic-gate 			lbp->last_record_offset =
2830Sstevel@tonic-gate 				lbp->prp->start_offset + lbp->prp->len;
2840Sstevel@tonic-gate 
2850Sstevel@tonic-gate 			nfslog_rewrite_bufheader(lbp);
2860Sstevel@tonic-gate 		}
2870Sstevel@tonic-gate 		if (close_quick)
2880Sstevel@tonic-gate 			return;
2890Sstevel@tonic-gate 		prp = lbp->prp;
2900Sstevel@tonic-gate 		do {
2910Sstevel@tonic-gate 			tprp = prp->next;
2920Sstevel@tonic-gate 			free(prp);
2930Sstevel@tonic-gate 			prp = tprp;
2940Sstevel@tonic-gate 		} while (lbp->prp != prp);
2950Sstevel@tonic-gate 	}
2960Sstevel@tonic-gate 
2970Sstevel@tonic-gate 	if (close_quick)
2980Sstevel@tonic-gate 		return;
2990Sstevel@tonic-gate 
3000Sstevel@tonic-gate 	/* Take care of the queue log records first */
3010Sstevel@tonic-gate 	if (lbp->lrps != NULL) {
3020Sstevel@tonic-gate 		lrp = lbp->lrps;
3030Sstevel@tonic-gate 		do {
3040Sstevel@tonic-gate 			lrp_next = lrp->next;
3050Sstevel@tonic-gate 			nfslog_free_logrecord(lrp, FALSE);
3060Sstevel@tonic-gate 			lrp = lrp_next;
3070Sstevel@tonic-gate 		} while (lrp != lbp->lrps);
3080Sstevel@tonic-gate 		lbp->lrps = NULL;
3090Sstevel@tonic-gate 	}
3100Sstevel@tonic-gate 
3110Sstevel@tonic-gate 	/* The buffer header was decoded and needs to be freed */
3120Sstevel@tonic-gate 	if (lbp->bh.bh_length != 0) {
3130Sstevel@tonic-gate 		buffer = (lbp->bh_lrp->buffer != NULL ?
3140Sstevel@tonic-gate 			lbp->bh_lrp->buffer : (caddr_t)lbp->mmap_addr);
3150Sstevel@tonic-gate 		xdrmem_create(&xdrs, buffer, lbp->bh_lrp->recsize, XDR_FREE);
3160Sstevel@tonic-gate 		(void) xdr_nfslog_buffer_header(&xdrs, &lbp->bh);
3170Sstevel@tonic-gate 		lbp->bh.bh_length = 0;
3180Sstevel@tonic-gate 	}
3190Sstevel@tonic-gate 
3200Sstevel@tonic-gate 	/* get rid of the bufheader lrp */
3210Sstevel@tonic-gate 	if (lbp->bh_lrp != NULL) {
3220Sstevel@tonic-gate 		free_lrp(lbp->bh_lrp);
3230Sstevel@tonic-gate 		lbp->bh_lrp = NULL;
3240Sstevel@tonic-gate 	}
3250Sstevel@tonic-gate 
3260Sstevel@tonic-gate 	/* Clean up for mmap() usage */
3270Sstevel@tonic-gate 	if (lbp->mmap_addr != (intptr_t)MAP_FAILED) {
3280Sstevel@tonic-gate 		if (munmap((void *)lbp->mmap_addr, lbp->filesize)) {
3290Sstevel@tonic-gate 			error = errno;
3300Sstevel@tonic-gate 			syslog(LOG_ERR, gettext("munmap failed: %s: %s"),
3310Sstevel@tonic-gate 				(lbp->bufpath != NULL ? lbp->bufpath : ""),
3320Sstevel@tonic-gate 				strerror(error));
3330Sstevel@tonic-gate 		}
3340Sstevel@tonic-gate 		lbp->mmap_addr = (intptr_t)MAP_FAILED;
3350Sstevel@tonic-gate 	}
3360Sstevel@tonic-gate 
3370Sstevel@tonic-gate 	/* Finally close the buffer file */
3380Sstevel@tonic-gate 	if (lbp->fd >= 0) {
3390Sstevel@tonic-gate 		lbp->fl.l_type = F_UNLCK;
3400Sstevel@tonic-gate 		if (fcntl(lbp->fd, F_SETLK, &lbp->fl) == -1) {
3410Sstevel@tonic-gate 			error = errno;
3420Sstevel@tonic-gate 			syslog(LOG_ERR,
3430Sstevel@tonic-gate 				gettext("Cannot unlock file %s: %s"),
3440Sstevel@tonic-gate 				(lbp->bufpath != NULL ? lbp->bufpath : ""),
3450Sstevel@tonic-gate 				strerror(error));
3460Sstevel@tonic-gate 		}
3470Sstevel@tonic-gate 		(void) close(lbp->fd);
3480Sstevel@tonic-gate 		lbp->fd = -1;
3490Sstevel@tonic-gate 	}
3500Sstevel@tonic-gate 	if (lbp->bufpath != NULL)
3510Sstevel@tonic-gate 		free(lbp->bufpath);
3520Sstevel@tonic-gate }
3530Sstevel@tonic-gate 
3540Sstevel@tonic-gate /*
3550Sstevel@tonic-gate  * We are reading a record from the log buffer file.  Since we are reading
3560Sstevel@tonic-gate  * an XDR stream, we first have to read the first integer to determine
3570Sstevel@tonic-gate  * how much to read in whole for this record.  Our preference is to use
3580Sstevel@tonic-gate  * mmap() but if failed initially we will be using read().  Need to be
3590Sstevel@tonic-gate  * careful about proper initialization of the log record both from a field
3600Sstevel@tonic-gate  * perspective and for XDR decoding.
3610Sstevel@tonic-gate  */
3620Sstevel@tonic-gate static struct nfslog_lr *
3630Sstevel@tonic-gate nfslog_read_buffer(struct nfslog_buf *lbp)
3640Sstevel@tonic-gate {
3650Sstevel@tonic-gate 	XDR xdrs;
3660Sstevel@tonic-gate 	unsigned int	record_size;
3670Sstevel@tonic-gate 	struct nfslog_lr *lrp;
3680Sstevel@tonic-gate 	char		*sizebuf, tbuf[16];
3690Sstevel@tonic-gate 	caddr_t		buffer;
3700Sstevel@tonic-gate 	offset_t	next_rec;
3710Sstevel@tonic-gate 
3720Sstevel@tonic-gate 	lrp = (struct nfslog_lr *)malloc(sizeof (*lrp));
3730Sstevel@tonic-gate 	bzero(lrp, sizeof (*lrp));
3740Sstevel@tonic-gate 
3750Sstevel@tonic-gate 	/* Check to see if mmap worked */
3760Sstevel@tonic-gate 	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
3770Sstevel@tonic-gate 		/*
3780Sstevel@tonic-gate 		 * EOF or other failure; we don't try to recover, just return
3790Sstevel@tonic-gate 		 */
3800Sstevel@tonic-gate 		if (read(lbp->fd, tbuf, BYTES_PER_XDR_UNIT) <= 0) {
3810Sstevel@tonic-gate 			free_lrp(lrp);
3820Sstevel@tonic-gate 			return (NULL);
3830Sstevel@tonic-gate 		}
3840Sstevel@tonic-gate 		sizebuf = tbuf;
3850Sstevel@tonic-gate 	} else {
3860Sstevel@tonic-gate 		/* EOF check for the mmap() case */
3870Sstevel@tonic-gate 		if (lbp->filesize <= lbp->next_rec - lbp->mmap_addr) {
3880Sstevel@tonic-gate 			free_lrp(lrp);
3890Sstevel@tonic-gate 			return (NULL);
3900Sstevel@tonic-gate 		}
391*249Sjwahlig 		sizebuf = (char *)(uintptr_t)lbp->next_rec;
3920Sstevel@tonic-gate 	}
3930Sstevel@tonic-gate 
3940Sstevel@tonic-gate 	/* We have to XDR the first int so we know how much is in this record */
3950Sstevel@tonic-gate 	xdrmem_create(&xdrs, sizebuf, sizeof (unsigned int), XDR_DECODE);
3960Sstevel@tonic-gate 
3970Sstevel@tonic-gate 	if (!xdr_u_int(&xdrs, &record_size)) {
3980Sstevel@tonic-gate 		free_lrp(lrp);
3990Sstevel@tonic-gate 		return (NULL);
4000Sstevel@tonic-gate 	}
4010Sstevel@tonic-gate 
4020Sstevel@tonic-gate 	lrp->recsize = record_size;
4030Sstevel@tonic-gate 	next_rec = lbp->next_rec + lrp->recsize;
4040Sstevel@tonic-gate 
4050Sstevel@tonic-gate 	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
4060Sstevel@tonic-gate 		/*
4070Sstevel@tonic-gate 		 * Read() case - shouldn't be used very much.
4080Sstevel@tonic-gate 		 * Note: The 'buffer' field is used later on
4090Sstevel@tonic-gate 		 * to determine which method is being used mmap()|read()
4100Sstevel@tonic-gate 		 */
4110Sstevel@tonic-gate 		if (lbp->filesize < next_rec) {
4120Sstevel@tonic-gate 			/* partial record from buffer */
4130Sstevel@tonic-gate 			syslog(LOG_ERR, gettext(
4140Sstevel@tonic-gate 				"Last partial record in work buffer %s "
4150Sstevel@tonic-gate 				"discarded\n"), lbp->bufpath);
4160Sstevel@tonic-gate 			free_lrp(lrp);
4170Sstevel@tonic-gate 			return (NULL);
4180Sstevel@tonic-gate 		}
4190Sstevel@tonic-gate 
4200Sstevel@tonic-gate 		if ((lrp->buffer = malloc(lrp->recsize)) == NULL) {
4210Sstevel@tonic-gate 			free_lrp(lrp);
4220Sstevel@tonic-gate 			return (NULL);
4230Sstevel@tonic-gate 		}
4240Sstevel@tonic-gate 		bcopy(sizebuf, lrp->buffer, BYTES_PER_XDR_UNIT);
4250Sstevel@tonic-gate 		if (read(lbp->fd, &lrp->buffer[BYTES_PER_XDR_UNIT],
4260Sstevel@tonic-gate 			lrp->recsize - BYTES_PER_XDR_UNIT) <= 0) {
4270Sstevel@tonic-gate 			free_lrp(lrp);
4280Sstevel@tonic-gate 			return (NULL);
4290Sstevel@tonic-gate 		}
4300Sstevel@tonic-gate 	} else if (lbp->filesize < next_rec - lbp->mmap_addr) {
4310Sstevel@tonic-gate 			/* partial record from buffer */
4320Sstevel@tonic-gate 			syslog(LOG_ERR, gettext(
4330Sstevel@tonic-gate 				"Last partial record in work buffer %s "
4340Sstevel@tonic-gate 				"discarded\n"), lbp->bufpath);
4350Sstevel@tonic-gate 			free_lrp(lrp);
4360Sstevel@tonic-gate 			return (NULL);
4370Sstevel@tonic-gate 	}
4380Sstevel@tonic-gate 
4390Sstevel@tonic-gate 
4400Sstevel@tonic-gate 	/* other initializations */
4410Sstevel@tonic-gate 	lrp->next = lrp->prev = lrp;
4420Sstevel@tonic-gate 	/* Keep track of the offset at which this record was read */
4430Sstevel@tonic-gate 	if (lbp->mmap_addr == (intptr_t)MAP_FAILED)
4440Sstevel@tonic-gate 		lrp->f_offset = lbp->next_rec;
4450Sstevel@tonic-gate 	else
4460Sstevel@tonic-gate 		lrp->f_offset = lbp->next_rec - lbp->mmap_addr;
4470Sstevel@tonic-gate 	/* This is the true address of the record */
4480Sstevel@tonic-gate 	lrp->record = lbp->next_rec;
4490Sstevel@tonic-gate 	lrp->xdrargs = lrp->xdrres = NULL;
4500Sstevel@tonic-gate 	lrp->lbp = lbp;
4510Sstevel@tonic-gate 
4520Sstevel@tonic-gate 	/* Here is the logic for mmap() vs. read() */
4530Sstevel@tonic-gate 	buffer = (lrp->buffer != NULL ? lrp->buffer : (caddr_t)lrp->record);
4540Sstevel@tonic-gate 
4550Sstevel@tonic-gate 	/* Setup for the 'real' XDR decode of the entire record */
4560Sstevel@tonic-gate 	xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_DECODE);
4570Sstevel@tonic-gate 
4580Sstevel@tonic-gate 	/* calculate the offset for the next record */
4590Sstevel@tonic-gate 	lbp->next_rec = next_rec;
4600Sstevel@tonic-gate 
4610Sstevel@tonic-gate 	return (lrp);
4620Sstevel@tonic-gate }
4630Sstevel@tonic-gate 
4640Sstevel@tonic-gate /*
4650Sstevel@tonic-gate  * Simple removal of the log record from the log buffer queue.
4660Sstevel@tonic-gate  * Make sure to manage the count of records queued.
4670Sstevel@tonic-gate  */
4680Sstevel@tonic-gate static struct nfslog_lr *
4690Sstevel@tonic-gate remove_lrp_from_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
4700Sstevel@tonic-gate {
4710Sstevel@tonic-gate 	if (lbp->lrps == lrp) {
4720Sstevel@tonic-gate 		if (lbp->lrps == lbp->lrps->next) {
4730Sstevel@tonic-gate 			lbp->lrps = NULL;
4740Sstevel@tonic-gate 		} else {
4750Sstevel@tonic-gate 			lbp->lrps = lrp->next;
4760Sstevel@tonic-gate 			remque(lrp);
4770Sstevel@tonic-gate 		}
4780Sstevel@tonic-gate 	} else {
4790Sstevel@tonic-gate 		remque(lrp);
4800Sstevel@tonic-gate 	}
4810Sstevel@tonic-gate 	lbp->num_lrps--;
4820Sstevel@tonic-gate 	return (lrp);
4830Sstevel@tonic-gate }
4840Sstevel@tonic-gate 
4850Sstevel@tonic-gate /*
4860Sstevel@tonic-gate  * Insert a log record struct on the log buffer struct.  The log buffer
4870Sstevel@tonic-gate  * has a pointer to the head of a queue of log records that have been
4880Sstevel@tonic-gate  * read from the buffer file but have not been processed yet because
4890Sstevel@tonic-gate  * the record id did not match the sequence desired for processing.
4900Sstevel@tonic-gate  * The insertion must be in the 'correct'/sorted order which adds
4910Sstevel@tonic-gate  * to the complexity of this function.
4920Sstevel@tonic-gate  */
4930Sstevel@tonic-gate static void
4940Sstevel@tonic-gate insert_lrp_to_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
4950Sstevel@tonic-gate {
4960Sstevel@tonic-gate 	int ins_rec_id = lrp->log_record.re_header.rh_rec_id;
4970Sstevel@tonic-gate 	struct nfslog_lr *curlrp;
4980Sstevel@tonic-gate 
4990Sstevel@tonic-gate 	if (lbp->lrps == NULL) {
5000Sstevel@tonic-gate 		/* that was easy */
5010Sstevel@tonic-gate 		lbp->lrps = lrp;
5020Sstevel@tonic-gate 	} else {
5030Sstevel@tonic-gate 		/*
5040Sstevel@tonic-gate 		 * Does this lrp go before the first on the list?
5050Sstevel@tonic-gate 		 * If so, do the insertion by hand since insque is not
5060Sstevel@tonic-gate 		 * as flexible when queueing an element to the head of
5070Sstevel@tonic-gate 		 * a list.
5080Sstevel@tonic-gate 		 */
5090Sstevel@tonic-gate 		if (ins_rec_id < lbp->lrps->log_record.re_header.rh_rec_id) {
5100Sstevel@tonic-gate 			lrp->next = lbp->lrps;
5110Sstevel@tonic-gate 			lrp->prev = lbp->lrps->prev;
5120Sstevel@tonic-gate 			lbp->lrps->prev->next = lrp;
5130Sstevel@tonic-gate 			lbp->lrps->prev = lrp;
5140Sstevel@tonic-gate 			lbp->lrps = lrp;
5150Sstevel@tonic-gate 		} else {
5160Sstevel@tonic-gate 			/*
5170Sstevel@tonic-gate 			 * Search the queue for the correct insertion point.
5180Sstevel@tonic-gate 			 * Be careful about the insque so that the record
5190Sstevel@tonic-gate 			 * ends up in the right place.
5200Sstevel@tonic-gate 			 */
5210Sstevel@tonic-gate 			curlrp = lbp->lrps;
5220Sstevel@tonic-gate 			do {
5230Sstevel@tonic-gate 				if (ins_rec_id <
5240Sstevel@tonic-gate 				curlrp->next->log_record.re_header.rh_rec_id)
5250Sstevel@tonic-gate 					break;
5260Sstevel@tonic-gate 				curlrp = curlrp->next;
5270Sstevel@tonic-gate 			} while (curlrp != lbp->lrps);
5280Sstevel@tonic-gate 			if (curlrp == lbp->lrps)
5290Sstevel@tonic-gate 				insque(lrp, lbp->lrps->prev);
5300Sstevel@tonic-gate 			else
5310Sstevel@tonic-gate 				insque(lrp, curlrp);
5320Sstevel@tonic-gate 		}
5330Sstevel@tonic-gate 	}
5340Sstevel@tonic-gate 	/* always keep track of how many we have */
5350Sstevel@tonic-gate 	lbp->num_lrps++;
5360Sstevel@tonic-gate }
5370Sstevel@tonic-gate 
5380Sstevel@tonic-gate /*
5390Sstevel@tonic-gate  * We are rewriting the buffer header at the start of the log buffer
5400Sstevel@tonic-gate  * for the sole purpose of resetting the bh_offset field.  This is
5410Sstevel@tonic-gate  * supposed to represent the progress that the nfslogd daemon has made
5420Sstevel@tonic-gate  * in its processing of the log buffer file.
5430Sstevel@tonic-gate  * 'lbp->last_record_offset' contains the absolute offset of the end
5440Sstevel@tonic-gate  * of the last element processed. The on-disk buffer offset is relative
5450Sstevel@tonic-gate  * to the buffer header, therefore we subtract the length of the buffer
5460Sstevel@tonic-gate  * header from the absolute offset.
5470Sstevel@tonic-gate  */
5480Sstevel@tonic-gate static void
5490Sstevel@tonic-gate nfslog_rewrite_bufheader(struct nfslog_buf *lbp)
5500Sstevel@tonic-gate {
5510Sstevel@tonic-gate 	XDR xdrs;
5520Sstevel@tonic-gate 	nfslog_buffer_header bh;
5530Sstevel@tonic-gate 	/* size big enough for buffer header encode */
5540Sstevel@tonic-gate #define	XBUFSIZE 128
5550Sstevel@tonic-gate 	char buffer[XBUFSIZE];
5560Sstevel@tonic-gate 	unsigned int wsize;
5570Sstevel@tonic-gate 
5580Sstevel@tonic-gate 	/*
5590Sstevel@tonic-gate 	 * if version 1 buffer is large and the current offset cannot be
5600Sstevel@tonic-gate 	 * represented, then don't update the offset in the buffer.
5610Sstevel@tonic-gate 	 */
5620Sstevel@tonic-gate 	if (lbp->bh.bh_flags & NFSLOG_BH_OFFSET_OVERFLOW) {
5630Sstevel@tonic-gate 		/* No need to update the header - offset too big */
5640Sstevel@tonic-gate 		return;
5650Sstevel@tonic-gate 	}
5660Sstevel@tonic-gate 	/*
5670Sstevel@tonic-gate 	 * build the buffer header from the original that was saved
5680Sstevel@tonic-gate 	 * on initialization; note that the offset is taken from the
5690Sstevel@tonic-gate 	 * last record processed (the last offset that represents
5700Sstevel@tonic-gate 	 * all records processed without any holes in the processing)
5710Sstevel@tonic-gate 	 */
5720Sstevel@tonic-gate 	bh = lbp->bh;
5730Sstevel@tonic-gate 
5740Sstevel@tonic-gate 	/*
5750Sstevel@tonic-gate 	 * if version 1 buffer is large and the current offset cannot be
5760Sstevel@tonic-gate 	 * represented in 32 bits, then save only the last valid offset
5770Sstevel@tonic-gate 	 * in the buffer and mark the flags to indicate that.
5780Sstevel@tonic-gate 	 */
5790Sstevel@tonic-gate 	if ((bh.bh_version > 1) ||
5800Sstevel@tonic-gate 		(lbp->last_record_offset - bh.bh_length < UINT32_MAX)) {
5810Sstevel@tonic-gate 		bh.bh_offset = lbp->last_record_offset - bh.bh_length;
5820Sstevel@tonic-gate 	} else {
5830Sstevel@tonic-gate 		/* don't update the offset in the buffer */
5840Sstevel@tonic-gate 		bh.bh_flags |= NFSLOG_BH_OFFSET_OVERFLOW;
5850Sstevel@tonic-gate 		lbp->bh.bh_flags = bh.bh_flags;
5860Sstevel@tonic-gate 		syslog(LOG_ERR, gettext(
5870Sstevel@tonic-gate 			"nfslog_rewrite_bufheader: %s: offset does not fit "
5880Sstevel@tonic-gate 			"in a 32 bit field\n"), lbp->bufpath);
5890Sstevel@tonic-gate 	}
5900Sstevel@tonic-gate 
5910Sstevel@tonic-gate 	xdrmem_create(&xdrs, buffer, XBUFSIZE, XDR_ENCODE);
5920Sstevel@tonic-gate 
5930Sstevel@tonic-gate 	if (!xdr_nfslog_buffer_header(&xdrs, &bh)) {
5940Sstevel@tonic-gate 		syslog(LOG_ERR, gettext(
5950Sstevel@tonic-gate 			"error in re-writing buffer file %s header\n"),
5960Sstevel@tonic-gate 			lbp->bufpath);
5970Sstevel@tonic-gate 		return;
5980Sstevel@tonic-gate 	}
5990Sstevel@tonic-gate 
6000Sstevel@tonic-gate 	wsize = xdr_getpos(&xdrs);
6010Sstevel@tonic-gate 
6020Sstevel@tonic-gate 	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
6030Sstevel@tonic-gate 		/* go to the beginning of the file */
6040Sstevel@tonic-gate 		(void) lseek(lbp->fd, 0, SEEK_SET);
6050Sstevel@tonic-gate 		(void) write(lbp->fd, buffer, wsize);
6060Sstevel@tonic-gate 		(void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
6070Sstevel@tonic-gate 		(void) fsync(lbp->fd);
6080Sstevel@tonic-gate 	} else {
6090Sstevel@tonic-gate 		bcopy(buffer, (void *)lbp->mmap_addr, wsize);
6100Sstevel@tonic-gate 		(void) msync((void *)lbp->mmap_addr, wsize, MS_SYNC);
6110Sstevel@tonic-gate 	}
6120Sstevel@tonic-gate }
6130Sstevel@tonic-gate 
6140Sstevel@tonic-gate /*
6150Sstevel@tonic-gate  * With the provided lrp, we will take and 'insert' the range that the
6160Sstevel@tonic-gate  * record covered in the buffer file into a list of processed ranges
6170Sstevel@tonic-gate  * for the buffer file.  These ranges represent the records processed
6180Sstevel@tonic-gate  * but not 'marked' in the buffer header as being processed.
6190Sstevel@tonic-gate  * This insertion process is being done for two reasons.  The first is that
6200Sstevel@tonic-gate  * we do not want to pay the performance penalty of re-writing the buffer header
6210Sstevel@tonic-gate  * for each record that we process.  The second reason is that the records
6220Sstevel@tonic-gate  * may be processed out of order because of the unique ids.  This will occur
6230Sstevel@tonic-gate  * if the kernel has written the records to the buffer file out of order.
6240Sstevel@tonic-gate  * The read routine will 'sort' them as the records are read.
6250Sstevel@tonic-gate  *
6260Sstevel@tonic-gate  * We do not want to re-write the buffer header such that a record is
6270Sstevel@tonic-gate  * represented and being processed when it has not been.  In the case
6280Sstevel@tonic-gate  * that the nfslogd daemon restarts processing and the buffer header
6290Sstevel@tonic-gate  * has been re-written improperly, some records could be skipped.
6300Sstevel@tonic-gate  * We will be taking the conservative approach and only writing buffer
6310Sstevel@tonic-gate  * header offsets when the entire offset range has been processed.
6320Sstevel@tonic-gate  */
6330Sstevel@tonic-gate static void
6340Sstevel@tonic-gate nfslog_ins_last_rec_processed(struct nfslog_lr *lrp)
6350Sstevel@tonic-gate {
6360Sstevel@tonic-gate 	struct processed_records *prp, *tp;
6370Sstevel@tonic-gate 
6380Sstevel@tonic-gate 	/* init the data struct as if it were the only one */
6390Sstevel@tonic-gate 	prp = malloc(sizeof (*prp));
6400Sstevel@tonic-gate 	prp->next = prp->prev = prp;
6410Sstevel@tonic-gate 	prp->start_offset = lrp->f_offset;
6420Sstevel@tonic-gate 	prp->len = lrp->recsize;
6430Sstevel@tonic-gate 	prp->num_recs = 1;
6440Sstevel@tonic-gate 
6450Sstevel@tonic-gate 	/* always add since we know we are going to insert */
6460Sstevel@tonic-gate 	lrp->lbp->num_pr_queued++;
6470Sstevel@tonic-gate 
6480Sstevel@tonic-gate 	/* Is this the first one?  If so, take the easy way out */
6490Sstevel@tonic-gate 	if (lrp->lbp->prp == NULL) {
6500Sstevel@tonic-gate 		lrp->lbp->prp = prp;
6510Sstevel@tonic-gate 	} else {
6520Sstevel@tonic-gate 		/* sort on insertion... */
6530Sstevel@tonic-gate 		tp = lrp->lbp->prp;
6540Sstevel@tonic-gate 		do {
6550Sstevel@tonic-gate 			if (prp->start_offset < tp->start_offset)
6560Sstevel@tonic-gate 				break;
6570Sstevel@tonic-gate 			tp = tp->next;
6580Sstevel@tonic-gate 		} while (tp != lrp->lbp->prp);
6590Sstevel@tonic-gate 		/* insert where appropriate (before the one we found */
6600Sstevel@tonic-gate 		insque(prp, tp->prev);
6610Sstevel@tonic-gate 		/*
6620Sstevel@tonic-gate 		 * special case where the insertion was done at the
6630Sstevel@tonic-gate 		 * head of the list
6640Sstevel@tonic-gate 		 */
6650Sstevel@tonic-gate 		if (tp == lrp->lbp->prp && prp->start_offset < tp->start_offset)
6660Sstevel@tonic-gate 			lrp->lbp->prp = prp;
6670Sstevel@tonic-gate 
6680Sstevel@tonic-gate 		/*
6690Sstevel@tonic-gate 		 * now that the entry is in place, we need to see if it can
6700Sstevel@tonic-gate 		 * be combined with the previous or following entries.
6710Sstevel@tonic-gate 		 * combination is done by adding to the length.
6720Sstevel@tonic-gate 		 */
6730Sstevel@tonic-gate 		if (prp->start_offset ==
6740Sstevel@tonic-gate 			(prp->prev->start_offset + prp->prev->len)) {
6750Sstevel@tonic-gate 			tp = prp->prev;
6760Sstevel@tonic-gate 			remque(prp);
6770Sstevel@tonic-gate 			tp->len += prp->len;
6780Sstevel@tonic-gate 			tp->num_recs += prp->num_recs;
6790Sstevel@tonic-gate 			free(prp);
6800Sstevel@tonic-gate 			prp = tp;
6810Sstevel@tonic-gate 		}
6820Sstevel@tonic-gate 		if (prp->next->start_offset ==
6830Sstevel@tonic-gate 			(prp->start_offset + prp->len)) {
6840Sstevel@tonic-gate 			prp->len += prp->next->len;
6850Sstevel@tonic-gate 			prp->num_recs += prp->next->num_recs;
6860Sstevel@tonic-gate 			tp = prp->next;
6870Sstevel@tonic-gate 			remque(tp);
6880Sstevel@tonic-gate 			free(tp);
6890Sstevel@tonic-gate 		}
6900Sstevel@tonic-gate 	}
6910Sstevel@tonic-gate 
6920Sstevel@tonic-gate 	if (lrp->lbp->num_pr_queued > MAX_RECS_TO_DELAY) {
6930Sstevel@tonic-gate 		prp = lrp->lbp->prp;
6940Sstevel@tonic-gate 		if (lrp->lbp->last_record_offset ==
6950Sstevel@tonic-gate 			prp->start_offset) {
6960Sstevel@tonic-gate 
6970Sstevel@tonic-gate 			/* adjust the offset for the entire buffer */
6980Sstevel@tonic-gate 			lrp->lbp->last_record_offset =
6990Sstevel@tonic-gate 				prp->start_offset + prp->len;
7000Sstevel@tonic-gate 
7010Sstevel@tonic-gate 			nfslog_rewrite_bufheader(lrp->lbp);
7020Sstevel@tonic-gate 
7030Sstevel@tonic-gate 			tp = prp->next;
7040Sstevel@tonic-gate 			if (tp != prp)
7050Sstevel@tonic-gate 				remque(prp);
7060Sstevel@tonic-gate 			else
7070Sstevel@tonic-gate 				tp = NULL;
7080Sstevel@tonic-gate 			lrp->lbp->prp = tp;
7090Sstevel@tonic-gate 			lrp->lbp->num_pr_queued -= prp->num_recs;
7100Sstevel@tonic-gate 			free(prp);
7110Sstevel@tonic-gate 		}
7120Sstevel@tonic-gate 	}
7130Sstevel@tonic-gate }
7140Sstevel@tonic-gate 
7150Sstevel@tonic-gate /*
7160Sstevel@tonic-gate  * nfslog_get_logrecord is responsible for retrieving the next log record
7170Sstevel@tonic-gate  * from the buffer file. This would normally be very straightforward but there
7180Sstevel@tonic-gate  * is the added complexity of attempting to order the requests coming out of
7190Sstevel@tonic-gate  * the buffer file.  The fundamental problems is that the kernel nfs logging
7200Sstevel@tonic-gate  * functionality does not guarantee that the records were written to the file
7210Sstevel@tonic-gate  * in the order that the NFS server processed them.  This can cause a problem
7220Sstevel@tonic-gate  * in the fh -> pathname mapping in the case were a lookup for a file comes
7230Sstevel@tonic-gate  * later in the buffer file than other operations on the lookup's target.
7240Sstevel@tonic-gate  * The fh mapping database will not have an entry and will therefore not
7250Sstevel@tonic-gate  * be able to map the fh to a name.
7260Sstevel@tonic-gate  *
7270Sstevel@tonic-gate  * So to solve this problem, the kernel nfs logging code tags each record
7280Sstevel@tonic-gate  * with a monotonically increasing id and is guaranteed to be allocated
7290Sstevel@tonic-gate  * in the order that the requests were processed.  Realize however that
7300Sstevel@tonic-gate  * this processing guarantee is essentially for one thread on one client.
7310Sstevel@tonic-gate  * This id mechanism does not order all requests since it is only the
7320Sstevel@tonic-gate  * single client/single thread case that is most concerning to us here.
7330Sstevel@tonic-gate  *
7340Sstevel@tonic-gate  * This function will do the 'sorting' of the requests as they are
7350Sstevel@tonic-gate  * read from the buffer file.  The sorting needs to take into account
7360Sstevel@tonic-gate  * that some ids may be missing (operations not logged but ids allocated)
7370Sstevel@tonic-gate  * and that the id field will eventually wrap over MAXINT.
7380Sstevel@tonic-gate  *
7390Sstevel@tonic-gate  * Complexity to solve the fh -> pathname mapping issue.
7400Sstevel@tonic-gate  */
7410Sstevel@tonic-gate struct nfslog_lr *
7420Sstevel@tonic-gate nfslog_get_logrecord(struct nfslog_buf *lbp)
7430Sstevel@tonic-gate {
7440Sstevel@tonic-gate 	/* figure out what the next should be if the world were perfect */
7450Sstevel@tonic-gate 	unsigned int next_rec_id = lbp->last_rec_id + 1;
7460Sstevel@tonic-gate 	struct nfslog_lr *lrp = NULL;
7470Sstevel@tonic-gate 
7480Sstevel@tonic-gate 	/*
7490Sstevel@tonic-gate 	 * First we check the queued records on the log buffer struct
7500Sstevel@tonic-gate 	 * to see if the one we want is there.  The records are sorted
7510Sstevel@tonic-gate 	 * on the record id during the insertions to the queue so that
7520Sstevel@tonic-gate 	 * this check is easy.
7530Sstevel@tonic-gate 	 */
7540Sstevel@tonic-gate 	if (lbp->lrps != NULL) {
7550Sstevel@tonic-gate 		/* Does the first record match ? */
7560Sstevel@tonic-gate 		if (lbp->lrps->log_record.re_header.rh_rec_id == next_rec_id) {
7570Sstevel@tonic-gate 			lrp = remove_lrp_from_lb(lbp, lbp->lrps);
7580Sstevel@tonic-gate 			lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
7590Sstevel@tonic-gate 		} else {
7600Sstevel@tonic-gate 			/*
7610Sstevel@tonic-gate 			 * Here we are checking for wrap of the record id
7620Sstevel@tonic-gate 			 * since it is an unsigned in.  The idea is that
7630Sstevel@tonic-gate 			 * if there is a huge span between what we expect
7640Sstevel@tonic-gate 			 * and what is queued then we need to flush/empty
7650Sstevel@tonic-gate 			 * the queued records first.
7660Sstevel@tonic-gate 			 */
7670Sstevel@tonic-gate 			if (next_rec_id <
7680Sstevel@tonic-gate 				lbp->lrps->log_record.re_header.rh_rec_id &&
7690Sstevel@tonic-gate 				((lbp->lrps->log_record.re_header.rh_rec_id -
7700Sstevel@tonic-gate 					next_rec_id) > (MAXINT / 2))) {
7710Sstevel@tonic-gate 
7720Sstevel@tonic-gate 				lrp = remove_lrp_from_lb(lbp, lbp->lrps);
7730Sstevel@tonic-gate 				lbp->last_rec_id =
7740Sstevel@tonic-gate 					lrp->log_record.re_header.rh_rec_id;
7750Sstevel@tonic-gate 			}
7760Sstevel@tonic-gate 		}
7770Sstevel@tonic-gate 	}
7780Sstevel@tonic-gate 	/*
7790Sstevel@tonic-gate 	 * So the first queued record didn't match (or there were no queued
7800Sstevel@tonic-gate 	 * records to look at).  Now we go to the buffer file looking for
7810Sstevel@tonic-gate 	 * the expected log record based on its id.  We loop looking for
7820Sstevel@tonic-gate 	 * a matching records and save/queue the records that don't match.
7830Sstevel@tonic-gate 	 * Note that we will queue a maximum number to handle the case
7840Sstevel@tonic-gate 	 * of a missing record id or a queue that is very confused.  We don't
7850Sstevel@tonic-gate 	 * want to consume too much memory.
7860Sstevel@tonic-gate 	 */
7870Sstevel@tonic-gate 	while (lrp == NULL) {
7880Sstevel@tonic-gate 		/* Have we queued too many for this buffer? */
7890Sstevel@tonic-gate 		if (lbp->num_lrps >= MAX_LRS_READ_AHEAD) {
7900Sstevel@tonic-gate 			lrp = remove_lrp_from_lb(lbp, lbp->lrps);
7910Sstevel@tonic-gate 			lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
7920Sstevel@tonic-gate 			break;
7930Sstevel@tonic-gate 		}
7940Sstevel@tonic-gate 		/*
7950Sstevel@tonic-gate 		 * Get a record from the buffer file.  If none are available,
7960Sstevel@tonic-gate 		 * this is probably and EOF condition (could be a read error
7970Sstevel@tonic-gate 		 * as well but that is masked. :-().  No records in the
7980Sstevel@tonic-gate 		 * file means that we need to pull any queued records
7990Sstevel@tonic-gate 		 * so that we don't miss any in the processing.
8000Sstevel@tonic-gate 		 */
8010Sstevel@tonic-gate 		if ((lrp = nfslog_read_buffer(lbp)) == NULL) {
8020Sstevel@tonic-gate 			if (lbp->lrps != NULL) {
8030Sstevel@tonic-gate 				lrp = remove_lrp_from_lb(lbp, lbp->lrps);
8040Sstevel@tonic-gate 				lbp->last_rec_id =
8050Sstevel@tonic-gate 					lrp->log_record.re_header.rh_rec_id;
8060Sstevel@tonic-gate 			} else {
8070Sstevel@tonic-gate 				return (NULL);  /* it was really and EOF */
8080Sstevel@tonic-gate 			}
8090Sstevel@tonic-gate 		} else {
8100Sstevel@tonic-gate 			/*
8110Sstevel@tonic-gate 			 * Just read a record from the buffer file and now we
8120Sstevel@tonic-gate 			 * need to XDR the record header so that we can take
8130Sstevel@tonic-gate 			 * a look at the record id.
8140Sstevel@tonic-gate 			 */
8150Sstevel@tonic-gate 			if (!xdr_nfslog_request_record(&lrp->xdrs,
8160Sstevel@tonic-gate 				&lrp->log_record)) {
8170Sstevel@tonic-gate 				/* Free and return EOF/NULL on error */
8180Sstevel@tonic-gate 				nfslog_free_logrecord(lrp, FALSE);
8190Sstevel@tonic-gate 				return (NULL);
8200Sstevel@tonic-gate 			}
8210Sstevel@tonic-gate 			/*
8220Sstevel@tonic-gate 			 * If the new record is less than or matches the
8230Sstevel@tonic-gate 			 * expected record id, then we return this record
8240Sstevel@tonic-gate 			 */
8250Sstevel@tonic-gate 			if (lrp->log_record.re_header.rh_rec_id <=
8260Sstevel@tonic-gate 				next_rec_id) {
8270Sstevel@tonic-gate 
8280Sstevel@tonic-gate 				lbp->last_rec_id =
8290Sstevel@tonic-gate 					lrp->log_record.re_header.rh_rec_id;
8300Sstevel@tonic-gate 			} else {
8310Sstevel@tonic-gate 				/*
8320Sstevel@tonic-gate 				 * This is not the one we were looking
8330Sstevel@tonic-gate 				 * for; queue it for later processing
8340Sstevel@tonic-gate 				 * (queueing sorts on record id)
8350Sstevel@tonic-gate 				 */
8360Sstevel@tonic-gate 				insert_lrp_to_lb(lbp, lrp);
8370Sstevel@tonic-gate 				lrp = NULL;
8380Sstevel@tonic-gate 			}
8390Sstevel@tonic-gate 		}
8400Sstevel@tonic-gate 	}
8410Sstevel@tonic-gate 	return (lrp);
8420Sstevel@tonic-gate }
8430Sstevel@tonic-gate 
8440Sstevel@tonic-gate /*
8450Sstevel@tonic-gate  * Free the log record provided.
8460Sstevel@tonic-gate  * This is complex because the associated XDR streams also need to be freed
8470Sstevel@tonic-gate  * since allocation could have occured during the DECODE phase.  The record
8480Sstevel@tonic-gate  * header, args and results need to be XDR_FREEd.  The xdr funtions will
8490Sstevel@tonic-gate  * be provided if a free needs to be done.
8500Sstevel@tonic-gate  *
8510Sstevel@tonic-gate  * Note that caller tells us if the record being freed was processed.
8520Sstevel@tonic-gate  * If so, then the buffer header should be updated.  Updating the buffer
8530Sstevel@tonic-gate  * header keeps track of where the nfslogd daemon left off in its processing
8540Sstevel@tonic-gate  * if it is unable to complete the entire file.
8550Sstevel@tonic-gate  */
8560Sstevel@tonic-gate void
8570Sstevel@tonic-gate nfslog_free_logrecord(struct nfslog_lr *lrp, bool_t processing_complete)
8580Sstevel@tonic-gate {
8590Sstevel@tonic-gate 	caddr_t			buffer;
8600Sstevel@tonic-gate 	nfslog_request_record 	*reqrec;
8610Sstevel@tonic-gate 
8620Sstevel@tonic-gate 	if (processing_complete) {
8630Sstevel@tonic-gate 		nfslog_ins_last_rec_processed(lrp);
8640Sstevel@tonic-gate 	}
8650Sstevel@tonic-gate 
8660Sstevel@tonic-gate 	reqrec = &lrp->log_record;
8670Sstevel@tonic-gate 
8680Sstevel@tonic-gate 	buffer = (lrp->buffer != NULL ? lrp->buffer : (caddr_t)lrp->record);
8690Sstevel@tonic-gate 
8700Sstevel@tonic-gate 	xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_FREE);
8710Sstevel@tonic-gate 
8720Sstevel@tonic-gate 	(void) xdr_nfslog_request_record(&lrp->xdrs, reqrec);
8730Sstevel@tonic-gate 
8740Sstevel@tonic-gate 	if (lrp->xdrargs != NULL && reqrec->re_rpc_arg)
8750Sstevel@tonic-gate 		(*lrp->xdrargs)(&lrp->xdrs, reqrec->re_rpc_arg);
8760Sstevel@tonic-gate 
8770Sstevel@tonic-gate 	if (reqrec->re_rpc_arg)
8780Sstevel@tonic-gate 		free(reqrec->re_rpc_arg);
8790Sstevel@tonic-gate 
8800Sstevel@tonic-gate 	if (lrp->xdrres != NULL && reqrec->re_rpc_res)
8810Sstevel@tonic-gate 		(*lrp->xdrres)(&lrp->xdrs, reqrec->re_rpc_res);
8820Sstevel@tonic-gate 
8830Sstevel@tonic-gate 	if (reqrec->re_rpc_res)
8840Sstevel@tonic-gate 		free(reqrec->re_rpc_res);
8850Sstevel@tonic-gate 
8860Sstevel@tonic-gate 	free_lrp(lrp);
8870Sstevel@tonic-gate }
8880Sstevel@tonic-gate 
8890Sstevel@tonic-gate static void
8900Sstevel@tonic-gate free_lrp(struct nfslog_lr *lrp)
8910Sstevel@tonic-gate {
8920Sstevel@tonic-gate 	if (lrp->buffer != NULL)
8930Sstevel@tonic-gate 		free(lrp->buffer);
8940Sstevel@tonic-gate 	free(lrp);
8950Sstevel@tonic-gate }
8960Sstevel@tonic-gate 
/*
 * Utility function used elsewhere.
 *
 * Appends a quoted hex dump of the opaque data in buf to outbuf:
 * a space, an opening '"', the data, and a closing '"'.
 *
 *	buf		data to dump
 *	len		number of bytes in buf; values <= 0 dump nothing
 *	outbuf		destination text buffer
 *	outbufoffsetp	in: offset in outbuf at which to start writing
 *			out: offset just past the last character written
 *	maxoffset	soft limit on the offset in outbuf (see below)
 *
 * Data of at most sizeof (int) bytes is written as one run of two-digit
 * hex bytes.  Longer data is grouped on the int alignment of buf's
 * address: any bytes before the first aligned address are written as
 * plain hex bytes, every following complete int is written in host byte
 * order as " %08x", and leftover bytes are written as hex bytes, separated
 * by a space if at least one int was written.
 *
 * maxoffset is tested before each piece is written, never during, so a
 * single piece (up to nine characters plus the terminating NUL) may extend
 * past it, and the opening " \"" is written unconditionally.  outbuf must
 * therefore have room beyond maxoffset.
 */
void
nfslog_opaque_print_buf(void *buf, int len, char *outbuf, int *outbufoffsetp,
	int maxoffset)
{
	const int	wordsz = (int)sizeof (int);
	unsigned char	*bp = (unsigned char *)buf;
	unsigned int	word;
	int		off = *outbufoffsetp;
	int		done = 0;	/* bytes of buf consumed so far */
	int		nwords = 0;	/* whole ints written so far */
	int		lead;

	off += sprintf(&outbuf[off], " \"");

	/* a negative length must not be mistaken for a huge unsigned one */
	if (len < 0)
		len = 0;

	/* More than sizeof (int) bytes, print with spaces in int offsets */
	if (len > wordsz) {
		/* bytes in front of the first int-aligned address */
		lead = (int)((uintptr_t)buf % sizeof (int));
		if (lead > 0) {
			lead = wordsz - lead;
			for (; (done < lead) && (off < maxoffset);
			    done++, bp++)
				off += sprintf(&outbuf[off], "%02x", *bp);
		}

		/*
		 * Whole ints.  memcpy fetches the value in host byte order
		 * without relying on the alignment or type of the buffer.
		 */
		for (; ((len - done) >= wordsz) && (off < maxoffset);
		    done += wordsz, bp += wordsz) {
			(void) memcpy(&word, bp, sizeof (word));
			off += sprintf(&outbuf[off], " %08x", word);
			nwords++;
		}

		/* Last element not int: separate it from the ints */
		if ((done < len) && (nwords > 0) && (off < maxoffset))
			off += sprintf(&outbuf[off], " ");
	}

	/* short data, or the bytes left over after the last whole int */
	for (; (done < len) && (off < maxoffset); done++, bp++)
		off += sprintf(&outbuf[off], "%02x", *bp);

	/*
	 * Every path closes the quote and reports the new offset, so that
	 * the caller appends after, not on top of, what was written here.
	 */
	if (off < maxoffset)
		off += sprintf(&outbuf[off], "\"");
	*outbufoffsetp = off;
}
947