1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
29*0Sstevel@tonic-gate /*
30*0Sstevel@tonic-gate  * nfs log - read buffer file and return structs in usable form
31*0Sstevel@tonic-gate  */
32*0Sstevel@tonic-gate 
33*0Sstevel@tonic-gate #include <ctype.h>
34*0Sstevel@tonic-gate #include <stdio.h>
35*0Sstevel@tonic-gate #include <stdlib.h>
36*0Sstevel@tonic-gate #include <stddef.h>
37*0Sstevel@tonic-gate #include <string.h>
38*0Sstevel@tonic-gate #include <fcntl.h>
39*0Sstevel@tonic-gate #include <unistd.h>
40*0Sstevel@tonic-gate #include <signal.h>
41*0Sstevel@tonic-gate #include <sys/types.h>
42*0Sstevel@tonic-gate #include <sys/param.h>
43*0Sstevel@tonic-gate #include <sys/stat.h>
44*0Sstevel@tonic-gate #include <sys/utsname.h>
45*0Sstevel@tonic-gate #include <sys/mman.h>
46*0Sstevel@tonic-gate #include <strings.h>
47*0Sstevel@tonic-gate #include <errno.h>
48*0Sstevel@tonic-gate #include <syslog.h>
49*0Sstevel@tonic-gate #include <time.h>
50*0Sstevel@tonic-gate #include <limits.h>
51*0Sstevel@tonic-gate #include <libintl.h>
52*0Sstevel@tonic-gate #include <values.h>
53*0Sstevel@tonic-gate #include <search.h>
54*0Sstevel@tonic-gate #include <pwd.h>
55*0Sstevel@tonic-gate #include <netdb.h>
56*0Sstevel@tonic-gate #include <rpc/rpc.h>
57*0Sstevel@tonic-gate #include <netconfig.h>
58*0Sstevel@tonic-gate #include <netdir.h>
59*0Sstevel@tonic-gate #include <nfs/nfs_sec.h>
60*0Sstevel@tonic-gate #include <nfs/export.h>
61*0Sstevel@tonic-gate #include <rpc/auth.h>
62*0Sstevel@tonic-gate #include <rpc/svc.h>
63*0Sstevel@tonic-gate #include <rpc/xdr.h>
64*0Sstevel@tonic-gate #include <rpc/clnt.h>
65*0Sstevel@tonic-gate #include <nfs/nfs.h>
66*0Sstevel@tonic-gate #include <nfs/nfs_log.h>
67*0Sstevel@tonic-gate #include "nfslogd.h"
68*0Sstevel@tonic-gate 
#define	MAX_LRS_READ_AHEAD 2048
#define	MAX_RECS_TO_DELAY 32768

/*
 * Forward declarations for the file-local helpers.
 */

/* log buffer set-up and tear-down */
static int nfslog_init_buf(char *bufpath, struct nfslog_buf *lbp,
    int *error);
static void nfslog_free_buf(struct nfslog_buf *lbp, int close_quick);
static void nfslog_rewrite_bufheader(struct nfslog_buf *lbp);

/* reading and releasing individual log records */
static struct nfslog_lr *nfslog_read_buffer(struct nfslog_buf *lbp);
static void free_lrp(struct nfslog_lr *lrp);

/* queue of records that were read but not yet processed */
static struct nfslog_lr *remove_lrp_from_lb(struct nfslog_buf *lbp,
    struct nfslog_lr *lrp);
static void insert_lrp_to_lb(struct nfslog_buf *lbp,
    struct nfslog_lr *lrp);
81*0Sstevel@tonic-gate 
82*0Sstevel@tonic-gate /*
83*0Sstevel@tonic-gate  * Treat the provided path name as an NFS log buffer file.
84*0Sstevel@tonic-gate  * Allocate a data structure for its handling and initialize it.
85*0Sstevel@tonic-gate  * *error contains the previous error condition encountered for
86*0Sstevel@tonic-gate  * this object. This value can be used to avoid printing the last
87*0Sstevel@tonic-gate  * error endlessly.
88*0Sstevel@tonic-gate  * It will set *error appropriately after processing.
89*0Sstevel@tonic-gate  */
90*0Sstevel@tonic-gate struct nfslog_buf *
91*0Sstevel@tonic-gate nfslog_open_buf(char *bufpath, int *error)
92*0Sstevel@tonic-gate {
93*0Sstevel@tonic-gate 	struct nfslog_buf	*lbp = NULL;
94*0Sstevel@tonic-gate 
95*0Sstevel@tonic-gate 	if (bufpath == NULL) {
96*0Sstevel@tonic-gate 		*error = EINVAL;
97*0Sstevel@tonic-gate 		return (NULL);
98*0Sstevel@tonic-gate 	}
99*0Sstevel@tonic-gate 
100*0Sstevel@tonic-gate 	if ((lbp = malloc(sizeof (struct nfslog_buf))) == NULL) {
101*0Sstevel@tonic-gate 		*error = ENOMEM;
102*0Sstevel@tonic-gate 		return (NULL);
103*0Sstevel@tonic-gate 	}
104*0Sstevel@tonic-gate 	bzero(lbp, sizeof (struct nfslog_buf));
105*0Sstevel@tonic-gate 
106*0Sstevel@tonic-gate 	if (nfslog_init_buf(bufpath, lbp, error)) {
107*0Sstevel@tonic-gate 		free(lbp);
108*0Sstevel@tonic-gate 		return (NULL);
109*0Sstevel@tonic-gate 	}
110*0Sstevel@tonic-gate 	return (lbp);
111*0Sstevel@tonic-gate }
112*0Sstevel@tonic-gate 
/*
 * Release everything attached to the log buffer struct via
 * nfslog_free_buf() and then free the struct itself.
 *
 * lbp		handle obtained from nfslog_open_buf(); NULL is accepted
 *		and ignored, mirroring free(), since nfslog_open_buf()
 *		returns NULL on failure.
 * close_quick	passed through to nfslog_free_buf(); when non-zero that
 *		routine returns right after its buffer header update,
 *		without releasing the queued records, the mapping or the
 *		file descriptor.
 */
void
nfslog_close_buf(struct nfslog_buf *lbp, int close_quick)
{
	if (lbp == NULL)
		return;

	nfslog_free_buf(lbp, close_quick);
	free(lbp);
}
122*0Sstevel@tonic-gate 
123*0Sstevel@tonic-gate /*
124*0Sstevel@tonic-gate  * Set up the log buffer struct; simple things are opening and locking
125*0Sstevel@tonic-gate  * the buffer file and then on to mmap()ing it for later use by the
126*0Sstevel@tonic-gate  * XDR decode path.  Make sure to read the buffer header before
127*0Sstevel@tonic-gate  * returning so that we will be at the first true log record.
128*0Sstevel@tonic-gate  *
129*0Sstevel@tonic-gate  * *error contains the last error encountered on this object. It can
130*0Sstevel@tonic-gate  * be used to avoid reporting the same error endlessly. It is reset
131*0Sstevel@tonic-gate  * to the current error code on return.
132*0Sstevel@tonic-gate  */
133*0Sstevel@tonic-gate static int
134*0Sstevel@tonic-gate nfslog_init_buf(char *bufpath, struct nfslog_buf *lbp, int *error)
135*0Sstevel@tonic-gate {
136*0Sstevel@tonic-gate 	struct stat sb;
137*0Sstevel@tonic-gate 	int preverror = *error;
138*0Sstevel@tonic-gate 
139*0Sstevel@tonic-gate 	lbp->next = lbp;
140*0Sstevel@tonic-gate 	lbp->prev = lbp;
141*0Sstevel@tonic-gate 	/*
142*0Sstevel@tonic-gate 	 * set these values so that the free routine will know what to do
143*0Sstevel@tonic-gate 	 */
144*0Sstevel@tonic-gate 	lbp->mmap_addr = (intptr_t)MAP_FAILED;
145*0Sstevel@tonic-gate 	lbp->last_rec_id = MAXINT - 1;
146*0Sstevel@tonic-gate 	lbp->bh.bh_length = 0;
147*0Sstevel@tonic-gate 	lbp->bh_lrp = NULL;
148*0Sstevel@tonic-gate 	lbp->num_lrps = 0;
149*0Sstevel@tonic-gate 	lbp->lrps = NULL;
150*0Sstevel@tonic-gate 	lbp->last_record_offset = 0;
151*0Sstevel@tonic-gate 	lbp->prp = NULL;
152*0Sstevel@tonic-gate 	lbp->num_pr_queued = 0;
153*0Sstevel@tonic-gate 
154*0Sstevel@tonic-gate 	lbp->bufpath = strdup(bufpath);
155*0Sstevel@tonic-gate 	if (lbp->bufpath == NULL) {
156*0Sstevel@tonic-gate 		*error = ENOMEM;
157*0Sstevel@tonic-gate 		if (preverror != *error) {
158*0Sstevel@tonic-gate 			syslog(LOG_ERR, gettext("Cannot strdup '%s': %s"),
159*0Sstevel@tonic-gate 				bufpath, strerror(*error));
160*0Sstevel@tonic-gate 		}
161*0Sstevel@tonic-gate 		nfslog_free_buf(lbp, FALSE);
162*0Sstevel@tonic-gate 		return (*error);
163*0Sstevel@tonic-gate 	}
164*0Sstevel@tonic-gate 
165*0Sstevel@tonic-gate 	if ((lbp->fd = open(bufpath, O_RDWR)) < 0) {
166*0Sstevel@tonic-gate 		*error = errno;
167*0Sstevel@tonic-gate 		if (preverror != *error) {
168*0Sstevel@tonic-gate 			syslog(LOG_ERR, gettext("Cannot open '%s': %s"),
169*0Sstevel@tonic-gate 				bufpath, strerror(*error));
170*0Sstevel@tonic-gate 		}
171*0Sstevel@tonic-gate 		nfslog_free_buf(lbp, FALSE);
172*0Sstevel@tonic-gate 		return (*error);
173*0Sstevel@tonic-gate 	}
174*0Sstevel@tonic-gate 
175*0Sstevel@tonic-gate 	/*
176*0Sstevel@tonic-gate 	 * Lock the entire buffer file to prevent conflicting access.
177*0Sstevel@tonic-gate 	 * We get a write lock because we want only 1 process to be
178*0Sstevel@tonic-gate 	 * generating records from it.
179*0Sstevel@tonic-gate 	 */
180*0Sstevel@tonic-gate 	lbp->fl.l_type = F_WRLCK;
181*0Sstevel@tonic-gate 	lbp->fl.l_whence = SEEK_SET;		/* beginning of file */
182*0Sstevel@tonic-gate 	lbp->fl.l_start = (offset_t)0;
183*0Sstevel@tonic-gate 	lbp->fl.l_len = 0;			/* entire file */
184*0Sstevel@tonic-gate 	lbp->fl.l_sysid = 0;
185*0Sstevel@tonic-gate 	lbp->fl.l_pid = 0;
186*0Sstevel@tonic-gate 	if (fcntl(lbp->fd, F_SETLKW, &lbp->fl) == -1) {
187*0Sstevel@tonic-gate 		*error = errno;
188*0Sstevel@tonic-gate 		if (preverror != *error) {
189*0Sstevel@tonic-gate 			syslog(LOG_ERR, gettext("Cannot lock (%s): %s"),
190*0Sstevel@tonic-gate 				bufpath, strerror(*error));
191*0Sstevel@tonic-gate 		}
192*0Sstevel@tonic-gate 		nfslog_free_buf(lbp, FALSE);
193*0Sstevel@tonic-gate 		return (*error);
194*0Sstevel@tonic-gate 	}
195*0Sstevel@tonic-gate 
196*0Sstevel@tonic-gate 	if (fstat(lbp->fd, &sb)) {
197*0Sstevel@tonic-gate 		*error = errno;
198*0Sstevel@tonic-gate 		if (preverror != *error) {
199*0Sstevel@tonic-gate 			syslog(LOG_ERR, gettext("Cannot stat (%s): %s"),
200*0Sstevel@tonic-gate 				bufpath, strerror(*error));
201*0Sstevel@tonic-gate 		}
202*0Sstevel@tonic-gate 		nfslog_free_buf(lbp, FALSE);
203*0Sstevel@tonic-gate 		return (*error);
204*0Sstevel@tonic-gate 	}
205*0Sstevel@tonic-gate 	lbp->filesize = sb.st_size;
206*0Sstevel@tonic-gate 
207*0Sstevel@tonic-gate 	lbp->mmap_addr = (intptr_t)mmap(0, lbp->filesize, PROT_READ|PROT_WRITE,
208*0Sstevel@tonic-gate 		MAP_SHARED|MAP_NORESERVE, lbp->fd, 0);
209*0Sstevel@tonic-gate 
210*0Sstevel@tonic-gate 	/* This is part of the duality of the use of either mmap()|read() */
211*0Sstevel@tonic-gate 	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
212*0Sstevel@tonic-gate 		lbp->next_rec = 0;
213*0Sstevel@tonic-gate 	} else {
214*0Sstevel@tonic-gate 		lbp->next_rec = lbp->mmap_addr;
215*0Sstevel@tonic-gate 	}
216*0Sstevel@tonic-gate 
217*0Sstevel@tonic-gate 	/* Read the header */
218*0Sstevel@tonic-gate 	if ((lbp->bh_lrp = nfslog_read_buffer(lbp)) == NULL) {
219*0Sstevel@tonic-gate 		*error = EIO;
220*0Sstevel@tonic-gate 		if (preverror != *error) {
221*0Sstevel@tonic-gate 			syslog(LOG_ERR, gettext(
222*0Sstevel@tonic-gate 				"error in reading file '%s': %s"),
223*0Sstevel@tonic-gate 				bufpath, strerror(EIO));
224*0Sstevel@tonic-gate 		}
225*0Sstevel@tonic-gate 		nfslog_free_buf(lbp, FALSE);
226*0Sstevel@tonic-gate 		return (*error);
227*0Sstevel@tonic-gate 	}
228*0Sstevel@tonic-gate 
229*0Sstevel@tonic-gate 	if (!xdr_nfslog_buffer_header(&lbp->bh_lrp->xdrs, &lbp->bh)) {
230*0Sstevel@tonic-gate 		*error = EIO;
231*0Sstevel@tonic-gate 		if (preverror != *error) {
232*0Sstevel@tonic-gate 			syslog(LOG_ERR, gettext(
233*0Sstevel@tonic-gate 				"error in reading file '%s': %s"),
234*0Sstevel@tonic-gate 				bufpath, strerror(*error));
235*0Sstevel@tonic-gate 		}
236*0Sstevel@tonic-gate 		nfslog_free_buf(lbp, FALSE);
237*0Sstevel@tonic-gate 		return (*error);
238*0Sstevel@tonic-gate 	}
239*0Sstevel@tonic-gate 
240*0Sstevel@tonic-gate 	/*
241*0Sstevel@tonic-gate 	 * Set the pointer to the next record based on the buffer header.
242*0Sstevel@tonic-gate 	 * 'lbp->bh.bh_offset' contains the offset of where to begin
243*0Sstevel@tonic-gate 	 * processing relative to the buffer header.
244*0Sstevel@tonic-gate 	 */
245*0Sstevel@tonic-gate 	lbp->next_rec += lbp->bh.bh_offset;
246*0Sstevel@tonic-gate 
247*0Sstevel@tonic-gate 	/*
248*0Sstevel@tonic-gate 	 * If we are going to be using read() for file data, then we may
249*0Sstevel@tonic-gate 	 * have to adjust the current file pointer to take into account
250*0Sstevel@tonic-gate 	 * a starting point other than the beginning of the file.
251*0Sstevel@tonic-gate 	 * If mmap is being used, this is taken care of as a side effect of
252*0Sstevel@tonic-gate 	 * setting up the value of next_rec.
253*0Sstevel@tonic-gate 	 */
254*0Sstevel@tonic-gate 	if (lbp->mmap_addr == (intptr_t)MAP_FAILED && lbp->next_rec != 0) {
255*0Sstevel@tonic-gate 		(void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
256*0Sstevel@tonic-gate 		/* This is a special case of setting the last_record_offset */
257*0Sstevel@tonic-gate 		lbp->last_record_offset = lbp->next_rec;
258*0Sstevel@tonic-gate 	} else {
259*0Sstevel@tonic-gate 		lbp->last_record_offset = lbp->next_rec - lbp->mmap_addr;
260*0Sstevel@tonic-gate 	}
261*0Sstevel@tonic-gate 
262*0Sstevel@tonic-gate 	return (*error = 0);
263*0Sstevel@tonic-gate }
264*0Sstevel@tonic-gate 
265*0Sstevel@tonic-gate /*
266*0Sstevel@tonic-gate  * Free the nfslog buffer and its associated allocations
267*0Sstevel@tonic-gate  */
268*0Sstevel@tonic-gate static void
269*0Sstevel@tonic-gate nfslog_free_buf(struct nfslog_buf *lbp, int close_quick)
270*0Sstevel@tonic-gate {
271*0Sstevel@tonic-gate 	XDR	xdrs;
272*0Sstevel@tonic-gate 	int	error;
273*0Sstevel@tonic-gate 	caddr_t buffer;
274*0Sstevel@tonic-gate 	struct nfslog_lr *lrp, *lrp_next;
275*0Sstevel@tonic-gate 	struct processed_records *prp, *tprp;
276*0Sstevel@tonic-gate 
277*0Sstevel@tonic-gate 	/* work to free the offset records and rewrite header */
278*0Sstevel@tonic-gate 	if (lbp->prp) {
279*0Sstevel@tonic-gate 		if (lbp->last_record_offset == lbp->prp->start_offset) {
280*0Sstevel@tonic-gate 
281*0Sstevel@tonic-gate 			/* adjust the offset for the entire buffer */
282*0Sstevel@tonic-gate 			lbp->last_record_offset =
283*0Sstevel@tonic-gate 				lbp->prp->start_offset + lbp->prp->len;
284*0Sstevel@tonic-gate 
285*0Sstevel@tonic-gate 			nfslog_rewrite_bufheader(lbp);
286*0Sstevel@tonic-gate 		}
287*0Sstevel@tonic-gate 		if (close_quick)
288*0Sstevel@tonic-gate 			return;
289*0Sstevel@tonic-gate 		prp = lbp->prp;
290*0Sstevel@tonic-gate 		do {
291*0Sstevel@tonic-gate 			tprp = prp->next;
292*0Sstevel@tonic-gate 			free(prp);
293*0Sstevel@tonic-gate 			prp = tprp;
294*0Sstevel@tonic-gate 		} while (lbp->prp != prp);
295*0Sstevel@tonic-gate 	}
296*0Sstevel@tonic-gate 
297*0Sstevel@tonic-gate 	if (close_quick)
298*0Sstevel@tonic-gate 		return;
299*0Sstevel@tonic-gate 
300*0Sstevel@tonic-gate 	/* Take care of the queue log records first */
301*0Sstevel@tonic-gate 	if (lbp->lrps != NULL) {
302*0Sstevel@tonic-gate 		lrp = lbp->lrps;
303*0Sstevel@tonic-gate 		do {
304*0Sstevel@tonic-gate 			lrp_next = lrp->next;
305*0Sstevel@tonic-gate 			nfslog_free_logrecord(lrp, FALSE);
306*0Sstevel@tonic-gate 			lrp = lrp_next;
307*0Sstevel@tonic-gate 		} while (lrp != lbp->lrps);
308*0Sstevel@tonic-gate 		lbp->lrps = NULL;
309*0Sstevel@tonic-gate 	}
310*0Sstevel@tonic-gate 
311*0Sstevel@tonic-gate 	/* The buffer header was decoded and needs to be freed */
312*0Sstevel@tonic-gate 	if (lbp->bh.bh_length != 0) {
313*0Sstevel@tonic-gate 		buffer = (lbp->bh_lrp->buffer != NULL ?
314*0Sstevel@tonic-gate 			lbp->bh_lrp->buffer : (caddr_t)lbp->mmap_addr);
315*0Sstevel@tonic-gate 		xdrmem_create(&xdrs, buffer, lbp->bh_lrp->recsize, XDR_FREE);
316*0Sstevel@tonic-gate 		(void) xdr_nfslog_buffer_header(&xdrs, &lbp->bh);
317*0Sstevel@tonic-gate 		lbp->bh.bh_length = 0;
318*0Sstevel@tonic-gate 	}
319*0Sstevel@tonic-gate 
320*0Sstevel@tonic-gate 	/* get rid of the bufheader lrp */
321*0Sstevel@tonic-gate 	if (lbp->bh_lrp != NULL) {
322*0Sstevel@tonic-gate 		free_lrp(lbp->bh_lrp);
323*0Sstevel@tonic-gate 		lbp->bh_lrp = NULL;
324*0Sstevel@tonic-gate 	}
325*0Sstevel@tonic-gate 
326*0Sstevel@tonic-gate 	/* Clean up for mmap() usage */
327*0Sstevel@tonic-gate 	if (lbp->mmap_addr != (intptr_t)MAP_FAILED) {
328*0Sstevel@tonic-gate 		if (munmap((void *)lbp->mmap_addr, lbp->filesize)) {
329*0Sstevel@tonic-gate 			error = errno;
330*0Sstevel@tonic-gate 			syslog(LOG_ERR, gettext("munmap failed: %s: %s"),
331*0Sstevel@tonic-gate 				(lbp->bufpath != NULL ? lbp->bufpath : ""),
332*0Sstevel@tonic-gate 				strerror(error));
333*0Sstevel@tonic-gate 		}
334*0Sstevel@tonic-gate 		lbp->mmap_addr = (intptr_t)MAP_FAILED;
335*0Sstevel@tonic-gate 	}
336*0Sstevel@tonic-gate 
337*0Sstevel@tonic-gate 	/* Finally close the buffer file */
338*0Sstevel@tonic-gate 	if (lbp->fd >= 0) {
339*0Sstevel@tonic-gate 		lbp->fl.l_type = F_UNLCK;
340*0Sstevel@tonic-gate 		if (fcntl(lbp->fd, F_SETLK, &lbp->fl) == -1) {
341*0Sstevel@tonic-gate 			error = errno;
342*0Sstevel@tonic-gate 			syslog(LOG_ERR,
343*0Sstevel@tonic-gate 				gettext("Cannot unlock file %s: %s"),
344*0Sstevel@tonic-gate 				(lbp->bufpath != NULL ? lbp->bufpath : ""),
345*0Sstevel@tonic-gate 				strerror(error));
346*0Sstevel@tonic-gate 		}
347*0Sstevel@tonic-gate 		(void) close(lbp->fd);
348*0Sstevel@tonic-gate 		lbp->fd = -1;
349*0Sstevel@tonic-gate 	}
350*0Sstevel@tonic-gate 	if (lbp->bufpath != NULL)
351*0Sstevel@tonic-gate 		free(lbp->bufpath);
352*0Sstevel@tonic-gate }
353*0Sstevel@tonic-gate 
354*0Sstevel@tonic-gate /*
355*0Sstevel@tonic-gate  * We are reading a record from the log buffer file.  Since we are reading
356*0Sstevel@tonic-gate  * an XDR stream, we first have to read the first integer to determine
357*0Sstevel@tonic-gate  * how much to read in whole for this record.  Our preference is to use
358*0Sstevel@tonic-gate  * mmap() but if failed initially we will be using read().  Need to be
359*0Sstevel@tonic-gate  * careful about proper initialization of the log record both from a field
360*0Sstevel@tonic-gate  * perspective and for XDR decoding.
361*0Sstevel@tonic-gate  */
362*0Sstevel@tonic-gate static struct nfslog_lr *
363*0Sstevel@tonic-gate nfslog_read_buffer(struct nfslog_buf *lbp)
364*0Sstevel@tonic-gate {
365*0Sstevel@tonic-gate 	XDR xdrs;
366*0Sstevel@tonic-gate 	unsigned int	record_size;
367*0Sstevel@tonic-gate 	struct nfslog_lr *lrp;
368*0Sstevel@tonic-gate 	char		*sizebuf, tbuf[16];
369*0Sstevel@tonic-gate 	caddr_t		buffer;
370*0Sstevel@tonic-gate 	offset_t	next_rec;
371*0Sstevel@tonic-gate 
372*0Sstevel@tonic-gate 	lrp = (struct nfslog_lr *)malloc(sizeof (*lrp));
373*0Sstevel@tonic-gate 	bzero(lrp, sizeof (*lrp));
374*0Sstevel@tonic-gate 
375*0Sstevel@tonic-gate 	/* Check to see if mmap worked */
376*0Sstevel@tonic-gate 	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
377*0Sstevel@tonic-gate 		/*
378*0Sstevel@tonic-gate 		 * EOF or other failure; we don't try to recover, just return
379*0Sstevel@tonic-gate 		 */
380*0Sstevel@tonic-gate 		if (read(lbp->fd, tbuf, BYTES_PER_XDR_UNIT) <= 0) {
381*0Sstevel@tonic-gate 			free_lrp(lrp);
382*0Sstevel@tonic-gate 			return (NULL);
383*0Sstevel@tonic-gate 		}
384*0Sstevel@tonic-gate 		sizebuf = tbuf;
385*0Sstevel@tonic-gate 	} else {
386*0Sstevel@tonic-gate 		/* EOF check for the mmap() case */
387*0Sstevel@tonic-gate 		if (lbp->filesize <= lbp->next_rec - lbp->mmap_addr) {
388*0Sstevel@tonic-gate 			free_lrp(lrp);
389*0Sstevel@tonic-gate 			return (NULL);
390*0Sstevel@tonic-gate 		}
391*0Sstevel@tonic-gate 		sizebuf = (char *)lbp->next_rec;
392*0Sstevel@tonic-gate 	}
393*0Sstevel@tonic-gate 
394*0Sstevel@tonic-gate 	/* We have to XDR the first int so we know how much is in this record */
395*0Sstevel@tonic-gate 	xdrmem_create(&xdrs, sizebuf, sizeof (unsigned int), XDR_DECODE);
396*0Sstevel@tonic-gate 
397*0Sstevel@tonic-gate 	if (!xdr_u_int(&xdrs, &record_size)) {
398*0Sstevel@tonic-gate 		free_lrp(lrp);
399*0Sstevel@tonic-gate 		return (NULL);
400*0Sstevel@tonic-gate 	}
401*0Sstevel@tonic-gate 
402*0Sstevel@tonic-gate 	lrp->recsize = record_size;
403*0Sstevel@tonic-gate 	next_rec = lbp->next_rec + lrp->recsize;
404*0Sstevel@tonic-gate 
405*0Sstevel@tonic-gate 	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
406*0Sstevel@tonic-gate 		/*
407*0Sstevel@tonic-gate 		 * Read() case - shouldn't be used very much.
408*0Sstevel@tonic-gate 		 * Note: The 'buffer' field is used later on
409*0Sstevel@tonic-gate 		 * to determine which method is being used mmap()|read()
410*0Sstevel@tonic-gate 		 */
411*0Sstevel@tonic-gate 		if (lbp->filesize < next_rec) {
412*0Sstevel@tonic-gate 			/* partial record from buffer */
413*0Sstevel@tonic-gate 			syslog(LOG_ERR, gettext(
414*0Sstevel@tonic-gate 				"Last partial record in work buffer %s "
415*0Sstevel@tonic-gate 				"discarded\n"), lbp->bufpath);
416*0Sstevel@tonic-gate 			free_lrp(lrp);
417*0Sstevel@tonic-gate 			return (NULL);
418*0Sstevel@tonic-gate 		}
419*0Sstevel@tonic-gate 
420*0Sstevel@tonic-gate 		if ((lrp->buffer = malloc(lrp->recsize)) == NULL) {
421*0Sstevel@tonic-gate 			free_lrp(lrp);
422*0Sstevel@tonic-gate 			return (NULL);
423*0Sstevel@tonic-gate 		}
424*0Sstevel@tonic-gate 		bcopy(sizebuf, lrp->buffer, BYTES_PER_XDR_UNIT);
425*0Sstevel@tonic-gate 		if (read(lbp->fd, &lrp->buffer[BYTES_PER_XDR_UNIT],
426*0Sstevel@tonic-gate 			lrp->recsize - BYTES_PER_XDR_UNIT) <= 0) {
427*0Sstevel@tonic-gate 			free_lrp(lrp);
428*0Sstevel@tonic-gate 			return (NULL);
429*0Sstevel@tonic-gate 		}
430*0Sstevel@tonic-gate 	} else if (lbp->filesize < next_rec - lbp->mmap_addr) {
431*0Sstevel@tonic-gate 			/* partial record from buffer */
432*0Sstevel@tonic-gate 			syslog(LOG_ERR, gettext(
433*0Sstevel@tonic-gate 				"Last partial record in work buffer %s "
434*0Sstevel@tonic-gate 				"discarded\n"), lbp->bufpath);
435*0Sstevel@tonic-gate 			free_lrp(lrp);
436*0Sstevel@tonic-gate 			return (NULL);
437*0Sstevel@tonic-gate 	}
438*0Sstevel@tonic-gate 
439*0Sstevel@tonic-gate 
440*0Sstevel@tonic-gate 	/* other initializations */
441*0Sstevel@tonic-gate 	lrp->next = lrp->prev = lrp;
442*0Sstevel@tonic-gate 	/* Keep track of the offset at which this record was read */
443*0Sstevel@tonic-gate 	if (lbp->mmap_addr == (intptr_t)MAP_FAILED)
444*0Sstevel@tonic-gate 		lrp->f_offset = lbp->next_rec;
445*0Sstevel@tonic-gate 	else
446*0Sstevel@tonic-gate 		lrp->f_offset = lbp->next_rec - lbp->mmap_addr;
447*0Sstevel@tonic-gate 	/* This is the true address of the record */
448*0Sstevel@tonic-gate 	lrp->record = lbp->next_rec;
449*0Sstevel@tonic-gate 	lrp->xdrargs = lrp->xdrres = NULL;
450*0Sstevel@tonic-gate 	lrp->lbp = lbp;
451*0Sstevel@tonic-gate 
452*0Sstevel@tonic-gate 	/* Here is the logic for mmap() vs. read() */
453*0Sstevel@tonic-gate 	buffer = (lrp->buffer != NULL ? lrp->buffer : (caddr_t)lrp->record);
454*0Sstevel@tonic-gate 
455*0Sstevel@tonic-gate 	/* Setup for the 'real' XDR decode of the entire record */
456*0Sstevel@tonic-gate 	xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_DECODE);
457*0Sstevel@tonic-gate 
458*0Sstevel@tonic-gate 	/* calculate the offset for the next record */
459*0Sstevel@tonic-gate 	lbp->next_rec = next_rec;
460*0Sstevel@tonic-gate 
461*0Sstevel@tonic-gate 	return (lrp);
462*0Sstevel@tonic-gate }
463*0Sstevel@tonic-gate 
464*0Sstevel@tonic-gate /*
465*0Sstevel@tonic-gate  * Simple removal of the log record from the log buffer queue.
466*0Sstevel@tonic-gate  * Make sure to manage the count of records queued.
467*0Sstevel@tonic-gate  */
468*0Sstevel@tonic-gate static struct nfslog_lr *
469*0Sstevel@tonic-gate remove_lrp_from_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
470*0Sstevel@tonic-gate {
471*0Sstevel@tonic-gate 	if (lbp->lrps == lrp) {
472*0Sstevel@tonic-gate 		if (lbp->lrps == lbp->lrps->next) {
473*0Sstevel@tonic-gate 			lbp->lrps = NULL;
474*0Sstevel@tonic-gate 		} else {
475*0Sstevel@tonic-gate 			lbp->lrps = lrp->next;
476*0Sstevel@tonic-gate 			remque(lrp);
477*0Sstevel@tonic-gate 		}
478*0Sstevel@tonic-gate 	} else {
479*0Sstevel@tonic-gate 		remque(lrp);
480*0Sstevel@tonic-gate 	}
481*0Sstevel@tonic-gate 	lbp->num_lrps--;
482*0Sstevel@tonic-gate 	return (lrp);
483*0Sstevel@tonic-gate }
484*0Sstevel@tonic-gate 
485*0Sstevel@tonic-gate /*
486*0Sstevel@tonic-gate  * Insert a log record struct on the log buffer struct.  The log buffer
487*0Sstevel@tonic-gate  * has a pointer to the head of a queue of log records that have been
488*0Sstevel@tonic-gate  * read from the buffer file but have not been processed yet because
489*0Sstevel@tonic-gate  * the record id did not match the sequence desired for processing.
490*0Sstevel@tonic-gate  * The insertion must be in the 'correct'/sorted order which adds
491*0Sstevel@tonic-gate  * to the complexity of this function.
492*0Sstevel@tonic-gate  */
493*0Sstevel@tonic-gate static void
494*0Sstevel@tonic-gate insert_lrp_to_lb(struct nfslog_buf *lbp, struct nfslog_lr *lrp)
495*0Sstevel@tonic-gate {
496*0Sstevel@tonic-gate 	int ins_rec_id = lrp->log_record.re_header.rh_rec_id;
497*0Sstevel@tonic-gate 	struct nfslog_lr *curlrp;
498*0Sstevel@tonic-gate 
499*0Sstevel@tonic-gate 	if (lbp->lrps == NULL) {
500*0Sstevel@tonic-gate 		/* that was easy */
501*0Sstevel@tonic-gate 		lbp->lrps = lrp;
502*0Sstevel@tonic-gate 	} else {
503*0Sstevel@tonic-gate 		/*
504*0Sstevel@tonic-gate 		 * Does this lrp go before the first on the list?
505*0Sstevel@tonic-gate 		 * If so, do the insertion by hand since insque is not
506*0Sstevel@tonic-gate 		 * as flexible when queueing an element to the head of
507*0Sstevel@tonic-gate 		 * a list.
508*0Sstevel@tonic-gate 		 */
509*0Sstevel@tonic-gate 		if (ins_rec_id < lbp->lrps->log_record.re_header.rh_rec_id) {
510*0Sstevel@tonic-gate 			lrp->next = lbp->lrps;
511*0Sstevel@tonic-gate 			lrp->prev = lbp->lrps->prev;
512*0Sstevel@tonic-gate 			lbp->lrps->prev->next = lrp;
513*0Sstevel@tonic-gate 			lbp->lrps->prev = lrp;
514*0Sstevel@tonic-gate 			lbp->lrps = lrp;
515*0Sstevel@tonic-gate 		} else {
516*0Sstevel@tonic-gate 			/*
517*0Sstevel@tonic-gate 			 * Search the queue for the correct insertion point.
518*0Sstevel@tonic-gate 			 * Be careful about the insque so that the record
519*0Sstevel@tonic-gate 			 * ends up in the right place.
520*0Sstevel@tonic-gate 			 */
521*0Sstevel@tonic-gate 			curlrp = lbp->lrps;
522*0Sstevel@tonic-gate 			do {
523*0Sstevel@tonic-gate 				if (ins_rec_id <
524*0Sstevel@tonic-gate 				curlrp->next->log_record.re_header.rh_rec_id)
525*0Sstevel@tonic-gate 					break;
526*0Sstevel@tonic-gate 				curlrp = curlrp->next;
527*0Sstevel@tonic-gate 			} while (curlrp != lbp->lrps);
528*0Sstevel@tonic-gate 			if (curlrp == lbp->lrps)
529*0Sstevel@tonic-gate 				insque(lrp, lbp->lrps->prev);
530*0Sstevel@tonic-gate 			else
531*0Sstevel@tonic-gate 				insque(lrp, curlrp);
532*0Sstevel@tonic-gate 		}
533*0Sstevel@tonic-gate 	}
534*0Sstevel@tonic-gate 	/* always keep track of how many we have */
535*0Sstevel@tonic-gate 	lbp->num_lrps++;
536*0Sstevel@tonic-gate }
537*0Sstevel@tonic-gate 
538*0Sstevel@tonic-gate /*
539*0Sstevel@tonic-gate  * We are rewriting the buffer header at the start of the log buffer
540*0Sstevel@tonic-gate  * for the sole purpose of resetting the bh_offset field.  This is
541*0Sstevel@tonic-gate  * supposed to represent the progress that the nfslogd daemon has made
542*0Sstevel@tonic-gate  * in its processing of the log buffer file.
543*0Sstevel@tonic-gate  * 'lbp->last_record_offset' contains the absolute offset of the end
544*0Sstevel@tonic-gate  * of the last element processed. The on-disk buffer offset is relative
545*0Sstevel@tonic-gate  * to the buffer header, therefore we subtract the length of the buffer
546*0Sstevel@tonic-gate  * header from the absolute offset.
547*0Sstevel@tonic-gate  */
548*0Sstevel@tonic-gate static void
549*0Sstevel@tonic-gate nfslog_rewrite_bufheader(struct nfslog_buf *lbp)
550*0Sstevel@tonic-gate {
551*0Sstevel@tonic-gate 	XDR xdrs;
552*0Sstevel@tonic-gate 	nfslog_buffer_header bh;
553*0Sstevel@tonic-gate 	/* size big enough for buffer header encode */
554*0Sstevel@tonic-gate #define	XBUFSIZE 128
555*0Sstevel@tonic-gate 	char buffer[XBUFSIZE];
556*0Sstevel@tonic-gate 	unsigned int wsize;
557*0Sstevel@tonic-gate 
558*0Sstevel@tonic-gate 	/*
559*0Sstevel@tonic-gate 	 * if version 1 buffer is large and the current offset cannot be
560*0Sstevel@tonic-gate 	 * represented, then don't update the offset in the buffer.
561*0Sstevel@tonic-gate 	 */
562*0Sstevel@tonic-gate 	if (lbp->bh.bh_flags & NFSLOG_BH_OFFSET_OVERFLOW) {
563*0Sstevel@tonic-gate 		/* No need to update the header - offset too big */
564*0Sstevel@tonic-gate 		return;
565*0Sstevel@tonic-gate 	}
566*0Sstevel@tonic-gate 	/*
567*0Sstevel@tonic-gate 	 * build the buffer header from the original that was saved
568*0Sstevel@tonic-gate 	 * on initialization; note that the offset is taken from the
569*0Sstevel@tonic-gate 	 * last record processed (the last offset that represents
570*0Sstevel@tonic-gate 	 * all records processed without any holes in the processing)
571*0Sstevel@tonic-gate 	 */
572*0Sstevel@tonic-gate 	bh = lbp->bh;
573*0Sstevel@tonic-gate 
574*0Sstevel@tonic-gate 	/*
575*0Sstevel@tonic-gate 	 * if version 1 buffer is large and the current offset cannot be
576*0Sstevel@tonic-gate 	 * represented in 32 bits, then save only the last valid offset
577*0Sstevel@tonic-gate 	 * in the buffer and mark the flags to indicate that.
578*0Sstevel@tonic-gate 	 */
579*0Sstevel@tonic-gate 	if ((bh.bh_version > 1) ||
580*0Sstevel@tonic-gate 		(lbp->last_record_offset - bh.bh_length < UINT32_MAX)) {
581*0Sstevel@tonic-gate 		bh.bh_offset = lbp->last_record_offset - bh.bh_length;
582*0Sstevel@tonic-gate 	} else {
583*0Sstevel@tonic-gate 		/* don't update the offset in the buffer */
584*0Sstevel@tonic-gate 		bh.bh_flags |= NFSLOG_BH_OFFSET_OVERFLOW;
585*0Sstevel@tonic-gate 		lbp->bh.bh_flags = bh.bh_flags;
586*0Sstevel@tonic-gate 		syslog(LOG_ERR, gettext(
587*0Sstevel@tonic-gate 			"nfslog_rewrite_bufheader: %s: offset does not fit "
588*0Sstevel@tonic-gate 			"in a 32 bit field\n"), lbp->bufpath);
589*0Sstevel@tonic-gate 	}
590*0Sstevel@tonic-gate 
591*0Sstevel@tonic-gate 	xdrmem_create(&xdrs, buffer, XBUFSIZE, XDR_ENCODE);
592*0Sstevel@tonic-gate 
593*0Sstevel@tonic-gate 	if (!xdr_nfslog_buffer_header(&xdrs, &bh)) {
594*0Sstevel@tonic-gate 		syslog(LOG_ERR, gettext(
595*0Sstevel@tonic-gate 			"error in re-writing buffer file %s header\n"),
596*0Sstevel@tonic-gate 			lbp->bufpath);
597*0Sstevel@tonic-gate 		return;
598*0Sstevel@tonic-gate 	}
599*0Sstevel@tonic-gate 
600*0Sstevel@tonic-gate 	wsize = xdr_getpos(&xdrs);
601*0Sstevel@tonic-gate 
602*0Sstevel@tonic-gate 	if (lbp->mmap_addr == (intptr_t)MAP_FAILED) {
603*0Sstevel@tonic-gate 		/* go to the beginning of the file */
604*0Sstevel@tonic-gate 		(void) lseek(lbp->fd, 0, SEEK_SET);
605*0Sstevel@tonic-gate 		(void) write(lbp->fd, buffer, wsize);
606*0Sstevel@tonic-gate 		(void) lseek(lbp->fd, lbp->next_rec, SEEK_SET);
607*0Sstevel@tonic-gate 		(void) fsync(lbp->fd);
608*0Sstevel@tonic-gate 	} else {
609*0Sstevel@tonic-gate 		bcopy(buffer, (void *)lbp->mmap_addr, wsize);
610*0Sstevel@tonic-gate 		(void) msync((void *)lbp->mmap_addr, wsize, MS_SYNC);
611*0Sstevel@tonic-gate 	}
612*0Sstevel@tonic-gate }
613*0Sstevel@tonic-gate 
614*0Sstevel@tonic-gate /*
615*0Sstevel@tonic-gate  * With the provided lrp, we will take and 'insert' the range that the
616*0Sstevel@tonic-gate  * record covered in the buffer file into a list of processed ranges
617*0Sstevel@tonic-gate  * for the buffer file.  These ranges represent the records processed
618*0Sstevel@tonic-gate  * but not 'marked' in the buffer header as being processed.
619*0Sstevel@tonic-gate  * This insertion process is being done for two reasons.  The first is that
620*0Sstevel@tonic-gate  * we do not want to pay the performance penalty of re-writing the buffer header
621*0Sstevel@tonic-gate  * for each record that we process.  The second reason is that the records
622*0Sstevel@tonic-gate  * may be processed out of order because of the unique ids.  This will occur
623*0Sstevel@tonic-gate  * if the kernel has written the records to the buffer file out of order.
624*0Sstevel@tonic-gate  * The read routine will 'sort' them as the records are read.
625*0Sstevel@tonic-gate  *
626*0Sstevel@tonic-gate  * We do not want to re-write the buffer header such that a record is
627*0Sstevel@tonic-gate  * represented and being processed when it has not been.  In the case
628*0Sstevel@tonic-gate  * that the nfslogd daemon restarts processing and the buffer header
629*0Sstevel@tonic-gate  * has been re-written improperly, some records could be skipped.
630*0Sstevel@tonic-gate  * We will be taking the conservative approach and only writing buffer
631*0Sstevel@tonic-gate  * header offsets when the entire offset range has been processed.
632*0Sstevel@tonic-gate  */
633*0Sstevel@tonic-gate static void
634*0Sstevel@tonic-gate nfslog_ins_last_rec_processed(struct nfslog_lr *lrp)
635*0Sstevel@tonic-gate {
636*0Sstevel@tonic-gate 	struct processed_records *prp, *tp;
637*0Sstevel@tonic-gate 
638*0Sstevel@tonic-gate 	/* init the data struct as if it were the only one */
639*0Sstevel@tonic-gate 	prp = malloc(sizeof (*prp));
640*0Sstevel@tonic-gate 	prp->next = prp->prev = prp;
641*0Sstevel@tonic-gate 	prp->start_offset = lrp->f_offset;
642*0Sstevel@tonic-gate 	prp->len = lrp->recsize;
643*0Sstevel@tonic-gate 	prp->num_recs = 1;
644*0Sstevel@tonic-gate 
645*0Sstevel@tonic-gate 	/* always add since we know we are going to insert */
646*0Sstevel@tonic-gate 	lrp->lbp->num_pr_queued++;
647*0Sstevel@tonic-gate 
648*0Sstevel@tonic-gate 	/* Is this the first one?  If so, take the easy way out */
649*0Sstevel@tonic-gate 	if (lrp->lbp->prp == NULL) {
650*0Sstevel@tonic-gate 		lrp->lbp->prp = prp;
651*0Sstevel@tonic-gate 	} else {
652*0Sstevel@tonic-gate 		/* sort on insertion... */
653*0Sstevel@tonic-gate 		tp = lrp->lbp->prp;
654*0Sstevel@tonic-gate 		do {
655*0Sstevel@tonic-gate 			if (prp->start_offset < tp->start_offset)
656*0Sstevel@tonic-gate 				break;
657*0Sstevel@tonic-gate 			tp = tp->next;
658*0Sstevel@tonic-gate 		} while (tp != lrp->lbp->prp);
659*0Sstevel@tonic-gate 		/* insert where appropriate (before the one we found */
660*0Sstevel@tonic-gate 		insque(prp, tp->prev);
661*0Sstevel@tonic-gate 		/*
662*0Sstevel@tonic-gate 		 * special case where the insertion was done at the
663*0Sstevel@tonic-gate 		 * head of the list
664*0Sstevel@tonic-gate 		 */
665*0Sstevel@tonic-gate 		if (tp == lrp->lbp->prp && prp->start_offset < tp->start_offset)
666*0Sstevel@tonic-gate 			lrp->lbp->prp = prp;
667*0Sstevel@tonic-gate 
668*0Sstevel@tonic-gate 		/*
669*0Sstevel@tonic-gate 		 * now that the entry is in place, we need to see if it can
670*0Sstevel@tonic-gate 		 * be combined with the previous or following entries.
671*0Sstevel@tonic-gate 		 * combination is done by adding to the length.
672*0Sstevel@tonic-gate 		 */
673*0Sstevel@tonic-gate 		if (prp->start_offset ==
674*0Sstevel@tonic-gate 			(prp->prev->start_offset + prp->prev->len)) {
675*0Sstevel@tonic-gate 			tp = prp->prev;
676*0Sstevel@tonic-gate 			remque(prp);
677*0Sstevel@tonic-gate 			tp->len += prp->len;
678*0Sstevel@tonic-gate 			tp->num_recs += prp->num_recs;
679*0Sstevel@tonic-gate 			free(prp);
680*0Sstevel@tonic-gate 			prp = tp;
681*0Sstevel@tonic-gate 		}
682*0Sstevel@tonic-gate 		if (prp->next->start_offset ==
683*0Sstevel@tonic-gate 			(prp->start_offset + prp->len)) {
684*0Sstevel@tonic-gate 			prp->len += prp->next->len;
685*0Sstevel@tonic-gate 			prp->num_recs += prp->next->num_recs;
686*0Sstevel@tonic-gate 			tp = prp->next;
687*0Sstevel@tonic-gate 			remque(tp);
688*0Sstevel@tonic-gate 			free(tp);
689*0Sstevel@tonic-gate 		}
690*0Sstevel@tonic-gate 	}
691*0Sstevel@tonic-gate 
692*0Sstevel@tonic-gate 	if (lrp->lbp->num_pr_queued > MAX_RECS_TO_DELAY) {
693*0Sstevel@tonic-gate 		prp = lrp->lbp->prp;
694*0Sstevel@tonic-gate 		if (lrp->lbp->last_record_offset ==
695*0Sstevel@tonic-gate 			prp->start_offset) {
696*0Sstevel@tonic-gate 
697*0Sstevel@tonic-gate 			/* adjust the offset for the entire buffer */
698*0Sstevel@tonic-gate 			lrp->lbp->last_record_offset =
699*0Sstevel@tonic-gate 				prp->start_offset + prp->len;
700*0Sstevel@tonic-gate 
701*0Sstevel@tonic-gate 			nfslog_rewrite_bufheader(lrp->lbp);
702*0Sstevel@tonic-gate 
703*0Sstevel@tonic-gate 			tp = prp->next;
704*0Sstevel@tonic-gate 			if (tp != prp)
705*0Sstevel@tonic-gate 				remque(prp);
706*0Sstevel@tonic-gate 			else
707*0Sstevel@tonic-gate 				tp = NULL;
708*0Sstevel@tonic-gate 			lrp->lbp->prp = tp;
709*0Sstevel@tonic-gate 			lrp->lbp->num_pr_queued -= prp->num_recs;
710*0Sstevel@tonic-gate 			free(prp);
711*0Sstevel@tonic-gate 		}
712*0Sstevel@tonic-gate 	}
713*0Sstevel@tonic-gate }
714*0Sstevel@tonic-gate 
715*0Sstevel@tonic-gate /*
716*0Sstevel@tonic-gate  * nfslog_get_logrecord is responsible for retrieving the next log record
717*0Sstevel@tonic-gate  * from the buffer file. This would normally be very straightforward but there
718*0Sstevel@tonic-gate  * is the added complexity of attempting to order the requests coming out of
719*0Sstevel@tonic-gate  * the buffer file.  The fundamental problems is that the kernel nfs logging
720*0Sstevel@tonic-gate  * functionality does not guarantee that the records were written to the file
721*0Sstevel@tonic-gate  * in the order that the NFS server processed them.  This can cause a problem
722*0Sstevel@tonic-gate  * in the fh -> pathname mapping in the case were a lookup for a file comes
723*0Sstevel@tonic-gate  * later in the buffer file than other operations on the lookup's target.
724*0Sstevel@tonic-gate  * The fh mapping database will not have an entry and will therefore not
725*0Sstevel@tonic-gate  * be able to map the fh to a name.
726*0Sstevel@tonic-gate  *
727*0Sstevel@tonic-gate  * So to solve this problem, the kernel nfs logging code tags each record
728*0Sstevel@tonic-gate  * with a monotonically increasing id and is guaranteed to be allocated
729*0Sstevel@tonic-gate  * in the order that the requests were processed.  Realize however that
730*0Sstevel@tonic-gate  * this processing guarantee is essentially for one thread on one client.
731*0Sstevel@tonic-gate  * This id mechanism does not order all requests since it is only the
732*0Sstevel@tonic-gate  * single client/single thread case that is most concerning to us here.
733*0Sstevel@tonic-gate  *
734*0Sstevel@tonic-gate  * This function will do the 'sorting' of the requests as they are
735*0Sstevel@tonic-gate  * read from the buffer file.  The sorting needs to take into account
736*0Sstevel@tonic-gate  * that some ids may be missing (operations not logged but ids allocated)
737*0Sstevel@tonic-gate  * and that the id field will eventually wrap over MAXINT.
738*0Sstevel@tonic-gate  *
739*0Sstevel@tonic-gate  * Complexity to solve the fh -> pathname mapping issue.
740*0Sstevel@tonic-gate  */
741*0Sstevel@tonic-gate struct nfslog_lr *
742*0Sstevel@tonic-gate nfslog_get_logrecord(struct nfslog_buf *lbp)
743*0Sstevel@tonic-gate {
744*0Sstevel@tonic-gate 	/* figure out what the next should be if the world were perfect */
745*0Sstevel@tonic-gate 	unsigned int next_rec_id = lbp->last_rec_id + 1;
746*0Sstevel@tonic-gate 	struct nfslog_lr *lrp = NULL;
747*0Sstevel@tonic-gate 
748*0Sstevel@tonic-gate 	/*
749*0Sstevel@tonic-gate 	 * First we check the queued records on the log buffer struct
750*0Sstevel@tonic-gate 	 * to see if the one we want is there.  The records are sorted
751*0Sstevel@tonic-gate 	 * on the record id during the insertions to the queue so that
752*0Sstevel@tonic-gate 	 * this check is easy.
753*0Sstevel@tonic-gate 	 */
754*0Sstevel@tonic-gate 	if (lbp->lrps != NULL) {
755*0Sstevel@tonic-gate 		/* Does the first record match ? */
756*0Sstevel@tonic-gate 		if (lbp->lrps->log_record.re_header.rh_rec_id == next_rec_id) {
757*0Sstevel@tonic-gate 			lrp = remove_lrp_from_lb(lbp, lbp->lrps);
758*0Sstevel@tonic-gate 			lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
759*0Sstevel@tonic-gate 		} else {
760*0Sstevel@tonic-gate 			/*
761*0Sstevel@tonic-gate 			 * Here we are checking for wrap of the record id
762*0Sstevel@tonic-gate 			 * since it is an unsigned in.  The idea is that
763*0Sstevel@tonic-gate 			 * if there is a huge span between what we expect
764*0Sstevel@tonic-gate 			 * and what is queued then we need to flush/empty
765*0Sstevel@tonic-gate 			 * the queued records first.
766*0Sstevel@tonic-gate 			 */
767*0Sstevel@tonic-gate 			if (next_rec_id <
768*0Sstevel@tonic-gate 				lbp->lrps->log_record.re_header.rh_rec_id &&
769*0Sstevel@tonic-gate 				((lbp->lrps->log_record.re_header.rh_rec_id -
770*0Sstevel@tonic-gate 					next_rec_id) > (MAXINT / 2))) {
771*0Sstevel@tonic-gate 
772*0Sstevel@tonic-gate 				lrp = remove_lrp_from_lb(lbp, lbp->lrps);
773*0Sstevel@tonic-gate 				lbp->last_rec_id =
774*0Sstevel@tonic-gate 					lrp->log_record.re_header.rh_rec_id;
775*0Sstevel@tonic-gate 			}
776*0Sstevel@tonic-gate 		}
777*0Sstevel@tonic-gate 	}
778*0Sstevel@tonic-gate 	/*
779*0Sstevel@tonic-gate 	 * So the first queued record didn't match (or there were no queued
780*0Sstevel@tonic-gate 	 * records to look at).  Now we go to the buffer file looking for
781*0Sstevel@tonic-gate 	 * the expected log record based on its id.  We loop looking for
782*0Sstevel@tonic-gate 	 * a matching records and save/queue the records that don't match.
783*0Sstevel@tonic-gate 	 * Note that we will queue a maximum number to handle the case
784*0Sstevel@tonic-gate 	 * of a missing record id or a queue that is very confused.  We don't
785*0Sstevel@tonic-gate 	 * want to consume too much memory.
786*0Sstevel@tonic-gate 	 */
787*0Sstevel@tonic-gate 	while (lrp == NULL) {
788*0Sstevel@tonic-gate 		/* Have we queued too many for this buffer? */
789*0Sstevel@tonic-gate 		if (lbp->num_lrps >= MAX_LRS_READ_AHEAD) {
790*0Sstevel@tonic-gate 			lrp = remove_lrp_from_lb(lbp, lbp->lrps);
791*0Sstevel@tonic-gate 			lbp->last_rec_id = lrp->log_record.re_header.rh_rec_id;
792*0Sstevel@tonic-gate 			break;
793*0Sstevel@tonic-gate 		}
794*0Sstevel@tonic-gate 		/*
795*0Sstevel@tonic-gate 		 * Get a record from the buffer file.  If none are available,
796*0Sstevel@tonic-gate 		 * this is probably and EOF condition (could be a read error
797*0Sstevel@tonic-gate 		 * as well but that is masked. :-().  No records in the
798*0Sstevel@tonic-gate 		 * file means that we need to pull any queued records
799*0Sstevel@tonic-gate 		 * so that we don't miss any in the processing.
800*0Sstevel@tonic-gate 		 */
801*0Sstevel@tonic-gate 		if ((lrp = nfslog_read_buffer(lbp)) == NULL) {
802*0Sstevel@tonic-gate 			if (lbp->lrps != NULL) {
803*0Sstevel@tonic-gate 				lrp = remove_lrp_from_lb(lbp, lbp->lrps);
804*0Sstevel@tonic-gate 				lbp->last_rec_id =
805*0Sstevel@tonic-gate 					lrp->log_record.re_header.rh_rec_id;
806*0Sstevel@tonic-gate 			} else {
807*0Sstevel@tonic-gate 				return (NULL);  /* it was really and EOF */
808*0Sstevel@tonic-gate 			}
809*0Sstevel@tonic-gate 		} else {
810*0Sstevel@tonic-gate 			/*
811*0Sstevel@tonic-gate 			 * Just read a record from the buffer file and now we
812*0Sstevel@tonic-gate 			 * need to XDR the record header so that we can take
813*0Sstevel@tonic-gate 			 * a look at the record id.
814*0Sstevel@tonic-gate 			 */
815*0Sstevel@tonic-gate 			if (!xdr_nfslog_request_record(&lrp->xdrs,
816*0Sstevel@tonic-gate 				&lrp->log_record)) {
817*0Sstevel@tonic-gate 				/* Free and return EOF/NULL on error */
818*0Sstevel@tonic-gate 				nfslog_free_logrecord(lrp, FALSE);
819*0Sstevel@tonic-gate 				return (NULL);
820*0Sstevel@tonic-gate 			}
821*0Sstevel@tonic-gate 			/*
822*0Sstevel@tonic-gate 			 * If the new record is less than or matches the
823*0Sstevel@tonic-gate 			 * expected record id, then we return this record
824*0Sstevel@tonic-gate 			 */
825*0Sstevel@tonic-gate 			if (lrp->log_record.re_header.rh_rec_id <=
826*0Sstevel@tonic-gate 				next_rec_id) {
827*0Sstevel@tonic-gate 
828*0Sstevel@tonic-gate 				lbp->last_rec_id =
829*0Sstevel@tonic-gate 					lrp->log_record.re_header.rh_rec_id;
830*0Sstevel@tonic-gate 			} else {
831*0Sstevel@tonic-gate 				/*
832*0Sstevel@tonic-gate 				 * This is not the one we were looking
833*0Sstevel@tonic-gate 				 * for; queue it for later processing
834*0Sstevel@tonic-gate 				 * (queueing sorts on record id)
835*0Sstevel@tonic-gate 				 */
836*0Sstevel@tonic-gate 				insert_lrp_to_lb(lbp, lrp);
837*0Sstevel@tonic-gate 				lrp = NULL;
838*0Sstevel@tonic-gate 			}
839*0Sstevel@tonic-gate 		}
840*0Sstevel@tonic-gate 	}
841*0Sstevel@tonic-gate 	return (lrp);
842*0Sstevel@tonic-gate }
843*0Sstevel@tonic-gate 
844*0Sstevel@tonic-gate /*
845*0Sstevel@tonic-gate  * Free the log record provided.
846*0Sstevel@tonic-gate  * This is complex because the associated XDR streams also need to be freed
847*0Sstevel@tonic-gate  * since allocation could have occured during the DECODE phase.  The record
848*0Sstevel@tonic-gate  * header, args and results need to be XDR_FREEd.  The xdr funtions will
849*0Sstevel@tonic-gate  * be provided if a free needs to be done.
850*0Sstevel@tonic-gate  *
851*0Sstevel@tonic-gate  * Note that caller tells us if the record being freed was processed.
852*0Sstevel@tonic-gate  * If so, then the buffer header should be updated.  Updating the buffer
853*0Sstevel@tonic-gate  * header keeps track of where the nfslogd daemon left off in its processing
854*0Sstevel@tonic-gate  * if it is unable to complete the entire file.
855*0Sstevel@tonic-gate  */
856*0Sstevel@tonic-gate void
857*0Sstevel@tonic-gate nfslog_free_logrecord(struct nfslog_lr *lrp, bool_t processing_complete)
858*0Sstevel@tonic-gate {
859*0Sstevel@tonic-gate 	caddr_t			buffer;
860*0Sstevel@tonic-gate 	nfslog_request_record 	*reqrec;
861*0Sstevel@tonic-gate 
862*0Sstevel@tonic-gate 	if (processing_complete) {
863*0Sstevel@tonic-gate 		nfslog_ins_last_rec_processed(lrp);
864*0Sstevel@tonic-gate 	}
865*0Sstevel@tonic-gate 
866*0Sstevel@tonic-gate 	reqrec = &lrp->log_record;
867*0Sstevel@tonic-gate 
868*0Sstevel@tonic-gate 	buffer = (lrp->buffer != NULL ? lrp->buffer : (caddr_t)lrp->record);
869*0Sstevel@tonic-gate 
870*0Sstevel@tonic-gate 	xdrmem_create(&lrp->xdrs, buffer, lrp->recsize, XDR_FREE);
871*0Sstevel@tonic-gate 
872*0Sstevel@tonic-gate 	(void) xdr_nfslog_request_record(&lrp->xdrs, reqrec);
873*0Sstevel@tonic-gate 
874*0Sstevel@tonic-gate 	if (lrp->xdrargs != NULL && reqrec->re_rpc_arg)
875*0Sstevel@tonic-gate 		(*lrp->xdrargs)(&lrp->xdrs, reqrec->re_rpc_arg);
876*0Sstevel@tonic-gate 
877*0Sstevel@tonic-gate 	if (reqrec->re_rpc_arg)
878*0Sstevel@tonic-gate 		free(reqrec->re_rpc_arg);
879*0Sstevel@tonic-gate 
880*0Sstevel@tonic-gate 	if (lrp->xdrres != NULL && reqrec->re_rpc_res)
881*0Sstevel@tonic-gate 		(*lrp->xdrres)(&lrp->xdrs, reqrec->re_rpc_res);
882*0Sstevel@tonic-gate 
883*0Sstevel@tonic-gate 	if (reqrec->re_rpc_res)
884*0Sstevel@tonic-gate 		free(reqrec->re_rpc_res);
885*0Sstevel@tonic-gate 
886*0Sstevel@tonic-gate 	free_lrp(lrp);
887*0Sstevel@tonic-gate }
888*0Sstevel@tonic-gate 
889*0Sstevel@tonic-gate static void
890*0Sstevel@tonic-gate free_lrp(struct nfslog_lr *lrp)
891*0Sstevel@tonic-gate {
892*0Sstevel@tonic-gate 	if (lrp->buffer != NULL)
893*0Sstevel@tonic-gate 		free(lrp->buffer);
894*0Sstevel@tonic-gate 	free(lrp);
895*0Sstevel@tonic-gate }
896*0Sstevel@tonic-gate 
/*
 * Utility function used elsewhere.
 *
 * Append a quoted hex dump of the len bytes at buf to outbuf, starting at
 * *outbufoffsetp, and store the advanced offset back in *outbufoffsetp.
 *
 * The output is a space, an opening quote, the hex digits and a closing
 * quote.  Buffers of at most sizeof (int) bytes are printed as one run of
 * two-digit bytes.  Longer buffers are printed in integer-sized groups:
 * any bytes in front of the first int-aligned address are printed as a
 * run of bytes, each aligned word is then read as an unsigned int and
 * printed as " %08x", and leftover bytes are printed as a final run.
 * The grouping therefore depends on the alignment of buf.
 *
 * maxoffset is only checked BEFORE each piece is formatted, and the
 * opening quote is written unconditionally, so output can run past
 * maxoffset by the size of one piece (up to 9 characters plus the
 * terminating NUL).  The caller must leave that much room in outbuf
 * beyond maxoffset.  Once maxoffset is reached the dump is cut short and
 * the closing quote is omitted.
 *
 * A negative len is treated as an empty buffer.
 */
void
nfslog_opaque_print_buf(void *buf, int len, char *outbuf, int *outbufoffsetp,
	int maxoffset)
{
	const int	wordsz = (int)sizeof (int);
	int		i, j;
	unsigned int	*ip;
	unsigned char	*u_buf = (unsigned char *)buf;
	int		outbufoffset = *outbufoffsetp;

	/*
	 * Without this, a negative length would be promoted to a huge
	 * unsigned value by the size comparisons and over-read buf.
	 */
	if (len < 0)
		len = 0;

	outbufoffset += sprintf(&outbuf[outbufoffset], " \"");
	if (len <= wordsz) {
		/* Short buffer: a single run of bytes, no grouping */
		for (j = 0; (j < len) && (outbufoffset < maxoffset);
			j++, u_buf++)
			outbufoffset += sprintf(&outbuf[outbufoffset],
						"%02x", *u_buf);
	} else {
		/* More than 4 bytes, print with spaces in integer offsets */
		j = (int)((uintptr_t)buf % sizeof (int));
		i = 0;
		if (j > 0) {
			/* print the bytes leading up to int alignment */
			i = wordsz - j;
			for (; (j < wordsz) && (outbufoffset < maxoffset);
				j++, u_buf++)
				outbufoffset += sprintf(&outbuf[outbufoffset],
							"%02x", *u_buf);
		}
		/* LINTED */
		ip = (unsigned int *)u_buf;
		for (; ((len - i) >= wordsz) && (outbufoffset < maxoffset);
			i += wordsz, ip++) {
			outbufoffset += sprintf(&outbuf[outbufoffset],
						" %08x", *ip);
		}
		if (i < len) {
			/* Last element not int */
			u_buf = (unsigned char *)ip;
			if (i > j)	/* not first element */
				outbufoffset += sprintf(&outbuf[outbufoffset],
							" ");
			for (; (i < len) && (outbufoffset < maxoffset);
				i++, u_buf++) {
				outbufoffset += sprintf(&outbuf[outbufoffset],
							"%02x", *u_buf);
			}
		}
	}

	/*
	 * Both the short and the long form end here, so the quote is
	 * closed and the caller's offset advanced in every case.
	 */
	if (outbufoffset < maxoffset)
		outbufoffset += sprintf(&outbuf[outbufoffset], "\"");
	*outbufoffsetp = outbufoffset;
}
947