xref: /onnv-gate/usr/src/cmd/fm/fmd/common/fmd_log.c (revision 0:68f95e015346)
1*0Sstevel@tonic-gate /*
2*0Sstevel@tonic-gate  * CDDL HEADER START
3*0Sstevel@tonic-gate  *
4*0Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*0Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*0Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*0Sstevel@tonic-gate  * with the License.
8*0Sstevel@tonic-gate  *
9*0Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*0Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*0Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*0Sstevel@tonic-gate  * and limitations under the License.
13*0Sstevel@tonic-gate  *
14*0Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*0Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*0Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*0Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*0Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*0Sstevel@tonic-gate  *
20*0Sstevel@tonic-gate  * CDDL HEADER END
21*0Sstevel@tonic-gate  */
22*0Sstevel@tonic-gate /*
23*0Sstevel@tonic-gate  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24*0Sstevel@tonic-gate  * Use is subject to license terms.
25*0Sstevel@tonic-gate  */
26*0Sstevel@tonic-gate 
27*0Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*0Sstevel@tonic-gate 
/*
 * FMD Log File Subsystem
 *
 * Events are written to one of two log files as they are received or created;
 * the error log tracks all ereport.* events received on the inbound event
 * transport, and the fault log tracks all list.* events generated by fmd or
 * its client modules.  In addition, we use the same log file format to cache
 * state and events associated with ASRUs that are named in a diagnosis.
 *
 * The log files use the exacct format manipulated by libexacct(3LIB) and
 * originally defined in PSARC 1999/119.  However, the exacct library was
 * designed primarily for read-only clients and without the synchronous i/o
 * considerations and seeking required for fmd, so we use libexacct here only
 * to read and write the file headers and to pack data from memory into a file
 * bytestream.  All of the i/o and file offset manipulations are performed by
 * the fmd code below.  Our exacct file management uses the following grammar:
 *
 * file := hdr toc event*
 * hdr := EXD_FMA_LABEL EXD_FMA_VERSION EXD_FMA_OSREL EXD_FMA_OSVER EXD_FMA_PLAT
 * toc := EXD_FMA_OFFSET
 * event := EXD_FMA_TODSEC EXD_FMA_TODNSEC EXD_FMA_NVLIST evref*
 * evref := EXD_FMA_MAJOR EXD_FMA_MINOR EXD_FMA_INODE EXD_FMA_OFFSET
 *
 * Any event can be uniquely identified by the tuple (file, offset) where file
 * is encoded as (major, minor, inode) when we are cross-linking files.  Note
 * that we break the file's dev_t out into its two 32-bit components to
 * permit development of either 32-bit or 64-bit log readers and writers; the
 * LFS APIs do not yet export a 64-bit dev_t to fstat64(), so there is no way
 * for a 32-bit application to retrieve and store a 64-bit dev_t.
 *
 * In order to replay events in the event of an fmd crash, events are initially
 * written to the error log using the group catalog tag EXD_GROUP_RFMA by the
 * fmd_log_append() function.  Later, once an event transitions from the
 * received state to one of its other states (see fmd_event.c for details),
 * fmd_log_commit() is used to overwrite the tag with EXD_GROUP_FMA, indicating
 * that the event is fully processed and no longer needs to be replayed.
 */
66*0Sstevel@tonic-gate 
67*0Sstevel@tonic-gate #include <sys/types.h>
68*0Sstevel@tonic-gate #include <sys/mkdev.h>
69*0Sstevel@tonic-gate #include <sys/statvfs.h>
70*0Sstevel@tonic-gate #include <sys/fm/protocol.h>
71*0Sstevel@tonic-gate #include <sys/exacct_impl.h>
72*0Sstevel@tonic-gate 
73*0Sstevel@tonic-gate #include <unistd.h>
74*0Sstevel@tonic-gate #include <limits.h>
75*0Sstevel@tonic-gate #include <fcntl.h>
76*0Sstevel@tonic-gate #include <ctype.h>
77*0Sstevel@tonic-gate 
78*0Sstevel@tonic-gate #include <fmd_alloc.h>
79*0Sstevel@tonic-gate #include <fmd_error.h>
80*0Sstevel@tonic-gate #include <fmd_string.h>
81*0Sstevel@tonic-gate #include <fmd_event.h>
82*0Sstevel@tonic-gate #include <fmd_conf.h>
83*0Sstevel@tonic-gate #include <fmd_subr.h>
84*0Sstevel@tonic-gate #include <fmd_case.h>
85*0Sstevel@tonic-gate #include <fmd_log.h>
86*0Sstevel@tonic-gate 
87*0Sstevel@tonic-gate #include <fmd.h>
88*0Sstevel@tonic-gate 
/*
 * Exacct catalog tags used in fmd log files.  Every tag is the bitwise OR of
 * one EXT_* constant, EXC_DEFAULT, and one EXD_* constant; FMD_LOG_CAT()
 * spells out that combination once.
 */
#define	FMD_LOG_CAT(ext, exd)	((ext) | EXC_DEFAULT | (exd))

/*
 * Group tags.  fmd_log_append() tags event groups with CAT_FMA_RGROUP when
 * the log has FMD_LF_REPLAY set and with CAT_FMA_GROUP otherwise.
 */
#define	CAT_FMA_RGROUP	FMD_LOG_CAT(EXT_GROUP, EXD_GROUP_RFMA)
#define	CAT_FMA_GROUP	FMD_LOG_CAT(EXT_GROUP, EXD_GROUP_FMA)

/* Items of the file header group written by fmd_log_write_hdr(). */
#define	CAT_FMA_LABEL	FMD_LOG_CAT(EXT_STRING, EXD_FMA_LABEL)
#define	CAT_FMA_VERSION	FMD_LOG_CAT(EXT_STRING, EXD_FMA_VERSION)
#define	CAT_FMA_OSREL	FMD_LOG_CAT(EXT_STRING, EXD_FMA_OSREL)
#define	CAT_FMA_OSVER	FMD_LOG_CAT(EXT_STRING, EXD_FMA_OSVER)
#define	CAT_FMA_PLAT	FMD_LOG_CAT(EXT_STRING, EXD_FMA_PLAT)

/* Items of an event record: time of day and the packed nvlist. */
#define	CAT_FMA_TODSEC	FMD_LOG_CAT(EXT_UINT64, EXD_FMA_TODSEC)
#define	CAT_FMA_TODNSEC	FMD_LOG_CAT(EXT_UINT64, EXD_FMA_TODNSEC)
#define	CAT_FMA_NVLIST	FMD_LOG_CAT(EXT_RAW, EXD_FMA_NVLIST)

/* Items of an event reference; OFFSET is also the sole item of the toc. */
#define	CAT_FMA_MAJOR	FMD_LOG_CAT(EXT_UINT32, EXD_FMA_MAJOR)
#define	CAT_FMA_MINOR	FMD_LOG_CAT(EXT_UINT32, EXD_FMA_MINOR)
#define	CAT_FMA_INODE	FMD_LOG_CAT(EXT_UINT64, EXD_FMA_INODE)
#define	CAT_FMA_OFFSET	FMD_LOG_CAT(EXT_UINT64, EXD_FMA_OFFSET)
104*0Sstevel@tonic-gate 
105*0Sstevel@tonic-gate static ssize_t
106*0Sstevel@tonic-gate fmd_log_write(fmd_log_t *lp, const void *buf, size_t n)
107*0Sstevel@tonic-gate {
108*0Sstevel@tonic-gate 	ssize_t resid = n;
109*0Sstevel@tonic-gate 	ssize_t len;
110*0Sstevel@tonic-gate 
111*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&lp->log_lock));
112*0Sstevel@tonic-gate 
113*0Sstevel@tonic-gate 	while (resid != 0) {
114*0Sstevel@tonic-gate 		if ((len = write(lp->log_fd, buf, resid)) <= 0)
115*0Sstevel@tonic-gate 			break;
116*0Sstevel@tonic-gate 
117*0Sstevel@tonic-gate 		resid -= len;
118*0Sstevel@tonic-gate 		buf = (char *)buf + len;
119*0Sstevel@tonic-gate 	}
120*0Sstevel@tonic-gate 
121*0Sstevel@tonic-gate 	if (resid == n && n != 0)
122*0Sstevel@tonic-gate 		return (-1);
123*0Sstevel@tonic-gate 
124*0Sstevel@tonic-gate 	return (n - resid);
125*0Sstevel@tonic-gate }
126*0Sstevel@tonic-gate 
127*0Sstevel@tonic-gate static int
128*0Sstevel@tonic-gate fmd_log_write_hdr(fmd_log_t *lp, const char *tag)
129*0Sstevel@tonic-gate {
130*0Sstevel@tonic-gate 	ea_object_t hdr, toc, i0, i1, i2, i3, i4, i5;
131*0Sstevel@tonic-gate 	const char *osrel, *osver, *plat;
132*0Sstevel@tonic-gate 	off64_t off = 0;
133*0Sstevel@tonic-gate 	int err = 0;
134*0Sstevel@tonic-gate 
135*0Sstevel@tonic-gate 	(void) fmd_conf_getprop(fmd.d_conf, "osrelease", &osrel);
136*0Sstevel@tonic-gate 	(void) fmd_conf_getprop(fmd.d_conf, "osversion", &osver);
137*0Sstevel@tonic-gate 	(void) fmd_conf_getprop(fmd.d_conf, "platform", &plat);
138*0Sstevel@tonic-gate 
139*0Sstevel@tonic-gate 	err |= ea_set_group(&hdr, CAT_FMA_GROUP);
140*0Sstevel@tonic-gate 	err |= ea_set_group(&toc, CAT_FMA_GROUP);
141*0Sstevel@tonic-gate 
142*0Sstevel@tonic-gate 	err |= ea_set_item(&i0, CAT_FMA_LABEL, tag, 0);
143*0Sstevel@tonic-gate 	err |= ea_set_item(&i1, CAT_FMA_VERSION, fmd.d_version, 0);
144*0Sstevel@tonic-gate 	err |= ea_set_item(&i2, CAT_FMA_OSREL, osrel, 0);
145*0Sstevel@tonic-gate 	err |= ea_set_item(&i3, CAT_FMA_OSVER, osver, 0);
146*0Sstevel@tonic-gate 	err |= ea_set_item(&i4, CAT_FMA_PLAT, plat, 0);
147*0Sstevel@tonic-gate 	err |= ea_set_item(&i5, CAT_FMA_OFFSET, &off, 0);
148*0Sstevel@tonic-gate 
149*0Sstevel@tonic-gate 	(void) ea_attach_to_group(&hdr, &i0);
150*0Sstevel@tonic-gate 	(void) ea_attach_to_group(&hdr, &i1);
151*0Sstevel@tonic-gate 	(void) ea_attach_to_group(&hdr, &i2);
152*0Sstevel@tonic-gate 	(void) ea_attach_to_group(&hdr, &i3);
153*0Sstevel@tonic-gate 	(void) ea_attach_to_group(&hdr, &i4);
154*0Sstevel@tonic-gate 	(void) ea_attach_to_group(&toc, &i5);
155*0Sstevel@tonic-gate 
156*0Sstevel@tonic-gate 	if (err == 0) {
157*0Sstevel@tonic-gate 		size_t hdr_size = ea_pack_object(&hdr, NULL, 0);
158*0Sstevel@tonic-gate 		size_t toc_size = ea_pack_object(&toc, NULL, 0);
159*0Sstevel@tonic-gate 
160*0Sstevel@tonic-gate 		size_t size = hdr_size + toc_size;
161*0Sstevel@tonic-gate 		void *buf = fmd_alloc(size, FMD_SLEEP);
162*0Sstevel@tonic-gate 
163*0Sstevel@tonic-gate 		(void) ea_pack_object(&hdr, buf, hdr_size);
164*0Sstevel@tonic-gate 		(void) ea_pack_object(&toc, (char *)buf + hdr_size, toc_size);
165*0Sstevel@tonic-gate 
166*0Sstevel@tonic-gate 		if ((lp->log_off = lseek64(lp->log_fd, 0, SEEK_END)) == -1L)
167*0Sstevel@tonic-gate 			fmd_panic("failed to seek log %s", lp->log_name);
168*0Sstevel@tonic-gate 
169*0Sstevel@tonic-gate 		if (fmd_log_write(lp, buf, size) != size)
170*0Sstevel@tonic-gate 			err = errno; /* save errno for fmd_set_errno() below */
171*0Sstevel@tonic-gate 
172*0Sstevel@tonic-gate 		fmd_free(buf, size);
173*0Sstevel@tonic-gate 
174*0Sstevel@tonic-gate 		lp->log_toc = lp->log_off + hdr_size;
175*0Sstevel@tonic-gate 		lp->log_beg = lp->log_off + hdr_size + toc_size;
176*0Sstevel@tonic-gate 		lp->log_off = lp->log_off + hdr_size + toc_size;
177*0Sstevel@tonic-gate 
178*0Sstevel@tonic-gate 		if (lp->log_off != lseek64(lp->log_fd, 0, SEEK_END))
179*0Sstevel@tonic-gate 			fmd_panic("eof off != log_off 0x%llx\n", lp->log_off);
180*0Sstevel@tonic-gate 	} else
181*0Sstevel@tonic-gate 		err = EFMD_LOG_EXACCT;
182*0Sstevel@tonic-gate 
183*0Sstevel@tonic-gate 	(void) ea_free_item(&i0, EUP_ALLOC);
184*0Sstevel@tonic-gate 	(void) ea_free_item(&i1, EUP_ALLOC);
185*0Sstevel@tonic-gate 	(void) ea_free_item(&i2, EUP_ALLOC);
186*0Sstevel@tonic-gate 	(void) ea_free_item(&i3, EUP_ALLOC);
187*0Sstevel@tonic-gate 	(void) ea_free_item(&i4, EUP_ALLOC);
188*0Sstevel@tonic-gate 	(void) ea_free_item(&i5, EUP_ALLOC);
189*0Sstevel@tonic-gate 
190*0Sstevel@tonic-gate 	return (err ? fmd_set_errno(err) : 0);
191*0Sstevel@tonic-gate }
192*0Sstevel@tonic-gate 
193*0Sstevel@tonic-gate static int
194*0Sstevel@tonic-gate fmd_log_check_err(fmd_log_t *lp, int err, const char *msg)
195*0Sstevel@tonic-gate {
196*0Sstevel@tonic-gate 	int eaerr = ea_error();
197*0Sstevel@tonic-gate 	char buf[BUFSIZ];
198*0Sstevel@tonic-gate 
199*0Sstevel@tonic-gate 	(void) snprintf(buf, sizeof (buf), "%s: %s: %s\n",
200*0Sstevel@tonic-gate 	    lp->log_name, msg, eaerr != EXR_OK ?
201*0Sstevel@tonic-gate 	    fmd_ea_strerror(eaerr) : "catalog tag mismatch");
202*0Sstevel@tonic-gate 
203*0Sstevel@tonic-gate 	fmd_error(err, buf);
204*0Sstevel@tonic-gate 	return (fmd_set_errno(err));
205*0Sstevel@tonic-gate }
206*0Sstevel@tonic-gate 
207*0Sstevel@tonic-gate static int
208*0Sstevel@tonic-gate fmd_log_check_hdr(fmd_log_t *lp, const char *tag)
209*0Sstevel@tonic-gate {
210*0Sstevel@tonic-gate 	int got_version = 0, got_label = 0;
211*0Sstevel@tonic-gate 	ea_object_t *grp, *obj;
212*0Sstevel@tonic-gate 	off64_t hdr_off, hdr_size;
213*0Sstevel@tonic-gate 	int dvers, fvers;
214*0Sstevel@tonic-gate 	const char *p;
215*0Sstevel@tonic-gate 
216*0Sstevel@tonic-gate 	ea_clear(&lp->log_ea); /* resync exacct file */
217*0Sstevel@tonic-gate 
218*0Sstevel@tonic-gate 	if ((hdr_off = lseek64(lp->log_fd, 0, SEEK_CUR)) == -1L)
219*0Sstevel@tonic-gate 		fmd_panic("failed to seek log %s", lp->log_name);
220*0Sstevel@tonic-gate 
221*0Sstevel@tonic-gate 	/*
222*0Sstevel@tonic-gate 	 * Read the first group of log meta-data: the write-once read-only
223*0Sstevel@tonic-gate 	 * file header.  We read all records in this group, ignoring all but
224*0Sstevel@tonic-gate 	 * the VERSION and LABEL, which are required and must be verified.
225*0Sstevel@tonic-gate 	 */
226*0Sstevel@tonic-gate 	if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL ||
227*0Sstevel@tonic-gate 	    grp->eo_catalog != CAT_FMA_GROUP) {
228*0Sstevel@tonic-gate 		ea_free_object(grp, EUP_ALLOC);
229*0Sstevel@tonic-gate 		return (fmd_log_check_err(lp, EFMD_LOG_INVAL,
230*0Sstevel@tonic-gate 		    "invalid fma hdr record group"));
231*0Sstevel@tonic-gate 	}
232*0Sstevel@tonic-gate 
233*0Sstevel@tonic-gate 	for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) {
234*0Sstevel@tonic-gate 		switch (obj->eo_catalog) {
235*0Sstevel@tonic-gate 		case CAT_FMA_VERSION:
236*0Sstevel@tonic-gate 			for (dvers = 0, p = fmd.d_version;
237*0Sstevel@tonic-gate 			    *p != '\0'; p++) {
238*0Sstevel@tonic-gate 				if (isdigit(*p))
239*0Sstevel@tonic-gate 					dvers = dvers * 10 + (*p - '0');
240*0Sstevel@tonic-gate 				else
241*0Sstevel@tonic-gate 					break;
242*0Sstevel@tonic-gate 			}
243*0Sstevel@tonic-gate 
244*0Sstevel@tonic-gate 			for (fvers = 0, p = obj->eo_item.ei_string;
245*0Sstevel@tonic-gate 			    *p != '\0'; p++) {
246*0Sstevel@tonic-gate 				if (isdigit(*p))
247*0Sstevel@tonic-gate 					fvers = fvers * 10 + (*p - '0');
248*0Sstevel@tonic-gate 				else
249*0Sstevel@tonic-gate 					break;
250*0Sstevel@tonic-gate 			}
251*0Sstevel@tonic-gate 
252*0Sstevel@tonic-gate 			if (fvers > dvers) {
253*0Sstevel@tonic-gate 				fmd_error(EFMD_LOG_INVAL, "%s: log version "
254*0Sstevel@tonic-gate 				    "%s is not supported by this daemon\n",
255*0Sstevel@tonic-gate 				    lp->log_name, obj->eo_item.ei_string);
256*0Sstevel@tonic-gate 				ea_free_object(grp, EUP_ALLOC);
257*0Sstevel@tonic-gate 				return (fmd_set_errno(EFMD_LOG_VERSION));
258*0Sstevel@tonic-gate 			}
259*0Sstevel@tonic-gate 
260*0Sstevel@tonic-gate 			got_version++;
261*0Sstevel@tonic-gate 			break;
262*0Sstevel@tonic-gate 
263*0Sstevel@tonic-gate 		case CAT_FMA_LABEL:
264*0Sstevel@tonic-gate 			if (strcmp(obj->eo_item.ei_string, tag) != 0) {
265*0Sstevel@tonic-gate 				fmd_error(EFMD_LOG_INVAL, "%s: log tag '%s' "
266*0Sstevel@tonic-gate 				    "does not matched expected tag '%s'\n",
267*0Sstevel@tonic-gate 				    lp->log_name, obj->eo_item.ei_string, tag);
268*0Sstevel@tonic-gate 				ea_free_object(grp, EUP_ALLOC);
269*0Sstevel@tonic-gate 				return (fmd_set_errno(EFMD_LOG_INVAL));
270*0Sstevel@tonic-gate 			}
271*0Sstevel@tonic-gate 			got_label++;
272*0Sstevel@tonic-gate 			break;
273*0Sstevel@tonic-gate 		}
274*0Sstevel@tonic-gate 	}
275*0Sstevel@tonic-gate 
276*0Sstevel@tonic-gate 	hdr_size = ea_pack_object(grp, NULL, 0);
277*0Sstevel@tonic-gate 	ea_free_object(grp, EUP_ALLOC);
278*0Sstevel@tonic-gate 
279*0Sstevel@tonic-gate 	if (!got_version || !got_label) {
280*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_INVAL, "%s: fmd hdr record group did not "
281*0Sstevel@tonic-gate 		    "include mandatory version and/or label\n", lp->log_name);
282*0Sstevel@tonic-gate 		return (fmd_set_errno(EFMD_LOG_INVAL));
283*0Sstevel@tonic-gate 	}
284*0Sstevel@tonic-gate 
285*0Sstevel@tonic-gate 	/*
286*0Sstevel@tonic-gate 	 * Read the second group of log meta-data: the table of contents.  We
287*0Sstevel@tonic-gate 	 * expect this group to contain an OFFSET object indicating the current
288*0Sstevel@tonic-gate 	 * value of log_skip.  We save this in our fmd_log_t and then return.
289*0Sstevel@tonic-gate 	 */
290*0Sstevel@tonic-gate 	if ((grp = ea_get_object_tree(&lp->log_ea, 1)) == NULL ||
291*0Sstevel@tonic-gate 	    grp->eo_catalog != CAT_FMA_GROUP || grp->eo_group.eg_nobjs < 1 ||
292*0Sstevel@tonic-gate 	    grp->eo_group.eg_objs->eo_catalog != CAT_FMA_OFFSET) {
293*0Sstevel@tonic-gate 		ea_free_object(grp, EUP_ALLOC);
294*0Sstevel@tonic-gate 		return (fmd_log_check_err(lp, EFMD_LOG_INVAL,
295*0Sstevel@tonic-gate 		    "invalid fma toc record group"));
296*0Sstevel@tonic-gate 	}
297*0Sstevel@tonic-gate 
298*0Sstevel@tonic-gate 	lp->log_toc = hdr_off + hdr_size;
299*0Sstevel@tonic-gate 	lp->log_beg = hdr_off + hdr_size + ea_pack_object(grp, NULL, 0);
300*0Sstevel@tonic-gate 	lp->log_off = lseek64(lp->log_fd, 0, SEEK_END);
301*0Sstevel@tonic-gate 	lp->log_skip = grp->eo_group.eg_objs->eo_item.ei_uint64;
302*0Sstevel@tonic-gate 
303*0Sstevel@tonic-gate 	if (lp->log_skip > lp->log_off) {
304*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_INVAL, "%s: skip %llx exceeds file size; "
305*0Sstevel@tonic-gate 		    "resetting to zero\n", lp->log_name, lp->log_skip);
306*0Sstevel@tonic-gate 		lp->log_skip = 0;
307*0Sstevel@tonic-gate 	}
308*0Sstevel@tonic-gate 
309*0Sstevel@tonic-gate 	ea_free_object(grp, EUP_ALLOC);
310*0Sstevel@tonic-gate 	return (0);
311*0Sstevel@tonic-gate }
312*0Sstevel@tonic-gate 
313*0Sstevel@tonic-gate static int
314*0Sstevel@tonic-gate fmd_log_open_exacct(fmd_log_t *lp, int aflags, int oflags)
315*0Sstevel@tonic-gate {
316*0Sstevel@tonic-gate 	int fd = dup(lp->log_fd);
317*0Sstevel@tonic-gate 	const char *creator;
318*0Sstevel@tonic-gate 
319*0Sstevel@tonic-gate 	(void) fmd_conf_getprop(fmd.d_conf, "log.creator", &creator);
320*0Sstevel@tonic-gate 
321*0Sstevel@tonic-gate 	if (ea_fdopen(&lp->log_ea, fd, creator, aflags, oflags) != 0) {
322*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_EXACCT, "%s: failed to open log file: %s\n",
323*0Sstevel@tonic-gate 		    lp->log_name, fmd_ea_strerror(ea_error()));
324*0Sstevel@tonic-gate 		(void) close(fd);
325*0Sstevel@tonic-gate 		return (fmd_set_errno(EFMD_LOG_EXACCT));
326*0Sstevel@tonic-gate 	}
327*0Sstevel@tonic-gate 
328*0Sstevel@tonic-gate 	lp->log_flags |= FMD_LF_EAOPEN;
329*0Sstevel@tonic-gate 	return (0);
330*0Sstevel@tonic-gate }
331*0Sstevel@tonic-gate 
332*0Sstevel@tonic-gate static fmd_log_t *
333*0Sstevel@tonic-gate fmd_log_xopen(const char *root, const char *name, const char *tag, int oflags)
334*0Sstevel@tonic-gate {
335*0Sstevel@tonic-gate 	fmd_log_t *lp = fmd_zalloc(sizeof (fmd_log_t), FMD_SLEEP);
336*0Sstevel@tonic-gate 
337*0Sstevel@tonic-gate 	char buf[PATH_MAX];
338*0Sstevel@tonic-gate 	size_t len;
339*0Sstevel@tonic-gate 	int err;
340*0Sstevel@tonic-gate 
341*0Sstevel@tonic-gate 	(void) pthread_mutex_init(&lp->log_lock, NULL);
342*0Sstevel@tonic-gate 	(void) pthread_cond_init(&lp->log_cv, NULL);
343*0Sstevel@tonic-gate 	(void) pthread_mutex_lock(&lp->log_lock);
344*0Sstevel@tonic-gate 
345*0Sstevel@tonic-gate 	len = strlen(root) + strlen(name) + 2; /* for "/" and "\0" */
346*0Sstevel@tonic-gate 	lp->log_name = fmd_alloc(len, FMD_SLEEP);
347*0Sstevel@tonic-gate 	(void) snprintf(lp->log_name, len, "%s/%s", root, name);
348*0Sstevel@tonic-gate 	lp->log_tag = fmd_strdup(tag, FMD_SLEEP);
349*0Sstevel@tonic-gate 	(void) fmd_conf_getprop(fmd.d_conf, "log.minfree", &lp->log_minfree);
350*0Sstevel@tonic-gate 
351*0Sstevel@tonic-gate 	if (strcmp(lp->log_tag, FMD_LOG_ERROR) == 0)
352*0Sstevel@tonic-gate 		lp->log_flags |= FMD_LF_REPLAY;
353*0Sstevel@tonic-gate 
354*0Sstevel@tonic-gate top:
355*0Sstevel@tonic-gate 	if ((lp->log_fd = open64(lp->log_name, oflags, 0644)) == -1 ||
356*0Sstevel@tonic-gate 	    fstat64(lp->log_fd, &lp->log_stat) == -1) {
357*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_OPEN, "failed to open log %s", lp->log_name);
358*0Sstevel@tonic-gate 		fmd_log_close(lp);
359*0Sstevel@tonic-gate 		return (NULL);
360*0Sstevel@tonic-gate 	}
361*0Sstevel@tonic-gate 
362*0Sstevel@tonic-gate 	/*
363*0Sstevel@tonic-gate 	 * If our open() created the log file, use libexacct to write a header
364*0Sstevel@tonic-gate 	 * and position the file just after the header (EO_TAIL).  If the log
365*0Sstevel@tonic-gate 	 * file already existed, use libexacct to validate the header and again
366*0Sstevel@tonic-gate 	 * position the file just after the header (EO_HEAD).  Note that we lie
367*0Sstevel@tonic-gate 	 * to libexacct about 'oflags' in order to achieve the desired result.
368*0Sstevel@tonic-gate 	 */
369*0Sstevel@tonic-gate 	if (lp->log_stat.st_size == 0) {
370*0Sstevel@tonic-gate 		err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_TAIL,
371*0Sstevel@tonic-gate 		    O_CREAT | O_WRONLY) || fmd_log_write_hdr(lp, tag);
372*0Sstevel@tonic-gate 	} else {
373*0Sstevel@tonic-gate 		err = fmd_log_open_exacct(lp, EO_VALID_HDR | EO_HEAD,
374*0Sstevel@tonic-gate 		    O_RDONLY) || fmd_log_check_hdr(lp, tag);
375*0Sstevel@tonic-gate 	}
376*0Sstevel@tonic-gate 
377*0Sstevel@tonic-gate 	/*
378*0Sstevel@tonic-gate 	 * If ea_fdopen() failed and the log was pre-existing, attempt to move
379*0Sstevel@tonic-gate 	 * it aside and start a new one.  If we created the log but failed to
380*0Sstevel@tonic-gate 	 * initialize it, then we have no choice but to give up (e.g. EROFS).
381*0Sstevel@tonic-gate 	 */
382*0Sstevel@tonic-gate 	if (err) {
383*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_OPEN,
384*0Sstevel@tonic-gate 		    "failed to initialize log %s", lp->log_name);
385*0Sstevel@tonic-gate 
386*0Sstevel@tonic-gate 		if (lp->log_flags & FMD_LF_EAOPEN) {
387*0Sstevel@tonic-gate 			lp->log_flags &= ~FMD_LF_EAOPEN;
388*0Sstevel@tonic-gate 			(void) ea_close(&lp->log_ea);
389*0Sstevel@tonic-gate 		}
390*0Sstevel@tonic-gate 
391*0Sstevel@tonic-gate 		(void) close(lp->log_fd);
392*0Sstevel@tonic-gate 		lp->log_fd = -1;
393*0Sstevel@tonic-gate 
394*0Sstevel@tonic-gate 		if (lp->log_stat.st_size != 0 && snprintf(buf,
395*0Sstevel@tonic-gate 		    sizeof (buf), "%s-", lp->log_name) < PATH_MAX &&
396*0Sstevel@tonic-gate 		    rename(lp->log_name, buf) == 0) {
397*0Sstevel@tonic-gate 			TRACE((FMD_DBG_LOG, "mv %s to %s", lp->log_name, buf));
398*0Sstevel@tonic-gate 			if (oflags & O_CREAT)
399*0Sstevel@tonic-gate 				goto top;
400*0Sstevel@tonic-gate 		}
401*0Sstevel@tonic-gate 
402*0Sstevel@tonic-gate 		fmd_log_close(lp);
403*0Sstevel@tonic-gate 		return (NULL);
404*0Sstevel@tonic-gate 	}
405*0Sstevel@tonic-gate 
406*0Sstevel@tonic-gate 	lp->log_refs++;
407*0Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&lp->log_lock);
408*0Sstevel@tonic-gate 
409*0Sstevel@tonic-gate 	return (lp);
410*0Sstevel@tonic-gate }
411*0Sstevel@tonic-gate 
412*0Sstevel@tonic-gate fmd_log_t *
413*0Sstevel@tonic-gate fmd_log_tryopen(const char *root, const char *name, const char *tag)
414*0Sstevel@tonic-gate {
415*0Sstevel@tonic-gate 	return (fmd_log_xopen(root, name, tag, O_RDWR | O_SYNC));
416*0Sstevel@tonic-gate }
417*0Sstevel@tonic-gate 
418*0Sstevel@tonic-gate fmd_log_t *
419*0Sstevel@tonic-gate fmd_log_open(const char *root, const char *name, const char *tag)
420*0Sstevel@tonic-gate {
421*0Sstevel@tonic-gate 	return (fmd_log_xopen(root, name, tag, O_RDWR | O_CREAT | O_SYNC));
422*0Sstevel@tonic-gate }
423*0Sstevel@tonic-gate 
424*0Sstevel@tonic-gate void
425*0Sstevel@tonic-gate fmd_log_close(fmd_log_t *lp)
426*0Sstevel@tonic-gate {
427*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&lp->log_lock));
428*0Sstevel@tonic-gate 	ASSERT(lp->log_refs == 0);
429*0Sstevel@tonic-gate 
430*0Sstevel@tonic-gate 	if ((lp->log_flags & FMD_LF_EAOPEN) && ea_close(&lp->log_ea) != 0) {
431*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_CLOSE, "failed to close log %s: %s\n",
432*0Sstevel@tonic-gate 		    lp->log_name, fmd_ea_strerror(ea_error()));
433*0Sstevel@tonic-gate 	}
434*0Sstevel@tonic-gate 
435*0Sstevel@tonic-gate 	if (lp->log_fd >= 0 && close(lp->log_fd) != 0) {
436*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_CLOSE,
437*0Sstevel@tonic-gate 		    "failed to close log %s", lp->log_name);
438*0Sstevel@tonic-gate 	}
439*0Sstevel@tonic-gate 
440*0Sstevel@tonic-gate 	fmd_strfree(lp->log_name);
441*0Sstevel@tonic-gate 	fmd_strfree(lp->log_tag);
442*0Sstevel@tonic-gate 
443*0Sstevel@tonic-gate 	fmd_free(lp, sizeof (fmd_log_t));
444*0Sstevel@tonic-gate }
445*0Sstevel@tonic-gate 
446*0Sstevel@tonic-gate void
447*0Sstevel@tonic-gate fmd_log_hold_pending(fmd_log_t *lp)
448*0Sstevel@tonic-gate {
449*0Sstevel@tonic-gate 	(void) pthread_mutex_lock(&lp->log_lock);
450*0Sstevel@tonic-gate 
451*0Sstevel@tonic-gate 	lp->log_refs++;
452*0Sstevel@tonic-gate 	ASSERT(lp->log_refs != 0);
453*0Sstevel@tonic-gate 
454*0Sstevel@tonic-gate 	if (lp->log_flags & FMD_LF_REPLAY) {
455*0Sstevel@tonic-gate 		lp->log_pending++;
456*0Sstevel@tonic-gate 		ASSERT(lp->log_pending != 0);
457*0Sstevel@tonic-gate 	}
458*0Sstevel@tonic-gate 
459*0Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&lp->log_lock);
460*0Sstevel@tonic-gate }
461*0Sstevel@tonic-gate 
462*0Sstevel@tonic-gate void
463*0Sstevel@tonic-gate fmd_log_hold(fmd_log_t *lp)
464*0Sstevel@tonic-gate {
465*0Sstevel@tonic-gate 	(void) pthread_mutex_lock(&lp->log_lock);
466*0Sstevel@tonic-gate 	lp->log_refs++;
467*0Sstevel@tonic-gate 	ASSERT(lp->log_refs != 0);
468*0Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&lp->log_lock);
469*0Sstevel@tonic-gate }
470*0Sstevel@tonic-gate 
471*0Sstevel@tonic-gate void
472*0Sstevel@tonic-gate fmd_log_rele(fmd_log_t *lp)
473*0Sstevel@tonic-gate {
474*0Sstevel@tonic-gate 	(void) pthread_mutex_lock(&lp->log_lock);
475*0Sstevel@tonic-gate 	ASSERT(lp->log_refs != 0);
476*0Sstevel@tonic-gate 
477*0Sstevel@tonic-gate 	if (--lp->log_refs == 0)
478*0Sstevel@tonic-gate 		fmd_log_close(lp);
479*0Sstevel@tonic-gate 	else
480*0Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&lp->log_lock);
481*0Sstevel@tonic-gate }
482*0Sstevel@tonic-gate 
483*0Sstevel@tonic-gate void
484*0Sstevel@tonic-gate fmd_log_append(fmd_log_t *lp, fmd_event_t *e, fmd_case_t *cp)
485*0Sstevel@tonic-gate {
486*0Sstevel@tonic-gate 	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
487*0Sstevel@tonic-gate 	fmd_case_impl_t *cip = (fmd_case_impl_t *)cp;
488*0Sstevel@tonic-gate 	int err = 0;
489*0Sstevel@tonic-gate 
490*0Sstevel@tonic-gate 	ea_object_t grp0, grp1, i0, i1, i2, *items;
491*0Sstevel@tonic-gate 	size_t nvsize, easize, itsize;
492*0Sstevel@tonic-gate 	char *nvbuf, *eabuf;
493*0Sstevel@tonic-gate 	statvfs64_t stv;
494*0Sstevel@tonic-gate 
495*0Sstevel@tonic-gate 	(void) pthread_mutex_lock(&ep->ev_lock);
496*0Sstevel@tonic-gate 
497*0Sstevel@tonic-gate 	ASSERT(ep->ev_flags & FMD_EVF_VOLATILE);
498*0Sstevel@tonic-gate 	ASSERT(ep->ev_log == NULL);
499*0Sstevel@tonic-gate 
500*0Sstevel@tonic-gate 	(void) nvlist_size(ep->ev_nvl, &nvsize, NV_ENCODE_XDR);
501*0Sstevel@tonic-gate 	nvbuf = fmd_alloc(nvsize, FMD_SLEEP);
502*0Sstevel@tonic-gate 	(void) nvlist_pack(ep->ev_nvl, &nvbuf, &nvsize, NV_ENCODE_XDR, 0);
503*0Sstevel@tonic-gate 
504*0Sstevel@tonic-gate 	if (lp->log_flags & FMD_LF_REPLAY)
505*0Sstevel@tonic-gate 		err |= ea_set_group(&grp0, CAT_FMA_RGROUP);
506*0Sstevel@tonic-gate 	else
507*0Sstevel@tonic-gate 		err |= ea_set_group(&grp0, CAT_FMA_GROUP);
508*0Sstevel@tonic-gate 
509*0Sstevel@tonic-gate 	err |= ea_set_item(&i0, CAT_FMA_TODSEC, &ep->ev_time.ftv_sec, 0);
510*0Sstevel@tonic-gate 	err |= ea_set_item(&i1, CAT_FMA_TODNSEC, &ep->ev_time.ftv_nsec, 0);
511*0Sstevel@tonic-gate 	err |= ea_set_item(&i2, CAT_FMA_NVLIST, nvbuf, nvsize);
512*0Sstevel@tonic-gate 
513*0Sstevel@tonic-gate 	if (err != 0) {
514*0Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&ep->ev_lock);
515*0Sstevel@tonic-gate 		err = EFMD_LOG_EXACCT;
516*0Sstevel@tonic-gate 		goto exerr;
517*0Sstevel@tonic-gate 	}
518*0Sstevel@tonic-gate 
519*0Sstevel@tonic-gate 	(void) ea_attach_to_group(&grp0, &i0);
520*0Sstevel@tonic-gate 	(void) ea_attach_to_group(&grp0, &i1);
521*0Sstevel@tonic-gate 	(void) ea_attach_to_group(&grp0, &i2);
522*0Sstevel@tonic-gate 
523*0Sstevel@tonic-gate 	/*
524*0Sstevel@tonic-gate 	 * If this event has a case associated with it (i.e. it is a list),
525*0Sstevel@tonic-gate 	 * then allocate a block of ea_object_t's and fill in a group for
526*0Sstevel@tonic-gate 	 * each event saved in the case's item list.  For each such group,
527*0Sstevel@tonic-gate 	 * we attach it to grp1, which in turn will be attached to grp0.
528*0Sstevel@tonic-gate 	 * This section of code cannot fail as we only manipulate integer
529*0Sstevel@tonic-gate 	 * objects, which require no underlying libexacct memory allocation.
530*0Sstevel@tonic-gate 	 */
531*0Sstevel@tonic-gate 	if (cp != NULL) {
532*0Sstevel@tonic-gate 		ea_object_t *egrp, *ip;
533*0Sstevel@tonic-gate 		fmd_event_impl_t *eip;
534*0Sstevel@tonic-gate 		fmd_case_item_t *cit;
535*0Sstevel@tonic-gate 
536*0Sstevel@tonic-gate 		(void) ea_set_group(&grp1, CAT_FMA_GROUP);
537*0Sstevel@tonic-gate 		itsize = sizeof (ea_object_t) * cip->ci_nitems * 5;
538*0Sstevel@tonic-gate 		items = ip = fmd_alloc(itsize, FMD_SLEEP);
539*0Sstevel@tonic-gate 
540*0Sstevel@tonic-gate 		for (cit = cip->ci_items; cit != NULL; cit = cit->cit_next) {
541*0Sstevel@tonic-gate 			major_t maj;
542*0Sstevel@tonic-gate 			minor_t min;
543*0Sstevel@tonic-gate 
544*0Sstevel@tonic-gate 			eip = (fmd_event_impl_t *)cit->cit_event;
545*0Sstevel@tonic-gate 
546*0Sstevel@tonic-gate 			if (eip->ev_log == NULL)
547*0Sstevel@tonic-gate 				continue; /* event was never logged */
548*0Sstevel@tonic-gate 
549*0Sstevel@tonic-gate 			maj = major(eip->ev_log->log_stat.st_dev);
550*0Sstevel@tonic-gate 			min = minor(eip->ev_log->log_stat.st_dev);
551*0Sstevel@tonic-gate 
552*0Sstevel@tonic-gate 			(void) ea_set_group(ip, CAT_FMA_GROUP);
553*0Sstevel@tonic-gate 			egrp = ip++; /* first obj is group */
554*0Sstevel@tonic-gate 
555*0Sstevel@tonic-gate 			(void) ea_set_item(ip, CAT_FMA_MAJOR, &maj, 0);
556*0Sstevel@tonic-gate 			(void) ea_attach_to_group(egrp, ip++);
557*0Sstevel@tonic-gate 
558*0Sstevel@tonic-gate 			(void) ea_set_item(ip, CAT_FMA_MINOR, &min, 0);
559*0Sstevel@tonic-gate 			(void) ea_attach_to_group(egrp, ip++);
560*0Sstevel@tonic-gate 
561*0Sstevel@tonic-gate 			(void) ea_set_item(ip, CAT_FMA_INODE,
562*0Sstevel@tonic-gate 			    &eip->ev_log->log_stat.st_ino, 0);
563*0Sstevel@tonic-gate 			(void) ea_attach_to_group(egrp, ip++);
564*0Sstevel@tonic-gate 
565*0Sstevel@tonic-gate 			(void) ea_set_item(ip, CAT_FMA_OFFSET, &eip->ev_off, 0);
566*0Sstevel@tonic-gate 			(void) ea_attach_to_group(egrp, ip++);
567*0Sstevel@tonic-gate 
568*0Sstevel@tonic-gate 			(void) ea_attach_to_group(&grp1, egrp);
569*0Sstevel@tonic-gate 		}
570*0Sstevel@tonic-gate 
571*0Sstevel@tonic-gate 		(void) ea_attach_to_group(&grp0, &grp1);
572*0Sstevel@tonic-gate 	}
573*0Sstevel@tonic-gate 
574*0Sstevel@tonic-gate 	easize = ea_pack_object(&grp0, NULL, 0);
575*0Sstevel@tonic-gate 	eabuf = fmd_alloc(easize, FMD_SLEEP);
576*0Sstevel@tonic-gate 	(void) ea_pack_object(&grp0, eabuf, easize);
577*0Sstevel@tonic-gate 
578*0Sstevel@tonic-gate 	/*
579*0Sstevel@tonic-gate 	 * Before writing the record, check to see if this would cause the free
580*0Sstevel@tonic-gate 	 * space in the filesystem to drop below our minfree threshold.  If so,
581*0Sstevel@tonic-gate 	 * don't bother attempting the write and instead pretend it failed.  As
582*0Sstevel@tonic-gate 	 * fmd(1M) runs as root, it will be able to access the space "reserved"
583*0Sstevel@tonic-gate 	 * for root, and therefore can run the system of out of disk space in a
584*0Sstevel@tonic-gate 	 * heavy error load situation, violating the basic design principle of
585*0Sstevel@tonic-gate 	 * fmd(1M) that we don't want to make a bad situation even worse.
586*0Sstevel@tonic-gate 	 */
587*0Sstevel@tonic-gate 	(void) pthread_mutex_lock(&lp->log_lock);
588*0Sstevel@tonic-gate 
589*0Sstevel@tonic-gate 	if (lp->log_minfree != 0 && fstatvfs64(lp->log_fd, &stv) == 0 &&
590*0Sstevel@tonic-gate 	    stv.f_bavail * stv.f_frsize < lp->log_minfree + easize) {
591*0Sstevel@tonic-gate 
592*0Sstevel@tonic-gate 		TRACE((FMD_DBG_LOG, "append %s crosses minfree", lp->log_tag));
593*0Sstevel@tonic-gate 		err = EFMD_LOG_MINFREE;
594*0Sstevel@tonic-gate 
595*0Sstevel@tonic-gate 	} else if (fmd_log_write(lp, eabuf, easize) == easize) {
596*0Sstevel@tonic-gate 		TRACE((FMD_DBG_LOG, "append %s %p off=0x%llx",
597*0Sstevel@tonic-gate 		    lp->log_tag, (void *)ep, (u_longlong_t)lp->log_off));
598*0Sstevel@tonic-gate 
599*0Sstevel@tonic-gate 		ep->ev_flags &= ~FMD_EVF_VOLATILE;
600*0Sstevel@tonic-gate 		ep->ev_log = lp;
601*0Sstevel@tonic-gate 		ep->ev_off = lp->log_off;
602*0Sstevel@tonic-gate 		ep->ev_len = easize;
603*0Sstevel@tonic-gate 
604*0Sstevel@tonic-gate 		if (lp->log_flags & FMD_LF_REPLAY) {
605*0Sstevel@tonic-gate 			lp->log_pending++;
606*0Sstevel@tonic-gate 			ASSERT(lp->log_pending != 0);
607*0Sstevel@tonic-gate 		}
608*0Sstevel@tonic-gate 
609*0Sstevel@tonic-gate 		lp->log_refs++;
610*0Sstevel@tonic-gate 		ASSERT(lp->log_refs != 0);
611*0Sstevel@tonic-gate 		lp->log_off += easize;
612*0Sstevel@tonic-gate 	} else {
613*0Sstevel@tonic-gate 		err = errno; /* save errno for fmd_error() call below */
614*0Sstevel@tonic-gate 
615*0Sstevel@tonic-gate 		/*
616*0Sstevel@tonic-gate 		 * If we can't append the record, seek the file back to
617*0Sstevel@tonic-gate 		 * the original location and truncate it there in order to make
618*0Sstevel@tonic-gate 		 * sure the file is always in a sane state w.r.t. libexacct.
619*0Sstevel@tonic-gate 		 */
620*0Sstevel@tonic-gate 		(void) lseek64(lp->log_fd, lp->log_off, SEEK_SET);
621*0Sstevel@tonic-gate 		(void) ftruncate64(lp->log_fd, lp->log_off);
622*0Sstevel@tonic-gate 	}
623*0Sstevel@tonic-gate 
624*0Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&lp->log_lock);
625*0Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&ep->ev_lock);
626*0Sstevel@tonic-gate 
627*0Sstevel@tonic-gate 	if (cp != NULL)
628*0Sstevel@tonic-gate 		fmd_free(items, itsize);
629*0Sstevel@tonic-gate 
630*0Sstevel@tonic-gate 	fmd_free(eabuf, easize);
631*0Sstevel@tonic-gate exerr:
632*0Sstevel@tonic-gate 	fmd_free(nvbuf, nvsize);
633*0Sstevel@tonic-gate 
634*0Sstevel@tonic-gate 	(void) ea_free_item(&i0, EUP_ALLOC);
635*0Sstevel@tonic-gate 	(void) ea_free_item(&i1, EUP_ALLOC);
636*0Sstevel@tonic-gate 	(void) ea_free_item(&i2, EUP_ALLOC);
637*0Sstevel@tonic-gate 
638*0Sstevel@tonic-gate 	/*
639*0Sstevel@tonic-gate 	 * Keep track of out-of-space errors using global statistics.  As we're
640*0Sstevel@tonic-gate 	 * out of disk space, it's unlikely the EFMD_LOG_APPEND will be logged.
641*0Sstevel@tonic-gate 	 */
642*0Sstevel@tonic-gate 	if (err == ENOSPC || err == EFMD_LOG_MINFREE) {
643*0Sstevel@tonic-gate 		fmd_stat_t *sp;
644*0Sstevel@tonic-gate 
645*0Sstevel@tonic-gate 		if (lp == fmd.d_errlog)
646*0Sstevel@tonic-gate 			sp = &fmd.d_stats->ds_err_enospc;
647*0Sstevel@tonic-gate 		else if (lp == fmd.d_fltlog)
648*0Sstevel@tonic-gate 			sp = &fmd.d_stats->ds_flt_enospc;
649*0Sstevel@tonic-gate 		else
650*0Sstevel@tonic-gate 			sp = &fmd.d_stats->ds_oth_enospc;
651*0Sstevel@tonic-gate 
652*0Sstevel@tonic-gate 		(void) pthread_mutex_lock(&fmd.d_stats_lock);
653*0Sstevel@tonic-gate 		sp->fmds_value.ui64++;
654*0Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&fmd.d_stats_lock);
655*0Sstevel@tonic-gate 	}
656*0Sstevel@tonic-gate 
657*0Sstevel@tonic-gate 	if (err != 0) {
658*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_APPEND, "failed to log_append %s %p: %s\n",
659*0Sstevel@tonic-gate 		    lp->log_tag, (void *)ep, fmd_strerror(err));
660*0Sstevel@tonic-gate 	}
661*0Sstevel@tonic-gate }
662*0Sstevel@tonic-gate 
663*0Sstevel@tonic-gate /*
664*0Sstevel@tonic-gate  * Commit an event to the log permanently, indicating that it should not be
665*0Sstevel@tonic-gate  * replayed on restart.  This is done by overwriting the event group's catalog
666*0Sstevel@tonic-gate  * code with EXD_GROUP_FMA (from EXD_GROUP_RFMA used in fmd_log_append()).  We
667*0Sstevel@tonic-gate  * use pwrite64() to update the existing word directly, using somewhat guilty
668*0Sstevel@tonic-gate  * knowledge that exacct stores the 32-bit catalog word first for each object.
669*0Sstevel@tonic-gate  * Since we are overwriting an existing log location using pwrite64() and hold
670*0Sstevel@tonic-gate  * the event lock, we do not need to hold the log_lock during the i/o.
671*0Sstevel@tonic-gate  */
672*0Sstevel@tonic-gate void
673*0Sstevel@tonic-gate fmd_log_commit(fmd_log_t *lp, fmd_event_t *e)
674*0Sstevel@tonic-gate {
675*0Sstevel@tonic-gate 	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
676*0Sstevel@tonic-gate 	ea_catalog_t c;
677*0Sstevel@tonic-gate 	int err = 0;
678*0Sstevel@tonic-gate 
679*0Sstevel@tonic-gate 	if (!(lp->log_flags & FMD_LF_REPLAY))
680*0Sstevel@tonic-gate 		return; /* log does not require replay tagging */
681*0Sstevel@tonic-gate 
682*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ep->ev_lock));
683*0Sstevel@tonic-gate 	ASSERT(ep->ev_log == lp && ep->ev_off != 0);
684*0Sstevel@tonic-gate 
685*0Sstevel@tonic-gate 	c = CAT_FMA_GROUP;
686*0Sstevel@tonic-gate 	exacct_order32(&c);
687*0Sstevel@tonic-gate 
688*0Sstevel@tonic-gate 	if (pwrite64(lp->log_fd, &c, sizeof (c), ep->ev_off) == sizeof (c)) {
689*0Sstevel@tonic-gate 		TRACE((FMD_DBG_LOG, "commit %s %p", lp->log_tag, (void *)ep));
690*0Sstevel@tonic-gate 		ep->ev_flags &= ~FMD_EVF_REPLAY;
691*0Sstevel@tonic-gate 
692*0Sstevel@tonic-gate 		/*
693*0Sstevel@tonic-gate 		 * If we have committed the event, check to see if the TOC skip
694*0Sstevel@tonic-gate 		 * offset needs to be updated, and decrement the pending count.
695*0Sstevel@tonic-gate 		 */
696*0Sstevel@tonic-gate 		(void) pthread_mutex_lock(&lp->log_lock);
697*0Sstevel@tonic-gate 
698*0Sstevel@tonic-gate 		if (lp->log_skip == ep->ev_off) {
699*0Sstevel@tonic-gate 			lp->log_flags |= FMD_LF_DIRTY;
700*0Sstevel@tonic-gate 			lp->log_skip += ep->ev_len;
701*0Sstevel@tonic-gate 		}
702*0Sstevel@tonic-gate 
703*0Sstevel@tonic-gate 		ASSERT(lp->log_pending != 0);
704*0Sstevel@tonic-gate 		lp->log_pending--;
705*0Sstevel@tonic-gate 
706*0Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&lp->log_lock);
707*0Sstevel@tonic-gate 		(void) pthread_cond_broadcast(&lp->log_cv);
708*0Sstevel@tonic-gate 
709*0Sstevel@tonic-gate 	} else {
710*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_COMMIT, "failed to log_commit %s %p: %s\n",
711*0Sstevel@tonic-gate 		    lp->log_tag, (void *)ep, fmd_strerror(err));
712*0Sstevel@tonic-gate 	}
713*0Sstevel@tonic-gate }
714*0Sstevel@tonic-gate 
715*0Sstevel@tonic-gate /*
716*0Sstevel@tonic-gate  * If we need to destroy an event and it wasn't able to be committed, we permit
717*0Sstevel@tonic-gate  * the owner to decommit from ever trying again.  This operation decrements the
718*0Sstevel@tonic-gate  * pending count on the log and broadcasts to anyone waiting on log_cv.
719*0Sstevel@tonic-gate  */
720*0Sstevel@tonic-gate void
721*0Sstevel@tonic-gate fmd_log_decommit(fmd_log_t *lp, fmd_event_t *e)
722*0Sstevel@tonic-gate {
723*0Sstevel@tonic-gate 	fmd_event_impl_t *ep = (fmd_event_impl_t *)e;
724*0Sstevel@tonic-gate 
725*0Sstevel@tonic-gate 	if (!(lp->log_flags & FMD_LF_REPLAY))
726*0Sstevel@tonic-gate 		return; /* log does not require replay tagging */
727*0Sstevel@tonic-gate 
728*0Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ep->ev_lock));
729*0Sstevel@tonic-gate 	ASSERT(ep->ev_log == lp);
730*0Sstevel@tonic-gate 
731*0Sstevel@tonic-gate 	(void) pthread_mutex_lock(&lp->log_lock);
732*0Sstevel@tonic-gate 
733*0Sstevel@tonic-gate 	TRACE((FMD_DBG_LOG, "decommit %s %p", lp->log_tag, (void *)ep));
734*0Sstevel@tonic-gate 	ep->ev_flags &= ~FMD_EVF_REPLAY;
735*0Sstevel@tonic-gate 
736*0Sstevel@tonic-gate 	ASSERT(lp->log_pending != 0);
737*0Sstevel@tonic-gate 	lp->log_pending--;
738*0Sstevel@tonic-gate 
739*0Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&lp->log_lock);
740*0Sstevel@tonic-gate 	(void) pthread_cond_broadcast(&lp->log_cv);
741*0Sstevel@tonic-gate }
742*0Sstevel@tonic-gate 
743*0Sstevel@tonic-gate static fmd_event_t *
744*0Sstevel@tonic-gate fmd_log_unpack(fmd_log_t *lp, ea_object_t *grp, off64_t off)
745*0Sstevel@tonic-gate {
746*0Sstevel@tonic-gate 	fmd_timeval_t ftv = { -1ULL, -1ULL };
747*0Sstevel@tonic-gate 	nvlist_t *nvl = NULL;
748*0Sstevel@tonic-gate 
749*0Sstevel@tonic-gate 	ea_object_t *obj;
750*0Sstevel@tonic-gate 	char *class;
751*0Sstevel@tonic-gate 	int err;
752*0Sstevel@tonic-gate 
753*0Sstevel@tonic-gate 	for (obj = grp->eo_group.eg_objs; obj != NULL; obj = obj->eo_next) {
754*0Sstevel@tonic-gate 		switch (obj->eo_catalog) {
755*0Sstevel@tonic-gate 		case CAT_FMA_NVLIST:
756*0Sstevel@tonic-gate 			if ((err = nvlist_xunpack(obj->eo_item.ei_raw,
757*0Sstevel@tonic-gate 			    obj->eo_item.ei_size, &nvl, &fmd.d_nva)) != 0) {
758*0Sstevel@tonic-gate 				fmd_error(EFMD_LOG_UNPACK, "failed to unpack "
759*0Sstevel@tonic-gate 				    "log nvpair: %s\n", fmd_strerror(err));
760*0Sstevel@tonic-gate 				return (NULL);
761*0Sstevel@tonic-gate 			}
762*0Sstevel@tonic-gate 			break;
763*0Sstevel@tonic-gate 
764*0Sstevel@tonic-gate 		case CAT_FMA_TODSEC:
765*0Sstevel@tonic-gate 			ftv.ftv_sec = obj->eo_item.ei_uint64;
766*0Sstevel@tonic-gate 			break;
767*0Sstevel@tonic-gate 
768*0Sstevel@tonic-gate 		case CAT_FMA_TODNSEC:
769*0Sstevel@tonic-gate 			ftv.ftv_nsec = obj->eo_item.ei_uint64;
770*0Sstevel@tonic-gate 			break;
771*0Sstevel@tonic-gate 		}
772*0Sstevel@tonic-gate 	}
773*0Sstevel@tonic-gate 
774*0Sstevel@tonic-gate 	if (nvl == NULL || ftv.ftv_sec == -1ULL || ftv.ftv_nsec == -1ULL) {
775*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: "
776*0Sstevel@tonic-gate 		    "required object(s) missing from record group\n");
777*0Sstevel@tonic-gate 		nvlist_free(nvl);
778*0Sstevel@tonic-gate 		return (NULL);
779*0Sstevel@tonic-gate 	}
780*0Sstevel@tonic-gate 
781*0Sstevel@tonic-gate 	if (nvlist_lookup_string(nvl, FM_CLASS, &class) != 0) {
782*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_UNPACK, "failed to unpack log event: "
783*0Sstevel@tonic-gate 		    "record is missing required '%s' nvpair\n", FM_CLASS);
784*0Sstevel@tonic-gate 		nvlist_free(nvl);
785*0Sstevel@tonic-gate 		return (NULL);
786*0Sstevel@tonic-gate 	}
787*0Sstevel@tonic-gate 
788*0Sstevel@tonic-gate 	return (fmd_event_recreate(FMD_EVT_PROTOCOL,
789*0Sstevel@tonic-gate 	    &ftv, nvl, class, lp, off, ea_pack_object(grp, NULL, 0)));
790*0Sstevel@tonic-gate }
791*0Sstevel@tonic-gate 
792*0Sstevel@tonic-gate /*
793*0Sstevel@tonic-gate  * Replay event(s) from the specified log by invoking the specified callback
794*0Sstevel@tonic-gate  * function 'func' for each event.  If the log has the FMD_LF_REPLAY flag set,
795*0Sstevel@tonic-gate  * we replay all events after log_skip that have the FMA_RGROUP group tag.
796*0Sstevel@tonic-gate  * This mode is used for the error telemetry log.  If the log does not have
797*0Sstevel@tonic-gate  * this flag set (used for ASRU logs), only the most recent event is replayed.
798*0Sstevel@tonic-gate  */
799*0Sstevel@tonic-gate void
800*0Sstevel@tonic-gate fmd_log_replay(fmd_log_t *lp, fmd_log_f *func, void *data)
801*0Sstevel@tonic-gate {
802*0Sstevel@tonic-gate 	ea_object_t obj, *grp;
803*0Sstevel@tonic-gate 	ea_object_type_t type;
804*0Sstevel@tonic-gate 	ea_catalog_t c;
805*0Sstevel@tonic-gate 	fmd_event_t *ep;
806*0Sstevel@tonic-gate 	off64_t off, skp;
807*0Sstevel@tonic-gate 	uint_t n = 0;
808*0Sstevel@tonic-gate 
809*0Sstevel@tonic-gate 	(void) pthread_mutex_lock(&lp->log_lock);
810*0Sstevel@tonic-gate 
811*0Sstevel@tonic-gate 	if (lp->log_stat.st_size == 0 && (lp->log_flags & FMD_LF_REPLAY)) {
812*0Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&lp->log_lock);
813*0Sstevel@tonic-gate 		return; /* we just created this log: never replay events */
814*0Sstevel@tonic-gate 	}
815*0Sstevel@tonic-gate 
816*0Sstevel@tonic-gate 	while (lp->log_flags & FMD_LF_BUSY)
817*0Sstevel@tonic-gate 		(void) pthread_cond_wait(&lp->log_cv, &lp->log_lock);
818*0Sstevel@tonic-gate 
819*0Sstevel@tonic-gate 	if (lp->log_off == lp->log_beg) {
820*0Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&lp->log_lock);
821*0Sstevel@tonic-gate 		return; /* no records appended yet */
822*0Sstevel@tonic-gate 	}
823*0Sstevel@tonic-gate 
824*0Sstevel@tonic-gate 	lp->log_flags |= FMD_LF_BUSY;
825*0Sstevel@tonic-gate 	skp = lp->log_skip;
826*0Sstevel@tonic-gate 	ea_clear(&lp->log_ea); /* resync exacct file */
827*0Sstevel@tonic-gate 
828*0Sstevel@tonic-gate 	/*
829*0Sstevel@tonic-gate 	 * If FMD_LF_REPLAY is set, begin our replay at either log_skip (if it
830*0Sstevel@tonic-gate 	 * is non-zero) or at log_beg.  Otherwise replay from the end (log_off)
831*0Sstevel@tonic-gate 	 */
832*0Sstevel@tonic-gate 	if (lp->log_flags & FMD_LF_REPLAY) {
833*0Sstevel@tonic-gate 		off = MAX(lp->log_beg, lp->log_skip);
834*0Sstevel@tonic-gate 		c = CAT_FMA_RGROUP;
835*0Sstevel@tonic-gate 	} else {
836*0Sstevel@tonic-gate 		off = lp->log_off;
837*0Sstevel@tonic-gate 		c = CAT_FMA_GROUP;
838*0Sstevel@tonic-gate 	}
839*0Sstevel@tonic-gate 
840*0Sstevel@tonic-gate 	if (lseek64(lp->log_fd, off, SEEK_SET) != off) {
841*0Sstevel@tonic-gate 		fmd_panic("failed to seek %s to 0x%llx\n",
842*0Sstevel@tonic-gate 		    lp->log_name, (u_longlong_t)off);
843*0Sstevel@tonic-gate 	}
844*0Sstevel@tonic-gate 
845*0Sstevel@tonic-gate 	/*
846*0Sstevel@tonic-gate 	 * If FMD_LF_REPLAY is not set, back up to the start of the previous
847*0Sstevel@tonic-gate 	 * object and make sure this object is an EO_GROUP; otherwise return.
848*0Sstevel@tonic-gate 	 */
849*0Sstevel@tonic-gate 	if (!(lp->log_flags & FMD_LF_REPLAY) &&
850*0Sstevel@tonic-gate 	    (type = ea_previous_object(&lp->log_ea, &obj)) != EO_GROUP) {
851*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_REPLAY, "last log object is of unexpected "
852*0Sstevel@tonic-gate 		    "type %d (log may be truncated or corrupt)\n", type);
853*0Sstevel@tonic-gate 		goto out;
854*0Sstevel@tonic-gate 	}
855*0Sstevel@tonic-gate 
856*0Sstevel@tonic-gate 	while ((grp = ea_get_object_tree(&lp->log_ea, 1)) != NULL) {
857*0Sstevel@tonic-gate 		if (!(lp->log_flags & FMD_LF_REPLAY))
858*0Sstevel@tonic-gate 			off -= ea_pack_object(grp, NULL, 0);
859*0Sstevel@tonic-gate 		else if (n == 0 && grp->eo_catalog == CAT_FMA_GROUP)
860*0Sstevel@tonic-gate 			skp = off; /* update skip */
861*0Sstevel@tonic-gate 
862*0Sstevel@tonic-gate 		/*
863*0Sstevel@tonic-gate 		 * We temporarily drop log_lock around the call to unpack the
864*0Sstevel@tonic-gate 		 * event, hold it, and perform the callback, because these
865*0Sstevel@tonic-gate 		 * operations may try to acquire log_lock to bump log_refs.
866*0Sstevel@tonic-gate 		 * We cannot lose control because the FMD_LF_BUSY flag is set.
867*0Sstevel@tonic-gate 		 */
868*0Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&lp->log_lock);
869*0Sstevel@tonic-gate 
870*0Sstevel@tonic-gate 		if (grp->eo_catalog == c &&
871*0Sstevel@tonic-gate 		    (ep = fmd_log_unpack(lp, grp, off)) != NULL) {
872*0Sstevel@tonic-gate 
873*0Sstevel@tonic-gate 			TRACE((FMD_DBG_LOG, "replay %s %p off %llx",
874*0Sstevel@tonic-gate 			    lp->log_tag, (void *)ep, (u_longlong_t)off));
875*0Sstevel@tonic-gate 
876*0Sstevel@tonic-gate 			fmd_event_hold(ep);
877*0Sstevel@tonic-gate 			func(lp, ep, data);
878*0Sstevel@tonic-gate 			fmd_event_rele(ep);
879*0Sstevel@tonic-gate 			n++;
880*0Sstevel@tonic-gate 		}
881*0Sstevel@tonic-gate 
882*0Sstevel@tonic-gate 		(void) pthread_mutex_lock(&lp->log_lock);
883*0Sstevel@tonic-gate 		off += ea_pack_object(grp, NULL, 0);
884*0Sstevel@tonic-gate 		ea_free_object(grp, EUP_ALLOC);
885*0Sstevel@tonic-gate 	}
886*0Sstevel@tonic-gate 
887*0Sstevel@tonic-gate 	if (ea_error() != EXR_EOF) {
888*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_REPLAY, "failed to replay %s event at "
889*0Sstevel@tonic-gate 		    "offset 0x%llx: %s\n", lp->log_name, (u_longlong_t)off,
890*0Sstevel@tonic-gate 		    fmd_ea_strerror(ea_error()));
891*0Sstevel@tonic-gate 	}
892*0Sstevel@tonic-gate 
893*0Sstevel@tonic-gate 	if (n == 0)
894*0Sstevel@tonic-gate 		skp = off; /* if no replays, move skip to where we ended up */
895*0Sstevel@tonic-gate 
896*0Sstevel@tonic-gate out:
897*0Sstevel@tonic-gate 	if (lseek64(lp->log_fd, lp->log_off, SEEK_SET) != lp->log_off) {
898*0Sstevel@tonic-gate 		fmd_panic("failed to seek %s to 0x%llx\n",
899*0Sstevel@tonic-gate 		    lp->log_name, (u_longlong_t)lp->log_off);
900*0Sstevel@tonic-gate 	}
901*0Sstevel@tonic-gate 
902*0Sstevel@tonic-gate 	if (skp != lp->log_skip) {
903*0Sstevel@tonic-gate 		lp->log_flags |= FMD_LF_DIRTY;
904*0Sstevel@tonic-gate 		lp->log_skip = skp;
905*0Sstevel@tonic-gate 	}
906*0Sstevel@tonic-gate 
907*0Sstevel@tonic-gate 	lp->log_flags &= ~FMD_LF_BUSY;
908*0Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&lp->log_lock);
909*0Sstevel@tonic-gate 	(void) pthread_cond_broadcast(&lp->log_cv);
910*0Sstevel@tonic-gate }
911*0Sstevel@tonic-gate 
912*0Sstevel@tonic-gate void
913*0Sstevel@tonic-gate fmd_log_update(fmd_log_t *lp)
914*0Sstevel@tonic-gate {
915*0Sstevel@tonic-gate 	ea_object_t toc, item;
916*0Sstevel@tonic-gate 	off64_t skip = 0;
917*0Sstevel@tonic-gate 	size_t size;
918*0Sstevel@tonic-gate 	void *buf;
919*0Sstevel@tonic-gate 
920*0Sstevel@tonic-gate 	(void) pthread_mutex_lock(&lp->log_lock);
921*0Sstevel@tonic-gate 
922*0Sstevel@tonic-gate 	if (lp->log_flags & FMD_LF_DIRTY) {
923*0Sstevel@tonic-gate 		lp->log_flags &= ~FMD_LF_DIRTY;
924*0Sstevel@tonic-gate 		skip = lp->log_skip;
925*0Sstevel@tonic-gate 	}
926*0Sstevel@tonic-gate 
927*0Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&lp->log_lock);
928*0Sstevel@tonic-gate 
929*0Sstevel@tonic-gate 	/*
930*0Sstevel@tonic-gate 	 * If the skip needs to be updated, construct a TOC record group
931*0Sstevel@tonic-gate 	 * containing the skip offset and overwrite the TOC in-place.
932*0Sstevel@tonic-gate 	 */
933*0Sstevel@tonic-gate 	if (skip != 0 && ea_set_group(&toc, CAT_FMA_GROUP) == 0 &&
934*0Sstevel@tonic-gate 	    ea_set_item(&item, CAT_FMA_OFFSET, &skip, 0) == 0) {
935*0Sstevel@tonic-gate 
936*0Sstevel@tonic-gate 		(void) ea_attach_to_group(&toc, &item);
937*0Sstevel@tonic-gate 		size = ea_pack_object(&toc, NULL, 0);
938*0Sstevel@tonic-gate 		buf = fmd_alloc(size, FMD_SLEEP);
939*0Sstevel@tonic-gate 
940*0Sstevel@tonic-gate 		(void) ea_pack_object(&toc, buf, size);
941*0Sstevel@tonic-gate 		ASSERT(lp->log_toc + size == lp->log_beg);
942*0Sstevel@tonic-gate 
943*0Sstevel@tonic-gate 		if (pwrite64(lp->log_fd, buf, size, lp->log_toc) == size) {
944*0Sstevel@tonic-gate 			TRACE((FMD_DBG_LOG, "updated skip to %llx", skip));
945*0Sstevel@tonic-gate 		} else {
946*0Sstevel@tonic-gate 			fmd_error(EFMD_LOG_UPDATE,
947*0Sstevel@tonic-gate 			    "failed to log_update %s", lp->log_tag);
948*0Sstevel@tonic-gate 		}
949*0Sstevel@tonic-gate 
950*0Sstevel@tonic-gate 		fmd_free(buf, size);
951*0Sstevel@tonic-gate 		(void) ea_free_item(&item, EUP_ALLOC);
952*0Sstevel@tonic-gate 	}
953*0Sstevel@tonic-gate }
954*0Sstevel@tonic-gate 
955*0Sstevel@tonic-gate /*
956*0Sstevel@tonic-gate  * Rotate the specified log by renaming its underlying file to a staging file
957*0Sstevel@tonic-gate  * that can be handed off to logadm(1M) or an administrator script.  If the
958*0Sstevel@tonic-gate  * rename succeeds, open a new log file using the old path and return it.
959*0Sstevel@tonic-gate  * Note that we are relying our caller to use some higher-level mechanism to
960*0Sstevel@tonic-gate  * ensure that fmd_log_rotate() cannot be called while other threads are
961*0Sstevel@tonic-gate  * attempting fmd_log_append() using the same log (fmd's d_log_lock is used
962*0Sstevel@tonic-gate  * for the global errlog and fltlog).
963*0Sstevel@tonic-gate  */
964*0Sstevel@tonic-gate fmd_log_t *
965*0Sstevel@tonic-gate fmd_log_rotate(fmd_log_t *lp)
966*0Sstevel@tonic-gate {
967*0Sstevel@tonic-gate 	char npath[PATH_MAX];
968*0Sstevel@tonic-gate 	fmd_log_t *nlp;
969*0Sstevel@tonic-gate 
970*0Sstevel@tonic-gate 	(void) snprintf(npath, sizeof (npath), "%s.0-", lp->log_name);
971*0Sstevel@tonic-gate 	(void) pthread_mutex_lock(&lp->log_lock);
972*0Sstevel@tonic-gate 
973*0Sstevel@tonic-gate 	/*
974*0Sstevel@tonic-gate 	 * Check for any pending commits to drain before proceeding.  We can't
975*0Sstevel@tonic-gate 	 * rotate the log out if commits are pending because if we die after
976*0Sstevel@tonic-gate 	 * the log is moved aside, we won't be able to replay them on restart.
977*0Sstevel@tonic-gate 	 */
978*0Sstevel@tonic-gate 	if (lp->log_pending != 0) {
979*0Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&lp->log_lock);
980*0Sstevel@tonic-gate 		(void) fmd_set_errno(EFMD_LOG_ROTBUSY);
981*0Sstevel@tonic-gate 		return (NULL);
982*0Sstevel@tonic-gate 	}
983*0Sstevel@tonic-gate 
984*0Sstevel@tonic-gate 	if (rename(lp->log_name, npath) != 0) {
985*0Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&lp->log_lock);
986*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_ROTATE, "failed to rename %s", lp->log_name);
987*0Sstevel@tonic-gate 		(void) fmd_set_errno(EFMD_LOG_ROTATE);
988*0Sstevel@tonic-gate 		return (NULL);
989*0Sstevel@tonic-gate 	}
990*0Sstevel@tonic-gate 
991*0Sstevel@tonic-gate 	if ((nlp = fmd_log_open("", lp->log_name, lp->log_tag)) == NULL) {
992*0Sstevel@tonic-gate 		(void) rename(npath, lp->log_name);
993*0Sstevel@tonic-gate 		(void) pthread_mutex_unlock(&lp->log_lock);
994*0Sstevel@tonic-gate 		fmd_error(EFMD_LOG_ROTATE, "failed to reopen %s", lp->log_name);
995*0Sstevel@tonic-gate 		(void) fmd_set_errno(EFMD_LOG_ROTATE);
996*0Sstevel@tonic-gate 		return (NULL);
997*0Sstevel@tonic-gate 	}
998*0Sstevel@tonic-gate 
999*0Sstevel@tonic-gate 	/*
1000*0Sstevel@tonic-gate 	 * If we've rotated the log, no pending events exist so we don't have
1001*0Sstevel@tonic-gate 	 * any more commits coming, and our caller should have arranged for
1002*0Sstevel@tonic-gate 	 * no more calls to append.  As such, we can close log_fd for good.
1003*0Sstevel@tonic-gate 	 */
1004*0Sstevel@tonic-gate 	if (lp->log_flags & FMD_LF_EAOPEN) {
1005*0Sstevel@tonic-gate 		(void) ea_close(&lp->log_ea);
1006*0Sstevel@tonic-gate 		lp->log_flags &= ~FMD_LF_EAOPEN;
1007*0Sstevel@tonic-gate 	}
1008*0Sstevel@tonic-gate 
1009*0Sstevel@tonic-gate 	(void) close(lp->log_fd);
1010*0Sstevel@tonic-gate 	lp->log_fd = -1;
1011*0Sstevel@tonic-gate 
1012*0Sstevel@tonic-gate 	(void) pthread_mutex_unlock(&lp->log_lock);
1013*0Sstevel@tonic-gate 	return (nlp);
1014*0Sstevel@tonic-gate }
1015