1*0Sstevel@tonic-gate /*-
2*0Sstevel@tonic-gate * See the file LICENSE for redistribution information.
3*0Sstevel@tonic-gate *
4*0Sstevel@tonic-gate * Copyright (c) 1996, 1997, 1998
5*0Sstevel@tonic-gate * Sleepycat Software. All rights reserved.
6*0Sstevel@tonic-gate */
7*0Sstevel@tonic-gate #include "config.h"
8*0Sstevel@tonic-gate
9*0Sstevel@tonic-gate #ifndef lint
10*0Sstevel@tonic-gate static const char sccsid[] = "@(#)log_get.c 10.38 (Sleepycat) 10/3/98";
11*0Sstevel@tonic-gate #endif /* not lint */
12*0Sstevel@tonic-gate
13*0Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES
14*0Sstevel@tonic-gate #include <sys/types.h>
15*0Sstevel@tonic-gate
16*0Sstevel@tonic-gate #include <errno.h>
17*0Sstevel@tonic-gate #include <string.h>
18*0Sstevel@tonic-gate #include <unistd.h>
19*0Sstevel@tonic-gate #endif
20*0Sstevel@tonic-gate
21*0Sstevel@tonic-gate #include "db_int.h"
22*0Sstevel@tonic-gate #include "shqueue.h"
23*0Sstevel@tonic-gate #include "db_page.h"
24*0Sstevel@tonic-gate #include "log.h"
25*0Sstevel@tonic-gate #include "hash.h"
26*0Sstevel@tonic-gate #include "common_ext.h"
27*0Sstevel@tonic-gate
28*0Sstevel@tonic-gate /*
29*0Sstevel@tonic-gate * log_get --
30*0Sstevel@tonic-gate * Get a log record.
31*0Sstevel@tonic-gate */
32*0Sstevel@tonic-gate int
log_get(dblp,alsn,dbt,flags)33*0Sstevel@tonic-gate log_get(dblp, alsn, dbt, flags)
34*0Sstevel@tonic-gate DB_LOG *dblp;
35*0Sstevel@tonic-gate DB_LSN *alsn;
36*0Sstevel@tonic-gate DBT *dbt;
37*0Sstevel@tonic-gate u_int32_t flags;
38*0Sstevel@tonic-gate {
39*0Sstevel@tonic-gate int ret;
40*0Sstevel@tonic-gate
41*0Sstevel@tonic-gate LOG_PANIC_CHECK(dblp);
42*0Sstevel@tonic-gate
43*0Sstevel@tonic-gate /* Validate arguments. */
44*0Sstevel@tonic-gate if (flags != DB_CHECKPOINT && flags != DB_CURRENT &&
45*0Sstevel@tonic-gate flags != DB_FIRST && flags != DB_LAST &&
46*0Sstevel@tonic-gate flags != DB_NEXT && flags != DB_PREV && flags != DB_SET)
47*0Sstevel@tonic-gate return (__db_ferr(dblp->dbenv, "log_get", 1));
48*0Sstevel@tonic-gate
49*0Sstevel@tonic-gate if (F_ISSET(dblp, DB_AM_THREAD)) {
50*0Sstevel@tonic-gate if (flags == DB_NEXT || flags == DB_PREV || flags == DB_CURRENT)
51*0Sstevel@tonic-gate return (__db_ferr(dblp->dbenv, "log_get", 1));
52*0Sstevel@tonic-gate if (!F_ISSET(dbt, DB_DBT_USERMEM | DB_DBT_MALLOC))
53*0Sstevel@tonic-gate return (__db_ferr(dblp->dbenv, "threaded data", 1));
54*0Sstevel@tonic-gate }
55*0Sstevel@tonic-gate
56*0Sstevel@tonic-gate LOCK_LOGREGION(dblp);
57*0Sstevel@tonic-gate
58*0Sstevel@tonic-gate /*
59*0Sstevel@tonic-gate * If we get one of the log's header records, repeat the operation.
60*0Sstevel@tonic-gate * This assumes that applications don't ever request the log header
61*0Sstevel@tonic-gate * records by LSN, but that seems reasonable to me.
62*0Sstevel@tonic-gate */
63*0Sstevel@tonic-gate ret = __log_get(dblp, alsn, dbt, flags, 0);
64*0Sstevel@tonic-gate if (ret == 0 && alsn->offset == 0) {
65*0Sstevel@tonic-gate switch (flags) {
66*0Sstevel@tonic-gate case DB_FIRST:
67*0Sstevel@tonic-gate flags = DB_NEXT;
68*0Sstevel@tonic-gate break;
69*0Sstevel@tonic-gate case DB_LAST:
70*0Sstevel@tonic-gate flags = DB_PREV;
71*0Sstevel@tonic-gate break;
72*0Sstevel@tonic-gate }
73*0Sstevel@tonic-gate ret = __log_get(dblp, alsn, dbt, flags, 0);
74*0Sstevel@tonic-gate }
75*0Sstevel@tonic-gate
76*0Sstevel@tonic-gate UNLOCK_LOGREGION(dblp);
77*0Sstevel@tonic-gate
78*0Sstevel@tonic-gate return (ret);
79*0Sstevel@tonic-gate }
80*0Sstevel@tonic-gate
81*0Sstevel@tonic-gate /*
82*0Sstevel@tonic-gate * __log_get --
83*0Sstevel@tonic-gate * Get a log record; internal version.
84*0Sstevel@tonic-gate *
85*0Sstevel@tonic-gate * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int));
86*0Sstevel@tonic-gate */
87*0Sstevel@tonic-gate int
__log_get(dblp,alsn,dbt,flags,silent)88*0Sstevel@tonic-gate __log_get(dblp, alsn, dbt, flags, silent)
89*0Sstevel@tonic-gate DB_LOG *dblp;
90*0Sstevel@tonic-gate DB_LSN *alsn;
91*0Sstevel@tonic-gate DBT *dbt;
92*0Sstevel@tonic-gate u_int32_t flags;
93*0Sstevel@tonic-gate int silent;
94*0Sstevel@tonic-gate {
95*0Sstevel@tonic-gate DB_LSN nlsn;
96*0Sstevel@tonic-gate HDR hdr;
97*0Sstevel@tonic-gate LOG *lp;
98*0Sstevel@tonic-gate size_t len;
99*0Sstevel@tonic-gate ssize_t nr;
100*0Sstevel@tonic-gate int cnt, ret;
101*0Sstevel@tonic-gate char *np, *tbuf;
102*0Sstevel@tonic-gate const char *fail;
103*0Sstevel@tonic-gate void *p, *shortp;
104*0Sstevel@tonic-gate
105*0Sstevel@tonic-gate lp = dblp->lp;
106*0Sstevel@tonic-gate fail = np = tbuf = NULL;
107*0Sstevel@tonic-gate
108*0Sstevel@tonic-gate nlsn = dblp->c_lsn;
109*0Sstevel@tonic-gate switch (flags) {
110*0Sstevel@tonic-gate case DB_CHECKPOINT:
111*0Sstevel@tonic-gate nlsn = lp->chkpt_lsn;
112*0Sstevel@tonic-gate if (IS_ZERO_LSN(nlsn)) {
113*0Sstevel@tonic-gate __db_err(dblp->dbenv,
114*0Sstevel@tonic-gate "log_get: unable to find checkpoint record: no checkpoint set.");
115*0Sstevel@tonic-gate ret = ENOENT;
116*0Sstevel@tonic-gate goto err2;
117*0Sstevel@tonic-gate }
118*0Sstevel@tonic-gate break;
119*0Sstevel@tonic-gate case DB_NEXT: /* Next log record. */
120*0Sstevel@tonic-gate if (!IS_ZERO_LSN(nlsn)) {
121*0Sstevel@tonic-gate /* Increment the cursor by the cursor record size. */
122*0Sstevel@tonic-gate nlsn.offset += dblp->c_len;
123*0Sstevel@tonic-gate break;
124*0Sstevel@tonic-gate }
125*0Sstevel@tonic-gate /* FALLTHROUGH */
126*0Sstevel@tonic-gate case DB_FIRST: /* Find the first log record. */
127*0Sstevel@tonic-gate /* Find the first log file. */
128*0Sstevel@tonic-gate if ((ret = __log_find(dblp, 1, &cnt)) != 0)
129*0Sstevel@tonic-gate goto err2;
130*0Sstevel@tonic-gate
131*0Sstevel@tonic-gate /*
132*0Sstevel@tonic-gate * We may have only entered records in the buffer, and not
133*0Sstevel@tonic-gate * yet written a log file. If no log files were found and
134*0Sstevel@tonic-gate * there's anything in the buffer, it belongs to file 1.
135*0Sstevel@tonic-gate */
136*0Sstevel@tonic-gate if (cnt == 0)
137*0Sstevel@tonic-gate cnt = 1;
138*0Sstevel@tonic-gate
139*0Sstevel@tonic-gate nlsn.file = cnt;
140*0Sstevel@tonic-gate nlsn.offset = 0;
141*0Sstevel@tonic-gate break;
142*0Sstevel@tonic-gate case DB_CURRENT: /* Current log record. */
143*0Sstevel@tonic-gate break;
144*0Sstevel@tonic-gate case DB_PREV: /* Previous log record. */
145*0Sstevel@tonic-gate if (!IS_ZERO_LSN(nlsn)) {
146*0Sstevel@tonic-gate /* If at start-of-file, move to the previous file. */
147*0Sstevel@tonic-gate if (nlsn.offset == 0) {
148*0Sstevel@tonic-gate if (nlsn.file == 1 ||
149*0Sstevel@tonic-gate __log_valid(dblp, nlsn.file - 1, 0) != 0)
150*0Sstevel@tonic-gate return (DB_NOTFOUND);
151*0Sstevel@tonic-gate
152*0Sstevel@tonic-gate --nlsn.file;
153*0Sstevel@tonic-gate nlsn.offset = dblp->c_off;
154*0Sstevel@tonic-gate } else
155*0Sstevel@tonic-gate nlsn.offset = dblp->c_off;
156*0Sstevel@tonic-gate break;
157*0Sstevel@tonic-gate }
158*0Sstevel@tonic-gate /* FALLTHROUGH */
159*0Sstevel@tonic-gate case DB_LAST: /* Last log record. */
160*0Sstevel@tonic-gate nlsn.file = lp->lsn.file;
161*0Sstevel@tonic-gate nlsn.offset = lp->lsn.offset - lp->len;
162*0Sstevel@tonic-gate break;
163*0Sstevel@tonic-gate case DB_SET: /* Set log record. */
164*0Sstevel@tonic-gate nlsn = *alsn;
165*0Sstevel@tonic-gate break;
166*0Sstevel@tonic-gate }
167*0Sstevel@tonic-gate
168*0Sstevel@tonic-gate retry:
169*0Sstevel@tonic-gate /* Return 1 if the request is past end-of-file. */
170*0Sstevel@tonic-gate if (nlsn.file > lp->lsn.file ||
171*0Sstevel@tonic-gate (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset))
172*0Sstevel@tonic-gate return (DB_NOTFOUND);
173*0Sstevel@tonic-gate
174*0Sstevel@tonic-gate /* If we've switched files, discard the current fd. */
175*0Sstevel@tonic-gate if (dblp->c_lsn.file != nlsn.file && dblp->c_fd != -1) {
176*0Sstevel@tonic-gate (void)__os_close(dblp->c_fd);
177*0Sstevel@tonic-gate dblp->c_fd = -1;
178*0Sstevel@tonic-gate }
179*0Sstevel@tonic-gate
180*0Sstevel@tonic-gate /* If the entire record is in the in-memory buffer, copy it out. */
181*0Sstevel@tonic-gate if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) {
182*0Sstevel@tonic-gate /* Copy the header. */
183*0Sstevel@tonic-gate p = lp->buf + (nlsn.offset - lp->w_off);
184*0Sstevel@tonic-gate memcpy(&hdr, p, sizeof(HDR));
185*0Sstevel@tonic-gate
186*0Sstevel@tonic-gate /* Copy the record. */
187*0Sstevel@tonic-gate len = hdr.len - sizeof(HDR);
188*0Sstevel@tonic-gate if ((ret = __db_retcopy(dbt, (u_int8_t *)p + sizeof(HDR),
189*0Sstevel@tonic-gate len, &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
190*0Sstevel@tonic-gate goto err1;
191*0Sstevel@tonic-gate goto cksum;
192*0Sstevel@tonic-gate }
193*0Sstevel@tonic-gate
194*0Sstevel@tonic-gate /* Acquire a file descriptor. */
195*0Sstevel@tonic-gate if (dblp->c_fd == -1) {
196*0Sstevel@tonic-gate if ((ret = __log_name(dblp, nlsn.file,
197*0Sstevel@tonic-gate &np, &dblp->c_fd, DB_RDONLY | DB_SEQUENTIAL)) != 0) {
198*0Sstevel@tonic-gate fail = np;
199*0Sstevel@tonic-gate goto err1;
200*0Sstevel@tonic-gate }
201*0Sstevel@tonic-gate __os_freestr(np);
202*0Sstevel@tonic-gate np = NULL;
203*0Sstevel@tonic-gate }
204*0Sstevel@tonic-gate
205*0Sstevel@tonic-gate /* Seek to the header offset and read the header. */
206*0Sstevel@tonic-gate if ((ret =
207*0Sstevel@tonic-gate __os_seek(dblp->c_fd, 0, 0, nlsn.offset, 0, SEEK_SET)) != 0) {
208*0Sstevel@tonic-gate fail = "seek";
209*0Sstevel@tonic-gate goto err1;
210*0Sstevel@tonic-gate }
211*0Sstevel@tonic-gate if ((ret = __os_read(dblp->c_fd, &hdr, sizeof(HDR), &nr)) != 0) {
212*0Sstevel@tonic-gate fail = "read";
213*0Sstevel@tonic-gate goto err1;
214*0Sstevel@tonic-gate }
215*0Sstevel@tonic-gate if (nr == sizeof(HDR))
216*0Sstevel@tonic-gate shortp = NULL;
217*0Sstevel@tonic-gate else {
218*0Sstevel@tonic-gate /* If read returns EOF, try the next file. */
219*0Sstevel@tonic-gate if (nr == 0) {
220*0Sstevel@tonic-gate if (flags != DB_NEXT || nlsn.file == lp->lsn.file)
221*0Sstevel@tonic-gate goto corrupt;
222*0Sstevel@tonic-gate
223*0Sstevel@tonic-gate /* Move to the next file. */
224*0Sstevel@tonic-gate ++nlsn.file;
225*0Sstevel@tonic-gate nlsn.offset = 0;
226*0Sstevel@tonic-gate goto retry;
227*0Sstevel@tonic-gate }
228*0Sstevel@tonic-gate
229*0Sstevel@tonic-gate /*
230*0Sstevel@tonic-gate * If read returns a short count the rest of the record has
231*0Sstevel@tonic-gate * to be in the in-memory buffer.
232*0Sstevel@tonic-gate */
233*0Sstevel@tonic-gate if (lp->b_off < sizeof(HDR) - nr)
234*0Sstevel@tonic-gate goto corrupt;
235*0Sstevel@tonic-gate
236*0Sstevel@tonic-gate /* Get the rest of the header from the in-memory buffer. */
237*0Sstevel@tonic-gate memcpy((u_int8_t *)&hdr + nr, lp->buf, sizeof(HDR) - nr);
238*0Sstevel@tonic-gate shortp = lp->buf + (sizeof(HDR) - nr);
239*0Sstevel@tonic-gate }
240*0Sstevel@tonic-gate
241*0Sstevel@tonic-gate /*
242*0Sstevel@tonic-gate * Check for buffers of 0's, that's what we usually see during
243*0Sstevel@tonic-gate * recovery, although it's certainly not something on which we
244*0Sstevel@tonic-gate * can depend.
245*0Sstevel@tonic-gate */
246*0Sstevel@tonic-gate if (hdr.len <= sizeof(HDR))
247*0Sstevel@tonic-gate goto corrupt;
248*0Sstevel@tonic-gate len = hdr.len - sizeof(HDR);
249*0Sstevel@tonic-gate
250*0Sstevel@tonic-gate /* If we've already moved to the in-memory buffer, fill from there. */
251*0Sstevel@tonic-gate if (shortp != NULL) {
252*0Sstevel@tonic-gate if (lp->b_off < ((u_int8_t *)shortp - lp->buf) + len)
253*0Sstevel@tonic-gate goto corrupt;
254*0Sstevel@tonic-gate if ((ret = __db_retcopy(dbt, shortp, len,
255*0Sstevel@tonic-gate &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
256*0Sstevel@tonic-gate goto err1;
257*0Sstevel@tonic-gate goto cksum;
258*0Sstevel@tonic-gate }
259*0Sstevel@tonic-gate
260*0Sstevel@tonic-gate /*
261*0Sstevel@tonic-gate * Allocate temporary memory to hold the record.
262*0Sstevel@tonic-gate *
263*0Sstevel@tonic-gate * XXX
264*0Sstevel@tonic-gate * We're calling malloc(3) with a region locked. This isn't
265*0Sstevel@tonic-gate * a good idea.
266*0Sstevel@tonic-gate */
267*0Sstevel@tonic-gate if ((ret = __os_malloc(len, NULL, &tbuf)) != 0)
268*0Sstevel@tonic-gate goto err1;
269*0Sstevel@tonic-gate
270*0Sstevel@tonic-gate /*
271*0Sstevel@tonic-gate * Read the record into the buffer. If read returns a short count,
272*0Sstevel@tonic-gate * there was an error or the rest of the record is in the in-memory
273*0Sstevel@tonic-gate * buffer. Note, the information may be garbage if we're in recovery,
274*0Sstevel@tonic-gate * so don't read past the end of the buffer's memory.
275*0Sstevel@tonic-gate */
276*0Sstevel@tonic-gate if ((ret = __os_read(dblp->c_fd, tbuf, len, &nr)) != 0) {
277*0Sstevel@tonic-gate fail = "read";
278*0Sstevel@tonic-gate goto err1;
279*0Sstevel@tonic-gate }
280*0Sstevel@tonic-gate if (len - nr > sizeof(lp->buf))
281*0Sstevel@tonic-gate goto corrupt;
282*0Sstevel@tonic-gate if (nr != (ssize_t)len) {
283*0Sstevel@tonic-gate if (lp->b_off < len - nr)
284*0Sstevel@tonic-gate goto corrupt;
285*0Sstevel@tonic-gate
286*0Sstevel@tonic-gate /* Get the rest of the record from the in-memory buffer. */
287*0Sstevel@tonic-gate memcpy((u_int8_t *)tbuf + nr, lp->buf, len - nr);
288*0Sstevel@tonic-gate }
289*0Sstevel@tonic-gate
290*0Sstevel@tonic-gate /* Copy the record into the user's DBT. */
291*0Sstevel@tonic-gate if ((ret = __db_retcopy(dbt, tbuf, len,
292*0Sstevel@tonic-gate &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0)
293*0Sstevel@tonic-gate goto err1;
294*0Sstevel@tonic-gate __os_free(tbuf, 0);
295*0Sstevel@tonic-gate tbuf = NULL;
296*0Sstevel@tonic-gate
297*0Sstevel@tonic-gate cksum: if (hdr.cksum != __ham_func4(dbt->data, dbt->size)) {
298*0Sstevel@tonic-gate if (!silent)
299*0Sstevel@tonic-gate __db_err(dblp->dbenv, "log_get: checksum mismatch");
300*0Sstevel@tonic-gate goto corrupt;
301*0Sstevel@tonic-gate }
302*0Sstevel@tonic-gate
303*0Sstevel@tonic-gate /* Update the cursor and the return lsn. */
304*0Sstevel@tonic-gate dblp->c_off = hdr.prev;
305*0Sstevel@tonic-gate dblp->c_len = hdr.len;
306*0Sstevel@tonic-gate dblp->c_lsn = *alsn = nlsn;
307*0Sstevel@tonic-gate
308*0Sstevel@tonic-gate return (0);
309*0Sstevel@tonic-gate
310*0Sstevel@tonic-gate corrupt:/*
311*0Sstevel@tonic-gate * This is the catchall -- for some reason we didn't find enough
312*0Sstevel@tonic-gate * information or it wasn't reasonable information, and it wasn't
313*0Sstevel@tonic-gate * because a system call failed.
314*0Sstevel@tonic-gate */
315*0Sstevel@tonic-gate ret = EIO;
316*0Sstevel@tonic-gate fail = "read";
317*0Sstevel@tonic-gate
318*0Sstevel@tonic-gate err1: if (!silent)
319*0Sstevel@tonic-gate if (fail == NULL)
320*0Sstevel@tonic-gate __db_err(dblp->dbenv, "log_get: %s", strerror(ret));
321*0Sstevel@tonic-gate else
322*0Sstevel@tonic-gate __db_err(dblp->dbenv,
323*0Sstevel@tonic-gate "log_get: %s: %s", fail, strerror(ret));
324*0Sstevel@tonic-gate err2: if (np != NULL)
325*0Sstevel@tonic-gate __os_freestr(np);
326*0Sstevel@tonic-gate if (tbuf != NULL)
327*0Sstevel@tonic-gate __os_free(tbuf, 0);
328*0Sstevel@tonic-gate return (ret);
329*0Sstevel@tonic-gate }
330