1*0Sstevel@tonic-gate /*- 2*0Sstevel@tonic-gate * See the file LICENSE for redistribution information. 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * Copyright (c) 1996, 1997, 1998 5*0Sstevel@tonic-gate * Sleepycat Software. All rights reserved. 6*0Sstevel@tonic-gate */ 7*0Sstevel@tonic-gate #include "config.h" 8*0Sstevel@tonic-gate 9*0Sstevel@tonic-gate #ifndef lint 10*0Sstevel@tonic-gate static const char sccsid[] = "@(#)log_get.c 10.38 (Sleepycat) 10/3/98"; 11*0Sstevel@tonic-gate #endif /* not lint */ 12*0Sstevel@tonic-gate 13*0Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES 14*0Sstevel@tonic-gate #include <sys/types.h> 15*0Sstevel@tonic-gate 16*0Sstevel@tonic-gate #include <errno.h> 17*0Sstevel@tonic-gate #include <string.h> 18*0Sstevel@tonic-gate #include <unistd.h> 19*0Sstevel@tonic-gate #endif 20*0Sstevel@tonic-gate 21*0Sstevel@tonic-gate #include "db_int.h" 22*0Sstevel@tonic-gate #include "shqueue.h" 23*0Sstevel@tonic-gate #include "db_page.h" 24*0Sstevel@tonic-gate #include "log.h" 25*0Sstevel@tonic-gate #include "hash.h" 26*0Sstevel@tonic-gate #include "common_ext.h" 27*0Sstevel@tonic-gate 28*0Sstevel@tonic-gate /* 29*0Sstevel@tonic-gate * log_get -- 30*0Sstevel@tonic-gate * Get a log record. 31*0Sstevel@tonic-gate */ 32*0Sstevel@tonic-gate int 33*0Sstevel@tonic-gate log_get(dblp, alsn, dbt, flags) 34*0Sstevel@tonic-gate DB_LOG *dblp; 35*0Sstevel@tonic-gate DB_LSN *alsn; 36*0Sstevel@tonic-gate DBT *dbt; 37*0Sstevel@tonic-gate u_int32_t flags; 38*0Sstevel@tonic-gate { 39*0Sstevel@tonic-gate int ret; 40*0Sstevel@tonic-gate 41*0Sstevel@tonic-gate LOG_PANIC_CHECK(dblp); 42*0Sstevel@tonic-gate 43*0Sstevel@tonic-gate /* Validate arguments. */ 44*0Sstevel@tonic-gate if (flags != DB_CHECKPOINT && flags != DB_CURRENT && 45*0Sstevel@tonic-gate flags != DB_FIRST && flags != DB_LAST && 46*0Sstevel@tonic-gate flags != DB_NEXT && flags != DB_PREV && flags != DB_SET) 47*0Sstevel@tonic-gate return (__db_ferr(dblp->dbenv, "log_get", 1)); 48*0Sstevel@tonic-gate 49*0Sstevel@tonic-gate if (F_ISSET(dblp, DB_AM_THREAD)) { 50*0Sstevel@tonic-gate if (flags == DB_NEXT || flags == DB_PREV || flags == DB_CURRENT) 51*0Sstevel@tonic-gate return (__db_ferr(dblp->dbenv, "log_get", 1)); 52*0Sstevel@tonic-gate if (!F_ISSET(dbt, DB_DBT_USERMEM | DB_DBT_MALLOC)) 53*0Sstevel@tonic-gate return (__db_ferr(dblp->dbenv, "threaded data", 1)); 54*0Sstevel@tonic-gate } 55*0Sstevel@tonic-gate 56*0Sstevel@tonic-gate LOCK_LOGREGION(dblp); 57*0Sstevel@tonic-gate 58*0Sstevel@tonic-gate /* 59*0Sstevel@tonic-gate * If we get one of the log's header records, repeat the operation. 60*0Sstevel@tonic-gate * This assumes that applications don't ever request the log header 61*0Sstevel@tonic-gate * records by LSN, but that seems reasonable to me. 62*0Sstevel@tonic-gate */ 63*0Sstevel@tonic-gate ret = __log_get(dblp, alsn, dbt, flags, 0); 64*0Sstevel@tonic-gate if (ret == 0 && alsn->offset == 0) { 65*0Sstevel@tonic-gate switch (flags) { 66*0Sstevel@tonic-gate case DB_FIRST: 67*0Sstevel@tonic-gate flags = DB_NEXT; 68*0Sstevel@tonic-gate break; 69*0Sstevel@tonic-gate case DB_LAST: 70*0Sstevel@tonic-gate flags = DB_PREV; 71*0Sstevel@tonic-gate break; 72*0Sstevel@tonic-gate } 73*0Sstevel@tonic-gate ret = __log_get(dblp, alsn, dbt, flags, 0); 74*0Sstevel@tonic-gate } 75*0Sstevel@tonic-gate 76*0Sstevel@tonic-gate UNLOCK_LOGREGION(dblp); 77*0Sstevel@tonic-gate 78*0Sstevel@tonic-gate return (ret); 79*0Sstevel@tonic-gate } 80*0Sstevel@tonic-gate 81*0Sstevel@tonic-gate /* 82*0Sstevel@tonic-gate * __log_get -- 83*0Sstevel@tonic-gate * Get a log record; internal version. 84*0Sstevel@tonic-gate * 85*0Sstevel@tonic-gate * PUBLIC: int __log_get __P((DB_LOG *, DB_LSN *, DBT *, u_int32_t, int)); 86*0Sstevel@tonic-gate */ 87*0Sstevel@tonic-gate int 88*0Sstevel@tonic-gate __log_get(dblp, alsn, dbt, flags, silent) 89*0Sstevel@tonic-gate DB_LOG *dblp; 90*0Sstevel@tonic-gate DB_LSN *alsn; 91*0Sstevel@tonic-gate DBT *dbt; 92*0Sstevel@tonic-gate u_int32_t flags; 93*0Sstevel@tonic-gate int silent; 94*0Sstevel@tonic-gate { 95*0Sstevel@tonic-gate DB_LSN nlsn; 96*0Sstevel@tonic-gate HDR hdr; 97*0Sstevel@tonic-gate LOG *lp; 98*0Sstevel@tonic-gate size_t len; 99*0Sstevel@tonic-gate ssize_t nr; 100*0Sstevel@tonic-gate int cnt, ret; 101*0Sstevel@tonic-gate char *np, *tbuf; 102*0Sstevel@tonic-gate const char *fail; 103*0Sstevel@tonic-gate void *p, *shortp; 104*0Sstevel@tonic-gate 105*0Sstevel@tonic-gate lp = dblp->lp; 106*0Sstevel@tonic-gate fail = np = tbuf = NULL; 107*0Sstevel@tonic-gate 108*0Sstevel@tonic-gate nlsn = dblp->c_lsn; 109*0Sstevel@tonic-gate switch (flags) { 110*0Sstevel@tonic-gate case DB_CHECKPOINT: 111*0Sstevel@tonic-gate nlsn = lp->chkpt_lsn; 112*0Sstevel@tonic-gate if (IS_ZERO_LSN(nlsn)) { 113*0Sstevel@tonic-gate __db_err(dblp->dbenv, 114*0Sstevel@tonic-gate "log_get: unable to find checkpoint record: no checkpoint set."); 115*0Sstevel@tonic-gate ret = ENOENT; 116*0Sstevel@tonic-gate goto err2; 117*0Sstevel@tonic-gate } 118*0Sstevel@tonic-gate break; 119*0Sstevel@tonic-gate case DB_NEXT: /* Next log record. */ 120*0Sstevel@tonic-gate if (!IS_ZERO_LSN(nlsn)) { 121*0Sstevel@tonic-gate /* Increment the cursor by the cursor record size. */ 122*0Sstevel@tonic-gate nlsn.offset += dblp->c_len; 123*0Sstevel@tonic-gate break; 124*0Sstevel@tonic-gate } 125*0Sstevel@tonic-gate /* FALLTHROUGH */ 126*0Sstevel@tonic-gate case DB_FIRST: /* Find the first log record. */ 127*0Sstevel@tonic-gate /* Find the first log file. */ 128*0Sstevel@tonic-gate if ((ret = __log_find(dblp, 1, &cnt)) != 0) 129*0Sstevel@tonic-gate goto err2; 130*0Sstevel@tonic-gate 131*0Sstevel@tonic-gate /* 132*0Sstevel@tonic-gate * We may have only entered records in the buffer, and not 133*0Sstevel@tonic-gate * yet written a log file. If no log files were found and 134*0Sstevel@tonic-gate * there's anything in the buffer, it belongs to file 1. 135*0Sstevel@tonic-gate */ 136*0Sstevel@tonic-gate if (cnt == 0) 137*0Sstevel@tonic-gate cnt = 1; 138*0Sstevel@tonic-gate 139*0Sstevel@tonic-gate nlsn.file = cnt; 140*0Sstevel@tonic-gate nlsn.offset = 0; 141*0Sstevel@tonic-gate break; 142*0Sstevel@tonic-gate case DB_CURRENT: /* Current log record. */ 143*0Sstevel@tonic-gate break; 144*0Sstevel@tonic-gate case DB_PREV: /* Previous log record. */ 145*0Sstevel@tonic-gate if (!IS_ZERO_LSN(nlsn)) { 146*0Sstevel@tonic-gate /* If at start-of-file, move to the previous file. */ 147*0Sstevel@tonic-gate if (nlsn.offset == 0) { 148*0Sstevel@tonic-gate if (nlsn.file == 1 || 149*0Sstevel@tonic-gate __log_valid(dblp, nlsn.file - 1, 0) != 0) 150*0Sstevel@tonic-gate return (DB_NOTFOUND); 151*0Sstevel@tonic-gate 152*0Sstevel@tonic-gate --nlsn.file; 153*0Sstevel@tonic-gate nlsn.offset = dblp->c_off; 154*0Sstevel@tonic-gate } else 155*0Sstevel@tonic-gate nlsn.offset = dblp->c_off; 156*0Sstevel@tonic-gate break; 157*0Sstevel@tonic-gate } 158*0Sstevel@tonic-gate /* FALLTHROUGH */ 159*0Sstevel@tonic-gate case DB_LAST: /* Last log record. */ 160*0Sstevel@tonic-gate nlsn.file = lp->lsn.file; 161*0Sstevel@tonic-gate nlsn.offset = lp->lsn.offset - lp->len; 162*0Sstevel@tonic-gate break; 163*0Sstevel@tonic-gate case DB_SET: /* Set log record. */ 164*0Sstevel@tonic-gate nlsn = *alsn; 165*0Sstevel@tonic-gate break; 166*0Sstevel@tonic-gate } 167*0Sstevel@tonic-gate 168*0Sstevel@tonic-gate retry: 169*0Sstevel@tonic-gate /* Return 1 if the request is past end-of-file. */ 170*0Sstevel@tonic-gate if (nlsn.file > lp->lsn.file || 171*0Sstevel@tonic-gate (nlsn.file == lp->lsn.file && nlsn.offset >= lp->lsn.offset)) 172*0Sstevel@tonic-gate return (DB_NOTFOUND); 173*0Sstevel@tonic-gate 174*0Sstevel@tonic-gate /* If we've switched files, discard the current fd. */ 175*0Sstevel@tonic-gate if (dblp->c_lsn.file != nlsn.file && dblp->c_fd != -1) { 176*0Sstevel@tonic-gate (void)__os_close(dblp->c_fd); 177*0Sstevel@tonic-gate dblp->c_fd = -1; 178*0Sstevel@tonic-gate } 179*0Sstevel@tonic-gate 180*0Sstevel@tonic-gate /* If the entire record is in the in-memory buffer, copy it out. */ 181*0Sstevel@tonic-gate if (nlsn.file == lp->lsn.file && nlsn.offset >= lp->w_off) { 182*0Sstevel@tonic-gate /* Copy the header. */ 183*0Sstevel@tonic-gate p = lp->buf + (nlsn.offset - lp->w_off); 184*0Sstevel@tonic-gate memcpy(&hdr, p, sizeof(HDR)); 185*0Sstevel@tonic-gate 186*0Sstevel@tonic-gate /* Copy the record. */ 187*0Sstevel@tonic-gate len = hdr.len - sizeof(HDR); 188*0Sstevel@tonic-gate if ((ret = __db_retcopy(dbt, (u_int8_t *)p + sizeof(HDR), 189*0Sstevel@tonic-gate len, &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0) 190*0Sstevel@tonic-gate goto err1; 191*0Sstevel@tonic-gate goto cksum; 192*0Sstevel@tonic-gate } 193*0Sstevel@tonic-gate 194*0Sstevel@tonic-gate /* Acquire a file descriptor. */ 195*0Sstevel@tonic-gate if (dblp->c_fd == -1) { 196*0Sstevel@tonic-gate if ((ret = __log_name(dblp, nlsn.file, 197*0Sstevel@tonic-gate &np, &dblp->c_fd, DB_RDONLY | DB_SEQUENTIAL)) != 0) { 198*0Sstevel@tonic-gate fail = np; 199*0Sstevel@tonic-gate goto err1; 200*0Sstevel@tonic-gate } 201*0Sstevel@tonic-gate __os_freestr(np); 202*0Sstevel@tonic-gate np = NULL; 203*0Sstevel@tonic-gate } 204*0Sstevel@tonic-gate 205*0Sstevel@tonic-gate /* Seek to the header offset and read the header. */ 206*0Sstevel@tonic-gate if ((ret = 207*0Sstevel@tonic-gate __os_seek(dblp->c_fd, 0, 0, nlsn.offset, 0, SEEK_SET)) != 0) { 208*0Sstevel@tonic-gate fail = "seek"; 209*0Sstevel@tonic-gate goto err1; 210*0Sstevel@tonic-gate } 211*0Sstevel@tonic-gate if ((ret = __os_read(dblp->c_fd, &hdr, sizeof(HDR), &nr)) != 0) { 212*0Sstevel@tonic-gate fail = "read"; 213*0Sstevel@tonic-gate goto err1; 214*0Sstevel@tonic-gate } 215*0Sstevel@tonic-gate if (nr == sizeof(HDR)) 216*0Sstevel@tonic-gate shortp = NULL; 217*0Sstevel@tonic-gate else { 218*0Sstevel@tonic-gate /* If read returns EOF, try the next file. */ 219*0Sstevel@tonic-gate if (nr == 0) { 220*0Sstevel@tonic-gate if (flags != DB_NEXT || nlsn.file == lp->lsn.file) 221*0Sstevel@tonic-gate goto corrupt; 222*0Sstevel@tonic-gate 223*0Sstevel@tonic-gate /* Move to the next file. */ 224*0Sstevel@tonic-gate ++nlsn.file; 225*0Sstevel@tonic-gate nlsn.offset = 0; 226*0Sstevel@tonic-gate goto retry; 227*0Sstevel@tonic-gate } 228*0Sstevel@tonic-gate 229*0Sstevel@tonic-gate /* 230*0Sstevel@tonic-gate * If read returns a short count the rest of the record has 231*0Sstevel@tonic-gate * to be in the in-memory buffer. 232*0Sstevel@tonic-gate */ 233*0Sstevel@tonic-gate if (lp->b_off < sizeof(HDR) - nr) 234*0Sstevel@tonic-gate goto corrupt; 235*0Sstevel@tonic-gate 236*0Sstevel@tonic-gate /* Get the rest of the header from the in-memory buffer. */ 237*0Sstevel@tonic-gate memcpy((u_int8_t *)&hdr + nr, lp->buf, sizeof(HDR) - nr); 238*0Sstevel@tonic-gate shortp = lp->buf + (sizeof(HDR) - nr); 239*0Sstevel@tonic-gate } 240*0Sstevel@tonic-gate 241*0Sstevel@tonic-gate /* 242*0Sstevel@tonic-gate * Check for buffers of 0's, that's what we usually see during 243*0Sstevel@tonic-gate * recovery, although it's certainly not something on which we 244*0Sstevel@tonic-gate * can depend. 245*0Sstevel@tonic-gate */ 246*0Sstevel@tonic-gate if (hdr.len <= sizeof(HDR)) 247*0Sstevel@tonic-gate goto corrupt; 248*0Sstevel@tonic-gate len = hdr.len - sizeof(HDR); 249*0Sstevel@tonic-gate 250*0Sstevel@tonic-gate /* If we've already moved to the in-memory buffer, fill from there. */ 251*0Sstevel@tonic-gate if (shortp != NULL) { 252*0Sstevel@tonic-gate if (lp->b_off < ((u_int8_t *)shortp - lp->buf) + len) 253*0Sstevel@tonic-gate goto corrupt; 254*0Sstevel@tonic-gate if ((ret = __db_retcopy(dbt, shortp, len, 255*0Sstevel@tonic-gate &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0) 256*0Sstevel@tonic-gate goto err1; 257*0Sstevel@tonic-gate goto cksum; 258*0Sstevel@tonic-gate } 259*0Sstevel@tonic-gate 260*0Sstevel@tonic-gate /* 261*0Sstevel@tonic-gate * Allocate temporary memory to hold the record. 262*0Sstevel@tonic-gate * 263*0Sstevel@tonic-gate * XXX 264*0Sstevel@tonic-gate * We're calling malloc(3) with a region locked. This isn't 265*0Sstevel@tonic-gate * a good idea. 266*0Sstevel@tonic-gate */ 267*0Sstevel@tonic-gate if ((ret = __os_malloc(len, NULL, &tbuf)) != 0) 268*0Sstevel@tonic-gate goto err1; 269*0Sstevel@tonic-gate 270*0Sstevel@tonic-gate /* 271*0Sstevel@tonic-gate * Read the record into the buffer. If read returns a short count, 272*0Sstevel@tonic-gate * there was an error or the rest of the record is in the in-memory 273*0Sstevel@tonic-gate * buffer. Note, the information may be garbage if we're in recovery, 274*0Sstevel@tonic-gate * so don't read past the end of the buffer's memory. 275*0Sstevel@tonic-gate */ 276*0Sstevel@tonic-gate if ((ret = __os_read(dblp->c_fd, tbuf, len, &nr)) != 0) { 277*0Sstevel@tonic-gate fail = "read"; 278*0Sstevel@tonic-gate goto err1; 279*0Sstevel@tonic-gate } 280*0Sstevel@tonic-gate if (len - nr > sizeof(lp->buf)) 281*0Sstevel@tonic-gate goto corrupt; 282*0Sstevel@tonic-gate if (nr != (ssize_t)len) { 283*0Sstevel@tonic-gate if (lp->b_off < len - nr) 284*0Sstevel@tonic-gate goto corrupt; 285*0Sstevel@tonic-gate 286*0Sstevel@tonic-gate /* Get the rest of the record from the in-memory buffer. */ 287*0Sstevel@tonic-gate memcpy((u_int8_t *)tbuf + nr, lp->buf, len - nr); 288*0Sstevel@tonic-gate } 289*0Sstevel@tonic-gate 290*0Sstevel@tonic-gate /* Copy the record into the user's DBT. */ 291*0Sstevel@tonic-gate if ((ret = __db_retcopy(dbt, tbuf, len, 292*0Sstevel@tonic-gate &dblp->c_dbt.data, &dblp->c_dbt.ulen, NULL)) != 0) 293*0Sstevel@tonic-gate goto err1; 294*0Sstevel@tonic-gate __os_free(tbuf, 0); 295*0Sstevel@tonic-gate tbuf = NULL; 296*0Sstevel@tonic-gate 297*0Sstevel@tonic-gate cksum: if (hdr.cksum != __ham_func4(dbt->data, dbt->size)) { 298*0Sstevel@tonic-gate if (!silent) 299*0Sstevel@tonic-gate __db_err(dblp->dbenv, "log_get: checksum mismatch"); 300*0Sstevel@tonic-gate goto corrupt; 301*0Sstevel@tonic-gate } 302*0Sstevel@tonic-gate 303*0Sstevel@tonic-gate /* Update the cursor and the return lsn. */ 304*0Sstevel@tonic-gate dblp->c_off = hdr.prev; 305*0Sstevel@tonic-gate dblp->c_len = hdr.len; 306*0Sstevel@tonic-gate dblp->c_lsn = *alsn = nlsn; 307*0Sstevel@tonic-gate 308*0Sstevel@tonic-gate return (0); 309*0Sstevel@tonic-gate 310*0Sstevel@tonic-gate corrupt:/* 311*0Sstevel@tonic-gate * This is the catchall -- for some reason we didn't find enough 312*0Sstevel@tonic-gate * information or it wasn't reasonable information, and it wasn't 313*0Sstevel@tonic-gate * because a system call failed. 314*0Sstevel@tonic-gate */ 315*0Sstevel@tonic-gate ret = EIO; 316*0Sstevel@tonic-gate fail = "read"; 317*0Sstevel@tonic-gate 318*0Sstevel@tonic-gate err1: if (!silent) 319*0Sstevel@tonic-gate if (fail == NULL) 320*0Sstevel@tonic-gate __db_err(dblp->dbenv, "log_get: %s", strerror(ret)); 321*0Sstevel@tonic-gate else 322*0Sstevel@tonic-gate __db_err(dblp->dbenv, 323*0Sstevel@tonic-gate "log_get: %s: %s", fail, strerror(ret)); 324*0Sstevel@tonic-gate err2: if (np != NULL) 325*0Sstevel@tonic-gate __os_freestr(np); 326*0Sstevel@tonic-gate if (tbuf != NULL) 327*0Sstevel@tonic-gate __os_free(tbuf, 0); 328*0Sstevel@tonic-gate return (ret); 329*0Sstevel@tonic-gate } 330