1*0Sstevel@tonic-gate /*- 2*0Sstevel@tonic-gate * See the file LICENSE for redistribution information. 3*0Sstevel@tonic-gate * 4*0Sstevel@tonic-gate * Copyright (c) 1996, 1997, 1998 5*0Sstevel@tonic-gate * Sleepycat Software. All rights reserved. 6*0Sstevel@tonic-gate */ 7*0Sstevel@tonic-gate /* 8*0Sstevel@tonic-gate * Copyright (c) 1990, 1993, 1994 9*0Sstevel@tonic-gate * The Regents of the University of California. All rights reserved. 10*0Sstevel@tonic-gate * 11*0Sstevel@tonic-gate * This code is derived from software contributed to Berkeley by 12*0Sstevel@tonic-gate * Margo Seltzer. 13*0Sstevel@tonic-gate * 14*0Sstevel@tonic-gate * Redistribution and use in source and binary forms, with or without 15*0Sstevel@tonic-gate * modification, are permitted provided that the following conditions 16*0Sstevel@tonic-gate * are met: 17*0Sstevel@tonic-gate * 1. Redistributions of source code must retain the above copyright 18*0Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer. 19*0Sstevel@tonic-gate * 2. Redistributions in binary form must reproduce the above copyright 20*0Sstevel@tonic-gate * notice, this list of conditions and the following disclaimer in the 21*0Sstevel@tonic-gate * documentation and/or other materials provided with the distribution. 22*0Sstevel@tonic-gate * 3. All advertising materials mentioning features or use of this software 23*0Sstevel@tonic-gate * must display the following acknowledgement: 24*0Sstevel@tonic-gate * This product includes software developed by the University of 25*0Sstevel@tonic-gate * California, Berkeley and its contributors. 26*0Sstevel@tonic-gate * 4. Neither the name of the University nor the names of its contributors 27*0Sstevel@tonic-gate * may be used to endorse or promote products derived from this software 28*0Sstevel@tonic-gate * without specific prior written permission. 29*0Sstevel@tonic-gate * 30*0Sstevel@tonic-gate * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 31*0Sstevel@tonic-gate * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32*0Sstevel@tonic-gate * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33*0Sstevel@tonic-gate * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 34*0Sstevel@tonic-gate * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 35*0Sstevel@tonic-gate * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 36*0Sstevel@tonic-gate * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 37*0Sstevel@tonic-gate * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 38*0Sstevel@tonic-gate * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 39*0Sstevel@tonic-gate * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 40*0Sstevel@tonic-gate * SUCH DAMAGE. 41*0Sstevel@tonic-gate */ 42*0Sstevel@tonic-gate #include "config.h" 43*0Sstevel@tonic-gate 44*0Sstevel@tonic-gate #ifndef lint 45*0Sstevel@tonic-gate static const char sccsid[] = "@(#)hash_dup.c 10.27 (Sleepycat) 12/6/98"; 46*0Sstevel@tonic-gate #endif /* not lint */ 47*0Sstevel@tonic-gate 48*0Sstevel@tonic-gate /* 49*0Sstevel@tonic-gate * PACKAGE: hashing 50*0Sstevel@tonic-gate * 51*0Sstevel@tonic-gate * DESCRIPTION: 52*0Sstevel@tonic-gate * Manipulation of duplicates for the hash package. 53*0Sstevel@tonic-gate * 54*0Sstevel@tonic-gate * ROUTINES: 55*0Sstevel@tonic-gate * 56*0Sstevel@tonic-gate * External 57*0Sstevel@tonic-gate * __add_dup 58*0Sstevel@tonic-gate * Internal 59*0Sstevel@tonic-gate */ 60*0Sstevel@tonic-gate 61*0Sstevel@tonic-gate #ifndef NO_SYSTEM_INCLUDES 62*0Sstevel@tonic-gate #include <sys/types.h> 63*0Sstevel@tonic-gate 64*0Sstevel@tonic-gate #include <errno.h> 65*0Sstevel@tonic-gate #include <string.h> 66*0Sstevel@tonic-gate #endif 67*0Sstevel@tonic-gate 68*0Sstevel@tonic-gate #include "db_int.h" 69*0Sstevel@tonic-gate #include "db_page.h" 70*0Sstevel@tonic-gate #include "hash.h" 71*0Sstevel@tonic-gate #include "btree.h" 72*0Sstevel@tonic-gate 73*0Sstevel@tonic-gate static int __ham_check_move __P((DBC *, int32_t)); 74*0Sstevel@tonic-gate static int __ham_dup_convert __P((DBC *)); 75*0Sstevel@tonic-gate static int __ham_make_dup __P((const DBT *, DBT *d, void **, u_int32_t *)); 76*0Sstevel@tonic-gate 77*0Sstevel@tonic-gate /* 78*0Sstevel@tonic-gate * Called from hash_access to add a duplicate key. nval is the new 79*0Sstevel@tonic-gate * value that we want to add. The flags correspond to the flag values 80*0Sstevel@tonic-gate * to cursor_put indicating where to add the new element. 81*0Sstevel@tonic-gate * There are 4 cases. 82*0Sstevel@tonic-gate * Case 1: The existing duplicate set already resides on a separate page. 83*0Sstevel@tonic-gate * We can use common code for this. 84*0Sstevel@tonic-gate * Case 2: The element is small enough to just be added to the existing set. 85*0Sstevel@tonic-gate * Case 3: The element is large enough to be a big item, so we're going to 86*0Sstevel@tonic-gate * have to push the set onto a new page. 87*0Sstevel@tonic-gate * Case 4: The element is large enough to push the duplicate set onto a 88*0Sstevel@tonic-gate * separate page. 89*0Sstevel@tonic-gate * 90*0Sstevel@tonic-gate * PUBLIC: int __ham_add_dup __P((DBC *, DBT *, u_int32_t)); 91*0Sstevel@tonic-gate */ 92*0Sstevel@tonic-gate int 93*0Sstevel@tonic-gate __ham_add_dup(dbc, nval, flags) 94*0Sstevel@tonic-gate DBC *dbc; 95*0Sstevel@tonic-gate DBT *nval; 96*0Sstevel@tonic-gate u_int32_t flags; 97*0Sstevel@tonic-gate { 98*0Sstevel@tonic-gate DB *dbp; 99*0Sstevel@tonic-gate HASH_CURSOR *hcp; 100*0Sstevel@tonic-gate DBT dbt, pval, tmp_val; 101*0Sstevel@tonic-gate u_int32_t del_len, new_size; 102*0Sstevel@tonic-gate int cmp, ret; 103*0Sstevel@tonic-gate u_int8_t *hk; 104*0Sstevel@tonic-gate 105*0Sstevel@tonic-gate dbp = dbc->dbp; 106*0Sstevel@tonic-gate hcp = (HASH_CURSOR *)dbc->internal; 107*0Sstevel@tonic-gate if (flags == DB_CURRENT && hcp->dpgno == PGNO_INVALID) 108*0Sstevel@tonic-gate del_len = hcp->dup_len; 109*0Sstevel@tonic-gate else 110*0Sstevel@tonic-gate del_len = 0; 111*0Sstevel@tonic-gate 112*0Sstevel@tonic-gate if ((ret = __ham_check_move(dbc, 113*0Sstevel@tonic-gate (int32_t)DUP_SIZE(nval->size) - (int32_t)del_len)) != 0) 114*0Sstevel@tonic-gate return (ret); 115*0Sstevel@tonic-gate 116*0Sstevel@tonic-gate /* 117*0Sstevel@tonic-gate * Check if resulting duplicate set is going to need to go 118*0Sstevel@tonic-gate * onto a separate duplicate page. If so, convert the 119*0Sstevel@tonic-gate * duplicate set and add the new one. After conversion, 120*0Sstevel@tonic-gate * hcp->dndx is the first free ndx or the index of the 121*0Sstevel@tonic-gate * current pointer into the duplicate set. 122*0Sstevel@tonic-gate */ 123*0Sstevel@tonic-gate hk = H_PAIRDATA(hcp->pagep, hcp->bndx); 124*0Sstevel@tonic-gate new_size = DUP_SIZE(nval->size) - del_len + LEN_HKEYDATA(hcp->pagep, 125*0Sstevel@tonic-gate hcp->hdr->pagesize, H_DATAINDEX(hcp->bndx)); 126*0Sstevel@tonic-gate 127*0Sstevel@tonic-gate /* 128*0Sstevel@tonic-gate * We convert to off-page duplicates if the item is a big item, 129*0Sstevel@tonic-gate * the addition of the new item will make the set large, or 130*0Sstevel@tonic-gate * if there isn't enough room on this page to add the next item. 131*0Sstevel@tonic-gate */ 132*0Sstevel@tonic-gate if (HPAGE_PTYPE(hk) != H_OFFDUP && 133*0Sstevel@tonic-gate (HPAGE_PTYPE(hk) == H_OFFPAGE || ISBIG(hcp, new_size) || 134*0Sstevel@tonic-gate DUP_SIZE(nval->size) - del_len > P_FREESPACE(hcp->pagep))) { 135*0Sstevel@tonic-gate 136*0Sstevel@tonic-gate if ((ret = __ham_dup_convert(dbc)) != 0) 137*0Sstevel@tonic-gate return (ret); 138*0Sstevel@tonic-gate else 139*0Sstevel@tonic-gate hk = H_PAIRDATA(hcp->pagep, hcp->bndx); 140*0Sstevel@tonic-gate } 141*0Sstevel@tonic-gate 142*0Sstevel@tonic-gate /* There are two separate cases here: on page and off page. */ 143*0Sstevel@tonic-gate if (HPAGE_PTYPE(hk) != H_OFFDUP) { 144*0Sstevel@tonic-gate if (HPAGE_PTYPE(hk) != H_DUPLICATE) { 145*0Sstevel@tonic-gate HPAGE_PTYPE(hk) = H_DUPLICATE; 146*0Sstevel@tonic-gate pval.flags = 0; 147*0Sstevel@tonic-gate pval.data = HKEYDATA_DATA(hk); 148*0Sstevel@tonic-gate pval.size = LEN_HDATA(hcp->pagep, dbp->pgsize, 149*0Sstevel@tonic-gate hcp->bndx); 150*0Sstevel@tonic-gate if ((ret = 151*0Sstevel@tonic-gate __ham_make_dup(&pval, &tmp_val, &dbc->rdata.data, 152*0Sstevel@tonic-gate &dbc->rdata.size)) != 0 || (ret = 153*0Sstevel@tonic-gate __ham_replpair(dbc, &tmp_val, 1)) != 0) 154*0Sstevel@tonic-gate return (ret); 155*0Sstevel@tonic-gate } 156*0Sstevel@tonic-gate 157*0Sstevel@tonic-gate /* Now make the new entry a duplicate. */ 158*0Sstevel@tonic-gate if ((ret = __ham_make_dup(nval, 159*0Sstevel@tonic-gate &tmp_val, &dbc->rdata.data, &dbc->rdata.size)) != 0) 160*0Sstevel@tonic-gate return (ret); 161*0Sstevel@tonic-gate 162*0Sstevel@tonic-gate tmp_val.dlen = 0; 163*0Sstevel@tonic-gate switch (flags) { /* On page. */ 164*0Sstevel@tonic-gate case DB_KEYFIRST: 165*0Sstevel@tonic-gate case DB_KEYLAST: 166*0Sstevel@tonic-gate if (dbp->dup_compare != NULL) 167*0Sstevel@tonic-gate __ham_dsearch(dbc, nval, &tmp_val.doff, &cmp); 168*0Sstevel@tonic-gate else if (flags == DB_KEYFIRST) 169*0Sstevel@tonic-gate tmp_val.doff = 0; 170*0Sstevel@tonic-gate else 171*0Sstevel@tonic-gate tmp_val.doff = LEN_HDATA(hcp->pagep, 172*0Sstevel@tonic-gate hcp->hdr->pagesize, hcp->bndx); 173*0Sstevel@tonic-gate break; 174*0Sstevel@tonic-gate case DB_CURRENT: 175*0Sstevel@tonic-gate /* 176*0Sstevel@tonic-gate * If we have a sort function, we need to verify that 177*0Sstevel@tonic-gate * the new item sorts identically to the old item. 178*0Sstevel@tonic-gate */ 179*0Sstevel@tonic-gate if (dbp->dup_compare != NULL) { 180*0Sstevel@tonic-gate dbt.data = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, 181*0Sstevel@tonic-gate hcp->bndx)) + hcp->dup_off; 182*0Sstevel@tonic-gate dbt.size = DUP_SIZE(hcp->dup_len); 183*0Sstevel@tonic-gate if (dbp->dup_compare(nval, &dbt) != 0) 184*0Sstevel@tonic-gate return (EINVAL); 185*0Sstevel@tonic-gate } 186*0Sstevel@tonic-gate tmp_val.doff = hcp->dup_off; 187*0Sstevel@tonic-gate tmp_val.dlen = DUP_SIZE(hcp->dup_len); 188*0Sstevel@tonic-gate break; 189*0Sstevel@tonic-gate case DB_BEFORE: 190*0Sstevel@tonic-gate tmp_val.doff = hcp->dup_off; 191*0Sstevel@tonic-gate break; 192*0Sstevel@tonic-gate case DB_AFTER: 193*0Sstevel@tonic-gate tmp_val.doff = hcp->dup_off + DUP_SIZE(hcp->dup_len); 194*0Sstevel@tonic-gate break; 195*0Sstevel@tonic-gate } 196*0Sstevel@tonic-gate /* Add the duplicate. */ 197*0Sstevel@tonic-gate ret = __ham_replpair(dbc, &tmp_val, 0); 198*0Sstevel@tonic-gate if (ret == 0) 199*0Sstevel@tonic-gate ret = __ham_dirty_page(dbp, hcp->pagep); 200*0Sstevel@tonic-gate __ham_c_update(hcp, hcp->pgno, tmp_val.size, 1, 1); 201*0Sstevel@tonic-gate return (ret); 202*0Sstevel@tonic-gate } 203*0Sstevel@tonic-gate 204*0Sstevel@tonic-gate /* If we get here, then we're on duplicate pages. */ 205*0Sstevel@tonic-gate if (hcp->dpgno == PGNO_INVALID) { 206*0Sstevel@tonic-gate memcpy(&hcp->dpgno, HOFFDUP_PGNO(hk), sizeof(db_pgno_t)); 207*0Sstevel@tonic-gate hcp->dndx = 0; 208*0Sstevel@tonic-gate } 209*0Sstevel@tonic-gate 210*0Sstevel@tonic-gate switch (flags) { 211*0Sstevel@tonic-gate case DB_KEYFIRST: 212*0Sstevel@tonic-gate if (dbp->dup_compare != NULL) 213*0Sstevel@tonic-gate goto sorted_dups; 214*0Sstevel@tonic-gate /* 215*0Sstevel@tonic-gate * The only way that we are already on a dup page is 216*0Sstevel@tonic-gate * if we just converted the on-page representation. 217*0Sstevel@tonic-gate * In that case, we've only got one page of duplicates. 218*0Sstevel@tonic-gate */ 219*0Sstevel@tonic-gate if (hcp->dpagep == NULL && (ret = 220*0Sstevel@tonic-gate __db_dend(dbc, hcp->dpgno, &hcp->dpagep)) != 0) 221*0Sstevel@tonic-gate return (ret); 222*0Sstevel@tonic-gate hcp->dndx = 0; 223*0Sstevel@tonic-gate break; 224*0Sstevel@tonic-gate case DB_KEYLAST: 225*0Sstevel@tonic-gate if (dbp->dup_compare != NULL) { 226*0Sstevel@tonic-gate sorted_dups: if ((ret = __db_dsearch(dbc, 1, nval, 227*0Sstevel@tonic-gate hcp->dpgno, &hcp->dndx, &hcp->dpagep, &cmp)) != 0) 228*0Sstevel@tonic-gate return (ret); 229*0Sstevel@tonic-gate if (cmp == 0) 230*0Sstevel@tonic-gate hcp->dpgno = PGNO(hcp->dpagep); 231*0Sstevel@tonic-gate } else { 232*0Sstevel@tonic-gate if (hcp->dpagep == NULL && (ret = 233*0Sstevel@tonic-gate __db_dend(dbc, hcp->dpgno, &hcp->dpagep)) != 0) 234*0Sstevel@tonic-gate return (ret); 235*0Sstevel@tonic-gate hcp->dpgno = PGNO(hcp->dpagep); 236*0Sstevel@tonic-gate hcp->dndx = NUM_ENT(hcp->dpagep); 237*0Sstevel@tonic-gate } 238*0Sstevel@tonic-gate break; 239*0Sstevel@tonic-gate case DB_CURRENT: 240*0Sstevel@tonic-gate if (dbp->dup_compare != NULL && __bam_cmp(dbp, 241*0Sstevel@tonic-gate nval, hcp->dpagep, hcp->dndx, dbp->dup_compare) != 0) 242*0Sstevel@tonic-gate return (EINVAL); 243*0Sstevel@tonic-gate switch (GET_BKEYDATA(hcp->dpagep, hcp->dndx)->type) { 244*0Sstevel@tonic-gate case B_KEYDATA: 245*0Sstevel@tonic-gate del_len = BKEYDATA_SIZE(GET_BKEYDATA(hcp->dpagep, 246*0Sstevel@tonic-gate hcp->dndx)->len); 247*0Sstevel@tonic-gate break; 248*0Sstevel@tonic-gate case B_OVERFLOW: 249*0Sstevel@tonic-gate del_len = BOVERFLOW_SIZE; 250*0Sstevel@tonic-gate break; 251*0Sstevel@tonic-gate } 252*0Sstevel@tonic-gate if ((ret = 253*0Sstevel@tonic-gate __db_ditem(dbc, hcp->dpagep, hcp->dndx, del_len)) != 0) 254*0Sstevel@tonic-gate return (ret); 255*0Sstevel@tonic-gate break; 256*0Sstevel@tonic-gate case DB_BEFORE: /* The default behavior is correct. */ 257*0Sstevel@tonic-gate break; 258*0Sstevel@tonic-gate case DB_AFTER: 259*0Sstevel@tonic-gate hcp->dndx++; 260*0Sstevel@tonic-gate break; 261*0Sstevel@tonic-gate } 262*0Sstevel@tonic-gate 263*0Sstevel@tonic-gate ret = __db_dput(dbc, 264*0Sstevel@tonic-gate nval, &hcp->dpagep, &hcp->dndx, __ham_overflow_page); 265*0Sstevel@tonic-gate hcp->pgno = PGNO(hcp->pagep); 266*0Sstevel@tonic-gate __ham_c_update(hcp, hcp->pgno, nval->size, 1, 1); 267*0Sstevel@tonic-gate return (ret); 268*0Sstevel@tonic-gate } 269*0Sstevel@tonic-gate 270*0Sstevel@tonic-gate /* 271*0Sstevel@tonic-gate * Convert an on-page set of duplicates to an offpage set of duplicates. 272*0Sstevel@tonic-gate */ 273*0Sstevel@tonic-gate static int 274*0Sstevel@tonic-gate __ham_dup_convert(dbc) 275*0Sstevel@tonic-gate DBC *dbc; 276*0Sstevel@tonic-gate { 277*0Sstevel@tonic-gate DB *dbp; 278*0Sstevel@tonic-gate HASH_CURSOR *hcp; 279*0Sstevel@tonic-gate BOVERFLOW bo; 280*0Sstevel@tonic-gate DBT dbt; 281*0Sstevel@tonic-gate HOFFPAGE ho; 282*0Sstevel@tonic-gate db_indx_t dndx, i, len, off; 283*0Sstevel@tonic-gate int ret; 284*0Sstevel@tonic-gate u_int8_t *p, *pend; 285*0Sstevel@tonic-gate 286*0Sstevel@tonic-gate /* 287*0Sstevel@tonic-gate * Create a new page for the duplicates. 288*0Sstevel@tonic-gate */ 289*0Sstevel@tonic-gate dbp = dbc->dbp; 290*0Sstevel@tonic-gate hcp = (HASH_CURSOR *)dbc->internal; 291*0Sstevel@tonic-gate if ((ret = 292*0Sstevel@tonic-gate __ham_overflow_page(dbc, P_DUPLICATE, &hcp->dpagep)) != 0) 293*0Sstevel@tonic-gate return (ret); 294*0Sstevel@tonic-gate hcp->dpagep->type = P_DUPLICATE; 295*0Sstevel@tonic-gate hcp->dpgno = PGNO(hcp->dpagep); 296*0Sstevel@tonic-gate 297*0Sstevel@tonic-gate /* 298*0Sstevel@tonic-gate * Now put the duplicates onto the new page. 299*0Sstevel@tonic-gate */ 300*0Sstevel@tonic-gate dndx = 0; 301*0Sstevel@tonic-gate dbt.flags = 0; 302*0Sstevel@tonic-gate switch (HPAGE_PTYPE(H_PAIRDATA(hcp->pagep, hcp->bndx))) { 303*0Sstevel@tonic-gate case H_KEYDATA: 304*0Sstevel@tonic-gate /* Simple case, one key on page; move it to dup page. */ 305*0Sstevel@tonic-gate dbt.size = 306*0Sstevel@tonic-gate LEN_HDATA(hcp->pagep, hcp->hdr->pagesize, hcp->bndx); 307*0Sstevel@tonic-gate dbt.data = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx)); 308*0Sstevel@tonic-gate ret = __db_pitem(dbc, hcp->dpagep, 309*0Sstevel@tonic-gate (u_int32_t)dndx, BKEYDATA_SIZE(dbt.size), NULL, &dbt); 310*0Sstevel@tonic-gate if (ret == 0) 311*0Sstevel@tonic-gate __ham_dirty_page(dbp, hcp->dpagep); 312*0Sstevel@tonic-gate break; 313*0Sstevel@tonic-gate case H_OFFPAGE: 314*0Sstevel@tonic-gate /* Simple case, one key on page; move it to dup page. */ 315*0Sstevel@tonic-gate memcpy(&ho, 316*0Sstevel@tonic-gate P_ENTRY(hcp->pagep, H_DATAINDEX(hcp->bndx)), HOFFPAGE_SIZE); 317*0Sstevel@tonic-gate UMRW(bo.unused1); 318*0Sstevel@tonic-gate B_TSET(bo.type, ho.type, 0); 319*0Sstevel@tonic-gate UMRW(bo.unused2); 320*0Sstevel@tonic-gate bo.pgno = ho.pgno; 321*0Sstevel@tonic-gate bo.tlen = ho.tlen; 322*0Sstevel@tonic-gate dbt.size = BOVERFLOW_SIZE; 323*0Sstevel@tonic-gate dbt.data = &bo; 324*0Sstevel@tonic-gate 325*0Sstevel@tonic-gate ret = __db_pitem(dbc, hcp->dpagep, 326*0Sstevel@tonic-gate (u_int32_t)dndx, dbt.size, &dbt, NULL); 327*0Sstevel@tonic-gate if (ret == 0) 328*0Sstevel@tonic-gate __ham_dirty_page(dbp, hcp->dpagep); 329*0Sstevel@tonic-gate break; 330*0Sstevel@tonic-gate case H_DUPLICATE: 331*0Sstevel@tonic-gate p = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx)); 332*0Sstevel@tonic-gate pend = p + 333*0Sstevel@tonic-gate LEN_HDATA(hcp->pagep, hcp->hdr->pagesize, hcp->bndx); 334*0Sstevel@tonic-gate 335*0Sstevel@tonic-gate /* 336*0Sstevel@tonic-gate * We need to maintain the duplicate cursor position. 337*0Sstevel@tonic-gate * Keep track of where we are in the duplicate set via 338*0Sstevel@tonic-gate * the offset, and when it matches the one in the cursor, 339*0Sstevel@tonic-gate * set the off-page duplicate cursor index to the current 340*0Sstevel@tonic-gate * index. 341*0Sstevel@tonic-gate */ 342*0Sstevel@tonic-gate for (off = 0, i = 0; p < pend; i++) { 343*0Sstevel@tonic-gate if (off == hcp->dup_off) 344*0Sstevel@tonic-gate dndx = i; 345*0Sstevel@tonic-gate memcpy(&len, p, sizeof(db_indx_t)); 346*0Sstevel@tonic-gate dbt.size = len; 347*0Sstevel@tonic-gate p += sizeof(db_indx_t); 348*0Sstevel@tonic-gate dbt.data = p; 349*0Sstevel@tonic-gate p += len + sizeof(db_indx_t); 350*0Sstevel@tonic-gate off += len + 2 * sizeof(db_indx_t); 351*0Sstevel@tonic-gate ret = __db_dput(dbc, &dbt, 352*0Sstevel@tonic-gate &hcp->dpagep, &i, __ham_overflow_page); 353*0Sstevel@tonic-gate if (ret != 0) 354*0Sstevel@tonic-gate break; 355*0Sstevel@tonic-gate } 356*0Sstevel@tonic-gate break; 357*0Sstevel@tonic-gate default: 358*0Sstevel@tonic-gate ret = __db_pgfmt(dbp, (u_long)hcp->pgno); 359*0Sstevel@tonic-gate break; 360*0Sstevel@tonic-gate } 361*0Sstevel@tonic-gate if (ret == 0) { 362*0Sstevel@tonic-gate /* 363*0Sstevel@tonic-gate * Now attach this to the source page in place of 364*0Sstevel@tonic-gate * the old duplicate item. 365*0Sstevel@tonic-gate */ 366*0Sstevel@tonic-gate __ham_move_offpage(dbc, hcp->pagep, 367*0Sstevel@tonic-gate (u_int32_t)H_DATAINDEX(hcp->bndx), hcp->dpgno); 368*0Sstevel@tonic-gate 369*0Sstevel@tonic-gate /* Can probably just do a "put" here. */ 370*0Sstevel@tonic-gate ret = __ham_dirty_page(dbp, hcp->pagep); 371*0Sstevel@tonic-gate hcp->dndx = dndx; 372*0Sstevel@tonic-gate } else { 373*0Sstevel@tonic-gate (void)__ham_del_page(dbc, hcp->dpagep); 374*0Sstevel@tonic-gate hcp->dpagep = NULL; 375*0Sstevel@tonic-gate } 376*0Sstevel@tonic-gate return (ret); 377*0Sstevel@tonic-gate } 378*0Sstevel@tonic-gate 379*0Sstevel@tonic-gate static int 380*0Sstevel@tonic-gate __ham_make_dup(notdup, duplicate, bufp, sizep) 381*0Sstevel@tonic-gate const DBT *notdup; 382*0Sstevel@tonic-gate DBT *duplicate; 383*0Sstevel@tonic-gate void **bufp; 384*0Sstevel@tonic-gate u_int32_t *sizep; 385*0Sstevel@tonic-gate { 386*0Sstevel@tonic-gate db_indx_t tsize, item_size; 387*0Sstevel@tonic-gate int ret; 388*0Sstevel@tonic-gate u_int8_t *p; 389*0Sstevel@tonic-gate 390*0Sstevel@tonic-gate item_size = (db_indx_t)notdup->size; 391*0Sstevel@tonic-gate tsize = DUP_SIZE(item_size); 392*0Sstevel@tonic-gate if ((ret = __ham_init_dbt(duplicate, tsize, bufp, sizep)) != 0) 393*0Sstevel@tonic-gate return (ret); 394*0Sstevel@tonic-gate 395*0Sstevel@tonic-gate duplicate->dlen = 0; 396*0Sstevel@tonic-gate duplicate->flags = notdup->flags; 397*0Sstevel@tonic-gate F_SET(duplicate, DB_DBT_PARTIAL); 398*0Sstevel@tonic-gate 399*0Sstevel@tonic-gate p = duplicate->data; 400*0Sstevel@tonic-gate memcpy(p, &item_size, sizeof(db_indx_t)); 401*0Sstevel@tonic-gate p += sizeof(db_indx_t); 402*0Sstevel@tonic-gate memcpy(p, notdup->data, notdup->size); 403*0Sstevel@tonic-gate p += notdup->size; 404*0Sstevel@tonic-gate memcpy(p, &item_size, sizeof(db_indx_t)); 405*0Sstevel@tonic-gate 406*0Sstevel@tonic-gate duplicate->doff = 0; 407*0Sstevel@tonic-gate duplicate->dlen = notdup->size; 408*0Sstevel@tonic-gate 409*0Sstevel@tonic-gate return (0); 410*0Sstevel@tonic-gate } 411*0Sstevel@tonic-gate 412*0Sstevel@tonic-gate static int 413*0Sstevel@tonic-gate __ham_check_move(dbc, add_len) 414*0Sstevel@tonic-gate DBC *dbc; 415*0Sstevel@tonic-gate int32_t add_len; 416*0Sstevel@tonic-gate { 417*0Sstevel@tonic-gate DB *dbp; 418*0Sstevel@tonic-gate HASH_CURSOR *hcp; 419*0Sstevel@tonic-gate DBT k, d; 420*0Sstevel@tonic-gate DB_LSN new_lsn; 421*0Sstevel@tonic-gate PAGE *next_pagep; 422*0Sstevel@tonic-gate db_pgno_t next_pgno; 423*0Sstevel@tonic-gate u_int32_t new_datalen, old_len, rectype; 424*0Sstevel@tonic-gate u_int8_t *hk; 425*0Sstevel@tonic-gate int ret; 426*0Sstevel@tonic-gate 427*0Sstevel@tonic-gate dbp = dbc->dbp; 428*0Sstevel@tonic-gate hcp = (HASH_CURSOR *)dbc->internal; 429*0Sstevel@tonic-gate /* 430*0Sstevel@tonic-gate * Check if we can do whatever we need to on this page. If not, 431*0Sstevel@tonic-gate * then we'll have to move the current element to a new page. 432*0Sstevel@tonic-gate */ 433*0Sstevel@tonic-gate hk = H_PAIRDATA(hcp->pagep, hcp->bndx); 434*0Sstevel@tonic-gate 435*0Sstevel@tonic-gate /* 436*0Sstevel@tonic-gate * If the item is already off page duplicates or an offpage item, 437*0Sstevel@tonic-gate * then we know we can do whatever we need to do in-place 438*0Sstevel@tonic-gate */ 439*0Sstevel@tonic-gate if (HPAGE_PTYPE(hk) == H_OFFDUP || HPAGE_PTYPE(hk) == H_OFFPAGE) 440*0Sstevel@tonic-gate return (0); 441*0Sstevel@tonic-gate 442*0Sstevel@tonic-gate old_len = 443*0Sstevel@tonic-gate LEN_HITEM(hcp->pagep, hcp->hdr->pagesize, H_DATAINDEX(hcp->bndx)); 444*0Sstevel@tonic-gate new_datalen = old_len - HKEYDATA_SIZE(0) + add_len; 445*0Sstevel@tonic-gate 446*0Sstevel@tonic-gate /* 447*0Sstevel@tonic-gate * We need to add a new page under two conditions: 448*0Sstevel@tonic-gate * 1. The addition makes the total data length cross the BIG 449*0Sstevel@tonic-gate * threshold and the OFFDUP structure won't fit on this page. 450*0Sstevel@tonic-gate * 2. The addition does not make the total data cross the 451*0Sstevel@tonic-gate * threshold, but the new data won't fit on the page. 452*0Sstevel@tonic-gate * If neither of these is true, then we can return. 453*0Sstevel@tonic-gate */ 454*0Sstevel@tonic-gate if (ISBIG(hcp, new_datalen) && (old_len > HOFFDUP_SIZE || 455*0Sstevel@tonic-gate HOFFDUP_SIZE - old_len <= P_FREESPACE(hcp->pagep))) 456*0Sstevel@tonic-gate return (0); 457*0Sstevel@tonic-gate 458*0Sstevel@tonic-gate if (!ISBIG(hcp, new_datalen) && 459*0Sstevel@tonic-gate add_len <= (int32_t)P_FREESPACE(hcp->pagep)) 460*0Sstevel@tonic-gate return (0); 461*0Sstevel@tonic-gate 462*0Sstevel@tonic-gate /* 463*0Sstevel@tonic-gate * If we get here, then we need to move the item to a new page. 464*0Sstevel@tonic-gate * Check if there are more pages in the chain. 465*0Sstevel@tonic-gate */ 466*0Sstevel@tonic-gate 467*0Sstevel@tonic-gate new_datalen = ISBIG(hcp, new_datalen) ? 468*0Sstevel@tonic-gate HOFFDUP_SIZE : HKEYDATA_SIZE(new_datalen); 469*0Sstevel@tonic-gate 470*0Sstevel@tonic-gate next_pagep = NULL; 471*0Sstevel@tonic-gate for (next_pgno = NEXT_PGNO(hcp->pagep); next_pgno != PGNO_INVALID; 472*0Sstevel@tonic-gate next_pgno = NEXT_PGNO(next_pagep)) { 473*0Sstevel@tonic-gate if (next_pagep != NULL && 474*0Sstevel@tonic-gate (ret = __ham_put_page(dbp, next_pagep, 0)) != 0) 475*0Sstevel@tonic-gate return (ret); 476*0Sstevel@tonic-gate 477*0Sstevel@tonic-gate if ((ret = 478*0Sstevel@tonic-gate __ham_get_page(dbp, next_pgno, &next_pagep)) != 0) 479*0Sstevel@tonic-gate return (ret); 480*0Sstevel@tonic-gate 481*0Sstevel@tonic-gate if (P_FREESPACE(next_pagep) >= new_datalen) 482*0Sstevel@tonic-gate break; 483*0Sstevel@tonic-gate } 484*0Sstevel@tonic-gate 485*0Sstevel@tonic-gate /* No more pages, add one. */ 486*0Sstevel@tonic-gate if (next_pagep == NULL && (ret = __ham_add_ovflpage(dbc, 487*0Sstevel@tonic-gate hcp->pagep, 0, &next_pagep)) != 0) 488*0Sstevel@tonic-gate return (ret); 489*0Sstevel@tonic-gate 490*0Sstevel@tonic-gate /* Add new page at the end of the chain. */ 491*0Sstevel@tonic-gate if (P_FREESPACE(next_pagep) < new_datalen && (ret = 492*0Sstevel@tonic-gate __ham_add_ovflpage(dbc, next_pagep, 1, &next_pagep)) != 0) 493*0Sstevel@tonic-gate return (ret); 494*0Sstevel@tonic-gate 495*0Sstevel@tonic-gate /* Copy the item to the new page. */ 496*0Sstevel@tonic-gate if (DB_LOGGING(hcp->dbc)) { 497*0Sstevel@tonic-gate rectype = PUTPAIR; 498*0Sstevel@tonic-gate k.flags = 0; 499*0Sstevel@tonic-gate d.flags = 0; 500*0Sstevel@tonic-gate if (HPAGE_PTYPE( 501*0Sstevel@tonic-gate H_PAIRKEY(hcp->pagep, hcp->bndx)) == H_OFFPAGE) { 502*0Sstevel@tonic-gate rectype |= PAIR_KEYMASK; 503*0Sstevel@tonic-gate k.data = H_PAIRKEY(hcp->pagep, hcp->bndx); 504*0Sstevel@tonic-gate k.size = HOFFPAGE_SIZE; 505*0Sstevel@tonic-gate } else { 506*0Sstevel@tonic-gate k.data = 507*0Sstevel@tonic-gate HKEYDATA_DATA(H_PAIRKEY(hcp->pagep, hcp->bndx)); 508*0Sstevel@tonic-gate k.size = LEN_HKEY(hcp->pagep, 509*0Sstevel@tonic-gate hcp->hdr->pagesize, hcp->bndx); 510*0Sstevel@tonic-gate } 511*0Sstevel@tonic-gate 512*0Sstevel@tonic-gate if (HPAGE_PTYPE(hk) == H_OFFPAGE) { 513*0Sstevel@tonic-gate rectype |= PAIR_DATAMASK; 514*0Sstevel@tonic-gate d.data = H_PAIRDATA(hcp->pagep, hcp->bndx); 515*0Sstevel@tonic-gate d.size = HOFFPAGE_SIZE; 516*0Sstevel@tonic-gate } else { 517*0Sstevel@tonic-gate d.data = 518*0Sstevel@tonic-gate HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx)); 519*0Sstevel@tonic-gate d.size = LEN_HDATA(hcp->pagep, 520*0Sstevel@tonic-gate hcp->hdr->pagesize, hcp->bndx); 521*0Sstevel@tonic-gate } 522*0Sstevel@tonic-gate 523*0Sstevel@tonic-gate 524*0Sstevel@tonic-gate if ((ret = __ham_insdel_log(dbp->dbenv->lg_info, 525*0Sstevel@tonic-gate dbc->txn, &new_lsn, 0, rectype, 526*0Sstevel@tonic-gate dbp->log_fileid, PGNO(next_pagep), 527*0Sstevel@tonic-gate (u_int32_t)H_NUMPAIRS(next_pagep), &LSN(next_pagep), 528*0Sstevel@tonic-gate &k, &d)) != 0) 529*0Sstevel@tonic-gate return (ret); 530*0Sstevel@tonic-gate 531*0Sstevel@tonic-gate /* Move lsn onto page. */ 532*0Sstevel@tonic-gate LSN(next_pagep) = new_lsn; /* Structure assignment. */ 533*0Sstevel@tonic-gate } 534*0Sstevel@tonic-gate 535*0Sstevel@tonic-gate __ham_copy_item(dbp->pgsize, 536*0Sstevel@tonic-gate hcp->pagep, H_KEYINDEX(hcp->bndx), next_pagep); 537*0Sstevel@tonic-gate __ham_copy_item(dbp->pgsize, 538*0Sstevel@tonic-gate hcp->pagep, H_DATAINDEX(hcp->bndx), next_pagep); 539*0Sstevel@tonic-gate 540*0Sstevel@tonic-gate /* Now delete the pair from the current page. */ 541*0Sstevel@tonic-gate ret = __ham_del_pair(dbc, 0); 542*0Sstevel@tonic-gate 543*0Sstevel@tonic-gate (void)__ham_put_page(dbp, hcp->pagep, 1); 544*0Sstevel@tonic-gate hcp->pagep = next_pagep; 545*0Sstevel@tonic-gate hcp->pgno = PGNO(hcp->pagep); 546*0Sstevel@tonic-gate hcp->bndx = H_NUMPAIRS(hcp->pagep) - 1; 547*0Sstevel@tonic-gate F_SET(hcp, H_EXPAND); 548*0Sstevel@tonic-gate return (ret); 549*0Sstevel@tonic-gate } 550*0Sstevel@tonic-gate 551*0Sstevel@tonic-gate /* 552*0Sstevel@tonic-gate * __ham_move_offpage -- 553*0Sstevel@tonic-gate * Replace an onpage set of duplicates with the OFFDUP structure 554*0Sstevel@tonic-gate * that references the duplicate page. 555*0Sstevel@tonic-gate * 556*0Sstevel@tonic-gate * XXX 557*0Sstevel@tonic-gate * This is really just a special case of __onpage_replace; we should 558*0Sstevel@tonic-gate * probably combine them. 559*0Sstevel@tonic-gate * 560*0Sstevel@tonic-gate * PUBLIC: void __ham_move_offpage __P((DBC *, PAGE *, u_int32_t, db_pgno_t)); 561*0Sstevel@tonic-gate */ 562*0Sstevel@tonic-gate void 563*0Sstevel@tonic-gate __ham_move_offpage(dbc, pagep, ndx, pgno) 564*0Sstevel@tonic-gate DBC *dbc; 565*0Sstevel@tonic-gate PAGE *pagep; 566*0Sstevel@tonic-gate u_int32_t ndx; 567*0Sstevel@tonic-gate db_pgno_t pgno; 568*0Sstevel@tonic-gate { 569*0Sstevel@tonic-gate DB *dbp; 570*0Sstevel@tonic-gate HASH_CURSOR *hcp; 571*0Sstevel@tonic-gate DBT new_dbt; 572*0Sstevel@tonic-gate DBT old_dbt; 573*0Sstevel@tonic-gate HOFFDUP od; 574*0Sstevel@tonic-gate db_indx_t i; 575*0Sstevel@tonic-gate int32_t shrink; 576*0Sstevel@tonic-gate u_int8_t *src; 577*0Sstevel@tonic-gate 578*0Sstevel@tonic-gate dbp = dbc->dbp; 579*0Sstevel@tonic-gate hcp = (HASH_CURSOR *)dbc->internal; 580*0Sstevel@tonic-gate od.type = H_OFFDUP; 581*0Sstevel@tonic-gate UMRW(od.unused[0]); 582*0Sstevel@tonic-gate UMRW(od.unused[1]); 583*0Sstevel@tonic-gate UMRW(od.unused[2]); 584*0Sstevel@tonic-gate od.pgno = pgno; 585*0Sstevel@tonic-gate 586*0Sstevel@tonic-gate if (DB_LOGGING(dbc)) { 587*0Sstevel@tonic-gate new_dbt.data = &od; 588*0Sstevel@tonic-gate new_dbt.size = HOFFDUP_SIZE; 589*0Sstevel@tonic-gate old_dbt.data = P_ENTRY(pagep, ndx); 590*0Sstevel@tonic-gate old_dbt.size = LEN_HITEM(pagep, hcp->hdr->pagesize, ndx); 591*0Sstevel@tonic-gate (void)__ham_replace_log(dbp->dbenv->lg_info, 592*0Sstevel@tonic-gate dbc->txn, &LSN(pagep), 0, dbp->log_fileid, 593*0Sstevel@tonic-gate PGNO(pagep), (u_int32_t)ndx, &LSN(pagep), -1, 594*0Sstevel@tonic-gate &old_dbt, &new_dbt, 0); 595*0Sstevel@tonic-gate } 596*0Sstevel@tonic-gate 597*0Sstevel@tonic-gate shrink = 598*0Sstevel@tonic-gate LEN_HITEM(pagep, hcp->hdr->pagesize, ndx) - HOFFDUP_SIZE; 599*0Sstevel@tonic-gate 600*0Sstevel@tonic-gate if (shrink != 0) { 601*0Sstevel@tonic-gate /* Copy data. */ 602*0Sstevel@tonic-gate src = (u_int8_t *)(pagep) + HOFFSET(pagep); 603*0Sstevel@tonic-gate memmove(src + shrink, src, pagep->inp[ndx] - HOFFSET(pagep)); 604*0Sstevel@tonic-gate HOFFSET(pagep) += shrink; 605*0Sstevel@tonic-gate 606*0Sstevel@tonic-gate /* Update index table. */ 607*0Sstevel@tonic-gate for (i = ndx; i < NUM_ENT(pagep); i++) 608*0Sstevel@tonic-gate pagep->inp[i] += shrink; 609*0Sstevel@tonic-gate } 610*0Sstevel@tonic-gate 611*0Sstevel@tonic-gate /* Now copy the offdup entry onto the page. */ 612*0Sstevel@tonic-gate memcpy(P_ENTRY(pagep, ndx), &od, HOFFDUP_SIZE); 613*0Sstevel@tonic-gate } 614*0Sstevel@tonic-gate 615*0Sstevel@tonic-gate /* 616*0Sstevel@tonic-gate * __ham_dsearch: 617*0Sstevel@tonic-gate * Locate a particular duplicate in a duplicate set. 618*0Sstevel@tonic-gate * 619*0Sstevel@tonic-gate * PUBLIC: void __ham_dsearch __P((DBC *, DBT *, u_int32_t *, int *)); 620*0Sstevel@tonic-gate */ 621*0Sstevel@tonic-gate void 622*0Sstevel@tonic-gate __ham_dsearch(dbc, dbt, offp, cmpp) 623*0Sstevel@tonic-gate DBC *dbc; 624*0Sstevel@tonic-gate DBT *dbt; 625*0Sstevel@tonic-gate u_int32_t *offp; 626*0Sstevel@tonic-gate int *cmpp; 627*0Sstevel@tonic-gate { 628*0Sstevel@tonic-gate DB *dbp; 629*0Sstevel@tonic-gate HASH_CURSOR *hcp; 630*0Sstevel@tonic-gate DBT cur; 631*0Sstevel@tonic-gate db_indx_t i, len; 632*0Sstevel@tonic-gate int (*func) __P((const DBT *, const DBT *)); 633*0Sstevel@tonic-gate u_int8_t *data; 634*0Sstevel@tonic-gate 635*0Sstevel@tonic-gate dbp = dbc->dbp; 636*0Sstevel@tonic-gate hcp = (HASH_CURSOR *)dbc->internal; 637*0Sstevel@tonic-gate if (dbp->dup_compare == NULL) 638*0Sstevel@tonic-gate func = __bam_defcmp; 639*0Sstevel@tonic-gate else 640*0Sstevel@tonic-gate func = dbp->dup_compare; 641*0Sstevel@tonic-gate 642*0Sstevel@tonic-gate i = F_ISSET(dbc, DBC_CONTINUE) ? hcp->dup_off: 0; 643*0Sstevel@tonic-gate data = HKEYDATA_DATA(H_PAIRDATA(hcp->pagep, hcp->bndx)) + i; 644*0Sstevel@tonic-gate while (i < LEN_HDATA(hcp->pagep, hcp->hdr->pagesize, hcp->bndx)) { 645*0Sstevel@tonic-gate memcpy(&len, data, sizeof(db_indx_t)); 646*0Sstevel@tonic-gate data += sizeof(db_indx_t); 647*0Sstevel@tonic-gate cur.data = data; 648*0Sstevel@tonic-gate cur.size = (u_int32_t)len; 649*0Sstevel@tonic-gate *cmpp = func(dbt, &cur); 650*0Sstevel@tonic-gate if (*cmpp == 0 || (*cmpp < 0 && dbp->dup_compare != NULL)) 651*0Sstevel@tonic-gate break; 652*0Sstevel@tonic-gate i += len + 2 * sizeof(db_indx_t); 653*0Sstevel@tonic-gate data += len + sizeof(db_indx_t); 654*0Sstevel@tonic-gate } 655*0Sstevel@tonic-gate *offp = i; 656*0Sstevel@tonic-gate } 657