1*aae80e6bSchristos /* $NetBSD: bt_put.c,v 1.22 2016/09/24 21:31:25 christos Exp $ */
2a954b078Scgd
39f0aa214Scgd /*-
4a6d14e36Scgd * Copyright (c) 1990, 1993, 1994
59f0aa214Scgd * The Regents of the University of California. All rights reserved.
69f0aa214Scgd *
79f0aa214Scgd * This code is derived from software contributed to Berkeley by
89f0aa214Scgd * Mike Olson.
99f0aa214Scgd *
109f0aa214Scgd * Redistribution and use in source and binary forms, with or without
119f0aa214Scgd * modification, are permitted provided that the following conditions
129f0aa214Scgd * are met:
139f0aa214Scgd * 1. Redistributions of source code must retain the above copyright
149f0aa214Scgd * notice, this list of conditions and the following disclaimer.
159f0aa214Scgd * 2. Redistributions in binary form must reproduce the above copyright
169f0aa214Scgd * notice, this list of conditions and the following disclaimer in the
179f0aa214Scgd * documentation and/or other materials provided with the distribution.
18eb7c1594Sagc * 3. Neither the name of the University nor the names of its contributors
199f0aa214Scgd * may be used to endorse or promote products derived from this software
209f0aa214Scgd * without specific prior written permission.
219f0aa214Scgd *
229f0aa214Scgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
239f0aa214Scgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
249f0aa214Scgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
259f0aa214Scgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
269f0aa214Scgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
279f0aa214Scgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
289f0aa214Scgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
299f0aa214Scgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
309f0aa214Scgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
319f0aa214Scgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
329f0aa214Scgd * SUCH DAMAGE.
339f0aa214Scgd */
349f0aa214Scgd
35d3595ddfSjoerg #if HAVE_NBTOOL_CONFIG_H
36d3595ddfSjoerg #include "nbtool_config.h"
37d3595ddfSjoerg #endif
38d3595ddfSjoerg
3900ae392dSchristos #include <sys/cdefs.h>
40*aae80e6bSchristos __RCSID("$NetBSD: bt_put.c,v 1.22 2016/09/24 21:31:25 christos Exp $");
419f0aa214Scgd
4243fa6fe3Sjtc #include "namespace.h"
439f0aa214Scgd #include <sys/types.h>
449f0aa214Scgd
45cb9daf8fSchristos #include <assert.h>
469f0aa214Scgd #include <errno.h>
479f0aa214Scgd #include <stdio.h>
489f0aa214Scgd #include <stdlib.h>
499f0aa214Scgd #include <string.h>
509f0aa214Scgd
519f0aa214Scgd #include <db.h>
529f0aa214Scgd #include "btree.h"
539f0aa214Scgd
54cb9daf8fSchristos static EPG *bt_fast(BTREE *, const DBT *, const DBT *, int *);
559f0aa214Scgd
569f0aa214Scgd /*
579f0aa214Scgd * __BT_PUT -- Add a btree item to the tree.
589f0aa214Scgd *
599f0aa214Scgd * Parameters:
609f0aa214Scgd * dbp: pointer to access method
619f0aa214Scgd * key: key
629f0aa214Scgd * data: data
639f0aa214Scgd * flag: R_NOOVERWRITE
649f0aa214Scgd *
659f0aa214Scgd * Returns:
669f0aa214Scgd * RET_ERROR, RET_SUCCESS and RET_SPECIAL if the key is already in the
679f0aa214Scgd * tree and R_NOOVERWRITE specified.
689f0aa214Scgd */
699f0aa214Scgd int
__bt_put(const DB * dbp,DBT * key,const DBT * data,u_int flags)70cb9daf8fSchristos __bt_put(const DB *dbp, DBT *key, const DBT *data, u_int flags)
719f0aa214Scgd {
729f0aa214Scgd BTREE *t;
739f0aa214Scgd DBT tkey, tdata;
7400ae392dSchristos EPG *e = NULL; /* pacify gcc */
759f0aa214Scgd PAGE *h;
7661238e71Schristos indx_t idx, nxtindex;
779f0aa214Scgd pgno_t pg;
7840b37a3bSjoerg uint32_t nbytes, temp;
799f0aa214Scgd int dflags, exact, status;
809f0aa214Scgd char *dest, db[NOVFLSIZE], kb[NOVFLSIZE];
819f0aa214Scgd
829f0aa214Scgd t = dbp->internal;
839f0aa214Scgd
8445e27c80Scgd /* Toss any page pinned across calls. */
8545e27c80Scgd if (t->bt_pinned != NULL) {
8645e27c80Scgd mpool_put(t->bt_mp, t->bt_pinned, 0);
8745e27c80Scgd t->bt_pinned = NULL;
8845e27c80Scgd }
8945e27c80Scgd
9024420c01Scgd /* Check for change to a read-only tree. */
9124420c01Scgd if (F_ISSET(t, B_RDONLY)) {
929f0aa214Scgd errno = EPERM;
939f0aa214Scgd return (RET_ERROR);
949f0aa214Scgd }
959f0aa214Scgd
9624420c01Scgd switch (flags) {
9724420c01Scgd case 0:
9824420c01Scgd case R_NOOVERWRITE:
9924420c01Scgd break;
10024420c01Scgd case R_CURSOR:
1019f0aa214Scgd /*
10224420c01Scgd * If flags is R_CURSOR, put the cursor. Must already
10324420c01Scgd * have started a scan and not have already deleted it.
10424420c01Scgd */
10524420c01Scgd if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
10624420c01Scgd !F_ISSET(&t->bt_cursor,
10724420c01Scgd CURS_ACQUIRE | CURS_AFTER | CURS_BEFORE))
10824420c01Scgd break;
10924420c01Scgd /* FALLTHROUGH */
11024420c01Scgd default:
11124420c01Scgd errno = EINVAL;
11224420c01Scgd return (RET_ERROR);
11324420c01Scgd }
11424420c01Scgd
11524420c01Scgd /*
11624420c01Scgd * If the key/data pair won't fit on a page, store it on overflow
11724420c01Scgd * pages. Only put the key on the overflow page if the pair are
11824420c01Scgd * still too big after moving the data to an overflow page.
1199f0aa214Scgd *
1209f0aa214Scgd * XXX
12124420c01Scgd * If the insert fails later on, the overflow pages aren't recovered.
1229f0aa214Scgd */
1239f0aa214Scgd dflags = 0;
1249f0aa214Scgd if (key->size + data->size > t->bt_ovflsize) {
1259f0aa214Scgd if (key->size > t->bt_ovflsize) {
1269f0aa214Scgd storekey: if (__ovfl_put(t, key, &pg) == RET_ERROR)
1279f0aa214Scgd return (RET_ERROR);
1289f0aa214Scgd tkey.data = kb;
1299f0aa214Scgd tkey.size = NOVFLSIZE;
130dcc9c821Schristos memmove(kb, &pg, sizeof(pg));
1319f0aa214Scgd memmove(kb + sizeof(pgno_t),
13240b37a3bSjoerg &key->size, sizeof(uint32_t));
1339f0aa214Scgd dflags |= P_BIGKEY;
1349f0aa214Scgd key = &tkey;
1359f0aa214Scgd }
1369f0aa214Scgd if (key->size + data->size > t->bt_ovflsize) {
1379f0aa214Scgd if (__ovfl_put(t, data, &pg) == RET_ERROR)
1389f0aa214Scgd return (RET_ERROR);
1399f0aa214Scgd tdata.data = db;
1409f0aa214Scgd tdata.size = NOVFLSIZE;
141dcc9c821Schristos memmove(db, &pg, sizeof(pg));
14240b37a3bSjoerg _DBFIT(data->size, uint32_t);
14340b37a3bSjoerg temp = (uint32_t)data->size;
144cb9daf8fSchristos (void)memmove(db + sizeof(pgno_t),
14540b37a3bSjoerg &temp, sizeof(uint32_t));
1469f0aa214Scgd dflags |= P_BIGDATA;
1479f0aa214Scgd data = &tdata;
1489f0aa214Scgd }
1499f0aa214Scgd if (key->size + data->size > t->bt_ovflsize)
1509f0aa214Scgd goto storekey;
1519f0aa214Scgd }
1529f0aa214Scgd
1539f0aa214Scgd /* Replace the cursor. */
1549f0aa214Scgd if (flags == R_CURSOR) {
155*aae80e6bSchristos if ((h = mpool_get(t->bt_mp, t->bt_cursor.pg.pgno, 0)) == NULL)
1569f0aa214Scgd return (RET_ERROR);
15761238e71Schristos idx = t->bt_cursor.pg.index;
1589f0aa214Scgd goto delete;
1599f0aa214Scgd }
1609f0aa214Scgd
1619f0aa214Scgd /*
16224420c01Scgd * Find the key to delete, or, the location at which to insert.
16324420c01Scgd * Bt_fast and __bt_search both pin the returned page.
1649f0aa214Scgd */
1659f0aa214Scgd if (t->bt_order == NOT || (e = bt_fast(t, key, data, &exact)) == NULL)
1669f0aa214Scgd if ((e = __bt_search(t, key, &exact)) == NULL)
1679f0aa214Scgd return (RET_ERROR);
1689f0aa214Scgd h = e->page;
16961238e71Schristos idx = e->index;
1709f0aa214Scgd
1719f0aa214Scgd /*
17224420c01Scgd * Add the key/data pair to the tree. If an identical key is already
17324420c01Scgd * in the tree, and R_NOOVERWRITE is set, an error is returned. If
17424420c01Scgd * R_NOOVERWRITE is not set, the key is either added (if duplicates are
17524420c01Scgd * permitted) or an error is returned.
1769f0aa214Scgd */
1779f0aa214Scgd switch (flags) {
1789f0aa214Scgd case R_NOOVERWRITE:
1799f0aa214Scgd if (!exact)
1809f0aa214Scgd break;
1819f0aa214Scgd mpool_put(t->bt_mp, h, 0);
1829f0aa214Scgd return (RET_SPECIAL);
1839f0aa214Scgd default:
18424420c01Scgd if (!exact || !F_ISSET(t, B_NODUPS))
1859f0aa214Scgd break;
18624420c01Scgd /*
18724420c01Scgd * !!!
18824420c01Scgd * Note, the delete may empty the page, so we need to put a
18924420c01Scgd * new entry into the page immediately.
19024420c01Scgd */
19161238e71Schristos delete: if (__bt_dleaf(t, key, h, (u_int)idx) == RET_ERROR) {
1929f0aa214Scgd mpool_put(t->bt_mp, h, 0);
1939f0aa214Scgd return (RET_ERROR);
1949f0aa214Scgd }
1959f0aa214Scgd break;
1969f0aa214Scgd }
1979f0aa214Scgd
1989f0aa214Scgd /*
1999f0aa214Scgd * If not enough room, or the user has put a ceiling on the number of
2009f0aa214Scgd * keys permitted in the page, split the page. The split code will
2019f0aa214Scgd * insert the key and data and unpin the current page. If inserting
2029f0aa214Scgd * into the offset array, shift the pointers up.
2039f0aa214Scgd */
2049f0aa214Scgd nbytes = NBLEAFDBT(key->size, data->size);
205c7201a0fSlukem if ((uint32_t)h->upper - (uint32_t)h->lower < nbytes + sizeof(indx_t)) {
2069f0aa214Scgd if ((status = __bt_split(t, h, key,
20761238e71Schristos data, dflags, nbytes, (u_int)idx)) != RET_SUCCESS)
2089f0aa214Scgd return (status);
2099f0aa214Scgd goto success;
2109f0aa214Scgd }
2119f0aa214Scgd
21261238e71Schristos if (idx < (nxtindex = NEXTINDEX(h)))
21361238e71Schristos memmove(h->linp + idx + 1, h->linp + idx,
21461238e71Schristos (nxtindex - idx) * sizeof(indx_t));
2159f0aa214Scgd h->lower += sizeof(indx_t);
2169f0aa214Scgd
21761238e71Schristos h->linp[idx] = h->upper -= nbytes;
21861238e71Schristos dest = (char *)(void *)h + h->upper;
2199f0aa214Scgd WR_BLEAF(dest, key, data, dflags);
2209f0aa214Scgd
22124420c01Scgd /* If the cursor is on this page, adjust it as necessary. */
22224420c01Scgd if (F_ISSET(&t->bt_cursor, CURS_INIT) &&
22324420c01Scgd !F_ISSET(&t->bt_cursor, CURS_ACQUIRE) &&
22461238e71Schristos t->bt_cursor.pg.pgno == h->pgno && t->bt_cursor.pg.index >= idx)
22524420c01Scgd ++t->bt_cursor.pg.index;
22624420c01Scgd
227e79648d0Sthorpej if (t->bt_order == NOT) {
2289f0aa214Scgd if (h->nextpg == P_INVALID) {
22961238e71Schristos if (idx == NEXTINDEX(h) - 1) {
2309f0aa214Scgd t->bt_order = FORWARD;
23161238e71Schristos t->bt_last.index = idx;
2329f0aa214Scgd t->bt_last.pgno = h->pgno;
2339f0aa214Scgd }
2349f0aa214Scgd } else if (h->prevpg == P_INVALID) {
23561238e71Schristos if (idx == 0) {
2369f0aa214Scgd t->bt_order = BACK;
2379f0aa214Scgd t->bt_last.index = 0;
2389f0aa214Scgd t->bt_last.pgno = h->pgno;
2399f0aa214Scgd }
2409f0aa214Scgd }
241e79648d0Sthorpej }
2429f0aa214Scgd
2439f0aa214Scgd mpool_put(t->bt_mp, h, MPOOL_DIRTY);
2449f0aa214Scgd
2459f0aa214Scgd success:
24624420c01Scgd if (flags == R_SETCURSOR)
24761238e71Schristos __bt_setcur(t, e->page->pgno, (u_int)e->index);
24824420c01Scgd
24924420c01Scgd F_SET(t, B_MODIFIED);
2509f0aa214Scgd return (RET_SUCCESS);
2519f0aa214Scgd }
2529f0aa214Scgd
2539f0aa214Scgd #ifdef STATISTICS
25440b37a3bSjoerg unsigned long bt_cache_hit, bt_cache_miss;
2559f0aa214Scgd #endif
2569f0aa214Scgd
2579f0aa214Scgd /*
2589f0aa214Scgd * BT_FAST -- Do a quick check for sorted data.
2599f0aa214Scgd *
2609f0aa214Scgd * Parameters:
2619f0aa214Scgd * t: tree
2629f0aa214Scgd * key: key to insert
2639f0aa214Scgd *
2649f0aa214Scgd * Returns:
2659f0aa214Scgd * EPG for new record or NULL if not found.
2669f0aa214Scgd */
2679f0aa214Scgd static EPG *
bt_fast(BTREE * t,const DBT * key,const DBT * data,int * exactp)268cb9daf8fSchristos bt_fast(BTREE *t, const DBT *key, const DBT *data, int *exactp)
2699f0aa214Scgd {
2709f0aa214Scgd PAGE *h;
27140b37a3bSjoerg uint32_t nbytes;
2729f0aa214Scgd int cmp;
2739f0aa214Scgd
274*aae80e6bSchristos if ((h = mpool_get(t->bt_mp, t->bt_last.pgno, 0)) == NULL) {
2759f0aa214Scgd t->bt_order = NOT;
2769f0aa214Scgd return (NULL);
2779f0aa214Scgd }
27865aeeefbScgd t->bt_cur.page = h;
27965aeeefbScgd t->bt_cur.index = t->bt_last.index;
2809f0aa214Scgd
2819f0aa214Scgd /*
28224420c01Scgd * If won't fit in this page or have too many keys in this page,
28324420c01Scgd * have to search to get split stack.
2849f0aa214Scgd */
2859f0aa214Scgd nbytes = NBLEAFDBT(key->size, data->size);
286c7201a0fSlukem if ((uint32_t)h->upper - (uint32_t)h->lower < nbytes + sizeof(indx_t))
2879f0aa214Scgd goto miss;
2889f0aa214Scgd
2899f0aa214Scgd if (t->bt_order == FORWARD) {
29065aeeefbScgd if (t->bt_cur.page->nextpg != P_INVALID)
2919f0aa214Scgd goto miss;
29265aeeefbScgd if (t->bt_cur.index != NEXTINDEX(h) - 1)
2939f0aa214Scgd goto miss;
29465aeeefbScgd if ((cmp = __bt_cmp(t, key, &t->bt_cur)) < 0)
2959f0aa214Scgd goto miss;
29665aeeefbScgd t->bt_last.index = cmp ? ++t->bt_cur.index : t->bt_cur.index;
2979f0aa214Scgd } else {
29865aeeefbScgd if (t->bt_cur.page->prevpg != P_INVALID)
2999f0aa214Scgd goto miss;
30065aeeefbScgd if (t->bt_cur.index != 0)
3019f0aa214Scgd goto miss;
30265aeeefbScgd if ((cmp = __bt_cmp(t, key, &t->bt_cur)) > 0)
3039f0aa214Scgd goto miss;
3049f0aa214Scgd t->bt_last.index = 0;
3059f0aa214Scgd }
3069f0aa214Scgd *exactp = cmp == 0;
3079f0aa214Scgd #ifdef STATISTICS
3089f0aa214Scgd ++bt_cache_hit;
3099f0aa214Scgd #endif
31065aeeefbScgd return (&t->bt_cur);
3119f0aa214Scgd
3129f0aa214Scgd miss:
3139f0aa214Scgd #ifdef STATISTICS
3149f0aa214Scgd ++bt_cache_miss;
3159f0aa214Scgd #endif
3169f0aa214Scgd t->bt_order = NOT;
3179f0aa214Scgd mpool_put(t->bt_mp, h, 0);
3189f0aa214Scgd return (NULL);
3199f0aa214Scgd }
320