xref: /csrg-svn/lib/libc/db/btree/bt_get.c (revision 46135)
1 /*-
2  * Copyright (c) 1990 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This code is derived from software contributed to Berkeley by
6  * Mike Olson.
7  *
8  * %sccs.include.redist.c%
9  */
10 
11 #if defined(LIBC_SCCS) && !defined(lint)
12 static char sccsid[] = "@(#)bt_get.c	5.1 (Berkeley) 01/23/91";
13 #endif /* LIBC_SCCS and not lint */
14 
15 #include <sys/param.h>
16 #include <sys/types.h>
17 #include <sys/errno.h>
18 #include <sys/file.h>
19 #include <db.h>
20 #include "btree.h"
21 
22 /*
23  *  BT_GETPAGE -- Make pgno the current page of the btree.
24  *
25  *	This routine is just a wrapper that decides whether to call the
26  *	memory or disk-based routine to do the work.
27  *
28  *	Parameters:
29  *		t -- btree in which to get page
30  *		pgno -- page number to get
31  *
32  *	Returns:
33  *		RET_SUCCESS or RET_ERROR.
34  */
35 
36 int
37 _bt_getpage(t, pgno)
38 	BTREE_P t;
39 	pgno_t pgno;
40 {
41 #ifdef DEBUG
42 	if (pgno == P_NONE)
43 		_punt();
44 #endif /* DEBUG */
45 
46 	/* see if we can get away without doing any work */
47 	if (t->bt_curpage != (BTHEADER *) NULL) {
48 		if (t->bt_curpage->h_pgno == pgno)
49 			return (RET_SUCCESS);
50 	}
51 
52 	if (t->bt_fname == (char *) NULL)
53 		return (_bt_getmpage(t, pgno));
54 	else
55 		return (_bt_getdpage(t, pgno));
56 }
57 
58 /*
59  *  _BT_GETMPAGE -- Make pgno the current page of the btree.
60  *
61  *	This routine gets pages for in-memory btrees.
62  *
63  *	Parameters:
64  *		t -- btree in which to get page
65  *		pgno -- page number to get
66  *
67  *	Returns:
68  *		RET_SUCCESS or RET_ERROR.
69  */
70 
71 int
72 _bt_getmpage(t, pgno)
73 	register BTREE_P t;
74 	pgno_t pgno;
75 {
76 	int htindex;
77 	BTHEADER *h;
78 	HTBUCKET *b;
79 
80 	if (t->bt_curpage == (BTHEADER *) NULL) {
81 		if (pgno != P_ROOT) {
82 			errno = EBADF;
83 			return (RET_ERROR);
84 		}
85 
86 		t->bt_npages++;
87 		h = (BTHEADER *) malloc((unsigned) t->bt_psize);
88 		if (h == (BTHEADER *) NULL)
89 			return (RET_ERROR);
90 
91 		h->h_pgno = P_ROOT;
92 		h->h_flags = F_LEAF;
93 		h->h_lower = (index_t)
94 				(((char *) &(h->h_linp[0])) - ((char *) h));
95 		h->h_upper = t->bt_psize;
96 		h->h_prevpg = h->h_nextpg = P_NONE;
97 
98 		t->bt_curpage = h;
99 
100 		/* get the root page into the hash table */
101 		if (_bt_write(t, h, RELEASE) == RET_ERROR)
102 			return (RET_ERROR);
103 	}
104 
105 	htindex = HASHKEY(pgno);
106 
107 	for (b = t->bt_s.bt_ht[htindex];
108 	     b != (HTBUCKET *) NULL;
109 	     b = b->ht_next) {
110 		if (b->ht_pgno == pgno) {
111 			t->bt_curpage = b->ht_page;
112 			return (RET_SUCCESS);
113 		}
114 	}
115 	return (RET_ERROR);
116 }
117 
118 /*
119  *  _BT_GETDPAGE -- Make pgno the current page of the btree.
120  *
121  *	This routine gets pages for disk btrees.
122  *
123  *	Because disk btree pages must be readable across machine architectures,
124  *	the btree code writes integers out in network format.  This routine
125  *	converts them back to host format before returning the page.
126  *
127  *	Parameters:
128  *		t -- btree in which to get page
129  *		pgno -- page number to get
130  *
131  *	Returns:
132  *		RET_SUCCESS, RET_ERROR.
133  */
134 
135 int
136 _bt_getdpage(t, pgno)
137 	register BTREE_P t;
138 	pgno_t pgno;
139 {
140 	BTHEADER *h;
141 	char *cache;
142 	long pos;
143 	int n, nbytes;
144 
145 	/* if we have an lru cache, let the cache code do the work */
146 	if (ISCACHE(t)) {
147 		cache = t->bt_s.bt_d.d_cache;
148 
149 		/* release the old page */
150 		if (t->bt_curpage != (BTHEADER *) NULL) {
151 			pgno_t opgno = t->bt_curpage->h_pgno;
152 			t->bt_curpage->h_flags &= ~F_DIRTY;
153 
154 			if (lruwrite(cache, (int) opgno) < 0)
155 				return (RET_ERROR);
156 
157 			if (lrurelease(cache, (int) opgno) < 0)
158 				return (RET_ERROR);
159 		}
160 
161 		if (pgno > t->bt_npages) {
162 			if ((h = (BTHEADER *) lrugetnew(cache, (int)pgno, &nbytes))
163 			    == (BTHEADER *) NULL)
164 				return (RET_ERROR);
165 			t->bt_npages = pgno;
166 		} else {
167 			if ((h = (BTHEADER *) lruget(cache, (int)pgno, &nbytes))
168 			    == (BTHEADER *) NULL)
169 				return (RET_ERROR);
170 		}
171 
172 		/* init this page, if necessary */
173 		if (nbytes == 0) {
174 			h->h_pgno = pgno;
175 			h->h_flags = F_LEAF;
176 			h->h_lower = (index_t)
177 				(((char *) &(h->h_linp[0])) - ((char *) h));
178 			h->h_upper = t->bt_psize;
179 			h->h_prevpg = h->h_nextpg = P_NONE;
180 		}
181 
182 		t->bt_curpage = h;
183 
184 		return (RET_SUCCESS);
185 	}
186 
187 	/* sync the current page, if necessary */
188 	if (t->bt_curpage != (BTHEADER *) NULL) {
189 		if (t->bt_curpage->h_flags & F_DIRTY)
190 			if (_bt_write(t, t->bt_curpage, RELEASE) == RET_ERROR)
191 				return (RET_ERROR);
192 	} else {
193 		if (t->bt_npages == 0)
194 			t->bt_npages = 1;
195 
196 		/* if no current page, get space for one */
197 		if ((t->bt_curpage = (BTHEADER *) malloc((unsigned) t->bt_psize))
198 		    == (BTHEADER *) NULL) {
199 			return (RET_ERROR);
200 		}
201 	}
202 
203 	n = t->bt_psize;
204 	pos = (long) (pgno * n);
205 
206 	/* seek to correct location in file */
207 	if (lseek(t->bt_s.bt_d.d_fd, pos, L_SET) != pos) {
208 		return (RET_ERROR);
209 	}
210 
211 	/* read the page */
212 	if ((nbytes = read(t->bt_s.bt_d.d_fd, t->bt_curpage, n)) < n) {
213 
214 		/*
215 		 *  If we didn't get a full page, we must have gotten no
216 		 *  data at all -- in which case we're trying to read a
217 		 *  root page that doesn't exist yet.  This is the only
218 		 *  case in which this is okay.  If this happens, construct
219 		 *  an empty root page by hand.
220 		 */
221 		if (nbytes != 0 || pgno != P_ROOT) {
222 			errno = EBADF;
223 			return (RET_ERROR);
224 		}
225 
226 		h = (BTHEADER *) t->bt_curpage;
227 		h->h_pgno = pgno;
228 		h->h_flags = F_LEAF;
229 		h->h_lower = (index_t)
230 				(((char *) &(h->h_linp[0])) - ((char *) h));
231 		h->h_upper = t->bt_psize;
232 		h->h_prevpg = h->h_nextpg = P_NONE;
233 	} else
234 		(void) _bt_pgin(t->bt_curpage, (char *) t->bt_lorder);
235 
236 	return (RET_SUCCESS);
237 }
238 
239 /*
240  *  _BT_PGOUT, _BT_PGIN -- Convert host-specific number layout to/from
241  *			   the host-independent format stored on disk.
242  *
243  *	Parameters:
244  *		h -- page to convert
245  *		_lorder -- byte order for pages (stored as a char * in the
246  *			   cache, and passed around as a magic cookie).
247  *
248  *	Returns:
249  *		RET_SUCCESS (lru code requires a return value).
250  *
251  *	Side Effects:
252  *		Layout of tree metadata on the page is changed in place.
253  *
254  *	Warnings:
255  *		Everywhere else in the code, the types pgno_t and index_t
256  *		are opaque.  These two routines know what they really
257  *		are.
258  */
259 
260 int
261 _bt_pgout(h, _lorder)
262 	BTHEADER *h;
263 	char *_lorder;
264 {
265 	int i;
266 	int top;
267 	int lorder;
268 	DATUM *d;
269 	IDATUM *id;
270 	size_t chain;
271 
272 	lorder = (int) _lorder;
273 	if (lorder == BYTE_ORDER)
274 		return (RET_SUCCESS);
275 
276 	if (h->h_flags & F_LEAF) {
277 		if (h->h_flags & F_CONT) {
278 			if (h->h_prevpg == P_NONE) {
279 				size_t longsz;
280 
281 				(void) bcopy((char *) &(h->h_linp[0]),
282 					      (char *) &longsz,
283 					      sizeof(longsz));
284 				BLSWAP(longsz);
285 				(void) bcopy((char *) &longsz,
286 					      (char *) &(h->h_linp[0]),
287 					      sizeof(longsz));
288 			}
289 		} else {
290 			top = NEXTINDEX(h);
291 			for (i = 0; i < top; i++) {
292 				d = (DATUM *) GETDATUM(h, i);
293 				if (d->d_flags & D_BIGKEY) {
294 					(void) bcopy((char *) &(d->d_bytes[0]),
295 						      (char *) &chain,
296 						      sizeof(chain));
297 					BLSWAP(chain);
298 					(void) bcopy((char *) &chain,
299 						      (char *) &(d->d_bytes[0]),
300 						      sizeof(chain));
301 				}
302 				if (d->d_flags & D_BIGDATA) {
303 					(void) bcopy((char *) &(d->d_bytes[d->d_ksize]),
304 						      (char *) &chain,
305 						      sizeof(chain));
306 					BLSWAP(chain);
307 					(void) bcopy((char *) &chain,
308 						      (char *) &(d->d_bytes[d->d_ksize]),
309 						      sizeof(chain));
310 				}
311 				BLSWAP(d->d_dsize);
312 				BLSWAP(d->d_ksize);
313 				BLSWAP(d->d_flags);
314 				BLSWAP(h->h_linp[i]);
315 			}
316 		}
317 	} else {
318 		top = NEXTINDEX(h);
319 		for (i = 0; i < top; i++) {
320 			id = (IDATUM *) GETDATUM(h, i);
321 			BLSWAP(id->i_size);
322 			BLSWAP(id->i_pgno);
323 			BLSWAP(id->i_flags);
324 			if (id->i_flags & D_BIGKEY) {
325 				(void) bcopy((char *) &(id->i_bytes[0]),
326 					      (char *) &chain,
327 					      sizeof(chain));
328 				BLSWAP(chain);
329 				(void) bcopy((char *) &chain,
330 					      (char *) &(id->i_bytes[0]),
331 					      sizeof(chain));
332 			}
333 			BLSWAP(h->h_linp[i]);
334 		}
335 	}
336 	BLSWAP(h->h_flags);
337 	BLSWAP(h->h_pgno);
338 	BLSWAP(h->h_prevpg);
339 	BLSWAP(h->h_nextpg);
340 	BLSWAP(h->h_lower);
341 	BLSWAP(h->h_upper);
342 
343 	return (RET_SUCCESS);
344 }
345 
346 int
347 _bt_pgin(h, _lorder)
348 	BTHEADER *h;
349 	char *_lorder;
350 {
351 	int i;
352 	int top;
353 	int lorder;
354 	DATUM *d;
355 	IDATUM *id;
356 	size_t chain;
357 
358 	/*
359 	 *  If btree pages are to be stored in the host byte order, don't
360 	 *  bother swapping.
361 	 */
362 	lorder = (int) _lorder;
363 	if (lorder == BYTE_ORDER)
364 		return (RET_SUCCESS);
365 
366 	BLSWAP(h->h_upper);
367 	BLSWAP(h->h_lower);
368 	BLSWAP(h->h_nextpg);
369 	BLSWAP(h->h_prevpg);
370 	BLSWAP(h->h_pgno);
371 	BLSWAP(h->h_flags);
372 
373 	if (h->h_flags & F_LEAF) {
374 		if (h->h_flags & F_CONT) {
375 			if (h->h_prevpg == P_NONE) {
376 				size_t longsz;
377 
378 				(void) bcopy((char *) &(h->h_linp[0]),
379 					      (char *) &longsz,
380 					      sizeof(longsz));
381 				BLSWAP(longsz);
382 				(void) bcopy((char *) &longsz,
383 					      (char *) &(h->h_linp[0]),
384 					      sizeof(longsz));
385 			}
386 		} else {
387 			top = NEXTINDEX(h);
388 			for (i = 0; i < top; i++) {
389 				BLSWAP(h->h_linp[i]);
390 				d = (DATUM *) GETDATUM(h, i);
391 				BLSWAP(d->d_dsize);
392 				BLSWAP(d->d_ksize);
393 				BLSWAP(d->d_flags);
394 				if (d->d_flags & D_BIGKEY) {
395 					(void) bcopy((char *) &(d->d_bytes[0]),
396 						      (char *) &chain,
397 						      sizeof(chain));
398 					BLSWAP(chain);
399 					(void) bcopy((char *) &chain,
400 						      (char *) &(d->d_bytes[0]),
401 						      sizeof(chain));
402 				}
403 				if (d->d_flags & D_BIGDATA) {
404 					(void) bcopy((char *) &(d->d_bytes[d->d_ksize]),
405 						      (char *) &chain,
406 						      sizeof(chain));
407 					BLSWAP(chain);
408 					(void) bcopy((char *) &chain,
409 						      (char *) &(d->d_bytes[d->d_ksize]),
410 						      sizeof(chain));
411 				}
412 			}
413 		}
414 	} else {
415 		top = NEXTINDEX(h);
416 		for (i = 0; i < top; i++) {
417 			BLSWAP(h->h_linp[i]);
418 			id = (IDATUM *) GETDATUM(h, i);
419 			BLSWAP(id->i_size);
420 			BLSWAP(id->i_pgno);
421 			BLSWAP(id->i_flags);
422 			if (id->i_flags & D_BIGKEY) {
423 				(void) bcopy((char *) &(id->i_bytes[0]),
424 					      (char *) &chain,
425 					      sizeof(chain));
426 				BLSWAP(chain);
427 				(void) bcopy((char *) &chain,
428 					      (char *) &(id->i_bytes[0]),
429 					      sizeof(chain));
430 			}
431 		}
432 	}
433 	return (RET_SUCCESS);
434 }
435 
436 /*
437  *  _BT_ALLOCPG -- allocate a new page in the btree.
438  *
439  *	This is called when we split a page, to get space to do the split.
440  *	For disk btrees, these pages are released when the split is done.
441  *	For memory btrees, they are not.
442  *
443  *	Parameters:
444  *		t -- tree in which to do split
445  *
446  *	Returns:
447  *		Pointer to the newly-allocated page
448  */
449 
450 BTHEADER *
451 _bt_allocpg(t)
452 	BTREE_P t;
453 {
454 	BTHEADER *h = t->bt_curpage;
455 	BTHEADER *nh;
456 	int nbytes;
457 
458 	/* if we have a cache, let the cache code do the work */
459 	if (ISDISK(t) && ISCACHE(t)) {
460 		nh = (BTHEADER *) lrugetnew(t->bt_s.bt_d.d_cache,
461 					    (int) (t->bt_npages + 1),
462 					    &nbytes);
463 	} else {
464 		nh = (BTHEADER *) malloc((unsigned) t->bt_psize);
465 	}
466 
467 	if (nh != (BTHEADER *) NULL) {
468 		nh->h_pgno = nh->h_prevpg = nh->h_nextpg = P_NONE;
469 		nh->h_flags = h->h_flags;
470 		nh->h_lower = (index_t)
471 				(((char *) &(nh->h_linp[0])) - ((char *) nh));
472 		nh->h_upper = t->bt_psize;
473 	}
474 
475 	return (nh);
476 }
477 
478 /*
479  *  _BT_WRITE -- Write a specific page to a btree file.
480  *
481  *	Parameters:
482  *		t -- btree to get the page
483  *		h -- page to write
484  *		relflag -- (int) this page may/may not be released
485  *
486  *	Returns:
487  *		RET_SUCCESS, RET_ERROR.
488  *
489  *	Side Effects:
490  *		Writes a metadata page if none has been written yet.
491  */
492 
493 int
494 _bt_write(t, h, relflag)
495 	BTREE_P t;
496 	BTHEADER *h;
497 	int relflag;
498 {
499 	long pos;
500 	int htindex;
501 	HTBUCKET *b;
502 	char *cache;
503 	pgno_t pgno;
504 
505 	h->h_flags &= ~F_DIRTY;
506 	if (ISDISK(t)) {
507 
508 		/* if we haven't done so yet, write the metadata */
509 		if (!(t->bt_flags & BTF_METAOK)) {
510 			if (_bt_wrtmeta(t) == RET_ERROR)
511 				return (RET_ERROR);
512 		}
513 
514 		pgno = h->h_pgno;
515 
516 
517 		/* if we have a cache, let the cache code do the work */
518 		if ((cache = t->bt_s.bt_d.d_cache) != (char *) NULL) {
519 			if (lruwrite(cache, (int) pgno) < 0)
520 				return (RET_ERROR);
521 			if (relflag && lrurelease(cache, (int) pgno) < 0)
522 				return (RET_ERROR);
523 
524 		} else {
525 			(void) _bt_pgout(h, (char *) t->bt_lorder);
526 			/* now write the current page */
527 			pos = (long) (pgno * t->bt_psize);
528 			if (lseek(t->bt_s.bt_d.d_fd, pos, L_SET) != pos)
529 				return (RET_ERROR);
530 			if (write(t->bt_s.bt_d.d_fd, (char *) h, (int) t->bt_psize)
531 			    < t->bt_psize)
532 				return (RET_ERROR);
533 			if (!relflag)
534 				(void) _bt_pgin(h, (char *) t->bt_lorder);
535 		}
536 	} else {
537 		/* in-memory btree */
538 		htindex = HASHKEY(h->h_pgno);
539 
540 		/* see if we need to overwrite existing entry */
541 		for (b = t->bt_s.bt_ht[htindex];
542 		     b != (HTBUCKET *) NULL;
543 		     b = b->ht_next) {
544 			if (b->ht_pgno == h->h_pgno) {
545 				b->ht_page = h;
546 				return (RET_SUCCESS);
547 			}
548 		}
549 
550 		/* new entry, write it */
551 		b = (HTBUCKET *) malloc((unsigned) sizeof (HTBUCKET));
552 		if (b == (HTBUCKET *) NULL)
553 			return (RET_ERROR);
554 
555 		b->ht_pgno = h->h_pgno;
556 		b->ht_page = h;
557 		b->ht_next = t->bt_s.bt_ht[htindex];
558 		t->bt_s.bt_ht[htindex] = b;
559 	}
560 	return (RET_SUCCESS);
561 }
562 
563 /*
564  *  _BT_RELEASE -- Release a locked-down cache page
565  *
566  *	During page splits, we want to force pages out to the cache
567  *	before we actually release them, in some cases.  This routine
568  *	releases such a page when it is no longer needed.
569  *
570  *	Parameters:
571  *		t -- btree in which to release page
572  *		h -- page to release
573  *
574  *	Returns:
575  *		RET_SUCCESS, RET_ERROR.
576  *
577  *	Side Effects:
578  *		None.
579  */
580 
581 int
582 _bt_release(t, h)
583 	BTREE_P t;
584 	BTHEADER *h;
585 {
586 	if (ISDISK(t) && ISCACHE(t)) {
587 		if (lrurelease(t->bt_s.bt_d.d_cache, (int) (h->h_pgno)) < 0)
588 			return (RET_ERROR);
589 	}
590 	return (RET_SUCCESS);
591 }
592 
593 /*
594  *  _BT_WRTMETA -- Write metadata to the btree.
595  *
596  *	Parameters:
597  *		t -- tree to which to write
598  *
599  *	Returns:
600  *		RET_SUCCESS, RET_ERROR.
601  */
602 
603 int
604 _bt_wrtmeta(t)
605 	BTREE_P t;
606 {
607 	BTMETA m;
608 
609 	if (lseek(t->bt_s.bt_d.d_fd, 0l, L_SET) != 0l)
610 		return (RET_ERROR);
611 
612 	/* lorder has to be in host-independent format */
613 	m.m_lorder = (u_long) htonl((long) t->bt_lorder);
614 
615 	m.m_magic = BTREEMAGIC;
616 	m.m_version = BTREEVERSION;
617 	m.m_psize = t->bt_psize;
618 	m.m_free = t->bt_free;
619 	m.m_flags = t->bt_flags & BTF_NODUPS;
620 
621 	if (t->bt_lorder != BYTE_ORDER) {
622 		BLSWAP(m.m_magic);
623 		BLSWAP(m.m_version);
624 		BLSWAP(m.m_psize);
625 		BLSWAP(m.m_free);
626 		BLSWAP(m.m_flags);
627 	}
628 
629 	if (write(t->bt_s.bt_d.d_fd, (char *) &m, sizeof(m))
630 	    != sizeof(m)) {
631 		return (RET_ERROR);
632 	}
633 
634 	t->bt_flags |= BTF_METAOK;
635 
636 	return (RET_SUCCESS);
637 }
638