1 /* $NetBSD: idl.c,v 1.2 2021/08/14 16:15:02 christos Exp $ */
2
3 /* OpenLDAP WiredTiger backend */
4 /* $OpenLDAP$ */
5 /* This work is part of OpenLDAP Software <http://www.openldap.org/>.
6 *
7 * Copyright 2002-2021 The OpenLDAP Foundation.
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted only as authorized by the OpenLDAP
12 * Public License.
13 *
14 * A copy of this license is available in the file LICENSE in the
15 * top-level directory of the distribution or, alternatively, at
16 * <http://www.OpenLDAP.org/license.html>.
17 */
18 /* ACKNOWLEDGEMENTS:
19 * This work was developed by HAMANO Tsukasa <hamano@osstech.co.jp>
20 * based on back-bdb for inclusion in OpenLDAP Software.
21 * WiredTiger is a product of MongoDB Inc.
22 */
23
24 #include <sys/cdefs.h>
25 __RCSID("$NetBSD: idl.c,v 1.2 2021/08/14 16:15:02 christos Exp $");
26
27 #include "portable.h"
28
29 #include <stdio.h>
30 #include <ac/string.h>
31
32 #include "back-wt.h"
33 #include "idl.h"
34
/* Larger / smaller of two values.  Each argument may be evaluated twice,
 * so do not pass expressions with side effects.
 */
#define IDL_MAX(x,y) ( (x) > (y) ? (x) : (y) )
#define IDL_MIN(x,y) ( (x) < (y) ? (x) : (y) )
/* Three-way compare: -1 if x < y, 1 if x > y, 0 if equal.
 * Arguments may be evaluated twice here as well.
 */
#define IDL_CMP(x,y) ( (x) < (y) ? -1 : (x) > (y) )
38
39 #if IDL_DEBUG > 0
/* Debug-only consistency check of an IDL.
 *
 * A range must have its first bound no greater than its last bound;
 * a list must be strictly ascending.  Violations trip an assert().
 */
static void idl_check( ID *ids )
{
	ID i;

	if ( WT_IDL_IS_RANGE( ids ) ) {
		assert( WT_IDL_RANGE_FIRST(ids) <= WT_IDL_RANGE_LAST(ids) );
		return;
	}

	/* compare every neighbouring pair ids[i], ids[i+1] */
	i = 1;
	while ( i < ids[0] ) {
		assert( ids[i+1] > ids[i] );
		i++;
	}
}
51
52 #if IDL_DEBUG > 1
/* Debug-only dump of an IDL through Debug(), followed by idl_check().
 *
 * Ranges are printed as their two bounds; lists are printed as the
 * element count followed by the elements in hex, 16 per line.
 */
static void idl_dump( ID *ids )
{
	if ( !WT_IDL_IS_RANGE( ids ) ) {
		ID pos;

		Debug( LDAP_DEBUG_ANY, "IDL: size %ld", (long) ids[0] );

		for ( pos = 1; pos <= ids[0]; pos++ ) {
			/* start a fresh line before elements 1, 17, 33, ... */
			if ( pos % 16 == 1 ) {
				Debug( LDAP_DEBUG_ANY, "\n" );
			}
			Debug( LDAP_DEBUG_ANY, " %02lx", (long) ids[pos] );
		}

		Debug( LDAP_DEBUG_ANY, "\n" );

	} else {
		Debug( LDAP_DEBUG_ANY,
			"IDL: range ( %ld - %ld )\n",
			(long) WT_IDL_RANGE_FIRST( ids ),
			(long) WT_IDL_RANGE_LAST( ids ) );
	}

	idl_check( ids );
}
77 #endif /* IDL_DEBUG > 1 */
78 #endif /* IDL_DEBUG > 0 */
79
/* Locate id in the sorted list ids (ids[0] holds the element count,
 * the elements live in ids[1..ids[0]]).
 *
 * Returns the 1-based position of id when present; otherwise the
 * position of the first element greater than id, which is ids[0]+1
 * when every element is smaller (and 1 for an empty list).
 */
unsigned wt_idl_search( ID *ids, ID id )
{
#define IDL_BINARY_SEARCH 1
#ifdef IDL_BINARY_SEARCH
	/*
	 * binary search of id in ids
	 * if found, returns position of id
	 * if not found, returns first position greater than id
	 */
	unsigned low = 0;		/* elements at positions <= low are < id */
	unsigned pos = 1;		/* last position probed */
	int cmp = 0;			/* result of the last comparison */
	unsigned remaining = ids[0];	/* size of the window still to search */

#if IDL_DEBUG > 0
	idl_check( ids );
#endif

	while ( remaining > 0 ) {
		unsigned half = remaining >> 1;

		pos = low + half + 1;
		cmp = IDL_CMP( id, ids[pos] );

		if ( cmp == 0 ) {
			return pos;
		}

		if ( cmp < 0 ) {
			/* keep the part of the window below pos */
			remaining = half;
		} else {
			/* keep the part of the window above pos */
			low = pos;
			remaining -= half + 1;
		}
	}

	/* not found: if the last probe was smaller than id, the insertion
	 * point is the slot after it
	 */
	return ( cmp > 0 ) ? pos + 1 : pos;

#else
	/* (reverse) linear search */
	int i;

#if IDL_DEBUG > 0
	idl_check( ids );
#endif

	i = ids[0];
	while ( i && id <= ids[i] ) {
		i--;
	}

	return i+1;
#endif
}
137
/* Insert id into the IDL ids, keeping a list sorted.
 *
 * For a range, the bounds are widened to include id.  For a list, id is
 * inserted at its sorted position; if the list would reach
 * WT_IDL_DB_MAX entries it is converted to a range covering the old
 * contents and id.
 *
 * Returns 0 on success, -1 if id is already present (or already inside
 * the range), -2 on an internal error in the search.
 */
int wt_idl_insert( ID *ids, ID id )
{
	unsigned x;

#if IDL_DEBUG > 1
	idl_dump( ids );
#elif IDL_DEBUG > 0
	idl_check( ids );
#endif

	if (WT_IDL_IS_RANGE( ids )) {
		/* if already in range, treat as a dup */
		if (id >= WT_IDL_RANGE_FIRST(ids) && id <= WT_IDL_RANGE_LAST(ids))
			return -1;
		if (id < WT_IDL_RANGE_FIRST(ids))
			ids[1] = id;
		else if (id > WT_IDL_RANGE_LAST(ids))
			ids[2] = id;
		return 0;
	}

	x = wt_idl_search( ids, id );
	assert( x > 0 );

#if IDL_DEBUG > 1
	/* The position is only known after the search; tracing it any
	 * earlier would read an uninitialised variable.
	 */
	Debug( LDAP_DEBUG_ANY, "insert: %04lx at %d\n", (long) id, (int) x );
#endif

	if( x < 1 ) {
		/* internal error */
		return -2;
	}

	if ( x <= ids[0] && ids[x] == id ) {
		/* duplicate */
		return -1;
	}

	if ( ++ids[0] >= WT_IDL_DB_MAX ) {
		/* Too many entries: collapse to a range.  ids[0] was already
		 * incremented, so ids[ids[0]-1] is the old last (largest)
		 * element.
		 */
		if( id < ids[1] ) {
			ids[1] = id;
			ids[2] = ids[ids[0]-1];
		} else if ( ids[ids[0]-1] < id ) {
			ids[2] = id;
		} else {
			ids[2] = ids[ids[0]-1];
		}
		ids[0] = NOID;

	} else {
		/* insert id: shift the tail up one slot (regions overlap) */
		AC_MEMCPY( &ids[x+1], &ids[x], (ids[0]-x) * sizeof(ID) );
		ids[x] = id;
	}

#if IDL_DEBUG > 1
	idl_dump( ids );
#elif IDL_DEBUG > 0
	idl_check( ids );
#endif

	return 0;
}
198
/* Remove id from the IDL ids.
 *
 * For a range, only a boundary can be removed (the bound is moved
 * inward); deleting any other id is a no-op.  A range left with a
 * single value becomes a one-element list, and an inverted range
 * becomes the empty list.  For a list, the element is removed and the
 * tail is shifted down.
 *
 * Returns 0 on success (always for ranges), -1 if id is not in the
 * list, -2 on an internal error in the search, -3 if the list became
 * empty but the removed element was not at position 1.
 */
static int wt_idl_delete( ID *ids, ID id )
{
	unsigned x;

#if IDL_DEBUG > 1
	idl_dump( ids );
#elif IDL_DEBUG > 0
	idl_check( ids );
#endif

	if (WT_IDL_IS_RANGE( ids )) {
		/* If deleting a range boundary, adjust */
		if ( ids[1] == id )
			ids[1]++;
		else if ( ids[2] == id )
			ids[2]--;
		/* deleting from inside a range is a no-op */

		/* If the range has collapsed, re-adjust */
		if ( ids[1] > ids[2] )
			ids[0] = 0;
		else if ( ids[1] == ids[2] )
			/* one value left: ids[1] already holds it, so only the
			 * count needs to change to make this a 1-element list
			 */
			ids[0] = 1;
		return 0;
	}

	x = wt_idl_search( ids, id );
	assert( x > 0 );

#if IDL_DEBUG > 1
	/* The position is only known after the search; tracing it any
	 * earlier would read an uninitialised variable.
	 */
	Debug( LDAP_DEBUG_ANY, "delete: %04lx at %d\n", (long) id, (int) x );
#endif

	if( x <= 0 ) {
		/* internal error */
		return -2;
	}

	if( x > ids[0] || ids[x] != id ) {
		/* not found */
		return -1;

	} else if ( --ids[0] == 0 ) {
		/* removed the only element; it must have been at slot 1 */
		if( x != 1 ) {
			return -3;
		}

	} else {
		/* close the gap: ids[0] is already decremented, so
		 * 1+ids[0]-x elements follow position x (regions overlap)
		 */
		AC_MEMCPY( &ids[x], &ids[x+1], (1+ids[0]-x) * sizeof(ID) );
	}

#if IDL_DEBUG > 1
	idl_dump( ids );
#elif IDL_DEBUG > 0
	idl_check( ids );
#endif

	return 0;
}
255
/* Produce a printable form of an index key.
 *
 * A key of exactly 4 bytes is formatted into buf as "[xxxxxxxx]"
 * (8 hex digits) and buf is returned; buf must hold at least 11 bytes.
 * Any other length returns val itself, unchanged.
 */
static char *
wt_show_key(
	char *buf,
	void *val,
	size_t len )
{
	unsigned char *bytes;

	if ( len != 4 /* LUTIL_HASH_BYTES */ ) {
		return val;
	}

	bytes = val;
	sprintf( buf, "[%02x%02x%02x%02x]",
		bytes[0], bytes[1], bytes[2], bytes[3] );
	return buf;
}
270
271 /*
272 * idl_intersection - return a = a intersection b
273 */
274 int
wt_idl_intersection(ID * a,ID * b)275 wt_idl_intersection(
276 ID *a,
277 ID *b )
278 {
279 ID ida, idb;
280 ID idmax, idmin;
281 ID cursora = 0, cursorb = 0, cursorc;
282 int swap = 0;
283
284 if ( WT_IDL_IS_ZERO( a ) || WT_IDL_IS_ZERO( b ) ) {
285 a[0] = 0;
286 return 0;
287 }
288
289 idmin = IDL_MAX( WT_IDL_FIRST(a), WT_IDL_FIRST(b) );
290 idmax = IDL_MIN( WT_IDL_LAST(a), WT_IDL_LAST(b) );
291 if ( idmin > idmax ) {
292 a[0] = 0;
293 return 0;
294 } else if ( idmin == idmax ) {
295 a[0] = 1;
296 a[1] = idmin;
297 return 0;
298 }
299
300 if ( WT_IDL_IS_RANGE( a ) ) {
301 if ( WT_IDL_IS_RANGE(b) ) {
302 /* If both are ranges, just shrink the boundaries */
303 a[1] = idmin;
304 a[2] = idmax;
305 return 0;
306 } else {
307 /* Else swap so that b is the range, a is a list */
308 ID *tmp = a;
309 a = b;
310 b = tmp;
311 swap = 1;
312 }
313 }
314
315 /* If a range completely covers the list, the result is
316 * just the list. If idmin to idmax is contiguous, just
317 * turn it into a range.
318 */
319 if ( WT_IDL_IS_RANGE( b )
320 && WT_IDL_RANGE_FIRST( b ) <= WT_IDL_FIRST( a )
321 && WT_IDL_RANGE_LAST( b ) >= WT_IDL_LLAST( a ) ) {
322 if (idmax - idmin + 1 == a[0])
323 {
324 a[0] = NOID;
325 a[1] = idmin;
326 a[2] = idmax;
327 }
328 goto done;
329 }
330
331 /* Fine, do the intersection one element at a time.
332 * First advance to idmin in both IDLs.
333 */
334 cursora = cursorb = idmin;
335 ida = wt_idl_first( a, &cursora );
336 idb = wt_idl_first( b, &cursorb );
337 cursorc = 0;
338
339 while( ida <= idmax || idb <= idmax ) {
340 if( ida == idb ) {
341 a[++cursorc] = ida;
342 ida = wt_idl_next( a, &cursora );
343 idb = wt_idl_next( b, &cursorb );
344 } else if ( ida < idb ) {
345 ida = wt_idl_next( a, &cursora );
346 } else {
347 idb = wt_idl_next( b, &cursorb );
348 }
349 }
350 a[0] = cursorc;
351 done:
352 if (swap)
353 WT_IDL_CPY( b, a );
354
355 return 0;
356 }
357
358
359 /*
360 * idl_union - return a = a union b
361 */
362 int
wt_idl_union(ID * a,ID * b)363 wt_idl_union(
364 ID *a,
365 ID *b )
366 {
367 ID ida, idb;
368 ID cursora = 0, cursorb = 0, cursorc;
369
370 if ( WT_IDL_IS_ZERO( b ) ) {
371 return 0;
372 }
373
374 if ( WT_IDL_IS_ZERO( a ) ) {
375 WT_IDL_CPY( a, b );
376 return 0;
377 }
378
379 if ( WT_IDL_IS_RANGE( a ) || WT_IDL_IS_RANGE(b) ) {
380 over: ida = IDL_MIN( WT_IDL_FIRST(a), WT_IDL_FIRST(b) );
381 idb = IDL_MAX( WT_IDL_LAST(a), WT_IDL_LAST(b) );
382 a[0] = NOID;
383 a[1] = ida;
384 a[2] = idb;
385 return 0;
386 }
387
388 ida = wt_idl_first( a, &cursora );
389 idb = wt_idl_first( b, &cursorb );
390
391 cursorc = b[0];
392
393 /* The distinct elements of a are cat'd to b */
394 while( ida != NOID || idb != NOID ) {
395 if ( ida < idb ) {
396 if( ++cursorc > WT_IDL_UM_MAX ) {
397 goto over;
398 }
399 b[cursorc] = ida;
400 ida = wt_idl_next( a, &cursora );
401
402 } else {
403 if ( ida == idb )
404 ida = wt_idl_next( a, &cursora );
405 idb = wt_idl_next( b, &cursorb );
406 }
407 }
408
409 /* b is copied back to a in sorted order */
410 a[0] = cursorc;
411 cursora = 1;
412 cursorb = 1;
413 cursorc = b[0]+1;
414 while (cursorb <= b[0] || cursorc <= a[0]) {
415 if (cursorc > a[0])
416 idb = NOID;
417 else
418 idb = b[cursorc];
419 if (cursorb <= b[0] && b[cursorb] < idb)
420 a[cursora++] = b[cursorb++];
421 else {
422 a[cursora++] = idb;
423 cursorc++;
424 }
425 }
426
427 return 0;
428 }
429
430
431 #if 0
/*
 * wt_idl_notin - return a intersection ~b (or a minus b)
 *
 * The result is written to ids; always returns 0.  When a or b is
 * empty, or either one is a range, the result is simply a copy of a
 * (no subtraction is attempted).
 *
 * NOTE: this function is currently compiled out by the enclosing #if 0.
 */
int
wt_idl_notin(
ID *a,
ID *b,
ID *ids )
{
ID ida, idb;
ID cursora = 0, cursorb = 0;

if( WT_IDL_IS_ZERO( a ) ||
WT_IDL_IS_ZERO( b ) ||
WT_IDL_IS_RANGE( b ) )
{
WT_IDL_CPY( ids, a );
return 0;
}

if( WT_IDL_IS_RANGE( a ) ) {
WT_IDL_CPY( ids, a );
return 0;
}

/* the two initialisations below are joined by a comma operator */
ida = wt_idl_first( a, &cursora ),
idb = wt_idl_first( b, &cursorb );

ids[0] = 0;

/* walk both sorted lists in step, keeping the IDs found only in a */
while( ida != NOID ) {
if ( idb == NOID ) {
/* we could shortcut this */
ids[++ids[0]] = ida;
ida = wt_idl_next( a, &cursora );

} else if ( ida < idb ) {
ids[++ids[0]] = ida;
ida = wt_idl_next( a, &cursora );

} else if ( ida > idb ) {
idb = wt_idl_next( b, &cursorb );

} else {
/* present in both lists: drop it */
ida = wt_idl_next( a, &cursora );
idb = wt_idl_next( b, &cursorb );
}
}

return 0;
}
483 #endif
484
/* Start iterating over ids at the position described by *cursor.
 *
 * On entry *cursor is 0 to start from the beginning, or an ID to start
 * from the first element that is >= that ID.  Returns the ID at the
 * starting position, or NOID if there is none.
 *
 * For a range, *cursor is raised to the lower bound if it was below it
 * and then holds the current ID.  For a list, *cursor is set to the
 * element's position; it is left untouched when NOID is returned
 * because the start lies past the end.  For an empty IDL, *cursor is
 * set to NOID.
 */
ID wt_idl_first( ID *ids, ID *cursor )
{
	ID pos = 1;

	if ( ids[0] == 0 ) {
		*cursor = NOID;
		return NOID;
	}

	if ( WT_IDL_IS_RANGE( ids ) ) {
		if ( *cursor < ids[1] ) {
			*cursor = ids[1];
		}
		return *cursor;
	}

	/* a non-zero cursor is an ID to seek to */
	if ( *cursor != 0 ) {
		pos = wt_idl_search( ids, *cursor );
	}

	if ( pos > ids[0] ) {
		return NOID;
	}

	*cursor = pos;
	return ids[pos];
}
513
/* Advance *cursor by one and return the ID it now refers to.
 *
 * For a range, *cursor is the ID itself and NOID is returned once it
 * passes the upper bound ids[2].  For a list, *cursor is a position and
 * NOID is returned once it passes ids[0].  *cursor is incremented in
 * every case.
 */
ID wt_idl_next( ID *ids, ID *cursor )
{
	ID pos;

	if ( WT_IDL_IS_RANGE( ids ) ) {
		pos = ++(*cursor);
		return ( ids[2] < pos ) ? NOID : pos;
	}

	pos = ++(*cursor);
	return ( pos <= ids[0] ) ? ids[pos] : NOID;
}
529
530 /* Add one ID to an unsorted list. We ensure that the first element is the
531 * minimum and the last element is the maximum, for fast range compaction.
532 * this means IDLs up to length 3 are always sorted...
533 */
wt_idl_append_one(ID * ids,ID id)534 int wt_idl_append_one( ID *ids, ID id )
535 {
536 if (WT_IDL_IS_RANGE( ids )) {
537 /* if already in range, treat as a dup */
538 if (id >= WT_IDL_RANGE_FIRST(ids) && id <= WT_IDL_RANGE_LAST(ids))
539 return -1;
540 if (id < WT_IDL_RANGE_FIRST(ids))
541 ids[1] = id;
542 else if (id > WT_IDL_RANGE_LAST(ids))
543 ids[2] = id;
544 return 0;
545 }
546 if ( ids[0] ) {
547 ID tmp;
548
549 if (id < ids[1]) {
550 tmp = ids[1];
551 ids[1] = id;
552 id = tmp;
553 }
554 if ( ids[0] > 1 && id < ids[ids[0]] ) {
555 tmp = ids[ids[0]];
556 ids[ids[0]] = id;
557 id = tmp;
558 }
559 }
560 ids[0]++;
561 if ( ids[0] >= WT_IDL_UM_MAX ) {
562 ids[0] = NOID;
563 ids[2] = id;
564 } else {
565 ids[ids[0]] = id;
566 }
567 return 0;
568 }
569
570 /* Append sorted list b to sorted list a. The result is unsorted but
571 * a[1] is the min of the result and a[a[0]] is the max.
572 */
wt_idl_append(ID * a,ID * b)573 int wt_idl_append( ID *a, ID *b )
574 {
575 ID ida, idb, tmp, swap = 0;
576
577 if ( WT_IDL_IS_ZERO( b ) ) {
578 return 0;
579 }
580
581 if ( WT_IDL_IS_ZERO( a ) ) {
582 WT_IDL_CPY( a, b );
583 return 0;
584 }
585
586 ida = WT_IDL_LAST( a );
587 idb = WT_IDL_LAST( b );
588 if ( WT_IDL_IS_RANGE( a ) || WT_IDL_IS_RANGE(b) ||
589 a[0] + b[0] >= WT_IDL_UM_MAX ) {
590 a[2] = IDL_MAX( ida, idb );
591 a[1] = IDL_MIN( a[1], b[1] );
592 a[0] = NOID;
593 return 0;
594 }
595
596 if ( b[0] > 1 && ida > idb ) {
597 swap = idb;
598 a[a[0]] = idb;
599 b[b[0]] = ida;
600 }
601
602 if ( b[1] < a[1] ) {
603 tmp = a[1];
604 a[1] = b[1];
605 } else {
606 tmp = b[1];
607 }
608 a[0]++;
609 a[a[0]] = tmp;
610
611 if ( b[0] > 1 ) {
612 int i = b[0] - 1;
613 AC_MEMCPY(a+a[0]+1, b+2, i * sizeof(ID));
614 a[0] += i;
615 }
616 if ( swap ) {
617 b[b[0]] = swap;
618 }
619 return 0;
620 }
621
622 #if 1
623
/* Quicksort + Insertion sort for small arrays */

/* Sub-ranges of at most SMALL elements are finished with insertion sort */
#define SMALL 8
/* Exchange two lvalues through the local variable itmp.  Expands to three
 * bare statements, so use it only as a full statement or inside braces.
 */
#define SWAP(a,b) itmp=(a);(a)=(b);(b)=itmp

/* Sort the list ids[1..ids[0]] in place, ascending.  Ranges are left
 * untouched.  tmp is scratch space that is reinterpreted as an array of
 * int and used as the stack of pending sub-ranges.
 * NOTE(review): the required size of tmp is not visible here -- confirm
 * against callers.
 */
void
wt_idl_sort( ID *ids, ID *tmp )
{
int *istack = (int *)tmp; /* Private stack, not used by caller */
int i,j,k,l,ir,jstack;
ID a, itmp;

if ( WT_IDL_IS_RANGE( ids ))
return;

/* [l, ir] is the sub-range being sorted; jstack is the stack depth */
ir = ids[0];
l = 1;
jstack = 0;
for(;;) {
if (ir - l < SMALL) { /* Insertion sort */
for (j=l+1;j<=ir;j++) {
a = ids[j];
/* the scan may run down to index 1 rather than l; presumably
 * everything left of l is already <= the elements of this
 * sub-range, so it stops by l-1 at the latest
 */
for (i=j-1;i>=1;i--) {
if (ids[i] <= a) break;
ids[i+1] = ids[i];
}
ids[i+1] = a;
}
/* nothing pending: done; otherwise pop the next sub-range */
if (jstack == 0) break;
ir = istack[jstack--];
l = istack[jstack--];
} else {
k = (l + ir) >> 1; /* Choose median of left, center, right */
SWAP(ids[k], ids[l+1]);
/* order the three samples so that ids[l] <= ids[l+1] <= ids[ir] */
if (ids[l] > ids[ir]) {
SWAP(ids[l], ids[ir]);
}
if (ids[l+1] > ids[ir]) {
SWAP(ids[l+1], ids[ir]);
}
if (ids[l] > ids[l+1]) {
SWAP(ids[l], ids[l+1]);
}
i = l+1;
j = ir;
a = ids[l+1]; /* partitioning element */
for(;;) {
/* ids[l] and ids[ir] act as sentinels for these two scans */
do i++; while(ids[i] < a);
do j--; while(ids[j] > a);
if (j < i) break;
SWAP(ids[i],ids[j]);
}
/* put the partitioning element into its final slot j */
ids[l+1] = ids[j];
ids[j] = a;
jstack += 2;
/* push the larger side, continue with the smaller one */
if (ir-i+1 >= j-l) {
istack[jstack] = ir;
istack[jstack-1] = i;
ir = j-1;
} else {
istack[jstack] = j-1;
istack[jstack-1] = l;
l = i;
}
}
}
}
691
692 #else
693
/* 8 bit Radix sort + insertion sort
 *
 * based on code from http://www.cubic.org/docs/radix.htm
 * with improvements by ebackes@symas.com and hyc@symas.com
 *
 * This code is O(n) but has a relatively high constant factor. For lists
 * up to ~50 Quicksort is slightly faster; up to ~100 they are even.
 * Much faster than quicksort for lists longer than ~100. Insertion
 * sort is actually superior for lists <50.
 *
 * NOTE: this variant is the #else branch of "#if 1" and is therefore
 * not compiled at present.
 */

/* one bucket per possible byte value */
#define BUCKETS (1<<8)
/* lists of at most SMALL elements are handled by insertion sort */
#define SMALL 50

/* Sort the list ids[1..ids[0]] in place, ascending.  Ranges are left
 * untouched.  tmp is a second buffer the passes alternate with; tmp[0]
 * and tmp[1..size] are written.
 */
void
wt_idl_sort( ID *ids, ID *tmp )
{
int count, soft_limit, phase = 0, size = ids[0];
ID *idls[2];
/* bytes of the last element; presumably the largest ID in the list
 * (callers are expected to keep the maximum last -- TODO confirm)
 */
unsigned char *maxv = (unsigned char *)&ids[size];

if ( WT_IDL_IS_RANGE( ids ))
return;

/* Use insertion sort for small lists */
if ( size <= SMALL ) {
int i,j;
ID a;

for (j=1;j<=size;j++) {
a = ids[j];
for (i=j-1;i>=1;i--) {
if (ids[i] <= a) break;
ids[i+1] = ids[i];
}
ids[i+1] = a;
}
return;
}

tmp[0] = size;
idls[0] = ids;
idls[1] = tmp;

/* soft_limit = index of the most significant non-zero byte of *maxv;
 * bytes more significant than that are not processed
 */
#if BYTE_ORDER == BIG_ENDIAN
for (soft_limit = 0; !maxv[soft_limit]; soft_limit++);
#else
for (soft_limit = sizeof(ID)-1; !maxv[soft_limit]; soft_limit--);
#endif

/* one pass per byte, from least significant up to soft_limit */
for (
#if BYTE_ORDER == BIG_ENDIAN
count = sizeof(ID)-1; count >= soft_limit; --count
#else
count = 0; count <= soft_limit; ++count
#endif
) {
unsigned int num[BUCKETS], * np, n, sum;
int i;
ID *sp, *source, *dest;
unsigned char *bp, *source_start;

/* phase selects which buffer is read; the other one is written */
source = idls[phase]+1;
dest = idls[phase^1]+1;
source_start = ((unsigned char *) source) + count;

np = num;
for ( i = BUCKETS; i > 0; --i ) *np++ = 0;

/* count occurrences of every byte value */
bp = source_start;
for ( i = size; i > 0; --i, bp += sizeof(ID) )
num[*bp]++;

/* transform count into index by summing elements and storing
 * into same array
 */
sum = 0;
np = num;
for ( i = BUCKETS; i > 0; --i ) {
n = *np;
*np++ = sum;
sum += n;
}

/* fill dest with the right values in the right place */
bp = source_start;
sp = source;
for ( i = size; i > 0; --i, bp += sizeof(ID) ) {
np = num + *bp;
dest[*np] = *sp++;
++(*np);
}
phase ^= 1;
}

/* copy back from temp if needed */
if ( phase ) {
ids++; tmp++;
for ( count = 0; count < size; ++count )
*ids++ = *tmp++;
}
}
797 #endif /* Quick vs Radix */
798
799